Files
Python_CookBook_repo/2.字符串和文本/4.使用正则对文本进行匹配和查找.py
2025-09-10 16:12:45 +08:00

49 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
if __name__ == '__main__':
    # Plain prefix / suffix / substring checks need nothing more than the
    # built-in str methods. The results are discarded here; the calls are
    # listed only to show which methods exist.
    url = "http://www.baidu.com"
    url.startswith("http")
    url.endswith(".com")
    url.find("baidu")

    # Anything more structured than a literal substring calls for the re
    # module. text1 has the digits/digits/digits shape, text2 does not.
    text1 = '11/27/2012'
    text2 = 'Nov 27, 2012'
    for sample in (text1, text2):
        print("yes" if re.match(r'\d+/\d+/\d+', sample) else "no")

    # The module-level re.match is convenient for a one-off check; when the
    # same pattern is applied repeatedly, compile it once and reuse it.
    datepat = re.compile(r'\d+/\d+/\d+')
    for sample in (text1, text2):
        print("yes" if datepat.match(sample) else "no")

    # NOTE: match() only looks at the beginning of the string. To pull the
    # pattern out of surrounding noise, use findall() instead.
    # Capture groups let each component of the date be extracted separately:
    # group(0) is the whole match, groups 1-3 are the individual numbers.
    datepat = re.compile(r'(\d+)/(\d+)/(\d+)')
    m = datepat.match(text1)
    for group_index in range(4):
        print(m.group(group_index))

    # match() anchors only the start and ignores whatever follows the match;
    # append the end anchor `$` when the pattern must also reach the end.
    datepat = re.compile(r'(\d+)/(\d+)/(\d+)$')