# Source listing metadata: Python, 49 lines, 1.3 KiB
import re
|
|||
|
|
|||
|
def main():
    """Demonstrate literal string checks and basic ``re`` matching.

    Every check prints its result to stdout so the script actually shows
    what each call returns. Nothing is returned.
    """
    # For plain literal matching or searching, these three str methods are
    # enough. Their results are printed; a bare call would discard them.
    url = "http://www.baidu.com"
    print(url.startswith("http"))
    print(url.endswith(".com"))
    print(url.find("baidu"))

    # For anything more complex, use regular expressions from the re module.
    text1 = '11/27/2012'
    text2 = 'Nov 27, 2012'

    for text in (text1, text2):
        print("yes" if re.match(r'\d+/\d+/\d+', text) else "no")

    # re.match() is fine for a one-off check; when the same pattern is applied
    # repeatedly, compile it once and reuse the compiled object.
    datepat = re.compile(r'\d+/\d+/\d+')

    for text in (text1, text2):
        print("yes" if datepat.match(text) else "no")

    # match() only looks at the start of the string. When the target is buried
    # inside other text, use findall() to collect every occurrence.
    print(datepat.findall('Today is 11/27/2012. PyCon starts 3/13/2013.'))

    # Capture groups let each part of the match be extracted on its own.
    datepat = re.compile(r'(\d+)/(\d+)/(\d+)')
    m = datepat.match(text1)
    # match() returns None on failure, so guard before calling group().
    if m is not None:
        # Group 0 is the whole match; groups 1-3 are the captured parts.
        for index in range(4):
            print(m.group(index))

    # match() anchors only the start and ignores what follows the match.
    # Appending `$` to the pattern also pins the end. Note that `$` still
    # tolerates a single trailing newline; fullmatch() or \Z is stricter.
    datepat = re.compile(r'(\d+)/(\d+)/(\d+)$')
    for text in (text1, text1 + 'abc'):
        print("yes" if datepat.match(text) else "no")


if __name__ == '__main__':
    main()
|
|||
|
|