2025-09-10:仓库迁移

2025-09-10 16:12:45 +08:00
parent e0e49b0ac9
commit 3130e336a1
146 changed files with 4066 additions and 0 deletions
--- a/2.字符串和文本/7.定义实现最短匹配的正则表达式.py
+++ b/2.字符串和文本/7.定义实现最短匹配的正则表达式.py
@@ -0,0 +1,18 @@
+import re
+
+if __name__ == '__main__':
+    # Demo of greedy vs. shortest (non-greedy) matching: by default the re library is greedy, i.e. it looks for the longest matching string
+    text1 = 'Computer says "no".'
+    text2 = 'Computer says "yes" and "no".'
+    # Greedy matching is harmless in simple cases, but it goes wrong with paired delimiters (double quotes being the most typical example)
+    said = re.compile(r'\"(.*)\"')
+    print(said.findall(text1))  # -> ['no']
+
+    # Look: this prints everything between the two quotes that are farthest apart
+    print(said.findall(text2))  # -> ['yes" and "no']
+
+    # The culprit is the . metacharacter: by default .* matches every character except \n, and that includes double quotes
+    # The fix is to switch off the greedy behaviour and ask for the shortest match instead
+    # Technically, we append a ? right after .* to make the quantifier non-greedy
+    said_shot = re.compile(r'\"(.*?)\"')
+    print(said_shot.findall(text2))  # -> ['yes', 'no']