2025-09-10:仓库迁移

This commit is contained in:
2025-09-10 16:12:45 +08:00
parent e0e49b0ac9
commit 3130e336a1
146 changed files with 4066 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
import unicodedata
def strip_combining_marks(text):
    """Return *text* with every combining character removed.

    The text is first decomposed with NFD so that precomposed characters
    (for example U+00F1, n with tilde) are split into a base character
    followed by its combining marks.  Every character for which
    ``unicodedata.combining()`` reports a non-zero canonical combining
    class is then dropped.
    """
    decomposed = unicodedata.normalize('NFD', text)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


def main():
    """Print a short walkthrough of the Unicode normalization forms."""
    precomposed = 'Spicy Jalape\u00f1o'   # single code point U+00F1
    combined = 'Spicy Jalapen\u0303o'     # 'n' followed by combining tilde
    # The same visible text can have more than one Unicode representation,
    # so the two strings render alike but do not compare equal.
    print(precomposed, combined)
    print(precomposed == combined)

    # That is a problem when comparing strings, so the text has to be
    # normalized first.  There are two canonical forms: NFC (fully
    # composed) and NFD (decomposed into base + combining characters).
    nfd_precomposed = unicodedata.normalize('NFD', precomposed)
    nfd_combined = unicodedata.normalize('NFD', combined)
    print(nfd_precomposed, nfd_combined)
    print(nfd_precomposed == nfd_combined)
    print(ascii(nfd_precomposed))
    nfc_precomposed = unicodedata.normalize('NFC', precomposed)
    print(ascii(nfc_precomposed))

    # unicodedata also offers NFKC and NFKD.  These apply compatibility
    # mappings as well, which splits characters such as the ligature below;
    # plain NFD leaves it untouched.
    ligature = '\ufb01'
    print(ligature)
    print(unicodedata.normalize('NFD', ligature))
    print(unicodedata.normalize('NFKC', ligature))
    print(unicodedata.normalize('NFKD', ligature))

    # To remove the tilde, decompose with NFD and then drop the combining
    # characters.  unicodedata.combining() is what tells us whether a
    # character is a combining character.
    print(nfd_precomposed)
    print(strip_combining_marks(precomposed))


if __name__ == '__main__':
    main()