Files
Python_CookBook_repo/6.数据编码与处理/7.用命名空间来解析XML文档.py
2025-09-10 16:12:45 +08:00

35 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from xml.etree.ElementTree import parse
class XMLNameSpace:
    """Expand short namespace prefixes inside ElementTree search paths.

    Each registered prefix is stored as its URI wrapped in braces, i.e. the
    ``{uri}`` form that must precede a tag name when searching a
    namespaced document.  Calling the instance with a path such as
    ``'content/{html}html'`` replaces every ``{prefix}`` placeholder with
    the corresponding ``{uri}`` text.
    """

    def __init__(self, **kwargs: str) -> None:
        """Register every ``prefix=uri`` keyword argument."""
        # Maps prefix -> '{uri}', ready to be substituted into a path.
        self.namespaces = {}
        for name, url in kwargs.items():
            self.register(name, url)

    def register(self, name: str, url: str) -> None:
        """Map prefix *name* to *url*, replacing any earlier mapping."""
        self.namespaces[name] = '{' + url + '}'

    def __call__(self, path: str) -> str:
        """Return *path* with each ``{prefix}`` placeholder expanded.

        Raises:
            KeyError: if *path* uses a prefix that was never registered.
        """
        # format_map looks placeholders up directly in the prefix table.
        return path.format_map(self.namespaces)
if __name__ == '__main__':
    # Demo: querying an XML document that uses namespaces. With plain
    # ElementTree paths this quickly becomes tedious.
    doc = parse('6.数据编码与处理/7.exp.xml')

    # Elements outside any declared namespace can be looked up by bare name.
    print(doc.findtext('author'))
    print(doc.find('content'))

    # 'html' is not in the global (un-namespaced) scope, so a bare tag
    # name does not find it.
    print(doc.find('content/html'))

    # Every level that lives inside a declared namespace must be qualified
    # with the {uri} prefix.
    print(doc.find('content/{http://www.w3.org/1999/xhtml}html'))

    # Remember: *every* level.
    # Does not work: 'head' is missing its namespace qualifier.
    print(doc.find('content/{http://www.w3.org/1999/xhtml}html/head'))
    # Works: both 'html' and 'head' are fully qualified.
    print(doc.find('content/{http://www.w3.org/1999/xhtml}html/{http://www.w3.org/1999/xhtml}head'))

    # Typing the full namespace URI by hand at every level is painful, so
    # let a small helper class expand short prefixes instead.
    ns = XMLNameSpace(html="http://www.w3.org/1999/xhtml")
    print(doc.find(ns('content/{html}html')))