Files
Python_CookBook_repo/5.文件与IO/16.为已经打开的文件修改编码方式.py
2025-09-10 16:12:45 +08:00

41 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import urllib.request
import io
import gzip
import requests
def decompressed(stream, content_encoding):
    """Return a binary stream that yields the decoded body of *stream*.

    Args:
        stream: Binary file-like object positioned at the start of the body.
        content_encoding: Value of the ``Content-Encoding`` response header,
            or ``None`` when the header is absent.

    Returns:
        A ``gzip.GzipFile`` reading from *stream* when the header names gzip,
        otherwise *stream* itself, unchanged.
    """
    if (content_encoding or "").strip().lower() in ("gzip", "x-gzip"):
        # Closing the GzipFile does not close *stream*; the caller keeps
        # ownership of the underlying object.
        return gzip.GzipFile(fileobj=stream, mode='rb')
    return stream


def reencode(text, encoding, errors=None):
    """Replace the decoding layer of an already-open text stream.

    The existing text layer is removed with ``detach()`` and the binary
    buffer underneath it is wrapped in a new ``io.TextIOWrapper``.

    Args:
        text: Open text stream; it is unusable after this call.
        encoding: Encoding name for the new text layer.
        errors: Optional error-handler name passed to ``io.TextIOWrapper``.

    Returns:
        The new ``io.TextIOWrapper``; its ``buffer`` attribute is the binary
        layer that was detached from *text*.
    """
    return io.TextIOWrapper(text.detach(), encoding=encoding, errors=errors)


def show_network_recoding(url):
    """Fetch *url* and print the start of its body under two encodings.

    Args:
        url: Address to request.
    """
    req = urllib.request.Request(url)
    # Accept-Encoding tells the server which compressed forms we can handle;
    # the server does not decompress anything for us. Only gzip is advertised
    # because gzip is the only encoding undone below.
    req.add_header('Accept-Encoding', 'gzip')
    # The response is closed here even when the body went through a GzipFile,
    # which leaves its underlying file object open.
    with urllib.request.urlopen(req) as u:
        body = decompressed(u, u.headers.get('Content-Encoding'))
        # Put a utf-8 decoding layer on top of the binary stream.
        text = io.TextIOWrapper(body, encoding='utf-8')
        print(text.read(10))
        # A stream that is already open as text can have its decoding layer
        # removed and replaced by another one.
        # NOTE(review): the first wrapper may already have pulled more than
        # ten characters' worth of bytes from the stream, so this second read
        # need not continue right where the first one stopped.
        with reencode(text, 'latin-1') as text:
            print(text.read(10))


def show_file_layers(path):
    """Print the three layers of a text file, then swap its decoder.

    Args:
        path: Path of an existing file to open in text mode.
    """
    # A file opened in text mode is three stacked objects: the text decoder,
    # a binary buffer, and the raw file.
    with open(path, 'rt') as f:
        # Decoder layer
        print(f)
        # Binary buffer
        print(f.buffer)
        # Raw file
        print(f.buffer.raw)
    # To change the decoder, detach() down to the buffer and wrap the buffer
    # in a new io.TextIOWrapper. A ``with`` block cannot be used on ``f``
    # here: closing a wrapper whose buffer was detached raises ValueError.
    f = open(path, 'rt')
    print(f)
    # Closing the new wrapper also closes the buffer and the raw file.
    with reencode(f, 'latin-1', errors='xmlcharrefreplace') as new:
        # The buffer that detach() handed back
        print(new.buffer)
        # The buffer wrapped in its new decoding layer
        print(new)


if __name__ == "__main__":
    show_network_recoding('http://www.python.org/')
    show_file_layers("5.文件与IO/1.somefile.txt")