2025-09-10:仓库迁移
This commit is contained in:
40
5.文件与IO/16.为已经打开的文件修改编码方式.py
Normal file
40
5.文件与IO/16.为已经打开的文件修改编码方式.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import urllib.request
|
||||
import io
|
||||
import gzip
|
||||
|
||||
import requests
|
||||
|
||||
def decode_response(response, encoding='utf-8'):
    """Return a text-mode wrapper around a binary HTTP response body.

    If the response declares ``Content-Encoding: gzip`` (or ``x-gzip``), the
    body is transparently decompressed before the text layer is added, so the
    caller always reads decoded characters rather than compressed bytes.

    Args:
        response: A readable binary stream. If it has a ``headers`` mapping
            (as ``http.client.HTTPResponse`` does), its ``Content-Encoding``
            entry is consulted; streams without ``headers`` are treated as
            uncompressed.
        encoding: Text encoding used to decode the (decompressed) bytes.

    Returns:
        An ``io.TextIOWrapper`` layered on top of the response.
    """
    headers = getattr(response, 'headers', None)
    content_encoding = ''
    if headers is not None:
        content_encoding = (headers.get('Content-Encoding') or '').strip().lower()
    if content_encoding in ('gzip', 'x-gzip'):
        # GzipFile is itself a buffered binary stream, so it can sit between
        # the raw response and the text layer.
        response = gzip.GzipFile(fileobj=response)
    return io.TextIOWrapper(response, encoding=encoding)


if __name__ == "__main__":
    req = urllib.request.Request('http://www.python.org/')
    # Sending Accept-Encoding tells the server it MAY compress the body; it
    # does not make the server decompress anything for us. Only advertise
    # gzip, because that is the one scheme decode_response() can undo.
    req.add_header('Accept-Encoding', 'gzip')

    # The context manager guarantees the connection is closed even if
    # decoding fails part-way through.
    with urllib.request.urlopen(req) as u:
        # To read the binary response as UTF-8 text, wrap it in an
        # io.TextIOWrapper (after undoing gzip when the server used it).
        text = decode_response(u, encoding='utf-8')
        print(text.read(10))

        # If a stream is already open in text mode and a different encoding
        # is wanted, detach the existing text layer and wrap the underlying
        # binary stream again with the new encoding.
        text = io.TextIOWrapper(text.detach(), encoding='latin-1')
        print(text.read(10))

    # A file opened in text mode is made of three layers: the raw file, a
    # binary buffer over it, and the text encoding/decoding layer on top.
    with open("5.文件与IO/1.somefile.txt", 'rt') as f:
        # Text (decoder) layer
        print(f)
        # Binary buffer layer
        print(f.buffer)
        # Raw file layer
        print(f.buffer.raw)

    # The safest way to change the decoder is to call detach() to get the
    # underlying buffer back, then wrap that buffer in a new io.TextIOWrapper.
    # `with` cannot be used on `f` here: once detached, the old wrapper is
    # unusable and closing it would raise ValueError.
    f = open("5.文件与IO/1.somefile.txt", 'rt')
    print(f)
    b = f.detach()
    print(b)
    # Re-wrap the buffer with the new encoding; closing the new wrapper also
    # closes the buffer and the raw file beneath it.
    with io.TextIOWrapper(b, encoding='latin-1', errors='xmlcharrefreplace') as new:
        print(new)
|
||||
|
||||
|
Reference in New Issue
Block a user