41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
|
import urllib.request
|
|||
|
import io
|
|||
|
import gzip
|
|||
|
|
|||
|
import requests
|
|||
|
|
|||
|
def decompress_stream(stream, content_encoding):
    """Return a binary stream that yields the decoded bytes of *stream*.

    Args:
        stream: A readable binary file-like object (for example the response
            returned by ``urllib.request.urlopen``).
        content_encoding: Value of the HTTP ``Content-Encoding`` header, or
            ``None`` when the header is absent.

    Returns:
        A ``gzip.GzipFile`` wrapping *stream* when the body is gzip-encoded,
        otherwise *stream* itself, unchanged.
    """
    coding = (content_encoding or '').strip().lower()
    if coding in ('gzip', 'x-gzip'):
        # GzipFile decompresses lazily as it is read, so the body is still
        # streamed instead of being loaded into memory at once.
        return gzip.GzipFile(fileobj=stream)
    return stream


if __name__ == "__main__":
    req = urllib.request.Request('http://www.python.org/')
    # Accept-Encoding tells the server which compressed encodings we can
    # handle; it does NOT make the server decompress anything for us.  Only
    # advertise gzip, because that is the only coding decoded below.
    req.add_header('Accept-Encoding', 'gzip')

    with urllib.request.urlopen(req) as u:
        # Undo the transfer compression (if any) before decoding text,
        # otherwise the UTF-8 decoder would be fed raw gzip bytes.
        body = decompress_stream(u, u.headers.get('Content-Encoding'))

        # To read the binary response as UTF-8 text, wrap it in an
        # io.TextIOWrapper.
        text = io.TextIOWrapper(body, encoding='utf-8')
        print(text.read(10))

        # To change the encoding of a stream that is already wrapped as text,
        # detach the existing text layer and wrap the binary stream again.
        # Characters the first wrapper had already buffered are not replayed.
        text = io.TextIOWrapper(text.detach(), encoding='latin-1')
        print(text.read(10))

    # A file opened in text mode consists of three layers: the raw file, a
    # binary buffer on top of it, and the text decoder on top of the buffer.
    with open("5.文件与IO/1.somefile.txt", 'rt') as f:
        # Text (decoder) layer.
        print(f)
        # Binary buffer layer.
        print(f.buffer)
        # Raw file layer.
        print(f.buffer.raw)

    # The clean way to swap the decoder is to detach() the text layer, which
    # hands back the underlying buffer, and wrap that buffer in a new
    # io.TextIOWrapper.
    f = open("5.文件与IO/1.somefile.txt", 'rt')
    print(f)
    b = f.detach()
    print(b)
    # Re-wrap the buffer with a different encoding.  `f` is unusable after
    # detach() (closing it would raise ValueError), so the new wrapper is the
    # one responsible for closing the buffer and the file beneath it.
    # NOTE: errors='xmlcharrefreplace' only takes effect when encoding
    # (writing); this file is opened read-only.
    with io.TextIOWrapper(b, encoding='latin-1',
                          errors='xmlcharrefreplace') as new:
        print(new)
|
|||
|
|
|||
|
|