Python_CookBook_repo/5.文件与IO/16.为已经打开的文件修改编码方式.py

import urllib.request
import io
import gzip

import requests

def decompressed_body(stream, content_encoding):
    """Return a binary stream that yields the uncompressed payload of *stream*.

    Args:
        stream: Readable binary file-like object (e.g. an HTTP response).
        content_encoding: Value of the ``Content-Encoding`` response header,
            or ``None`` when the header is absent.

    Returns:
        A ``gzip.GzipFile`` reading from *stream* when the header says the
        body is gzip-compressed; otherwise *stream* itself, unchanged.
    """
    if content_encoding and content_encoding.strip().lower() == 'gzip':
        # GzipFile is a buffered binary stream, so io.TextIOWrapper can be
        # layered on top of it exactly like on the raw response.
        return gzip.GzipFile(fileobj=stream, mode='rb')
    return stream


def main():
    """Demonstrate adding or swapping the text encoding of an open stream."""
    req = urllib.request.Request('http://www.python.org/')
    # "Accept-Encoding" tells the server which compressions the client can
    # handle; the server does NOT decompress for us.  Only gzip is advertised
    # because that is the only scheme undone below.
    req.add_header('Accept-Encoding', 'gzip')
    with urllib.request.urlopen(req) as u:
        body = decompressed_body(u, u.headers.get('Content-Encoding'))
        # To read the binary stream as UTF-8 text, wrap it in
        # io.TextIOWrapper.
        text = io.TextIOWrapper(body, encoding='utf-8')
        print(text.read(10))

        # To change the encoding of a stream that is already wrapped as text,
        # detach the current text layer and wrap the buffer again.  Data the
        # first wrapper had already pulled from the buffer is not replayed.
        text = io.TextIOWrapper(text.detach(), encoding='latin-1')
        print(text.read(10))

    # A file opened in text mode consists of three layers: the raw file, a
    # binary buffer on top of it, and the text decoder on top of the buffer.
    with open("5.文件与IO/1.somefile.txt", 'rt') as f:
        # Text (decoder) layer
        print(f)
        # Binary buffer layer
        print(f.buffer)
        # Raw file layer
        print(f.buffer.raw)

    # The clean way to change the decoder is to detach() down to the buffer
    # layer and then wrap that buffer in a new io.TextIOWrapper.
    f = open("5.文件与IO/1.somefile.txt", 'rt')
    print(f)
    b = f.detach()
    print(b)
    # Re-wrap the buffer with the new encoding; leaving the with-block closes
    # the wrapper together with the underlying buffer and file.
    with io.TextIOWrapper(b, encoding='latin-1',
                          errors='xmlcharrefreplace') as new:
        print(new)


if __name__ == "__main__":
    main()