2025-09-10:仓库迁移

2025-09-10 16:12:45 +08:00
parent e0e49b0ac9
commit 3130e336a1
146 changed files with 4066 additions and 0 deletions
--- a/6.数据编码与处理/12.读取嵌套型和大小可变的二进制结构.py
+++ b/6.数据编码与处理/12.读取嵌套型和大小可变的二进制结构.py
@@ -0,0 +1,220 @@
+import struct, itertools
+
+if __name__ == "__main__":
+    # Sometimes a series of nested, variable-length records has to be converted
+    # to and from a binary encoding.
+    # Sample data: a list of polygons, each polygon being a list of (x, y) points.
+    polys = [
+        [(1.0, 2.5), (3.5, 4.0), (2.5, 1.5)],
+        [(7.0, 1.2), (5.1, 3.0), (0.5, 7.5), (0.8, 9.0)],
+        [(3.4, 6.3), (1.2, 0.5), (4.6, 9.2)]
+    ]
+    # 现在我们需要将它组织成下面这样的二进制结构
+    # 文件头
+    """
+    字节      类型              描述
+     0        int       文件代码(0x1234，小端)
+     4       double         x的最小值(小端)
+    12       double         y的最小值(小端)
+    20       double         x的最大值(小端)
+    28       double         y的最大值(小端)
+    36        int           多边形数量(小端)
+    """
+    # 文件内容
+    """
+    字节      类型              描述
+     0        int           记录长度(N字节)
+    4-N      Points     (X,Y)坐标，以浮点数表示
+    """
+
+    # 正常情况下，我们通过文件的具体结构来组织二进制文件的写入和读取
+
+    def write_ploys(filename, ploys):
+        # 将嵌套的多个点列表展开变成一个大的点列表
+        flattened = list(itertools.chain(*ploys))
+        min_x = min(x for x, y in flattened)
+        min_y = min(y for x, y in flattened)
+        max_x = max(x for x, y in flattened)
+        max_y = max(y for x, y in flattened)
+
+        with open(filename, 'wb') as f:
+            # 将数据按结构写入结构体中打包写入文件
+            f.write(struct.pack('<iddddi', 0x1234, min_x, min_y, max_x, max_y, len(ploys)))
+
+            for ploy in ploys:
+                # 计算需要多少空间
+                size = len(ploy) * struct.calcsize('<dd')
+                # 写入记录长度
+                f.write(struct.pack('<i', size + 4))
+                # 将所有点坐标写入
+                for pt in ploy:
+                    f.write(struct.pack('<dd', *pt))
+
+    def read_ploys(filename):
+        with open(filename, 'rb') as f:
+            # 按照设计好的数据结构读出文件头
+            header = f.read(40)
+            # 解包文件头里的信息
+            file_code, min_x, min_y, max_x, max_y, num_ploys = struct.unpack('<iddddi', header)
+            # 初始化数组，用于存放即将解包的点坐标组
+            ploys = []
+            for n in range(num_ploys):
+                # 用于存储还原的一组点坐标
+                ploy = []
+                # 得到一组的数据有多长
+                pbytes,  = struct.unpack('<i', f.read(4))
+                # 因为一对点坐标是两个double，字节长16，所以一共要读长度//16次，一次读出16字节解包
+                for m in range(pbytes//16):
+                    pt = struct.unpack('<dd', f.read(16))
+                    ploy.append(pt)
+                ploys.append(ploy)
+
+        return ploys
+
+    # The sample file has to exist before it can be read; the write call below is
+    # left commented out.
+    # NOTE(review): the commented-out write path differs from the path read
+    # below -- confirm the intended working directory.
+    # write_ploys("6.数据编码与处理/12.test.bin", ploys=polys)
+    data = read_ploys("12.test.bin")
+    print(data)
+
+    # 正常来说，我们是用上面这种方法来读取数据，但是这样很乱，所以有了基于类的升级款：
+
+    # 字段数据，在外层被 .属性 调用时__get__方法会运作
+    class StructField:
+        # 默认方法，标明格式和偏移量
+        def __init__(self, format, offset):
+            self.format = format
+            self.offset = offset
+
+        # 被 .属性 调用时，外层obj作为instance进入函数
+        def __get__(self, instance, cls):
+            if instance is None:
+                return self
+            else:
+                # 从Structure._buffer里把文件薅出来解包
+                r = struct.unpack_from(self.format, instance._buffer, self.offset)
+                return r[0] if len(r) == 1 else r
+
+    class Structure:
+        def __init__(self, bytedata):
+            self._buffer = memoryview(bytedata)
+
+    # 然后我们就可以定义一个文件头
+    class PolyHeader(Structure):
+        """File header described field by field with explicit byte offsets."""
+
+        # Each field is a (struct format, byte offset) pair, all little-endian.
+        file_code = StructField('<i', 0)
+        # Bounding-box values, each an 8-byte double.
+        min_x = StructField('<d', 4)
+        min_y = StructField('<d', 12)
+        max_x = StructField('<d', 20)
+        max_y = StructField('<d', 28)
+        # Number of polygon records that follow the header.
+        num_poly = StructField('<i', 36)
+
+    # Read the first 40 bytes of the sample file and decode a single field
+    # through the descriptor.
+    with open("12.test.bin", 'rb') as f:
+        phead = PolyHeader(f.read(40))
+        print(phead.min_x)
+
+    # 但是这样还是很麻烦，为什么呢？因为我还要定义好大一个PolyHeader类，里面还要写死一些东西
+    # 于是就有了上面方案的进化元类版本
+
+    # 元类
+    class StructureMeta(type):
+        # 默认方法，通过init函数生成类字段
+        def __init__(self, clsname, bases, clsdict):
+            fields = getattr(self, '_fields_', [])
+            byte_order = ''
+            offset = 0
+            for format, field_name in fields:
+                if format.startswith(('<', '>', '!', '@')):
+                    byte_order = format[0]
+                    format = format[1:]
+                format = byte_order + format
+                setattr(self, field_name, StructField(format, offset))
+                offset += struct.calcsize(format)
+            setattr(self, 'struct_size', offset)
+
+    # 改进的structure类
+    class Structure_v2(metaclass=StructureMeta):
+        def __init__(self, bytedata):
+            self._buffer = memoryview(bytedata)
+
+        # 类的方法,在实例化以后才能被调用
+        @classmethod
+        def from_file(cls, f):
+            return cls(f.read(cls.struct_size))
+
+    # 经过修改之后我们只需要告诉类字段名称和格式就行了
+    class PolyHeader_v2(Structure_v2):
+        """File header declared only by field formats and names."""
+
+        # (format, name) pairs; the '<' on the first entry is carried over to
+        # the following entries by the metaclass, which also derives the offsets.
+        _fields_ = [
+            ('<i', 'file_code'),
+            ('d', 'min_x'),
+            ('d', 'min_y'),
+            ('d', 'max_x'),
+            ('d', 'max_y'),
+            ('i', 'num_polys'),
+        ]
+
+    # from_file reads exactly struct_size bytes, so no length is hard-coded here.
+    with open("12.test.bin", 'rb') as f:
+        phead = PolyHeader_v2.from_file(f)
+        print(phead.max_y)
+
+    # 这个东西还能继续优化,比如加入一些新功能
+
+    # 改进的元类，如果输入的是一个对象，就设置为NestedStruct，如果是格式，就设置为字段
+    class StructureMeta_v2(type):
+        # 默认方法，通过init函数生成类字段
+        def __init__(self, clsname, bases, clsdict):
+            fields = getattr(self, '_fields_', [])
+            byte_order = ''
+            offset = 0
+            for format, field_name in fields:
+                if isinstance(format, StructureMeta_v2):
+                    setattr(self, field_name, NestedStruct(field_name, format, offset))
+                    offset += format.struct_size
+                else:
+                    if format.startswith(('<', '>', '!', '@')):
+                        byte_order = format[0]
+                        format = format[1:]
+                    format = byte_order + format
+                    setattr(self, field_name, StructField(format, offset))
+                    offset += struct.calcsize(format)
+            setattr(self, 'struct_size', offset)
+
+
+    # 嵌套结构描述符：首次访问时从外层缓冲区切片并构造内层结构
+    class NestedStruct:
+        def __init__(self, name, struct_type, offset):
+            self.name = name
+            self.struct_type = struct_type
+            self.offset = offset
+
+        def __get__(self, instance, cls):
+            if instance is None:
+                return self
+            else:
+                data = instance._buffer[self.offset: self.offset + self.struct_type.struct_size]
+                result = self.struct_type(data)
+                setattr(instance, self.name, result)
+                return result
+
+    # 改进的structure类，基础方法在init里设置memoryview来进行懒加载
+    class Structure_v3(metaclass=StructureMeta_v2):
+        def __init__(self, bytedata):
+            self._buffer = memoryview(bytedata)
+
+        # 类的方法,在实例化以后才能被调用
+        @classmethod
+        def from_file(cls, f):
+            return cls(f.read(cls.struct_size))
+
+    class Point(Structure_v3):
+        """A pair of little-endian doubles, usable as a nested field type."""
+
+        # The '<' on the first entry is carried over to 'y' by the metaclass.
+        _fields_ = [
+            ('<d', 'x'),
+            ('d', 'y')
+        ]
+
+    class PloyHeader(Structure_v3):
+        """File header whose bounding box is expressed as two nested Points."""
+
+        # Entries whose first element is a structure class become nested fields;
+        # string entries are plain struct formats.
+        _fields_ = [
+            ('<i', 'file_code'),
+            (Point, 'min'),
+            (Point, 'max'),
+            ('i', 'num_polys'),
+        ]
+
+    # Read the header and access a nested field; phead.min is a Point object,
+    # so this prints the object itself rather than its coordinates.
+    with open("12.test.bin", 'rb') as f:
+        phead = PloyHeader.from_file(f)
+        print(phead.min)