Files
Python_CookBook_repo/6.数据编码与处理/12.读取嵌套型和大小可变的二进制结构.py
2025-09-10 16:12:45 +08:00

220 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import struct, itertools
if __name__ == "__main__":
# 有时候,我们需要将一系列嵌套的可变长度记录与二进制编码之间做一些转换
# Sample input: a list of polygons, each polygon being a list of (x, y) points.
# The polygons deliberately have different point counts (3, 4 and 3).
polys = [
    [
        (1.0, 2.5),
        (3.5, 4.0),
        (2.5, 1.5),
    ],
    [
        (7.0, 1.2),
        (5.1, 3.0),
        (0.5, 7.5),
        (0.8, 9.0),
    ],
    [
        (3.4, 6.3),
        (1.2, 0.5),
        (4.6, 9.2),
    ],
]
# 现在我们需要将它组织成下面这样的二进制结构
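# File header layout
"""
Byte    Type    Description
0       int     File code (0x1234, little-endian)
4       double  Minimum x (little-endian)
12      double  Minimum y (little-endian)
20      double  Maximum x (little-endian)
28      double  Maximum y (little-endian)
36      int     Number of polygons (little-endian)
"""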
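# File body layout (one record per polygon)
"""
Byte    Type    Description
0       int     Record length in bytes, counting this 4-byte length field (N bytes)
4-N     Points  (X, Y) coordinates, each pair stored as two little-endian doubles
"""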
# 正常情况下,我们通过文件的具体结构来组织二进制文件的写入和读取
def write_ploys(filename, ploys):
    """Write a list of polygons to *filename* in the binary polygon format.

    The file starts with a 40-byte little-endian header (file code 0x1234,
    bounding box min_x/min_y/max_x/max_y, polygon count), followed by one
    record per polygon: a 4-byte length (payload size plus the 4 bytes of the
    length field itself) and then the points as pairs of doubles.

    Args:
        filename: Path of the file to create or overwrite.
        ploys: Sequence of polygons; each polygon is an iterable of (x, y)
            pairs.

    An empty ``ploys`` produces a header-only file whose bounding box is all
    zeros, instead of failing inside ``min()``/``max()``.
    """
    header_struct = struct.Struct('<iddddi')
    length_struct = struct.Struct('<i')
    point_struct = struct.Struct('<dd')

    # Flatten the nested point lists once to compute the bounding box.
    flattened = list(itertools.chain.from_iterable(ploys))
    xs = [x for x, _ in flattened]
    ys = [y for _, y in flattened]
    min_x = min(xs, default=0.0)
    min_y = min(ys, default=0.0)
    max_x = max(xs, default=0.0)
    max_y = max(ys, default=0.0)

    with open(filename, 'wb') as f:
        f.write(header_struct.pack(0x1234, min_x, min_y, max_x, max_y, len(ploys)))
        for ploy in ploys:
            # Pack the whole polygon first so the record goes out in one write.
            payload = b''.join(point_struct.pack(*pt) for pt in ploy)
            # The stored length counts the length field as well as the payload.
            f.write(length_struct.pack(len(payload) + length_struct.size) + payload)
def read_ploys(filename):
    """Read polygons back from a file in the binary polygon format.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of polygons, each a list of ``(x, y)`` tuples of floats.

    Raises:
        struct.error: If the header, a length field, or a record's point data
            is shorter than expected.
    """
    header_struct = struct.Struct('<iddddi')
    length_struct = struct.Struct('<i')
    point_struct = struct.Struct('<dd')

    with open(filename, 'rb') as f:
        # Only the polygon count from the header is needed to drive the loop.
        *_, num_ploys = header_struct.unpack(f.read(header_struct.size))

        ploys = []
        for _ in range(num_ploys):
            pbytes, = length_struct.unpack(f.read(length_struct.size))
            # The stored length includes the 4-byte length field; integer
            # division by the point size (16) yields the number of points.
            # Clamp at zero so a negative length never becomes read(-n).
            num_points = max(pbytes // point_struct.size, 0)
            wanted = num_points * point_struct.size
            # One read per record instead of one per point.
            data = f.read(wanted)
            if len(data) != wanted:
                raise struct.error(
                    'truncated polygon record: expected %d bytes, got %d'
                    % (wanted, len(data))
                )
            ploys.append(list(point_struct.iter_unpack(data)))
    return ploys
# One-off generation of the sample file, left disabled; it would pass the
# module-level `polys` list to write_ploys.
# NOTE(review): the disabled call writes under "6.数据编码与处理/" while the read
# below uses a bare "12.test.bin" — confirm which working directory is expected.
# write_ploys("6.数据编码与处理/12.test.bin", ploys=polys)
# Read the polygons back with the function-based reader and print them.
data = read_ploys("12.test.bin")
print(data)
# 正常来说,我们是用上面这种方法来读取数据,但是这样很乱,所以有了基于类的升级款:
# 字段数据,在外层被 .属性 调用时__get__方法会运作
class StructField:
    """Descriptor that decodes one struct field from the owner's ``_buffer``.

    Attribute access on an instance unpacks ``format`` at ``offset`` from
    ``instance._buffer``; access on the class returns the descriptor itself.
    """

    def __init__(self, format, offset):
        # struct format string and byte offset of the field inside the buffer
        self.format, self.offset = format, offset

    def __get__(self, instance, cls):
        # Accessed through the class: hand back the descriptor object.
        if instance is None:
            return self
        values = struct.unpack_from(self.format, instance._buffer, self.offset)
        # A single-value format yields the bare value; anything else stays a tuple.
        if len(values) == 1:
            return values[0]
        return values
class Structure:
    """Base class that keeps the raw bytes as a memoryview in ``_buffer``."""

    def __init__(self, bytedata):
        # memoryview wraps the given buffer without copying it.
        view = memoryview(bytedata)
        self._buffer = view
# 然后我们就可以定义一个文件头
class PolyHeader(Structure):
    """Polygon file header with every field's format and offset spelled out."""

    # 4-byte little-endian int at the start of the buffer
    file_code = StructField(format='<i', offset=0)
    # four little-endian doubles, 8 bytes apart
    min_x = StructField(format='<d', offset=4)
    min_y = StructField(format='<d', offset=12)
    max_x = StructField(format='<d', offset=20)
    max_y = StructField(format='<d', offset=28)
    # 4-byte little-endian int following the doubles
    num_poly = StructField(format='<i', offset=36)
# Load the 40-byte header and read one field through the descriptor.
with open("12.test.bin", 'rb') as f:
    phead = PolyHeader(f.read(40))
# The header object holds its own bytes, so the field can be read after close.
print(phead.min_x)
# 但是这样还是很麻烦为什么呢因为我还要定义好大一个PolyHeader类里面还要写死一些东西
# 于是就有了上面方案的进化元类版本
# 元类
class StructureMeta(type):
    """Metaclass that turns a ``_fields_`` list into StructField descriptors.

    ``_fields_`` is a list of ``(format, field_name)`` pairs. A format may
    begin with a struct byte-order character (``<``, ``>``, ``!``, ``@`` or
    ``=``); that character is remembered and applied to every following field
    that does not specify its own. Offsets are the running sum of each field's
    ``struct.calcsize``; the total is stored on the class as ``struct_size``.
    """

    def __init__(self, clsname, bases, clsdict):
        super().__init__(clsname, bases, clsdict)
        fields = getattr(self, '_fields_', [])
        byte_order = ''
        offset = 0
        for fmt, field_name in fields:
            # '=' is a valid struct prefix too; without it the prefix would
            # not be carried over to the fields that follow.
            if fmt.startswith(('<', '>', '!', '@', '=')):
                byte_order, fmt = fmt[0], fmt[1:]
            fmt = byte_order + fmt
            setattr(self, field_name, StructField(fmt, offset))
            offset += struct.calcsize(fmt)
        setattr(self, 'struct_size', offset)
# 改进的structure类
class Structure_v2(metaclass=StructureMeta):
    """Structure base whose fields are generated by StructureMeta."""

    def __init__(self, bytedata):
        # Keep a memoryview of the raw bytes for the field descriptors to read.
        view = memoryview(bytedata)
        self._buffer = view

    # Alternate constructor: called on the class itself, no instance required.
    @classmethod
    def from_file(cls, f):
        """Read ``cls.struct_size`` bytes from *f* and wrap them in ``cls``."""
        raw = f.read(cls.struct_size)
        return cls(raw)
# 经过修改之后我们只需要告诉类字段名称和格式就行了
class PolyHeader_v2(Structure_v2):
    """Polygon file header declared only by field format and name."""

    # Only the first entry carries the '<' prefix; the rest give bare type codes.
    _fields_ = [
        ('<i', 'file_code'),
        # the four bounding-box doubles share the same 'd' format
        *(('d', bound) for bound in ('min_x', 'min_y', 'max_x', 'max_y')),
        ('i', 'num_polys'),
    ]
# Let the class decide how many bytes to read via its computed struct_size.
with open("12.test.bin", 'rb') as f:
    phead = PolyHeader_v2.from_file(f)
# The header object holds its own bytes, so the field can be read after close.
print(phead.max_y)
# 这个东西还能继续优化,比如加入一些新功能
# 改进的元类如果输入的是一个对象就设置为NestedStruct如果是格式就设置为字段
class StructureMeta_v2(type):
    """Metaclass that builds field descriptors, including nested structures.

    ``_fields_`` is a list of ``(format, field_name)`` pairs. When ``format``
    is itself a class created by this metaclass, the field becomes a
    NestedStruct occupying ``format.struct_size`` bytes. Otherwise ``format``
    is a struct format string and the field becomes a StructField. A leading
    byte-order character (``<``, ``>``, ``!``, ``@`` or ``=``) is remembered
    and applied to later string formats that lack one. The total size is
    stored on the class as ``struct_size``.
    """

    def __init__(self, clsname, bases, clsdict):
        super().__init__(clsname, bases, clsdict)
        fields = getattr(self, '_fields_', [])
        byte_order = ''
        offset = 0
        for fmt, field_name in fields:
            if isinstance(fmt, StructureMeta_v2):
                # Nested structure: decoded on access from a slice of the buffer.
                setattr(self, field_name, NestedStruct(field_name, fmt, offset))
                offset += fmt.struct_size
                continue
            # '=' is a valid struct prefix too; without it the prefix would
            # not be carried over to the fields that follow.
            if fmt.startswith(('<', '>', '!', '@', '=')):
                byte_order, fmt = fmt[0], fmt[1:]
            fmt = byte_order + fmt
            setattr(self, field_name, StructField(fmt, offset))
            offset += struct.calcsize(fmt)
        setattr(self, 'struct_size', offset)
# Descriptor for a nested structure field, built on access from a slice of the owner's buffer
class NestedStruct:
    """Descriptor exposing a nested structure stored inside the owner's buffer.

    On first access through an instance, the slice of ``instance._buffer``
    covering ``struct_type.struct_size`` bytes from ``offset`` is wrapped in
    ``struct_type`` and the result is stored on the instance under ``name``.
    """

    def __init__(self, name, struct_type, offset):
        self.name, self.struct_type, self.offset = name, struct_type, offset

    def __get__(self, instance, cls):
        # Accessed through the class: hand back the descriptor object.
        if instance is None:
            return self
        start = self.offset
        stop = start + self.struct_type.struct_size
        nested = self.struct_type(instance._buffer[start:stop])
        # Store the result on the instance so later lookups find it there
        # instead of calling this descriptor again.
        setattr(instance, self.name, nested)
        return nested
# 改进的structure类基础方法在init里设置memoryview来进行懒加载
class Structure_v3(metaclass=StructureMeta_v2):
    """Structure base whose fields (plain or nested) come from StructureMeta_v2."""

    def __init__(self, bytedata):
        # Keep a memoryview of the raw bytes; fields are decoded on access.
        view = memoryview(bytedata)
        self._buffer = view

    # Alternate constructor: called on the class itself, no instance required.
    @classmethod
    def from_file(cls, f):
        """Read ``cls.struct_size`` bytes from *f* and wrap them in ``cls``."""
        raw = f.read(cls.struct_size)
        return cls(raw)
class Point(Structure_v3):
    """An (x, y) pair of doubles; 'y' has no byte-order prefix of its own."""

    _fields_ = [('<d', 'x'), ('d', 'y')]
class PloyHeader(Structure_v3):
    """Polygon file header whose bounding-box corners are nested Point structures."""

    _fields_ = [
        ('<i', 'file_code'),
        # both corners are Point structures, in the order min then max
        *((Point, corner) for corner in ('min', 'max')),
        ('i', 'num_polys'),
    ]
# Read the header with nested Point fields and print the 'min' corner object.
with open("12.test.bin", 'rb') as f:
    phead = PloyHeader.from_file(f)
# The header object holds its own bytes, so the field can be read after close.
print(phead.min)