xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/sstruct.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1*e1fe3e4aSElliott Hughes"""sstruct.py -- SuperStruct
2*e1fe3e4aSElliott Hughes
Higher level layer on top of the struct module, making it possible
to bind names to struct elements. The interface is similar to
struct, except the objects passed and returned are not tuples
(or argument lists), but dictionaries or instances.
7*e1fe3e4aSElliott Hughes
Just like struct, we use fmt strings to describe a data
structure, except we use one line per element. Lines are
separated by newlines or semi-colons. Each line contains
either one of the special struct characters ('@', '=', '<',
'>' or '!') or a 'name:formatchar' combo (eg. 'myFloat:f').
Repetitions, as offered by the struct module, are not useful
in this context, except for fixed length strings (eg. 'myInt:5h'
is not allowed but 'myString:5s' is). The 'x' fmt character
(pad byte) is treated as 'special', since it is by definition
anonymous. Extra whitespace is allowed everywhere.
18*e1fe3e4aSElliott Hughes
19*e1fe3e4aSElliott HughesThe sstruct module offers one feature that the "normal" struct
20*e1fe3e4aSElliott Hughesmodule doesn't: support for fixed point numbers. These are spelled
21*e1fe3e4aSElliott Hughesas "n.mF", where n is the number of bits before the point, and m
22*e1fe3e4aSElliott Hughesthe number of bits after the point. Fixed point numbers get
23*e1fe3e4aSElliott Hughesconverted to floats.
24*e1fe3e4aSElliott Hughes
pack(fmt, object):
	'object' is either a dictionary or an instance (or actually
	anything that has a __dict__ attribute). If it is a dictionary,
	its keys are used for names. If it is an instance, its
	attributes are used to grab struct elements from. Returns
	a bytes object containing the data.
31*e1fe3e4aSElliott Hughes
32*e1fe3e4aSElliott Hughesunpack(fmt, data, object=None)
33*e1fe3e4aSElliott Hughes	If 'object' is omitted (or None), a new dictionary will be
34*e1fe3e4aSElliott Hughes	returned. If 'object' is a dictionary, it will be used to add
35*e1fe3e4aSElliott Hughes	struct elements to. If it is an instance (or in fact anything
36*e1fe3e4aSElliott Hughes	that has a __dict__ attribute), an attribute will be added for
37*e1fe3e4aSElliott Hughes	each struct element. In the latter two cases, 'object' itself
38*e1fe3e4aSElliott Hughes	is returned.
39*e1fe3e4aSElliott Hughes
40*e1fe3e4aSElliott Hughesunpack2(fmt, data, object=None)
41*e1fe3e4aSElliott Hughes	Convenience function. Same as unpack, except data may be longer
42*e1fe3e4aSElliott Hughes	than needed. The returned value is a tuple: (object, leftoverdata).
43*e1fe3e4aSElliott Hughes
44*e1fe3e4aSElliott Hughescalcsize(fmt)
45*e1fe3e4aSElliott Hughes	like struct.calcsize(), but uses our own fmt strings:
46*e1fe3e4aSElliott Hughes	it returns the size of the data in bytes.
47*e1fe3e4aSElliott Hughes"""
48*e1fe3e4aSElliott Hughes
49*e1fe3e4aSElliott Hughesfrom fontTools.misc.fixedTools import fixedToFloat as fi2fl, floatToFixed as fl2fi
50*e1fe3e4aSElliott Hughesfrom fontTools.misc.textTools import tobytes, tostr
51*e1fe3e4aSElliott Hughesimport struct
52*e1fe3e4aSElliott Hughesimport re
53*e1fe3e4aSElliott Hughes
# Module metadata.
__version__ = "1.2"
# NOTE(review): the address inside the angle brackets reads "[email protected]",
# which looks like an e-mail obfuscation placeholder rather than a real
# address -- confirm against the upstream file before relying on it.
__copyright__ = "Copyright 1998, Just van Rossum <[email protected]>"
56*e1fe3e4aSElliott Hughes
57*e1fe3e4aSElliott Hughes
class Error(Exception):
    """Raised by this module when a format description cannot be parsed."""
60*e1fe3e4aSElliott Hughes
61*e1fe3e4aSElliott Hughes
def pack(fmt, obj):
    """Pack the named values of *obj* into binary data described by *fmt*.

    *fmt* is an sstruct format description (see the module docstring).
    *obj* is either a dictionary, whose keys are the element names, or any
    object with a ``__dict__``, whose attributes are used instead.

    Fixed point elements are converted from float with ``fl2fi``; ``str``
    values are converted with ``tobytes`` before packing.

    Returns the result of ``struct.pack``. Raises ``KeyError`` when *obj*
    lacks one of the names in *fmt*; errors from ``getformat`` and
    ``struct.pack`` propagate unchanged.
    """
    # A pad byte ("x") consumes no argument in struct.pack, so named pad
    # fields must NOT contribute a value. Ask for the default name list,
    # which leaves pad names out; requesting them here made packing fail
    # (or poison the shared format cache for unpack) whenever this was the
    # first call to parse a format containing a named pad byte.
    formatstring, names, fixes = getformat(fmt)
    if not isinstance(obj, dict):
        obj = obj.__dict__
    elements = []
    for name in names:
        value = obj[name]
        if name in fixes:
            # fixed point conversion: float -> integer with fixes[name]
            # fractional bits
            value = fl2fi(value, fixes[name])
        elif isinstance(value, str):
            value = tobytes(value)
        elements.append(value)
    return struct.pack(formatstring, *elements)
77*e1fe3e4aSElliott Hughes
78*e1fe3e4aSElliott Hughes
def unpack(fmt, data, obj=None):
    """Unpack *data* according to *fmt* and store the values by name.

    If *obj* is None a new dictionary is created and returned. If *obj* is
    a dictionary the values are stored as items; otherwise they are stored
    in ``obj.__dict__`` (i.e. as attributes). In both of those cases *obj*
    itself is returned.

    Fixed point elements are converted to float with ``fi2fl``. ``bytes``
    values are converted with ``tostr`` when possible and left as ``bytes``
    when that raises ``UnicodeDecodeError``.

    *data* must match the size of *fmt* exactly; ``struct.unpack`` raises
    otherwise.
    """
    if obj is None:
        obj = {}
    data = tobytes(data)
    formatstring, names, fixes = getformat(fmt)
    target = obj if isinstance(obj, dict) else obj.__dict__
    elements = struct.unpack(formatstring, data)
    # Index into `elements` (rather than zip) so that a name list longer
    # than the unpacked values still fails loudly with IndexError.
    for index, name in enumerate(names):
        value = elements[index]
        if name in fixes:
            # fixed point conversion: integer -> float
            value = fi2fl(value, fixes[name])
        elif isinstance(value, bytes):
            try:
                value = tostr(value)
            except UnicodeDecodeError:
                # Deliberate best effort: keep the raw bytes when they
                # cannot be decoded.
                pass
        target[name] = value
    return obj
102*e1fe3e4aSElliott Hughes
103*e1fe3e4aSElliott Hughes
def unpack2(fmt, data, obj=None):
    """Unpack the leading part of *data* and return the remainder as well.

    Same as ``unpack``, except that *data* may be longer than *fmt*
    requires: only the first ``calcsize(fmt)`` bytes are unpacked.

    Returns a tuple ``(object, leftoverdata)`` where *object* is what
    ``unpack`` returned and *leftoverdata* is ``data`` past the consumed
    prefix.
    """
    size = calcsize(fmt)
    head, rest = data[:size], data[size:]
    return unpack(fmt, head, obj), rest
107*e1fe3e4aSElliott Hughes
108*e1fe3e4aSElliott Hughes
def calcsize(fmt):
    """Return the size in bytes of the data described by *fmt*.

    Like ``struct.calcsize``, but takes an sstruct format description.
    """
    # Only the translated struct format string is needed here; the name
    # list and fixed point table returned by getformat are irrelevant.
    formatstring = getformat(fmt)[0]
    return struct.calcsize(formatstring)
112*e1fe3e4aSElliott Hughes
113*e1fe3e4aSElliott Hughes
# Matches one "name:formatchar" element (whitespace is allowed).
# Groups: 1 = name, 2 = whole format spec, 3/4/5 = integer bits, fraction
# bits and the literal "F" of a fixed point spec (None for other specs),
# 6 = optional trailing comment.
_elementRE = re.compile(
    r"\s*"  # whitespace
    r"([A-Za-z_][A-Za-z_0-9]*)"  # name (python identifier)
    r"\s*:\s*"  # whitespace : whitespace
    r"([xcbB?hHiIlLqQfd]|"  # single struct format char, or...
    r"[0-9]+[ps]|"  # ...counted string ("5s", "5p"), or...
    r"([0-9]+)\.([0-9]+)(F))"  # ...fixed point "n.mF"
    r"\s*"  # whitespace
    r"(#.*)?$"  # [comment] + end of string
)

# Matches an anonymous line holding one of the special struct fmt chars
# ('@', '=', '<', '>', '!') or 'x' (pad byte), plus an optional comment.
_extraRE = re.compile(r"\s*([x@=<>!])\s*(#.*)?$")

# Matches an "empty" string, possibly containing whitespace and/or a comment.
_emptyRE = re.compile(r"\s*(#.*)?$")

# Total bit width of a fixed point number -> struct format char used to
# store it.
_fixedpointmappings = {8: "b", 16: "h", 32: "l"}

# Cache of parsed formats: fmt text -> (formatstring, names, fixes).
_formatcache = {}
135*e1fe3e4aSElliott Hughes
136*e1fe3e4aSElliott Hughes
def getformat(fmt, keep_pad_byte=False):
    """Translate an sstruct format description into struct terms.

    *fmt* is split on newlines and semicolons; each non-empty line is
    either a special struct char / anonymous pad byte, or a
    ``name:formatchar`` element (see the module docstring).

    Returns a tuple ``(formatstring, names, fixes)``:

    - ``formatstring``: the equivalent ``struct`` format string;
    - ``names``: element names in order. Names of pad-byte elements
      (``name: x``) are included only when *keep_pad_byte* is true;
    - ``fixes``: maps the name of each fixed point element to its number
      of fractional bits.

    Raises ``Error`` for an unparsable line, for a special fmt char that
    is not the first item, and for a fixed point width other than 8, 16
    or 32 bits.

    NOTE(review): results are cached by the text of *fmt* only. The value
    of *keep_pad_byte* therefore only takes effect on the call that first
    parses a given *fmt*; later calls get the cached name list whatever
    flag they pass. The returned ``names`` and ``fixes`` are the cached
    objects themselves, so callers should not mutate them.
    """
    fmt = tostr(fmt, encoding="ascii")
    cached = _formatcache.get(fmt)
    if cached is not None:
        return cached

    chars = []  # struct format pieces, joined once at the end
    names = []
    fixes = {}
    for line in re.split("[\n;]", fmt):
        if _emptyRE.match(line):
            continue
        m = _extraRE.match(line)
        if m:
            formatchar = m.group(1)
            # Byte order / alignment chars are only valid up front; an
            # anonymous pad byte may appear anywhere.
            if formatchar != "x" and chars:
                raise Error("a special fmt char must be first")
        else:
            m = _elementRE.match(line)
            if not m:
                raise Error("syntax error in fmt: '%s'" % line)
            name = m.group(1)
            formatchar = m.group(2)
            if keep_pad_byte or formatchar != "x":
                names.append(name)
            if m.group(3):
                # fixed point "n.mF": stored as a plain integer of n + m
                # bits; remember m so values can be converted later
                after = int(m.group(4))
                bits = int(m.group(3)) + after
                if bits not in _fixedpointmappings:
                    raise Error("fixed point must be 8, 16 or 32 bits long")
                formatchar = _fixedpointmappings[bits]
                fixes[name] = after
        chars.append(formatchar)

    result = ("".join(chars), names, fixes)
    _formatcache[fmt] = result
    return result
175*e1fe3e4aSElliott Hughes
176*e1fe3e4aSElliott Hughes
def _test():
    """Small demonstration: pack an instance and unpack it again.

    Prints the computed size, the packed data, the data unpacked into a
    new dictionary and the attributes of a second instance the data was
    unpacked into.
    """
    fmt = """
		# comments are allowed
		>  # big endian (see documentation for struct)
		# empty lines are allowed:

		ashort: h
		along: l
		abyte: b	# a byte
		achar: c
		astr: 5s
		afloat: f; adouble: d	# multiple "statements" are allowed
		afixed: 16.16F
		abool: ?
		apad: x
	"""

    print("size:", calcsize(fmt))

    class foo(object):
        pass

    i = foo()

    # One attribute per named element; the pad byte ("apad") carries no
    # value and is intentionally not set.
    i.ashort = 0x7FFF
    i.along = 0x7FFFFFFF
    i.abyte = 0x7F
    i.achar = "a"
    i.astr = "12345"
    i.afloat = 0.5
    i.adouble = 0.5
    i.afixed = 1.5
    i.abool = True

    data = pack(fmt, i)
    print("data:", repr(data))
    # Unpack once into a fresh dictionary...
    print(unpack(fmt, data))
    # ...and once into an existing instance.
    i2 = foo()
    unpack(fmt, data, i2)
    print(vars(i2))
217*e1fe3e4aSElliott Hughes
218*e1fe3e4aSElliott Hughes
# Run the demonstration when the module is executed as a script.
if __name__ == "__main__":
    _test()
221