"""sstruct.py -- SuperStruct

Higher level layer on top of the struct module, enabling to
bind names to struct elements. The interface is similar to
struct, except the objects passed and returned are not tuples
(or argument lists), but dictionaries or instances.

Just like struct, we use fmt strings to describe a data
structure, except we use one line per element. Lines are
separated by newlines or semi-colons. Each line contains
either one of the special struct characters ('@', '=', '<',
'>' or '!') or a 'name:formatchar' combo (eg. 'myFloat:f').
Repetitions, like the struct module offers them are not useful
in this context, except for fixed length strings (eg. 'myInt:5h'
is not allowed but 'myString:5s' is). The 'x' fmt character
(pad byte) is treated as 'special', since it is by definition
anonymous. A pad byte may also be written with a name
(eg. 'reserved:x'); the name is only a label, no value is read
from or stored under it by pack() and unpack(). Extra whitespace
is allowed everywhere.

The sstruct module offers one feature that the "normal" struct
module doesn't: support for fixed point numbers. These are spelled
as "n.mF", where n is the number of bits before the point, and m
the number of bits after the point. Fixed point numbers get
converted to floats.

pack(fmt, object):
    'object' is either a dictionary or an instance (or actually
    anything that has a __dict__ attribute). If it is a dictionary,
    its keys are used for names. If it is an instance, its
    attributes are used to grab struct elements from. Returns
    a bytes object containing the data.

unpack(fmt, data, object=None)
    If 'object' is omitted (or None), a new dictionary will be
    returned. If 'object' is a dictionary, it will be used to add
    struct elements to. If it is an instance (or in fact anything
    that has a __dict__ attribute), an attribute will be added for
    each struct element. In the latter two cases, 'object' itself
    is returned.

unpack2(fmt, data, object=None)
    Convenience function. Same as unpack, except data may be longer
    than needed. The returned value is a tuple: (object, leftoverdata).

calcsize(fmt)
    like struct.calcsize(), but uses our own fmt strings:
    it returns the size of the data in bytes.
"""

from fontTools.misc.fixedTools import fixedToFloat as fi2fl, floatToFixed as fl2fi
from fontTools.misc.textTools import tobytes, tostr
import struct
import re

__version__ = "1.2"
__copyright__ = "Copyright 1998, Just van Rossum <[email protected]>"


class Error(Exception):
    """Raised by getformat() when a format description cannot be parsed."""


def pack(fmt, obj):
    """Pack the named fields of *obj* into bytes according to *fmt*.

    *obj* is a dictionary, or any object with a ``__dict__``. Every named,
    value-carrying element of *fmt* is looked up in it; a missing name
    raises KeyError. Fixed point fields are converted from float, and str
    values are converted to bytes before packing.
    """
    # Pad bytes (struct 'x') consume no argument in struct.pack(), so only the
    # names that actually carry a value may contribute an element here.
    formatstring, names, fixes = getformat(fmt)
    if not isinstance(obj, dict):
        obj = obj.__dict__
    elements = []
    for name in names:
        value = obj[name]
        if name in fixes:
            # fixed point conversion
            value = fl2fi(value, fixes[name])
        elif isinstance(value, str):
            value = tobytes(value)
        elements.append(value)
    return struct.pack(formatstring, *elements)


def unpack(fmt, data, obj=None):
    """Unpack *data* according to *fmt* and store the fields by name.

    If *obj* is None a new dictionary is created and returned. Otherwise the
    fields are stored in *obj* (as keys if it is a dict, as attributes via
    its ``__dict__`` if not) and *obj* itself is returned. Fixed point
    fields are converted to float; bytes fields are converted to str when
    they can be decoded, and left as bytes otherwise.
    """
    if obj is None:
        obj = {}
    data = tobytes(data)
    formatstring, names, fixes = getformat(fmt)
    target = obj if isinstance(obj, dict) else obj.__dict__
    elements = struct.unpack(formatstring, data)
    for name, value in zip(names, elements):
        if name in fixes:
            # fixed point conversion
            value = fi2fl(value, fixes[name])
        elif isinstance(value, bytes):
            try:
                value = tostr(value)
            except UnicodeDecodeError:
                # Deliberate best effort: undecodable data stays as bytes.
                pass
        target[name] = value
    return obj


def unpack2(fmt, data, obj=None):
    """Like unpack(), but *data* may be longer than *fmt* requires.

    Returns a tuple ``(object, leftoverdata)``.
    """
    length = calcsize(fmt)
    return unpack(fmt, data[:length], obj), data[length:]


def calcsize(fmt):
    """Return the size in bytes of the structure described by *fmt*."""
    formatstring, _names, _fixes = getformat(fmt)
    return struct.calcsize(formatstring)


# matches "name:formatchar" (whitespace is allowed)
_elementRE = re.compile(
    r"\s*"  # whitespace
    r"([A-Za-z_][A-Za-z_0-9]*)"  # name (python identifier)
    r"\s*:\s*"  # whitespace : whitespace
    r"([xcbB?hHiIlLqQfd]|"  # formatchar...
    r"[0-9]+[ps]|"  # ...formatchar...
    r"([0-9]+)\.([0-9]+)(F))"  # ...formatchar
    r"\s*"  # whitespace
    r"(#.*)?$"  # [comment] + end of string
)

# matches the special struct fmt chars and 'x' (pad byte)
_extraRE = re.compile(r"\s*([x@=<>!])\s*(#.*)?$")

# matches an "empty" string, possibly containing whitespace and/or a comment
_emptyRE = re.compile(r"\s*(#.*)?$")

# total bit width of a fixed point number -> struct format character
_fixedpointmappings = {8: "b", 16: "h", 32: "l"}

# (fmt, keep_pad_byte) -> (formatstring, names, fixes)
_formatcache = {}


def getformat(fmt, keep_pad_byte=False):
    """Parse an sstruct format description.

    Returns a tuple ``(formatstring, names, fixes)``:

    - ``formatstring`` is the equivalent format for the struct module;
    - ``names`` is the list of element names, in order of appearance. Named
      pad bytes ('name:x') are only included when *keep_pad_byte* is true;
    - ``fixes`` maps the name of each fixed point element to its number of
      fractional bits.

    Results are cached; the returned list and dict are shared with the cache
    and should not be modified by the caller.

    Raises Error on a syntax error, a misplaced special format character, or
    a fixed point size other than 8, 16 or 32 bits.
    """
    fmt = tostr(fmt, encoding="ascii")
    # The list of names depends on keep_pad_byte, so it has to be part of the
    # cache key; otherwise the first caller decides what everyone else gets.
    cache_key = (fmt, bool(keep_pad_byte))
    try:
        return _formatcache[cache_key]
    except KeyError:
        pass

    formatchars = []
    names = []
    fixes = {}
    for line in re.split("[\n;]", fmt):
        if _emptyRE.match(line):
            continue
        m = _extraRE.match(line)
        if m:
            formatchar = m.group(1)
            # Byte order/alignment characters are only valid up front;
            # anonymous pad bytes may appear anywhere.
            if formatchar != "x" and formatchars:
                raise Error("a special fmt char must be first")
        else:
            m = _elementRE.match(line)
            if not m:
                raise Error("syntax error in fmt: '%s'" % line)
            name = m.group(1)
            formatchar = m.group(2)
            if keep_pad_byte or formatchar != "x":
                names.append(name)
            if m.group(3):
                # fixed point
                before = int(m.group(3))
                after = int(m.group(4))
                bits = before + after
                if bits not in _fixedpointmappings:
                    raise Error("fixed point must be 8, 16 or 32 bits long")
                formatchar = _fixedpointmappings[bits]
                assert m.group(5) == "F"
                fixes[name] = after
        formatchars.append(formatchar)

    result = "".join(formatchars), names, fixes
    _formatcache[cache_key] = result
    return result


def _test():
    """Pack and unpack a sample structure, printing the results."""
    fmt = """
        # comments are allowed
        >  # big endian (see documentation for struct)
        # empty lines are allowed:

        ashort: h
        along: l
        abyte: b	# a byte
        achar: c
        astr: 5s
        afloat: f; adouble: d	# multiple "statements" are allowed
        afixed: 16.16F
        abool: ?
        apad: x
    """

    print("size:", calcsize(fmt))

    class foo(object):
        pass

    i = foo()

    i.ashort = 0x7FFF
    i.along = 0x7FFFFFFF
    i.abyte = 0x7F
    i.achar = "a"
    i.astr = "12345"
    i.afloat = 0.5
    i.adouble = 0.5
    i.afixed = 1.5
    i.abool = True

    data = pack(fmt, i)
    print("data:", repr(data))
    print(unpack(fmt, data))
    i2 = foo()
    unpack(fmt, data, i2)
    print(vars(i2))


if __name__ == "__main__":
    _test()