# xref: /aosp_15_r20/external/fonttools/Lib/fontTools/tfmLib.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1"""Module for reading TFM (TeX Font Metrics) files.
2
3The TFM format is described in the TFtoPL WEB source code, whose typeset form
4can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.
5
6	>>> from fontTools.tfmLib import TFM
7	>>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
8	>>>
9	>>> # Accessing an attribute gets you metadata.
10	>>> tfm.checksum
11	1274110073
12	>>> tfm.designsize
13	10.0
14	>>> tfm.codingscheme
15	'TeX text'
16	>>> tfm.family
17	'CMR'
18	>>> tfm.seven_bit_safe_flag
19	False
20	>>> tfm.face
21	234
22	>>> tfm.extraheader
23	{}
24	>>> tfm.fontdimens
25	{'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
26	>>> # Accessing a character gets you its metrics.
27	>>> # “width” is always available, other metrics are available only when
28	>>> # applicable. All values are relative to “designsize”.
29	>>> tfm.chars[ord("g")]
30	{'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
31	>>> # Kerning and ligature can be accessed as well.
32	>>> tfm.kerning[ord("c")]
33	{104: -0.02777862548828125, 107: -0.02777862548828125}
34	>>> tfm.ligatures[ord("f")]
35	{105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
36"""
37
38from types import SimpleNamespace
39
40from fontTools.misc.sstruct import calcsize, unpack, unpack2
41
# sstruct layout of the leading "sizes" record of a TFM file: twelve
# big-endian (">") signed 16-bit ("h") integers.  Every length is counted in
# words; TFM._read treats a word as 4 bytes.
SIZES_FORMAT = """
    >
    lf: h    # length of the entire file, in words
    lh: h    # length of the header data, in words
    bc: h    # smallest character code in the font
    ec: h    # largest character code in the font
    nw: h    # number of words in the width table
    nh: h    # number of words in the height table
    nd: h    # number of words in the depth table
    ni: h    # number of words in the italic correction table
    nl: h    # number of words in the ligature/kern table
    nk: h    # number of words in the kern table
    ne: h    # number of words in the extensible character table
    np: h    # number of font parameter words
"""

# Size in bytes of the sizes record; input shorter than this is rejected
# before any unpacking is attempted.
SIZES_SIZE = calcsize(SIZES_FORMAT)
59
# sstruct fixed-point field specification ("12.20F") used for every
# dimension read from the file (design size, metrics, kerns, parameters).
FIXED_FORMAT = "12.20F"

# The header is optional beyond its first two words, so four cumulative
# layouts are defined; each one embeds the previous layout (including its
# ">" byte-order marker) and appends further fields.  TFM._read picks the
# longest layout that fits in the header length declared by the file.
HEADER_FORMAT1 = f"""
    >
    checksum:            L
    designsize:          {FIXED_FORMAT}
"""

HEADER_FORMAT2 = f"""
    {HEADER_FORMAT1}
    codingscheme:        40p
"""

HEADER_FORMAT3 = f"""
    {HEADER_FORMAT2}
    family:              20p
"""

HEADER_FORMAT4 = f"""
    {HEADER_FORMAT3}
    seven_bit_safe_flag: ?
    ignored:             x
    ignored:             x
    face:                B
"""

# Byte sizes of the four cumulative header layouts above.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)
90
# sstruct layout of one ligature/kern instruction: four unsigned bytes.
# TFM._read interprets them as follows:
#   skip_byte - values >= STOP_FLAG end the current program; otherwise it is
#               the number of instructions to skip to reach the next one
#   next_char - the right-hand character the instruction applies to
#   op_byte   - values >= KERN_FLAG mark a kern, lower values a ligature
#   remainder - low byte of the kern index, or the ligature result character
LIG_KERN_COMMAND = """
    >
    skip_byte: B
    next_char: B
    op_byte: B
    remainder: B
"""
98
# Names given to the first seven font parameters (zero-based indices 0..6)
# regardless of the font type.
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Names of parameters 7..21 (zero-based) when the font type is MATHSY.
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Names of parameters 7..12 (zero-based) when the font type is MATHEX.
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]
135
# Font types stored in TFM.fonttype; chosen from the coding scheme string.
VANILLA = 0
MATHSY = 1
MATHEX = 2

# NOTE(review): these three are not referenced elsewhere in this module;
# presumably reachability states for ligature/kern instructions - confirm
# before relying on them.
UNREACHABLE = 0
PASSTHROUGH = 1
ACCESSABLE = 2
# Correctly spelled alias; the misspelled name above is kept because it is
# part of the module's public namespace.
ACCESSIBLE = ACCESSABLE

# Values of the two-bit character tag, which decides how the per-character
# "remainder" byte is interpreted.
NO_TAG = 0  # remainder unused
LIG_TAG = 1  # remainder is the start of the character's lig/kern program
LIST_TAG = 2  # remainder is the next larger character
EXT_TAG = 3  # remainder indexes the extensible recipe table

# Thresholds applied to ligature/kern instruction bytes.
STOP_FLAG = 128  # compared against skip_byte
KERN_FLAG = 128  # compared against op_byte
151
152
class TFMException(Exception):
    """Raised when a TFM file fails one of the structural sanity checks."""

    def __init__(self, message):
        super().__init__(message)
156
157
class TFM:
    """In-memory representation of a TFM (TeX Font Metrics) file.

    The whole file is parsed when the object is constructed.  Afterwards the
    instance exposes:

    * ``checksum``, ``designsize``, ``codingscheme``, ``family``,
      ``seven_bit_safe_flag``, ``face``: header fields.  Fields that the
      file's header is too short to contain keep their zero/empty defaults.
      ``face`` is converted to a three-letter code when its value is below
      18 and left as an integer otherwise.
    * ``extraheader``: dict of the header words that follow the standard
      fields, keyed ``"HEADER18"``, ``"HEADER19"``, ...
    * ``fonttype``: ``VANILLA``, ``MATHSY`` or ``MATHEX``, derived from the
      coding scheme.
    * ``fontdimens``: dict mapping parameter names to their values.
    * ``right_boundary_char`` / ``left_boundary_char``: boundary character
      codes, or ``None`` when the file declares none.
    * ``chars``: dict mapping character codes to dicts of metrics
      (``width`` always; ``height``, ``depth``, ``italic``, ``nextlarger``
      and ``varchar`` only when applicable).
    * ``kerning``: ``{left: {right: amount}}``.
    * ``ligatures``: ``{left: {right: (operation, result)}}``.
    """

    def __init__(self, file):
        """Parse *file*.

        Args:
            file: either an object with a ``read()`` method (expected to
                return the raw bytes of the file) or a path that is opened
                in binary mode.

        Raises:
            TFMException: if the file fails a structural sanity check.
        """
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Load *file* and populate every public attribute of the instance.

        Raises:
            TFMException: if the input is too short, declares inconsistent
                or illegal sizes, or its subfile sizes do not add up.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # “errors” in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # "<=" so that the condition matches the message; a zero length could
        # never pass the remaining checks anyway.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        # The leading 6 accounts for the sizes record itself (in words).
        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper function below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        # char_base is biased by -bc so that it can be indexed directly with
        # a character code.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. If this looks
        # nonidiomatic Python, I blame the effect of reading the literate WEB
        # documentation of TFtoPL.
        def char_info(c):
            # Byte offset of the 4-byte info record of character c.
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # A character is absent when it is outside bc..ec or has a zero
            # width index.
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            # High nibble of the second info byte.
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            # Low nibble of the second info byte.
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            # Upper six bits of the third info byte.
            return data[char_info(c) + 2] // 4

        def tag(c):
            # Lower two bits of the third info byte.
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            # Byte offset of the extensible recipe of character c.
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            # Byte offset of the i-th ligature/kern instruction.
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # Decode the instruction stored at byte offset i.
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            # Byte offset of the i-th (zero-based) font parameter.
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Read one fixed-point value at byte offset `index` and store it
            # under `key` in `obj` (a new dict when obj is None).
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the sizes record.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Decode the face byte into weight + slope + expansion
                # letters.
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Any header words beyond the standard layout are kept verbatim.
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        # Parameters get a symbolic name where one is known for the font
        # type and a generic PARAMETERn name (one-based) otherwise.
        self.fontdimens = {}
        for i in range(sizes.np):
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of the first instruction of its
        # ligature/kern program.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first instruction declares the right
            # boundary character.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            # A skip_byte of 255 in the last instruction declares a left
            # boundary program; it is filed under the pseudo character 256.
            cmd = lig_kern_command(lig_step((sizes.nl - 1)))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                if char_tag == NO_TAG:
                    pass
                elif char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    # The recipe is four bytes: top, mid, bot, rep.  The
                    # first three are recorded only when non-zero; rep is
                    # always recorded.
                    info["varchar"] = varchar = {}
                    for i in range(4):
                        part = data[exten(c) + i]
                        if i == 3 or part > 0:
                            name = "rep"
                            if i == 0:
                                name = "top"
                            elif i == 1:
                                name = "mid"
                            elif i == 2:
                                name = "bot"
                            # A piece that names an absent character falls
                            # back to the character itself.
                            if nonexistent(part):
                                varchar[name] = c
                            else:
                                varchar[name] = part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                # Indirect entry: the program really starts at the index
                # encoded in op_byte and remainder.
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                # Instructions whose skip_byte exceeds STOP_FLAG carry no
                # kern or ligature of their own.
                if cmd.skip_byte <= STOP_FLAG:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            # Spell the operation out: a leading "/" when
                            # r % 4 > 1, a trailing "/" when r is odd, and
                            # one ">" for every 4 contained in r.
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1
434
if __name__ == "__main__":
    # Command-line debugging aid: parse one TFM file and dump every parsed
    # attribute, one per line, followed by the object's repr.
    import sys

    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FILE.tfm")

    tfm = TFM(sys.argv[1])
    attribute_names = (
        "checksum",
        "designsize",
        "codingscheme",
        "fonttype",
        "family",
        "seven_bit_safe_flag",
        "face",
        "extraheader",
        "fontdimens",
        "right_boundary_char",
        "left_boundary_char",
        "kerning",
        "ligatures",
        "chars",
    )
    print(
        "\n".join(
            f"tfm.{name}={getattr(tfm, name)}" for name in attribute_names
        )
    )
    print(tfm)
461