"""Module for reading TFM (TeX Font Metrics) files.

The TFM format is described in the TFtoPL WEB source code, whose typeset form
can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.

    >>> from fontTools.tfmLib import TFM
    >>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
    >>>
    >>> # Accessing an attribute gets you metadata.
    >>> tfm.checksum
    1274110073
    >>> tfm.designsize
    10.0
    >>> tfm.codingscheme
    'TeX text'
    >>> tfm.family
    'CMR'
    >>> tfm.seven_bit_safe_flag
    False
    >>> tfm.face
    234
    >>> tfm.extraheader
    {}
    >>> tfm.fontdimens
    {'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
    >>> # Accessing a character gets you its metrics.
    >>> # “width” is always available, other metrics are available only when
    >>> # applicable. All values are relative to “designsize”.
    >>> tfm.chars[ord("g")]
    {'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
    >>> # Kerning and ligature can be accessed as well.
    >>> tfm.kerning[ord("c")]
    {104: -0.02777862548828125, 107: -0.02777862548828125}
    >>> tfm.ligatures[ord("f")]
    {105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
"""

from types import SimpleNamespace

from fontTools.misc.sstruct import calcsize, unpack, unpack2

# Layout of the twelve 16-bit size fields that open every TFM file.
SIZES_FORMAT = """
    >
    lf: h    # length of the entire file, in words
    lh: h    # length of the header data, in words
    bc: h    # smallest character code in the font
    ec: h    # largest character code in the font
    nw: h    # number of words in the width table
    nh: h    # number of words in the height table
    nd: h    # number of words in the depth table
    ni: h    # number of words in the italic correction table
    nl: h    # number of words in the ligature/kern table
    nk: h    # number of words in the kern table
    ne: h    # number of words in the extensible character table
    np: h    # number of font parameter words
"""

SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct spec used for every fixed-point value read from the file.
FIXED_FORMAT = "12.20F"

# The header may be truncated after any of its fields, so four cumulative
# layouts are defined; each one extends the previous layout.
HEADER_FORMAT1 = f"""
    >
    checksum:            L
    designsize:          {FIXED_FORMAT}
"""

HEADER_FORMAT2 = f"""
    {HEADER_FORMAT1}
    codingscheme:        40p
"""

HEADER_FORMAT3 = f"""
    {HEADER_FORMAT2}
    family:              20p
"""

HEADER_FORMAT4 = f"""
    {HEADER_FORMAT3}
    seven_bit_safe_flag: ?
    ignored:             x
    ignored:             x
    face:                B
"""

HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# One four-byte instruction of the ligature/kern program.
LIG_KERN_COMMAND = """
    >
    skip_byte: B
    next_char: B
    op_byte: B
    remainder: B
"""

# Names given to the first seven font parameters of any font.
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Names given to parameters 8..22 when the coding scheme is "TeX math sy...".
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Names given to parameters 8..13 when the coding scheme is "TeX math ex...".
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]

# Values stored in ``TFM.fonttype``.
VANILLA = 0
MATHSY = 1
MATHEX = 2

UNREACHABLE = 0
PASSTHROUGH = 1
ACCESSABLE = 2

# Values of the two-bit tag field of a char_info word.
NO_TAG = 0
LIG_TAG = 1
LIST_TAG = 2
EXT_TAG = 3

# Thresholds compared against skip_byte and op_byte of a lig/kern command.
STOP_FLAG = 128
KERN_FLAG = 128


class TFMException(Exception):
    """Raised when the input cannot be parsed as a structurally valid TFM file."""

    def __init__(self, message):
        super().__init__(message)


class TFM:
    """In-memory representation of a parsed TFM file.

    After construction the instance exposes the header fields that were
    present in the file (``checksum``, ``designsize``, ``codingscheme``,
    ``family``, ``seven_bit_safe_flag``, ``face``, ``extraheader``) together
    with ``fonttype``, ``fontdimens``, ``right_boundary_char``,
    ``left_boundary_char``, ``chars``, ``ligatures`` and ``kerning``.

    Args:
        file: Either an object with a ``read()`` method returning the file
            content, or a path that is opened in binary mode.

    Raises:
        TFMException: If the file fails one of the structural sanity checks.
    """

    def __init__(self, file):
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Read *file* and populate all attributes of this instance.

        Args:
            file: A readable object (anything with ``read``) or a path.

        Raises:
            TFMException: If the data is shorter than the size preamble, or
                the size fields are inconsistent with each other or with the
                amount of data available.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # “errors” in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # A zero length is rejected here too, as the message promises.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper functions below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. If this looks
        # nonidiomatic Python, I blame the effect of reading the literate WEB
        # documentation of TFtoPL.
        def char_info(c):
            # Byte offset of the four-byte char_info word of character c.
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # Out-of-range codes and codes with a zero width index are absent.
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            # High nibble of the second char_info byte.
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            # Low nibble of the second char_info byte.
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            # Upper six bits of the third char_info byte.
            return data[char_info(c) + 2] // 4

        def tag(c):
            # Lower two bits of the third char_info byte.
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            # Byte offset of the extensible recipe selected by c's remainder.
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            # Byte offset of the i-th lig/kern command.
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # Decode the lig/kern command that starts at byte offset i.
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            # Byte offset of the i-th (zero-based) font parameter.
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Decode one fixed-point value at byte offset index and store it
            # under key in obj; the object that received it is returned.
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        offset = 24  # the header follows the 24-byte size preamble
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Small face codes are expanded to a three-letter string;
                # larger values are kept as plain integers.
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Header words beyond the standard ones are kept verbatim as
            # HEADER18, HEADER19, ...
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        self.fontdimens = {}
        for i in range(sizes.np):
            # Parameters without a known name fall back to PARAMETERn.
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code (or 256 for the left boundary) to the index
        # at which its lig/kern program starts.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first command carries the right
            # boundary character in next_char.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            # A skip_byte of 255 in the last command points at the left
            # boundary program, recorded under the pseudo character 256.
            cmd = lig_kern_command(lig_step((sizes.nl - 1)))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                if char_tag == NO_TAG:
                    pass
                elif char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    info["varchar"] = varchar = {}
                    for i in range(4):
                        part = data[exten(c) + i]
                        # The fourth piece is always recorded; the first
                        # three only when they are nonzero.
                        if i == 3 or part > 0:
                            name = ("top", "mid", "bot", "rep")[i]
                            if nonexistent(part):
                                varchar[name] = c
                            else:
                                varchar[name] = part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                # Indirect entry: the real start index is stored in the
                # op_byte/remainder pair.
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                if cmd.skip_byte > STOP_FLAG:
                    pass
                else:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1


if __name__ == "__main__":
    import sys

    tfm = TFM(sys.argv[1])
    print(
        "\n".join(
            [
                f"tfm.checksum={tfm.checksum}",
                f"tfm.designsize={tfm.designsize}",
                f"tfm.codingscheme={tfm.codingscheme}",
                f"tfm.fonttype={tfm.fonttype}",
                f"tfm.family={tfm.family}",
                f"tfm.seven_bit_safe_flag={tfm.seven_bit_safe_flag}",
                f"tfm.face={tfm.face}",
                f"tfm.extraheader={tfm.extraheader}",
                f"tfm.fontdimens={tfm.fontdimens}",
                f"tfm.right_boundary_char={tfm.right_boundary_char}",
                f"tfm.left_boundary_char={tfm.left_boundary_char}",
                f"tfm.kerning={tfm.kerning}",
                f"tfm.ligatures={tfm.ligatures}",
                f"tfm.chars={tfm.chars}",
            ]
        )
    )
    print(tfm)