# -*- coding: utf-8 -*-
from fontTools.misc import sstruct
from fontTools.misc.textTools import (
    bytechr,
    byteord,
    bytesjoin,
    strjoin,
    tobytes,
    tostr,
    safeEval,
)
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import newTable
from fontTools.ttLib.ttVisitor import TTVisitor
from fontTools import ttLib
import fontTools.ttLib.tables.otTables as otTables
from fontTools.ttLib.tables import C_P_A_L_
from . import DefaultTable
import struct
import logging


log = logging.getLogger(__name__)

# sstruct layout of a single name record as stored in the binary table.
nameRecordFormat = """
    > # big endian
    platformID: H
    platEncID: H
    langID: H
    nameID: H
    length: H
    offset: H
"""

nameRecordSize = sstruct.calcsize(nameRecordFormat)


class table__n_a_m_e(DefaultTable.DefaultTable):
    """The 'name' table: a list of NameRecord objects kept in ``self.names``.

    ``self.names`` is created by decompile(), compile(), fromXML() and by the
    methods that add records; it may be absent on a freshly created table.
    """

    dependencies = ["ltag"]

    def decompile(self, data, ttFont):
        """Parse the binary table 'data' into ``self.names``.

        Records that are truncated, or whose string lies outside the string
        storage area, are logged and skipped instead of aborting the parse.
        """
        _format, n, stringOffset = struct.unpack(b">HHH", data[:6])
        expectedStringOffset = 6 + n * nameRecordSize
        if stringOffset != expectedStringOffset:
            log.error(
                "'name' table stringOffset incorrect. Expected: %s; Actual: %s",
                expectedStringOffset,
                stringOffset,
            )
        stringData = data[stringOffset:]
        data = data[6:]
        self.names = []
        for i in range(n):
            if len(data) < 12:
                log.error("skipping malformed name record #%d", i)
                continue
            name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
            name.string = stringData[name.offset : name.offset + name.length]
            if name.offset + name.length > len(stringData):
                log.error("skipping malformed name record #%d", i)
                continue
            assert len(name.string) == name.length
            # offset/length only make sense inside the binary table
            del name.offset, name.length
            self.names.append(name)

    def compile(self, ttFont):
        """Serialize ``self.names`` to binary data and return it.

        The records are sorted in place (see NameRecord.__lt__()), and
        identical encoded strings are stored once and shared by all the
        records that use them.
        """
        if not hasattr(self, "names"):
            # only happens when there are NO name table entries read
            # from the TTX file
            self.names = []
        names = self.names
        names.sort()  # sort according to the spec; see NameRecord.__lt__()
        format = 0
        n = len(names)
        stringOffset = 6 + n * nameRecordSize
        records = [struct.pack(b">HHH", format, n, stringOffset)]
        strings = []
        stringDataLength = 0
        done = {}  # remember the data so we can reuse the "pointers"
        for name in names:
            string = name.toBytes()
            if string in done:
                name.offset, name.length = done[string]
            else:
                name.offset, name.length = done[string] = (
                    stringDataLength,
                    len(string),
                )
                strings.append(string)
                stringDataLength += len(string)
            records.append(sstruct.pack(nameRecordFormat, name))
        # header + records first, then the string storage area
        return bytesjoin(records + strings)

    def toXML(self, writer, ttFont):
        """Write every name record to 'writer' as a <namerecord> element."""
        for name in self.names:
            name.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Append the NameRecord described by a <namerecord> element.

        Elements with any other tag name are ignored.
        """
        if name != "namerecord":
            return  # ignore unknown tags
        if not hasattr(self, "names"):
            self.names = []
        record = NameRecord()
        self.names.append(record)
        record.fromXML(name, attrs, content, ttFont)

    def getName(self, nameID, platformID, platEncID, langID=None):
        """Return the first record matching the given IDs, or None.

        If 'langID' is None, the language of the record is not checked.
        """
        for namerecord in self.names:
            if (
                namerecord.nameID == nameID
                and namerecord.platformID == platformID
                and namerecord.platEncID == platEncID
            ):
                if langID is None or namerecord.langID == langID:
                    return namerecord
        return None  # not found

    def getDebugName(self, nameID):
        """Return a decodable string for 'nameID', preferring English.

        The English Macintosh (1, langID 0) or Windows (3, langID 0x409)
        record wins; otherwise the last decodable record with that nameID
        is used. Returns None if there is no usable (non-empty) name.
        """
        englishName = someName = None
        for name in self.names:
            if name.nameID != nameID:
                continue
            try:
                unistr = name.toUnicode()
            except UnicodeDecodeError:
                continue

            someName = unistr
            if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
                englishName = unistr
                break
        if englishName:
            return englishName
        elif someName:
            return someName
        else:
            return None

    def getFirstDebugName(self, nameIDs):
        """Return getDebugName() of the first of 'nameIDs' that has one."""
        for nameID in nameIDs:
            name = self.getDebugName(nameID)
            if name is not None:
                return name
        return None

    def getBestFamilyName(self):
        """Return the WWS, typographic or plain family name, in that order."""
        # 21 = WWS Family Name
        # 16 = Typographic Family Name
        # 1 = Family Name
        return self.getFirstDebugName((21, 16, 1))

    def getBestSubFamilyName(self):
        """Return the WWS, typographic or plain subfamily name, in that order."""
        # 22 = WWS SubFamily Name
        # 17 = Typographic SubFamily Name
        # 2 = SubFamily Name
        return self.getFirstDebugName((22, 17, 2))

    def getBestFullName(self):
        """Return the best available full name, or None.

        Family/subfamily pairs are tried first (WWS, typographic, plain); a
        subfamily of "regular" (case-insensitive) is left out of the result.
        The Full Name and PostScript Name records are the fallbacks.
        """
        # 4 = Full Name
        # 6 = PostScript Name
        for nameIDs in ((21, 22), (16, 17), (1, 2), (4,), (6,)):
            if len(nameIDs) == 2:
                name_fam = self.getDebugName(nameIDs[0])
                name_subfam = self.getDebugName(nameIDs[1])
                if None in [name_fam, name_subfam]:
                    continue  # if any is None, skip
                name = f"{name_fam} {name_subfam}"
                if name_subfam.lower() == "regular":
                    name = f"{name_fam}"
                return name
            else:
                name = self.getDebugName(nameIDs[0])
                if name is not None:
                    return name
        return None

    def setName(self, string, nameID, platformID, platEncID, langID):
        """Set the 'string' for the name record identified by 'nameID', 'platformID',
        'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
        and append to the name table.

        'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
        it is assumed to be already encoded with the correct platform-specific encoding
        identified by the (platformID, platEncID, langID) triplet. A warning is issued
        to prevent unexpected results.

        Raises TypeError if 'string' is neither `str` nor `bytes`.
        """
        if not hasattr(self, "names"):
            self.names = []
        if not isinstance(string, str):
            if isinstance(string, bytes):
                log.warning(
                    "name string is bytes, ensure it's correctly encoded: %r", string
                )
            else:
                raise TypeError(
                    "expected unicode or bytes, found %s: %r"
                    % (type(string).__name__, string)
                )
        namerecord = self.getName(nameID, platformID, platEncID, langID)
        if namerecord:
            namerecord.string = string
        else:
            self.names.append(makeName(string, nameID, platformID, platEncID, langID))

    def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
        """Remove any name records identified by the given combination of 'nameID',
        'platformID', 'platEncID' and 'langID'.

        Arguments left as None are not used for matching; if all of them are
        None, nothing is removed.
        """
        args = {
            argName: argValue
            for argName, argValue in (
                ("nameID", nameID),
                ("platformID", platformID),
                ("platEncID", platEncID),
                ("langID", langID),
            )
            if argValue is not None
        }
        if not args:
            # no arguments, nothing to do
            return
        # keep every record that differs in at least one requested field
        self.names = [
            rec
            for rec in self.names
            if any(
                argValue != getattr(rec, argName) for argName, argValue in args.items()
            )
        ]

    @staticmethod
    def removeUnusedNames(ttFont):
        """Remove any name records which are not in NameID range 0-255 and not utilized
        within the font itself.

        Returns the set of nameIDs that were removed.
        """
        visitor = NameRecordVisitor()
        visitor.visit(ttFont)
        toDelete = set()
        for record in ttFont["name"].names:
            # Name IDs 26 to 255, inclusive, are reserved for future standard names.
            # https://learn.microsoft.com/en-us/typography/opentype/spec/name#name-ids
            if record.nameID < 256:
                continue
            if record.nameID not in visitor.seen:
                toDelete.add(record.nameID)

        for nameID in toDelete:
            ttFont["name"].removeNames(nameID)
        return toDelete

    def _findUnusedNameID(self, minNameID=256):
        """Finds an unused name id.

        The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
        following the last nameID in the name table.

        Raises ValueError if the resulting nameID would exceed 32767.
        """
        names = getattr(self, "names", [])
        nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
        if nameID > 32767:
            raise ValueError("nameID must be less than 32768")
        return nameID

    def findMultilingualName(
        self, names, windows=True, mac=True, minNameID=0, ttFont=None
    ):
        """Return the name ID of an existing multilingual name that
        matches the 'names' dictionary, or None if not found.

        'names' is a dictionary with the name in multiple languages,
        such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
        The keys can be arbitrary IETF BCP 47 language codes;
        the values are Unicode strings.

        If 'windows' is True, the returned name ID is guaranteed to
        exist for all requested languages for platformID=3 and
        platEncID=1.
        If 'mac' is True, the returned name ID is guaranteed to exist
        for all requested languages for platformID=1 and platEncID=0.

        The returned name ID will not be less than the 'minNameID'
        argument.
        """
        # Gather the set of requested
        #   (string, platformID, platEncID, langID)
        # tuples
        reqNameSet = set()
        for lang, name in sorted(names.items()):
            if windows:
                windowsName = _makeWindowsName(name, None, lang)
                if windowsName is not None:
                    reqNameSet.add(
                        (
                            windowsName.string,
                            windowsName.platformID,
                            windowsName.platEncID,
                            windowsName.langID,
                        )
                    )
            if mac:
                macName = _makeMacName(name, None, lang, ttFont)
                if macName is not None:
                    reqNameSet.add(
                        (
                            macName.string,
                            macName.platformID,
                            macName.platEncID,
                            macName.langID,
                        )
                    )

        # Collect matching name IDs
        matchingNames = dict()
        for name in self.names:
            try:
                key = (name.toUnicode(), name.platformID, name.platEncID, name.langID)
            except UnicodeDecodeError:
                continue
            if key in reqNameSet and name.nameID >= minNameID:
                nameSet = matchingNames.setdefault(name.nameID, set())
                nameSet.add(key)

        # Return the first name ID that defines all requested strings
        for nameID, nameSet in sorted(matchingNames.items()):
            if nameSet == reqNameSet:
                return nameID

        return None  # not found

    def addMultilingualName(
        self, names, ttFont=None, nameID=None, windows=True, mac=True, minNameID=0
    ):
        """Add a multilingual name, returning its name ID

        'names' is a dictionary with the name in multiple languages,
        such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
        The keys can be arbitrary IETF BCP 47 language codes;
        the values are Unicode strings.

        'ttFont' is the TTFont to which the names are added, or None.
        If present, the font's 'ltag' table can get populated
        to store exotic language codes, which allows encoding
        names that otherwise cannot get encoded at all.

        'nameID' is the name ID to be used, or None to let the library
        find an existing set of name records that match, or pick an
        unused name ID.

        If 'windows' is True, a platformID=3 name record will be added.
        If 'mac' is True, a platformID=1 name record will be added.

        If the 'nameID' argument is None, the created nameID will not
        be less than the 'minNameID' argument.
        """
        if not hasattr(self, "names"):
            self.names = []
        if nameID is None:
            # Reuse nameID if possible
            nameID = self.findMultilingualName(
                names, windows=windows, mac=mac, minNameID=minNameID, ttFont=ttFont
            )
            if nameID is not None:
                return nameID
            # Honour 'minNameID' as documented, but never allocate below 256:
            # lower IDs are predefined or reserved by the spec.
            nameID = self._findUnusedNameID(max(minNameID, 256))
        # TODO: Should minimize BCP 47 language codes.
        # https://github.com/fonttools/fonttools/issues/930
        for lang, name in sorted(names.items()):
            if windows:
                windowsName = _makeWindowsName(name, nameID, lang)
                if windowsName is not None:
                    self.names.append(windowsName)
                else:
                    # We cannot make a Windows name: make sure we add a
                    # Mac name as a fallback. This can happen for exotic
                    # BCP47 language tags that have no Windows language code.
                    mac = True
            if mac:
                macName = _makeMacName(name, nameID, lang, ttFont)
                if macName is not None:
                    self.names.append(macName)
        return nameID

    def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
        """Add a new name record containing 'string' for each (platformID, platEncID,
        langID) tuple specified in the 'platforms' list.

        The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
        following the last nameID in the name table.
        If no 'platforms' are specified, two English name records are added, one for the
        Macintosh (platformID=1), and one for the Windows platform (3).

        The 'string' must be a Unicode string, so it can be encoded with different,
        platform-specific encodings.

        Return the new nameID.
        """
        assert (
            len(platforms) > 0
        ), "'platforms' must contain at least one (platformID, platEncID, langID) tuple"
        if not hasattr(self, "names"):
            self.names = []
        if not isinstance(string, str):
            raise TypeError(
                "expected str, found %s: %r" % (type(string).__name__, string)
            )
        nameID = self._findUnusedNameID(minNameID + 1)
        for platformID, platEncID, langID in platforms:
            self.names.append(makeName(string, nameID, platformID, platEncID, langID))
        return nameID


def makeName(string, nameID, platformID, platEncID, langID):
    """Return a new NameRecord populated with the given string and IDs."""
    name = NameRecord()
    name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
        string,
        nameID,
        platformID,
        platEncID,
        langID,
    )
    return name


def _makeWindowsName(name, nameID, language):
    """Create a NameRecord for the Microsoft Windows platform

    'language' is an arbitrary IETF BCP 47 language identifier such
    as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
    does not support the desired language, the result will be None.
    Future versions of fonttools might return a NameRecord for the
    OpenType 'name' table format 1, but this is not implemented yet.
    """
    langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
    if langID is not None:
        return makeName(name, nameID, 3, 1, langID)
    else:
        log.warning(
            "cannot add Windows name in language %s "
            "because fonttools does not yet support "
            "name table format 1" % language
        )
        return None


def _makeMacName(name, nameID, language, font=None):
    """Create a NameRecord for Apple platforms

    'language' is an arbitrary IETF BCP 47 language identifier such
    as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
    create a Macintosh NameRecord that is understood by old applications
    (platform ID 1 and an old-style Macintosh language enum). If this
    is not possible, we create a Unicode NameRecord (platform ID 0)
    whose language points to the font’s 'ltag' table. The latter
    can encode any string in any language, but legacy applications
    might not recognize the format (in which case they will ignore
    those names).

    'font' should be the TTFont for which you want to create a name.
    If 'font' is None, we only return NameRecords for legacy Macintosh;
    in that case, the result will be None for names that need to
    be encoded with an 'ltag' table.

    See the section “The language identifier” in Apple’s specification:
    https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
    """
    macLang = _MAC_LANGUAGE_CODES.get(language.lower())
    macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
    if macLang is not None and macScript is not None:
        encoding = getEncoding(1, macScript, macLang, default="ascii")
        # Check if we can actually encode this name. If we can't,
        # for example because we have no support for the legacy
        # encoding, or because the name string contains Unicode
        # characters that the legacy encoding cannot represent,
        # we fall back to encoding the name in Unicode and put
        # the language tag into the ltag table.
        try:
            _ = tobytes(name, encoding, errors="strict")
            return makeName(name, nameID, 1, macScript, macLang)
        except UnicodeEncodeError:
            pass
    if font is not None:
        ltag = font.tables.get("ltag")
        if ltag is None:
            ltag = font["ltag"] = newTable("ltag")
        # 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
        # “The preferred platform-specific code for Unicode would be 3 or 4.”
        # https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
        return makeName(name, nameID, 0, 4, ltag.addTag(language))
    else:
        log.warning(
            "cannot store language %s into 'ltag' table "
            "without having access to the TTFont object" % language
        )
        return None


class NameRecord(object):
    """A single entry of the 'name' table.

    Instances carry the attributes 'string' (str or bytes), 'nameID',
    'platformID', 'platEncID' and 'langID'; they are set by makeName(),
    by table decompilation or by fromXML() rather than by a constructor.
    """

    def getEncoding(self, default="ascii"):
        """Returns the Python encoding name for this name entry based on its platformID,
        platEncID, and langID.  If encoding for these values is not known, by default
        'ascii' is returned.  That can be overridden by passing a value to the default
        argument.
        """
        return getEncoding(self.platformID, self.platEncID, self.langID, default)

    def encodingIsUnicodeCompatible(self):
        """Return True if the record's encoding is one of the listed
        Unicode-compatible encodings (UTF-16BE, UCS-2BE, ASCII, Latin-1)."""
        return self.getEncoding(None) in ["utf_16_be", "ucs2be", "ascii", "latin1"]

    def __str__(self):
        return self.toStr(errors="backslashreplace")

    def isUnicode(self):
        """Return True for Unicode-platform records and for Windows records
        with platEncID 0, 1 or 10."""
        return self.platformID == 0 or (
            self.platformID == 3 and self.platEncID in [0, 1, 10]
        )

    def toUnicode(self, errors="strict"):
        """
        If self.string is a Unicode string, return it; otherwise try decoding the
        bytes in self.string to a Unicode string using the encoding of this
        entry as returned by self.getEncoding(); Note that  self.getEncoding()
        returns 'ascii' if the encoding is unknown to the library.

        Certain heuristics are performed to recover data from bytes that are
        ill-formed in the chosen encoding, or that otherwise look misencoded
        (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
        but marked otherwise).  If the bytes are ill-formed and the heuristics fail,
        the error is handled according to the errors parameter to this function, which is
        passed to the underlying decode() function; by default it throws a
        UnicodeDecodeError exception.

        Note: The mentioned heuristics mean that roundtripping a font to XML and back
        to binary might recover some misencoded data whereas just loading the font
        and saving it back will not change them.
        """

        def isascii(b):
            return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]

        encoding = self.getEncoding()
        string = self.string

        if (
            isinstance(string, bytes)
            and encoding == "utf_16_be"
            and len(string) % 2 == 1
        ):
            # Recover badly encoded UTF-16 strings that have an odd number of bytes:
            # - If the last byte is zero, drop it.  Otherwise,
            # - If all the odd bytes are zero and all the even bytes are ASCII,
            #   prepend one zero byte.  Otherwise,
            # - If first byte is zero and all other bytes are ASCII, insert zero
            #   bytes between consecutive ASCII bytes.
            #
            # (Yes, I've seen all of these in the wild... sigh)
            if byteord(string[-1]) == 0:
                string = string[:-1]
            elif all(
                byteord(b) == 0 if i % 2 else isascii(byteord(b))
                for i, b in enumerate(string)
            ):
                string = b"\0" + string
            elif byteord(string[0]) == 0 and all(
                isascii(byteord(b)) for b in string[1:]
            ):
                string = bytesjoin(b"\0" + bytechr(byteord(b)) for b in string[1:])

        string = tostr(string, encoding=encoding, errors=errors)

        # If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
        # Fix it up.
        if all(
            ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i, c in enumerate(string)
        ):
            # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
            # narrow it down.
            string = string[1::2]

        return string

    def toBytes(self, errors="strict"):
        """If self.string is a bytes object, return it; otherwise try encoding
        the Unicode string in self.string to bytes using the encoding of this
        entry as returned by self.getEncoding(); Note that self.getEncoding()
        returns 'ascii' if the encoding is unknown to the library.

        If the Unicode string cannot be encoded to bytes in the chosen encoding,
        the error is handled according to the errors parameter to this function,
        which is passed to the underlying encode() function; by default it throws a
        UnicodeEncodeError exception.
        """
        return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

    toStr = toUnicode

    def toXML(self, writer, ttFont):
        """Write this record to 'writer' as a <namerecord> element.

        A 'unicode' attribute is emitted when the string could not be decoded
        or the encoding is not Unicode-compatible, so that fromXML() knows how
        to turn the element text back into bytes.
        """
        try:
            unistr = self.toUnicode()
        except UnicodeDecodeError:
            unistr = None
        attrs = [
            ("nameID", self.nameID),
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("langID", hex(self.langID)),
        ]

        if unistr is None or not self.encodingIsUnicodeCompatible():
            attrs.append(("unicode", unistr is not None))

        writer.begintag("namerecord", attrs)
        writer.newline()
        if unistr is not None:
            writer.write(unistr)
        else:
            # undecodable: dump the raw bytes
            writer.write8bit(self.string)
        writer.newline()
        writer.endtag("namerecord")
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Populate this record from a <namerecord> element.

        'name' (the tag name) is not used. The stripped element text is
        encoded with the record's own encoding, unless the element was
        written as raw 8-bit data, in which case it is encoded as latin1.
        """
        self.nameID = safeEval(attrs["nameID"])
        self.platformID = safeEval(attrs["platformID"])
        self.platEncID = safeEval(attrs["platEncID"])
        self.langID = safeEval(attrs["langID"])
        s = strjoin(content).strip()
        encoding = self.getEncoding()
        if self.encodingIsUnicodeCompatible() or safeEval(
            attrs.get("unicode", "False")
        ):
            self.string = s.encode(encoding)
        else:
            # This is the inverse of write8bit...
            self.string = s.encode("latin1")

    def __lt__(self, other):
        """Order records by (platformID, platEncID, langID, nameID), then by
        their encoded string when both strings can be encoded."""
        if type(self) != type(other):
            return NotImplemented

        try:
            selfTuple = (
                self.platformID,
                self.platEncID,
                self.langID,
                self.nameID,
            )
            otherTuple = (
                other.platformID,
                other.platEncID,
                other.langID,
                other.nameID,
            )
        except AttributeError:
            # This can only happen for
            # 1) an object that is not a NameRecord, or
            # 2) an unlikely incomplete NameRecord object which has not been
            #    fully populated
            return NotImplemented

        try:
            # Include the actual NameRecord string in the comparison tuples
            selfTuple = selfTuple + (self.toBytes(),)
            otherTuple = otherTuple + (other.toBytes(),)
        except UnicodeEncodeError as e:
            # toBytes caused an encoding error in either of the two, so content
            # to sorting based on IDs only
            log.error("NameRecord sorting failed to encode: %s" % e)

        # Implemented so that list.sort() sorts according to the spec by using
        # the order of the tuple items and their comparison
        return selfTuple < otherTuple

    def __repr__(self):
        return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
            self.nameID,
            self.platformID,
            self.langID,
        )


# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
685# 686# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html 687# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry 688_WINDOWS_LANGUAGES = { 689 0x0436: "af", 690 0x041C: "sq", 691 0x0484: "gsw", 692 0x045E: "am", 693 0x1401: "ar-DZ", 694 0x3C01: "ar-BH", 695 0x0C01: "ar", 696 0x0801: "ar-IQ", 697 0x2C01: "ar-JO", 698 0x3401: "ar-KW", 699 0x3001: "ar-LB", 700 0x1001: "ar-LY", 701 0x1801: "ary", 702 0x2001: "ar-OM", 703 0x4001: "ar-QA", 704 0x0401: "ar-SA", 705 0x2801: "ar-SY", 706 0x1C01: "aeb", 707 0x3801: "ar-AE", 708 0x2401: "ar-YE", 709 0x042B: "hy", 710 0x044D: "as", 711 0x082C: "az-Cyrl", 712 0x042C: "az", 713 0x046D: "ba", 714 0x042D: "eu", 715 0x0423: "be", 716 0x0845: "bn", 717 0x0445: "bn-IN", 718 0x201A: "bs-Cyrl", 719 0x141A: "bs", 720 0x047E: "br", 721 0x0402: "bg", 722 0x0403: "ca", 723 0x0C04: "zh-HK", 724 0x1404: "zh-MO", 725 0x0804: "zh", 726 0x1004: "zh-SG", 727 0x0404: "zh-TW", 728 0x0483: "co", 729 0x041A: "hr", 730 0x101A: "hr-BA", 731 0x0405: "cs", 732 0x0406: "da", 733 0x048C: "prs", 734 0x0465: "dv", 735 0x0813: "nl-BE", 736 0x0413: "nl", 737 0x0C09: "en-AU", 738 0x2809: "en-BZ", 739 0x1009: "en-CA", 740 0x2409: "en-029", 741 0x4009: "en-IN", 742 0x1809: "en-IE", 743 0x2009: "en-JM", 744 0x4409: "en-MY", 745 0x1409: "en-NZ", 746 0x3409: "en-PH", 747 0x4809: "en-SG", 748 0x1C09: "en-ZA", 749 0x2C09: "en-TT", 750 0x0809: "en-GB", 751 0x0409: "en", 752 0x3009: "en-ZW", 753 0x0425: "et", 754 0x0438: "fo", 755 0x0464: "fil", 756 0x040B: "fi", 757 0x080C: "fr-BE", 758 0x0C0C: "fr-CA", 759 0x040C: "fr", 760 0x140C: "fr-LU", 761 0x180C: "fr-MC", 762 0x100C: "fr-CH", 763 0x0462: "fy", 764 0x0456: "gl", 765 0x0437: "ka", 766 0x0C07: "de-AT", 767 0x0407: "de", 768 0x1407: "de-LI", 769 0x1007: "de-LU", 770 0x0807: "de-CH", 771 0x0408: "el", 772 0x046F: "kl", 773 0x0447: "gu", 774 0x0468: "ha", 775 0x040D: "he", 776 0x0439: "hi", 777 0x040E: "hu", 778 0x040F: "is", 779 0x0470: "ig", 780 0x0421: 
"id", 781 0x045D: "iu", 782 0x085D: "iu-Latn", 783 0x083C: "ga", 784 0x0434: "xh", 785 0x0435: "zu", 786 0x0410: "it", 787 0x0810: "it-CH", 788 0x0411: "ja", 789 0x044B: "kn", 790 0x043F: "kk", 791 0x0453: "km", 792 0x0486: "quc", 793 0x0487: "rw", 794 0x0441: "sw", 795 0x0457: "kok", 796 0x0412: "ko", 797 0x0440: "ky", 798 0x0454: "lo", 799 0x0426: "lv", 800 0x0427: "lt", 801 0x082E: "dsb", 802 0x046E: "lb", 803 0x042F: "mk", 804 0x083E: "ms-BN", 805 0x043E: "ms", 806 0x044C: "ml", 807 0x043A: "mt", 808 0x0481: "mi", 809 0x047A: "arn", 810 0x044E: "mr", 811 0x047C: "moh", 812 0x0450: "mn", 813 0x0850: "mn-CN", 814 0x0461: "ne", 815 0x0414: "nb", 816 0x0814: "nn", 817 0x0482: "oc", 818 0x0448: "or", 819 0x0463: "ps", 820 0x0415: "pl", 821 0x0416: "pt", 822 0x0816: "pt-PT", 823 0x0446: "pa", 824 0x046B: "qu-BO", 825 0x086B: "qu-EC", 826 0x0C6B: "qu", 827 0x0418: "ro", 828 0x0417: "rm", 829 0x0419: "ru", 830 0x243B: "smn", 831 0x103B: "smj-NO", 832 0x143B: "smj", 833 0x0C3B: "se-FI", 834 0x043B: "se", 835 0x083B: "se-SE", 836 0x203B: "sms", 837 0x183B: "sma-NO", 838 0x1C3B: "sms", 839 0x044F: "sa", 840 0x1C1A: "sr-Cyrl-BA", 841 0x0C1A: "sr", 842 0x181A: "sr-Latn-BA", 843 0x081A: "sr-Latn", 844 0x046C: "nso", 845 0x0432: "tn", 846 0x045B: "si", 847 0x041B: "sk", 848 0x0424: "sl", 849 0x2C0A: "es-AR", 850 0x400A: "es-BO", 851 0x340A: "es-CL", 852 0x240A: "es-CO", 853 0x140A: "es-CR", 854 0x1C0A: "es-DO", 855 0x300A: "es-EC", 856 0x440A: "es-SV", 857 0x100A: "es-GT", 858 0x480A: "es-HN", 859 0x080A: "es-MX", 860 0x4C0A: "es-NI", 861 0x180A: "es-PA", 862 0x3C0A: "es-PY", 863 0x280A: "es-PE", 864 0x500A: "es-PR", 865 # Microsoft has defined two different language codes for 866 # “Spanish with modern sorting” and “Spanish with traditional 867 # sorting”. This makes sense for collation APIs, and it would be 868 # possible to express this in BCP 47 language tags via Unicode 869 # extensions (eg., “es-u-co-trad” is “Spanish with traditional 870 # sorting”). 
However, for storing names in fonts, this distinction 871 # does not make sense, so we use “es” in both cases. 872 0x0C0A: "es", 873 0x040A: "es", 874 0x540A: "es-US", 875 0x380A: "es-UY", 876 0x200A: "es-VE", 877 0x081D: "sv-FI", 878 0x041D: "sv", 879 0x045A: "syr", 880 0x0428: "tg", 881 0x085F: "tzm", 882 0x0449: "ta", 883 0x0444: "tt", 884 0x044A: "te", 885 0x041E: "th", 886 0x0451: "bo", 887 0x041F: "tr", 888 0x0442: "tk", 889 0x0480: "ug", 890 0x0422: "uk", 891 0x042E: "hsb", 892 0x0420: "ur", 893 0x0843: "uz-Cyrl", 894 0x0443: "uz", 895 0x042A: "vi", 896 0x0452: "cy", 897 0x0488: "wo", 898 0x0485: "sah", 899 0x0478: "ii", 900 0x046A: "yo", 901} 902 903 904_MAC_LANGUAGES = { 905 0: "en", 906 1: "fr", 907 2: "de", 908 3: "it", 909 4: "nl", 910 5: "sv", 911 6: "es", 912 7: "da", 913 8: "pt", 914 9: "no", 915 10: "he", 916 11: "ja", 917 12: "ar", 918 13: "fi", 919 14: "el", 920 15: "is", 921 16: "mt", 922 17: "tr", 923 18: "hr", 924 19: "zh-Hant", 925 20: "ur", 926 21: "hi", 927 22: "th", 928 23: "ko", 929 24: "lt", 930 25: "pl", 931 26: "hu", 932 27: "es", 933 28: "lv", 934 29: "se", 935 30: "fo", 936 31: "fa", 937 32: "ru", 938 33: "zh", 939 34: "nl-BE", 940 35: "ga", 941 36: "sq", 942 37: "ro", 943 38: "cz", 944 39: "sk", 945 40: "sl", 946 41: "yi", 947 42: "sr", 948 43: "mk", 949 44: "bg", 950 45: "uk", 951 46: "be", 952 47: "uz", 953 48: "kk", 954 49: "az-Cyrl", 955 50: "az-Arab", 956 51: "hy", 957 52: "ka", 958 53: "mo", 959 54: "ky", 960 55: "tg", 961 56: "tk", 962 57: "mn-CN", 963 58: "mn", 964 59: "ps", 965 60: "ks", 966 61: "ku", 967 62: "sd", 968 63: "bo", 969 64: "ne", 970 65: "sa", 971 66: "mr", 972 67: "bn", 973 68: "as", 974 69: "gu", 975 70: "pa", 976 71: "or", 977 72: "ml", 978 73: "kn", 979 74: "ta", 980 75: "te", 981 76: "si", 982 77: "my", 983 78: "km", 984 79: "lo", 985 80: "vi", 986 81: "id", 987 82: "tl", 988 83: "ms", 989 84: "ms-Arab", 990 85: "am", 991 86: "ti", 992 87: "om", 993 88: "so", 994 89: "sw", 995 90: "rw", 996 91: "rn", 997 
92: "ny", 998 93: "mg", 999 94: "eo", 1000 128: "cy", 1001 129: "eu", 1002 130: "ca", 1003 131: "la", 1004 132: "qu", 1005 133: "gn", 1006 134: "ay", 1007 135: "tt", 1008 136: "ug", 1009 137: "dz", 1010 138: "jv", 1011 139: "su", 1012 140: "gl", 1013 141: "af", 1014 142: "br", 1015 143: "iu", 1016 144: "gd", 1017 145: "gv", 1018 146: "ga", 1019 147: "to", 1020 148: "el-polyton", 1021 149: "kl", 1022 150: "az", 1023 151: "nn", 1024} 1025 1026 1027_WINDOWS_LANGUAGE_CODES = { 1028 lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items() 1029} 1030_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()} 1031 1032 1033# MacOS language ID → MacOS script ID 1034# 1035# Note that the script ID is not sufficient to determine what encoding 1036# to use in TrueType files. For some languages, MacOS used a modification 1037# of a mainstream script. For example, an Icelandic name would be stored 1038# with smRoman in the TrueType naming table, but the actual encoding 1039# is a special Icelandic version of the normal Macintosh Roman encoding. 1040# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal 1041# Syllables but MacOS had run out of available script codes, so this was 1042# done as a (pretty radical) “modification” of Ethiopic. 
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
    0: 0,  # langEnglish → smRoman
    1: 0,  # langFrench → smRoman
    2: 0,  # langGerman → smRoman
    3: 0,  # langItalian → smRoman
    4: 0,  # langDutch → smRoman
    5: 0,  # langSwedish → smRoman
    6: 0,  # langSpanish → smRoman
    7: 0,  # langDanish → smRoman
    8: 0,  # langPortuguese → smRoman
    9: 0,  # langNorwegian → smRoman
    10: 5,  # langHebrew → smHebrew
    11: 1,  # langJapanese → smJapanese
    12: 4,  # langArabic → smArabic
    13: 0,  # langFinnish → smRoman
    14: 6,  # langGreek → smGreek
    15: 0,  # langIcelandic → smRoman (modified)
    16: 0,  # langMaltese → smRoman
    17: 0,  # langTurkish → smRoman (modified)
    18: 0,  # langCroatian → smRoman (modified)
    19: 2,  # langTradChinese → smTradChinese
    20: 4,  # langUrdu → smArabic
    21: 9,  # langHindi → smDevanagari
    22: 21,  # langThai → smThai
    23: 3,  # langKorean → smKorean
    24: 29,  # langLithuanian → smCentralEuroRoman
    25: 29,  # langPolish → smCentralEuroRoman
    26: 29,  # langHungarian → smCentralEuroRoman
    27: 29,  # langEstonian → smCentralEuroRoman
    28: 29,  # langLatvian → smCentralEuroRoman
    29: 0,  # langSami → smRoman
    30: 0,  # langFaroese → smRoman (modified)
    31: 4,  # langFarsi → smArabic (modified)
    32: 7,  # langRussian → smCyrillic
    33: 25,  # langSimpChinese → smSimpChinese
    34: 0,  # langFlemish → smRoman
    35: 0,  # langIrishGaelic → smRoman (modified)
    36: 0,  # langAlbanian → smRoman
    37: 0,  # langRomanian → smRoman (modified)
    38: 29,  # langCzech → smCentralEuroRoman
    39: 29,  # langSlovak → smCentralEuroRoman
    40: 0,  # langSlovenian → smRoman (modified)
    41: 5,  # langYiddish → smHebrew
    42: 7,  # langSerbian → smCyrillic
    43: 7,  # langMacedonian → smCyrillic
    44: 7,  # langBulgarian → smCyrillic
    45: 7,  # langUkrainian → smCyrillic (modified)
    46: 7,  # langByelorussian → smCyrillic
    47: 7,  # langUzbek → smCyrillic
    48: 7,  # langKazakh → smCyrillic
    49: 7,  # langAzerbaijani → smCyrillic
    50: 4,  # langAzerbaijanAr → smArabic
    51: 24,  # langArmenian → smArmenian
    52: 23,  # langGeorgian → smGeorgian
    53: 7,  # langMoldavian → smCyrillic
    54: 7,  # langKirghiz → smCyrillic
    55: 7,  # langTajiki → smCyrillic
    56: 7,  # langTurkmen → smCyrillic
    57: 27,  # langMongolian → smMongolian
    58: 7,  # langMongolianCyr → smCyrillic
    59: 4,  # langPashto → smArabic
    60: 4,  # langKurdish → smArabic
    61: 4,  # langKashmiri → smArabic
    62: 4,  # langSindhi → smArabic
    63: 26,  # langTibetan → smTibetan
    64: 9,  # langNepali → smDevanagari
    65: 9,  # langSanskrit → smDevanagari
    66: 9,  # langMarathi → smDevanagari
    67: 13,  # langBengali → smBengali
    68: 13,  # langAssamese → smBengali
    69: 11,  # langGujarati → smGujarati
    70: 10,  # langPunjabi → smGurmukhi
    71: 12,  # langOriya → smOriya
    72: 17,  # langMalayalam → smMalayalam
    73: 16,  # langKannada → smKannada
    74: 14,  # langTamil → smTamil
    75: 15,  # langTelugu → smTelugu
    76: 18,  # langSinhalese → smSinhalese
    77: 19,  # langBurmese → smBurmese
    78: 20,  # langKhmer → smKhmer
    79: 22,  # langLao → smLao
    80: 30,  # langVietnamese → smVietnamese
    81: 0,  # langIndonesian → smRoman
    82: 0,  # langTagalog → smRoman
    83: 0,  # langMalayRoman → smRoman
    84: 4,  # langMalayArabic → smArabic
    85: 28,  # langAmharic → smEthiopic
    86: 28,  # langTigrinya → smEthiopic
    87: 28,  # langOromo → smEthiopic
    88: 0,  # langSomali → smRoman
    89: 0,  # langSwahili → smRoman
    90: 0,  # langKinyarwanda → smRoman
    91: 0,  # langRundi → smRoman
    92: 0,  # langNyanja → smRoman
    93: 0,  # langMalagasy → smRoman
    94: 0,  # langEsperanto → smRoman
    128: 0,  # langWelsh → smRoman (modified)
    129: 0,  # langBasque → smRoman
    130: 0,  # langCatalan → smRoman
    131: 0,  # langLatin → smRoman
    132: 0,  # langQuechua → smRoman
    133: 0,  # langGuarani → smRoman
    134: 0,  # langAymara → smRoman
    135: 7,  # langTatar → smCyrillic
    136: 4,  # langUighur → smArabic
    137: 26,  # langDzongkha → smTibetan
    138: 0,  # langJavaneseRom → smRoman
    139: 0,  # langSundaneseRom → smRoman
    140: 0,  # langGalician → smRoman
    141: 0,  # langAfrikaans → smRoman
    142: 0,  # langBreton → smRoman (modified)
    143: 28,  # langInuktitut → smEthiopic (modified)
    144: 0,  # langScottishGaelic → smRoman (modified)
    145: 0,  # langManxGaelic → smRoman (modified)
    146: 0,  # langIrishGaelicScript → smRoman (modified)
    147: 0,  # langTongan → smRoman
    148: 6,  # langGreekAncient → smGreek
    149: 0,  # langGreenlandic → smRoman
    150: 0,  # langAzerbaijanRoman → smRoman
    151: 0,  # langNynorsk → smRoman
}


class NameRecordVisitor(TTVisitor):
    # Font tables that have NameIDs we need to collect.
1170 TABLES = ("GSUB", "GPOS", "fvar", "CPAL", "STAT") 1171 1172 def __init__(self): 1173 self.seen = set() 1174 1175 1176@NameRecordVisitor.register_attrs( 1177 ( 1178 (otTables.FeatureParamsSize, ("SubfamilyID", "SubfamilyNameID")), 1179 (otTables.FeatureParamsStylisticSet, ("UINameID",)), 1180 ( 1181 otTables.FeatureParamsCharacterVariants, 1182 ( 1183 "FeatUILabelNameID", 1184 "FeatUITooltipTextNameID", 1185 "SampleTextNameID", 1186 "FirstParamUILabelNameID", 1187 ), 1188 ), 1189 (otTables.STAT, ("ElidedFallbackNameID",)), 1190 (otTables.AxisRecord, ("AxisNameID",)), 1191 (otTables.AxisValue, ("ValueNameID",)), 1192 (otTables.FeatureName, ("FeatureNameID",)), 1193 (otTables.Setting, ("SettingNameID",)), 1194 ) 1195) 1196def visit(visitor, obj, attr, value): 1197 visitor.seen.add(value) 1198 1199 1200@NameRecordVisitor.register(ttLib.getTableClass("fvar")) 1201def visit(visitor, obj): 1202 for inst in obj.instances: 1203 if inst.postscriptNameID != 0xFFFF: 1204 visitor.seen.add(inst.postscriptNameID) 1205 visitor.seen.add(inst.subfamilyNameID) 1206 1207 for axis in obj.axes: 1208 visitor.seen.add(axis.axisNameID) 1209 1210 1211@NameRecordVisitor.register(ttLib.getTableClass("CPAL")) 1212def visit(visitor, obj): 1213 if obj.version == 1: 1214 visitor.seen.update(obj.paletteLabels) 1215 visitor.seen.update(obj.paletteEntryLabels) 1216 1217 1218@NameRecordVisitor.register(ttLib.TTFont) 1219def visit(visitor, font, *args, **kwargs): 1220 if hasattr(visitor, "font"): 1221 return False 1222 1223 visitor.font = font 1224 for tag in visitor.TABLES: 1225 if tag in font: 1226 visitor.visit(font[tag], *args, **kwargs) 1227 del visitor.font 1228 return False 1229