xref: /aosp_15_r20/external/fonttools/Lib/fontTools/ttLib/tables/_n_a_m_e.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1# -*- coding: utf-8 -*-
2from fontTools.misc import sstruct
3from fontTools.misc.textTools import (
4    bytechr,
5    byteord,
6    bytesjoin,
7    strjoin,
8    tobytes,
9    tostr,
10    safeEval,
11)
12from fontTools.misc.encodingTools import getEncoding
13from fontTools.ttLib import newTable
14from fontTools.ttLib.ttVisitor import TTVisitor
15from fontTools import ttLib
16import fontTools.ttLib.tables.otTables as otTables
17from fontTools.ttLib.tables import C_P_A_L_
18from . import DefaultTable
19import struct
20import logging
21
22
# Module-level logger for diagnostics emitted while reading/writing this table.
log = logging.getLogger(__name__)

# sstruct description of one name record: six big-endian unsigned 16-bit
# fields. 'length' and 'offset' locate the record's string inside the string
# storage area that follows the record array.
nameRecordFormat = """
		>	# big endian
		platformID:	H
		platEncID:	H
		langID:		H
		nameID:		H
		length:		H
		offset:		H
"""

# Packed size in bytes of a single name record, derived from the format above.
nameRecordSize = sstruct.calcsize(nameRecordFormat)
36
37
class table__n_a_m_e(DefaultTable.DefaultTable):
    """The 'name' table: a list of NameRecord objects kept in ``self.names``.

    ``self.names`` is created by decompile(), fromXML() and the add/set
    methods; it may be missing on a freshly constructed table, which is why
    several methods create it on demand.
    """

    dependencies = ["ltag"]

    def decompile(self, data, ttFont):
        """Parse the binary table ``data`` into ``self.names``.

        The 6-byte header holds the format, the record count and the offset of
        the string storage. A header offset that does not match the expected
        value is logged but still used. Records that are truncated, or whose
        string lies outside the string storage, are logged and skipped.
        """
        # The format field is read but not otherwise used here.
        _format, n, stringOffset = struct.unpack(b">HHH", data[:6])
        expectedStringOffset = 6 + n * nameRecordSize
        if stringOffset != expectedStringOffset:
            log.error(
                "'name' table stringOffset incorrect. Expected: %s; Actual: %s",
                expectedStringOffset,
                stringOffset,
            )
        stringData = data[stringOffset:]
        data = data[6:]
        self.names = []
        for i in range(n):
            if len(data) < nameRecordSize:
                log.error("skipping malformed name record #%d", i)
                continue
            name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
            # Validate the bounds before slicing so that a record is only ever
            # given a string of exactly 'length' bytes.
            if name.offset + name.length > len(stringData):
                log.error("skipping malformed name record #%d", i)
                continue
            name.string = stringData[name.offset : name.offset + name.length]
            # offset/length only make sense inside the binary table.
            del name.offset, name.length
            self.names.append(name)

    def compile(self, ttFont):
        """Serialize ``self.names`` to binary and return the bytes.

        Always writes a format 0 table. ``self.names`` is sorted in place, and
        each record is left with ``offset`` and ``length`` attributes.
        Records with identical encoded strings share one slot in the string
        storage.
        """
        if not hasattr(self, "names"):
            # only happens when there are NO name table entries read
            # from the TTX file
            self.names = []
        names = self.names
        names.sort()  # sort according to the spec; see NameRecord.__lt__()
        n = len(names)
        stringOffset = 6 + n * nameRecordSize
        # Collect the pieces and join once instead of repeatedly concatenating.
        recordParts = [struct.pack(b">HHH", 0, n, stringOffset)]
        stringParts = []
        nextOffset = 0
        done = {}  # encoded string -> (offset, length), to reuse storage
        for name in names:
            string = name.toBytes()
            if string not in done:
                done[string] = (nextOffset, len(string))
                stringParts.append(string)
                nextOffset += len(string)
            name.offset, name.length = done[string]
            recordParts.append(sstruct.pack(nameRecordFormat, name))
        return b"".join(recordParts) + b"".join(stringParts)

    def toXML(self, writer, ttFont):
        """Write every record in ``self.names`` to ``writer``."""
        for name in self.names:
            name.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Append a NameRecord built from a ``namerecord`` XML element.

        Elements with any other tag name are ignored.
        """
        if name != "namerecord":
            return  # ignore unknown tags
        if not hasattr(self, "names"):
            self.names = []
        # Use a separate local so the tag name is not clobbered by the record.
        record = NameRecord()
        self.names.append(record)
        record.fromXML(name, attrs, content, ttFont)

    def getName(self, nameID, platformID, platEncID, langID=None):
        """Return the first record matching the given IDs, or None.

        When ``langID`` is None, a record in any language matches.
        """
        for namerecord in self.names:
            if (
                namerecord.nameID == nameID
                and namerecord.platformID == platformID
                and namerecord.platEncID == platEncID
            ):
                if langID is None or namerecord.langID == langID:
                    return namerecord
        return None  # not found

    def getDebugName(self, nameID):
        """Return a decoded string for ``nameID``, preferring English.

        The first record with (platformID, langID) equal to (1, 0) or
        (3, 0x409) wins. Otherwise the last decodable record with that nameID
        is used. Records that fail to decode are skipped. Returns None when
        nothing usable (or only an empty string) is found.
        """
        englishName = someName = None
        for name in self.names:
            if name.nameID != nameID:
                continue
            try:
                unistr = name.toUnicode()
            except UnicodeDecodeError:
                continue

            someName = unistr
            if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
                englishName = unistr
                break
        if englishName:
            return englishName
        elif someName:
            return someName
        else:
            return None

    def getFirstDebugName(self, nameIDs):
        """Return getDebugName() for the first of ``nameIDs`` that has one."""
        for nameID in nameIDs:
            name = self.getDebugName(nameID)
            if name is not None:
                return name
        return None

    def getBestFamilyName(self):
        """Return the family name, trying name IDs 21, 16 and 1 in that order."""
        # 21 = WWS Family Name
        # 16 = Typographic Family Name
        # 1 = Family Name
        return self.getFirstDebugName((21, 16, 1))

    def getBestSubFamilyName(self):
        """Return the subfamily name, trying name IDs 22, 17 and 2 in order."""
        # 22 = WWS SubFamily Name
        # 17 = Typographic SubFamily Name
        # 2 = SubFamily Name
        return self.getFirstDebugName((22, 17, 2))

    def getBestFullName(self):
        """Return the best available full name, or None.

        Family/subfamily pairs (21, 22), (16, 17) and (1, 2) are tried first
        and joined with a space. A subfamily equal to "regular"
        (case-insensitive) is dropped. Name ID 4 and then name ID 6 are the
        fallbacks.
        """
        # 4 = Full Name
        # 6 = PostScript Name
        for nameIDs in ((21, 22), (16, 17), (1, 2), (4,), (6,)):
            if len(nameIDs) == 2:
                name_fam = self.getDebugName(nameIDs[0])
                name_subfam = self.getDebugName(nameIDs[1])
                if None in [name_fam, name_subfam]:
                    continue  # if any is None, skip
                name = f"{name_fam} {name_subfam}"
                if name_subfam.lower() == "regular":
                    name = f"{name_fam}"
                return name
            else:
                name = self.getDebugName(nameIDs[0])
                if name is not None:
                    return name
        return None

    def setName(self, string, nameID, platformID, platEncID, langID):
        """Set the 'string' for the name record identified by 'nameID', 'platformID',
        'platEncID' and 'langID'. If no such record exists, create it and append
        it to the name table.

        'string' can be of type `str` or `bytes`. In the latter case, it is
        assumed to be already encoded with the correct platform-specific encoding
        identified by the (platformID, platEncID, langID) triplet, and a warning
        is logged to prevent unexpected results.

        Raises TypeError if 'string' is neither `str` nor `bytes`.
        """
        if not hasattr(self, "names"):
            self.names = []
        if not isinstance(string, str):
            if isinstance(string, bytes):
                log.warning(
                    "name string is bytes, ensure it's correctly encoded: %r", string
                )
            else:
                raise TypeError(
                    "expected unicode or bytes, found %s: %r"
                    % (type(string).__name__, string)
                )
        namerecord = self.getName(nameID, platformID, platEncID, langID)
        if namerecord is not None:
            namerecord.string = string
        else:
            self.names.append(makeName(string, nameID, platformID, platEncID, langID))

    def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
        """Remove any name records identified by the given combination of 'nameID',
        'platformID', 'platEncID' and 'langID'.

        Arguments left as None are not used for matching. When all of them are
        None, nothing is removed.
        """
        args = {
            argName: argValue
            for argName, argValue in (
                ("nameID", nameID),
                ("platformID", platformID),
                ("platEncID", platEncID),
                ("langID", langID),
            )
            if argValue is not None
        }
        if not args:
            # no arguments, nothing to do
            return
        # Keep a record as soon as it differs in at least one requested field.
        self.names = [
            rec
            for rec in self.names
            if any(
                argValue != getattr(rec, argName) for argName, argValue in args.items()
            )
        ]

    @staticmethod
    def removeUnusedNames(ttFont):
        """Remove any name records which are not in NameID range 0-255 and not utilized
        within the font itself.

        Returns the set of name IDs that were removed. Usage is determined by
        NameRecordVisitor, which is defined outside the part of the file
        reviewed here.
        """
        visitor = NameRecordVisitor()
        visitor.visit(ttFont)
        toDelete = set()
        for record in ttFont["name"].names:
            # Name IDs 26 to 255, inclusive, are reserved for future standard names.
            # https://learn.microsoft.com/en-us/typography/opentype/spec/name#name-ids
            if record.nameID < 256:
                continue
            if record.nameID not in visitor.seen:
                toDelete.add(record.nameID)

        for nameID in toDelete:
            ttFont["name"].removeNames(nameID)
        return toDelete

    def _findUnusedNameID(self, minNameID=256):
        """Finds an unused name id.

        The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
        following the last nameID in the name table.

        Raises ValueError if the resulting nameID would exceed 32767.
        """
        names = getattr(self, "names", [])
        nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
        if nameID > 32767:
            raise ValueError("nameID must be less than 32768")
        return nameID

    def findMultilingualName(
        self, names, windows=True, mac=True, minNameID=0, ttFont=None
    ):
        """Return the name ID of an existing multilingual name that
        matches the 'names' dictionary, or None if not found.

        'names' is a dictionary with the name in multiple languages,
        such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
        The keys can be arbitrary IETF BCP 47 language codes;
        the values are Unicode strings.

        If 'windows' is True, the returned name ID is guaranteed to
        exist for all requested languages for platformID=3 and
        platEncID=1.
        If 'mac' is True, the returned name ID is guaranteed to exist
        for all requested languages for platformID=1 and platEncID=0.

        The returned name ID will not be less than the 'minNameID'
        argument.
        """
        # Gather the set of requested
        #   (string, platformID, platEncID, langID)
        # tuples
        reqNameSet = set()
        for lang, name in sorted(names.items()):
            if windows:
                windowsName = _makeWindowsName(name, None, lang)
                if windowsName is not None:
                    reqNameSet.add(
                        (
                            windowsName.string,
                            windowsName.platformID,
                            windowsName.platEncID,
                            windowsName.langID,
                        )
                    )
            if mac:
                macName = _makeMacName(name, None, lang, ttFont)
                if macName is not None:
                    reqNameSet.add(
                        (
                            macName.string,
                            macName.platformID,
                            macName.platEncID,
                            macName.langID,
                        )
                    )

        # Collect matching name IDs
        matchingNames = dict()
        for name in self.names:
            try:
                key = (name.toUnicode(), name.platformID, name.platEncID, name.langID)
            except UnicodeDecodeError:
                continue
            if key in reqNameSet and name.nameID >= minNameID:
                nameSet = matchingNames.setdefault(name.nameID, set())
                nameSet.add(key)

        # Return the first name ID that defines all requested strings
        for nameID, nameSet in sorted(matchingNames.items()):
            if nameSet == reqNameSet:
                return nameID

        return None  # not found

    def addMultilingualName(
        self, names, ttFont=None, nameID=None, windows=True, mac=True, minNameID=0
    ):
        """Add a multilingual name, returning its name ID

        'names' is a dictionary with the name in multiple languages,
        such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
        The keys can be arbitrary IETF BCP 47 language codes;
        the values are Unicode strings.

        'ttFont' is the TTFont to which the names are added, or None.
        If present, the font's 'ltag' table can get populated
        to store exotic language codes, which allows encoding
        names that otherwise cannot get encoded at all.

        'nameID' is the name ID to be used, or None to let the library
        find an existing set of name records that match, or pick an
        unused name ID.

        If 'windows' is True, a platformID=3 name record will be added.
        If 'mac' is True, a platformID=1 name record will be added.
        When a Windows record cannot be created for a language, a Mac
        record is added for that language as a fallback even if 'mac'
        is False.

        If the 'nameID' argument is None, the created nameID will not
        be less than the 'minNameID' argument (and never less than 256).
        """
        if not hasattr(self, "names"):
            self.names = []
        if nameID is None:
            # Reuse nameID if possible
            nameID = self.findMultilingualName(
                names, windows=windows, mac=mac, minNameID=minNameID, ttFont=ttFont
            )
            if nameID is not None:
                return nameID
            # Honour 'minNameID' when allocating, while keeping the historic
            # floor of 256 for callers that leave it at its default.
            nameID = self._findUnusedNameID(max(minNameID, 256))
        # TODO: Should minimize BCP 47 language codes.
        # https://github.com/fonttools/fonttools/issues/930
        for lang, name in sorted(names.items()):
            # Decide per language whether a Mac record is needed, so that a
            # fallback for one language does not leak into the following ones.
            needMac = mac
            if windows:
                windowsName = _makeWindowsName(name, nameID, lang)
                if windowsName is not None:
                    self.names.append(windowsName)
                else:
                    # We cannot make a Windows name: make sure we add a
                    # Mac name as a fallback. This can happen for exotic
                    # BCP47 language tags that have no Windows language code.
                    needMac = True
            if needMac:
                macName = _makeMacName(name, nameID, lang, ttFont)
                if macName is not None:
                    self.names.append(macName)
        return nameID

    def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
        """Add a new name record containing 'string' for each (platformID, platEncID,
        langID) tuple specified in the 'platforms' list.

        The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
        following the last nameID in the name table.
        If no 'platforms' are specified, two English name records are added, one for the
        Macintosh (platformID=1), and one for the Windows platform (3).

        The 'string' must be a Unicode string, so it can be encoded with different,
        platform-specific encodings.

        Raises TypeError if 'string' is not a `str`.

        Return the new nameID.
        """
        assert (
            len(platforms) > 0
        ), "'platforms' must contain at least one (platformID, platEncID, langID) tuple"
        if not hasattr(self, "names"):
            self.names = []
        if not isinstance(string, str):
            raise TypeError(
                "expected str, found %s: %r" % (type(string).__name__, string)
            )
        nameID = self._findUnusedNameID(minNameID + 1)
        for platformID, platEncID, langID in platforms:
            self.names.append(makeName(string, nameID, platformID, platEncID, langID))
        return nameID
407
408
def makeName(string, nameID, platformID, platEncID, langID):
    """Return a new NameRecord carrying the given string and identifiers."""
    record = NameRecord()
    record.string = string
    record.nameID = nameID
    record.platformID = platformID
    record.platEncID = platEncID
    record.langID = langID
    return record
419
420
def _makeWindowsName(name, nameID, language):
    """Build a Windows-platform NameRecord (platformID 3, platEncID 1).

    'language' is an IETF BCP 47 language identifier such as 'en',
    'de-CH', 'de-AT-1901', or 'fa-Latn'; it is looked up case-insensitively
    in the Windows language code table. When the language has no Windows
    language code, a warning is logged and None is returned. Supporting
    such languages would need 'name' table format 1, which is not
    implemented.
    """
    windowsLangID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
    if windowsLangID is None:
        log.warning(
            "cannot add Windows name in language %s "
            "because fonttools does not yet support "
            "name table format 1" % language
        )
        return None
    return makeName(name, nameID, 3, 1, windowsLangID)
440
441
def _makeMacName(name, nameID, language, font=None):
    """Build a NameRecord for Apple platforms, or return None.

    'language' is an IETF BCP 47 language identifier such as 'en',
    'de-CH', 'de-AT-1901', or 'fa-Latn'.

    A legacy Macintosh record (platformID 1 with an old-style script and
    language code) is preferred, because old applications understand it.
    It is used when the language has such codes and 'name' can be encoded
    in the corresponding legacy encoding.

    Otherwise, if 'font' is given, a Unicode record (platformID 0,
    platEncID 4) is returned whose language ID is an index into the font's
    'ltag' table; the table is created if missing. Legacy applications
    might not recognize such records.

    If 'font' is None and no legacy record can be made, a warning is logged
    and None is returned.

    See "The language identifier" in Apple's specification:
    https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
    """
    legacyLang = _MAC_LANGUAGE_CODES.get(language.lower())
    legacyScript = _MAC_LANGUAGE_TO_SCRIPT.get(legacyLang)
    if not (legacyLang is None or legacyScript is None):
        codec = getEncoding(1, legacyScript, legacyLang, default="ascii")
        # Trial-encode the name: the legacy encoding may be unsupported or
        # may lack some of the characters. In that case fall through to the
        # Unicode + 'ltag' route below.
        try:
            tobytes(name, codec, errors="strict")
        except UnicodeEncodeError:
            pass
        else:
            return makeName(name, nameID, 1, legacyScript, legacyLang)

    if font is None:
        log.warning(
            "cannot store language %s into 'ltag' table "
            "without having access to the TTFont object" % language
        )
        return None

    ltagTable = font.tables.get("ltag")
    if ltagTable is None:
        ltagTable = font["ltag"] = newTable("ltag")
    # Platform 0 is Unicode; encoding 4 means "Unicode 2.0 or later semantics
    # (non-BMP characters allowed)". Apple's specification prefers platform
    # specific code 3 or 4 for Unicode.
    return makeName(name, nameID, 0, 4, ltagTable.addTag(language))
492
493
class NameRecord(object):
    """One entry of the 'name' table.

    The methods read the attributes ``nameID``, ``platformID``,
    ``platEncID``, ``langID`` and ``string``; these are assigned by whoever
    creates the record (there is no constructor setting them).
    """

    def getEncoding(self, default="ascii"):
        """Return the Python encoding name for this record.

        The encoding is looked up from platformID, platEncID and langID.
        When it is not known, 'default' is returned instead ('ascii' unless
        the caller overrides it).
        """
        ids = (self.platformID, self.platEncID, self.langID)
        return getEncoding(*ids, default)

    def encodingIsUnicodeCompatible(self):
        """Tell whether the record's encoding is one of a fixed list of
        encodings treated as Unicode compatible. An unknown encoding is not."""
        unicodeCompatible = ("utf_16_be", "ucs2be", "ascii", "latin1")
        return self.getEncoding(None) in unicodeCompatible

    def __str__(self):
        """Decode the record, escaping undecodable bytes with backslashes."""
        return self.toStr(errors="backslashreplace")

    def isUnicode(self):
        """Tell whether the record is on platform 0, or on platform 3 with
        platEncID 0, 1 or 10."""
        if self.platformID == 0:
            return True
        return self.platformID == 3 and self.platEncID in (0, 1, 10)

    def toUnicode(self, errors="strict"):
        """Return the record's string as `str`.

        A `str` value is returned as is (subject to the double-encoding
        repair below). `bytes` are decoded with the encoding reported by
        self.getEncoding(), which is 'ascii' when the encoding is unknown.

        Some heuristics repair data that is ill-formed or looks misencoded,
        mostly around UTF-16BE: odd-length UTF-16BE byte strings, and text
        that still looks like UTF-16BE after decoding. If the bytes remain
        undecodable, 'errors' decides what happens; it is handed to the
        underlying decode call and by default a UnicodeDecodeError is raised.

        Note: because of these heuristics, a round trip through XML may
        repair misencoded data that a plain load-and-save leaves untouched.
        """

        def printable(code):
            return 0x20 <= code <= 0x7E or code in (0x09, 0x0A, 0x0D)

        codec = self.getEncoding()
        raw = self.string

        oddUtf16 = (
            isinstance(raw, bytes) and codec == "utf_16_be" and len(raw) % 2 == 1
        )
        if oddUtf16:
            # Repairs for odd-length UTF-16, tried in this order:
            # 1. a trailing zero byte is dropped;
            # 2. if every odd-indexed byte is zero and every even-indexed
            #    byte is ASCII, a zero byte is prepended;
            # 3. if the first byte is zero and all the others are ASCII, a
            #    zero byte is put in front of every ASCII byte.
            codes = [byteord(b) for b in raw]
            if codes[-1] == 0:
                raw = raw[:-1]
            elif all(
                code == 0 if index % 2 else printable(code)
                for index, code in enumerate(codes)
            ):
                raw = b"\0" + raw
            elif codes[0] == 0 and all(printable(code) for code in codes[1:]):
                raw = bytesjoin(b"\0" + bytechr(code) for code in codes[1:])

        text = tostr(raw, encoding=codec, errors=errors)

        # Text that alternates NUL / ASCII after decoding was most likely
        # UTF-16BE stored under another encoding: keep only the ASCII half.
        looksDoubleEncoded = all(
            ord(ch) == 0 if index % 2 == 0 else printable(ord(ch))
            for index, ch in enumerate(text)
        )
        if looksDoubleEncoded:
            text = text[1::2]

        return text

    def toBytes(self, errors="strict"):
        """Return the record's string as `bytes`.

        `bytes` are returned as is; a `str` is encoded with the encoding
        reported by self.getEncoding(), which is 'ascii' when the encoding
        is unknown. 'errors' is handed to the underlying encode call; by
        default an unencodable string raises UnicodeEncodeError.
        """
        raw = self.string
        codec = self.getEncoding()
        return tobytes(raw, encoding=codec, errors=errors)

    toStr = toUnicode

    def toXML(self, writer, ttFont):
        """Write the record as a ``namerecord`` element.

        The decoded text is written when decoding succeeds, otherwise the
        raw string is written with write8bit. A ``unicode`` attribute is
        added when decoding failed or the encoding is not Unicode
        compatible; its value tells whether decoding succeeded.
        """
        try:
            text = self.toUnicode()
        except UnicodeDecodeError:
            text = None
        decodable = text is not None

        attrs = [
            ("nameID", self.nameID),
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("langID", hex(self.langID)),
        ]
        if not decodable or not self.encodingIsUnicodeCompatible():
            attrs.append(("unicode", decodable))

        writer.begintag("namerecord", attrs)
        writer.newline()
        if decodable:
            writer.write(text)
        else:
            writer.write8bit(self.string)
        writer.newline()
        writer.endtag("namerecord")
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Populate the record from a ``namerecord`` element.

        The four IDs come from the attributes. The stripped element text is
        encoded with the record's own encoding when that encoding is Unicode
        compatible or the ``unicode`` attribute is true; otherwise it is
        encoded as latin1, undoing write8bit.
        """
        for field in ("nameID", "platformID", "platEncID", "langID"):
            setattr(self, field, safeEval(attrs[field]))
        text = strjoin(content).strip()
        codec = self.getEncoding()
        storedAsText = self.encodingIsUnicodeCompatible() or safeEval(
            attrs.get("unicode", "False")
        )
        self.string = text.encode(codec if storedAsText else "latin1")

    def __lt__(self, other):
        """Order records by (platformID, platEncID, langID, nameID, bytes).

        This makes list.sort() produce the order required by the spec.
        NotImplemented is returned for objects of another type and for
        records lacking one of the ID attributes.
        """
        if type(self) != type(other):
            return NotImplemented

        idFields = ("platformID", "platEncID", "langID", "nameID")
        try:
            ownKey = tuple(getattr(self, field) for field in idFields)
            otherKey = tuple(getattr(other, field) for field in idFields)
        except AttributeError:
            # Either not really a NameRecord, or a record that has not been
            # fully populated yet.
            return NotImplemented

        try:
            # The encoded string is the final tie-breaker.
            ownKey += (self.toBytes(),)
            otherKey += (other.toBytes(),)
        except UnicodeEncodeError as e:
            # One of the strings cannot be encoded: fall back to comparing
            # whatever has been gathered so far.
            log.error("NameRecord sorting failed to encode: %s" % e)

        return ownKey < otherKey

    def __repr__(self):
        """Return a short description with the name, platform and language IDs."""
        template = "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>"
        return template % (self.nameID, self.platformID, self.langID)
676
677
678# Windows language ID → IETF BCP-47 language tag
679#
680# While Microsoft indicates a region/country for all its language
681# IDs, we follow Unicode practice by omitting “most likely subtags”
682# as per Unicode CLDR. For example, English is simply “en” and not
683# “en-Latn” because according to Unicode, the default script
684# for English is Latin.
685#
686# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
687# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
688_WINDOWS_LANGUAGES = {
689    0x0436: "af",
690    0x041C: "sq",
691    0x0484: "gsw",
692    0x045E: "am",
693    0x1401: "ar-DZ",
694    0x3C01: "ar-BH",
695    0x0C01: "ar",
696    0x0801: "ar-IQ",
697    0x2C01: "ar-JO",
698    0x3401: "ar-KW",
699    0x3001: "ar-LB",
700    0x1001: "ar-LY",
701    0x1801: "ary",
702    0x2001: "ar-OM",
703    0x4001: "ar-QA",
704    0x0401: "ar-SA",
705    0x2801: "ar-SY",
706    0x1C01: "aeb",
707    0x3801: "ar-AE",
708    0x2401: "ar-YE",
709    0x042B: "hy",
710    0x044D: "as",
711    0x082C: "az-Cyrl",
712    0x042C: "az",
713    0x046D: "ba",
714    0x042D: "eu",
715    0x0423: "be",
716    0x0845: "bn",
717    0x0445: "bn-IN",
718    0x201A: "bs-Cyrl",
719    0x141A: "bs",
720    0x047E: "br",
721    0x0402: "bg",
722    0x0403: "ca",
723    0x0C04: "zh-HK",
724    0x1404: "zh-MO",
725    0x0804: "zh",
726    0x1004: "zh-SG",
727    0x0404: "zh-TW",
728    0x0483: "co",
729    0x041A: "hr",
730    0x101A: "hr-BA",
731    0x0405: "cs",
732    0x0406: "da",
733    0x048C: "prs",
734    0x0465: "dv",
735    0x0813: "nl-BE",
736    0x0413: "nl",
737    0x0C09: "en-AU",
738    0x2809: "en-BZ",
739    0x1009: "en-CA",
740    0x2409: "en-029",
741    0x4009: "en-IN",
742    0x1809: "en-IE",
743    0x2009: "en-JM",
744    0x4409: "en-MY",
745    0x1409: "en-NZ",
746    0x3409: "en-PH",
747    0x4809: "en-SG",
748    0x1C09: "en-ZA",
749    0x2C09: "en-TT",
750    0x0809: "en-GB",
751    0x0409: "en",
752    0x3009: "en-ZW",
753    0x0425: "et",
754    0x0438: "fo",
755    0x0464: "fil",
756    0x040B: "fi",
757    0x080C: "fr-BE",
758    0x0C0C: "fr-CA",
759    0x040C: "fr",
760    0x140C: "fr-LU",
761    0x180C: "fr-MC",
762    0x100C: "fr-CH",
763    0x0462: "fy",
764    0x0456: "gl",
765    0x0437: "ka",
766    0x0C07: "de-AT",
767    0x0407: "de",
768    0x1407: "de-LI",
769    0x1007: "de-LU",
770    0x0807: "de-CH",
771    0x0408: "el",
772    0x046F: "kl",
773    0x0447: "gu",
774    0x0468: "ha",
775    0x040D: "he",
776    0x0439: "hi",
777    0x040E: "hu",
778    0x040F: "is",
779    0x0470: "ig",
780    0x0421: "id",
781    0x045D: "iu",
782    0x085D: "iu-Latn",
783    0x083C: "ga",
784    0x0434: "xh",
785    0x0435: "zu",
786    0x0410: "it",
787    0x0810: "it-CH",
788    0x0411: "ja",
789    0x044B: "kn",
790    0x043F: "kk",
791    0x0453: "km",
792    0x0486: "quc",
793    0x0487: "rw",
794    0x0441: "sw",
795    0x0457: "kok",
796    0x0412: "ko",
797    0x0440: "ky",
798    0x0454: "lo",
799    0x0426: "lv",
800    0x0427: "lt",
801    0x082E: "dsb",
802    0x046E: "lb",
803    0x042F: "mk",
804    0x083E: "ms-BN",
805    0x043E: "ms",
806    0x044C: "ml",
807    0x043A: "mt",
808    0x0481: "mi",
809    0x047A: "arn",
810    0x044E: "mr",
811    0x047C: "moh",
812    0x0450: "mn",
813    0x0850: "mn-CN",
814    0x0461: "ne",
815    0x0414: "nb",
816    0x0814: "nn",
817    0x0482: "oc",
818    0x0448: "or",
819    0x0463: "ps",
820    0x0415: "pl",
821    0x0416: "pt",
822    0x0816: "pt-PT",
823    0x0446: "pa",
824    0x046B: "qu-BO",
825    0x086B: "qu-EC",
826    0x0C6B: "qu",
827    0x0418: "ro",
828    0x0417: "rm",
829    0x0419: "ru",
830    0x243B: "smn",
831    0x103B: "smj-NO",
832    0x143B: "smj",
833    0x0C3B: "se-FI",
834    0x043B: "se",
835    0x083B: "se-SE",
836    0x203B: "sms",
837    0x183B: "sma-NO",
838    0x1C3B: "sms",
839    0x044F: "sa",
840    0x1C1A: "sr-Cyrl-BA",
841    0x0C1A: "sr",
842    0x181A: "sr-Latn-BA",
843    0x081A: "sr-Latn",
844    0x046C: "nso",
845    0x0432: "tn",
846    0x045B: "si",
847    0x041B: "sk",
848    0x0424: "sl",
849    0x2C0A: "es-AR",
850    0x400A: "es-BO",
851    0x340A: "es-CL",
852    0x240A: "es-CO",
853    0x140A: "es-CR",
854    0x1C0A: "es-DO",
855    0x300A: "es-EC",
856    0x440A: "es-SV",
857    0x100A: "es-GT",
858    0x480A: "es-HN",
859    0x080A: "es-MX",
860    0x4C0A: "es-NI",
861    0x180A: "es-PA",
862    0x3C0A: "es-PY",
863    0x280A: "es-PE",
864    0x500A: "es-PR",
865    # Microsoft has defined two different language codes for
866    # “Spanish with modern sorting” and “Spanish with traditional
867    # sorting”. This makes sense for collation APIs, and it would be
868    # possible to express this in BCP 47 language tags via Unicode
    # extensions (e.g., “es-u-co-trad” is “Spanish with traditional
870    # sorting”). However, for storing names in fonts, this distinction
871    # does not make sense, so we use “es” in both cases.
872    0x0C0A: "es",
873    0x040A: "es",
874    0x540A: "es-US",
875    0x380A: "es-UY",
876    0x200A: "es-VE",
877    0x081D: "sv-FI",
878    0x041D: "sv",
879    0x045A: "syr",
880    0x0428: "tg",
881    0x085F: "tzm",
882    0x0449: "ta",
883    0x0444: "tt",
884    0x044A: "te",
885    0x041E: "th",
886    0x0451: "bo",
887    0x041F: "tr",
888    0x0442: "tk",
889    0x0480: "ug",
890    0x0422: "uk",
891    0x042E: "hsb",
892    0x0420: "ur",
893    0x0843: "uz-Cyrl",
894    0x0443: "uz",
895    0x042A: "vi",
896    0x0452: "cy",
897    0x0488: "wo",
898    0x0485: "sah",
899    0x0478: "ii",
900    0x046A: "yo",
901}
902
903
# Macintosh platform language ID → BCP 47 language tag.
#
# Each language ID should carry the tag of its own language, because the
# reverse table built from this one keeps a single ID per tag. Two IDs
# deliberately share "ga" (35 and 146).
_MAC_LANGUAGES = {
    0: "en",
    1: "fr",
    2: "de",
    3: "it",
    4: "nl",
    5: "sv",
    6: "es",
    7: "da",
    8: "pt",
    9: "no",
    10: "he",
    11: "ja",
    12: "ar",
    13: "fi",
    14: "el",
    15: "is",
    16: "mt",
    17: "tr",
    18: "hr",
    19: "zh-Hant",
    20: "ur",
    21: "hi",
    22: "th",
    23: "ko",
    24: "lt",
    25: "pl",
    26: "hu",
    27: "et",  # Estonian; "es" (Spanish) belongs to ID 6 only
    28: "lv",
    29: "se",
    30: "fo",
    31: "fa",
    32: "ru",
    33: "zh",
    34: "nl-BE",
    35: "ga",
    36: "sq",
    37: "ro",
    38: "cs",  # Czech; "cz" is the country code, not a language tag
    39: "sk",
    40: "sl",
    41: "yi",
    42: "sr",
    43: "mk",
    44: "bg",
    45: "uk",
    46: "be",
    47: "uz",
    48: "kk",
    49: "az-Cyrl",
    50: "az-Arab",
    51: "hy",
    52: "ka",
    53: "mo",
    54: "ky",
    55: "tg",
    56: "tk",
    57: "mn-CN",
    58: "mn",
    59: "ps",
    60: "ku",  # Kurdish
    61: "ks",  # Kashmiri
    62: "sd",
    63: "bo",
    64: "ne",
    65: "sa",
    66: "mr",
    67: "bn",
    68: "as",
    69: "gu",
    70: "pa",
    71: "or",
    72: "ml",
    73: "kn",
    74: "ta",
    75: "te",
    76: "si",
    77: "my",
    78: "km",
    79: "lo",
    80: "vi",
    81: "id",
    82: "tl",
    83: "ms",
    84: "ms-Arab",
    85: "am",
    86: "ti",
    87: "om",
    88: "so",
    89: "sw",
    90: "rw",
    91: "rn",
    92: "ny",
    93: "mg",
    94: "eo",
    128: "cy",
    129: "eu",
    130: "ca",
    131: "la",
    132: "qu",
    133: "gn",
    134: "ay",
    135: "tt",
    136: "ug",
    137: "dz",
    138: "jv",
    139: "su",
    140: "gl",
    141: "af",
    142: "br",
    143: "iu",
    144: "gd",
    145: "gv",
    146: "ga",
    147: "to",
    148: "el-polyton",
    149: "kl",
    150: "az",
    151: "nn",
}
1025
1026
# Reverse lookups: lower-cased language tag → platform language ID.
# When a tag is listed under several IDs in the forward table, the ID that
# appears last there is the one that ends up in the reverse table.
_WINDOWS_LANGUAGE_CODES = dict(
    (tag.lower(), langID) for langID, tag in _WINDOWS_LANGUAGES.items()
)
_MAC_LANGUAGE_CODES = dict(
    (tag.lower(), langID) for langID, tag in _MAC_LANGUAGES.items()
)
1031
1032
# MacOS language ID → MacOS script ID
#
# Note that the script ID is not sufficient to determine what encoding
# to use in TrueType files. For some languages, MacOS used a modification
# of a mainstream script. For example, an Icelandic name would be stored
# with smRoman in the TrueType naming table, but the actual encoding
# is a special Icelandic version of the normal Macintosh Roman encoding.
# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
# Syllabics but MacOS had run out of available script codes, so this was
# done as a (pretty radical) “modification” of Ethiopic.
# Such cases are marked “(modified)” in the table below.
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
    0: 0,  # langEnglish → smRoman
    1: 0,  # langFrench → smRoman
    2: 0,  # langGerman → smRoman
    3: 0,  # langItalian → smRoman
    4: 0,  # langDutch → smRoman
    5: 0,  # langSwedish → smRoman
    6: 0,  # langSpanish → smRoman
    7: 0,  # langDanish → smRoman
    8: 0,  # langPortuguese → smRoman
    9: 0,  # langNorwegian → smRoman
    10: 5,  # langHebrew → smHebrew
    11: 1,  # langJapanese → smJapanese
    12: 4,  # langArabic → smArabic
    13: 0,  # langFinnish → smRoman
    14: 6,  # langGreek → smGreek
    15: 0,  # langIcelandic → smRoman (modified)
    16: 0,  # langMaltese → smRoman
    17: 0,  # langTurkish → smRoman (modified)
    18: 0,  # langCroatian → smRoman (modified)
    19: 2,  # langTradChinese → smTradChinese
    20: 4,  # langUrdu → smArabic
    21: 9,  # langHindi → smDevanagari
    22: 21,  # langThai → smThai
    23: 3,  # langKorean → smKorean
    24: 29,  # langLithuanian → smCentralEuroRoman
    25: 29,  # langPolish → smCentralEuroRoman
    26: 29,  # langHungarian → smCentralEuroRoman
    27: 29,  # langEstonian → smCentralEuroRoman
    28: 29,  # langLatvian → smCentralEuroRoman
    29: 0,  # langSami → smRoman
    30: 0,  # langFaroese → smRoman (modified)
    31: 4,  # langFarsi → smArabic (modified)
    32: 7,  # langRussian → smCyrillic
    33: 25,  # langSimpChinese → smSimpChinese
    34: 0,  # langFlemish → smRoman
    35: 0,  # langIrishGaelic → smRoman (modified)
    36: 0,  # langAlbanian → smRoman
    37: 0,  # langRomanian → smRoman (modified)
    38: 29,  # langCzech → smCentralEuroRoman
    39: 29,  # langSlovak → smCentralEuroRoman
    40: 0,  # langSlovenian → smRoman (modified)
    41: 5,  # langYiddish → smHebrew
    42: 7,  # langSerbian → smCyrillic
    43: 7,  # langMacedonian → smCyrillic
    44: 7,  # langBulgarian → smCyrillic
    45: 7,  # langUkrainian → smCyrillic (modified)
    46: 7,  # langByelorussian → smCyrillic
    47: 7,  # langUzbek → smCyrillic
    48: 7,  # langKazakh → smCyrillic
    49: 7,  # langAzerbaijani → smCyrillic
    50: 4,  # langAzerbaijanAr → smArabic
    51: 24,  # langArmenian → smArmenian
    52: 23,  # langGeorgian → smGeorgian
    53: 7,  # langMoldavian → smCyrillic
    54: 7,  # langKirghiz → smCyrillic
    55: 7,  # langTajiki → smCyrillic
    56: 7,  # langTurkmen → smCyrillic
    57: 27,  # langMongolian → smMongolian
    58: 7,  # langMongolianCyr → smCyrillic
    59: 4,  # langPashto → smArabic
    60: 4,  # langKurdish → smArabic
    61: 4,  # langKashmiri → smArabic
    62: 4,  # langSindhi → smArabic
    63: 26,  # langTibetan → smTibetan
    64: 9,  # langNepali → smDevanagari
    65: 9,  # langSanskrit → smDevanagari
    66: 9,  # langMarathi → smDevanagari
    67: 13,  # langBengali → smBengali
    68: 13,  # langAssamese → smBengali
    69: 11,  # langGujarati → smGujarati
    70: 10,  # langPunjabi → smGurmukhi
    71: 12,  # langOriya → smOriya
    72: 17,  # langMalayalam → smMalayalam
    73: 16,  # langKannada → smKannada
    74: 14,  # langTamil → smTamil
    75: 15,  # langTelugu → smTelugu
    76: 18,  # langSinhalese → smSinhalese
    77: 19,  # langBurmese → smBurmese
    78: 20,  # langKhmer → smKhmer
    79: 22,  # langLao → smLao
    80: 30,  # langVietnamese → smVietnamese
    81: 0,  # langIndonesian → smRoman
    82: 0,  # langTagalog → smRoman
    83: 0,  # langMalayRoman → smRoman
    84: 4,  # langMalayArabic → smArabic
    85: 28,  # langAmharic → smEthiopic
    86: 28,  # langTigrinya → smEthiopic
    87: 28,  # langOromo → smEthiopic
    88: 0,  # langSomali → smRoman
    89: 0,  # langSwahili → smRoman
    90: 0,  # langKinyarwanda → smRoman
    91: 0,  # langRundi → smRoman
    92: 0,  # langNyanja → smRoman
    93: 0,  # langMalagasy → smRoman
    94: 0,  # langEsperanto → smRoman
    128: 0,  # langWelsh → smRoman (modified)
    129: 0,  # langBasque → smRoman
    130: 0,  # langCatalan → smRoman
    131: 0,  # langLatin → smRoman
    132: 0,  # langQuechua → smRoman
    133: 0,  # langGuarani → smRoman
    134: 0,  # langAymara → smRoman
    135: 7,  # langTatar → smCyrillic
    136: 4,  # langUighur → smArabic
    137: 26,  # langDzongkha → smTibetan
    138: 0,  # langJavaneseRom → smRoman
    139: 0,  # langSundaneseRom → smRoman
    140: 0,  # langGalician → smRoman
    141: 0,  # langAfrikaans → smRoman
    142: 0,  # langBreton → smRoman (modified)
    143: 28,  # langInuktitut → smEthiopic (modified)
    144: 0,  # langScottishGaelic → smRoman (modified)
    145: 0,  # langManxGaelic → smRoman (modified)
    146: 0,  # langIrishGaelicScript → smRoman (modified)
    147: 0,  # langTongan → smRoman
    148: 6,  # langGreekAncient → smGreek
    149: 0,  # langGreenlandic → smRoman
    150: 0,  # langAzerbaijanRoman → smRoman
    151: 0,  # langNynorsk → smRoman
}
1166
1167
class NameRecordVisitor(TTVisitor):
    """Visitor that gathers the name IDs referenced by a font's tables.

    Every ID encountered by the registered handlers is accumulated in the
    ``seen`` set of the instance.
    """

    # Tags of the font tables that are searched for name ID references.
    TABLES = (
        "GSUB",
        "GPOS",
        "fvar",
        "CPAL",
        "STAT",
    )

    def __init__(self):
        # Name IDs collected so far; starts out empty for each new visitor.
        self.seen = set()
1174
1175
@NameRecordVisitor.register_attrs(
    (
        (otTables.FeatureParamsSize, ("SubfamilyID", "SubfamilyNameID")),
        (otTables.FeatureParamsStylisticSet, ("UINameID",)),
        (
            otTables.FeatureParamsCharacterVariants,
            (
                "FeatUILabelNameID",
                "FeatUITooltipTextNameID",
                "SampleTextNameID",
                "FirstParamUILabelNameID",
            ),
        ),
        (otTables.STAT, ("ElidedFallbackNameID",)),
        (otTables.AxisRecord, ("AxisNameID",)),
        (otTables.AxisValue, ("ValueNameID",)),
        (otTables.FeatureName, ("FeatureNameID",)),
        (otTables.Setting, ("SettingNameID",)),
    )
)
def visit(visitor, obj, attr, value):
    """Record *value* as a seen name ID.

    Registered for the (class, attribute names) pairs listed in the
    decorator; *obj* and *attr* are accepted but not used.
    """
    collected = visitor.seen
    collected.add(value)
1198
1199
@NameRecordVisitor.register(ttLib.getTableClass("fvar"))
def visit(visitor, obj):
    """Record the name IDs used by the instances and axes of an fvar table.

    For each instance the subfamily name ID is always recorded; the
    PostScript name ID is recorded unless it equals 0xFFFF (presumably the
    "no PostScript name" marker — confirm against the fvar table code).
    """
    collected = visitor.seen

    for instance in obj.instances:
        psNameID = instance.postscriptNameID
        if psNameID != 0xFFFF:
            collected.add(psNameID)
        collected.add(instance.subfamilyNameID)

    collected.update(axis.axisNameID for axis in obj.axes)
1209
1210
@NameRecordVisitor.register(ttLib.getTableClass("CPAL"))
def visit(visitor, obj):
    """Record the palette and palette-entry label name IDs of a CPAL table.

    Only tables whose ``version`` equals 1 are inspected; any other version
    contributes nothing.
    """
    if obj.version != 1:
        return

    # Read each label list only when it is about to be merged, so the
    # attributes are accessed in the same order as the updates happen.
    for labelsAttr in ("paletteLabels", "paletteEntryLabels"):
        visitor.seen.update(getattr(obj, labelsAttr))
1216
1217
@NameRecordVisitor.register(ttLib.TTFont)
def visit(visitor, font, *args, **kwargs):
    """Visit the tables of *font* whose tags are listed in ``visitor.TABLES``.

    ``visitor.font`` is set to *font* for the duration of the traversal and
    doubles as a re-entrancy guard: when the attribute already exists, the
    call returns immediately without visiting anything.

    Extra positional and keyword arguments are forwarded unchanged to
    ``visitor.visit`` for every table.

    Always returns False.
    NOTE(review): presumably False tells the visitor framework not to descend
    into the font object generically — confirm against the Visitor base class.
    """
    if hasattr(visitor, "font"):
        return False

    visitor.font = font
    try:
        for tag in visitor.TABLES:
            if tag in font:
                visitor.visit(font[tag], *args, **kwargs)
    finally:
        # Release the guard even when a table visit raises; otherwise every
        # later use of this visitor would hit the early return above and
        # silently collect nothing.
        del visitor.font
    return False
1229