xref: /aosp_15_r20/external/fonttools/Lib/fontTools/encodings/codecs.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
2but missing from Python.  See https://github.com/fonttools/fonttools/issues/236 for details."""
3
4import codecs
5import encodings
6
7
class ExtendCodec(codecs.Codec):
    """A codec that extends *base_encoding* with a small byte<->char mapping.

    The extra *mapping* supplies translations for byte sequences that the
    base encoding cannot handle.  An error handler registered under *name*
    consults this mapping first; only when it has no answer does control
    fall back to the error handler the caller actually requested.
    """

    def __init__(self, name, base_encoding, mapping):
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping
        # Inverse table, used when encoding: unicode string -> byte sequence.
        self.reverse = {char: raw for raw, char in mapping.items()}
        # Longest mapped string; bounds the lookahead in error() for encoding.
        self.max_len = max(len(char) for char in mapping.values())
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        # Expose our fallback mapping as a named error handler so the base
        # codec can call back into it.
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Drive *mapper* over *input*, patching failures via self.error
        first and the caller-requested *errors* handler second."""
        requested_handler = codecs.lookup_error(errors)
        consumed = len(input)
        result = output_type()
        while input:
            try:
                # Fast path: convert everything in one go, with our own
                # mapping (registered under self.name) filling the gaps.
                result += mapper(input, self.base_encoding, errors=self.name)
                break  # All converted
            except exc_type as exc:
                # Our mapping could not help either.  Keep the part that
                # converted cleanly, let the requested handler produce a
                # replacement, then resume where it says to.
                result += mapper(input[: exc.start], self.base_encoding, self.name)
                substitute, resume = requested_handler(exc)
                result += substitute
                input = input[resume:]
        return result, consumed

    def encode(self, input, errors="strict"):
        """Encode str *input*; returns (bytes, length consumed)."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors="strict"):
        """Decode bytes *input*; returns (str, length consumed)."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler: try progressively longer slices of the failing
        region against our mapping; re-raise when nothing matches."""
        if isinstance(e, UnicodeDecodeError):
            for stop in range(e.start + 1, e.end + 1):
                candidate = e.object[e.start : stop]
                if candidate in self.mapping:
                    return self.mapping[candidate], stop
        elif isinstance(e, UnicodeEncodeError):
            for stop in range(e.start + 1, e.start + self.max_len + 1):
                candidate = e.object[e.start : stop]
                if candidate in self.reverse:
                    return self.reverse[candidate], stop
        e.encoding = self.name
        raise e
57
58
59_extended_encodings = {
60    "x_mac_japanese_ttx": (
61        "shift_jis",
62        {
63            b"\xFC": chr(0x007C),
64            b"\x7E": chr(0x007E),
65            b"\x80": chr(0x005C),
66            b"\xA0": chr(0x00A0),
67            b"\xFD": chr(0x00A9),
68            b"\xFE": chr(0x2122),
69            b"\xFF": chr(0x2026),
70        },
71    ),
72    "x_mac_trad_chinese_ttx": (
73        "big5",
74        {
75            b"\x80": chr(0x005C),
76            b"\xA0": chr(0x00A0),
77            b"\xFD": chr(0x00A9),
78            b"\xFE": chr(0x2122),
79            b"\xFF": chr(0x2026),
80        },
81    ),
82    "x_mac_korean_ttx": (
83        "euc_kr",
84        {
85            b"\x80": chr(0x00A0),
86            b"\x81": chr(0x20A9),
87            b"\x82": chr(0x2014),
88            b"\x83": chr(0x00A9),
89            b"\xFE": chr(0x2122),
90            b"\xFF": chr(0x2026),
91        },
92    ),
93    "x_mac_simp_chinese_ttx": (
94        "gb2312",
95        {
96            b"\x80": chr(0x00FC),
97            b"\xA0": chr(0x00A0),
98            b"\xFD": chr(0x00A9),
99            b"\xFE": chr(0x2122),
100            b"\xFF": chr(0x2026),
101        },
102    ),
103}
104
# Lazily-constructed ExtendCodec instances, keyed by normalized codec name.
_cache = {}


def search_function(name):
    """Codec search function returning CodecInfo for our *_ttx encodings.

    Returns None for any name we do not implement, as the codecs protocol
    requires.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None
    if name not in _cache:
        fallback_base, mapping = _extended_encodings[name]
        assert name.endswith("_ttx")
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones,
        # mapping them "temporarily" to the same base encoding as us, with a
        # comment suggesting that a full implementation may appear later
        # (http://bugs.python.org/issue24041).  So first try the stdlib codec
        # named after ours (minus the "_ttx" suffix); if it exists, build on
        # *that*, so we automatically upgrade when and if Python ships a
        # complete implementation.
        for candidate in (name[:-4], fallback_base):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            _cache[name] = ExtendCodec(name, candidate, mapping)
            break
    return _cache[name].info


codecs.register(search_function)
136