xref: /aosp_15_r20/external/fonttools/Lib/fontTools/encodings/codecs.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
but missing from Python.  See https://github.com/fonttools/fonttools/issues/236 for details."""
3*e1fe3e4aSElliott Hughes
4*e1fe3e4aSElliott Hughesimport codecs
5*e1fe3e4aSElliott Hughesimport encodings
6*e1fe3e4aSElliott Hughes
7*e1fe3e4aSElliott Hughes
class ExtendCodec(codecs.Codec):
    """Codec that layers a small byte <-> character table on top of a base codec.

    All real work is delegated to ``base_encoding``.  Whenever the base codec
    reports an error, the error handler registered by this object (under the
    codec's own ``name``) consults ``mapping`` / ``reverse`` to supply the
    missing conversion.  Only when the table has no entry either is the
    caller-requested error policy (``strict``, ``replace``, ...) applied.
    """

    def __init__(self, name, base_encoding, mapping):
        """Create the codec and register its error handler.

        Args:
            name: Name of the extended codec.  It is also used as the name of
                the error handler registered with ``codecs.register_error``.
            base_encoding: Name of the existing codec to delegate to.
            mapping: Dict from ``bytes`` sequences to the ``str`` each one
                decodes to; entries here take over where the base codec fails.
        """
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping
        # Inverse table used when encoding (str -> bytes).
        self.reverse = {v: k for k, v in mapping.items()}
        # Longest decoded string in the table; bounds the look-ahead done by
        # the encode branch of ``error``.  ``default=0`` keeps an empty
        # mapping from raising ValueError.
        self.max_len = max((len(v) for v in mapping.values()), default=0)
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        # Side effect: installs (or replaces) a process-wide error handler.
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Convert ``input`` with the base codec, applying the extension table.

        Args:
            mapper: ``codecs.encode`` or ``codecs.decode``.
            output_type: ``bytes`` when encoding, ``str`` when decoding.
            exc_type: The Unicode error class ``mapper`` raises on failure.
            input: The ``str`` or ``bytes`` to convert.
            errors: Name of the caller-requested error handler.

        Returns:
            A ``(converted, consumed_length)`` tuple, where ``consumed_length``
            is the length of the original ``input``.

        Raises:
            exc_type: If neither the base codec nor the table can convert a
                sequence and the requested handler re-raises (e.g. ``strict``).
        """
        base_error_handler = codecs.lookup_error(errors)
        length = len(input)
        chunks = []
        while input:
            # First try with self.error as the handler, so table entries are
            # picked up transparently by the base codec.
            try:
                chunks.append(mapper(input, self.base_encoding, errors=self.name))
            except exc_type as e:
                # self.error gave up at e.start: keep everything before it,
                # then let the caller-requested handler decide what happens.
                chunks.append(mapper(input[: e.start], self.base_encoding, self.name))
                replacement, pos = base_error_handler(e)
                if not isinstance(replacement, output_type):
                    # Encode-side handlers such as "ignore" and "replace"
                    # return a str; it must be encoded before it can be
                    # joined with the bytes output.
                    replacement = mapper(replacement, self.base_encoding, self.name)
                chunks.append(replacement)
                input = input[pos:]
            else:
                break  # All converted
        return output_type().join(chunks), length

    def encode(self, input, errors="strict"):
        """Encode ``input`` (str); return ``(bytes, number_of_characters_consumed)``."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors="strict"):
        """Decode ``input`` (bytes); return ``(str, number_of_bytes_consumed)``."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler that resolves base-codec failures through the table.

        Args:
            e: The ``UnicodeDecodeError`` or ``UnicodeEncodeError`` raised by
                the base codec.

        Returns:
            ``(replacement, resume_position)`` for the shortest sequence
            starting at ``e.start`` that has a table entry.

        Raises:
            The same exception ``e``, with ``e.encoding`` rewritten to this
            codec's name, when the table has no matching entry.
        """
        if isinstance(e, UnicodeDecodeError):
            # Try successively longer byte runs inside the failing range.
            for end in range(e.start + 1, e.end + 1):
                s = e.object[e.start : end]
                if s in self.mapping:
                    return self.mapping[s], end
        elif isinstance(e, UnicodeEncodeError):
            # Try successively longer character runs, up to the longest
            # string the table can produce.
            for end in range(e.start + 1, e.start + self.max_len + 1):
                s = e.object[e.start : end]
                if s in self.reverse:
                    return self.reverse[s], end
        e.encoding = self.name
        raise e
57*e1fe3e4aSElliott Hughes
58*e1fe3e4aSElliott Hughes
59*e1fe3e4aSElliott Hughes_extended_encodings = {
60*e1fe3e4aSElliott Hughes    "x_mac_japanese_ttx": (
61*e1fe3e4aSElliott Hughes        "shift_jis",
62*e1fe3e4aSElliott Hughes        {
63*e1fe3e4aSElliott Hughes            b"\xFC": chr(0x007C),
64*e1fe3e4aSElliott Hughes            b"\x7E": chr(0x007E),
65*e1fe3e4aSElliott Hughes            b"\x80": chr(0x005C),
66*e1fe3e4aSElliott Hughes            b"\xA0": chr(0x00A0),
67*e1fe3e4aSElliott Hughes            b"\xFD": chr(0x00A9),
68*e1fe3e4aSElliott Hughes            b"\xFE": chr(0x2122),
69*e1fe3e4aSElliott Hughes            b"\xFF": chr(0x2026),
70*e1fe3e4aSElliott Hughes        },
71*e1fe3e4aSElliott Hughes    ),
72*e1fe3e4aSElliott Hughes    "x_mac_trad_chinese_ttx": (
73*e1fe3e4aSElliott Hughes        "big5",
74*e1fe3e4aSElliott Hughes        {
75*e1fe3e4aSElliott Hughes            b"\x80": chr(0x005C),
76*e1fe3e4aSElliott Hughes            b"\xA0": chr(0x00A0),
77*e1fe3e4aSElliott Hughes            b"\xFD": chr(0x00A9),
78*e1fe3e4aSElliott Hughes            b"\xFE": chr(0x2122),
79*e1fe3e4aSElliott Hughes            b"\xFF": chr(0x2026),
80*e1fe3e4aSElliott Hughes        },
81*e1fe3e4aSElliott Hughes    ),
82*e1fe3e4aSElliott Hughes    "x_mac_korean_ttx": (
83*e1fe3e4aSElliott Hughes        "euc_kr",
84*e1fe3e4aSElliott Hughes        {
85*e1fe3e4aSElliott Hughes            b"\x80": chr(0x00A0),
86*e1fe3e4aSElliott Hughes            b"\x81": chr(0x20A9),
87*e1fe3e4aSElliott Hughes            b"\x82": chr(0x2014),
88*e1fe3e4aSElliott Hughes            b"\x83": chr(0x00A9),
89*e1fe3e4aSElliott Hughes            b"\xFE": chr(0x2122),
90*e1fe3e4aSElliott Hughes            b"\xFF": chr(0x2026),
91*e1fe3e4aSElliott Hughes        },
92*e1fe3e4aSElliott Hughes    ),
93*e1fe3e4aSElliott Hughes    "x_mac_simp_chinese_ttx": (
94*e1fe3e4aSElliott Hughes        "gb2312",
95*e1fe3e4aSElliott Hughes        {
96*e1fe3e4aSElliott Hughes            b"\x80": chr(0x00FC),
97*e1fe3e4aSElliott Hughes            b"\xA0": chr(0x00A0),
98*e1fe3e4aSElliott Hughes            b"\xFD": chr(0x00A9),
99*e1fe3e4aSElliott Hughes            b"\xFE": chr(0x2122),
100*e1fe3e4aSElliott Hughes            b"\xFF": chr(0x2026),
101*e1fe3e4aSElliott Hughes        },
102*e1fe3e4aSElliott Hughes    ),
103*e1fe3e4aSElliott Hughes}
104*e1fe3e4aSElliott Hughes
# ExtendCodec instances already built by search_function, keyed by the
# normalized codec name, so each extended codec is constructed only once.
_cache = {}
106*e1fe3e4aSElliott Hughes
107*e1fe3e4aSElliott Hughes
def search_function(name):
    """Codec search function for the extended ``*_ttx`` encodings.

    Args:
        name: Encoding name being looked up.

    Returns:
        The ``codecs.CodecInfo`` of the matching extended codec, or ``None``
        when ``name`` is not one of the extended encodings or when no usable
        base encoding is available for it.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None

    codec = _cache.get(name)
    if codec is None:
        fallback_encoding, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones, mapping
        # them "temporarily" to the same base encoding as us, with a comment
        # suggesting that full implementation will appear some time later.
        # As such, try the Python version of the x_mac_... first, if that is found,
        # use *that* as our base encoding.  This would make our encoding upgrade
        # to the full encoding when and if Python finally implements that.
        # http://bugs.python.org/issue24041
        for candidate in (name[:-4], fallback_encoding):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            codec = _cache[name] = ExtendCodec(name, candidate, mapping)
            break
        else:
            # Neither base encoding exists in this interpreter: report
            # "not found" instead of failing with a KeyError on the cache.
            return None
    return codec.info
133*e1fe3e4aSElliott Hughes
134*e1fe3e4aSElliott Hughes
# Import-time side effect: install search_function with the codecs registry so
# that codecs.lookup() can resolve the extended "*_ttx" encoding names.
codecs.register(search_function)
136