1"""Extend the Python codecs module with a few encodings that are used in OpenType (name table) 2but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details.""" 3 4import codecs 5import encodings 6 7 8class ExtendCodec(codecs.Codec): 9 def __init__(self, name, base_encoding, mapping): 10 self.name = name 11 self.base_encoding = base_encoding 12 self.mapping = mapping 13 self.reverse = {v: k for k, v in mapping.items()} 14 self.max_len = max(len(v) for v in mapping.values()) 15 self.info = codecs.CodecInfo( 16 name=self.name, encode=self.encode, decode=self.decode 17 ) 18 codecs.register_error(name, self.error) 19 20 def _map(self, mapper, output_type, exc_type, input, errors): 21 base_error_handler = codecs.lookup_error(errors) 22 length = len(input) 23 out = output_type() 24 while input: 25 # first try to use self.error as the error handler 26 try: 27 part = mapper(input, self.base_encoding, errors=self.name) 28 out += part 29 break # All converted 30 except exc_type as e: 31 # else convert the correct part, handle error as requested and continue 32 out += mapper(input[: e.start], self.base_encoding, self.name) 33 replacement, pos = base_error_handler(e) 34 out += replacement 35 input = input[pos:] 36 return out, length 37 38 def encode(self, input, errors="strict"): 39 return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors) 40 41 def decode(self, input, errors="strict"): 42 return self._map(codecs.decode, str, UnicodeDecodeError, input, errors) 43 44 def error(self, e): 45 if isinstance(e, UnicodeDecodeError): 46 for end in range(e.start + 1, e.end + 1): 47 s = e.object[e.start : end] 48 if s in self.mapping: 49 return self.mapping[s], end 50 elif isinstance(e, UnicodeEncodeError): 51 for end in range(e.start + 1, e.start + self.max_len + 1): 52 s = e.object[e.start : end] 53 if s in self.reverse: 54 return self.reverse[s], end 55 e.encoding = self.name 56 raise e 57 58 59_extended_encodings = { 60 "x_mac_japanese_ttx": ( 61 "shift_jis", 62 { 63 b"\xFC": chr(0x007C), 64 b"\x7E": chr(0x007E), 65 b"\x80": chr(0x005C), 66 b"\xA0": chr(0x00A0), 67 b"\xFD": chr(0x00A9), 68 b"\xFE": chr(0x2122), 69 b"\xFF": chr(0x2026), 70 }, 71 ), 72 "x_mac_trad_chinese_ttx": ( 73 "big5", 74 { 75 b"\x80": chr(0x005C), 76 b"\xA0": chr(0x00A0), 77 b"\xFD": chr(0x00A9), 78 b"\xFE": chr(0x2122), 79 b"\xFF": chr(0x2026), 80 }, 81 ), 82 "x_mac_korean_ttx": ( 83 "euc_kr", 84 { 85 b"\x80": chr(0x00A0), 86 b"\x81": chr(0x20A9), 87 b"\x82": chr(0x2014), 88 b"\x83": chr(0x00A9), 89 b"\xFE": chr(0x2122), 90 b"\xFF": chr(0x2026), 91 }, 92 ), 93 "x_mac_simp_chinese_ttx": ( 94 "gb2312", 95 { 96 b"\x80": chr(0x00FC), 97 b"\xA0": chr(0x00A0), 98 b"\xFD": chr(0x00A9), 99 b"\xFE": chr(0x2122), 100 b"\xFF": chr(0x2026), 101 }, 102 ), 103} 104 105_cache = {} 106 107 108def search_function(name): 109 name = encodings.normalize_encoding(name) # Rather undocumented... 110 if name in _extended_encodings: 111 if name not in _cache: 112 base_encoding, mapping = _extended_encodings[name] 113 assert name[-4:] == "_ttx" 114 # Python 2 didn't have any of the encodings that we are implementing 115 # in this file. Python 3 added aliases for the East Asian ones, mapping 116 # them "temporarily" to the same base encoding as us, with a comment 117 # suggesting that full implementation will appear some time later. 118 # As such, try the Python version of the x_mac_... first, if that is found, 119 # use *that* as our base encoding. 
This would make our encoding upgrade 120 # to the full encoding when and if Python finally implements that. 121 # http://bugs.python.org/issue24041 122 base_encodings = [name[:-4], base_encoding] 123 for base_encoding in base_encodings: 124 try: 125 codecs.lookup(base_encoding) 126 except LookupError: 127 continue 128 _cache[name] = ExtendCodec(name, base_encoding, mapping) 129 break 130 return _cache[name].info 131 132 return None 133 134 135codecs.register(search_function) 136
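
# Usage sketch (illustrative only, kept as a comment so that importing this
# module has no side effect beyond the codecs.register() call above).  The
# import path assumes the module lives in the fontTools source tree referenced
# in the docstring; adjust it to wherever the module actually lives.  Once the
# module is imported, search_function is registered and the "*_ttx" names work
# with the regular str/bytes codec API.  The byte values below come from the
# x_mac_japanese_ttx mapping, with shift_jis as the base encoding:
#
#     import fontTools.encodings.codecs  # registration happens on import
#
#     assert "\u2122".encode("x_mac_japanese_ttx") == b"\xfe"
#     assert b"\xfe\xff".decode("x_mac_japanese_ttx") == "\u2122\u2026"
#     # Text that the base encoding already covers is passed through unchanged:
#     assert "ABC".encode("x_mac_japanese_ttx") == b"ABC"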