xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/encodings/punycode.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
""" Codec for the Punycode encoding, as specified in RFC 3492

Written by Martin v. Löwis.
"""
5*cda5da8dSAndroid Build Coastguard Worker
6*cda5da8dSAndroid Build Coastguard Workerimport codecs
7*cda5da8dSAndroid Build Coastguard Worker
8*cda5da8dSAndroid Build Coastguard Worker##################### Encoding #####################################
9*cda5da8dSAndroid Build Coastguard Worker
def segregate(str):
    """3.1 Basic code point segregation

    Walk *str* once and split it into two parts: the characters whose
    ordinal is below 128, returned as a bytes object in their original
    order, and the remaining characters, returned as a sorted list with
    duplicates removed.
    """
    basic_codes = []
    non_basic = set()
    for ch in str:
        code = ord(ch)
        if code >= 128:
            non_basic.add(ch)
        else:
            basic_codes.append(code)
    return bytes(basic_codes), sorted(non_basic)
21*cda5da8dSAndroid Build Coastguard Worker
def selective_len(str, max):
    """Count the characters of str whose ordinal is strictly below max."""
    return sum(1 for ch in str if ord(ch) < max)
29*cda5da8dSAndroid Build Coastguard Worker
def selective_find(str, char, index, pos):
    """Locate the next occurrence of char in str after position pos.

    The scan starts at pos + 1.  index is a running counter that is
    advanced for every scanned character that compares lower than char,
    and once more for the match itself, so it tracks the position of the
    match when only characters up to and including char are counted.

    Returns the pair (index, pos) for the match, where pos is the
    position in the full string, or (-1, -1) when the end of str is
    reached without finding char.
    """
    end = len(str)
    cursor = pos + 1
    while cursor != end:
        current = str[cursor]
        if current == char:
            return index + 1, cursor
        if current < char:
            index += 1
        cursor += 1
    return (-1, -1)
47*cda5da8dSAndroid Build Coastguard Worker
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    For each character of *extended*, in the order given, and for each of
    its occurrences in *str*, emit one integer delta.  Returns the list
    of deltas.
    """
    deltas = []
    prev_code = 0x80
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        # Moving from the previous code point to this one is weighted by
        # the number of characters below it, plus one.
        gap = (selective_len(str, code) + 1) * (code - prev_code)
        index, pos = selective_find(str, ch, -1, -1)
        while index != -1:
            gap += index - prev_index
            deltas.append(gap - 1)
            prev_index = index
            # Further occurrences of the same character carry no
            # code point gap, only the index distance.
            gap = 0
            index, pos = selective_find(str, ch, index, pos)
        prev_code = code

    return deltas
69*cda5da8dSAndroid Build Coastguard Worker
def T(j, bias):
    """Return the threshold for digit position j, clamped to 1..26."""
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    return min(26, max(1, 36 * (j + 1) - bias))
76*cda5da8dSAndroid Build Coastguard Worker
# Digit alphabet: values 0-25 map to a-z, values 26-35 map to 0-9.
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode the integer N as a bytes object of digits, using the
    per-position thresholds given by T(j, bias).
    """
    encoded = bytearray()
    j = 0
    while True:
        t = T(j, bias)
        if N < t:
            # A digit below the threshold terminates the number.
            break
        N, remainder = divmod(N - t, 36 - t)
        encoded.append(digits[t + remainder])
        j += 1
    encoded.append(digits[N])
    return bytes(encoded)
90*cda5da8dSAndroid Build Coastguard Worker
def adapt(delta, first, numchars):
    """Compute and return a bias value from delta.

    delta is scaled down by 700 when first is true and by 2 otherwise,
    then increased by its integer quotient with numchars before being
    reduced to the bias.
    """
    delta //= 700 if first else 2
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    offset = 0
    while delta > 455:
        delta //= 35  # base - tmin
        offset += 36
    return offset + (36 * delta // (delta + 38))
104*cda5da8dSAndroid Build Coastguard Worker
105*cda5da8dSAndroid Build Coastguard Worker
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode each delta as a generalized variable-length integer, adapting
    the bias after every delta, and return the concatenated bytes.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    bias = 72
    pieces = []
    for count, delta in enumerate(deltas, start=1):
        pieces.append(generate_generalized_integer(delta, bias))
        # count is the number of deltas handled so far, this one included.
        bias = adapt(delta, count == 1, baselen + count)
    return b"".join(pieces)
116*cda5da8dSAndroid Build Coastguard Worker
def punycode_encode(text):
    """Encode text as Punycode and return the result as bytes.

    When text contains characters below 128 they are emitted first,
    followed by a b"-" delimiter and then the encoded deltas; otherwise
    only the encoded deltas are returned.
    """
    basic, non_basic = segregate(text)
    deltas = insertion_unsort(text, non_basic)
    encoded = generate_integers(len(basic), deltas)
    return basic + b"-" + encoded if basic else encoded
124*cda5da8dSAndroid Build Coastguard Worker
125*cda5da8dSAndroid Build Coastguard Worker##################### Decoding #####################################
126*cda5da8dSAndroid Build Coastguard Worker
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one variable-length integer from the string *extended*,
    starting at index *extpos*, with thresholds given by T(j, bias).

    Only upper-case letters A-Z (digit values 0-25) and the decimal
    digits 0-9 (digit values 26-35) are accepted as digits.

    Returns a pair (newpos, value).  On success newpos is the index just
    past the last digit consumed.  If the input ends before the number is
    complete, or an invalid digit is met, UnicodeError is raised when
    errors == "strict"; otherwise value is None.
    """
    result = 0
    w = 1
    j = 0
    while True:
        try:
            char = ord(extended[extpos])
        except IndexError:
            # The input ran out before a terminating digit was seen.
            if errors == "strict":
                raise UnicodeError("incomplete punycode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A: # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39: # 0-9
            digit = char - 22 # 0x30-26
        elif errors == "strict":
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos-1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            # A digit below the threshold is the last one of the number.
            return extpos, result
        w = w * (36 - t)
        j += 1
155*cda5da8dSAndroid Build Coastguard Worker
156*cda5da8dSAndroid Build Coastguard Worker
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in *extended* one at a time and insert the
    character each one denotes into the string *base*.  Returns the
    resulting string.  If a delta cannot be decoded, the string built so
    far is returned.

    Raises UnicodeError when errors == "strict" and a decoded code point
    exceeds 0x10FFFF; with other error handlers '?' is used instead.
    """
    code_point = 0x80
    cursor = -1
    bias = 72
    extpos = 0
    total = len(extended)
    while extpos < total:
        next_extpos, delta = decode_generalized_number(extended, extpos,
                                                       bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        cursor += delta + 1
        # Each full wrap past the end of the string advances the code
        # point by one; the remainder is the insertion position.
        wraps, cursor = divmod(cursor, len(base) + 1)
        code_point += wraps
        if code_point > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % code_point)
            code_point = ord('?')
        base = base[:cursor] + chr(code_point) + base[cursor:]
        bias = adapt(delta, extpos == 0, len(base))
        extpos = next_extpos
    return base
181*cda5da8dSAndroid Build Coastguard Worker
def punycode_decode(text, errors):
    """Decode Punycode *text* and return the decoded str.

    *text* may be a str (encoded to ASCII first), a memoryview (copied
    to bytes) or a bytes-like object supporting rfind() and slicing.
    Everything before the last b"-" is the literal basic part and is
    decoded as ASCII with the *errors* handler; everything after it (or
    the whole input when there is no b"-") holds the encoded deltas.

    With errors == "strict" a non-ASCII byte anywhere raises
    UnicodeDecodeError.  With any other handler a non-ASCII byte in the
    delta part no longer raises: decoding stops at that byte and the
    text decoded so far is returned.
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    pos = text.rfind(b"-")
    if pos == -1:
        base = ""
        tail = text
    else:
        base = str(text[:pos], "ascii", errors)
        tail = text[pos+1:]
    # Honour a lenient error handler for the delta part as well.  The
    # "replace" handler is used rather than *errors* itself, because
    # silently dropping a byte ("ignore") would shift every following
    # digit.  The U+FFFD replacement is not a valid digit, so
    # decode_generalized_number() reports it and decoding stops there.
    tail_errors = "strict" if errors == "strict" else "replace"
    extended = str(tail, "ascii", tail_errors).upper()
    return insertion_sort(base, extended, errors)
195*cda5da8dSAndroid Build Coastguard Worker
196*cda5da8dSAndroid Build Coastguard Worker### Codec APIs
197*cda5da8dSAndroid Build Coastguard Worker
class Codec(codecs.Codec):
    """Stateless Punycode encoder and decoder."""

    def encode(self, input, errors='strict'):
        """Return (encoded bytes, number of characters consumed).

        The errors argument is accepted but not used.
        """
        return punycode_encode(input), len(input)

    def decode(self, input, errors='strict'):
        """Return (decoded str, length of input consumed).

        Raises UnicodeError unless errors is 'strict', 'replace' or
        'ignore'.
        """
        if errors in ('strict', 'replace', 'ignore'):
            return punycode_decode(input, errors), len(input)
        raise UnicodeError("Unsupported error handling "+errors)
209*cda5da8dSAndroid Build Coastguard Worker
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that encodes each chunk independently."""

    def encode(self, input, final=False):
        """Return input encoded as Punycode; final is not used."""
        encoded = punycode_encode(input)
        return encoded
213*cda5da8dSAndroid Build Coastguard Worker
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that decodes each chunk independently."""

    def decode(self, input, final=False):
        """Return input decoded from Punycode; final is not used.

        Raises UnicodeError unless self.errors is 'strict', 'replace' or
        'ignore'.
        """
        if self.errors in ('strict', 'replace', 'ignore'):
            return punycode_decode(input, self.errors)
        raise UnicodeError("Unsupported error handling "+self.errors)
219*cda5da8dSAndroid Build Coastguard Worker
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter."""
222*cda5da8dSAndroid Build Coastguard Worker
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader."""
225*cda5da8dSAndroid Build Coastguard Worker
226*cda5da8dSAndroid Build Coastguard Worker### encodings module API
227*cda5da8dSAndroid Build Coastguard Worker
def getregentry():
    """Return the codecs.CodecInfo describing the 'punycode' codec."""
    encoder = Codec().encode
    decoder = Codec().decode
    return codecs.CodecInfo(
        name='punycode',
        encode=encoder,
        decode=decoder,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
238