"""Codec for the Punycode encoding, as specified in RFC 3492.

Written by Martin v. Löwis.
"""

import codecs

##################### Encoding #####################################

def segregate(str):
    """3.1 Basic code point segregation.

    Split ``str`` into its basic (ordinal < 128) code points and its
    extended code points.

    Returns a pair ``(base, extended)``: ``base`` is a ``bytes`` object
    holding the basic code points in their original order, ``extended``
    is a sorted list of the distinct non-basic characters.
    """
    base = bytearray()
    extended = set()
    for c in str:
        if ord(c) < 128:
            base.append(ord(c))
        else:
            extended.add(c)
    return bytes(base), sorted(extended)


def selective_len(str, max):
    """Return the length of str, considering only characters below max.

    ``max`` is an integer ordinal; characters whose ordinal is >= max
    are not counted.
    """
    return sum(1 for c in str if ord(c) < max)


def selective_find(str, char, index, pos):
    """Return a pair (index, pos), indicating the next occurrence of
    char in str.

    ``pos`` is the position in the full string; ``index`` is the position
    counting only characters that compare less than or equal to ``char``.
    The ``index``/``pos`` arguments give the position of the previous
    match (``-1``/``-1`` to start from the beginning); the search resumes
    at ``pos + 1``.

    Returns ``(-1, -1)`` when there is no further occurrence.
    """
    l = len(str)
    while True:
        pos += 1
        if pos == l:
            return (-1, -1)
        c = str[pos]
        if c == char:
            return index + 1, pos
        elif c < char:
            # Smaller characters are already present at this stage of the
            # insertion process, so they occupy an index slot.
            index += 1


def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding.

    ``extended`` is the sorted list of distinct non-basic characters of
    ``str`` (as produced by ``segregate``).  Returns the list of integer
    deltas, one per non-basic character occurrence in ``str``.
    """
    oldchar = 0x80
    result = []
    oldindex = -1
    for c in extended:
        index = pos = -1
        char = ord(c)
        curlen = selective_len(str, char)
        # Advancing the code point by (char - oldchar) costs one full
        # sweep over the (curlen + 1) insertion slots per step.
        delta = (curlen + 1) * (char - oldchar)
        while True:
            index, pos = selective_find(str, c, index, pos)
            if index == -1:
                break
            delta += index - oldindex
            result.append(delta - 1)
            oldindex = index
            delta = 0
        oldchar = char

    return result


def T(j, bias):
    """Return the threshold for digit position ``j`` under ``bias``.

    The value ``36 * (j + 1) - bias`` is clamped to the range 1..26.
    """
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    res = 36 * (j + 1) - bias
    if res < 1:
        return 1
    if res > 26:
        return 26
    return res


# Digit alphabet: index 0-25 -> 'a'-'z', index 26-35 -> '0'-'9'.
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"


def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers.

    Encode the non-negative integer ``N`` with the given ``bias`` and
    return the digits as ``bytes``.
    """
    result = bytearray()
    j = 0
    while True:
        t = T(j, bias)
        if N < t:
            # A digit below the threshold terminates the number.
            result.append(digits[N])
            return bytes(result)
        result.append(digits[t + ((N - t) % (36 - t))])
        N = (N - t) // (36 - t)
        j += 1


def adapt(delta, first, numchars):
    """Compute the bias to use for the next delta (RFC 3492, 3.4).

    ``delta`` is the delta just processed, ``first`` is true for the
    first delta of a string (it is then divided by damp = 700 instead
    of 2), and ``numchars`` is the number of code points handled so far,
    including the one just processed.
    """
    if first:
        delta //= 700
    else:
        delta //= 2
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    divisions = 0
    while delta > 455:
        delta = delta // 35  # base - tmin
        divisions += 36
    bias = divisions + (36 * delta // (delta + 38))
    return bias


def generate_integers(baselen, deltas):
    """3.4 Bias adaptation.

    Encode every delta in ``deltas`` as a generalized variable-length
    integer, adapting the bias after each one.  ``baselen`` is the number
    of basic code points in the string.  Returns the concatenated digits
    as ``bytes``.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    result = bytearray()
    bias = 72
    for points, delta in enumerate(deltas):
        s = generate_generalized_integer(delta, bias)
        result.extend(s)
        bias = adapt(delta, points == 0, baselen + points + 1)
    return bytes(result)


def punycode_encode(text):
    """Encode the string ``text`` and return the Punycode ``bytes``.

    When ``text`` contains at least one basic code point the result is
    ``base + b"-" + extended``; otherwise only the extended digits are
    returned.
    """
    base, extended = segregate(text)
    deltas = insertion_unsort(text, extended)
    extended = generate_integers(len(base), deltas)
    if base:
        return base + b"-" + extended
    return extended

##################### Decoding #####################################

def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers.

    Decode one number from the upper-cased digit string ``extended``,
    starting at index ``extpos``.

    Returns ``(newpos, value)``.  If the input ends prematurely or holds
    an invalid digit, raises ``UnicodeError`` when ``errors`` is
    ``"strict"`` and returns ``(newpos, None)`` otherwise.
    """
    result = 0
    w = 1
    j = 0
    while True:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punycode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A:  # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22  # 0x30-26
        elif errors == "strict":
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos-1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            return extpos, result
        w = w * (36 - t)
        j += 1


def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding.

    Decode the deltas in the upper-cased digit string ``extended`` and
    insert the resulting characters into the string ``base``.  Returns
    the resulting string.

    In non-strict modes decoding stops at the first malformed number and
    whatever has been decoded so far is returned; a code point above
    U+10FFFF is replaced by ``'?'``.  In strict mode both conditions
    raise ``UnicodeError``.
    """
    char = 0x80
    pos = -1
    bias = 72
    extpos = 0
    while extpos < len(extended):
        newpos, delta = decode_generalized_number(extended, extpos,
                                                  bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        pos += delta + 1
        char += pos // (len(base) + 1)
        if char > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % char)
            char = ord('?')
        pos = pos % (len(base) + 1)
        base = base[:pos] + chr(char) + base[pos:]
        bias = adapt(delta, (extpos == 0), len(base))
        extpos = newpos
    return base


def punycode_decode(text, errors):
    """Decode Punycode ``text`` (``str`` or bytes-like) to a string.

    Everything before the last ``"-"`` is the basic part; the rest is the
    digit string describing the non-basic characters.  Without a ``"-"``
    the whole input is treated as digits.

    ``errors`` is the error-handling scheme.  With ``"strict"`` malformed
    input raises ``UnicodeError``; with any other value a non-ASCII byte
    in the digit part ends decoding there, like any other invalid digit.
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    pos = text.rfind(b"-")
    if pos == -1:
        base = ""
        tail = text
    else:
        base = str(text[:pos], "ascii", errors)
        tail = text[pos+1:]
    try:
        extended = str(tail, "ascii")
    except UnicodeDecodeError as exc:
        if errors == "strict":
            raise
        # A non-ASCII byte can never be a valid digit, so decoding loses
        # synchronization there; keep only the digits that precede it.
        extended = str(tail[:exc.start], "ascii")
    return insertion_sort(base, extended.upper(), errors)

### Codec APIs

class Codec(codecs.Codec):
    """Stateless Punycode encoder/decoder."""

    def encode(self, input, errors='strict'):
        """Return ``(encoded bytes, number of characters consumed)``.

        ``errors`` is accepted for interface compatibility and not used.
        """
        res = punycode_encode(input)
        return res, len(input)

    def decode(self, input, errors='strict'):
        """Return ``(decoded string, length of input consumed)``.

        Raises ``UnicodeError`` if ``errors`` is not one of ``'strict'``,
        ``'replace'`` or ``'ignore'``.
        """
        if errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling "+errors)
        res = punycode_decode(input, errors)
        return res, len(input)


class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        """Encode ``input`` as one complete string; ``final`` is unused."""
        return punycode_encode(input)


class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        """Decode ``input`` as one complete string; ``final`` is unused."""
        if self.errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling "+self.errors)
        return punycode_decode(input, self.errors)


class StreamWriter(Codec, codecs.StreamWriter):
    pass


class StreamReader(Codec, codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    """Return the ``codecs.CodecInfo`` describing the punycode codec."""
    return codecs.CodecInfo(
        name='punycode',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )