#!/usr/bin/env python3

# This script is based on
# https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
# distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.

# This script uses the following Unicode tables:
# - UnicodeData.txt


from collections import namedtuple
import csv
import os
import subprocess

NUM_CODEPOINTS = 0x110000

# Code points below CUTOFF go to the "0" tables, those in
# [CUTOFF, 2 * CUTOFF) go to the "1" tables, everything above is emitted as
# explicit range checks.
CUTOFF = 0x10000

# General categories whose code points are reported by get_escaped().
# A missing category (None) is treated as "Cn".
ESCAPED_CLASSES = frozenset("Cc Cf Cs Co Cn Zl Zp Zs".split())


def to_ranges(iter):
    """Collapse a stream of integers into half-open ``(start, end)`` runs.

    Consecutive values are merged into one run, except that a new run is
    always started at 0x10000 and 0x20000 so that no run crosses those
    boundaries.  Yields nothing for an empty input.

    The parameter is named ``iter`` (shadowing the builtin inside this
    function only) to keep the existing call signature.
    """
    current = None
    for i in iter:
        starts_new_run = (
            current is None or i != current[1] or i in (0x10000, 0x20000)
        )
        if starts_new_run:
            if current is not None:
                yield tuple(current)
            current = [i, i + 1]
        else:
            current[1] += 1
    if current is not None:
        yield tuple(current)


def get_escaped(codepoints):
    """Yield the values of the code points that must be escaped.

    A code point is escaped when its category (``None`` counts as "Cn") is
    one of ``ESCAPED_CLASSES``; the ASCII space is explicitly excluded.
    """
    space = ord(' ')
    for c in codepoints:
        if (c.class_ or "Cn") in ESCAPED_CLASSES and c.value != space:
            yield c.value


def get_file(f):
    """Open the local copy of URL ``f``, downloading it with curl if absent.

    The local file name is the basename of ``f`` in the current directory.
    The caller owns the returned file object and is responsible for closing
    it.  Raises ``subprocess.CalledProcessError`` if the download fails.
    """
    local_name = os.path.basename(f)
    try:
        return open(local_name, encoding="utf-8")
    except FileNotFoundError:
        subprocess.run(["curl", "-O", f], check=True)
        return open(local_name, encoding="utf-8")


Codepoint = namedtuple('Codepoint', 'value class_')


def get_codepoints(f):
    """Yield a ``Codepoint`` for every value in ``range(NUM_CODEPOINTS)``.

    ``f`` is an iterable of ``;``-separated lines whose first three fields
    are the hexadecimal code point, its name and its general category.
    Code points between a ``...First>`` row and the following ``...Last>``
    row inherit the category of the ``First`` row; any other code point not
    listed gets ``class_`` ``None``.

    Raises ``ValueError`` if a ``First`` row is not followed by a ``Last``
    row.
    """
    r = csv.reader(f, delimiter=";")
    # Start at -1 so that code point 0 is still produced (as unassigned)
    # when the table does not begin at U+0000.
    prev_codepoint = -1
    class_first = None
    for row in r:
        codepoint = int(row[0], 16)
        name = row[1]
        class_ = row[2]

        if class_first is not None and not name.endswith("Last>"):
            raise ValueError("Missing Last after First")

        # Fill the gap since the previous row: either the interior of a
        # First/Last range (class_first set) or unassigned code points.
        for c in range(prev_codepoint + 1, codepoint):
            yield Codepoint(c, class_first)

        class_first = class_ if name.endswith("First>") else None

        yield Codepoint(codepoint, class_)
        prev_codepoint = codepoint

    if class_first is not None:
        raise ValueError("Missing Last after First")

    for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
        yield Codepoint(c, None)


def compress_singletons(singletons):
    """Split values into ``(upper byte, count)`` groups plus low bytes.

    Returns ``(uppers, lowers)`` where ``lowers`` holds ``value & 0xff`` for
    every input value, in order, and ``uppers`` holds one
    ``(value >> 8, n)`` entry per run of adjacent inputs sharing the same
    upper part, ``n`` being the number of items of ``lowers`` it covers.
    """
    uppers = []  # (upper, # items in lowers)
    lowers = []

    for value in singletons:
        upper = value >> 8
        if uppers and uppers[-1][0] == upper:
            uppers[-1] = (upper, uppers[-1][1] + 1)
        else:
            uppers.append((upper, 1))
        lowers.append(value & 0xff)

    return uppers, lowers


def _encode_length(length):
    """Encode one run length as a list of one or two byte values.

    lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
    lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
    """
    if length > 0x7f:
        return [0x80 | (length >> 8), length & 0xff]
    return [length & 0x7f]


def compress_normal(normal):
    """Run-length encode ``(start, count)`` ranges.

    For each range the result holds one list
    ``[truelen, (truelenaux), falselen, (falselenaux)]`` where ``truelen``
    is the distance from the end of the previous range (0 for the first) to
    ``start`` and ``falselen`` is ``count``; each length takes one or two
    bytes as described in ``_encode_length``.

    Both lengths must be below 0x8000 (checked with an assertion).
    """
    compressed = []

    prev_start = 0
    for start, count in normal:
        truelen = start - prev_start
        falselen = count
        prev_start = start + count

        assert truelen < 0x8000 and falselen < 0x8000
        compressed.append(_encode_length(truelen) + _encode_length(falselen))

    return compressed


def classify_ranges(ranges):
    """Distribute escaped ``(start, end)`` ranges over the output tables.

    Returns ``(singletons0, singletons1, normal0, normal1, extra)``:

    - ranges starting at or above ``2 * CUTOFF`` go to ``extra`` as
      ``(start, length)``;
    - ranges of length 1 or 2 are expanded into individual values in
      ``singletons0`` (start below ``CUTOFF``) or ``singletons1`` (with the
      ``CUTOFF`` bit cleared);
    - all other ranges go to ``normal0`` / ``normal1`` as
      ``(start, length)``, again with the ``CUTOFF`` bit cleared for the
      latter.
    """
    singletons0 = []
    singletons1 = []
    normal0 = []
    normal1 = []
    extra = []

    for a, b in ranges:
        length = b - a
        if a >= 2 * CUTOFF:
            # ">=" (not ">"): a short run starting exactly at 2 * CUTOFF
            # must not end up in the byte-oriented singleton tables.
            extra.append((a, length))
        elif length in (1, 2):
            if a & CUTOFF:
                singletons1.extend(c & ~CUTOFF for c in range(a, b))
            else:
                singletons0.extend(range(a, b))
        elif a & CUTOFF:
            normal1.append((a & ~CUTOFF, length))
        else:
            normal0.append((a, length))

    return singletons0, singletons1, normal0, normal1, extra


def print_singletons(uppers, lowers, uppersname, lowersname):
    """Print the two C++ arrays for one singleton table to stdout.

    ``uppers`` / ``lowers`` are the values returned by
    ``compress_singletons``; the low bytes are printed eight per line.
    """
    print(" static constexpr singleton {}[] = {{".format(uppersname))
    for u, c in uppers:
        print(" {{{:#04x}, {}}},".format(u, c))
    print(" };")
    print(" static constexpr unsigned char {}[] = {{".format(lowersname))
    for i in range(0, len(lowers), 8):
        chunk = lowers[i:i+8]
        print(" {}".format(" ".join("{:#04x},".format(low) for low in chunk)))
    print(" };")


def print_normal(normal, normalname):
    """Print the C++ byte array for one ``compress_normal`` result.

    Each encoded entry is printed on its own line.
    """
    print(" static constexpr unsigned char {}[] = {{".format(normalname))
    for v in normal:
        print(" {}".format(" ".join("{:#04x},".format(i) for i in v)))
    print(" };")


def main():
    """Read UnicodeData.txt and print the C++ ``is_printable`` function."""
    # The code point generator reads the file lazily, so everything that
    # consumes it has to run before the file is closed.
    with get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt") as file:
        codepoints = get_codepoints(file)
        singletons0, singletons1, normal0, normal1, extra = classify_ranges(
            to_ranges(get_escaped(codepoints))
        )

    singletons0u, singletons0l = compress_singletons(singletons0)
    singletons1u, singletons1l = compress_singletons(singletons1)
    normal0 = compress_normal(normal0)
    normal1 = compress_normal(normal1)

    print("""\
FMT_FUNC auto is_printable(uint32_t cp) -> bool {\
""")
    print_singletons(singletons0u, singletons0l, 'singletons0', 'singletons0_lower')
    print_singletons(singletons1u, singletons1l, 'singletons1', 'singletons1_lower')
    print_normal(normal0, 'normal0')
    print_normal(normal1, 'normal1')
    print("""\
 auto lower = static_cast<uint16_t>(cp);
 if (cp < 0x10000) {
 return is_printable(lower, singletons0,
 sizeof(singletons0) / sizeof(*singletons0),
 singletons0_lower, normal0, sizeof(normal0));
 }
 if (cp < 0x20000) {
 return is_printable(lower, singletons1,
 sizeof(singletons1) / sizeof(*singletons1),
 singletons1_lower, normal1, sizeof(normal1));
 }\
""")
    for a, b in extra:
        print(" if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
    print("""\
 return cp < 0x{:x};
}}\
""".format(NUM_CODEPOINTS))


if __name__ == '__main__':
    main()