xref: /aosp_15_r20/external/fmtlib/support/printable.py (revision 5c90c05cd622c0a81b57953a4d343e0e489f2e08)
#!/usr/bin/env python3

# This script is based on
# https://github.com/rust-lang/rust/blob/master/library/core/src/unicode/printable.py
# distributed under https://github.com/rust-lang/rust/blob/master/LICENSE-MIT.

# This script uses the following Unicode tables:
# - UnicodeData.txt

10*5c90c05cSAndroid Build Coastguard Worker
11*5c90c05cSAndroid Build Coastguard Workerfrom collections import namedtuple
12*5c90c05cSAndroid Build Coastguard Workerimport csv
13*5c90c05cSAndroid Build Coastguard Workerimport os
14*5c90c05cSAndroid Build Coastguard Workerimport subprocess
15*5c90c05cSAndroid Build Coastguard Worker
# Exclusive upper bound of the code point values this script enumerates.
NUM_CODEPOINTS = 0x110000
17*5c90c05cSAndroid Build Coastguard Worker
def to_ranges(iter):
    """Collapse a stream of integers into half-open ``(start, stop)`` runs.

    A value extends the run in progress only when it equals that run's
    ``stop``; any other value closes the run and opens a new one.  The values
    0x10000 and 0x20000 always open a new run, so no run spans them.
    """
    start = stop = None
    for value in iter:
        continues_run = (
            start is not None
            and value == stop
            and value not in (0x10000, 0x20000)
        )
        if continues_run:
            stop += 1
            continue
        # The run in progress (if any) is finished; emit it before restarting.
        if start is not None:
            yield (start, stop)
        start, stop = value, value + 1
    if start is not None:
        yield (start, stop)
29*5c90c05cSAndroid Build Coastguard Worker
def get_escaped(codepoints):
    """Yield the ``value`` of every code point that has to be escaped.

    A code point is selected when its ``class_`` is one of
    Cc, Cf, Cs, Co, Cn, Zl, Zp or Zs and it is not the ASCII space.  A falsy
    ``class_`` (``None`` or an empty string) is treated as "Cn".

    Args:
        codepoints: iterable of objects exposing ``value`` and ``class_``.
    """
    # Built once per call instead of re-splitting a string for every single
    # code point (the caller feeds in over a million of them).
    escaped_classes = frozenset(("Cc", "Cf", "Cs", "Co", "Cn", "Zl", "Zp", "Zs"))
    space = ord(' ')
    for c in codepoints:
        if (c.class_ or "Cn") in escaped_classes and c.value != space:
            yield c.value
34*5c90c05cSAndroid Build Coastguard Worker
def get_file(f):
    """Open the local copy of the file named by URL ``f``, downloading it if absent.

    The local copy lives in the current working directory under the URL's
    base name.  When it does not exist yet, ``curl`` is invoked to fetch it.

    Args:
        f: URL of the file; its last path component is the local file name.

    Returns:
        A text-mode file object opened for reading.  The caller owns it and
        is responsible for closing it.

    Raises:
        subprocess.CalledProcessError: if ``curl`` exits with a non-zero status.
    """
    local_name = os.path.basename(f)
    if not os.path.exists(local_name):
        # --fail makes curl exit non-zero on HTTP errors instead of saving the
        # server's error page under the data file's name, where it would be
        # picked up as a "cached" copy on every later run.
        subprocess.run(["curl", "--fail", "-O", f], check=True)
    # Explicit encoding so the result does not depend on the locale.
    return open(local_name, encoding="utf-8")
41*5c90c05cSAndroid Build Coastguard Worker
# One code point: its integer value and the class string attached to it
# (None where get_codepoints has no class to attach).
Codepoint = namedtuple('Codepoint', ['value', 'class_'])
43*5c90c05cSAndroid Build Coastguard Worker
def get_codepoints(f):
    """Yield a ``Codepoint`` for every value from 0 up to ``NUM_CODEPOINTS - 1``.

    ``f`` is read as ';'-separated rows whose first three fields are the code
    point in hexadecimal, its name, and its class.  Values that have no row of
    their own are filled in:

    * values strictly between a row whose name ends in "First>" and the
      following row (whose name must end in "Last>") get the class of the
      "First>" row;
    * every other missing value gets ``None`` as its class.

    Args:
        f: iterable of text lines (e.g. an open file).

    Raises:
        ValueError: if a "First>" row is not directly followed by a "Last>" row.
    """
    reader = csv.reader(f, delimiter=";")
    # Start one below zero so that code point 0 is still produced when the
    # first row is not 0000 or when there are no rows at all.
    prev_codepoint = -1
    class_first = None
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; skip it.
            continue
        codepoint = int(row[0], 16)
        name = row[1]
        class_ = row[2]

        if class_first is not None and not name.endswith("Last>"):
            raise ValueError("Missing Last after First")

        # Fill the gap since the previous row.  class_first is only set while
        # inside a First/Last pair, otherwise the gap gets None.
        for c in range(prev_codepoint + 1, codepoint):
            yield Codepoint(c, class_first)

        class_first = class_ if name.endswith("First>") else None

        yield Codepoint(codepoint, class_)
        prev_codepoint = codepoint

    if class_first is not None:
        raise ValueError("Missing Last after First")

    # Everything after the last row has no class.
    for c in range(prev_codepoint + 1, NUM_CODEPOINTS):
        yield Codepoint(c, None)
72*5c90c05cSAndroid Build Coastguard Worker
def compress_singletons(singletons):
    """Split values into a run-length list of high parts and a flat list of low bytes.

    Returns:
        ``(uppers, lowers)`` where ``lowers`` holds ``value & 0xff`` for every
        input value in order, and ``uppers`` holds ``(value >> 8, n)`` tuples,
        one per run of consecutive inputs sharing the same high part, ``n``
        being the number of entries of ``lowers`` that belong to the run.
    """
    uppers = []
    lowers = []

    for value in singletons:
        high, low = divmod(value, 0x100)
        if uppers and uppers[-1][0] == high:
            # Same high part as the previous value: grow the current run.
            uppers[-1] = (high, uppers[-1][1] + 1)
        else:
            uppers.append((high, 1))
        lowers.append(low)

    return uppers, lowers
88*5c90c05cSAndroid Build Coastguard Worker
def _encode_length(length):
    """Return the one- or two-byte encoding of ``length`` (0 <= length < 0x8000)."""
    # lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
    # lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
    if length > 0x7f:
        return [0x80 | (length >> 8), length & 0xff]
    return [length & 0x7f]


def compress_normal(normal):
    """Encode ``(start, count)`` ranges as alternating gap/length byte runs.

    For each range two lengths are encoded: ``truelen``, the distance from the
    end of the previous range (0 for the first one) to ``start``, and
    ``falselen``, which is ``count``.

    Args:
        normal: iterable of ``(start, count)`` pairs, in ascending order.

    Returns:
        A list with one entry per range:
        ``[truelen, (truelenaux), falselen, (falselenaux)]``.

    Raises:
        ValueError: if a length is negative or does not fit the encoding
            (i.e. is 0x8000 or larger).
    """
    compressed = []

    prev_end = 0
    for start, count in normal:
        truelen = start - prev_end
        falselen = count
        prev_end = start + count

        # Checked with an explicit raise rather than ``assert`` so the guard
        # still runs under ``python -O``; an out-of-range length would
        # otherwise be silently encoded into wrong bytes.
        if not (0 <= truelen < 0x8000 and 0 <= falselen < 0x8000):
            raise ValueError(
                "range ({:#x}, {:#x}) cannot be encoded: "
                "truelen={:#x}, falselen={:#x}".format(
                    start, count, truelen, falselen))

        compressed.append(_encode_length(truelen) + _encode_length(falselen))

    return compressed
116*5c90c05cSAndroid Build Coastguard Worker
def print_singletons(uppers, lowers, uppersname, lowersname):
    """Print the two C++ array definitions for one singleton table.

    ``uppers`` is written as a ``singleton`` array named ``uppersname``, one
    ``{upper, count}`` initializer per line.  ``lowers`` is written as an
    ``unsigned char`` array named ``lowersname``, eight values per line.
    """
    values_per_line = 8

    print("  static constexpr singleton {}[] = {{".format(uppersname))
    for upper, count in uppers:
        print("    {{{}, {}}},".format(format(upper, "#04x"), count))
    print("  };")

    print("  static constexpr unsigned char {}[] = {{".format(lowersname))
    for offset in range(0, len(lowers), values_per_line):
        chunk = lowers[offset:offset + values_per_line]
        cells = [format(byte, "#04x") + "," for byte in chunk]
        print("    " + " ".join(cells))
    print("  };")
126*5c90c05cSAndroid Build Coastguard Worker
def print_normal(normal, normalname):
    """Print a C++ ``unsigned char`` array named ``normalname``.

    Each element of ``normal`` is a sequence of byte values and is written on
    its own line.
    """
    print("  static constexpr unsigned char {}[] = {{".format(normalname))
    for entry in normal:
        cells = [format(byte, "#04x") + "," for byte in entry]
        print("    " + " ".join(cells))
    print("  };")
132*5c90c05cSAndroid Build Coastguard Worker
def main():
    """Generate the C++ ``is_printable(uint32_t)`` function on standard output.

    Reads UnicodeData.txt (downloaded on first use), determines which code
    points must be escaped, and sorts the resulting ranges into:

    * ``extra``: ranges starting at or above 0x20000, emitted as explicit
      ``if`` checks;
    * ``singletons0`` / ``singletons1``: ranges of length 1 or 2 below
      0x10000 / in 0x10000..0x1ffff, stored value by value;
    * ``normal0`` / ``normal1``: all longer ranges in those two blocks.
    """
    CUTOFF = 0x10000
    singletons0 = []
    singletons1 = []
    normal0 = []
    normal1 = []
    extra = []

    # get_codepoints is a lazy generator, so the file has to stay open until
    # the loop has consumed it; the ``with`` then closes it.
    with get_file("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt") as data:
        codepoints = get_codepoints(data)

        for a, b in to_ranges(get_escaped(codepoints)):
            if a >= 2 * CUTOFF:
                # ``>=`` (not ``>``): a short range starting exactly at
                # 0x20000 must not fall through to the singleton tables,
                # which only cover the two blocks below it.
                extra.append((a, b - a))
            elif b - a <= 2:
                # Ranges of one or two values are stored value by value.
                # Masking off CUTOFF is a no-op for values below it.
                singles = singletons1 if a & CUTOFF else singletons0
                singles.extend(cp & ~CUTOFF for cp in range(a, b))
            elif a & CUTOFF:
                normal1.append((a & ~CUTOFF, b - a))
            else:
                normal0.append((a, b - a))

    singletons0u, singletons0l = compress_singletons(singletons0)
    singletons1u, singletons1l = compress_singletons(singletons1)
    normal0 = compress_normal(normal0)
    normal1 = compress_normal(normal1)

    print("""\
FMT_FUNC auto is_printable(uint32_t cp) -> bool {\
""")
    print_singletons(singletons0u, singletons0l, 'singletons0', 'singletons0_lower')
    print_singletons(singletons1u, singletons1l, 'singletons1', 'singletons1_lower')
    print_normal(normal0, 'normal0')
    print_normal(normal1, 'normal1')
    print("""\
  auto lower = static_cast<uint16_t>(cp);
  if (cp < 0x10000) {
    return is_printable(lower, singletons0,
                        sizeof(singletons0) / sizeof(*singletons0),
                        singletons0_lower, normal0, sizeof(normal0));
  }
  if (cp < 0x20000) {
    return is_printable(lower, singletons1,
                        sizeof(singletons1) / sizeof(*singletons1),
                        singletons1_lower, normal1, sizeof(normal1));
  }\
""")
    for a, b in extra:
        # ``b`` is the range length here, so ``a + b`` is the exclusive end.
        print("  if (0x{:x} <= cp && cp < 0x{:x}) return false;".format(a, a + b))
    print("""\
  return cp < 0x{:x};
}}\
""".format(NUM_CODEPOINTS))
199*5c90c05cSAndroid Build Coastguard Worker
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
202