#!/usr/bin/env python3
# flake8: noqa: F821

"""usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt ArabicShaping.txt DerivedCoreProperties.txt UnicodeData.txt Blocks.txt Scripts.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt

Input files:
* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
* https://unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
* https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
* ms-use/IndicSyllabicCategory-Additional.txt
* ms-use/IndicPositionalCategory-Additional.txt
"""

import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)


import sys

# Exactly nine input files are required; otherwise print the usage text.
if len (sys.argv) != 10:
    sys.exit (__doc__)

# Code points whose Scripts.txt value is listed here are dropped from the
# merged table.
DISABLED_SCRIPTS = {
    'Arabic',
    'Lao',
    'Samaritan',
    'Syriac',
    'Thai',
}

# File index i below always refers to the position in the usage line:
#   0 IndicSyllabicCategory    1 IndicPositionalCategory   2 ArabicShaping
#   3 DerivedCoreProperties    4 UnicodeData               5 Blocks
#   6 Scripts                  7 IndicSyllabicCategory-Additional
#   8 IndicPositionalCategory-Additional
files = [open (x, encoding='utf-8') for x in sys.argv[1:]]

# Consume the first two lines of every file except UnicodeData.txt (index 4),
# which gets a placeholder header appended at the end instead.
headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 4]
# For the two "-Additional" files, keep reading header lines up to the first
# blank line.  Index 4 was skipped above, so file j lives at headers[j - 1].
for j in range(7, 9):
    for line in files[j]:
        line = line.rstrip()
        if not line:
            break
        headers[j - 1].append(line)
headers.append (["UnicodeData.txt does not have a header."])

# unicode_data[i] maps code point -> property value read from file slot i.
# values[i] maps property value -> number of code points carrying it.
unicode_data = [{} for _ in files]
values = [{} for _ in files]
for i, f in enumerate (files):
    for line in f:

        # Strip trailing comments.
        j = line.find ('#')
        if j >= 0:
            line = line[:j]

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            # No ';' separator: blank or comment-only line.
            continue

        # First field is either a single code point or a "start..end" range.
        uu = fields[0].split ('..')
        start = int (uu[0], 16)
        if len (uu) == 1:
            end = start
        else:
            end = int (uu[1], 16)

        # Files 2 and 4 carry the value of interest in the third field;
        # every other file carries it in the second.
        t = fields[1 if i not in [2, 4] else 2]

        if i == 2:
            # Prefix joining types so they cannot clash with other names.
            t = 'jt_' + t
        elif i == 3 and t != 'Default_Ignorable_Code_Point':
            # Only one property from file 3 is of interest.
            continue
        elif i == 7 and t == 'Consonant_Final_Modifier':
            # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/336
            t = 'Syllable_Modifier'
        elif i == 8 and t == 'NA':
            t = 'Not_Applicable'

        # The two "-Additional" files (7, 8) override slots 0 and 1.
        i0 = i if i < 7 else i - 7
        for u in range (start, end + 1):
            unicode_data[i0][u] = t
        values[i0][t] = values[i0].get (t, 0) + end - start + 1

# Default value for each of the seven merged slots (same order as files 0-6).
defaults = ('Other', 'Not_Applicable', 'jt_X', '', 'Cn', 'No_Block', 'Unknown')

# Merge data into one dict:
# Make sure every slot's default value is counted at least once.
for i,v in enumerate (defaults):
    values[i][v] = values[i].get (v, 0) + 1
# combined maps code point -> list of seven property values, one per slot,
# starting from the defaults and overwritten with whatever the files supply.
combined = {}
for i,d in enumerate (unicode_data):
    for u,v in d.items ():
        if u not in combined:
            # Slots 4 and above never introduce a new code point; they only
            # annotate code points already seen in slots 0-3.
            if i >= 4:
                continue
            combined[u] = list (defaults)
        combined[u][i] = v
# Slot 6 holds the script name; drop code points of disabled scripts.
combined = {k: v for k, v in combined.items() if v[6] not in DISABLED_SCRIPTS}


# Every property value name that gets turned into a module-level constant.
property_names = [
    # General_Category
    'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Mc',
    'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po',
    'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
    # Indic_Syllabic_Category
    'Other',
    'Bindu',
    'Visarga',
    'Avagraha',
    'Nukta',
    'Virama',
    'Pure_Killer',
    'Reordering_Killer',
    'Invisible_Stacker',
    'Vowel_Independent',
    'Vowel_Dependent',
    'Vowel',
    'Consonant_Placeholder',
    'Consonant',
    'Consonant_Dead',
    'Consonant_With_Stacker',
    'Consonant_Prefixed',
    'Consonant_Preceding_Repha',
    'Consonant_Succeeding_Repha',
    'Consonant_Subjoined',
    'Consonant_Medial',
    'Consonant_Final',
    'Consonant_Head_Letter',
    'Consonant_Initial_Postfixed',
    'Modifying_Letter',
    'Tone_Letter',
    'Tone_Mark',
    'Gemination_Mark',
    'Cantillation_Mark',
    'Register_Shifter',
    'Syllable_Modifier',
    'Consonant_Killer',
    'Non_Joiner',
    'Joiner',
    'Number_Joiner',
    'Number',
    'Brahmi_Joining_Number',
    'Symbol_Modifier',
    'Hieroglyph',
    'Hieroglyph_Joiner',
    'Hieroglyph_Mark_Begin',
    'Hieroglyph_Mark_End',
    'Hieroglyph_Mirror',
    'Hieroglyph_Modifier',
    'Hieroglyph_Segment_Begin',
    'Hieroglyph_Segment_End',
    # Indic_Positional_Category
    'Not_Applicable',
    'Right',
    'Left',
    'Visual_Order_Left',
    'Left_And_Right',
    'Top',
    'Bottom',
    'Top_And_Bottom',
    'Top_And_Bottom_And_Left',
    'Top_And_Right',
    'Top_And_Left',
    'Top_And_Left_And_Right',
    'Bottom_And_Left',
    'Bottom_And_Right',
    'Top_And_Bottom_And_Right',
    'Overstruck',
    # Joining_Type
    'jt_C',
    'jt_D',
    'jt_L',
    'jt_R',
    'jt_T',
    'jt_U',
    'jt_X',
]

class PropertyValue(object):
    """A named property value that compares equal to its own name.

    Instances compare equal to another PropertyValue with the same name
    and to the plain string holding that name, and hash like that string,
    so they can be mixed freely with the raw strings read from the data
    files.
    """
    def __init__(self, name_):
        self.name = name_
    def __str__(self):
        return self.name
    def __eq__(self, other):
        if isinstance(other, str):
            return self.name == other
        if isinstance(other, PropertyValue):
            return self.name == other.name
        # Let Python fall back to its default handling instead of raising
        # AttributeError on unrelated operand types.
        return NotImplemented
    def __ne__(self, other):
        return not (self == other)
    def __hash__(self):
        return hash(str(self))

property_values = {}

# Publish every property name as a module-level constant (e.g. ``Lo``,
# ``Virama``, ``jt_D``), refusing to overwrite any existing global.
for name in property_names:
    value = PropertyValue(name)
    assert value not in property_values
    assert value not in globals()
    property_values[name] = value
globals().update(property_values)


# USE category predicates.  Every predicate takes the same five arguments:
#   U    -- the code point (int)
#   UISC -- Indic_Syllabic_Category value
#   UDI  -- used only for its truthiness (presumably the
#           Default_Ignorable_Code_Point flag -- confirm against the caller)
#   UGC  -- General_Category value
#   AJT  -- joining type, carrying the "jt_" prefix
# and returns a truthy value when the code point belongs to the category.

def is_BASE(U, UISC, UDI, UGC, AJT):
    return (UISC in [Number, Consonant, Consonant_Head_Letter,
            Tone_Letter,
            Vowel_Independent,
            ] or
        # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/484
        (AJT in [jt_C, jt_D, jt_L, jt_R] and UISC != Joiner) or
        (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
                    Consonant_Subjoined, Vowel, Vowel_Dependent]))
def is_BASE_NUM(U, UISC, UDI, UGC, AJT):
    return UISC == Brahmi_Joining_Number
def is_BASE_OTHER(U, UISC, UDI, UGC, AJT):
    if UISC == Consonant_Placeholder: return True
    return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
def is_CGJ(U, UISC, UDI, UGC, AJT):
    # Also includes VARIATION_SELECTOR and ZWJ
    return UISC == Joiner or (UDI and UGC in [Mc, Me, Mn])
def is_CONS_FINAL(U, UISC, UDI, UGC, AJT):
    return ((UISC == Consonant_Final and UGC != Lo) or
        UISC == Consonant_Succeeding_Repha)
def is_CONS_FINAL_MOD(U, UISC, UDI, UGC, AJT):
    return UISC == Syllable_Modifier
def is_CONS_MED(U, UISC, UDI, UGC, AJT):
    # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec.
    return ((UISC == Consonant_Medial and UGC != Lo) or
        UISC == Consonant_Initial_Postfixed)
def is_CONS_MOD(U, UISC, UDI, UGC, AJT):
    return UISC in [Nukta, Gemination_Mark, Consonant_Killer]
def is_CONS_SUB(U, UISC, UDI, UGC, AJT):
    return UISC == Consonant_Subjoined and UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT):
    return UISC == Consonant_With_Stacker
def is_HALANT(U, UISC, UDI, UGC, AJT):
    return UISC == Virama and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT)
def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT):
    # Split off of HALANT
    return U == 0x0DCA
def is_HALANT_NUM(U, UISC, UDI, UGC, AJT):
    return UISC == Number_Joiner
def is_HIEROGLYPH(U, UISC, UDI, UGC, AJT):
    return UISC == Hieroglyph
def is_HIEROGLYPH_JOINER(U, UISC, UDI, UGC, AJT):
    return UISC == Hieroglyph_Joiner
def is_HIEROGLYPH_MIRROR(U, UISC, UDI, UGC, AJT):
    return UISC == Hieroglyph_Mirror
def is_HIEROGLYPH_MOD(U, UISC, UDI, UGC, AJT):
    return UISC == Hieroglyph_Modifier
# The predicates below share the (U, UISC, UDI, UGC, AJT) signature: code
# point, Indic_Syllabic_Category, a flag used for its truthiness, General_Category
# and "jt_"-prefixed joining type.
def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UDI, UGC, AJT):
    return UISC in [Hieroglyph_Mark_Begin, Hieroglyph_Segment_Begin]
def is_HIEROGLYPH_SEGMENT_END(U, UISC, UDI, UGC, AJT):
    return UISC in [Hieroglyph_Mark_End, Hieroglyph_Segment_End]
def is_INVISIBLE_STACKER(U, UISC, UDI, UGC, AJT):
    # Split off of HALANT
    return (UISC == Invisible_Stacker
        and not is_SAKOT(U, UISC, UDI, UGC, AJT)
    )
def is_ZWNJ(U, UISC, UDI, UGC, AJT):
    return UISC == Non_Joiner
def is_OTHER(U, UISC, UDI, UGC, AJT):
    # Also includes BASE_IND and SYM
    return ((UGC == Po or UISC in [Consonant_Dead, Joiner, Modifying_Letter, Other])
        and not is_BASE(U, UISC, UDI, UGC, AJT)
        and not is_BASE_OTHER(U, UISC, UDI, UGC, AJT)
        and not is_CGJ(U, UISC, UDI, UGC, AJT)
        and not is_SYM_MOD(U, UISC, UDI, UGC, AJT)
        and not is_Word_Joiner(U, UISC, UDI, UGC, AJT)
    )
def is_REORDERING_KILLER(U, UISC, UDI, UGC, AJT):
    return UISC == Reordering_Killer
def is_REPHA(U, UISC, UDI, UGC, AJT):
    return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed]
def is_SAKOT(U, UISC, UDI, UGC, AJT):
    # Split off of HALANT
    return U == 0x1A60
def is_SYM_MOD(U, UISC, UDI, UGC, AJT):
    return UISC == Symbol_Modifier
def is_VOWEL(U, UISC, UDI, UGC, AJT):
    return (UISC == Pure_Killer or
        (UGC != Lo and UISC in [Vowel, Vowel_Dependent]))
def is_VOWEL_MOD(U, UISC, UDI, UGC, AJT):
    return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
        (UGC != Lo and UISC == Bindu))
def is_Word_Joiner(U, UISC, UDI, UGC, AJT):
    # Also includes Rsv
    return (UDI and U not in [0x115F, 0x1160, 0x3164, 0xFFA0, 0x1BCA0, 0x1BCA1, 0x1BCA2, 0x1BCA3]
        and UISC == Other
        and not is_CGJ(U, UISC, UDI, UGC, AJT)
    ) or UGC == Cn

# USE category tag -> predicate deciding membership.  map_to_use() asserts
# that exactly one predicate accepts each code point.
use_mapping = {
    'B': is_BASE,
    'N': is_BASE_NUM,
    'GB': is_BASE_OTHER,
    'CGJ': is_CGJ,
    'F': is_CONS_FINAL,
    'FM': is_CONS_FINAL_MOD,
    'M': is_CONS_MED,
    'CM': is_CONS_MOD,
    'SUB': is_CONS_SUB,
    'CS': is_CONS_WITH_STACKER,
    'H': is_HALANT,
    'HVM': is_HALANT_OR_VOWEL_MODIFIER,
    'HN': is_HALANT_NUM,
    'IS': is_INVISIBLE_STACKER,
    'G': is_HIEROGLYPH,
    'HM': is_HIEROGLYPH_MOD,
    'HR': is_HIEROGLYPH_MIRROR,
    'J': is_HIEROGLYPH_JOINER,
    'SB': is_HIEROGLYPH_SEGMENT_BEGIN,
    'SE': is_HIEROGLYPH_SEGMENT_END,
    'ZWNJ': is_ZWNJ,
    'O': is_OTHER,
    'RK': is_REORDERING_KILLER,
    'R': is_REPHA,
    'Sk': is_SAKOT,
    'SM': is_SYM_MOD,
    'V': is_VOWEL,
    'VM': is_VOWEL_MOD,
    'WJ': is_Word_Joiner,
}

# USE category tag -> {position suffix: accepted Indic_Positional_Category
# values}.  A None entry means the category may carry a positional category
# but is not split into positional sub-categories.
use_positions = {
    'F': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Right],
    },
    'M': {
        'Abv': [Top],
        'Blw': [Bottom, Bottom_And_Left, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Bottom_And_Left],
    },
    'CM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
    },
    'V': {
        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
        'Blw': [Bottom, Overstruck, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
    },
    'VM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
        'Pst': [Right],
        'Pre': [Left],
    },
    'SM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'H': None,
    'HM': None,
    'HR': None,
    'HVM': None,
    'IS': None,
    'B': None,
    'FM': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Not_Applicable],
    },
    'R': None,
    'RK': None,
    'SUB': None,
}

def map_to_use(data):
    """Classify every code point in *data* into a USE category.

    *data* maps a code point to a seven-item sequence
    ``(UISC, UIPC, AJT, UDI, UGC, UBlock, script)``.  Returns a dict mapping
    each code point to ``(USE, UBlock)``, where ``USE`` is a ``use_mapping``
    tag, extended with a position suffix when ``use_positions`` defines
    sub-categories for that tag.

    An AssertionError is raised when a code point matches zero or several
    categories, carries a positional category its USE category does not
    allow, or matches zero or several position suffixes.
    """
    out = {}
    items = use_mapping.items()
    for U, (UISC, UIPC, AJT, UDI, UGC, UBlock, _) in data.items():

        # Resolve Indic_Syllabic_Category

        # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
        if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark

        # Tibetan:
        # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
        if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent

        # TODO: U+1CED should only be allowed after some of
        # the nasalization marks, maybe only for U+1CE9..U+1CF1.
        if U == 0x1CED: UISC = Tone_Mark

        # Exactly one predicate must claim the code point.
        categories = [k for k,v in items if v(U, UISC, UDI, UGC, AJT)]
        assert len(categories) == 1, "%s %s %s %s %s %s" % (hex(U), UISC, UDI, UGC, AJT, categories)
        USE = categories[0]

        # Resolve Indic_Positional_Category

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
        #  and https://github.com/harfbuzz/harfbuzz/issues/1631
        if U in [0x11302, 0x11303, 0x114C1]: UIPC = Top

        # TODO: https://github.com/microsoft/font-tools/issues/17#issuecomment-2346952091
        if U == 0x113CF: UIPC = Bottom

        # A real positional category is only acceptable on categories that
        # use_positions knows about (two code points are exempted).
        assert (UIPC in [Not_Applicable, Visual_Order_Left] or
            U in {0x0F7F, 0x11A3A} or
            USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT)

        pos_mapping = use_positions.get(USE, None)
        if pos_mapping:
            # Exactly one position suffix must accept the positional category.
            suffixes = [k for k,v in pos_mapping.items() if v and UIPC in v]
            assert len(suffixes) == 1, "%s %s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT, suffixes)
            USE = USE + suffixes[0]

        out[U] = (USE, UBlock)
    return out

use_data = map_to_use(combined)

# Emit the banner of the generated header, including the headers of the
# input files the table was built from.
print ("/* == Start of generated table == */")
print ("/*")
print (" * The following table is generated by running:")
print (" *")
print (" * {} IndicSyllabicCategory.txt IndicPositionalCategory.txt ArabicShaping.txt DerivedCoreProperties.txt UnicodeData.txt Blocks.txt Scripts.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt".format (sys.argv[0]))
print (" *")
print (" * on files with these headers:")
print (" *")
for h in headers:
    for header_line in h:
        print (" * %s" % (header_line.strip()))
print (" */")
print ()
print ("#ifndef HB_OT_SHAPER_USE_TABLE_HH")
print ("#define HB_OT_SHAPER_USE_TABLE_HH")
print ()
print ('#include "hb.hh"')
print ()
print ('#include "hb-ot-shaper-use-machine.hh"')
print ()

# Running totals maintained by print_block() across calls.
# NOTE(review): nothing in the visible remainder of this script calls
# print_block() or reads these counters -- presumably leftovers; confirm
# before removing.
total = 0
used = 0
last_block = None


def print_block(block, start, end, use_data):
    """Print table entries for code points ``start``..``end`` (inclusive).

    A new row, labelled with a ``/* XXXX */`` comment, is started at every
    code point that is a multiple of 16.  When ``block`` is truthy and
    differs from the block of the previous call, a ``/* block */`` heading
    is printed first, followed by padding if ``start`` is not a multiple
    of 16.

    Each entry is the first element of ``use_data[u]`` when that lookup
    yields a non-None value; otherwise ``'O'`` if ``u`` is in
    ``unicode_data[4]``, else ``'WJ'``.

    ``start`` and ``end + 1`` must both be multiples of 8.  Updates the
    module-level ``total``, ``used`` and ``last_block``.
    """
    global total, used, last_block

    if block and block != last_block:
        print()
        print()
        print(" /* %s */" % block)
        if start % 16:
            # Block begins mid-row: pad so the entries line up in columns.
            print(' ' * (20 + (start % 16 * 6)), end='')

    assert start % 8 == 0
    assert (end + 1) % 8 == 0

    present = 0
    for u in range(start, end + 1):
        if u % 16 == 0:
            print()
            print(" /* %04X */" % u, end='')
        if u in use_data:
            present += 1
        entry = use_data.get(u)
        if entry is not None:
            category = entry[0]
        elif u in unicode_data[4]:
            category = 'O'
        else:
            category = 'WJ'
        print("%6s," % category, end='')

    total += end - start + 1
    used += present
    if block:
        last_block = block


uu = sorted(use_data)

# NOTE(review): the names below are not read anywhere in the visible
# remainder of this script.
last = -100000
num = 0
offset = 0
starts = []
ends = []

# One macro per category name.  Names that have a non-empty entry in
# use_positions are skipped in the first loop and emitted once per
# suffix in the second.
print('#pragma GCC diagnostic push')
print('#pragma GCC diagnostic ignored "-Wunused-macros"')
for name in sorted(use_mapping):
    if use_positions.get(name):
        continue
    print("#define %s USE(%s) /* %s */" % (name, name, use_mapping[name].__name__[3:]))
for name, suffixes in sorted(use_positions.items()):
    if not suffixes:
        continue
    for suffix in suffixes:
        tag = name + suffix
        print("#define %s USE(%s)" % (tag, tag))
print('#pragma GCC diagnostic pop')
print()


import packTab
data = {cp: entry[0] for cp, entry in use_data.items()}

DEFAULT = 5
COMPACT = 9
# The packed table is emitted twice: the DEFAULT packing inside
# "#ifndef HB_OPTIMIZE_SIZE" and the COMPACT packing in its "#else" branch.
for compression, directive in (
    (DEFAULT, '#ifndef HB_OPTIMIZE_SIZE'),
    (COMPACT, '#else'),
):
    logging.info(' Compression=%d:' % compression)
    print()
    print(directive)
    print()

    code = packTab.Code('hb_use')
    sol = packTab.pack_table(data, compression=compression, default='O')
    logging.info(' FullCost=%d' % (sol.fullCost))
    sol.genCode(code, 'get_category')
    code.print_c(linkage='static inline')
    print()

print('#endif')

# Undefine every macro defined above, in the same order.
print()
for name in sorted(use_mapping):
    if use_positions.get(name):
        continue
    print("#undef %s" % name)
for name, suffixes in sorted(use_positions.items()):
    if not suffixes:
        continue
    for suffix in suffixes:
        tag = name + suffix
        print("#undef %s" % tag)
# Two blank lines, then close the HB_OT_SHAPER_USE_TABLE_HH include guard
# and emit the end-of-table marker.
print(
    "",
    "",
    "#endif /* HB_OT_SHAPER_USE_TABLE_HH */",
    "/* == End of generated table == */",
    sep="\n",
)