"""Tests for the script, block and OpenType-tag helpers in fontTools.unicodedata."""

from fontTools import unicodedata

import pytest


def test_script():
    """script() returns the four-letter script code of a single character."""
    assert unicodedata.script("a") == "Latn"
    assert unicodedata.script(chr(0)) == "Zyyy"
    assert unicodedata.script(chr(0x0378)) == "Zzzz"
    assert unicodedata.script(chr(0x10FFFF)) == "Zzzz"

    # these were randomly sampled, one character per script
    assert unicodedata.script(chr(0x1E918)) == "Adlm"
    assert unicodedata.script(chr(0x1170D)) == "Ahom"
    assert unicodedata.script(chr(0x145A0)) == "Hluw"
    assert unicodedata.script(chr(0x0607)) == "Arab"
    assert unicodedata.script(chr(0x056C)) == "Armn"
    assert unicodedata.script(chr(0x10B27)) == "Avst"
    assert unicodedata.script(chr(0x1B41)) == "Bali"
    assert unicodedata.script(chr(0x168AD)) == "Bamu"
    assert unicodedata.script(chr(0x16ADD)) == "Bass"
    assert unicodedata.script(chr(0x1BE5)) == "Batk"
    assert unicodedata.script(chr(0x09F3)) == "Beng"
    assert unicodedata.script(chr(0x11C5B)) == "Bhks"
    assert unicodedata.script(chr(0x3126)) == "Bopo"
    assert unicodedata.script(chr(0x1103B)) == "Brah"
    assert unicodedata.script(chr(0x2849)) == "Brai"
    assert unicodedata.script(chr(0x1A0A)) == "Bugi"
    assert unicodedata.script(chr(0x174E)) == "Buhd"
    assert unicodedata.script(chr(0x18EE)) == "Cans"
    assert unicodedata.script(chr(0x102B7)) == "Cari"
    assert unicodedata.script(chr(0x1053D)) == "Aghb"
    assert unicodedata.script(chr(0x11123)) == "Cakm"
    assert unicodedata.script(chr(0xAA1F)) == "Cham"
    assert unicodedata.script(chr(0xAB95)) == "Cher"
    assert unicodedata.script(chr(0x1F0C7)) == "Zyyy"
    assert unicodedata.script(chr(0x2C85)) == "Copt"
    assert unicodedata.script(chr(0x12014)) == "Xsux"
    assert unicodedata.script(chr(0x1082E)) == "Cprt"
    assert unicodedata.script(chr(0xA686)) == "Cyrl"
    assert unicodedata.script(chr(0x10417)) == "Dsrt"
    assert unicodedata.script(chr(0x093E)) == "Deva"
    assert unicodedata.script(chr(0x1BC4B)) == "Dupl"
    assert unicodedata.script(chr(0x1310C)) == "Egyp"
    assert unicodedata.script(chr(0x1051C)) == "Elba"
    assert unicodedata.script(chr(0x2DA6)) == "Ethi"
    assert unicodedata.script(chr(0x10AD)) == "Geor"
    assert unicodedata.script(chr(0x2C52)) == "Glag"
    assert unicodedata.script(chr(0x10343)) == "Goth"
    assert unicodedata.script(chr(0x11371)) == "Gran"
    assert unicodedata.script(chr(0x03D0)) == "Grek"
    assert unicodedata.script(chr(0x0AAA)) == "Gujr"
    assert unicodedata.script(chr(0x0A4C)) == "Guru"
    assert unicodedata.script(chr(0x23C9F)) == "Hani"
    assert unicodedata.script(chr(0xC259)) == "Hang"
    assert unicodedata.script(chr(0x1722)) == "Hano"
    assert unicodedata.script(chr(0x108F5)) == "Hatr"
    assert unicodedata.script(chr(0x05C2)) == "Hebr"
    assert unicodedata.script(chr(0x1B072)) == "Hira"
    assert unicodedata.script(chr(0x10847)) == "Armi"
    assert unicodedata.script(chr(0x033A)) == "Zinh"
    assert unicodedata.script(chr(0x10B66)) == "Phli"
    assert unicodedata.script(chr(0x10B4B)) == "Prti"
    assert unicodedata.script(chr(0xA98A)) == "Java"
    assert unicodedata.script(chr(0x110B2)) == "Kthi"
    assert unicodedata.script(chr(0x0CC6)) == "Knda"
    assert unicodedata.script(chr(0x3337)) == "Kana"
    assert unicodedata.script(chr(0xA915)) == "Kali"
    assert unicodedata.script(chr(0x10A2E)) == "Khar"
    assert unicodedata.script(chr(0x17AA)) == "Khmr"
    assert unicodedata.script(chr(0x11225)) == "Khoj"
    assert unicodedata.script(chr(0x112B6)) == "Sind"
    assert unicodedata.script(chr(0x0ED7)) == "Laoo"
    assert unicodedata.script(chr(0xAB3C)) == "Latn"
    assert unicodedata.script(chr(0x1C48)) == "Lepc"
    assert unicodedata.script(chr(0x1923)) == "Limb"
    assert unicodedata.script(chr(0x1071D)) == "Lina"
    assert unicodedata.script(chr(0x100EC)) == "Linb"
    assert unicodedata.script(chr(0xA4E9)) == "Lisu"
    assert unicodedata.script(chr(0x10284)) == "Lyci"
    assert unicodedata.script(chr(0x10926)) == "Lydi"
    assert unicodedata.script(chr(0x11161)) == "Mahj"
    assert unicodedata.script(chr(0x0D56)) == "Mlym"
    assert unicodedata.script(chr(0x0856)) == "Mand"
    assert unicodedata.script(chr(0x10AF0)) == "Mani"
    assert unicodedata.script(chr(0x11CB0)) == "Marc"
    assert unicodedata.script(chr(0x11D28)) == "Gonm"
    assert unicodedata.script(chr(0xABDD)) == "Mtei"
    assert unicodedata.script(chr(0x1E897)) == "Mend"
    assert unicodedata.script(chr(0x109B0)) == "Merc"
    assert unicodedata.script(chr(0x10993)) == "Mero"
    assert unicodedata.script(chr(0x16F5D)) == "Plrd"
    assert unicodedata.script(chr(0x1160B)) == "Modi"
    assert unicodedata.script(chr(0x18A8)) == "Mong"
    assert unicodedata.script(chr(0x16A48)) == "Mroo"
    assert unicodedata.script(chr(0x1128C)) == "Mult"
    assert unicodedata.script(chr(0x105B)) == "Mymr"
    assert unicodedata.script(chr(0x108AF)) == "Nbat"
    assert unicodedata.script(chr(0x19B3)) == "Talu"
    assert unicodedata.script(chr(0x1143D)) == "Newa"
    assert unicodedata.script(chr(0x07F4)) == "Nkoo"
    assert unicodedata.script(chr(0x1B192)) == "Nshu"
    assert unicodedata.script(chr(0x169C)) == "Ogam"
    assert unicodedata.script(chr(0x1C56)) == "Olck"
    assert unicodedata.script(chr(0x10CE9)) == "Hung"
    assert unicodedata.script(chr(0x10316)) == "Ital"
    assert unicodedata.script(chr(0x10A93)) == "Narb"
    assert unicodedata.script(chr(0x1035A)) == "Perm"
    assert unicodedata.script(chr(0x103D5)) == "Xpeo"
    assert unicodedata.script(chr(0x10A65)) == "Sarb"
    assert unicodedata.script(chr(0x10C09)) == "Orkh"
    assert unicodedata.script(chr(0x0B60)) == "Orya"
    assert unicodedata.script(chr(0x104CF)) == "Osge"
    assert unicodedata.script(chr(0x104A8)) == "Osma"
    assert unicodedata.script(chr(0x16B12)) == "Hmng"
    assert unicodedata.script(chr(0x10879)) == "Palm"
    assert unicodedata.script(chr(0x11AF1)) == "Pauc"
    assert unicodedata.script(chr(0xA869)) == "Phag"
    assert unicodedata.script(chr(0x10909)) == "Phnx"
    assert unicodedata.script(chr(0x10B81)) == "Phlp"
    assert unicodedata.script(chr(0xA941)) == "Rjng"
    assert unicodedata.script(chr(0x16C3)) == "Runr"
    assert unicodedata.script(chr(0x0814)) == "Samr"
    assert unicodedata.script(chr(0xA88C)) == "Saur"
    assert unicodedata.script(chr(0x111C8)) == "Shrd"
    assert unicodedata.script(chr(0x1045F)) == "Shaw"
    assert unicodedata.script(chr(0x115AD)) == "Sidd"
    assert unicodedata.script(chr(0x1D8C0)) == "Sgnw"
    assert unicodedata.script(chr(0x0DB9)) == "Sinh"
    assert unicodedata.script(chr(0x110F9)) == "Sora"
    assert unicodedata.script(chr(0x11A60)) == "Soyo"
    assert unicodedata.script(chr(0x1B94)) == "Sund"
    assert unicodedata.script(chr(0xA81F)) == "Sylo"
    assert unicodedata.script(chr(0x0740)) == "Syrc"
    assert unicodedata.script(chr(0x1714)) == "Tglg"
    assert unicodedata.script(chr(0x1761)) == "Tagb"
    assert unicodedata.script(chr(0x1965)) == "Tale"
    assert unicodedata.script(chr(0x1A32)) == "Lana"
    assert unicodedata.script(chr(0xAA86)) == "Tavt"
    assert unicodedata.script(chr(0x116A5)) == "Takr"
    assert unicodedata.script(chr(0x0B8E)) == "Taml"
    assert unicodedata.script(chr(0x1754D)) == "Tang"
    assert unicodedata.script(chr(0x0C40)) == "Telu"
    assert unicodedata.script(chr(0x07A4)) == "Thaa"
    assert unicodedata.script(chr(0x0E42)) == "Thai"
    assert unicodedata.script(chr(0x0F09)) == "Tibt"
    assert unicodedata.script(chr(0x2D3A)) == "Tfng"
    assert unicodedata.script(chr(0x114B0)) == "Tirh"
    assert unicodedata.script(chr(0x1038B)) == "Ugar"
    assert unicodedata.script(chr(0xA585)) == "Vaii"
    assert unicodedata.script(chr(0x118CF)) == "Wara"
    assert unicodedata.script(chr(0xA066)) == "Yiii"
    assert unicodedata.script(chr(0x11A31)) == "Zanb"
    assert unicodedata.script(chr(0x11F00)) == "Kawi"


def test_script_extension():
    """script_extension() returns the set of script codes for a character."""
    assert unicodedata.script_extension("a") == {"Latn"}
    assert unicodedata.script_extension(chr(0)) == {"Zyyy"}
    assert unicodedata.script_extension(chr(0x0378)) == {"Zzzz"}
    assert unicodedata.script_extension(chr(0x10FFFF)) == {"Zzzz"}

    assert unicodedata.script_extension("\u0660") == {"Arab", "Thaa", "Yezi"}
    assert unicodedata.script_extension("\u0964") == {
        "Beng",
        "Deva",
        "Dogr",
        "Gong",
        "Gonm",
        "Gran",
        "Gujr",
        "Guru",
        "Knda",
        "Mahj",
        "Mlym",
        "Nand",
        "Orya",
        "Sind",
        "Sinh",
        "Sylo",
        "Takr",
        "Taml",
        "Telu",
        "Tirh",
    }


def test_script_name():
    """script_name() maps a script code to its long name.

    An unknown code raises KeyError unless a ``default`` is supplied, in which
    case that default is returned.
    """
    assert unicodedata.script_name("Latn") == "Latin"
    assert unicodedata.script_name("Zyyy") == "Common"
    assert unicodedata.script_name("Zzzz") == "Unknown"
    # underscores in long names are replaced by spaces
    assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs"

    with pytest.raises(KeyError):
        unicodedata.script_name("QQQQ")
    # Compare against the expected value: a bare truthiness check would pass
    # for any non-empty return value.
    assert unicodedata.script_name("QQQQ", default="Unknown") == "Unknown"


def test_script_code():
    """script_code() maps a long script name back to its four-letter code.

    An unknown name raises KeyError unless a ``default`` is supplied.
    """
    assert unicodedata.script_code("Latin") == "Latn"
    assert unicodedata.script_code("Common") == "Zyyy"
    assert unicodedata.script_code("Unknown") == "Zzzz"
    # case, whitespace, underscores and hyphens are ignored
    assert unicodedata.script_code("Egyptian Hieroglyphs") == "Egyp"
    assert unicodedata.script_code("Egyptian_Hieroglyphs") == "Egyp"
    assert unicodedata.script_code("egyptianhieroglyphs") == "Egyp"
    assert unicodedata.script_code("Egyptian-Hieroglyphs") == "Egyp"

    with pytest.raises(KeyError):
        unicodedata.script_code("Does not exist")
    assert unicodedata.script_code("Does not exist", default="Zzzz") == "Zzzz"


def test_block():
    """block() returns the name of the block containing a character."""
    assert unicodedata.block("\x00") == "Basic Latin"
    assert unicodedata.block("\x7F") == "Basic Latin"
    assert unicodedata.block("\x80") == "Latin-1 Supplement"
    assert unicodedata.block("\u1c90") == "Georgian Extended"
    assert unicodedata.block("\u0870") == "Arabic Extended-B"
    assert unicodedata.block("\U00011B00") == "Devanagari Extended-A"


def test_ot_tags_from_script():
    """ot_tags_from_script() returns the list of OpenType tags for a script code."""
    # simple
    assert unicodedata.ot_tags_from_script("Latn") == ["latn"]
    # script mapped to multiple new and old script tags
    assert unicodedata.ot_tags_from_script("Deva") == ["dev2", "deva"]
    # exceptions
    assert unicodedata.ot_tags_from_script("Hira") == ["kana"]
    assert unicodedata.ot_tags_from_script("Zmth") == ["math"]
    # special script codes map to DFLT
    assert unicodedata.ot_tags_from_script("Zinh") == ["DFLT"]
    assert unicodedata.ot_tags_from_script("Zyyy") == ["DFLT"]
    assert unicodedata.ot_tags_from_script("Zzzz") == ["DFLT"]
    # this is invalid or unknown
    assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]


def test_ot_tag_to_script():
    """ot_tag_to_script() maps an OpenType tag to a script code, or None.

    Malformed tags are expected to raise ValueError.
    """
    assert unicodedata.ot_tag_to_script("latn") == "Latn"
    assert unicodedata.ot_tag_to_script("kana") == "Kana"
    # identity comparison is the idiomatic way to check for None
    assert unicodedata.ot_tag_to_script("DFLT") is None
    assert unicodedata.ot_tag_to_script("aaaa") is None
    assert unicodedata.ot_tag_to_script("beng") == "Beng"
    assert unicodedata.ot_tag_to_script("bng2") == "Beng"
    assert unicodedata.ot_tag_to_script("dev2") == "Deva"
    assert unicodedata.ot_tag_to_script("gjr2") == "Gujr"
    assert unicodedata.ot_tag_to_script("yi ") == "Yiii"
    assert unicodedata.ot_tag_to_script("nko ") == "Nkoo"
    assert unicodedata.ot_tag_to_script("vai ") == "Vaii"
    assert unicodedata.ot_tag_to_script("lao ") == "Laoo"
    assert unicodedata.ot_tag_to_script("yi") == "Yiii"
    assert unicodedata.ot_tag_to_script("math") == "Zmth"
    # both 'hang' and 'jamo' tags map to the Hangul script
    assert unicodedata.ot_tag_to_script("hang") == "Hang"
    assert unicodedata.ot_tag_to_script("jamo") == "Hang"

    for invalid_value in ("", " ", "z zz", "zzzzz"):
        with pytest.raises(ValueError, match="invalid OpenType tag"):
            unicodedata.ot_tag_to_script(invalid_value)


def test_script_horizontal_direction():
    """script_horizontal_direction() returns "LTR" or "RTL" for a script code.

    An unknown code raises KeyError unless a ``default`` is supplied.
    """
    assert unicodedata.script_horizontal_direction("Latn") == "LTR"
    assert unicodedata.script_horizontal_direction("Arab") == "RTL"
    assert unicodedata.script_horizontal_direction("Thaa") == "RTL"
    assert unicodedata.script_horizontal_direction("Ougr") == "RTL"

    with pytest.raises(KeyError):
        unicodedata.script_horizontal_direction("Azzz")
    assert unicodedata.script_horizontal_direction("Azzz", default="LTR") == "LTR"


if __name__ == "__main__":
    import sys

    sys.exit(pytest.main(sys.argv))