xref: /aosp_15_r20/external/fonttools/Tests/unicodedata_test.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1from fontTools import unicodedata
2
3import pytest
4
5
def test_script():
    """Check ``unicodedata.script`` against a table of known code points.

    Each table row pairs a code point with the four-letter script code that
    ``unicodedata.script`` is expected to return for that character. Rows are
    checked in order and the first mismatch fails the test.
    """
    expectations = (
        # basic cases, including the lowest and highest code points
        (ord("a"), "Latn"),
        (0x0000, "Zyyy"),
        (0x0378, "Zzzz"),
        (0x10FFFF, "Zzzz"),
        # randomly sampled characters covering a wide range of scripts
        (0x1E918, "Adlm"),
        (0x1170D, "Ahom"),
        (0x145A0, "Hluw"),
        (0x0607, "Arab"),
        (0x056C, "Armn"),
        (0x10B27, "Avst"),
        (0x1B41, "Bali"),
        (0x168AD, "Bamu"),
        (0x16ADD, "Bass"),
        (0x1BE5, "Batk"),
        (0x09F3, "Beng"),
        (0x11C5B, "Bhks"),
        (0x3126, "Bopo"),
        (0x1103B, "Brah"),
        (0x2849, "Brai"),
        (0x1A0A, "Bugi"),
        (0x174E, "Buhd"),
        (0x18EE, "Cans"),
        (0x102B7, "Cari"),
        (0x1053D, "Aghb"),
        (0x11123, "Cakm"),
        (0xAA1F, "Cham"),
        (0xAB95, "Cher"),
        (0x1F0C7, "Zyyy"),
        (0x2C85, "Copt"),
        (0x12014, "Xsux"),
        (0x1082E, "Cprt"),
        (0xA686, "Cyrl"),
        (0x10417, "Dsrt"),
        (0x093E, "Deva"),
        (0x1BC4B, "Dupl"),
        (0x1310C, "Egyp"),
        (0x1051C, "Elba"),
        (0x2DA6, "Ethi"),
        (0x10AD, "Geor"),
        (0x2C52, "Glag"),
        (0x10343, "Goth"),
        (0x11371, "Gran"),
        (0x03D0, "Grek"),
        (0x0AAA, "Gujr"),
        (0x0A4C, "Guru"),
        (0x23C9F, "Hani"),
        (0xC259, "Hang"),
        (0x1722, "Hano"),
        (0x108F5, "Hatr"),
        (0x05C2, "Hebr"),
        (0x1B072, "Hira"),
        (0x10847, "Armi"),
        (0x033A, "Zinh"),
        (0x10B66, "Phli"),
        (0x10B4B, "Prti"),
        (0xA98A, "Java"),
        (0x110B2, "Kthi"),
        (0x0CC6, "Knda"),
        (0x3337, "Kana"),
        (0xA915, "Kali"),
        (0x10A2E, "Khar"),
        (0x17AA, "Khmr"),
        (0x11225, "Khoj"),
        (0x112B6, "Sind"),
        (0x0ED7, "Laoo"),
        (0xAB3C, "Latn"),
        (0x1C48, "Lepc"),
        (0x1923, "Limb"),
        (0x1071D, "Lina"),
        (0x100EC, "Linb"),
        (0xA4E9, "Lisu"),
        (0x10284, "Lyci"),
        (0x10926, "Lydi"),
        (0x11161, "Mahj"),
        (0x0D56, "Mlym"),
        (0x0856, "Mand"),
        (0x10AF0, "Mani"),
        (0x11CB0, "Marc"),
        (0x11D28, "Gonm"),
        (0xABDD, "Mtei"),
        (0x1E897, "Mend"),
        (0x109B0, "Merc"),
        (0x10993, "Mero"),
        (0x16F5D, "Plrd"),
        (0x1160B, "Modi"),
        (0x18A8, "Mong"),
        (0x16A48, "Mroo"),
        (0x1128C, "Mult"),
        (0x105B, "Mymr"),
        (0x108AF, "Nbat"),
        (0x19B3, "Talu"),
        (0x1143D, "Newa"),
        (0x07F4, "Nkoo"),
        (0x1B192, "Nshu"),
        (0x169C, "Ogam"),
        (0x1C56, "Olck"),
        (0x10CE9, "Hung"),
        (0x10316, "Ital"),
        (0x10A93, "Narb"),
        (0x1035A, "Perm"),
        (0x103D5, "Xpeo"),
        (0x10A65, "Sarb"),
        (0x10C09, "Orkh"),
        (0x0B60, "Orya"),
        (0x104CF, "Osge"),
        (0x104A8, "Osma"),
        (0x16B12, "Hmng"),
        (0x10879, "Palm"),
        (0x11AF1, "Pauc"),
        (0xA869, "Phag"),
        (0x10909, "Phnx"),
        (0x10B81, "Phlp"),
        (0xA941, "Rjng"),
        (0x16C3, "Runr"),
        (0x0814, "Samr"),
        (0xA88C, "Saur"),
        (0x111C8, "Shrd"),
        (0x1045F, "Shaw"),
        (0x115AD, "Sidd"),
        (0x1D8C0, "Sgnw"),
        (0x0DB9, "Sinh"),
        (0x110F9, "Sora"),
        (0x11A60, "Soyo"),
        (0x1B94, "Sund"),
        (0xA81F, "Sylo"),
        (0x0740, "Syrc"),
        (0x1714, "Tglg"),
        (0x1761, "Tagb"),
        (0x1965, "Tale"),
        (0x1A32, "Lana"),
        (0xAA86, "Tavt"),
        (0x116A5, "Takr"),
        (0x0B8E, "Taml"),
        (0x1754D, "Tang"),
        (0x0C40, "Telu"),
        (0x07A4, "Thaa"),
        (0x0E42, "Thai"),
        (0x0F09, "Tibt"),
        (0x2D3A, "Tfng"),
        (0x114B0, "Tirh"),
        (0x1038B, "Ugar"),
        (0xA585, "Vaii"),
        (0x118CF, "Wara"),
        (0xA066, "Yiii"),
        (0x11A31, "Zanb"),
        (0x11F00, "Kawi"),
    )

    for codepoint, expected_code in expectations:
        assert unicodedata.script(chr(codepoint)) == expected_code
155
156
def test_script_extension():
    """Check ``unicodedata.script_extension`` returns the expected code sets."""
    # Characters expected to yield a one-element set.
    single_code_cases = {
        "a": "Latn",
        chr(0): "Zyyy",
        chr(0x0378): "Zzzz",
        chr(0x10FFFF): "Zzzz",
    }
    for char, code in single_code_cases.items():
        assert unicodedata.script_extension(char) == {code}

    # Characters expected to yield several script codes at once.
    expected_for_0660 = {"Arab", "Thaa", "Yezi"}
    expected_for_0964 = {
        "Beng",
        "Deva",
        "Dogr",
        "Gong",
        "Gonm",
        "Gran",
        "Gujr",
        "Guru",
        "Knda",
        "Mahj",
        "Mlym",
        "Nand",
        "Orya",
        "Sind",
        "Sinh",
        "Sylo",
        "Takr",
        "Taml",
        "Telu",
        "Tirh",
    }
    assert unicodedata.script_extension("\u0660") == expected_for_0660
    assert unicodedata.script_extension("\u0964") == expected_for_0964
186
187
def test_script_name():
    """Check ``unicodedata.script_name`` maps script codes to long names.

    Covers known codes, the underscore-to-space conversion in long names,
    and both behaviors for an unknown code: raising ``KeyError`` when no
    default is given, and returning the supplied default otherwise.
    """
    assert unicodedata.script_name("Latn") == "Latin"
    assert unicodedata.script_name("Zyyy") == "Common"
    assert unicodedata.script_name("Zzzz") == "Unknown"
    # underscores in long names are replaced by spaces
    assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs"

    with pytest.raises(KeyError):
        unicodedata.script_name("QQQQ")
    # Compare against the default explicitly: a bare truthiness check would
    # pass for any non-empty return value, not just the requested default.
    assert unicodedata.script_name("QQQQ", default="Unknown") == "Unknown"
198
199
def test_script_code():
    """Check ``unicodedata.script_code`` maps long script names to codes."""
    for long_name, code in (
        ("Latin", "Latn"),
        ("Common", "Zyyy"),
        ("Unknown", "Zzzz"),
    ):
        assert unicodedata.script_code(long_name) == code

    # Every spelling below must resolve to the same code: differences in
    # case, whitespace, underscores and hyphens are expected to be ignored.
    for spelling in (
        "Egyptian Hieroglyphs",
        "Egyptian_Hieroglyphs",
        "egyptianhieroglyphs",
        "Egyptian-Hieroglyphs",
    ):
        assert unicodedata.script_code(spelling) == "Egyp"

    # An unknown name raises unless a default is supplied.
    missing_name = "Does not exist"
    with pytest.raises(KeyError):
        unicodedata.script_code(missing_name)
    assert unicodedata.script_code(missing_name, default="Zzzz") == "Zzzz"
213
214
def test_block():
    """Check ``unicodedata.block`` returns the expected block name."""
    expected_blocks = (
        ("\x00", "Basic Latin"),
        ("\x7F", "Basic Latin"),
        ("\x80", "Latin-1 Supplement"),
        ("\u1c90", "Georgian Extended"),
        ("\u0870", "Arabic Extended-B"),
        ("\U00011B00", "Devanagari Extended-A"),
    )
    for char, block_name in expected_blocks:
        assert unicodedata.block(char) == block_name
222
223
def test_ot_tags_from_script():
    """Check ``unicodedata.ot_tags_from_script`` returns the expected tag lists."""
    expected_tags = (
        # simple
        ("Latn", ["latn"]),
        # script mapped to multiple new and old script tags
        ("Deva", ["dev2", "deva"]),
        # exceptions
        ("Hira", ["kana"]),
        ("Zmth", ["math"]),
        # special script codes map to DFLT
        ("Zinh", ["DFLT"]),
        ("Zyyy", ["DFLT"]),
        ("Zzzz", ["DFLT"]),
        # this is invalid or unknown
        ("Aaaa", ["DFLT"]),
    )
    for script_code, tags in expected_tags:
        assert unicodedata.ot_tags_from_script(script_code) == tags
238
239
def test_ot_tag_to_script():
    """Check ``unicodedata.ot_tag_to_script`` maps OpenType tags to script codes.

    Covers plain tags, tags expected to map to no script (``None``),
    alternate tags that share one script, space-padded and unpadded short
    tags, and malformed tags that must raise ``ValueError``.
    """
    assert unicodedata.ot_tag_to_script("latn") == "Latn"
    assert unicodedata.ot_tag_to_script("kana") == "Kana"
    # None is a singleton: check identity rather than equality.
    assert unicodedata.ot_tag_to_script("DFLT") is None
    assert unicodedata.ot_tag_to_script("aaaa") is None
    assert unicodedata.ot_tag_to_script("beng") == "Beng"
    assert unicodedata.ot_tag_to_script("bng2") == "Beng"
    assert unicodedata.ot_tag_to_script("dev2") == "Deva"
    assert unicodedata.ot_tag_to_script("gjr2") == "Gujr"
    # short tags padded with trailing spaces to four characters
    assert unicodedata.ot_tag_to_script("yi  ") == "Yiii"
    assert unicodedata.ot_tag_to_script("nko ") == "Nkoo"
    assert unicodedata.ot_tag_to_script("vai ") == "Vaii"
    assert unicodedata.ot_tag_to_script("lao ") == "Laoo"
    # the unpadded form of a short tag resolves to the same script
    assert unicodedata.ot_tag_to_script("yi") == "Yiii"
    assert unicodedata.ot_tag_to_script("math") == "Zmth"
    # both 'hang' and 'jamo' tags map to the Hangul script
    assert unicodedata.ot_tag_to_script("hang") == "Hang"
    assert unicodedata.ot_tag_to_script("jamo") == "Hang"

    # malformed tags are rejected with a descriptive ValueError
    for invalid_value in ("", " ", "z zz", "zzzzz"):
        with pytest.raises(ValueError, match="invalid OpenType tag"):
            unicodedata.ot_tag_to_script(invalid_value)
262
263
def test_script_horizontal_direction():
    """Check ``unicodedata.script_horizontal_direction`` for known scripts."""
    expected_directions = {
        "Latn": "LTR",
        "Arab": "RTL",
        "Thaa": "RTL",
        "Ougr": "RTL",
    }
    for script_code, direction in expected_directions.items():
        assert unicodedata.script_horizontal_direction(script_code) == direction

    # An unknown script code raises unless a default is supplied.
    unknown_code = "Azzz"
    with pytest.raises(KeyError):
        unicodedata.script_horizontal_direction(unknown_code)
    assert (
        unicodedata.script_horizontal_direction(unknown_code, default="LTR")
        == "LTR"
    )
273
274
if __name__ == "__main__":
    # Run this module's tests when executed directly, forwarding the
    # command-line arguments to pytest and exiting with its return code.
    import sys

    raise SystemExit(pytest.main(sys.argv))
279