xref: /aosp_15_r20/external/skia/src/sfnt/SkOTTable_name.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2013 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/sfnt/SkOTTable_name.h"
9 
10 #include "src/base/SkEndian.h"
11 #include "src/base/SkTSearch.h"
12 #include "src/base/SkUTF.h"
13 #include "src/core/SkStringUtils.h"
14 
next_unichar_UTF16BE(const uint8_t ** srcPtr,size_t * length)15 static SkUnichar next_unichar_UTF16BE(const uint8_t** srcPtr, size_t* length) {
16     SkASSERT(srcPtr && *srcPtr && length);
17     SkASSERT(*length > 0);
18 
19     uint16_t leading;
20     if (*length < sizeof(leading)) {
21         *length = 0;
22         return 0xFFFD;
23     }
24     memcpy(&leading, *srcPtr, sizeof(leading));
25     *srcPtr += sizeof(leading);
26     *length -= sizeof(leading);
27     SkUnichar c = SkEndian_SwapBE16(leading);
28 
29     if (SkUTF::IsTrailingSurrogateUTF16(c)) {
30         return 0xFFFD;
31     }
32     if (SkUTF::IsLeadingSurrogateUTF16(c)) {
33         uint16_t trailing;
34         if (*length < sizeof(trailing)) {
35             *length = 0;
36             return 0xFFFD;
37         }
38         memcpy(&trailing, *srcPtr, sizeof(trailing));
39         SkUnichar c2 = SkEndian_SwapBE16(trailing);
40         if (!SkUTF::IsTrailingSurrogateUTF16(c2)) {
41             return 0xFFFD;
42         }
43         *srcPtr += sizeof(trailing);
44         *length -= sizeof(trailing);
45 
46         c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
47     }
48     return c;
49 }
50 
SkString_from_UTF16BE(const uint8_t * utf16be,size_t length,SkString & utf8)51 static void SkString_from_UTF16BE(const uint8_t* utf16be, size_t length, SkString& utf8) {
52     // Note that utf16be may not be 2-byte aligned.
53     SkASSERT(utf16be != nullptr);
54 
55     utf8.reset();
56     while (length) {
57         utf8.appendUnichar(next_unichar_UTF16BE(&utf16be, &length));
58     }
59 }
60 
/** Lookup table for the upper half of MacRoman:
 *      UnicodeFromMacRoman[macRomanByte - 0x80] -> Unicode code point.
 *  MacRoman bytes 0x00-0x7F are identical to ASCII and need no table; this covers
 *  bytes 0x80-0xFF only.
 *  Derived from http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT .
 */
static const uint16_t UnicodeFromMacRoman[0x80] = {
    /* 0x80-0x87 */ 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
    /* 0x88-0x8F */ 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
    /* 0x90-0x97 */ 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
    /* 0x98-0x9F */ 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
    /* 0xA0-0xA7 */ 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
    /* 0xA8-0xAF */ 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
    /* 0xB0-0xB7 */ 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
    /* 0xB8-0xBF */ 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
    /* 0xC0-0xC7 */ 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    /* 0xC8-0xCF */ 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
    /* 0xD0-0xD7 */ 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
    /* 0xD8-0xDF */ 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
    /* 0xE0-0xE7 */ 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
    /* 0xE8-0xEF */ 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
    /* 0xF0-0xF7 */ 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
    /* 0xF8-0xFF */ 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
};
84 
SkStringFromMacRoman(const uint8_t * macRoman,size_t length,SkString & utf8)85 static void SkStringFromMacRoman(const uint8_t* macRoman, size_t length, SkString& utf8) {
86     utf8.reset();
87     for (size_t i = 0; i < length; ++i) {
88         utf8.appendUnichar(macRoman[i] < 0x80 ? macRoman[i]
89                                               : UnicodeFromMacRoman[macRoman[i] - 0x80]);
90     }
91 }
92 
/** One row of the language lookup table: a 'name' table language ID paired with a BCP 47 tag. */
struct BCP47FromLanguageId {
    uint16_t languageID;
    const char* bcp47;
};

/** Maps 'name' table language IDs to BCP 47 tags.
 *  The Mac and Windows values do not conflict, so this is currently one single table.
 *  Rows are kept in non-decreasing languageID order so the table can be binary searched.
 */
static const BCP47FromLanguageId BCP47FromLanguageID[] = {
    /** A mapping from Mac Language Designators to BCP 47 codes.
     *  The following list was constructed more or less manually.
     *  Apple now uses BCP 47 (post OSX10.4), so there will be no new entries.
     */
    { 0, "en" },  // English
    { 1, "fr" },  // French
    { 2, "de" },  // German
    { 3, "it" },  // Italian
    { 4, "nl" },  // Dutch
    { 5, "sv" },  // Swedish
    { 6, "es" },  // Spanish
    { 7, "da" },  // Danish
    { 8, "pt" },  // Portuguese
    { 9, "nb" },  // Norwegian
    { 10, "he" },  // Hebrew
    { 11, "ja" },  // Japanese
    { 12, "ar" },  // Arabic
    { 13, "fi" },  // Finnish
    { 14, "el" },  // Greek
    { 15, "is" },  // Icelandic
    { 16, "mt" },  // Maltese
    { 17, "tr" },  // Turkish
    { 18, "hr" },  // Croatian
    { 19, "zh-Hant" },  // Chinese (Traditional)
    { 20, "ur" },  // Urdu
    { 21, "hi" },  // Hindi
    { 22, "th" },  // Thai
    { 23, "ko" },  // Korean
    { 24, "lt" },  // Lithuanian
    { 25, "pl" },  // Polish
    { 26, "hu" },  // Hungarian
    { 27, "et" },  // Estonian
    { 28, "lv" },  // Latvian
    { 29, "se" },  // Sami
    { 30, "fo" },  // Faroese
    { 31, "fa" },  // Farsi (Persian)
    { 32, "ru" },  // Russian
    { 33, "zh-Hans" },  // Chinese (Simplified)
    { 34, "nl" },  // Dutch
    { 35, "ga" },  // Irish(Gaelic)
    { 36, "sq" },  // Albanian
    { 37, "ro" },  // Romanian
    { 38, "cs" },  // Czech
    { 39, "sk" },  // Slovak
    { 40, "sl" },  // Slovenian
    { 41, "yi" },  // Yiddish
    { 42, "sr" },  // Serbian
    { 43, "mk" },  // Macedonian
    { 44, "bg" },  // Bulgarian
    { 45, "uk" },  // Ukrainian
    { 46, "be" },  // Byelorussian
    { 47, "uz" },  // Uzbek
    { 48, "kk" },  // Kazakh
    { 49, "az-Cyrl" },  // Azerbaijani (Cyrillic)
    { 50, "az-Arab" },  // Azerbaijani (Arabic)
    { 51, "hy" },  // Armenian
    { 52, "ka" },  // Georgian
    { 53, "mo" },  // Moldavian
    { 54, "ky" },  // Kirghiz
    { 55, "tg" },  // Tajiki
    { 56, "tk" },  // Turkmen
    { 57, "mn-Mong" },  // Mongolian (Traditional)
    { 58, "mn-Cyrl" },  // Mongolian (Cyrillic)
    { 59, "ps" },  // Pashto
    { 60, "ku" },  // Kurdish
    { 61, "ks" },  // Kashmiri
    { 62, "sd" },  // Sindhi
    { 63, "bo" },  // Tibetan
    { 64, "ne" },  // Nepali
    { 65, "sa" },  // Sanskrit
    { 66, "mr" },  // Marathi
    { 67, "bn" },  // Bengali
    { 68, "as" },  // Assamese
    { 69, "gu" },  // Gujarati
    { 70, "pa" },  // Punjabi
    { 71, "or" },  // Oriya
    { 72, "ml" },  // Malayalam
    { 73, "kn" },  // Kannada
    { 74, "ta" },  // Tamil
    { 75, "te" },  // Telugu
    { 76, "si" },  // Sinhalese
    { 77, "my" },  // Burmese
    { 78, "km" },  // Khmer
    { 79, "lo" },  // Lao
    { 80, "vi" },  // Vietnamese
    { 81, "id" },  // Indonesian
    { 82, "tl" },  // Tagalog
    { 83, "ms-Latn" },  // Malay (Roman)
    { 84, "ms-Arab" },  // Malay (Arabic)
    { 85, "am" },  // Amharic
    { 86, "ti" },  // Tigrinya
    { 87, "om" },  // Oromo
    { 88, "so" },  // Somali
    { 89, "sw" },  // Swahili
    { 90, "rw" },  // Kinyarwanda/Ruanda
    { 91, "rn" },  // Rundi
    { 92, "ny" },  // Nyanja/Chewa
    { 93, "mg" },  // Malagasy
    { 94, "eo" },  // Esperanto
    { 128, "cy" },  // Welsh
    { 129, "eu" },  // Basque
    { 130, "ca" },  // Catalan
    { 131, "la" },  // Latin
    { 132, "qu" },  // Quechua
    { 133, "gn" },  // Guarani
    { 134, "ay" },  // Aymara
    { 135, "tt" },  // Tatar
    { 136, "ug" },  // Uighur
    { 137, "dz" },  // Dzongkha
    { 138, "jv-Latn" },  // Javanese (Roman)
    { 139, "su-Latn" },  // Sundanese (Roman)
    { 140, "gl" },  // Galician
    { 141, "af" },  // Afrikaans
    { 142, "br" },  // Breton
    { 143, "iu" },  // Inuktitut
    { 144, "gd" },  // Scottish (Gaelic)
    { 145, "gv" },  // Manx (Gaelic)
    { 146, "ga" },  // Irish (Gaelic with Lenition)
    { 147, "to" },  // Tongan
    { 148, "el" },  // Greek (Polytonic) Note: ISO 15924 does not have an equivalent script name.
    { 149, "kl" },  // Greenlandic
    { 150, "az-Latn" },  // Azerbaijani (Roman)
    { 151, "nn" },  // Nynorsk

    /** A mapping from Windows LCID to BCP 47 codes.
     *  This list is the sorted, curated output of tools/win_lcid.cpp.
     *  Note that these are sorted by value for quick binary lookup, and not logically by lsb.
     *  The 'bare' language ids (e.g. 0x0001 for Arabic) are omitted
     *  as they do not appear as valid language ids in the OpenType specification.
     */
    { 0x0401, "ar-SA" },  // Arabic
    { 0x0402, "bg-BG" },  // Bulgarian
    { 0x0403, "ca-ES" },  // Catalan
    { 0x0404, "zh-TW" },  // Chinese (Traditional)
    { 0x0405, "cs-CZ" },  // Czech
    { 0x0406, "da-DK" },  // Danish
    { 0x0407, "de-DE" },  // German
    { 0x0408, "el-GR" },  // Greek
    { 0x0409, "en-US" },  // English
    { 0x040a, "es-ES_tradnl" },  // Spanish
    { 0x040b, "fi-FI" },  // Finnish
    { 0x040c, "fr-FR" },  // French
    { 0x040d, "he-IL" },  // Hebrew
    { 0x040d, "he" },  // Hebrew
    { 0x040e, "hu-HU" },  // Hungarian
    { 0x040e, "hu" },  // Hungarian
    { 0x040f, "is-IS" },  // Icelandic
    { 0x0410, "it-IT" },  // Italian
    { 0x0411, "ja-JP" },  // Japanese
    { 0x0412, "ko-KR" },  // Korean
    { 0x0413, "nl-NL" },  // Dutch
    { 0x0414, "nb-NO" },  // Norwegian (Bokmål)
    { 0x0415, "pl-PL" },  // Polish
    { 0x0416, "pt-BR" },  // Portuguese
    { 0x0417, "rm-CH" },  // Romansh
    { 0x0418, "ro-RO" },  // Romanian
    { 0x0419, "ru-RU" },  // Russian
    { 0x041a, "hr-HR" },  // Croatian
    { 0x041b, "sk-SK" },  // Slovak
    { 0x041c, "sq-AL" },  // Albanian
    { 0x041d, "sv-SE" },  // Swedish
    { 0x041e, "th-TH" },  // Thai
    { 0x041f, "tr-TR" },  // Turkish
    { 0x0420, "ur-PK" },  // Urdu
    { 0x0421, "id-ID" },  // Indonesian
    { 0x0422, "uk-UA" },  // Ukrainian
    { 0x0423, "be-BY" },  // Belarusian
    { 0x0424, "sl-SI" },  // Slovenian
    { 0x0425, "et-EE" },  // Estonian
    { 0x0426, "lv-LV" },  // Latvian
    { 0x0427, "lt-LT" },  // Lithuanian
    { 0x0428, "tg-Cyrl-TJ" },  // Tajik (Cyrillic)
    { 0x0429, "fa-IR" },  // Persian
    { 0x042a, "vi-VN" },  // Vietnamese
    { 0x042b, "hy-AM" },  // Armenian
    { 0x042c, "az-Latn-AZ" },  // Azeri (Latin)
    { 0x042d, "eu-ES" },  // Basque
    { 0x042e, "hsb-DE" },  // Upper Sorbian
    { 0x042f, "mk-MK" },  // Macedonian (FYROM)
    { 0x0432, "tn-ZA" },  // Setswana
    { 0x0434, "xh-ZA" },  // isiXhosa
    { 0x0435, "zu-ZA" },  // isiZulu
    { 0x0436, "af-ZA" },  // Afrikaans
    { 0x0437, "ka-GE" },  // Georgian
    { 0x0438, "fo-FO" },  // Faroese
    { 0x0439, "hi-IN" },  // Hindi
    { 0x043a, "mt-MT" },  // Maltese
    { 0x043b, "se-NO" },  // Sami (Northern)
    { 0x043e, "ms-MY" },  // Malay
    { 0x043f, "kk-KZ" },  // Kazakh
    { 0x0440, "ky-KG" },  // Kyrgyz
    { 0x0441, "sw-KE" },  // Kiswahili
    { 0x0442, "tk-TM" },  // Turkmen
    { 0x0443, "uz-Latn-UZ" },  // Uzbek (Latin)
    { 0x0443, "uz" },  // Uzbek
    { 0x0444, "tt-RU" },  // Tatar
    { 0x0445, "bn-IN" },  // Bengali
    { 0x0446, "pa-IN" },  // Punjabi
    { 0x0447, "gu-IN" },  // Gujarati
    { 0x0448, "or-IN" },  // Oriya
    { 0x0449, "ta-IN" },  // Tamil
    { 0x044a, "te-IN" },  // Telugu
    { 0x044b, "kn-IN" },  // Kannada
    { 0x044c, "ml-IN" },  // Malayalam
    { 0x044d, "as-IN" },  // Assamese
    { 0x044e, "mr-IN" },  // Marathi
    { 0x044f, "sa-IN" },  // Sanskrit
    { 0x0450, "mn-Cyrl" },  // Mongolian (Cyrillic)
    { 0x0451, "bo-CN" },  // Tibetan
    { 0x0452, "cy-GB" },  // Welsh
    { 0x0453, "km-KH" },  // Khmer
    { 0x0454, "lo-LA" },  // Lao
    { 0x0456, "gl-ES" },  // Galician
    { 0x0457, "kok-IN" },  // Konkani
    { 0x045a, "syr-SY" },  // Syriac
    { 0x045b, "si-LK" },  // Sinhala
    { 0x045d, "iu-Cans-CA" },  // Inuktitut (Syllabics)
    { 0x045e, "am-ET" },  // Amharic
    { 0x0461, "ne-NP" },  // Nepali
    { 0x0462, "fy-NL" },  // Frisian
    { 0x0463, "ps-AF" },  // Pashto
    { 0x0464, "fil-PH" },  // Filipino
    { 0x0465, "dv-MV" },  // Divehi
    { 0x0468, "ha-Latn-NG" },  // Hausa (Latin)
    { 0x046a, "yo-NG" },  // Yoruba
    { 0x046b, "quz-BO" },  // Quechua
    { 0x046c, "nso-ZA" },  // Sesotho sa Leboa
    { 0x046d, "ba-RU" },  // Bashkir
    { 0x046e, "lb-LU" },  // Luxembourgish
    { 0x046f, "kl-GL" },  // Greenlandic
    { 0x0470, "ig-NG" },  // Igbo
    { 0x0478, "ii-CN" },  // Yi
    { 0x047a, "arn-CL" },  // Mapudungun
    { 0x047c, "moh-CA" },  // Mohawk
    { 0x047e, "br-FR" },  // Breton
    { 0x0480, "ug-CN" },  // Uyghur
    { 0x0481, "mi-NZ" },  // Maori
    { 0x0482, "oc-FR" },  // Occitan
    { 0x0483, "co-FR" },  // Corsican
    { 0x0484, "gsw-FR" },  // Alsatian
    { 0x0485, "sah-RU" },  // Yakut
    { 0x0486, "qut-GT" },  // K'iche
    { 0x0487, "rw-RW" },  // Kinyarwanda
    { 0x0488, "wo-SN" },  // Wolof
    { 0x048c, "prs-AF" },  // Dari
    { 0x0491, "gd-GB" },  // Scottish Gaelic
    { 0x0801, "ar-IQ" },  // Arabic
    { 0x0804, "zh-Hans" },  // Chinese (Simplified)
    { 0x0807, "de-CH" },  // German
    { 0x0809, "en-GB" },  // English
    { 0x080a, "es-MX" },  // Spanish
    { 0x080c, "fr-BE" },  // French
    { 0x0810, "it-CH" },  // Italian
    { 0x0813, "nl-BE" },  // Dutch
    { 0x0814, "nn-NO" },  // Norwegian (Nynorsk)
    { 0x0816, "pt-PT" },  // Portuguese
    { 0x081a, "sr-Latn-CS" },  // Serbian (Latin)
    { 0x081d, "sv-FI" },  // Swedish
    { 0x082c, "az-Cyrl-AZ" },  // Azeri (Cyrillic)
    { 0x082e, "dsb-DE" },  // Lower Sorbian
    { 0x082e, "dsb" },  // Lower Sorbian
    { 0x083b, "se-SE" },  // Sami (Northern)
    { 0x083c, "ga-IE" },  // Irish
    { 0x083e, "ms-BN" },  // Malay
    { 0x0843, "uz-Cyrl-UZ" },  // Uzbek (Cyrillic)
    { 0x0845, "bn-BD" },  // Bengali
    { 0x0850, "mn-Mong-CN" },  // Mongolian (Traditional Mongolian)
    { 0x085d, "iu-Latn-CA" },  // Inuktitut (Latin)
    { 0x085f, "tzm-Latn-DZ" },  // Tamazight (Latin)
    { 0x086b, "quz-EC" },  // Quechua
    { 0x0c01, "ar-EG" },  // Arabic
    { 0x0c04, "zh-Hant" },  // Chinese (Traditional)
    { 0x0c07, "de-AT" },  // German
    { 0x0c09, "en-AU" },  // English
    { 0x0c0a, "es-ES" },  // Spanish
    { 0x0c0c, "fr-CA" },  // French
    { 0x0c1a, "sr-Cyrl-CS" },  // Serbian (Cyrillic)
    { 0x0c3b, "se-FI" },  // Sami (Northern)
    { 0x0c6b, "quz-PE" },  // Quechua
    { 0x1001, "ar-LY" },  // Arabic
    { 0x1004, "zh-SG" },  // Chinese (Simplified)
    { 0x1007, "de-LU" },  // German
    { 0x1009, "en-CA" },  // English
    { 0x100a, "es-GT" },  // Spanish
    { 0x100c, "fr-CH" },  // French
    { 0x101a, "hr-BA" },  // Croatian (Latin)
    { 0x103b, "smj-NO" },  // Sami (Lule)
    { 0x1401, "ar-DZ" },  // Arabic
    { 0x1404, "zh-MO" },  // Chinese (Traditional)
    { 0x1407, "de-LI" },  // German
    { 0x1409, "en-NZ" },  // English
    { 0x140a, "es-CR" },  // Spanish
    { 0x140c, "fr-LU" },  // French
    { 0x141a, "bs-Latn-BA" },  // Bosnian (Latin)
    { 0x141a, "bs" },  // Bosnian
    { 0x143b, "smj-SE" },  // Sami (Lule)
    { 0x143b, "smj" },  // Sami (Lule)
    { 0x1801, "ar-MA" },  // Arabic
    { 0x1809, "en-IE" },  // English
    { 0x180a, "es-PA" },  // Spanish
    { 0x180c, "fr-MC" },  // French
    { 0x181a, "sr-Latn-BA" },  // Serbian (Latin)
    { 0x183b, "sma-NO" },  // Sami (Southern)
    { 0x1c01, "ar-TN" },  // Arabic
    { 0x1c09, "en-ZA" },  // English
    { 0x1c0a, "es-DO" },  // Spanish
    { 0x1c1a, "sr-Cyrl-BA" },  // Serbian (Cyrillic)
    { 0x1c3b, "sma-SE" },  // Sami (Southern)
    { 0x1c3b, "sma" },  // Sami (Southern)
    { 0x2001, "ar-OM" },  // Arabic
    { 0x2009, "en-JM" },  // English
    { 0x200a, "es-VE" },  // Spanish
    { 0x201a, "bs-Cyrl-BA" },  // Bosnian (Cyrillic)
    { 0x201a, "bs-Cyrl" },  // Bosnian (Cyrillic)
    { 0x203b, "sms-FI" },  // Sami (Skolt)
    { 0x203b, "sms" },  // Sami (Skolt)
    { 0x2401, "ar-YE" },  // Arabic
    { 0x2409, "en-029" },  // English
    { 0x240a, "es-CO" },  // Spanish
    { 0x241a, "sr-Latn-RS" },  // Serbian (Latin)
    { 0x243b, "smn-FI" },  // Sami (Inari)
    { 0x2801, "ar-SY" },  // Arabic
    { 0x2809, "en-BZ" },  // English
    { 0x280a, "es-PE" },  // Spanish
    { 0x281a, "sr-Cyrl-RS" },  // Serbian (Cyrillic)
    { 0x2c01, "ar-JO" },  // Arabic
    { 0x2c09, "en-TT" },  // English
    { 0x2c0a, "es-AR" },  // Spanish
    { 0x2c1a, "sr-Latn-ME" },  // Serbian (Latin)
    { 0x3001, "ar-LB" },  // Arabic
    { 0x3009, "en-ZW" },  // English
    { 0x300a, "es-EC" },  // Spanish
    { 0x301a, "sr-Cyrl-ME" },  // Serbian (Cyrillic)
    { 0x3401, "ar-KW" },  // Arabic
    { 0x3409, "en-PH" },  // English
    { 0x340a, "es-CL" },  // Spanish
    { 0x3801, "ar-AE" },  // Arabic
    { 0x380a, "es-UY" },  // Spanish
    { 0x3c01, "ar-BH" },  // Arabic
    { 0x3c0a, "es-PY" },  // Spanish
    { 0x4001, "ar-QA" },  // Arabic
    { 0x4009, "en-IN" },  // English
    { 0x400a, "es-BO" },  // Spanish
    { 0x4409, "en-MY" },  // English
    { 0x440a, "es-SV" },  // Spanish
    { 0x4809, "en-SG" },  // English
    { 0x480a, "es-HN" },  // Spanish
    { 0x4c0a, "es-NI" },  // Spanish
    { 0x500a, "es-PR" },  // Spanish
    { 0x540a, "es-US" },  // Spanish
};
450 
451 namespace {
BCP47FromLanguageIdLess(const BCP47FromLanguageId & a,const BCP47FromLanguageId & b)452 bool BCP47FromLanguageIdLess(const BCP47FromLanguageId& a, const BCP47FromLanguageId& b) {
453     return a.languageID < b.languageID;
454 }
455 }  // namespace
456 
next(SkOTTableName::Iterator::Record & record)457 bool SkOTTableName::Iterator::next(SkOTTableName::Iterator::Record& record) {
458     SkOTTableName nameTable;
459     if (fNameTableSize < sizeof(nameTable)) {
460         return false;
461     }
462     memcpy(&nameTable, fNameTable, sizeof(nameTable));
463 
464     const uint8_t* nameRecords = fNameTable + sizeof(nameTable);
465     const size_t nameRecordsSize = fNameTableSize - sizeof(nameTable);
466 
467     const size_t stringTableOffset = SkEndian_SwapBE16(nameTable.stringOffset);
468     if (fNameTableSize < stringTableOffset) {
469         return false;
470     }
471     const uint8_t* stringTable = fNameTable + stringTableOffset;
472     const size_t stringTableSize = fNameTableSize - stringTableOffset;
473 
474     // Find the next record which matches the requested type.
475     SkOTTableName::Record nameRecord;
476     const size_t nameRecordsCount = SkEndian_SwapBE16(nameTable.count);
477     const size_t nameRecordsMax = std::min(nameRecordsCount, nameRecordsSize / sizeof(nameRecord));
478     do {
479         if (fIndex >= nameRecordsMax) {
480             return false;
481         }
482 
483         memcpy(&nameRecord, nameRecords + sizeof(nameRecord)*fIndex, sizeof(nameRecord));
484         ++fIndex;
485     } while (fType != -1 && nameRecord.nameID.fontSpecific != fType);
486 
487     record.type = nameRecord.nameID.fontSpecific;
488 
489     // Decode the name into UTF-8.
490     const size_t nameOffset = SkEndian_SwapBE16(nameRecord.offset);
491     const size_t nameLength = SkEndian_SwapBE16(nameRecord.length);
492     if (stringTableSize < nameOffset + nameLength) {
493         return false; // continue?
494     }
495     const uint8_t* nameString = stringTable + nameOffset;
496     switch (nameRecord.platformID.value) {
497         case SkOTTableName::Record::PlatformID::Windows:
498             if (SkOTTableName::Record::EncodingID::Windows::UnicodeBMPUCS2
499                    != nameRecord.encodingID.windows.value
500                 && SkOTTableName::Record::EncodingID::Windows::UnicodeUCS4
501                    != nameRecord.encodingID.windows.value
502                 && SkOTTableName::Record::EncodingID::Windows::Symbol
503                    != nameRecord.encodingID.windows.value)
504             {
505                 record.name.reset();
506                 break; // continue?
507             }
508             [[fallthrough]];
509         case SkOTTableName::Record::PlatformID::Unicode:
510         case SkOTTableName::Record::PlatformID::ISO:
511             SkString_from_UTF16BE(nameString, nameLength, record.name);
512             break;
513 
514         case SkOTTableName::Record::PlatformID::Macintosh:
515             // TODO: need better decoding, especially on Mac.
516             if (SkOTTableName::Record::EncodingID::Macintosh::Roman
517                 != nameRecord.encodingID.macintosh.value)
518             {
519                 record.name.reset();
520                 break;  // continue?
521             }
522             SkStringFromMacRoman(nameString, nameLength, record.name);
523             break;
524 
525         case SkOTTableName::Record::PlatformID::Custom:
526             // These should never appear in a 'name' table.
527         default:
528             SkASSERT(false);
529             record.name.reset();
530             break;  // continue?
531     }
532 
533     // Determine the language.
534     const uint16_t languageID = SkEndian_SwapBE16(nameRecord.languageID.languageTagID);
535 
536     // Handle format 1 languages.
537     if (SkOTTableName::format_1 == nameTable.format && languageID >= 0x8000) {
538         const uint16_t languageTagRecordIndex = languageID - 0x8000;
539 
540         if (nameRecordsSize < sizeof(nameRecord)*nameRecordsCount) {
541             return false; //"und" or break?
542         }
543         const uint8_t* format1extData = nameRecords + sizeof(nameRecord)*nameRecordsCount;
544         size_t format1extSize = nameRecordsSize - sizeof(nameRecord)*nameRecordsCount;
545         SkOTTableName::Format1Ext format1ext;
546         if (format1extSize < sizeof(format1ext)) {
547             return false; // "und" or break?
548         }
549         memcpy(&format1ext, format1extData, sizeof(format1ext));
550 
551         const uint8_t* languageTagRecords = format1extData + sizeof(format1ext);
552         size_t languageTagRecordsSize = format1extSize - sizeof(format1ext);
553         if (languageTagRecordIndex < SkEndian_SwapBE16(format1ext.langTagCount)) {
554             SkOTTableName::Format1Ext::LangTagRecord languageTagRecord;
555             if (languageTagRecordsSize < sizeof(languageTagRecord)*(languageTagRecordIndex+1)) {
556                 return false; // "und"?
557             }
558             const uint8_t* languageTagData = languageTagRecords
559                                            + sizeof(languageTagRecord)*languageTagRecordIndex;
560             memcpy(&languageTagRecord, languageTagData, sizeof(languageTagRecord));
561 
562             uint16_t languageOffset = SkEndian_SwapBE16(languageTagRecord.offset);
563             uint16_t languageLength = SkEndian_SwapBE16(languageTagRecord.length);
564 
565             if (fNameTableSize < stringTableOffset + languageOffset + languageLength) {
566                 return false; // "und"?
567             }
568             const uint8_t* languageString = stringTable + languageOffset;
569             SkString_from_UTF16BE(languageString, languageLength, record.language);
570             return true;
571         }
572     }
573 
574     // Handle format 0 languages, translating them into BCP 47.
575     const BCP47FromLanguageId target = { languageID, "" };
576     int languageIndex = SkTSearch<BCP47FromLanguageId, BCP47FromLanguageIdLess>(
577         BCP47FromLanguageID, std::size(BCP47FromLanguageID), target, sizeof(target));
578     if (languageIndex >= 0) {
579         record.language = BCP47FromLanguageID[languageIndex].bcp47;
580         return true;
581     }
582 
583     // Unknown language, return the BCP 47 code 'und' for 'undetermined'.
584     record.language = "und";
585     return true;
586 }
587