xref: /aosp_15_r20/external/harfbuzz_ng/src/hb-ot-tag.cc (revision 2d1272b857b1f7575e6e246373e1cb218663db8a)
1 /*
2  * Copyright © 2009  Red Hat, Inc.
3  * Copyright © 2011  Google, Inc.
4  *
5  *  This is part of HarfBuzz, a text shaping library.
6  *
7  * Permission is hereby granted, without written agreement and without
8  * license or royalty fees, to use, copy, modify, and distribute this
9  * software and its documentation for any purpose, provided that the
10  * above copyright notice and the following two paragraphs appear in
11  * all copies of this software.
12  *
13  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17  * DAMAGE.
18  *
19  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24  *
25  * Red Hat Author(s): Behdad Esfahbod
26  * Google Author(s): Behdad Esfahbod, Roozbeh Pournader
27  */
28 
29 #include "hb.hh"
30 #include "inttypes.h"
31 
32 #ifndef HB_NO_OT_TAG
33 
34 
35 /* hb_script_t */
36 
37 static hb_tag_t
hb_ot_old_tag_from_script(hb_script_t script)38 hb_ot_old_tag_from_script (hb_script_t script)
39 {
40   /* This seems to be accurate as of end of 2012. */
41 
42   switch ((hb_tag_t) script)
43   {
44     case HB_SCRIPT_INVALID:		return HB_OT_TAG_DEFAULT_SCRIPT;
45     case HB_SCRIPT_MATH:		return HB_OT_TAG_MATH_SCRIPT;
46 
47     /* KATAKANA and HIRAGANA both map to 'kana' */
48     case HB_SCRIPT_HIRAGANA:		return HB_TAG('k','a','n','a');
49 
50     /* Spaces at the end are preserved, unlike ISO 15924 */
51     case HB_SCRIPT_LAO:			return HB_TAG('l','a','o',' ');
52     case HB_SCRIPT_YI:			return HB_TAG('y','i',' ',' ');
53     /* Unicode-5.0 additions */
54     case HB_SCRIPT_NKO:			return HB_TAG('n','k','o',' ');
55     /* Unicode-5.1 additions */
56     case HB_SCRIPT_VAI:			return HB_TAG('v','a','i',' ');
57   }
58 
59   /* Else, just change first char to lowercase and return */
60   return ((hb_tag_t) script) | 0x20000000u;
61 }
62 
63 static hb_script_t
hb_ot_old_tag_to_script(hb_tag_t tag)64 hb_ot_old_tag_to_script (hb_tag_t tag)
65 {
66   if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
67     return HB_SCRIPT_INVALID;
68   if (unlikely (tag == HB_OT_TAG_MATH_SCRIPT))
69     return HB_SCRIPT_MATH;
70 
71   /* This side of the conversion is fully algorithmic. */
72 
73   /* Any spaces at the end of the tag are replaced by repeating the last
74    * letter.  Eg 'nko ' -> 'Nkoo' */
75   if (unlikely ((tag & 0x0000FF00u) == 0x00002000u))
76     tag |= (tag >> 8) & 0x0000FF00u; /* Copy second letter to third */
77   if (unlikely ((tag & 0x000000FFu) == 0x00000020u))
78     tag |= (tag >> 8) & 0x000000FFu; /* Copy third letter to fourth */
79 
80   /* Change first char to uppercase and return */
81   return (hb_script_t) (tag & ~0x20000000u);
82 }
83 
84 static hb_tag_t
hb_ot_new_tag_from_script(hb_script_t script)85 hb_ot_new_tag_from_script (hb_script_t script)
86 {
87   switch ((hb_tag_t) script) {
88     case HB_SCRIPT_BENGALI:		return HB_TAG('b','n','g','2');
89     case HB_SCRIPT_DEVANAGARI:		return HB_TAG('d','e','v','2');
90     case HB_SCRIPT_GUJARATI:		return HB_TAG('g','j','r','2');
91     case HB_SCRIPT_GURMUKHI:		return HB_TAG('g','u','r','2');
92     case HB_SCRIPT_KANNADA:		return HB_TAG('k','n','d','2');
93     case HB_SCRIPT_MALAYALAM:		return HB_TAG('m','l','m','2');
94     case HB_SCRIPT_ORIYA:		return HB_TAG('o','r','y','2');
95     case HB_SCRIPT_TAMIL:		return HB_TAG('t','m','l','2');
96     case HB_SCRIPT_TELUGU:		return HB_TAG('t','e','l','2');
97     case HB_SCRIPT_MYANMAR:		return HB_TAG('m','y','m','2');
98   }
99 
100   return HB_OT_TAG_DEFAULT_SCRIPT;
101 }
102 
103 static hb_script_t
hb_ot_new_tag_to_script(hb_tag_t tag)104 hb_ot_new_tag_to_script (hb_tag_t tag)
105 {
106   switch (tag) {
107     case HB_TAG('b','n','g','2'):	return HB_SCRIPT_BENGALI;
108     case HB_TAG('d','e','v','2'):	return HB_SCRIPT_DEVANAGARI;
109     case HB_TAG('g','j','r','2'):	return HB_SCRIPT_GUJARATI;
110     case HB_TAG('g','u','r','2'):	return HB_SCRIPT_GURMUKHI;
111     case HB_TAG('k','n','d','2'):	return HB_SCRIPT_KANNADA;
112     case HB_TAG('m','l','m','2'):	return HB_SCRIPT_MALAYALAM;
113     case HB_TAG('o','r','y','2'):	return HB_SCRIPT_ORIYA;
114     case HB_TAG('t','m','l','2'):	return HB_SCRIPT_TAMIL;
115     case HB_TAG('t','e','l','2'):	return HB_SCRIPT_TELUGU;
116     case HB_TAG('m','y','m','2'):	return HB_SCRIPT_MYANMAR;
117   }
118 
119   return HB_SCRIPT_UNKNOWN;
120 }
121 
122 #ifndef HB_DISABLE_DEPRECATED
123 /**
124  * hb_ot_tags_from_script:
125  * @script: an #hb_script_t to convert.
126  * @script_tag_1: (out): output #hb_tag_t.
127  * @script_tag_2: (out): output #hb_tag_t.
128  *
129  * Converts an #hb_script_t to script tags.
130  *
131  * Since: 0.6.0
132  * Deprecated: 2.0.0: use hb_ot_tags_from_script_and_language() instead
133  **/
134 void
hb_ot_tags_from_script(hb_script_t script,hb_tag_t * script_tag_1,hb_tag_t * script_tag_2)135 hb_ot_tags_from_script (hb_script_t  script,
136 			hb_tag_t    *script_tag_1,
137 			hb_tag_t    *script_tag_2)
138 {
139   unsigned int count = 2;
140   hb_tag_t tags[2];
141   hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
142   *script_tag_1 = count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_SCRIPT;
143   *script_tag_2 = count > 1 ? tags[1] : HB_OT_TAG_DEFAULT_SCRIPT;
144 }
145 #endif
146 
147 /*
148  * Complete list at:
149  * https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
150  *
151  * Most of the script tags are the same as the ISO 15924 tag but lowercased.
152  * So we just do that, and handle the exceptional cases in a switch.
153  */
154 
155 static void
hb_ot_all_tags_from_script(hb_script_t script,unsigned int * count,hb_tag_t * tags)156 hb_ot_all_tags_from_script (hb_script_t   script,
157 			    unsigned int *count /* IN/OUT */,
158 			    hb_tag_t     *tags /* OUT */)
159 {
160   unsigned int i = 0;
161 
162   hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
163   if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
164   {
165     /* HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. */
166     if (new_tag != HB_TAG('m','y','m','2'))
167       tags[i++] = new_tag | '3';
168     if (*count > i)
169       tags[i++] = new_tag;
170   }
171 
172   if (*count > i)
173   {
174     hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
175     if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
176       tags[i++] = old_tag;
177   }
178 
179   *count = i;
180 }
181 
182 /**
183  * hb_ot_tag_to_script:
184  * @tag: a script tag
185  *
186  * Converts a script tag to an #hb_script_t.
187  *
188  * Return value: The #hb_script_t corresponding to @tag.
189  *
190  **/
191 hb_script_t
hb_ot_tag_to_script(hb_tag_t tag)192 hb_ot_tag_to_script (hb_tag_t tag)
193 {
194   unsigned char digit = tag & 0x000000FFu;
195   if (unlikely (digit == '2' || digit == '3'))
196     return hb_ot_new_tag_to_script (tag & 0xFFFFFF32);
197 
198   return hb_ot_old_tag_to_script (tag);
199 }
200 
201 
202 /* hb_language_t */
203 
204 static inline bool
subtag_matches(const char * lang_str,const char * limit,const char * subtag,unsigned subtag_len)205 subtag_matches (const char *lang_str,
206 		const char *limit,
207 		const char *subtag,
208 		unsigned    subtag_len)
209 {
210   if (likely ((unsigned) (limit - lang_str) < subtag_len))
211     return false;
212 
213   do {
214     const char *s = strstr (lang_str, subtag);
215     if (!s || s >= limit)
216       return false;
217     if (!ISALNUM (s[subtag_len]))
218       return true;
219     lang_str = s + subtag_len;
220   } while (true);
221 }
222 
223 static bool
lang_matches(const char * lang_str,const char * limit,const char * spec,unsigned spec_len)224 lang_matches (const char *lang_str,
225 	      const char *limit,
226 	      const char *spec,
227 	      unsigned    spec_len)
228 {
229   /* Same as hb_language_matches(); duplicated. */
230 
231   if (likely ((unsigned) (limit - lang_str) < spec_len))
232     return false;
233 
234   return strncmp (lang_str, spec, spec_len) == 0 &&
235 	 (lang_str[spec_len] == '\0' || lang_str[spec_len] == '-');
236 }
237 
238 struct LangTag
239 {
240   hb_tag_t language;
241   hb_tag_t tag;
242 
cmpLangTag243   int cmp (hb_tag_t a) const
244   {
245     return a < this->language ? -1 : a > this->language ? +1 : 0;
246   }
cmpLangTag247   int cmp (const LangTag *that) const
248   { return cmp (that->language); }
249 };
250 
251 #include "hb-ot-tag-table.hh"
252 
/* The corresponding language IDs for the following tags are unclear,
 * overlap, or are architecturally weird. Needs more research. */
255 
256 /*{"??",	{HB_TAG('B','C','R',' ')}},*/	/* Bible Cree */
257 /*{"zh?",	{HB_TAG('C','H','N',' ')}},*/	/* Chinese (seen in Microsoft fonts) */
258 /*{"ar-Syrc?",	{HB_TAG('G','A','R',' ')}},*/	/* Garshuni */
259 /*{"??",	{HB_TAG('N','G','R',' ')}},*/	/* Nagari */
260 /*{"??",	{HB_TAG('Y','I','C',' ')}},*/	/* Yi Classic */
261 /*{"zh?",	{HB_TAG('Z','H','P',' ')}},*/	/* Chinese Phonetic */
262 
263 #ifndef HB_DISABLE_DEPRECATED
264 /**
265  * hb_ot_tag_from_language:
266  * @language: an #hb_language_t to convert.
267  *
268  * Converts an #hb_language_t to an #hb_tag_t.
269  *
270  * Since: 0.6.0
271  * Deprecated: 2.0.0: use hb_ot_tags_from_script_and_language() instead
272  **/
273 hb_tag_t
hb_ot_tag_from_language(hb_language_t language)274 hb_ot_tag_from_language (hb_language_t language)
275 {
276   unsigned int count = 1;
277   hb_tag_t tags[1];
278   hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
279   return count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_LANGUAGE;
280 }
281 #endif
282 
283 static void
hb_ot_tags_from_language(const char * lang_str,const char * limit,unsigned int * count,hb_tag_t * tags)284 hb_ot_tags_from_language (const char   *lang_str,
285 			  const char   *limit,
286 			  unsigned int *count,
287 			  hb_tag_t     *tags)
288 {
289 
290 #ifndef HB_NO_LANGUAGE_LONG
291   /* Check for matches of multiple subtags. */
292   if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
293     return;
294 #endif
295 
296   /* Find a language matching in the first component. */
297 #ifndef HB_NO_LANGUAGE_LONG
298   const char *s; s = strchr (lang_str, '-');
299 #endif
300   {
301 #ifndef HB_NO_LANGUAGE_LONG
302     if (s && limit - lang_str >= 6)
303     {
304       const char *extlang_end = strchr (s + 1, '-');
305       /* If there is an extended language tag, use it. */
306       if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
307 	  ISALPHA (s[1]))
308 	lang_str = s + 1;
309     }
310 #endif
311     const LangTag *ot_languages = nullptr;
312     unsigned ot_languages_len = 0;
313     const char *dash = strchr (lang_str, '-');
314     unsigned first_len = dash ? dash - lang_str : limit - lang_str;
315     if (first_len == 2)
316     {
317       ot_languages = ot_languages2;
318       ot_languages_len = ARRAY_LENGTH (ot_languages2);
319     }
320 #ifndef HB_NO_LANGUAGE_LONG
321     else if (first_len == 3)
322     {
323       ot_languages = ot_languages3;
324       ot_languages_len = ARRAY_LENGTH (ot_languages3);
325     }
326 #endif
327 
328     hb_tag_t lang_tag = hb_tag_from_string (lang_str, first_len);
329 
330     static hb_atomic_int_t last_tag_idx; /* Poor man's cache. */
331     unsigned tag_idx = last_tag_idx;
332 
333     if (likely (tag_idx < ot_languages_len && ot_languages[tag_idx].language == lang_tag) ||
334 	hb_sorted_array (ot_languages, ot_languages_len).bfind (lang_tag, &tag_idx))
335     {
336       last_tag_idx = tag_idx;
337       unsigned int i;
338       while (tag_idx != 0 &&
339 	     ot_languages[tag_idx].language == ot_languages[tag_idx - 1].language)
340 	tag_idx--;
341       for (i = 0;
342 	   i < *count &&
343 	   tag_idx + i < ot_languages_len &&
344 	   ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
345 	   ot_languages[tag_idx + i].language == ot_languages[tag_idx].language;
346 	   i++)
347 	tags[i] = ot_languages[tag_idx + i].tag;
348       *count = i;
349       return;
350     }
351   }
352 
353 #ifndef HB_NO_LANGUAGE_LONG
354   if (!s)
355     s = lang_str + strlen (lang_str);
356   if (s - lang_str == 3) {
357     /* Assume it's ISO-639-3 and upper-case and use it. */
358     tags[0] = hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u;
359     *count = 1;
360     return;
361   }
362 #endif
363 
364   *count = 0;
365 }
366 
367 static bool
parse_private_use_subtag(const char * private_use_subtag,unsigned int * count,hb_tag_t * tags,const char * prefix,unsigned char (* normalize)(unsigned char))368 parse_private_use_subtag (const char     *private_use_subtag,
369 			  unsigned int   *count,
370 			  hb_tag_t       *tags,
371 			  const char     *prefix,
372 			  unsigned char (*normalize) (unsigned char))
373 {
374 #ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
375   return false;
376 #endif
377 
378   if (!(private_use_subtag && count && tags && *count)) return false;
379 
380   const char *s = strstr (private_use_subtag, prefix);
381   if (!s) return false;
382 
383   char tag[4];
384   int i;
385   s += strlen (prefix);
386   if (s[0] == '-') {
387     s += 1;
388     char c;
389     for (i = 0; i < 8 && ISHEX (s[i]); i++)
390     {
391       c = FROMHEX (s[i]);
392       if (i % 2 == 0)
393 	tag[i / 2] = c << 4;
394       else
395 	tag[i / 2] += c;
396     }
397     if (i != 8) return false;
398   } else {
399     for (i = 0; i < 4 && ISALNUM (s[i]); i++)
400       tag[i] = normalize (s[i]);
401     if (!i) return false;
402 
403     for (; i < 4; i++)
404       tag[i] = ' ';
405   }
406   tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]);
407   if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT)
408     tags[0] ^= ~0xDFDFDFDF;
409   *count = 1;
410   return true;
411 }
412 
413 /**
414  * hb_ot_tags_from_script_and_language:
415  * @script: an #hb_script_t to convert.
416  * @language: (nullable): an #hb_language_t to convert.
417  * @script_count: (inout) (optional): maximum number of script tags to retrieve (IN)
418  * and actual number of script tags retrieved (OUT)
419  * @script_tags: (out) (optional): array of size at least @script_count to store the
420  * script tag results
421  * @language_count: (inout) (optional): maximum number of language tags to retrieve
422  * (IN) and actual number of language tags retrieved (OUT)
423  * @language_tags: (out) (optional): array of size at least @language_count to store
424  * the language tag results
425  *
426  * Converts an #hb_script_t and an #hb_language_t to script and language tags.
427  *
428  * Since: 2.0.0
429  **/
430 void
hb_ot_tags_from_script_and_language(hb_script_t script,hb_language_t language,unsigned int * script_count,hb_tag_t * script_tags,unsigned int * language_count,hb_tag_t * language_tags)431 hb_ot_tags_from_script_and_language (hb_script_t   script,
432 				     hb_language_t language,
433 				     unsigned int *script_count /* IN/OUT */,
434 				     hb_tag_t     *script_tags /* OUT */,
435 				     unsigned int *language_count /* IN/OUT */,
436 				     hb_tag_t     *language_tags /* OUT */)
437 {
438   bool needs_script = true;
439 
440   if (language == HB_LANGUAGE_INVALID)
441   {
442     if (language_count && language_tags && *language_count)
443       *language_count = 0;
444   }
445   else
446   {
447     const char *lang_str, *s, *limit, *private_use_subtag;
448     bool needs_language;
449 
450     lang_str = hb_language_to_string (language);
451     limit = nullptr;
452     private_use_subtag = nullptr;
453     if (lang_str[0] == 'x' && lang_str[1] == '-')
454     {
455       private_use_subtag = lang_str;
456     } else {
457       for (s = lang_str + 1; *s; s++)
458       {
459 	if (s[-1] == '-' && s[1] == '-')
460 	{
461 	  if (s[0] == 'x')
462 	  {
463 	    private_use_subtag = s;
464 	    if (!limit)
465 	      limit = s - 1;
466 	    break;
467 	  } else if (!limit)
468 	  {
469 	    limit = s - 1;
470 	  }
471 	}
472       }
473       if (!limit)
474 	limit = s;
475     }
476 
477     needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
478     needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);
479 
480     if (needs_language && language_count && language_tags && *language_count)
481       hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
482   }
483 
484   if (needs_script && script_count && script_tags && *script_count)
485     hb_ot_all_tags_from_script (script, script_count, script_tags);
486 }
487 
488 /**
489  * hb_ot_tag_to_language:
490  * @tag: an language tag
491  *
492  * Converts a language tag to an #hb_language_t.
493  *
494  * Return value: (transfer none) (nullable):
495  * The #hb_language_t corresponding to @tag.
496  *
497  * Since: 0.9.2
498  **/
499 hb_language_t
hb_ot_tag_to_language(hb_tag_t tag)500 hb_ot_tag_to_language (hb_tag_t tag)
501 {
502   unsigned int i;
503 
504   if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
505     return nullptr;
506 
507 #ifndef HB_NO_LANGUAGE_LONG
508   {
509     hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
510     if (disambiguated_tag != HB_LANGUAGE_INVALID)
511       return disambiguated_tag;
512   }
513 #endif
514 
515   char buf[4];
516   for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++)
517     if (ot_languages2[i].tag == tag)
518     {
519       hb_tag_to_string (ot_languages2[i].language, buf);
520       return hb_language_from_string (buf, 2);
521     }
522 #ifndef HB_NO_LANGUAGE_LONG
523   for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++)
524     if (ot_languages3[i].tag == tag)
525     {
526       hb_tag_to_string (ot_languages3[i].language, buf);
527       return hb_language_from_string (buf, 3);
528     }
529 #endif
530 
531   /* Return a custom language in the form of "x-hbot-AABBCCDD".
532    * If it's three letters long, also guess it's ISO 639-3 and lower-case and
533    * prepend it (if it's not a registered tag, the private use subtags will
534    * ensure that calling hb_ot_tag_from_language on the result will still return
535    * the same tag as the original tag).
536    */
537   {
538     char buf[20];
539     char *str = buf;
540     if (ISALPHA (tag >> 24)
541 	&& ISALPHA ((tag >> 16) & 0xFF)
542 	&& ISALPHA ((tag >> 8) & 0xFF)
543 	&& (tag & 0xFF) == ' ')
544     {
545       buf[0] = TOLOWER (tag >> 24);
546       buf[1] = TOLOWER ((tag >> 16) & 0xFF);
547       buf[2] = TOLOWER ((tag >> 8) & 0xFF);
548       buf[3] = '-';
549       str += 4;
550     }
551     snprintf (str, 16, "x-hbot-%08" PRIx32, tag);
552     return hb_language_from_string (&*buf, -1);
553   }
554 }
555 
556 /**
557  * hb_ot_tags_to_script_and_language:
558  * @script_tag: a script tag
559  * @language_tag: a language tag
560  * @script: (out) (optional): the #hb_script_t corresponding to @script_tag.
561  * @language: (out) (optional): the #hb_language_t corresponding to @script_tag and
562  * @language_tag.
563  *
564  * Converts a script tag and a language tag to an #hb_script_t and an
565  * #hb_language_t.
566  *
567  * Since: 2.0.0
568  **/
569 void
hb_ot_tags_to_script_and_language(hb_tag_t script_tag,hb_tag_t language_tag,hb_script_t * script,hb_language_t * language)570 hb_ot_tags_to_script_and_language (hb_tag_t       script_tag,
571 				   hb_tag_t       language_tag,
572 				   hb_script_t   *script /* OUT */,
573 				   hb_language_t *language /* OUT */)
574 {
575   hb_script_t script_out = hb_ot_tag_to_script (script_tag);
576   if (script)
577     *script = script_out;
578   if (language)
579   {
580     unsigned int script_count = 1;
581     hb_tag_t primary_script_tag[1];
582     hb_ot_tags_from_script_and_language (script_out,
583 					 HB_LANGUAGE_INVALID,
584 					 &script_count,
585 					 primary_script_tag,
586 					 nullptr, nullptr);
587     *language = hb_ot_tag_to_language (language_tag);
588     if (script_count == 0 || primary_script_tag[0] != script_tag)
589     {
590       unsigned char *buf;
591       const char *lang_str = hb_language_to_string (*language);
592       size_t len = strlen (lang_str);
593       buf = (unsigned char *) hb_malloc (len + 16);
594       if (unlikely (!buf))
595       {
596 	*language = nullptr;
597       }
598       else
599       {
600 	int shift;
601 	hb_memcpy (buf, lang_str, len);
602 	if (lang_str[0] != 'x' || lang_str[1] != '-') {
603 	  buf[len++] = '-';
604 	  buf[len++] = 'x';
605 	}
606 	buf[len++] = '-';
607 	buf[len++] = 'h';
608 	buf[len++] = 'b';
609 	buf[len++] = 's';
610 	buf[len++] = 'c';
611 	buf[len++] = '-';
612 	for (shift = 28; shift >= 0; shift -= 4)
613 	  buf[len++] = TOHEX (script_tag >> shift);
614 	*language = hb_language_from_string ((char *) buf, len);
615 	hb_free (buf);
616       }
617     }
618   }
619 }
620 
621 #ifdef MAIN
622 static inline void
test_langs_sorted()623 test_langs_sorted ()
624 {
625   for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages2); i++)
626   {
627     int c = ot_languages2[i].cmp (&ot_languages2[i - 1]);
628     if (c > 0)
629     {
630       fprintf (stderr, "ot_languages2 not sorted at index %u: %08x %d %08x\n",
631 	       i, ot_languages2[i-1].language, c, ot_languages2[i].language);
632       abort();
633     }
634   }
635 #ifndef HB_NO_LANGUAGE_LONG
636   for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages3); i++)
637   {
638     int c = ot_languages3[i].cmp (&ot_languages3[i - 1]);
639     if (c > 0)
640     {
641       fprintf (stderr, "ot_languages3 not sorted at index %u: %08x %d %08x\n",
642 	       i, ot_languages3[i-1].language, c, ot_languages3[i].language);
643       abort();
644     }
645   }
646 #endif
647 }
648 
649 int
main()650 main ()
651 {
652   test_langs_sorted ();
653   return 0;
654 }
655 
656 #endif
657 
658 
659 #endif
660