xref: /aosp_15_r20/external/cronet/third_party/icu/source/common/loclikely.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1997-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  loclikely.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2010feb25
16 *   created by: Markus W. Scherer
17 *
18 *   Code for likely and minimized locale subtags, separated out from other .cpp files
19 *   that then do not depend on resource bundle code and likely-subtags data.
20 */
21 
22 #include "unicode/bytestream.h"
23 #include "unicode/utypes.h"
24 #include "unicode/locid.h"
25 #include "unicode/putil.h"
26 #include "unicode/uchar.h"
27 #include "unicode/uloc.h"
28 #include "unicode/ures.h"
29 #include "unicode/uscript.h"
30 #include "bytesinkutil.h"
31 #include "charstr.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "loclikelysubtags.h"
35 #include "ulocimp.h"
36 #include "ustr_imp.h"
37 
38 /**
39  * Append a tag to a buffer, adding the separator if necessary.  The buffer
40  * must be large enough to contain the resulting tag plus any separator
41  * necessary. The tag must not be a zero-length string.
42  *
43  * @param tag The tag to add.
44  * @param tagLength The length of the tag.
45  * @param buffer The output buffer.
46  * @param bufferLength The length of the output buffer.  This is an input/output parameter.
47  **/
48 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength,UBool withSeparator)49 appendTag(
50     const char* tag,
51     int32_t tagLength,
52     char* buffer,
53     int32_t* bufferLength,
54     UBool withSeparator) {
55 
56     if (withSeparator) {
57         buffer[*bufferLength] = '_';
58         ++(*bufferLength);
59     }
60 
61     uprv_memmove(
62         &buffer[*bufferLength],
63         tag,
64         tagLength);
65 
66     *bufferLength += tagLength;
67 }
68 
69 /**
70  * Create a tag string from the supplied parameters.  The lang, script and region
71  * parameters may be nullptr pointers. If they are, their corresponding length parameters
72  * must be less than or equal to 0.
73  *
74  * If any of the language, script or region parameters are empty, and the alternateTags
75  * parameter is not nullptr, it will be parsed for potential language, script and region tags
76  * to be used when constructing the new tag.  If the alternateTags parameter is nullptr, or
77  * it contains no language tag, the default tag for the unknown language is used.
78  *
79  * If the length of the new string exceeds the capacity of the output buffer,
80  * the function copies as many bytes to the output buffer as it can, and returns
81  * the error U_BUFFER_OVERFLOW_ERROR.
82  *
83  * If an illegal argument is provided, the function returns the error
84  * U_ILLEGAL_ARGUMENT_ERROR.
85  *
86  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
87  * the tag string fits in the output buffer, but the null terminator doesn't.
88  *
89  * @param lang The language tag to use.
90  * @param langLength The length of the language tag.
91  * @param script The script tag to use.
92  * @param scriptLength The length of the script tag.
93  * @param region The region tag to use.
94  * @param regionLength The length of the region tag.
95  * @param trailing Any trailing data to append to the new tag.
96  * @param trailingLength The length of the trailing data.
97  * @param alternateTags A string containing any alternate tags.
98  * @param sink The output sink receiving the tag string.
99  * @param err A pointer to a UErrorCode for error reporting.
100  **/
101 static void U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,icu::ByteSink & sink,UErrorCode * err)102 createTagStringWithAlternates(
103     const char* lang,
104     int32_t langLength,
105     const char* script,
106     int32_t scriptLength,
107     const char* region,
108     int32_t regionLength,
109     const char* trailing,
110     int32_t trailingLength,
111     const char* alternateTags,
112     icu::ByteSink& sink,
113     UErrorCode* err) {
114 
115     if (U_FAILURE(*err)) {
116         goto error;
117     }
118     else if (langLength >= ULOC_LANG_CAPACITY ||
119              scriptLength >= ULOC_SCRIPT_CAPACITY ||
120              regionLength >= ULOC_COUNTRY_CAPACITY) {
121         goto error;
122     }
123     else {
124         /**
125          * ULOC_FULLNAME_CAPACITY will provide enough capacity
126          * that we can build a string that contains the language,
127          * script and region code without worrying about overrunning
128          * the user-supplied buffer.
129          **/
130         char tagBuffer[ULOC_FULLNAME_CAPACITY];
131         int32_t tagLength = 0;
132         UBool regionAppended = false;
133 
134         if (langLength > 0) {
135             appendTag(
136                 lang,
137                 langLength,
138                 tagBuffer,
139                 &tagLength,
140                 /*withSeparator=*/false);
141         }
142         else if (alternateTags == nullptr) {
143             /*
144              * Use the empty string for an unknown language, if
145              * we found no language.
146              */
147         }
148         else {
149             /*
150              * Parse the alternateTags string for the language.
151              */
152             char alternateLang[ULOC_LANG_CAPACITY];
153             int32_t alternateLangLength = sizeof(alternateLang);
154 
155             alternateLangLength =
156                 uloc_getLanguage(
157                     alternateTags,
158                     alternateLang,
159                     alternateLangLength,
160                     err);
161             if(U_FAILURE(*err) ||
162                 alternateLangLength >= ULOC_LANG_CAPACITY) {
163                 goto error;
164             }
165             else if (alternateLangLength == 0) {
166                 /*
167                  * Use the empty string for an unknown language, if
168                  * we found no language.
169                  */
170             }
171             else {
172                 appendTag(
173                     alternateLang,
174                     alternateLangLength,
175                     tagBuffer,
176                     &tagLength,
177                     /*withSeparator=*/false);
178             }
179         }
180 
181         if (scriptLength > 0) {
182             appendTag(
183                 script,
184                 scriptLength,
185                 tagBuffer,
186                 &tagLength,
187                 /*withSeparator=*/true);
188         }
189         else if (alternateTags != nullptr) {
190             /*
191              * Parse the alternateTags string for the script.
192              */
193             char alternateScript[ULOC_SCRIPT_CAPACITY];
194 
195             const int32_t alternateScriptLength =
196                 uloc_getScript(
197                     alternateTags,
198                     alternateScript,
199                     sizeof(alternateScript),
200                     err);
201 
202             if (U_FAILURE(*err) ||
203                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
204                 goto error;
205             }
206             else if (alternateScriptLength > 0) {
207                 appendTag(
208                     alternateScript,
209                     alternateScriptLength,
210                     tagBuffer,
211                     &tagLength,
212                     /*withSeparator=*/true);
213             }
214         }
215 
216         if (regionLength > 0) {
217             appendTag(
218                 region,
219                 regionLength,
220                 tagBuffer,
221                 &tagLength,
222                 /*withSeparator=*/true);
223 
224             regionAppended = true;
225         }
226         else if (alternateTags != nullptr) {
227             /*
228              * Parse the alternateTags string for the region.
229              */
230             char alternateRegion[ULOC_COUNTRY_CAPACITY];
231 
232             const int32_t alternateRegionLength =
233                 uloc_getCountry(
234                     alternateTags,
235                     alternateRegion,
236                     sizeof(alternateRegion),
237                     err);
238             if (U_FAILURE(*err) ||
239                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
240                 goto error;
241             }
242             else if (alternateRegionLength > 0) {
243                 appendTag(
244                     alternateRegion,
245                     alternateRegionLength,
246                     tagBuffer,
247                     &tagLength,
248                     /*withSeparator=*/true);
249 
250                 regionAppended = true;
251             }
252         }
253 
254         /**
255          * Copy the partial tag from our internal buffer to the supplied
256          * target.
257          **/
258         sink.Append(tagBuffer, tagLength);
259 
260         if (trailingLength > 0) {
261             if (*trailing != '@') {
262                 sink.Append("_", 1);
263                 if (!regionAppended) {
264                     /* extra separator is required */
265                     sink.Append("_", 1);
266                 }
267             }
268 
269             /*
270              * Copy the trailing data into the supplied buffer.
271              */
272             sink.Append(trailing, trailingLength);
273         }
274 
275         return;
276     }
277 
278 error:
279 
280     /**
281      * An overflow indicates the locale ID passed in
282      * is ill-formed.  If we got here, and there was
283      * no previous error, it's an implicit overflow.
284      **/
285     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
286         U_SUCCESS(*err)) {
287         *err = U_ILLEGAL_ARGUMENT_ERROR;
288     }
289 }
290 
291 /**
292  * Parse the language, script, and region subtags from a tag string, and copy the
293  * results into the corresponding output parameters. The buffers are null-terminated,
294  * unless overflow occurs.
295  *
296  * The langLength, scriptLength, and regionLength parameters are input/output
297  * parameters, and must contain the capacity of their corresponding buffers on
298  * input.  On output, they will contain the actual length of the buffers, not
299  * including the null terminator.
300  *
301  * If the length of any of the output subtags exceeds the capacity of the corresponding
302  * buffer, the function copies as many bytes to the output buffer as it can, and returns
303  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
304  * occurs.
305  *
306  * If an illegal argument is provided, the function returns the error
307  * U_ILLEGAL_ARGUMENT_ERROR.
308  *
309  * @param localeID The locale ID to parse.
310  * @param lang The language tag buffer.
311  * @param langLength The length of the language tag.
312  * @param script The script tag buffer.
313  * @param scriptLength The length of the script tag.
314  * @param region The region tag buffer.
315  * @param regionLength The length of the region tag.
316  * @param err A pointer to a UErrorCode for error reporting.
317  * @return The number of chars of the localeID parameter consumed.
318  **/
319 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)320 parseTagString(
321     const char* localeID,
322     char* lang,
323     int32_t* langLength,
324     char* script,
325     int32_t* scriptLength,
326     char* region,
327     int32_t* regionLength,
328     UErrorCode* err)
329 {
330     const char* position = localeID;
331     int32_t subtagLength = 0;
332 
333     if(U_FAILURE(*err) ||
334        localeID == nullptr ||
335        lang == nullptr ||
336        langLength == nullptr ||
337        script == nullptr ||
338        scriptLength == nullptr ||
339        region == nullptr ||
340        regionLength == nullptr) {
341         goto error;
342     }
343 
344     subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
345 
346     /*
347      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
348      * to be an error, because it indicates the user-supplied tag is
349      * not well-formed.
350      */
351     if(U_FAILURE(*err)) {
352         goto error;
353     }
354 
355     *langLength = subtagLength;
356 
357     /*
358      * If no language was present, use the empty string instead.
359      * Otherwise, move past any separator.
360      */
361     if (_isIDSeparator(*position)) {
362         ++position;
363     }
364 
365     subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
366 
367     if(U_FAILURE(*err)) {
368         goto error;
369     }
370 
371     *scriptLength = subtagLength;
372 
373     if (*scriptLength > 0) {
374         /*
375          * Move past any separator.
376          */
377         if (_isIDSeparator(*position)) {
378             ++position;
379         }
380     }
381 
382     subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
383 
384     if(U_FAILURE(*err)) {
385         goto error;
386     }
387 
388     *regionLength = subtagLength;
389 
390     if (*regionLength <= 0 && *position != 0 && *position != '@') {
391         /* back up over consumed trailing separator */
392         --position;
393     }
394 
395 exit:
396 
397     return (int32_t)(position - localeID);
398 
399 error:
400 
401     /**
402      * If we get here, we have no explicit error, it's the result of an
403      * illegal argument.
404      **/
405     if (!U_FAILURE(*err)) {
406         *err = U_ILLEGAL_ARGUMENT_ERROR;
407     }
408 
409     goto exit;
410 }
411 
/*
 * Scan the trailing portion of a locale ID and jump to the enclosing
 * function's `error` label if any subtag in it is longer than 8 characters.
 *
 * Subtags are delimited by '-' or '_'. Scanning stops at the first '@', so
 * anything from that point on is not checked.
 *
 * The expansion contains `goto error`, so the calling function must define an
 * `error` label.
 *
 * @param trailing The trailing text to scan; indexed as (trailing)[i].
 * @param trailingLength The number of chars of trailing to scan.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            /* A separator starts a new subtag. */ \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count >= 8) { \
            /* This would be the ninth character of the current subtag. */ \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
430 
431 static UBool
_uloc_addLikelySubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * err)432 _uloc_addLikelySubtags(const char* localeID,
433                        icu::ByteSink& sink,
434                        UErrorCode* err) {
435     char lang[ULOC_LANG_CAPACITY];
436     int32_t langLength = sizeof(lang);
437     char script[ULOC_SCRIPT_CAPACITY];
438     int32_t scriptLength = sizeof(script);
439     char region[ULOC_COUNTRY_CAPACITY];
440     int32_t regionLength = sizeof(region);
441     const char* trailing = "";
442     int32_t trailingLength = 0;
443     int32_t trailingIndex = 0;
444 
445     if(U_FAILURE(*err)) {
446         goto error;
447     }
448     if (localeID == nullptr) {
449         goto error;
450     }
451 
452     trailingIndex = parseTagString(
453         localeID,
454         lang,
455         &langLength,
456         script,
457         &scriptLength,
458         region,
459         &regionLength,
460         err);
461     if(U_FAILURE(*err)) {
462         /* Overflow indicates an illegal argument error */
463         if (*err == U_BUFFER_OVERFLOW_ERROR) {
464             *err = U_ILLEGAL_ARGUMENT_ERROR;
465         }
466 
467         goto error;
468     }
469     if (langLength > 3) {
470         goto error;
471     }
472 
473     /* Find the length of the trailing portion. */
474     while (_isIDSeparator(localeID[trailingIndex])) {
475         trailingIndex++;
476     }
477     trailing = &localeID[trailingIndex];
478     trailingLength = (int32_t)uprv_strlen(trailing);
479 
480     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
481     {
482         const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
483         if(U_FAILURE(*err)) {
484             goto error;
485         }
486         // We need to keep l on the stack because lsr may point into internal
487         // memory of l.
488         icu::Locale l = icu::Locale::createFromName(localeID);
489         if (l.isBogus()) {
490             goto error;
491         }
492         icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(l, true, *err);
493         if(U_FAILURE(*err)) {
494             goto error;
495         }
496         const char* language = lsr.language;
497         if (uprv_strcmp(language, "und") == 0) {
498             language = "";
499         }
500         createTagStringWithAlternates(
501             language,
502             (int32_t)uprv_strlen(language),
503             lsr.script,
504             (int32_t)uprv_strlen(lsr.script),
505             lsr.region,
506             (int32_t)uprv_strlen(lsr.region),
507             trailing,
508             trailingLength,
509             nullptr,
510             sink,
511             err);
512         if(U_FAILURE(*err)) {
513             goto error;
514         }
515     }
516     return true;
517 
518 error:
519 
520     if (!U_FAILURE(*err)) {
521         *err = U_ILLEGAL_ARGUMENT_ERROR;
522     }
523     return false;
524 }
525 
// Add likely subtags to the sink.
// Returns true if the value in the sink is produced by a match during the lookup.
// Returns false if the value in the sink is the same as the input because there
// is no match after the lookup.
530 static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
531 
532 static void
_uloc_minimizeSubtags(const char * localeID,icu::ByteSink & sink,bool favorScript,UErrorCode * err)533 _uloc_minimizeSubtags(const char* localeID,
534                       icu::ByteSink& sink,
535                       bool favorScript,
536                       UErrorCode* err) {
537     icu::CharString maximizedTagBuffer;
538 
539     char lang[ULOC_LANG_CAPACITY];
540     int32_t langLength = sizeof(lang);
541     char script[ULOC_SCRIPT_CAPACITY];
542     int32_t scriptLength = sizeof(script);
543     char region[ULOC_COUNTRY_CAPACITY];
544     int32_t regionLength = sizeof(region);
545     const char* trailing = "";
546     int32_t trailingLength = 0;
547     int32_t trailingIndex = 0;
548 
549     if(U_FAILURE(*err)) {
550         goto error;
551     }
552     else if (localeID == nullptr) {
553         goto error;
554     }
555 
556     trailingIndex =
557         parseTagString(
558             localeID,
559             lang,
560             &langLength,
561             script,
562             &scriptLength,
563             region,
564             &regionLength,
565             err);
566     if(U_FAILURE(*err)) {
567 
568         /* Overflow indicates an illegal argument error */
569         if (*err == U_BUFFER_OVERFLOW_ERROR) {
570             *err = U_ILLEGAL_ARGUMENT_ERROR;
571         }
572 
573         goto error;
574     }
575 
576     /* Find the spot where the variants or the keywords begin, if any. */
577     while (_isIDSeparator(localeID[trailingIndex])) {
578         trailingIndex++;
579     }
580     trailing = &localeID[trailingIndex];
581     trailingLength = (int32_t)uprv_strlen(trailing);
582 
583     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
584 
585     {
586         const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
587         if(U_FAILURE(*err)) {
588             goto error;
589         }
590         icu::LSR lsr = likelySubtags->minimizeSubtags(
591             {lang, langLength},
592             {script, scriptLength},
593             {region, regionLength},
594             favorScript,
595             *err);
596         if(U_FAILURE(*err)) {
597             goto error;
598         }
599         const char* language = lsr.language;
600         if (uprv_strcmp(language, "und") == 0) {
601             language = "";
602         }
603         createTagStringWithAlternates(
604             language,
605             (int32_t)uprv_strlen(language),
606             lsr.script,
607             (int32_t)uprv_strlen(lsr.script),
608             lsr.region,
609             (int32_t)uprv_strlen(lsr.region),
610             trailing,
611             trailingLength,
612             nullptr,
613             sink,
614             err);
615         if(U_FAILURE(*err)) {
616             goto error;
617         }
618         return;
619     }
620 
621 error:
622 
623     if (!U_FAILURE(*err)) {
624         *err = U_ILLEGAL_ARGUMENT_ERROR;
625     }
626 }
627 
628 U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * status)629 uloc_addLikelySubtags(const char* localeID,
630                       char* maximizedLocaleID,
631                       int32_t maximizedLocaleIDCapacity,
632                       UErrorCode* status) {
633     if (U_FAILURE(*status)) {
634         return 0;
635     }
636 
637     icu::CheckedArrayByteSink sink(
638             maximizedLocaleID, maximizedLocaleIDCapacity);
639 
640     ulocimp_addLikelySubtags(localeID, sink, status);
641     int32_t reslen = sink.NumberOfBytesAppended();
642 
643     if (U_FAILURE(*status)) {
644         return sink.Overflowed() ? reslen : -1;
645     }
646 
647     if (sink.Overflowed()) {
648         *status = U_BUFFER_OVERFLOW_ERROR;
649     } else {
650         u_terminateChars(
651                 maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
652     }
653 
654     return reslen;
655 }
656 
657 static UBool
_ulocimp_addLikelySubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * status)658 _ulocimp_addLikelySubtags(const char* localeID,
659                           icu::ByteSink& sink,
660                           UErrorCode* status) {
661     icu::CharString localeBuffer;
662     {
663         icu::CharStringByteSink localeSink(&localeBuffer);
664         ulocimp_canonicalize(localeID, localeSink, status);
665     }
666     if (U_SUCCESS(*status)) {
667         return _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
668     } else {
669         return false;
670     }
671 }
672 
673 U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * status)674 ulocimp_addLikelySubtags(const char* localeID,
675                          icu::ByteSink& sink,
676                          UErrorCode* status) {
677     _ulocimp_addLikelySubtags(localeID, sink, status);
678 }
679 
680 U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * status)681 uloc_minimizeSubtags(const char* localeID,
682                      char* minimizedLocaleID,
683                      int32_t minimizedLocaleIDCapacity,
684                      UErrorCode* status) {
685     if (U_FAILURE(*status)) {
686         return 0;
687     }
688 
689     icu::CheckedArrayByteSink sink(
690             minimizedLocaleID, minimizedLocaleIDCapacity);
691 
692     ulocimp_minimizeSubtags(localeID, sink, false, status);
693     int32_t reslen = sink.NumberOfBytesAppended();
694 
695     if (U_FAILURE(*status)) {
696         return sink.Overflowed() ? reslen : -1;
697     }
698 
699     if (sink.Overflowed()) {
700         *status = U_BUFFER_OVERFLOW_ERROR;
701     } else {
702         u_terminateChars(
703                 minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
704     }
705 
706     return reslen;
707 }
708 
709 U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char * localeID,icu::ByteSink & sink,bool favorScript,UErrorCode * status)710 ulocimp_minimizeSubtags(const char* localeID,
711                         icu::ByteSink& sink,
712                         bool favorScript,
713                         UErrorCode* status) {
714     icu::CharString localeBuffer;
715     {
716         icu::CharStringByteSink localeSink(&localeBuffer);
717         ulocimp_canonicalize(localeID, localeSink, status);
718     }
719     _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
720 }
721 
722 // Pairs of (language subtag, + or -) for finding out fast if common languages
723 // are LTR (minus) or RTL (plus).
724 static const char LANG_DIR_STRING[] =
725         "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
726 
727 // Implemented here because this calls ulocimp_addLikelySubtags().
728 U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char * locale)729 uloc_isRightToLeft(const char *locale) {
730     UErrorCode errorCode = U_ZERO_ERROR;
731     char script[8];
732     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
733     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
734             scriptLength == 0) {
735         // Fastpath: We know the likely scripts and their writing direction
736         // for some common languages.
737         errorCode = U_ZERO_ERROR;
738         char lang[8];
739         int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
740         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
741             return false;
742         }
743         if (langLength > 0) {
744             const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
745             if (langPtr != nullptr) {
746                 switch (langPtr[langLength]) {
747                 case '-': return false;
748                 case '+': return true;
749                 default: break;  // partial match of a longer code
750                 }
751             }
752         }
753         // Otherwise, find the likely script.
754         errorCode = U_ZERO_ERROR;
755         icu::CharString likely;
756         {
757             icu::CharStringByteSink sink(&likely);
758             ulocimp_addLikelySubtags(locale, sink, &errorCode);
759         }
760         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
761             return false;
762         }
763         scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
764         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
765                 scriptLength == 0) {
766             return false;
767         }
768     }
769     UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
770     return uscript_isRightToLeft(scriptCode);
771 }
772 
773 U_NAMESPACE_BEGIN
774 
775 UBool
isRightToLeft() const776 Locale::isRightToLeft() const {
777     return uloc_isRightToLeft(getBaseName());
778 }
779 
780 U_NAMESPACE_END
781 
782 // The following must at least allow for rg key value (6) plus terminator (1).
783 #define ULOC_RG_BUFLEN 8
784 
785 U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char * localeID,UBool inferRegion,char * region,int32_t regionCapacity,UErrorCode * status)786 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
787                                      char *region, int32_t regionCapacity, UErrorCode* status) {
788     if (U_FAILURE(*status)) {
789         return 0;
790     }
791     char rgBuf[ULOC_RG_BUFLEN];
792     UErrorCode rgStatus = U_ZERO_ERROR;
793 
794     // First check for rg keyword value
795     icu::CharString rg;
796     {
797         icu::CharStringByteSink sink(&rg);
798         ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus);
799     }
800     int32_t rgLen = rg.length();
801     if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) {
802         rgLen = 0;
803     } else {
804         // chop off the subdivision code (which will generally be "zzzz" anyway)
805         const char* const data = rg.data();
806         if (uprv_isASCIILetter(data[0])) {
807             rgLen = 2;
808             rgBuf[0] = uprv_toupper(data[0]);
809             rgBuf[1] = uprv_toupper(data[1]);
810         } else {
811             // assume three-digit region code
812             rgLen = 3;
813             uprv_memcpy(rgBuf, data, rgLen);
814         }
815     }
816 
817     if (rgLen == 0) {
818         // No valid rg keyword value, try for unicode_region_subtag
819         rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
820         if (U_FAILURE(*status)) {
821             rgLen = 0;
822         } else if (rgLen == 0 && inferRegion) {
823             // no unicode_region_subtag but inferRegion true, try likely subtags
824             rgStatus = U_ZERO_ERROR;
825             icu::CharString locBuf;
826             {
827                 icu::CharStringByteSink sink(&locBuf);
828                 ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
829             }
830             if (U_SUCCESS(rgStatus)) {
831                 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
832                 if (U_FAILURE(*status)) {
833                     rgLen = 0;
834                 }
835             }
836         }
837     }
838 
839     rgBuf[rgLen] = 0;
840     uprv_strncpy(region, rgBuf, regionCapacity);
841     return u_terminateChars(region, regionCapacity, rgLen, status);
842 }
843 
844