xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleIDParser.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import com.google.common.collect.ImmutableList;
12 import com.ibm.icu.impl.Utility;
13 import com.ibm.icu.text.UnicodeSet;
14 import java.util.ArrayList;
15 import java.util.Collection;
16 import java.util.EnumSet;
17 import java.util.Iterator;
18 import java.util.List;
19 import java.util.Set;
20 import java.util.TreeSet;
21 import org.unicode.cldr.util.SupplementalDataInfo.ParentLocaleComponent;
22 
23 public class LocaleIDParser {
24     /**
25      * @return Returns the language.
26      */
getLanguage()27     public String getLanguage() {
28         return language;
29     }
30 
31     /**
32      * @return Returns the language.
33      */
getLanguageScript()34     public String getLanguageScript() {
35         if (script.length() != 0) return language + "_" + script;
36         return language;
37     }
38 
getLanguageScript(Collection<String> in)39     public static Set<String> getLanguageScript(Collection<String> in) {
40         return getLanguageScript(in, null);
41     }
42 
getLanguageScript(Collection<String> in, Set<String> output)43     public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) {
44         if (output == null) output = new TreeSet<>();
45         LocaleIDParser lparser = new LocaleIDParser();
46         for (Iterator<String> it = in.iterator(); it.hasNext(); ) {
47             output.add(lparser.set(it.next()).getLanguageScript());
48         }
49         return output;
50     }
51 
52     /**
53      * @return Returns the region.
54      */
getRegion()55     public String getRegion() {
56         return region;
57     }
58 
59     /**
60      * @return Returns the script.
61      */
getScript()62     public String getScript() {
63         return script;
64     }
65 
66     /**
67      * @return Returns the variants.
68      */
getVariants()69     public String[] getVariants() {
70         return variants.clone();
71     }
72 
73     // TODO, update to RFC3066
74     // http://www.inter-locale.com/ID/draft-phillips-langtags-08.html
75     private String language;
76     private String script;
77     private String region;
78     private String[] variants;
79 
80     static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]");
81     static final UnicodeSet digits = new UnicodeSet("[0-9]");
82 
set(String localeID)83     public LocaleIDParser set(String localeID) {
84         region = script = "";
85         variants = new String[0];
86 
87         String[] pieces = new String[100]; // fix limitation later
88         Utility.split(localeID, '_', pieces);
89         int i = 0;
90         language = pieces[i++];
91         if (i >= pieces.length) return this;
92         if (pieces[i].length() == 4) {
93             script = pieces[i++];
94             if (i >= pieces.length) return this;
95         }
96         if (pieces[i].length() == 2 && letters.containsAll(pieces[i])
97                 || pieces[i].length() == 3 && digits.containsAll(pieces[i])) {
98             region = pieces[i++];
99             if (i >= pieces.length) return this;
100         }
101         List<String> al = new ArrayList<>();
102         while (i < pieces.length && pieces[i].length() > 0) {
103             al.add(pieces[i++]);
104         }
105         variants = new String[al.size()];
106         al.toArray(variants);
107         return this;
108     }
109 
110     /**
111      * Get the parent of a locale. If the input is "root", then return null. For example, if
112      * localeName is "fr_CA", return "fr".
113      *
114      * <p>Only works on canonical locale names (right casing, etc.)!
115      *
116      * <p>Formerly this function returned an empty string when localeName was "_VETTING". Now it
117      * returns "root" where it would have returned an empty string. TODO: explain "__VETTING",
118      * somehow related to SUMMARY_LOCALE. Note that CLDRLocale.process() changes "__" to "_" before
119      * this function is called. Reference: https://unicode-org.atlassian.net/browse/CLDR-13133
120      */
getParent(String localeId)121     public static final String getParent(String localeId) {
122         return getParent(localeId, ParentLocaleComponent.main);
123     }
124 
125     /**
126      * Get the parent of a locale. If the input is "root", then return null. For example, if
127      * localeId is "fr_CA", return "fr". There is a different inheritance chain for certain
128      * supplemental data elements.
129      *
130      * @param localeId Only works on canonical locale names (right casing, etc.)!
131      * @param component picks the component that indicates the inheritance chain. Is either the
132      *     standard ('main') used for all ldml-dtd items, or is one of the particular elements in
133      *     supplemental data that has a different inheritance, such as collations or plurals
134      */
getParent(String localeId, ParentLocaleComponent component)135     public static String getParent(String localeId, ParentLocaleComponent component) {
136         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
137         String explicitParent = sdi.getExplicitParentLocale(localeId, component);
138         if (explicitParent != null) {
139             return explicitParent;
140         }
141         int pos = localeId.lastIndexOf('_');
142         if (pos >= 0) {
143             String truncated = localeId.substring(0, pos);
144             // if the final item is a script, and it is not the default content, then go directly to
145             // root
146             int pos2 = getScriptPosition(localeId);
147             boolean skipNonLikely = sdi.parentLocalesSkipNonLikely(component);
148             if (pos2 > 0 && skipNonLikely) {
149                 String script = localeId.substring(pos + 1);
150                 String defaultScript = sdi.getDefaultScript(truncated);
151                 if (!script.equals(defaultScript)) {
152                     return "root";
153                 }
154             }
155             if (truncated.length() == 0) {
156                 return "root";
157             }
158             return truncated;
159         }
160         if (localeId.equals("root")) {
161             return null;
162         }
163         return "root";
164     }
165 
166     /**
167      * Return the base language subtag: en_US => en, en_Latn_US => en, en => en, root => root
168      *
169      * @param localeID
170      * @return
171      */
getSimpleBaseLanguage(String localeID)172     public static String getSimpleBaseLanguage(String localeID) {
173         int pos = localeID.indexOf('_');
174         if (pos >= 0) {
175             return localeID.substring(0, pos);
176         }
177         return localeID;
178     }
179 
180     /**
181      * If the locale consists of baseLanguage+script, return the position of the separator,
182      * otherwise -1.
183      *
184      * @param s
185      */
getScriptPosition(String locale)186     public static int getScriptPosition(String locale) {
187         int pos = locale.indexOf('_');
188         if (pos >= 0 && pos + 5 == locale.length()) {
189             int pos2 = locale.indexOf('_', pos + 1);
190             if (pos2 < 0) {
191                 return pos;
192             }
193         }
194         return -1;
195     }
196 
197     /**
198      * Utility to get the simple parent of a locale. If the input is "root", then the output is
199      * null. This method is similar to the getParent() method above, except that it does NOT pay any
200      * attention to the explicit parent locales information. Thus, getParent("zh_Hant") will return
201      * "root", but getSimpleParent("zh_Hant") would return "zh".
202      */
getSimpleParent(String localeName)203     public static String getSimpleParent(String localeName) {
204         int pos = localeName.lastIndexOf('_');
205         if (pos >= 0) {
206             return localeName.substring(0, pos);
207         }
208         if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) return null;
209         return "root";
210     }
211 
setLanguage(String language)212     public LocaleIDParser setLanguage(String language) {
213         this.language = language;
214         return this;
215     }
216 
setRegion(String region)217     public LocaleIDParser setRegion(String region) {
218         this.region = region;
219         return this;
220     }
221 
setScript(String script)222     public LocaleIDParser setScript(String script) {
223         this.script = script;
224         return this;
225     }
226 
setVariants(String[] variants)227     public LocaleIDParser setVariants(String[] variants) {
228         this.variants = variants.clone();
229         return this;
230     }
231 
232     public enum Level {
233         Language,
234         Script,
235         Region,
236         Variants,
237         Other
238     }
239 
240     /**
241      * Returns an int mask indicating the level
242      *
243      * @return (2 if script is present) + (4 if region is present) + (8 if region is present)
244      */
getLevels()245     public Set<Level> getLevels() {
246         EnumSet<Level> result = EnumSet.of(Level.Language);
247         if (getScript().length() != 0) result.add(Level.Script);
248         if (getRegion().length() != 0) result.add(Level.Region);
249         if (getVariants().length != 0) result.add(Level.Variants);
250         return result;
251     }
252 
getSiblings(Set<String> set)253     public Set<String> getSiblings(Set<String> set) {
254         Set<Level> myLevel = getLevels();
255         String localeID = toString();
256         String parentID = getParent(localeID);
257 
258         String prefix = (parentID == null || "root".equals(parentID)) ? "" : parentID + "_";
259         Set<String> siblings = new TreeSet<>();
260         for (String id : set) {
261             if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) {
262                 siblings.add(id);
263             }
264         }
265         set(localeID); // leave in known state
266         return siblings;
267     }
268 
269     @Override
toString()270     public String toString() {
271         StringBuffer result = new StringBuffer(language);
272         if (script.length() != 0) result.append('_').append(script);
273         if (region.length() != 0) result.append('_').append(region);
274         if (variants != null) {
275             for (int i = 0; i < variants.length; ++i) {
276                 result.append('_').append(variants[i]);
277             }
278         }
279         return result.toString();
280     }
281 
282     public static final ImmutableList<String> FALLBACK_CHAIN = ImmutableList.of();
283     public static final ImmutableList<String> ROOT_PARENT_CHAIN =
284             ImmutableList.of(XMLSource.ROOT_ID);
285 
286     /**
287      * Return localeIds getParent chain. Return null if there is none (localeID == root or
288      * code-fallback). Note: an L1 locale will have exactly 1 element, and be identical to
289      * ROOT_PARENT_CHAIN. TODO optimize by caching the chains Returns a
290      */
getParentChain(String localeID)291     public static List<String> getParentChain(String localeID) {
292         if (XMLSource.ROOT_ID.equals(localeID)) {
293             return FALLBACK_CHAIN;
294         }
295         List<String> result = null;
296         while (true) {
297             String parent = getParent(localeID);
298             if (parent.equals(XMLSource.ROOT_ID)) {
299                 if (result == null) {
300                     return ROOT_PARENT_CHAIN;
301                 } else {
302                     result.addAll(ROOT_PARENT_CHAIN);
303                     return ImmutableList.copyOf(result);
304                 }
305             }
306             if (result == null) {
307                 result = new ArrayList<>();
308             }
309             result.add(parent);
310             localeID = parent;
311         }
312     }
313 
isL1(String localeId)314     public static boolean isL1(String localeId) {
315         return XMLSource.ROOT_ID.equals(getParent(localeId));
316     }
317 }
318