xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LikelySubtags.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.ImmutableMap;
4 import com.ibm.icu.impl.Row;
5 import com.ibm.icu.impl.Row.R2;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.HashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Set;
13 import java.util.TreeSet;
14 import org.unicode.cldr.util.Builder;
15 import org.unicode.cldr.util.CLDRConfig;
16 import org.unicode.cldr.util.LanguageTagParser;
17 import org.unicode.cldr.util.LanguageTagParser.OutputOption;
18 import org.unicode.cldr.util.SupplementalDataInfo;
19 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
20 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
21 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
22 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
23 
24 public class LikelySubtags {
25     static final boolean DEBUG = true;
26     static final String TAG_SEPARATOR = "_";
27     private static final boolean SKIP_UND = true;
28 
29     private boolean favorRegion = false;
30     private final Map<String, String> toMaximized;
31 
32     /**
33      * Create the likely subtags.
34      *
35      * @param toMaximized
36      */
LikelySubtags(Map<String, String> toMaximized)37     public LikelySubtags(Map<String, String> toMaximized) {
38         this.toMaximized =
39                 toMaximized == null
40                         ? LikelySubtagsData.getInstance().defaultToMaximized
41                         : ImmutableMap.copyOf(toMaximized);
42     }
43 
44     /** thread-safe data loading. Retooled so that the constant data is shared across instances. */
45     private static class LikelySubtagsData {
46         private static final LikelySubtagsData SINGLETON = new LikelySubtagsData();
47 
getInstance()48         private static LikelySubtagsData getInstance() {
49             return SINGLETON;
50         }
51 
52         private final SupplementalDataInfo supplementalDataInfo =
53                 CLDRConfig.getInstance().getSupplementalDataInfo();
54         private final Map<String, String> defaultToMaximized =
55                 supplementalDataInfo.getLikelySubtags();
56         private final Map<String, String> currencyToLikelyTerritory;
57 
LikelySubtagsData()58         private LikelySubtagsData() {
59             Map<String, String> _currencyToLikelyTerritory = new HashMap<>();
60             Date now = new Date();
61             Set<Row.R2<Double, String>> sorted = new TreeSet<>();
62             for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
63                 PopulationData pop = supplementalDataInfo.getPopulationDataForTerritory(territory);
64                 double population = pop.getPopulation();
65                 sorted.add(Row.of(-population, territory));
66             }
67             for (R2<Double, String> item : sorted) {
68                 String territory = item.get1();
69                 Set<CurrencyDateInfo> targetCurrencyInfo =
70                         supplementalDataInfo.getCurrencyDateInfo(territory);
71                 if (targetCurrencyInfo == null) {
72                     continue;
73                 }
74                 for (CurrencyDateInfo cdi : targetCurrencyInfo) {
75                     String currency = cdi.getCurrency();
76                     if (!_currencyToLikelyTerritory.containsKey(currency)
77                             && cdi.getStart().before(now)
78                             && cdi.getEnd().after(now)
79                             && cdi.isLegalTender()) {
80                         _currencyToLikelyTerritory.put(currency, territory);
81                     }
82                 }
83             }
84             currencyToLikelyTerritory = ImmutableMap.copyOf(_currencyToLikelyTerritory);
85         }
86     }
87 
88     /**
89      * Create the likely subtags.
90      *
91      * @param toMaximized
92      */
LikelySubtags()93     public LikelySubtags() {
94         this(null);
95     }
96 
isFavorRegion()97     public boolean isFavorRegion() {
98         return favorRegion;
99     }
100 
setFavorRegion(boolean favorRegion)101     public LikelySubtags setFavorRegion(boolean favorRegion) {
102         this.favorRegion = favorRegion;
103         return this;
104     }
105 
getToMaximized()106     public Map<String, String> getToMaximized() {
107         return toMaximized;
108     }
109 
maximize(String languageTag, Map<String, String> toMaximized)110     public static String maximize(String languageTag, Map<String, String> toMaximized) {
111         return new LikelySubtags(toMaximized).maximize(languageTag);
112     }
113 
minimize( String input, Map<String, String> toMaximized, boolean favorRegion)114     public static String minimize(
115             String input, Map<String, String> toMaximized, boolean favorRegion) {
116         return new LikelySubtags(toMaximized).setFavorRegion(favorRegion).minimize(input);
117     }
118 
119     // TODO Old, crufty code, needs reworking.
maximize(String languageTag)120     public synchronized String maximize(String languageTag) {
121         if (languageTag == null) {
122             return null;
123         }
124         LanguageTagParser ltp = new LanguageTagParser();
125         if (DEBUG && languageTag.equals("es" + TAG_SEPARATOR + "Hans" + TAG_SEPARATOR + "CN")) {
126             System.out.print(""); // debug
127         }
128         // clean up the input by removing Zzzz, ZZ, and changing "" into und.
129         ltp.set(languageTag);
130         return maximize(ltp);
131     }
132 
133     /** Maximize to a string (modifying the LanguageTagParser in so doing) */
maximize(LanguageTagParser ltp)134     public String maximize(LanguageTagParser ltp) {
135         if (maximizeInPlace(ltp)) {
136             return ltp.toString();
137         } else {
138             return null;
139         }
140     }
141 
142     /**
143      * Maximize in place, for use when the modified LanguageTagParser is the desired return value
144      */
maximizeInPlace(LanguageTagParser ltp)145     public boolean maximizeInPlace(LanguageTagParser ltp) {
146         String language = ltp.getLanguage();
147         String region = ltp.getRegion();
148         String script = ltp.getScript();
149         List<String> variants = ltp.getVariants();
150         Map<String, String> extensions = ltp.getExtensions();
151         Map<String, String> localeExtensions = ltp.getLocaleExtensions();
152 
153         String sourceLanguage = language;
154         String sourceScript = script;
155         String sourceRegion = region;
156 
157         if (language.equals("")) {
158             ltp.setLanguage(language = "und");
159         }
160         if (script.equals("Zzzz")) {
161             ltp.setScript(script = "");
162         }
163         if (region.equals("ZZ")) {
164             ltp.setRegion(region = "");
165         }
166         if (variants.size() != 0) {
167             ltp.setVariants(Collections.<String>emptySet());
168         }
169         if (extensions.size() != 0) {
170             ltp.setExtensions(Collections.<String, String>emptyMap());
171         }
172         if (localeExtensions.size() != 0) {
173             ltp.setExtensions(Collections.<String, String>emptyMap());
174         }
175 
176         if (!ltp.getLanguage().equals("und")
177                 && !ltp.getScript().isEmpty()
178                 && !ltp.getRegion().isEmpty()) {
179             return true;
180         }
181 
182         // check whole
183         String result = toMaximized.get(ltp.toString());
184         if (result != null) {
185             ltp.set(result)
186                     .setVariants(variants)
187                     .setExtensions(extensions)
188                     .setLocaleExtensions(localeExtensions);
189             return true;
190         }
191 
192         boolean noLanguage = language.equals("und");
193         boolean noScript = script.isEmpty();
194         boolean noRegion = region.isEmpty();
195 
196         // not efficient, but simple to match spec.
197         for (int count = 0; ; ++count) { // breaks down below
198             for (String script2 : noScript ? Arrays.asList(script) : Arrays.asList(script, "")) {
199                 ltp.setScript(script2);
200 
201                 for (String region2 :
202                         noRegion ? Arrays.asList(region) : Arrays.asList(region, "")) {
203                     ltp.setRegion(region2);
204                     result = toMaximized.get(ltp.toString());
205                     if (result != null) {
206                         ltp.set(result);
207                         if (!noLanguage) {
208                             ltp.setLanguage(language);
209                         }
210                         if (!noScript) {
211                             ltp.setScript(script);
212                         }
213                         if (!noRegion) {
214                             ltp.setRegion(region);
215                         }
216                         ltp.setVariants(variants)
217                                 .setExtensions(extensions)
218                                 .setLocaleExtensions(localeExtensions);
219                         if (count == 1) {
220                             System.out.println(
221                                     "2nd pass, "
222                                             + new LanguageTagParser()
223                                                     .setLanguage(sourceLanguage)
224                                                     .setScript(sourceScript)
225                                                     .setRegion(sourceRegion)
226                                             + " ==> "
227                                             + ltp);
228                         }
229                         return true;
230                     }
231                 }
232             }
233 
234             if (SKIP_UND || ltp.getLanguage().equals("und")) {
235                 break;
236             } else {
237                 // Otherwise repeat the loop, trying for und matches
238                 ltp.setLanguage("und");
239             }
240         }
241         return false; // couldn't maximize
242     }
243 
244     // TODO, optimize if needed by adding private routine that maximizes a LanguageTagParser instead
245     // of multiple parsings
246     // TODO Old, crufty code, needs reworking.
minimize(String input)247     public String minimize(String input) {
248         return minimize(input, OutputOption.ICU_LCVARIANT);
249     }
250 
minimize(String input, OutputOption oo)251     public synchronized String minimize(String input, OutputOption oo) {
252         String maximized = maximize(input, toMaximized);
253         if (maximized == null) {
254             return null;
255         }
256         if (DEBUG && maximized.equals("sr" + TAG_SEPARATOR + "Latn" + TAG_SEPARATOR + "RS")) {
257             System.out.print(""); // debug
258         }
259         LanguageTagParser ltp = new LanguageTagParser().set(maximized);
260         String language = ltp.getLanguage();
261         String region = ltp.getRegion();
262         String script = ltp.getScript();
263 
264         // handle variants
265         List<String> variants = ltp.getVariants();
266         Map<String, String> extensions = ltp.getExtensions();
267         Map<String, String> localeExtensions = ltp.getLocaleExtensions();
268 
269         String maximizedCheck = maximized;
270         if (!variants.isEmpty() || !extensions.isEmpty() || !localeExtensions.isEmpty()) {
271             maximizedCheck = ltp.toLSR();
272         }
273         // try building up from shorter to longer, and find the first that matches
274         // could be more optimized, but for this code we want simplest
275         String[] trials = {
276             language,
277             language + TAG_SEPARATOR + (favorRegion ? region : script),
278             language + TAG_SEPARATOR + (!favorRegion ? region : script)
279         };
280         for (String trial : trials) {
281             String newMaximized = maximize(trial, toMaximized);
282             if (maximizedCheck.equals(newMaximized)) {
283                 if (variants.isEmpty() && extensions.isEmpty() && localeExtensions.isEmpty()) {
284                     return trial;
285                 }
286                 return ltp.set(trial)
287                         .setVariants(variants)
288                         .setExtensions(extensions)
289                         .setLocaleExtensions(extensions)
290                         .toString(oo);
291             }
292         }
293         return maximized;
294     }
295 
296     static final Map<String, String> EXTRA_SCRIPTS =
297             Builder.with(new HashMap<String, String>())
298                     .on("crs", "pcm", "tlh")
299                     .put("Latn")
300                     .freeze();
301 
getLikelyScript(String code)302     public String getLikelyScript(String code) {
303         String max = this.maximize(code);
304 
305         String script = null;
306         if (max != null) {
307             script = new LanguageTagParser().set(max).getScript();
308         } else {
309             Map<Type, BasicLanguageData> data =
310                     LikelySubtagsData.getInstance()
311                             .supplementalDataInfo
312                             .getBasicLanguageDataMap(code);
313             if (data != null) {
314                 for (BasicLanguageData item : data.values()) {
315                     Set<String> scripts = item.getScripts();
316                     if (scripts == null || scripts.size() == 0) continue;
317                     script = scripts.iterator().next();
318                     Type type = item.getType();
319                     if (type == Type.primary) {
320                         break;
321                     }
322                 }
323             }
324             if (script == null) {
325                 script = EXTRA_SCRIPTS.get(code);
326                 if (script == null) {
327                     script = "Zzzz";
328                 }
329             }
330         }
331         return script;
332     }
333 
getLikelyTerritoryFromCurrency(String code)334     public String getLikelyTerritoryFromCurrency(String code) {
335         return LikelySubtagsData.getInstance().currencyToLikelyTerritory.get(code);
336     }
337 }
338