1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.ImmutableMap; 4 import com.ibm.icu.impl.Row; 5 import com.ibm.icu.impl.Row.R2; 6 import java.util.Arrays; 7 import java.util.Collections; 8 import java.util.Date; 9 import java.util.HashMap; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Set; 13 import java.util.TreeSet; 14 import org.unicode.cldr.util.Builder; 15 import org.unicode.cldr.util.CLDRConfig; 16 import org.unicode.cldr.util.LanguageTagParser; 17 import org.unicode.cldr.util.LanguageTagParser.OutputOption; 18 import org.unicode.cldr.util.SupplementalDataInfo; 19 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 20 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 21 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 22 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 23 24 public class LikelySubtags { 25 static final boolean DEBUG = true; 26 static final String TAG_SEPARATOR = "_"; 27 private static final boolean SKIP_UND = true; 28 29 private boolean favorRegion = false; 30 private final Map<String, String> toMaximized; 31 32 /** 33 * Create the likely subtags. 34 * 35 * @param toMaximized 36 */ LikelySubtags(Map<String, String> toMaximized)37 public LikelySubtags(Map<String, String> toMaximized) { 38 this.toMaximized = 39 toMaximized == null 40 ? LikelySubtagsData.getInstance().defaultToMaximized 41 : ImmutableMap.copyOf(toMaximized); 42 } 43 44 /** thread-safe data loading. Retooled so that the constant data is shared across instances. */ 45 private static class LikelySubtagsData { 46 private static final LikelySubtagsData SINGLETON = new LikelySubtagsData(); 47 getInstance()48 private static LikelySubtagsData getInstance() { 49 return SINGLETON; 50 } 51 52 private final SupplementalDataInfo supplementalDataInfo = 53 CLDRConfig.getInstance().getSupplementalDataInfo(); 54 private final Map<String, String> defaultToMaximized = 55 supplementalDataInfo.getLikelySubtags(); 56 private final Map<String, String> currencyToLikelyTerritory; 57 LikelySubtagsData()58 private LikelySubtagsData() { 59 Map<String, String> _currencyToLikelyTerritory = new HashMap<>(); 60 Date now = new Date(); 61 Set<Row.R2<Double, String>> sorted = new TreeSet<>(); 62 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 63 PopulationData pop = supplementalDataInfo.getPopulationDataForTerritory(territory); 64 double population = pop.getPopulation(); 65 sorted.add(Row.of(-population, territory)); 66 } 67 for (R2<Double, String> item : sorted) { 68 String territory = item.get1(); 69 Set<CurrencyDateInfo> targetCurrencyInfo = 70 supplementalDataInfo.getCurrencyDateInfo(territory); 71 if (targetCurrencyInfo == null) { 72 continue; 73 } 74 for (CurrencyDateInfo cdi : targetCurrencyInfo) { 75 String currency = cdi.getCurrency(); 76 if (!_currencyToLikelyTerritory.containsKey(currency) 77 && cdi.getStart().before(now) 78 && cdi.getEnd().after(now) 79 && cdi.isLegalTender()) { 80 _currencyToLikelyTerritory.put(currency, territory); 81 } 82 } 83 } 84 currencyToLikelyTerritory = ImmutableMap.copyOf(_currencyToLikelyTerritory); 85 } 86 } 87 88 /** 89 * Create the likely subtags. 90 * 91 * @param toMaximized 92 */ LikelySubtags()93 public LikelySubtags() { 94 this(null); 95 } 96 isFavorRegion()97 public boolean isFavorRegion() { 98 return favorRegion; 99 } 100 setFavorRegion(boolean favorRegion)101 public LikelySubtags setFavorRegion(boolean favorRegion) { 102 this.favorRegion = favorRegion; 103 return this; 104 } 105 getToMaximized()106 public Map<String, String> getToMaximized() { 107 return toMaximized; 108 } 109 maximize(String languageTag, Map<String, String> toMaximized)110 public static String maximize(String languageTag, Map<String, String> toMaximized) { 111 return new LikelySubtags(toMaximized).maximize(languageTag); 112 } 113 minimize( String input, Map<String, String> toMaximized, boolean favorRegion)114 public static String minimize( 115 String input, Map<String, String> toMaximized, boolean favorRegion) { 116 return new LikelySubtags(toMaximized).setFavorRegion(favorRegion).minimize(input); 117 } 118 119 // TODO Old, crufty code, needs reworking. maximize(String languageTag)120 public synchronized String maximize(String languageTag) { 121 if (languageTag == null) { 122 return null; 123 } 124 LanguageTagParser ltp = new LanguageTagParser(); 125 if (DEBUG && languageTag.equals("es" + TAG_SEPARATOR + "Hans" + TAG_SEPARATOR + "CN")) { 126 System.out.print(""); // debug 127 } 128 // clean up the input by removing Zzzz, ZZ, and changing "" into und. 129 ltp.set(languageTag); 130 return maximize(ltp); 131 } 132 133 /** Maximize to a string (modifying the LanguageTagParser in so doing) */ maximize(LanguageTagParser ltp)134 public String maximize(LanguageTagParser ltp) { 135 if (maximizeInPlace(ltp)) { 136 return ltp.toString(); 137 } else { 138 return null; 139 } 140 } 141 142 /** 143 * Maximize in place, for use when the modified LanguageTagParser is the desired return value 144 */ maximizeInPlace(LanguageTagParser ltp)145 public boolean maximizeInPlace(LanguageTagParser ltp) { 146 String language = ltp.getLanguage(); 147 String region = ltp.getRegion(); 148 String script = ltp.getScript(); 149 List<String> variants = ltp.getVariants(); 150 Map<String, String> extensions = ltp.getExtensions(); 151 Map<String, String> localeExtensions = ltp.getLocaleExtensions(); 152 153 String sourceLanguage = language; 154 String sourceScript = script; 155 String sourceRegion = region; 156 157 if (language.equals("")) { 158 ltp.setLanguage(language = "und"); 159 } 160 if (script.equals("Zzzz")) { 161 ltp.setScript(script = ""); 162 } 163 if (region.equals("ZZ")) { 164 ltp.setRegion(region = ""); 165 } 166 if (variants.size() != 0) { 167 ltp.setVariants(Collections.<String>emptySet()); 168 } 169 if (extensions.size() != 0) { 170 ltp.setExtensions(Collections.<String, String>emptyMap()); 171 } 172 if (localeExtensions.size() != 0) { 173 ltp.setExtensions(Collections.<String, String>emptyMap()); 174 } 175 176 if (!ltp.getLanguage().equals("und") 177 && !ltp.getScript().isEmpty() 178 && !ltp.getRegion().isEmpty()) { 179 return true; 180 } 181 182 // check whole 183 String result = toMaximized.get(ltp.toString()); 184 if (result != null) { 185 ltp.set(result) 186 .setVariants(variants) 187 .setExtensions(extensions) 188 .setLocaleExtensions(localeExtensions); 189 return true; 190 } 191 192 boolean noLanguage = language.equals("und"); 193 boolean noScript = script.isEmpty(); 194 boolean noRegion = region.isEmpty(); 195 196 // not efficient, but simple to match spec. 197 for (int count = 0; ; ++count) { // breaks down below 198 for (String script2 : noScript ? Arrays.asList(script) : Arrays.asList(script, "")) { 199 ltp.setScript(script2); 200 201 for (String region2 : 202 noRegion ? Arrays.asList(region) : Arrays.asList(region, "")) { 203 ltp.setRegion(region2); 204 result = toMaximized.get(ltp.toString()); 205 if (result != null) { 206 ltp.set(result); 207 if (!noLanguage) { 208 ltp.setLanguage(language); 209 } 210 if (!noScript) { 211 ltp.setScript(script); 212 } 213 if (!noRegion) { 214 ltp.setRegion(region); 215 } 216 ltp.setVariants(variants) 217 .setExtensions(extensions) 218 .setLocaleExtensions(localeExtensions); 219 if (count == 1) { 220 System.out.println( 221 "2nd pass, " 222 + new LanguageTagParser() 223 .setLanguage(sourceLanguage) 224 .setScript(sourceScript) 225 .setRegion(sourceRegion) 226 + " ==> " 227 + ltp); 228 } 229 return true; 230 } 231 } 232 } 233 234 if (SKIP_UND || ltp.getLanguage().equals("und")) { 235 break; 236 } else { 237 // Otherwise repeat the loop, trying for und matches 238 ltp.setLanguage("und"); 239 } 240 } 241 return false; // couldn't maximize 242 } 243 244 // TODO, optimize if needed by adding private routine that maximizes a LanguageTagParser instead 245 // of multiple parsings 246 // TODO Old, crufty code, needs reworking. minimize(String input)247 public String minimize(String input) { 248 return minimize(input, OutputOption.ICU_LCVARIANT); 249 } 250 minimize(String input, OutputOption oo)251 public synchronized String minimize(String input, OutputOption oo) { 252 String maximized = maximize(input, toMaximized); 253 if (maximized == null) { 254 return null; 255 } 256 if (DEBUG && maximized.equals("sr" + TAG_SEPARATOR + "Latn" + TAG_SEPARATOR + "RS")) { 257 System.out.print(""); // debug 258 } 259 LanguageTagParser ltp = new LanguageTagParser().set(maximized); 260 String language = ltp.getLanguage(); 261 String region = ltp.getRegion(); 262 String script = ltp.getScript(); 263 264 // handle variants 265 List<String> variants = ltp.getVariants(); 266 Map<String, String> extensions = ltp.getExtensions(); 267 Map<String, String> localeExtensions = ltp.getLocaleExtensions(); 268 269 String maximizedCheck = maximized; 270 if (!variants.isEmpty() || !extensions.isEmpty() || !localeExtensions.isEmpty()) { 271 maximizedCheck = ltp.toLSR(); 272 } 273 // try building up from shorter to longer, and find the first that matches 274 // could be more optimized, but for this code we want simplest 275 String[] trials = { 276 language, 277 language + TAG_SEPARATOR + (favorRegion ? region : script), 278 language + TAG_SEPARATOR + (!favorRegion ? region : script) 279 }; 280 for (String trial : trials) { 281 String newMaximized = maximize(trial, toMaximized); 282 if (maximizedCheck.equals(newMaximized)) { 283 if (variants.isEmpty() && extensions.isEmpty() && localeExtensions.isEmpty()) { 284 return trial; 285 } 286 return ltp.set(trial) 287 .setVariants(variants) 288 .setExtensions(extensions) 289 .setLocaleExtensions(extensions) 290 .toString(oo); 291 } 292 } 293 return maximized; 294 } 295 296 static final Map<String, String> EXTRA_SCRIPTS = 297 Builder.with(new HashMap<String, String>()) 298 .on("crs", "pcm", "tlh") 299 .put("Latn") 300 .freeze(); 301 getLikelyScript(String code)302 public String getLikelyScript(String code) { 303 String max = this.maximize(code); 304 305 String script = null; 306 if (max != null) { 307 script = new LanguageTagParser().set(max).getScript(); 308 } else { 309 Map<Type, BasicLanguageData> data = 310 LikelySubtagsData.getInstance() 311 .supplementalDataInfo 312 .getBasicLanguageDataMap(code); 313 if (data != null) { 314 for (BasicLanguageData item : data.values()) { 315 Set<String> scripts = item.getScripts(); 316 if (scripts == null || scripts.size() == 0) continue; 317 script = scripts.iterator().next(); 318 Type type = item.getType(); 319 if (type == Type.primary) { 320 break; 321 } 322 } 323 } 324 if (script == null) { 325 script = EXTRA_SCRIPTS.get(code); 326 if (script == null) { 327 script = "Zzzz"; 328 } 329 } 330 } 331 return script; 332 } 333 getLikelyTerritoryFromCurrency(String code)334 public String getLikelyTerritoryFromCurrency(String code) { 335 return LikelySubtagsData.getInstance().currencyToLikelyTerritory.get(code); 336 } 337 } 338