1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.ImmutableList; 5 import com.google.common.collect.ImmutableMap; 6 import com.google.common.collect.ImmutableSet; 7 import com.ibm.icu.impl.Relation; 8 import com.ibm.icu.impl.Row; 9 import com.ibm.icu.impl.Row.R2; 10 import com.ibm.icu.impl.Row.R3; 11 import com.ibm.icu.impl.Row.R4; 12 import com.ibm.icu.lang.UScript; 13 import com.ibm.icu.text.Collator; 14 import com.ibm.icu.text.NumberFormat; 15 import com.ibm.icu.text.UTF16; 16 import com.ibm.icu.text.UnicodeSet; 17 import com.ibm.icu.text.UnicodeSetIterator; 18 import com.ibm.icu.util.ULocale; 19 import java.io.BufferedReader; 20 import java.io.File; 21 import java.io.IOException; 22 import java.io.PrintWriter; 23 import java.nio.file.Files; 24 import java.util.Arrays; 25 import java.util.BitSet; 26 import java.util.Collection; 27 import java.util.Comparator; 28 import java.util.HashMap; 29 import java.util.HashSet; 30 import java.util.LinkedHashSet; 31 import java.util.List; 32 import java.util.Map; 33 import java.util.Map.Entry; 34 import java.util.Set; 35 import java.util.TreeMap; 36 import java.util.TreeSet; 37 import org.unicode.cldr.draft.FileUtilities; 38 import org.unicode.cldr.draft.ScriptMetadata; 39 import org.unicode.cldr.draft.ScriptMetadata.Info; 40 import org.unicode.cldr.util.Builder; 41 import org.unicode.cldr.util.CLDRConfig; 42 import org.unicode.cldr.util.CLDRFile; 43 import org.unicode.cldr.util.CLDRLocale; 44 import org.unicode.cldr.util.CLDRPaths; 45 import org.unicode.cldr.util.CldrUtility; 46 import org.unicode.cldr.util.Containment; 47 import org.unicode.cldr.util.Counter; 48 import org.unicode.cldr.util.Factory; 49 import org.unicode.cldr.util.Iso3166Data; 50 import org.unicode.cldr.util.Iso639Data; 51 import org.unicode.cldr.util.Iso639Data.Scope; 52 import org.unicode.cldr.util.LanguageTagParser; 53 import org.unicode.cldr.util.LocaleIDParser; 54 import 
org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;

/**
 * Problems: "und_Hani", "zh_Hani" "und_Sinh", "si_Sinh"
 *
 * @author markdavis
 */
public class GenerateMaximalLocales {

    /** Validity status per language code, as returned by {@code Validity.getCodeToStatus}. */
    private static final Map<String, Status> LANGUAGE_CODE_TO_STATUS =
            Validity.getInstance().getCodeToStatus(LstrType.language);

    /**
     * Placeholder region code. {@code main} drops result keys containing it and rewrites it to
     * {@code UNKNOWN_REGION} inside result values.
     */
    private static final String TEMP_UNKNOWN_REGION = "XZ";

    private static final String DEBUG_ADD_KEY = "und_Latn_ZA";

    /** Verbose output switch, read from the property "GenerateMaximalLocalesDebug". */
    private static final boolean SHOW_ADD =
            CldrUtility.getProperty("GenerateMaximalLocalesDebug", false);
    /** Read from the property "GenerateMaximalLocalesSuppress". */
    private static final boolean SUPPRESS_CHANGES =
            CldrUtility.getProperty("GenerateMaximalLocalesSuppress", false);
    private static final boolean SHOW_CONTAINERS = false;

    // Compile-time switches for the optional listings in printDefaultLanguagesAndScripts().
    private static final boolean SHOW_ALL_LANGUAGE_CODES = false;
    private static final boolean SHOW_DETAILED = false;
    private static final boolean SHOW_INCLUDED_EXCLUDED = false;

    /** Output formats selectable through the "OutputStyle" property. */
    enum OutputStyle {
        PLAINTEXT,
        C,
        C_ALT,
        XML
    }

    // NOTE(review): toUpperCase() uses the JVM default locale; a value such as "plaintext" would
    // not match PLAINTEXT under a Turkish default locale. Consider toUpperCase(Locale.ROOT).
    // NOTE(review): named like a constant but not declared final.
    private static OutputStyle OUTPUT_STYLE =
            OutputStyle.valueOf(CldrUtility.getProperty("OutputStyle", "XML", "XML").toUpperCase());

    // set based on above
    private static final String SEPARATOR =
            OUTPUT_STYLE == OutputStyle.C || OUTPUT_STYLE == OutputStyle.C_ALT
                    ? CldrUtility.LINE_SEPARATOR
                    : "\t";
    private static final String TAG_SEPARATOR = OUTPUT_STYLE == OutputStyle.C_ALT ? "-" : "_";
    // private static final boolean FAVOR_REGION = true; // OUTPUT_STYLE == OutputStyle.C_ALT;

    private static final boolean tryDifferent = true;

    /** Source directories handed to the SimpleFactory below: main, seed and exemplars. */
    private static final File list[] = {
        new File(CLDRPaths.MAIN_DIRECTORY),
        new File(CLDRPaths.SEED_DIRECTORY),
        new File(CLDRPaths.EXEMPLARS_DIRECTORY)
    };

    private static Factory factory = SimpleFactory.make(list, ".*");
    private static Factory mainFactory = CLDRConfig.getInstance().getCldrFactory();
    private static SupplementalDataInfo supplementalData =
            SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
    private static StandardCodes standardCodes = StandardCodes.make();
    private static CLDRFile english = factory.make("en", false);
    /** Region-to-languages relation filled and frozen by the static initializer below. */
    static Relation<String, String> cldrContainerToLanguages =
            Relation.of(new HashMap<String, Set<String>>(), HashSet.class);

    // For every available CLDR locale whose region is non-empty and is not a leaf according to
    // Containment.isLeaf, record region -> language; then freeze the relation and print it.
    static {
        for (CLDRLocale locale :
                ToolConfig.getToolInstance().getCldrFactory().getAvailableCLDRLocales()) {
            String region = locale.getCountry();
            if (region == null || region.isEmpty() || Containment.isLeaf(region)) {
                continue;
            }
            cldrContainerToLanguages.put(region, locale.getLanguage());
        }
        cldrContainerToLanguages.freeze();
        System.out.println("Keep containers " + cldrContainerToLanguages);
    }

    private static final List<String> KEEP_TARGETS =
            Arrays.asList("und_Arab_PK", "und_Latn_ET", "hi_Latn");
    private static final ImmutableSet<String> deprecatedISONotInLST = ImmutableSet.of("scc", "scr");

    /**
     * This is the simplest way to override, by supplying the max value. It gets a very low weight,
     * so doesn't override any stronger value.
     */
    private static final String[] MAX_ADDITIONS =
            new String[] {
                "bss_Latn_CM",
                "gez_Ethi_ET",
                "ken_Latn_CM",
                "und_Arab_PK",
                "wa_Latn_BE",
                "fub_Arab_CM",
                "fuf_Latn_GN",
                "kby_Arab_NE",
                "kdh_Latn_TG",
                "apd_Arab_TG",
                "zlm_Latn_TG",
                "cr_Cans_CA",
                "hif_Latn_FJ",
                "gon_Telu_IN",
                "lzz_Latn_TR",
                "lif_Deva_NP",
                "unx_Beng_IN",
                "unr_Beng_IN",
                "ttt_Latn_AZ",
                "pnt_Grek_GR",
                "tly_Latn_AZ",
                "tkr_Latn_AZ",
                "bsq_Bass_LR",
                "ccp_Cakm_BD",
                "blt_Tavt_VN",
                "rhg_Arab_MM",
                "rhg_Rohg_MM",
                "clc_Latn_CA",
                "crg_Latn_CA",
                "hur_Latn_CA",
                "kwk_Latn_CA",
                "lil_Latn_CA",
                "ojs_Cans_CA",
                "oka_Latn_CA",
                "pqm_Latn_CA",
                "hi_Latn_IN",
                "no_Latn_NO",
                "tok_Latn_001",
                "prg_Latn_PL",
                "ie_Latn_EE",
            };

    /**
     * The following overrides MASH the final values, so they may not result in consistent results.
     * Safer is to add to MAX_ADDITIONS. However, if you add, add both the language and
     * language+script mappings.
     */
    // Many of the overrides below can be removed once the language/pop/country data is updated.
private static final Map<String, String> LANGUAGE_OVERRIDES =
            CldrUtility.asMap(
                    new String[][] {
                        {"cic", "cic_Latn_US"},
                        {"cic_Latn", "cic_Latn_US"},
                        {"eo", "eo_Latn_001"},
                        {"eo_Latn", "eo_Latn_001"},
                        {"es", "es_Latn_ES"},
                        {"es_Latn", "es_Latn_ES"},
                        {"ff_BF", "ff_Latn_BF"},
                        {"ff_GM", "ff_Latn_GM"},
                        {"ff_GH", "ff_Latn_GH"},
                        {"ff_GW", "ff_Latn_GW"},
                        {"ff_LR", "ff_Latn_LR"},
                        {"ff_NE", "ff_Latn_NE"},
                        {"ff_NG", "ff_Latn_NG"},
                        {"ff_SL", "ff_Latn_SL"},
                        {"ff_Adlm", "ff_Adlm_GN"},
                        {"ia", "ia_Latn_001"},
                        {"ia_Latn", "ia_Latn_001"},
                        {"io", "io_Latn_001"},
                        {"io_Latn", "io_Latn_001"},
                        {"jbo", "jbo_Latn_001"},
                        {"jbo_Latn", "jbo_Latn_001"},
                        {"ku_Arab", "ku_Arab_IQ"},
                        {"lrc", "lrc_Arab_IR"},
                        {"lrc_Arab", "lrc_Arab_IR"},
                        {"man", "man_Latn_GM"},
                        {"man_Latn", "man_Latn_GM"},
                        {"mas", "mas_Latn_KE"},
                        {"mas_Latn", "mas_Latn_KE"},
                        {"mn", "mn_Cyrl_MN"},
                        {"mn_Cyrl", "mn_Cyrl_MN"},
                        {"mro", "mro_Mroo_BD"},
                        {"mro_BD", "mro_Mroo_BD"},
                        {"ms_Arab", "ms_Arab_MY"},
                        {"pap", "pap_Latn_CW"},
                        {"pap_Latn", "pap_Latn_CW"},
                        {
                            "rif", "rif_Latn_MA"
                        }, // https://unicode-org.atlassian.net/browse/CLDR-14962?focusedCommentId=165053
                        {"rif_Latn", "rif_Latn_MA"},
                        {"rif_Tfng", "rif_Tfng_MA"},
                        {"rif_MA", "rif_Latn_MA"}, // Ibid
                        {"shi", "shi_Tfng_MA"},
                        {"shi_Tfng", "shi_Tfng_MA"},
                        {"shi_MA", "shi_Tfng_MA"},
                        {"sr_Latn", "sr_Latn_RS"},
                        {"ss", "ss_Latn_ZA"},
                        {"ss_Latn", "ss_Latn_ZA"},
                        {"swc", "swc_Latn_CD"},
                        {"ti", "ti_Ethi_ET"},
                        {"ti_Ethi", "ti_Ethi_ET"},
                        {LocaleNames.UND, "en_Latn_US"},
                        {"und_Adlm", "ff_Adlm_GN"},
                        {"und_Adlm_GN", "ff_Adlm_GN"},
                        {"und_Arab", "ar_Arab_EG"},
                        {"und_Arab_PK", "ur_Arab_PK"},
                        {"und_Bopo", "zh_Bopo_TW"},
                        {"und_Deva_FJ", "hif_Deva_FJ"},
                        {"und_EZ", "de_Latn_EZ"},
                        {"und_Hani", "zh_Hani_CN"},
                        {"und_Hani_CN", "zh_Hani_CN"},
                        {"und_Kana", "ja_Kana_JP"},
                        {"und_Kana_JP", "ja_Kana_JP"},
                        {"und_Latn", "en_Latn_US"},
                        {"und_001", "en_Latn_US"}, // to not be overridden by tok_Latn_001
                        {"und_Latn_001", "en_Latn_US"}, // to not be overridden by tok_Latn_001
                        {"und_Latn_ET", "en_Latn_ET"},
                        {"und_Latn_NE", "ha_Latn_NE"},
                        {"und_Latn_PH", "fil_Latn_PH"},
                        {"und_ML", "bm_Latn_ML"},
                        {"und_Latn_ML", "bm_Latn_ML"},
                        {"und_MU", "mfe_Latn_MU"},
                        {"und_NE", "ha_Latn_NE"},
                        {"und_PH", "fil_Latn_PH"},
                        {"und_PK", "ur_Arab_PK"},
                        {"und_SO", "so_Latn_SO"},
                        {"und_SS", "en_Latn_SS"},
                        {"und_TK", "tkl_Latn_TK"},
                        {"und_UN", "en_Latn_UN"},
                        {"und_005", "pt_Latn_BR"},
                        {"vo", "vo_Latn_001"},
                        {"vo_Latn", "vo_Latn_001"},
                        {"yi", "yi_Hebr_001"},
                        {"yi_Hebr", "yi_Hebr_001"},
                        {"yue", "yue_Hant_HK"},
                        {"yue_Hant", "yue_Hant_HK"},
                        {"yue_Hans", "yue_Hans_CN"},
                        {"yue_CN", "yue_Hans_CN"},
                        {"zh_Hani", "zh_Hani_CN"},
                        {"zh_Bopo", "zh_Bopo_TW"},
                        {"ccp", "ccp_Cakm_BD"},
                        {"ccp_Cakm", "ccp_Cakm_BD"},
                        {"und_Cakm", "ccp_Cakm_BD"},
                        {"cu_Glag", "cu_Glag_BG"},
                        {"sd_Khoj", "sd_Khoj_IN"},
                        {"lif_Limb", "lif_Limb_IN"},
                        {"grc_Linb", "grc_Linb_GR"},
                        {"arc_Nbat", "arc_Nbat_JO"},
                        {"arc_Palm", "arc_Palm_SY"},
                        {"pal_Phlp", "pal_Phlp_CN"},
                        {"en_Shaw", "en_Shaw_GB"},
                        {"sd_Sind", "sd_Sind_IN"},
                        {"und_Brai", "fr_Brai_FR"}, // hack
                        {"und_Hanb", "zh_Hanb_TW"}, // Special script code
                        {"zh_Hanb", "zh_Hanb_TW"}, // Special script code
                        {"und_Jamo", "ko_Jamo_KR"}, // Special script code

                        // {"und_Cyrl_PL", "be_Cyrl_PL"},

                        // {"cr", "cr_Cans_CA"},
                        // {"hif", "hif_Latn_FJ"},
                        // {"gon", "gon_Telu_IN"},
                        // {"lzz", "lzz_Latn_TR"},
                        // {"lif", "lif_Deva_NP"},
                        // {"unx", "unx_Beng_IN"},
                        // {"unr", "unr_Beng_IN"},
                        // {"ttt", "ttt_Latn_AZ"},
                        // {"pnt", "pnt_Grek_GR"},
                        // {"tly", "tly_Latn_AZ"},
                        // {"tkr", "tkr_Latn_AZ"},
                        // {"bsq", "bsq_Bass_LR"},
                        // {"ccp", "ccp_Cakm_BD"},
                        // {"blt", "blt_Tavt_VN"},
                        // { "mis_Medf", "mis_Medf_NG" },

                        {"ku_Yezi", "ku_Yezi_GE"},
                        {"und_EU", "en_Latn_IE"},
                        {"hnj", "hnj_Hmnp_US"}, // preferred lang/script in CLDR
                        {"hnj_Hmnp", "hnj_Hmnp_US"},
                        {"und_Hmnp", "hnj_Hmnp_US"},
                        {"rhg", "rhg_Rohg_MM"}, // preferred lang/script in CLDR
                        {"rhg_Arab", "rhg_Arab_MM"},
                        {"und_Arab_MM", "rhg_Arab_MM"},
                        {"sd_IN", "sd_Deva_IN"}, // preferred in CLDR
                        // { "sd_Deva", "sd_Deva_IN"},
                        {"und_Cpmn", "und_Cpmn_CY"},
                        {"oc_ES", "oc_Latn_ES"},
                        {"os", "os_Cyrl_GE"},
                        {"os_Cyrl", "os_Cyrl_GE"},
                    });

    /**
     * The following supplements the suppress-script. It overrides info from exemplars and the
     * locale info.
     */
    private static String[][] SpecialScripts = {
        {"zh", "Hans"}, // Hans (not Hani)
        {"yue", "Hant"}, // Hant (not Hani)
        {"chk", "Latn"}, // Chuukese (Micronesia)
        {"fil", "Latn"}, // Filipino (Philippines)
        {"ko", "Kore"}, // Korean
        {"ko_KR", "Kore"}, // Korean (South Korea)
        {"pap", "Latn"}, // Papiamento (Netherlands Antilles)
        {"pau", "Latn"}, // Palauan (Palau)
        {"su", "Latn"}, // Sundanese (Indonesia)
        {"tet", "Latn"}, // Tetum (East Timor)
        {"tk", "Latn"}, // Turkmen (Turkmenistan)
        {"ty", "Latn"}, // Tahitian (French Polynesia)
        {"ja", "Jpan"}, // Special script for japan
        {LocaleNames.UND, "Latn"}, // Ultimate fallback
    };

    /** Locale (or bare language) to script, filled by the static initializer below. */
    private static Map<String, String> localeToScriptCache = new TreeMap<>();

    // Seed the cache with each language's "Suppress-Script" value where one exists, then let the
    // SpecialScripts entries overwrite or extend those values.
    static {
        for (String language : standardCodes.getAvailableCodes("language")) {
            Map<String, String> info = standardCodes.getLangData("language", language);
            String script = info.get("Suppress-Script");
            if (script != null) {
                localeToScriptCache.put(language, script);
            }
        }
        for (String[] pair : SpecialScripts) {
localeToScriptCache.put(pair[0], pair[1]);
        }
    }

    /** Language to script, derived from MAX_ADDITIONS by the static initializer below. */
    private static Map<String, String> FALLBACK_SCRIPTS;

    // Parse every MAX_ADDITIONS tag and map its language to its script. A language listed more
    // than once (e.g. rhg_Arab_MM, then rhg_Rohg_MM) keeps the script of its last entry.
    static {
        LanguageTagParser additionLtp = new LanguageTagParser();
        Map<String, String> _FALLBACK_SCRIPTS = new TreeMap<>();
        for (String addition : MAX_ADDITIONS) {
            additionLtp.set(addition);
            String lan = additionLtp.getLanguage();
            _FALLBACK_SCRIPTS.put(lan, additionLtp.getScript());
        }
        FALLBACK_SCRIPTS = ImmutableMap.copyOf(_FALLBACK_SCRIPTS);
    }

    /** Running error count; main() reports it and exits with status 1 when it is non-zero. */
    private static int errorCount;

    /**
     * Tool entry point. It currently throws immediately (see the guard below), so nothing after
     * the guard runs. The code after it prints the default languages and scripts, builds and
     * minimizes the likely-subtags map, compares it with the existing data, and writes the
     * generated files.
     *
     * @param args not read by this method
     * @throws IOException declared for the file operations performed after the guard
     * @throws IllegalArgumentException always, until the guard is removed
     */
    public static void main(String[] args) throws IOException {
        // Deliberate kill switch: "if (true)" keeps the rest of the method compilable.
        if (true) {
            throw new IllegalArgumentException("Don't run this tool until it is fixed");
        }

        printDefaultLanguagesAndScripts();

        Map<String, String> toMaximized = new TreeMap<>();

        tryDifferentAlgorithm(toMaximized);

        minimize(toMaximized);

        // HACK TEMP_UNKNOWN_REGION
        // this is to get around the removal of items with ZZ in minimize.
        // probably cleaner way to do it, but this provides control over just those we want to
        // retain.
        // Changes are collected first and applied after the loop, so the map is not modified
        // while it is being iterated.
        Set<String> toRemove = new TreeSet<>();
        Map<String, String> toFix = new TreeMap<>();
        for (Entry<String, String> entry : toMaximized.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            if (key.contains(TEMP_UNKNOWN_REGION)) {
                toRemove.add(key);
            } else if (value.contains(TEMP_UNKNOWN_REGION)) {
                toFix.put(key, value.replace(TEMP_UNKNOWN_REGION, UNKNOWN_REGION));
            }
        }
        for (String key : toRemove) {
            toMaximized.remove(key);
        }
        toMaximized.putAll(toFix);

        Map<String, String> oldLikely = SupplementalDataInfo.getInstance().getLikelySubtags();
        Set<String> changes =
                compareMapsAndFixNew(
                        "*WARNING* Likely Subtags: ",
                        oldLikely,
                        toMaximized,
                        "ms_Arab",
                        "ms_Arab_ID");
        System.out.println(Joiner.on("\n").join(changes));

        if (OUTPUT_STYLE == OutputStyle.C_ALT) {
            doAlt(toMaximized);
        }

        // With SHOW_ADD set, print a comment block describing the maximize/minimize algorithms.
        if (SHOW_ADD)
            System.out.println(
                    "/*"
                            + CldrUtility.LINE_SEPARATOR
                            + " To Maximize:"
                            + CldrUtility.LINE_SEPARATOR
                            + " If using raw strings, make sure the input language/locale uses the right separator, and has the right casing."
                            + CldrUtility.LINE_SEPARATOR
                            + " Remove the script Zzzz and the region ZZ if they occur; change an empty language subtag to 'und'."
                            + CldrUtility.LINE_SEPARATOR
                            + " Get the language, region, and script from the cleaned-up tag, plus any variants/extensions"
                            + CldrUtility.LINE_SEPARATOR
                            + " Try each of the following in order (where the field exists)"
                            + CldrUtility.LINE_SEPARATOR
                            + " Lookup language-script-region. If in the table, return the result + variants"
                            + CldrUtility.LINE_SEPARATOR
                            + " Lookup language-script. If in the table, return the result (substituting the original region if it exists) + variants"
                            + CldrUtility.LINE_SEPARATOR
                            + " Lookup language-region. If in the table, return the result (substituting the original script if it exists) + variants"
                            + CldrUtility.LINE_SEPARATOR
                            + " Lookup language. If in the table, return the result (substituting the original region and script if either or both exist) + variants"
                            + CldrUtility.LINE_SEPARATOR
                            + CldrUtility.LINE_SEPARATOR
                            + " Example: Input is zh-ZZZZ-SG."
                            + CldrUtility.LINE_SEPARATOR
                            + " Normalize to zh-SG. Lookup in table. No match."
                            + CldrUtility.LINE_SEPARATOR
                            + " Remove SG, but remember it. Lookup zh, and get the match (zh-Hans-CN). Substitute SG, and return zh-Hans-SG."
                            + CldrUtility.LINE_SEPARATOR
                            + CldrUtility.LINE_SEPARATOR
                            + " To Minimize:"
                            + CldrUtility.LINE_SEPARATOR
                            + " First get max = maximize(input)."
                            + CldrUtility.LINE_SEPARATOR
                            + " Then for trial in {language, language-region, language-script}"
                            + CldrUtility.LINE_SEPARATOR
                            + " If maximize(trial) == max, then return trial."
                            + CldrUtility.LINE_SEPARATOR
                            + " If you don't get a match, return max."
                            + CldrUtility.LINE_SEPARATOR
                            + CldrUtility.LINE_SEPARATOR
                            + " Example: Input is zh-Hant. Maximize to get zh-Hant-TW."
                            + CldrUtility.LINE_SEPARATOR
                            + " zh => zh-Hans-CN. No match, so continue."
                            + CldrUtility.LINE_SEPARATOR
                            + " zh-TW => zh-Hans-TW. Match, so return zh-TW."
                            + CldrUtility.LINE_SEPARATOR
                            + CldrUtility.LINE_SEPARATOR
                            + " (A variant of this uses {language, language-script, language-region}): that is, tries script before language."
                            + CldrUtility.LINE_SEPARATOR
                            + " toMaximal size:\t"
                            + toMaximized.size()
                            + CldrUtility.LINE_SEPARATOR
                            + "*/");

        final File newLikelySubtags = printLikelySubtags(toMaximized);

        printDefaultContent(toMaximized);

        // Do this here so the two "Copying…" messages show up together.
499 if (OUTPUT_STYLE == OutputStyle.XML) { 500 final File oldLikelySubtags = 501 CLDRConfig.getInstance().getEnglish().getSupplementalFile("likelySubtags.xml"); 502 System.out.println("Copying " + newLikelySubtags + " to " + oldLikelySubtags); 503 oldLikelySubtags.delete(); 504 Files.copy(newLikelySubtags.toPath(), oldLikelySubtags.toPath()); 505 System.err.println("TODO: Please revert removal of 'sil1' entries, see CLDR-16380"); 506 } 507 508 System.out.println( 509 CldrUtility.LINE_SEPARATOR + "ERRORS:\t" + errorCount + CldrUtility.LINE_SEPARATOR); 510 511 System.exit(errorCount > 0 ? 1 : 0); 512 } 513 514 static class RowData implements Comparable<RowData> { 515 OfficialStatus os; 516 String name; 517 Long pop; 518 RowData(OfficialStatus os, String name, Long pop)519 public RowData(OfficialStatus os, String name, Long pop) { 520 this.os = os; 521 this.name = name; 522 this.pop = pop; 523 } 524 getStatus()525 public OfficialStatus getStatus() { 526 // TODO Auto-generated method stub 527 return os; 528 } 529 getName()530 public CharSequence getName() { 531 // TODO Auto-generated method stub 532 return name; 533 } 534 getLiteratePopulation()535 public Long getLiteratePopulation() { 536 // TODO Auto-generated method stub 537 return pop; 538 } 539 540 @Override compareTo(RowData o)541 public int compareTo(RowData o) { 542 // TODO Auto-generated method stub 543 int result = os.compareTo(o.os); 544 if (result != 0) return -result; 545 long result2 = pop - o.pop; 546 if (result2 != 0) return result2 < 0 ? 
1 : -1; 547 return name.compareTo(o.name); 548 } 549 550 @Override equals(Object o)551 public boolean equals(Object o) { 552 return 0 == compareTo((RowData) o); 553 } 554 555 @Override hashCode()556 public int hashCode() { 557 throw new UnsupportedOperationException(); 558 } 559 } 560 printDefaultLanguagesAndScripts()561 private static void printDefaultLanguagesAndScripts() { 562 563 final int minTotalPopulation = 10000000; 564 final int minTerritoryPopulation = 1000000; 565 final double minTerritoryPercent = 1.0 / 3; 566 Map<String, Set<RowData>> languageToReason = new TreeMap<>(); 567 Counter<String> languageToLiteratePopulation = new Counter<>(); 568 NumberFormat nf = NumberFormat.getIntegerInstance(ULocale.ENGLISH); 569 nf.setGroupingUsed(true); 570 LanguageTagParser ltp = new LanguageTagParser(); 571 LikelySubtags likelySubtags = new LikelySubtags(); 572 /* 573 * A. X is a qualified language**, and at least one of the following is true: 574 * 575 * 1. X is has official status* in any country 576 * 2. X exceeds a threshold population† of literate users worldwide: 1M 577 * 3. X exceeds a threshold population† in some country Z: 100K and 20% of Z's population†. 578 * 579 * B. X is an exception explicitly approved by the committee or X has minimal 580 * language coverage‡ in CLDR itself. 581 * C. 
The language is in the CLDR-target locales 582 */ 583 OfficialStatus minimalStatus = 584 OfficialStatus.official_regional; // OfficialStatus.de_facto_official; 585 Map<String, String> languages = new TreeMap<>(); 586 for (String language : standardCodes.getAvailableCodes("language")) { 587 String path = CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, language); 588 String result = english.getStringValue(path); 589 if (result != null) { 590 languages.put(language, result); 591 } 592 } 593 594 if (SHOW_ALL_LANGUAGE_CODES) { 595 for (String language : languages.keySet()) { 596 System.out.println(language + "\t" + languages.get(language)); 597 } 598 } else { 599 System.out.println( 600 "- GenerateMaximalLocales.java: SHOW_ALL_LANGUAGE_CODES=true to show all language codes"); 601 } 602 603 // also CLDR-target locales 604 final Set<String> CLDRMainLanguages = 605 new TreeSet<>(StandardCodes.make().getLocaleCoverageLocales(Organization.cldr)); 606 607 for (String territory : supplementalData.getTerritoriesWithPopulationData()) { 608 if (Iso3166Data.isRegionCodeNotForTranslation(territory)) { 609 System.out.println( 610 "Iso3166Data.isRegionCodeNotForTranslation(" 611 + territory 612 + ") true, skipping"); 613 continue; 614 } 615 PopulationData territoryPop = supplementalData.getPopulationDataForTerritory(territory); 616 double territoryPopulation = territoryPop.getLiteratePopulation(); 617 for (String languageScript : 618 supplementalData.getLanguagesForTerritoryWithPopulationData(territory)) { 619 PopulationData popData = 620 supplementalData.getLanguageAndTerritoryPopulationData( 621 languageScript, territory); 622 ltp.set(languageScript); 623 String language = ltp.getLanguage(); 624 // if (ltp.getScript().isEmpty()) { 625 // String max = likelySubtags.maximize(languageScript); 626 // if (max != null) { 627 // ltp.set(max).setRegion(""); 628 // languageScript = ltp.toString(); 629 // } 630 // } 631 boolean add = false; 632 // #1 633 OfficialStatus status = 
popData.getOfficialStatus(); 634 if (status.compareTo(minimalStatus) >= 0) { 635 add = true; 636 } 637 long literatePopulation = getWritingPopulation(popData); 638 // #2 639 languageToLiteratePopulation.add(language, literatePopulation); 640 // #3 641 if (literatePopulation > minTerritoryPopulation 642 && literatePopulation > minTerritoryPercent * territoryPopulation) { 643 add = true; 644 } 645 if (add == false && CLDRMainLanguages.contains(language)) { 646 add = true; 647 } 648 if (add) { 649 add(languageToReason, language, territory, status, literatePopulation); 650 Set<String> containers = Containment.leafToContainer(territory); 651 if (containers == null) { 652 throw new NullPointerException( 653 "Containment.leafToContainer(" + territory + ") is null"); 654 } 655 // Add the containing regions 656 for (String container : containers) { 657 add( 658 languageToReason, 659 language, 660 container, 661 OfficialStatus.unknown, 662 literatePopulation); 663 } 664 } 665 } 666 } 667 // #2, now that we have the data 668 for (String language : languageToLiteratePopulation.keySet()) { 669 long totalPop = languageToLiteratePopulation.getCount(language); 670 if (totalPop > minTotalPopulation) { 671 add(languageToReason, language, "001", OfficialStatus.unknown, totalPop); 672 } 673 } 674 675 // Specials 676 add(languageToReason, LocaleNames.UND, "001", OfficialStatus.unknown, 0); 677 678 // for (String language : Iso639Data.getAvailable()) { 679 // Scope scope = Iso639Data.getScope(language); 680 // Type type = Iso639Data.getType(language); 681 // if (scope == Scope.Special) { 682 // add(languageToReason, language, "001", OfficialStatus.unknown, -1); 683 // } 684 // } 685 // print them 686 687 System.out.println("Detailed - Including:\t" + languageToReason.size()); 688 689 if (!SHOW_DETAILED) { 690 System.out.println( 691 "- GenerateMaximalLocales.java: SHOW_DETAILED=true to show more details"); 692 } else { 693 for (String language : languageToReason.keySet()) { 694 
Set<RowData> reasons = languageToReason.get(language); 695 696 RowData lastReason = reasons.iterator().next(); 697 698 System.out 699 .append(language) 700 .append("\t") 701 .append(english.getName(language)) 702 .append("\t") 703 .append(lastReason.getStatus().toShortString()) 704 .append("\t") 705 .append(nf.format(languageToLiteratePopulation.getCount(language))); 706 for (RowData reason : reasons) { 707 String status = reason.getStatus().toShortString(); 708 System.out 709 .append("\t") 710 .append(status) 711 .append("-") 712 .append(reason.getName()) 713 .append("-") 714 .append(nf.format(reason.getLiteratePopulation())); 715 } 716 System.out.append("\n"); 717 } 718 } 719 720 // now list them 721 722 Set<String> others = new TreeSet<>(); 723 others.addAll(standardCodes.getGoodAvailableCodes("language")); 724 others.removeAll(languageToReason.keySet()); 725 System.out.println("\nIncluded Languages:\t" + languageToReason.keySet().size()); 726 if (SHOW_INCLUDED_EXCLUDED) { 727 showLanguages(languageToReason.keySet(), languageToReason); 728 } 729 System.out.println("\nExcluded Languages:\t" + others.size()); 730 if (SHOW_INCLUDED_EXCLUDED) { 731 showLanguages(others, languageToReason); 732 } else { 733 System.out.println( 734 " - GenerateMaximalLocales.java: set SHOW_INCLUDED_EXCLUDED=true to show reason details"); 735 } 736 } 737 getWritingPopulation(PopulationData popData)738 private static long getWritingPopulation(PopulationData popData) { 739 final double writingPopulation = popData.getWritingPopulation(); 740 if (!Double.isNaN(writingPopulation)) { 741 return (long) writingPopulation; 742 } 743 return (long) popData.getLiteratePopulation(); 744 } 745 showLanguages( Set<String> others, Map<String, Set<RowData>> languageToReason)746 private static void showLanguages( 747 Set<String> others, Map<String, Set<RowData>> languageToReason) { 748 Set<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ENGLISH)); 749 for (String language : others) { 750 
sorted.add(getLanguageName(language, languageToReason)); 751 } 752 char last = 0; 753 for (String language : sorted) { 754 final char curr = language.charAt(0); 755 if (last != curr) { 756 System.out.println(); 757 } else if (last != '\u0000') { 758 System.out.print(", "); 759 } 760 System.out.print(language); 761 last = curr; 762 } 763 System.out.println(); 764 } 765 getLanguageName( String language, Map<String, Set<RowData>> languageToReason)766 private static String getLanguageName( 767 String language, Map<String, Set<RowData>> languageToReason) { 768 OfficialStatus best = OfficialStatus.unknown; 769 Set<RowData> reasons = languageToReason.get(language); 770 if (reasons != null) { 771 for (RowData reason : reasons) { 772 final OfficialStatus currentStatus = reason.getStatus(); 773 if (best.compareTo(currentStatus) < 0) { 774 best = currentStatus; 775 } 776 } 777 } 778 String status = best.toShortString(); 779 Scope scope = Iso639Data.getScope(language); 780 if (scope == Scope.Special) { 781 status = "S"; 782 } 783 String languageFormatted = english.getName(language) + " [" + language + "]-" + status; 784 return languageFormatted; 785 } 786 add( Map<String, Set<RowData>> languageToReason, String language, String territoryRaw, OfficialStatus status, long population)787 private static void add( 788 Map<String, Set<RowData>> languageToReason, 789 String language, 790 String territoryRaw, 791 OfficialStatus status, 792 long population) { 793 String territory = english.getName("territory", territoryRaw) + " [" + territoryRaw + "]"; 794 Set<RowData> set = languageToReason.get(language); 795 if (set == null) { 796 languageToReason.put(language, set = new TreeSet<>()); 797 } 798 set.add(new RowData(status, territory, population)); 799 } 800 801 /** In computing the defaultContents, no and nb require special handling. 
*/ 802 static final Map<String, String> SPECIAL_CHILD_TO_PARENT = 803 ImmutableMap.of("nb", "no", "nb_NO", "nb"); 804 805 /* 806 * Compute the defaultContent values for supplemental data. 807 * It uses the maximization data and the simpleParent (truncation). 808 * We can't use the normal "getParent" because that messes up the logic 809 * used to handle inconsistencies in scripts in CLDR.<br> 810 * That is, there are three situations: <ul> 811 * <li>all children have explicit scripts; </li> 812 * <li>no children have scripts; and </li> 813 * <li>some do and some don't</li></ul> 814 */ 815 printDefaultContent(Map<String, String> toMaximized)816 private static void printDefaultContent(Map<String, String> toMaximized) throws IOException { 817 818 Set<String> defaultLocaleContent = new TreeSet<>(); 819 820 // go through all the cldr locales, and add default contents 821 // now computed from toMaximized 822 Set<String> available = factory.getAvailable(); 823 Relation<String, String> toSimpleChildren = 824 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 825 LanguageTagParser ltp = new LanguageTagParser(); 826 827 // System.out.println(maximize("az_Latn_AZ", toMaximized)); 828 Set<String> hasSimpleChildWithScript = new TreeSet<>(); 829 830 // first get a mapping to children 831 for (String locale : available) { 832 if (locale.equals(LocaleNames.ROOT)) { 833 continue; 834 } 835 if (ltp.set(locale).getVariants().size() != 0) { 836 continue; 837 } 838 String parent = SPECIAL_CHILD_TO_PARENT.get(locale); 839 if (parent == null) { 840 parent = 841 LocaleIDParser.getSimpleParent( 842 locale); // we can't use the regular getParent (see above) 843 } 844 845 if (ltp.getScript().length() != 0) { 846 hasSimpleChildWithScript.add(parent); 847 } 848 if (parent.equals(LocaleNames.ROOT)) { 849 continue; 850 } 851 toSimpleChildren.put(parent, locale); 852 } 853 854 // Suppress script for locales for which we only have one locale in common/main. See ticket 855 // #7834. 
856 Set<String> suppressScriptLocales = 857 new HashSet<>( 858 Arrays.asList( 859 "bm_ML", "en_US", "ha_NG", "iu_CA", "ms_MY", "mn_MN", "byn_ER", 860 "ff_SN", "dyo_SN", "kk_KZ", "ku_TR", "ky_KG", "ml_IN", "so_SO", 861 "sw_TZ", "wo_SN", "yo_NG", "dje_NE", "blt_VN", "hi_IN", "nv_US", 862 "doi_IN")); 863 864 // if any have a script, then throw out any that don't have a script (unless they're 865 // specifically included.) 866 Set<String> toRemove = new TreeSet<>(); 867 for (String locale : hasSimpleChildWithScript) { 868 toRemove.clear(); 869 Set<String> children = toSimpleChildren.getAll(locale); 870 for (String child : children) { 871 if (ltp.set(child).getScript().length() == 0 872 && !suppressScriptLocales.contains(child)) { 873 toRemove.add(child); 874 } 875 } 876 if (toRemove.size() != 0) { 877 System.out.println( 878 "\tRemoving:\t" + locale + "\t" + toRemove + "\tfrom\t" + children); 879 toSimpleChildren.removeAll(locale, toRemove); 880 } 881 } 882 883 // we add a child as a default locale if it has the same maximization 884 main: 885 for (String locale : toSimpleChildren.keySet()) { 886 String maximized = maximize(locale, toMaximized); 887 if (maximized == null) { 888 if (SHOW_ADD) System.out.println("Missing maximized:\t" + locale); 889 continue; 890 } 891 Set<String> children = toSimpleChildren.getAll(locale); 892 Map<String, String> debugStuff = new TreeMap<>(); 893 for (String child : children) { 894 String maximizedChild = maximize(child, toMaximized); 895 if (maximized.equals(maximizedChild)) { 896 defaultLocaleContent.add(child); 897 continue main; 898 } 899 debugStuff.put(child, maximizedChild); 900 } 901 if (SHOW_ADD) 902 System.out.println( 903 "Can't find maximized: " 904 + locale 905 + "=" 906 + maximized 907 + "\tin\t" 908 + debugStuff); 909 } 910 911 for (String specialChild : SPECIAL_CHILD_TO_PARENT.keySet()) { 912 defaultLocaleContent.add(specialChild); 913 } 914 defaultLocaleContent.remove("und_ZZ"); // und_ZZ isn't ever a real locale. 
(old sandbox) 915 defaultLocaleContent.remove("mul_ZZ"); // mul_ZZ isn't ever a real locale. 916 917 showDefaultContentDifferencesAndFix(defaultLocaleContent); 918 919 final File genSuppDir = new File(CLDRPaths.GEN_DIRECTORY, "supplemental"); 920 final File genSuppMetadataFile = new File(genSuppDir, "supplementalMetadata.xml"); 921 final File oldSuppMetadataFile = 922 new File(CLDRPaths.SUPPLEMENTAL_DIRECTORY, "supplementalMetadata.xml"); 923 924 try (PrintWriter genFile = FileUtilities.openUTF8Writer(genSuppMetadataFile); 925 BufferedReader oldFile = FileUtilities.openUTF8Reader(oldSuppMetadataFile); ) { 926 CldrUtility.copyUpTo( 927 oldFile, 928 PatternCache.get("\\s*<defaultContent locales=\"\\s*"), 929 genFile, 930 false); 931 932 String sep = CldrUtility.LINE_SEPARATOR + "\t\t\t"; 933 String broken = 934 CldrUtility.breakLines( 935 CldrUtility.join(defaultLocaleContent, " "), 936 sep, 937 PatternCache.get("(\\S)\\S*").matcher(""), 938 80); 939 940 genFile.println("\t\t<defaultContent locales=\"" + broken + "\""); 941 genFile.println("\t\t/>"); 942 943 // genFile.println("</supplementalData>"); 944 CldrUtility.copyUpTo( 945 oldFile, 946 PatternCache.get("\\s*/>\\s*(<!--.*)?"), 947 null, 948 true); // skip to matching > 949 CldrUtility.copyUpTo(oldFile, null, genFile, true); // copy the rest 950 } 951 952 // Move it into place 953 System.out.println( 954 "Copying generated " + genSuppMetadataFile + " to " + oldSuppMetadataFile); 955 oldSuppMetadataFile.delete(); 956 Files.copy(genSuppMetadataFile.toPath(), oldSuppMetadataFile.toPath()); 957 } 958 959 private static class MaxData { 960 Relation<String, Row.R3<Double, String, String>> languages = 961 Relation.of( 962 new TreeMap<String, Set<Row.R3<Double, String, String>>>(), TreeSet.class); 963 Map<String, Counter<String>> languagesToScripts = new TreeMap<>(); 964 Map<String, Counter<String>> languagesToRegions = new TreeMap<>(); 965 966 Relation<String, Row.R3<Double, String, String>> scripts = 967 Relation.of( 
968 new TreeMap<String, Set<Row.R3<Double, String, String>>>(), TreeSet.class); 969 Map<String, Counter<String>> scriptsToLanguages = new TreeMap<>(); 970 Map<String, Counter<String>> scriptsToRegions = new TreeMap<>(); 971 972 Relation<String, Row.R3<Double, String, String>> regions = 973 Relation.of( 974 new TreeMap<String, Set<Row.R3<Double, String, String>>>(), TreeSet.class); 975 Map<String, Counter<String>> regionsToLanguages = new TreeMap<>(); 976 Map<String, Counter<String>> regionsToScripts = new TreeMap<>(); 977 978 Map<String, Counter<Row.R2<String, String>>> containersToLanguage = new TreeMap<>(); 979 Relation<String, Row.R4<Double, String, String, String>> containersToLangRegion = 980 Relation.of( 981 new TreeMap<String, Set<Row.R4<Double, String, String, String>>>(), 982 TreeSet.class); 983 984 Relation<Row.R2<String, String>, Row.R2<Double, String>> languageScripts = 985 Relation.of( 986 new TreeMap<Row.R2<String, String>, Set<Row.R2<Double, String>>>(), 987 TreeSet.class); 988 Relation<Row.R2<String, String>, Row.R2<Double, String>> scriptRegions = 989 Relation.of( 990 new TreeMap<Row.R2<String, String>, Set<Row.R2<Double, String>>>(), 991 TreeSet.class); 992 Relation<Row.R2<String, String>, Row.R2<Double, String>> languageRegions = 993 Relation.of( 994 new TreeMap<Row.R2<String, String>, Set<Row.R2<Double, String>>>(), 995 TreeSet.class); 996 997 /** 998 * Add population information. "order" is the negative of the population (makes the first be 999 * the highest). 
1000 * 1001 * @param language 1002 * @param script 1003 * @param region 1004 * @param order 1005 */ add(String language, String script, String region, Double order)1006 void add(String language, String script, String region, Double order) { 1007 if (SHOW_ADD && language.equals(LocaleNames.MIS)) { 1008 System.out.println(language + "\t" + script + "\t" + region + "\t" + -order); 1009 } 1010 languages.put(language, Row.of(order, script, region)); 1011 // addCounter(languagesToScripts, language, script, order); 1012 // addCounter(languagesToRegions, language, region, order); 1013 1014 scripts.put(script, Row.of(order, language, region)); 1015 // addCounter(scriptsToLanguages, script, language, order); 1016 // addCounter(scriptsToRegions, script, region, order); 1017 1018 regions.put(region, Row.of(order, language, script)); 1019 // addCounter(regionsToLanguages, region, language, order); 1020 // addCounter(regionsToScripts, region, script, order); 1021 1022 languageScripts.put(Row.of(language, script), Row.of(order, region)); 1023 scriptRegions.put(Row.of(script, region), Row.of(order, language)); 1024 languageRegions.put(Row.of(language, region), Row.of(order, script)); 1025 1026 Set<String> containerSet = Containment.leafToContainer(region); 1027 if (containerSet != null) { 1028 for (String container : containerSet) { 1029 1030 containersToLangRegion.put(container, Row.of(order, language, script, region)); 1031 Counter<R2<String, String>> data = containersToLanguage.get(container); 1032 if (data == null) { 1033 containersToLanguage.put(container, data = new Counter<>()); 1034 } 1035 data.add(Row.of(language, script), (long) (double) order); 1036 } 1037 } 1038 1039 if (SHOW_ADD) 1040 System.out.println( 1041 "Data:\t" + language + "\t" + script + "\t" + region + "\t" + order); 1042 } 1043 // private void addCounter(Map<String, Counter<String>> map, String key, String key2, Double 1044 // count) { 1045 // Counter<String> counter = map.get(key); 1046 // if (counter == 
null) { 1047 // map.put(key, counter = new Counter<String>()); 1048 // } 1049 // counter.add(key2, count.longValue()); 1050 // } 1051 } 1052 1053 private static final double MIN_UNOFFICIAL_LANGUAGE_SIZE = 10000000; 1054 private static final double MIN_UNOFFICIAL_LANGUAGE_PROPORTION = 0.20; 1055 private static final double MIN_UNOFFICIAL_CLDR_LANGUAGE_SIZE = 100000; 1056 private static final double UNOFFICIAL_SCALE_DOWN = 0.2; 1057 1058 private static NumberFormat percent = NumberFormat.getPercentInstance(); 1059 private static NumberFormat number = NumberFormat.getIntegerInstance(); 1060 tryDifferentAlgorithm(Map<String, String> toMaximized)1061 private static void tryDifferentAlgorithm(Map<String, String> toMaximized) { 1062 // we are going to try a different approach. 1063 // first gather counts for maximized values 1064 // Set<Row.R3<String,String,String>,Double> rowsToCounts = new TreeMap(); 1065 MaxData maxData = new MaxData(); 1066 Set<String> cldrLocales = factory.getAvailable(); 1067 Set<String> otherTerritories = 1068 new TreeSet<>(standardCodes.getGoodAvailableCodes("territory")); 1069 1070 // process all the information to get the top values for each triple. 1071 // each of the combinations of 1 or 2 components gets to be a key. 1072 for (String region : supplementalData.getTerritoriesWithPopulationData()) { 1073 otherTerritories.remove(region); 1074 PopulationData regionData = supplementalData.getPopulationDataForTerritory(region); 1075 final double literateTerritoryPopulation = regionData.getLiteratePopulation(); 1076 // we need any unofficial language to meet a certain absolute size requirement and 1077 // proportion size 1078 // requirement. 1079 // so the bar is x percent of the population, reset up to y absolute size. 
1080 double minimalLiteratePopulation = 1081 literateTerritoryPopulation * MIN_UNOFFICIAL_LANGUAGE_PROPORTION; 1082 if (minimalLiteratePopulation < MIN_UNOFFICIAL_LANGUAGE_SIZE) { 1083 minimalLiteratePopulation = MIN_UNOFFICIAL_LANGUAGE_SIZE; 1084 } 1085 1086 for (String writtenLanguage : 1087 supplementalData.getLanguagesForTerritoryWithPopulationData(region)) { 1088 PopulationData data = 1089 supplementalData.getLanguageAndTerritoryPopulationData( 1090 writtenLanguage, region); 1091 final double literatePopulation = 1092 getWritingPopulation(data); // data.getLiteratePopulation(); 1093 double order = -literatePopulation; // negative so we get the inverse order 1094 1095 if (data.getOfficialStatus() == OfficialStatus.unknown) { 1096 final String locale = writtenLanguage + "_" + region; 1097 if (literatePopulation >= minimalLiteratePopulation) { 1098 // ok, skip 1099 } else if (literatePopulation >= MIN_UNOFFICIAL_CLDR_LANGUAGE_SIZE 1100 && cldrLocales.contains(locale)) { 1101 // ok, skip 1102 } else { 1103 // if (SHOW_ADD) 1104 // System.out.println("Skipping:\t" + writtenLanguage + "\t" + region + "\t" 1105 // + english.getName(locale) 1106 // + "\t-- too small:\t" + number.format(literatePopulation)); 1107 // continue; 1108 } 1109 order *= UNOFFICIAL_SCALE_DOWN; 1110 if (SHOW_ADD) 1111 System.out.println( 1112 "Retaining\t" 1113 + writtenLanguage 1114 + "\t" 1115 + region 1116 + "\t" 1117 + english.getName(locale) 1118 + "\t" 1119 + number.format(literatePopulation) 1120 + "\t" 1121 + percent.format( 1122 literatePopulation / literateTerritoryPopulation) 1123 + (cldrLocales.contains(locale) ? 
"\tin-CLDR" : "")); 1124 } 1125 String script; 1126 String language = writtenLanguage; 1127 final int pos = writtenLanguage.indexOf('_'); 1128 if (pos > 0) { 1129 language = writtenLanguage.substring(0, pos); 1130 script = writtenLanguage.substring(pos + 1); 1131 } else { 1132 script = getScriptForLocale2(language); 1133 } 1134 maxData.add(language, script, region, order); 1135 } 1136 } 1137 1138 LanguageTagParser additionLtp = new LanguageTagParser(); 1139 1140 for (String addition : MAX_ADDITIONS) { 1141 additionLtp.set(addition); 1142 String lan = additionLtp.getLanguage(); 1143 Set<R3<Double, String, String>> key = maxData.languages.get(lan); 1144 if (key == null) { 1145 maxData.add(lan, additionLtp.getScript(), additionLtp.getRegion(), 1.0); 1146 } else { 1147 int debug = 0; 1148 } 1149 } 1150 1151 for (Entry<String, Collection<String>> entry : 1152 DeriveScripts.getLanguageToScript().asMap().entrySet()) { 1153 String language = entry.getKey(); 1154 final Collection<String> values = entry.getValue(); 1155 if (values.size() != 1) { 1156 continue; // skip, no either way 1157 } 1158 Set<R3<Double, String, String>> old = maxData.languages.get(language); 1159 if (!maxData.languages.containsKey(language)) { 1160 maxData.add(language, values.iterator().next(), TEMP_UNKNOWN_REGION, 1.0); 1161 } 1162 } 1163 1164 // add others, with English default 1165 for (String region : otherTerritories) { 1166 if (region.length() == 3) continue; // FIX ONCE WE ADD REGIONS 1167 maxData.add("en", "Latn", region, 1.0); 1168 } 1169 1170 // get a reverse mapping, so that we can add the aliases 1171 1172 Map<String, R2<List<String>, String>> languageAliases = 1173 SupplementalDataInfo.getInstance().getLocaleAliasInfo().get("language"); 1174 for (Entry<String, R2<List<String>, String>> str : languageAliases.entrySet()) { 1175 String reason = str.getValue().get1(); 1176 if ("overlong".equals(reason) 1177 || "bibliographic".equals(reason) 1178 || "macrolanguage".equals(reason)) { 1179 
continue; 1180 } 1181 List<String> replacements = str.getValue().get0(); 1182 if (replacements == null) { 1183 continue; 1184 } 1185 String goodLanguage = replacements.get(0); 1186 1187 String badLanguage = str.getKey(); 1188 if (badLanguage.contains("_")) { 1189 continue; 1190 } 1191 if (deprecatedISONotInLST.contains(badLanguage)) { 1192 continue; 1193 } 1194 Set<R3<Double, String, String>> goodLanguageData = 1195 maxData.languages.getAll(goodLanguage); 1196 if (goodLanguageData == null) { 1197 continue; 1198 } 1199 R3<Double, String, String> value = goodLanguageData.iterator().next(); 1200 final String script = value.get1(); 1201 final String region = value.get2(); 1202 maxData.add(badLanguage, script, region, 1.0); 1203 System.out.println( 1204 "Adding aliases: " 1205 + badLanguage 1206 + ", " 1207 + script 1208 + ", " 1209 + region 1210 + ", " 1211 + reason); 1212 } 1213 1214 // now, get the best for each one 1215 for (String language : maxData.languages.keySet()) { 1216 R3<Double, String, String> value = maxData.languages.getAll(language).iterator().next(); 1217 final Comparable<String> script = value.get1(); 1218 final Comparable<String> region = value.get2(); 1219 add( 1220 language, 1221 language + "_" + script + "_" + region, 1222 toMaximized, 1223 "L->SR", 1224 LocaleOverride.REPLACE_EXISTING, 1225 SHOW_ADD); 1226 } 1227 for (String language : maxData.languagesToScripts.keySet()) { 1228 String script = 1229 maxData.languagesToScripts 1230 .get(language) 1231 .getKeysetSortedByCount(true) 1232 .iterator() 1233 .next(); 1234 add( 1235 language, 1236 language + "_" + script, 1237 toMaximized, 1238 "L->S", 1239 LocaleOverride.REPLACE_EXISTING, 1240 SHOW_ADD); 1241 } 1242 for (String language : maxData.languagesToRegions.keySet()) { 1243 String region = 1244 maxData.languagesToRegions 1245 .get(language) 1246 .getKeysetSortedByCount(true) 1247 .iterator() 1248 .next(); 1249 add( 1250 language, 1251 language + "_" + region, 1252 toMaximized, 1253 "L->R", 1254 
LocaleOverride.REPLACE_EXISTING, 1255 SHOW_ADD); 1256 } 1257 1258 for (String script : maxData.scripts.keySet()) { 1259 R3<Double, String, String> value = maxData.scripts.getAll(script).iterator().next(); 1260 final Comparable<String> language = value.get1(); 1261 final Comparable<String> region = value.get2(); 1262 add( 1263 "und_" + script, 1264 language + "_" + script + "_" + region, 1265 toMaximized, 1266 "S->LR", 1267 LocaleOverride.REPLACE_EXISTING, 1268 SHOW_ADD); 1269 } 1270 for (String script : maxData.scriptsToLanguages.keySet()) { 1271 String language = 1272 maxData.scriptsToLanguages 1273 .get(script) 1274 .getKeysetSortedByCount(true) 1275 .iterator() 1276 .next(); 1277 add( 1278 "und_" + script, 1279 language + "_" + script, 1280 toMaximized, 1281 "S->L", 1282 LocaleOverride.REPLACE_EXISTING, 1283 SHOW_ADD); 1284 } 1285 for (String script : maxData.scriptsToRegions.keySet()) { 1286 String region = 1287 maxData.scriptsToRegions 1288 .get(script) 1289 .getKeysetSortedByCount(true) 1290 .iterator() 1291 .next(); 1292 add( 1293 "und_" + script, 1294 "und_" + script + "_" + region, 1295 toMaximized, 1296 "S->R", 1297 LocaleOverride.REPLACE_EXISTING, 1298 SHOW_ADD); 1299 } 1300 1301 for (String region : maxData.regions.keySet()) { 1302 R3<Double, String, String> value = maxData.regions.getAll(region).iterator().next(); 1303 final Comparable<String> language = value.get1(); 1304 final Comparable<String> script = value.get2(); 1305 add( 1306 "und_" + region, 1307 language + "_" + script + "_" + region, 1308 toMaximized, 1309 "R->LS", 1310 LocaleOverride.REPLACE_EXISTING, 1311 SHOW_ADD); 1312 } 1313 for (String region : maxData.regionsToLanguages.keySet()) { 1314 String language = 1315 maxData.regionsToLanguages 1316 .get(region) 1317 .getKeysetSortedByCount(true) 1318 .iterator() 1319 .next(); 1320 add( 1321 "und_" + region, 1322 language + "_" + region, 1323 toMaximized, 1324 "R->L", 1325 LocaleOverride.REPLACE_EXISTING, 1326 SHOW_ADD); 1327 } 1328 for 
(String region : maxData.regionsToScripts.keySet()) { 1329 String script = 1330 maxData.regionsToScripts 1331 .get(region) 1332 .getKeysetSortedByCount(true) 1333 .iterator() 1334 .next(); 1335 add( 1336 "und_" + region, 1337 "und_" + script + "_" + region, 1338 toMaximized, 1339 "R->S", 1340 LocaleOverride.REPLACE_EXISTING, 1341 SHOW_ADD); 1342 } 1343 1344 for (Entry<String, Counter<R2<String, String>>> containerAndInfo : 1345 maxData.containersToLanguage.entrySet()) { 1346 String region = containerAndInfo.getKey(); 1347 if (region.equals("001")) { 1348 continue; 1349 } 1350 Counter<R2<String, String>> data = containerAndInfo.getValue(); 1351 Set<R2<String, String>> keysetSortedByCount = data.getKeysetSortedByCount(true); 1352 if (SHOW_CONTAINERS) { // debug 1353 System.out.println( 1354 "Container2L:\t" 1355 + region 1356 + "\t" 1357 + shorten(data.getEntrySetSortedByCount(true, null))); 1358 System.out.println( 1359 "Container2LR:\t" 1360 + region 1361 + "\t" 1362 + maxData.containersToLangRegion.get(region)); 1363 } 1364 R2<String, String> value = 1365 keysetSortedByCount.iterator().next(); // will get most negative 1366 final Comparable<String> language = value.get0(); 1367 final Comparable<String> script = value.get1(); 1368 1369 // fix special cases like es-419, where a locale exists. 1370 // for those cases, what we add as output is the container. Otherwise the region. 1371 Set<String> skipLanguages = cldrContainerToLanguages.get(region); 1372 if (skipLanguages != null && skipLanguages.contains(language)) { 1373 add( 1374 "und_" + region, 1375 language + "_" + script + "_" + region, 1376 toMaximized, 1377 "R*->LS", 1378 LocaleOverride.REPLACE_EXISTING, 1379 SHOW_ADD); 1380 continue; 1381 } 1382 1383 // we now have the best language and script. 
Find the best region for that 1384 for (R4<Double, String, String, String> e : 1385 maxData.containersToLangRegion.get(region)) { 1386 final Comparable<String> language2 = e.get1(); 1387 final Comparable<String> script2 = e.get2(); 1388 if (language2.equals(language) && script2.equals(script)) { 1389 add( 1390 "und_" + region, 1391 language + "_" + script + "_" + e.get3(), 1392 toMaximized, 1393 "R*->LS", 1394 LocaleOverride.REPLACE_EXISTING, 1395 SHOW_ADD); 1396 break; 1397 } 1398 } 1399 } 1400 1401 for (R2<String, String> languageScript : maxData.languageScripts.keySet()) { 1402 R2<Double, String> value = 1403 maxData.languageScripts.getAll(languageScript).iterator().next(); 1404 final Comparable<String> language = languageScript.get0(); 1405 final Comparable<String> script = languageScript.get1(); 1406 final Comparable<String> region = value.get1(); 1407 add( 1408 language + "_" + script, 1409 language + "_" + script + "_" + region, 1410 toMaximized, 1411 "LS->R", 1412 LocaleOverride.REPLACE_EXISTING, 1413 SHOW_ADD); 1414 } 1415 1416 for (R2<String, String> scriptRegion : maxData.scriptRegions.keySet()) { 1417 R2<Double, String> value = maxData.scriptRegions.getAll(scriptRegion).iterator().next(); 1418 final Comparable<String> script = scriptRegion.get0(); 1419 final Comparable<String> region = scriptRegion.get1(); 1420 final Comparable<String> language = value.get1(); 1421 add( 1422 "und_" + script + "_" + region, 1423 language + "_" + script + "_" + region, 1424 toMaximized, 1425 "SR->L", 1426 LocaleOverride.REPLACE_EXISTING, 1427 SHOW_ADD); 1428 } 1429 1430 for (R2<String, String> languageRegion : maxData.languageRegions.keySet()) { 1431 R2<Double, String> value = 1432 maxData.languageRegions.getAll(languageRegion).iterator().next(); 1433 final Comparable<String> language = languageRegion.get0(); 1434 final Comparable<String> region = languageRegion.get1(); 1435 final Comparable<String> script = value.get1(); 1436 add( 1437 language + "_" + region, 1438 
language + "_" + script + "_" + region, 1439 toMaximized, 1440 "LR->S", 1441 LocaleOverride.REPLACE_EXISTING, 1442 SHOW_ADD); 1443 } 1444 1445 // get the script info from metadata as fallback 1446 1447 TreeSet<String> sorted = new TreeSet<>(ScriptMetadata.getScripts()); 1448 for (String script : sorted) { 1449 Info i = ScriptMetadata.getInfo(script); 1450 String likelyLanguage = i.likelyLanguage; 1451 if (LANGUAGE_CODE_TO_STATUS.get(likelyLanguage) == Status.special) { 1452 likelyLanguage = LocaleNames.UND; 1453 } 1454 String originCountry = i.originCountry; 1455 final String result = likelyLanguage + "_" + script + "_" + originCountry; 1456 add( 1457 "und_" + script, 1458 result, 1459 toMaximized, 1460 "S->LR•", 1461 LocaleOverride.KEEP_EXISTING, 1462 SHOW_ADD); 1463 add( 1464 likelyLanguage, 1465 result, 1466 toMaximized, 1467 "L->SR•", 1468 LocaleOverride.KEEP_EXISTING, 1469 SHOW_ADD); 1470 } 1471 1472 // add overrides 1473 for (String key : LANGUAGE_OVERRIDES.keySet()) { 1474 add( 1475 key, 1476 LANGUAGE_OVERRIDES.get(key), 1477 toMaximized, 1478 "OVERRIDE", 1479 LocaleOverride.REPLACE_EXISTING, 1480 true); 1481 } 1482 1483 // Make sure that the mapping is Idempotent. If we have A ==> B, we must never have B ==> C 1484 // We run this check until we get no problems. 
1485 Set<List<String>> problems = new HashSet<>(); 1486 1487 while (true) { 1488 problems.clear(); 1489 for (Entry<String, String> entry : toMaximized.entrySet()) { 1490 String source = entry.getKey(); 1491 String target = entry.getValue(); 1492 if (target.contains("_Zzzz") || target.contains("_ZZ")) { // these are special cases 1493 continue; 1494 } 1495 String idempotentCandidate = LikelySubtags.maximize(target, toMaximized); 1496 1497 if (idempotentCandidate == null) { 1498 System.out.println("Can't maximize " + target); 1499 } else if (!idempotentCandidate.equals(target)) { 1500 problems.add(ImmutableList.of(source, target, idempotentCandidate)); 1501 } 1502 } 1503 if (problems.isEmpty()) { 1504 break; 1505 } 1506 for (List<String> row : problems) { 1507 System.out.println( 1508 "Idempotence: dropping mapping " 1509 + row.get(0) 1510 + " to " 1511 + row.get(1) 1512 + " since the target maps further to " 1513 + row.get(2)); 1514 toMaximized.remove(row.get(0)); 1515 } 1516 } 1517 } 1518 shorten(Object data)1519 public static String shorten(Object data) { 1520 String info = data.toString(); 1521 if (info.length() > 255) { 1522 info = info.substring(0, 127) + "…"; 1523 } 1524 return info; 1525 } 1526 doAlt(Map<String, String> toMaximized)1527 private static void doAlt(Map<String, String> toMaximized) { 1528 // TODO Auto-generated method stub 1529 Map<String, String> temp = new TreeMap<>(); 1530 for (String locale : toMaximized.keySet()) { 1531 String target = toMaximized.get(locale); 1532 temp.put(toAlt(locale, true), toAlt(target, true)); 1533 } 1534 toMaximized.clear(); 1535 toMaximized.putAll(temp); 1536 } 1537 maximize(String languageTag, Map<String, String> toMaximized)1538 public static String maximize(String languageTag, Map<String, String> toMaximized) { 1539 LanguageTagParser ltp = new LanguageTagParser(); 1540 1541 // clean up the input by removing Zzzz, ZZ, and changing "" into und. 
1542 ltp.set(languageTag); 1543 String language = ltp.getLanguage(); 1544 String region = ltp.getRegion(); 1545 String script = ltp.getScript(); 1546 boolean changed = false; 1547 if (language.equals("")) { 1548 ltp.setLanguage(language = LocaleNames.UND); 1549 changed = true; 1550 } 1551 if (region.equals(UNKNOWN_SCRIPT)) { 1552 ltp.setScript(script = ""); 1553 changed = true; 1554 } 1555 if (ltp.getRegion().equals(UNKNOWN_REGION)) { 1556 ltp.setRegion(region = ""); 1557 changed = true; 1558 } 1559 if (changed) { 1560 languageTag = ltp.toString(); 1561 } 1562 // check whole 1563 String result = toMaximized.get(languageTag); 1564 if (result != null) { 1565 return result; 1566 } 1567 // try empty region 1568 if (region.length() != 0) { 1569 result = toMaximized.get(ltp.setRegion("").toString()); 1570 if (result != null) { 1571 return ltp.set(result).setRegion(region).toString(); 1572 } 1573 ltp.setRegion(region); // restore 1574 } 1575 // try empty script 1576 if (script.length() != 0) { 1577 result = toMaximized.get(ltp.setScript("").toString()); 1578 if (result != null) { 1579 return ltp.set(result).setScript(script).toString(); 1580 } 1581 // try empty script and region 1582 if (region.length() != 0) { 1583 result = toMaximized.get(ltp.setRegion("").toString()); 1584 if (result != null) { 1585 return ltp.set(result).setScript(script).setRegion(region).toString(); 1586 } 1587 } 1588 } 1589 if (!language.equals(LocaleNames.UND) && script.length() != 0 && region.length() != 0) { 1590 return languageTag; // it was ok, and we couldn't do anything with it 1591 } 1592 return null; // couldn't maximize 1593 } 1594 minimize( String input, Map<String, String> toMaximized, boolean favorRegion)1595 public static String minimize( 1596 String input, Map<String, String> toMaximized, boolean favorRegion) { 1597 if (input.equals("nb_Latn_SJ")) { 1598 System.out.print(""); // debug 1599 } 1600 String maximized = maximize(input, toMaximized); 1601 if (maximized == null) { 1602 
return null; // failed 1603 } 1604 LanguageTagParser ltp = new LanguageTagParser().set(maximized); 1605 String language = ltp.getLanguage(); 1606 String region = ltp.getRegion(); 1607 String script = ltp.getScript(); 1608 // try building up from shorter to longer, and find the first that matches 1609 // could be more optimized, but for this code we want simplest 1610 String[] trials = { 1611 language, 1612 language + TAG_SEPARATOR + (favorRegion ? region : script), 1613 language + TAG_SEPARATOR + (!favorRegion ? region : script) 1614 }; 1615 for (String trial : trials) { 1616 String newMaximized = maximize(trial, toMaximized); 1617 if (maximized.equals(newMaximized)) { 1618 return trial; 1619 } 1620 } 1621 return maximized; 1622 } 1623 1624 // /** 1625 // * Verify that we can map from each language, script, and country to something. 1626 // * @param toMaximized 1627 // */ 1628 // private static void checkConsistency(Map<String, String> toMaximized) { 1629 // Map<String,String> needMappings = new TreeMap(); 1630 // LanguageTagParser parser = new LanguageTagParser(); 1631 // for (String maximized : new TreeSet<String>(toMaximized.values())) { 1632 // parser.set(maximized); 1633 // final String language = parser.getLanguage(); 1634 // final String script = parser.getScript(); 1635 // final String region = parser.getRegion(); 1636 // if (language.length() == 0 || script.length() == 0 || region.length() == 0) { 1637 // failure(" { \"" + maximized + "\", \"" + maximized + "\" }, // " + 1638 // english.getName(maximized) + 1639 // "\t\tFailed-Consistency"); 1640 // continue; 1641 // } 1642 // addIfNotIn(language, maximized, needMappings, toMaximized, "Consistency"); 1643 // addIfNotIn(language + "_" + script, maximized, needMappings, toMaximized, "Consistency"); 1644 // addIfNotIn(language + "_" + region, maximized, needMappings, toMaximized, "Consistency"); 1645 // addIfNotIn("und_" + script, maximized, needMappings, toMaximized, "Consistency"); 1646 // addIfNotIn("und_" 
+ script + "_" + region, maximized, needMappings, toMaximized, 1647 // "Consistency"); 1648 // addIfNotIn("und_" + region, maximized, needMappings, toMaximized, "Consistency"); 1649 // } 1650 // toMaximized.putAll(needMappings); 1651 // } 1652 1653 // private static void failure(String string) { 1654 // System.out.println(string); 1655 // errorCount++; 1656 // } 1657 1658 // private static void addIfNotIn(String key, String value, Map<String, String> toAdd, 1659 // Map<String, String> 1660 // otherToCheck, String kind) { 1661 // addIfNotIn(key, value, toAdd, otherToCheck == null ? null : otherToCheck.keySet(), null, 1662 // kind); 1663 // } 1664 1665 // private static void addIfNotIn(String key, String value, Map<String, String> toAdd, 1666 // Set<String> skipKey, 1667 // Set<String> skipValue, String kind) { 1668 // if (!key.equals(value) 1669 // && !toAdd.containsKey(key) 1670 // && (skipKey == null || !skipKey.contains(key)) 1671 // && (skipValue == null || !skipValue.contains(value))) { 1672 // add(key, value, toAdd, kind); 1673 // } 1674 // } 1675 1676 enum LocaleOverride { 1677 KEEP_EXISTING, 1678 REPLACE_EXISTING 1679 } 1680 add( String key, String value, Map<String, String> toAdd, String kind, LocaleOverride override, boolean showAction)1681 private static void add( 1682 String key, 1683 String value, 1684 Map<String, String> toAdd, 1685 String kind, 1686 LocaleOverride override, 1687 boolean showAction) { 1688 if (SHOW_ADD && key.startsWith(LocaleNames.MIS)) { 1689 int debug = 1; 1690 } 1691 if (key.equals(DEBUG_ADD_KEY)) { 1692 System.out.println("*debug*"); 1693 } 1694 String oldValue = toAdd.get(key); 1695 if (oldValue == null) { 1696 if (showAction) { 1697 System.out.println( 1698 "\tAdding:\t\t" 1699 + getName(key) 1700 + "\t=>\t" 1701 + getName(value) 1702 + "\t\t\t\t" 1703 + kind); 1704 } 1705 } else if (override == LocaleOverride.KEEP_EXISTING || value.equals(oldValue)) { 1706 // if (showAction) { 1707 // System.out.println("Skipping:\t" + key + 
"\t=>\t" + value + "\t\t\t\t" + kind); 1708 // } 1709 return; 1710 } else { 1711 if (showAction) { 1712 System.out.println( 1713 "\tReplacing:\t" 1714 + getName(key) 1715 + "\t=>\t" 1716 + getName(value) 1717 + "\t, was\t" 1718 + getName(oldValue) 1719 + "\t\t" 1720 + kind); 1721 } 1722 } 1723 toAdd.put(key, value); 1724 } 1725 getName(String value)1726 private static String getName(String value) { 1727 return ConvertLanguageData.getLanguageCodeAndName(value); 1728 } 1729 printLikelySubtags(Map<String, String> fluffup)1730 private static File printLikelySubtags(Map<String, String> fluffup) throws IOException { 1731 final File genDir = new File(CLDRPaths.GEN_DIRECTORY, "supplemental"); 1732 final File genFile = 1733 new File( 1734 genDir, 1735 "likelySubtags" + (OUTPUT_STYLE == OutputStyle.XML ? ".xml" : ".txt")); 1736 System.out.println("Writing to " + genFile); 1737 1738 try (PrintWriter out = FileUtilities.openUTF8Writer(genFile)) { 1739 String spacing = OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t" : " "; 1740 String header = 1741 OUTPUT_STYLE != OutputStyle.XML 1742 ? "const MapToMaximalSubtags default_subtags[] = {" 1743 : "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" 1744 + CldrUtility.LINE_SEPARATOR 1745 + "<!DOCTYPE supplementalData SYSTEM \"../../common/dtd/ldmlSupplemental.dtd\">" 1746 + CldrUtility.LINE_SEPARATOR 1747 + "<!--" 1748 + CldrUtility.LINE_SEPARATOR 1749 + CldrUtility.getCopyrightString() 1750 + CldrUtility.LINE_SEPARATOR 1751 + "-->" 1752 + CldrUtility.LINE_SEPARATOR 1753 + "<!--" 1754 + CldrUtility.LINE_SEPARATOR 1755 + "Likely subtags data is generated programatically from CLDR's language/territory/population" 1756 + CldrUtility.LINE_SEPARATOR 1757 + "data using the GenerateMaximalLocales tool. Under normal circumstances, this file should" 1758 + CldrUtility.LINE_SEPARATOR 1759 + "not be patched by hand, as any changes made in that fashion may be lost." 
1760 + CldrUtility.LINE_SEPARATOR 1761 + "-->" 1762 + CldrUtility.LINE_SEPARATOR 1763 + "<supplementalData>" 1764 + CldrUtility.LINE_SEPARATOR 1765 + " <version number=\"$" 1766 + "Revision$\"/>" 1767 + CldrUtility.LINE_SEPARATOR 1768 + " <likelySubtags>"; 1769 String footer = 1770 OUTPUT_STYLE != OutputStyle.XML 1771 ? SEPARATOR + "};" 1772 : " </likelySubtags>" 1773 + CldrUtility.LINE_SEPARATOR 1774 + "</supplementalData>"; 1775 out.println(header); 1776 boolean first = true; 1777 Set<String> keys = new TreeSet<>(new LocaleStringComparator()); 1778 keys.addAll(fluffup.keySet()); 1779 for (String printingLocale : keys) { 1780 String printingTarget = fluffup.get(printingLocale); 1781 String comment = 1782 printingName(printingLocale, spacing) 1783 + spacing 1784 + "=>" 1785 + spacing 1786 + printingName(printingTarget, spacing); 1787 1788 if (OUTPUT_STYLE == OutputStyle.XML) { 1789 out.println( 1790 "\t\t<likelySubtag from=\"" 1791 + printingLocale 1792 + "\" to=\"" 1793 + printingTarget 1794 + "\"" 1795 + "/>" 1796 + CldrUtility.LINE_SEPARATOR 1797 + "\t\t" 1798 + "<!--" 1799 + comment 1800 + "-->"); 1801 } else { 1802 if (first) { 1803 first = false; 1804 } else { 1805 out.print(","); 1806 } 1807 if (comment.length() > 70 && SEPARATOR.equals(CldrUtility.LINE_SEPARATOR)) { 1808 comment = 1809 printingName(printingLocale, spacing) 1810 + SEPARATOR 1811 + " // " 1812 + spacing 1813 + "=>" 1814 + spacing 1815 + printingName(printingTarget, spacing); 1816 } 1817 out.print( 1818 " {" 1819 + SEPARATOR 1820 + " // " 1821 + comment 1822 + SEPARATOR 1823 + " \"" 1824 + printingLocale 1825 + "\"," 1826 + SEPARATOR 1827 + " \"" 1828 + printingTarget 1829 + "\"" 1830 + CldrUtility.LINE_SEPARATOR 1831 + " }"); 1832 } 1833 } 1834 out.println(footer); 1835 out.close(); 1836 } 1837 return genFile; 1838 } 1839 printingName(String locale, String spacing)1840 public static String printingName(String locale, String spacing) { 1841 if (locale == null) { 1842 return null; 1843 } 1844 
LanguageTagParser parser = new LanguageTagParser().set(locale); 1845 String lang = parser.getLanguage(); 1846 String script = parser.getScript(); 1847 String region = parser.getRegion(); 1848 return "{" 1849 + spacing 1850 + (lang.equals(LocaleNames.UND) 1851 ? "?" 1852 : english.getName(CLDRFile.LANGUAGE_NAME, lang)) 1853 + ";" 1854 + spacing 1855 + (script == null || script.equals("") 1856 ? "?" 1857 : english.getName(CLDRFile.SCRIPT_NAME, script)) 1858 + ";" 1859 + spacing 1860 + (region == null || region.equals("") 1861 ? "?" 1862 : english.getName(CLDRFile.TERRITORY_NAME, region)) 1863 + spacing 1864 + "}"; 1865 } 1866 1867 private static final String[][] ALT_REVERSAL = { 1868 // { "no", "nb" }, 1869 // { "nb", "no" }, 1870 {"he", "iw"}, 1871 {"iw", "he"}, 1872 }; 1873 toAlt(String locale, boolean change)1874 public static String toAlt(String locale, boolean change) { 1875 if (!change || locale == null) { 1876 return locale; 1877 } 1878 String firstTag = getFirstTag(locale); 1879 for (String[] pair : ALT_REVERSAL) { 1880 if (firstTag.equals(pair[0])) { 1881 locale = pair[1] + locale.substring(pair[1].length()); 1882 break; 1883 } 1884 } 1885 locale = locale.replace("_", "-"); 1886 return locale; 1887 } 1888 getFirstTag(String locale)1889 private static String getFirstTag(String locale) { 1890 int pos = locale.indexOf('_'); 1891 return pos < 0 ? 
locale : locale.substring(0, pos); 1892 } 1893 1894 // private static Map<String, String> getBackMapping(Map<String, String> fluffup) { 1895 // Relation<String,String> backMap = new Relation(new TreeMap(), TreeSet.class, 1896 // BEST_LANGUAGE_COMPARATOR); 1897 // for (String source : fluffup.keySet()) { 1898 // if (source.startsWith(LocaleNames.UND)) { 1899 // continue; 1900 // } 1901 // String maximized = fluffup.get(source); 1902 // backMap.put(maximized, source); // put in right order 1903 // } 1904 // Map<String,String> returnBackMap = new TreeMap(); 1905 // for (String maximized : backMap.keySet()) { 1906 // final Set<String> all = backMap.getAll(maximized); 1907 // final String minimized = all.iterator().next(); 1908 // returnBackMap.put(maximized, minimized); 1909 // } 1910 // return returnBackMap; 1911 // } 1912 1913 /** 1914 * Language tags are presumed to share the first language, except possibly LocaleNames.UND. Best 1915 * is least 1916 */ 1917 // private static Comparator BEST_LANGUAGE_COMPARATOR = new Comparator<String>() { 1918 // LanguageTagParser p1 = new LanguageTagParser(); 1919 // LanguageTagParser p2 = new LanguageTagParser(); 1920 // public int compare(String o1, String o2) { 1921 // if (o1.equals(o2)) return 0; 1922 // p1.set(o1); 1923 // p2.set(o2); 1924 // String lang1 = p1.getLanguage(); 1925 // String lang2 = p2.getLanguage(); 1926 // 1927 // // compare languages first 1928 // // put und at the end 1929 // int result = lang1.compareTo(lang2); 1930 // if (result != 0) { 1931 // if (lang1.equals(LocaleNames.UND)) return 1; 1932 // if (lang2.equals(LocaleNames.UND)) return -1; 1933 // return result; 1934 // } 1935 // 1936 // // now scripts and regions. 1937 // // if they have different numbers of fields, the shorter wins. 1938 // // If there are two fields, region is lowest. 
1939 // // The simplest way is to just compare scripts first 1940 // // so zh-TW < zh-Hant, because we first compare "" to Hant 1941 // String script1 = p1.getScript(); 1942 // String script2 = p2.getScript(); 1943 // int scriptOrder = script1.compareTo(script2); 1944 // if (scriptOrder != 0) return scriptOrder; 1945 // 1946 // String region1 = p1.getRegion(); 1947 // String region2 = p2.getRegion(); 1948 // int regionOrder = region1.compareTo(region2); 1949 // if (regionOrder != 0) return regionOrder; 1950 // 1951 // return o1.compareTo(o2); 1952 // } 1953 // 1954 // }; 1955 minimize(Map<String, String> fluffup)1956 public static void minimize(Map<String, String> fluffup) { 1957 LanguageTagParser parser = new LanguageTagParser(); 1958 LanguageTagParser targetParser = new LanguageTagParser(); 1959 Set<String> removals = new TreeSet<>(); 1960 while (true) { 1961 removals.clear(); 1962 for (String locale : fluffup.keySet()) { 1963 String target = fluffup.get(locale); 1964 if (targetParser.set(target).getRegion().equals(UNKNOWN_REGION)) { 1965 removals.add(locale); 1966 if (SHOW_ADD) 1967 System.out.println( 1968 "Removing:\t" 1969 + getName(locale) 1970 + "\t=>\t" 1971 + getName(target) 1972 + "\t\t - Unknown Region in target"); 1973 continue; 1974 } 1975 if (targetParser.getScript().equals(UNKNOWN_SCRIPT)) { 1976 removals.add(locale); 1977 if (SHOW_ADD) 1978 System.out.println( 1979 "Removing:\t" 1980 + getName(locale) 1981 + "\t=>\t" 1982 + getName(target) 1983 + "\t\t - Unknown Script in target"); 1984 continue; 1985 } 1986 1987 String region = parser.set(locale).getRegion(); 1988 if (region.length() != 0) { 1989 if (region.equals(UNKNOWN_REGION)) { 1990 removals.add(locale); 1991 if (SHOW_ADD) 1992 System.out.println( 1993 "Removing:\t" 1994 + getName(locale) 1995 + "\t=>\t" 1996 + getName(target) 1997 + "\t\t - Unknown Region in source"); 1998 continue; 1999 } 2000 parser.setRegion(""); 2001 String newLocale = parser.toString(); 2002 String newTarget = 
fluffup.get(newLocale); 2003 if (newTarget != null) { 2004 newTarget = targetParser.set(newTarget).setRegion(region).toString(); 2005 if (target.equals(newTarget) && !KEEP_TARGETS.contains(locale)) { 2006 removals.add(locale); 2007 if (SHOW_ADD) 2008 System.out.println( 2009 "Removing:\t" 2010 + locale 2011 + "\t=>\t" 2012 + target 2013 + "\t\tRedundant with " 2014 + newLocale); 2015 continue; 2016 } 2017 } 2018 } 2019 String script = parser.set(locale).getScript(); 2020 if (locale.equals(DEBUG_ADD_KEY)) { 2021 System.out.println("*debug*"); 2022 } 2023 if (script.length() != 0) { 2024 if (script.equals(UNKNOWN_SCRIPT)) { 2025 removals.add(locale); 2026 if (SHOW_ADD) 2027 System.out.println( 2028 "Removing:\t" 2029 + locale 2030 + "\t=>\t" 2031 + target 2032 + "\t\t - Unknown Script"); 2033 continue; 2034 } 2035 parser.setScript(""); 2036 String newLocale = parser.toString(); 2037 String newTarget = fluffup.get(newLocale); 2038 if (newTarget != null) { 2039 newTarget = targetParser.set(newTarget).setScript(script).toString(); 2040 if (target.equals(newTarget) && !KEEP_TARGETS.contains(locale)) { 2041 removals.add(locale); 2042 if (SHOW_ADD) 2043 System.out.println( 2044 "Removing:\t" 2045 + locale 2046 + "\t=>\t" 2047 + target 2048 + "\t\tRedundant with " 2049 + newLocale); 2050 continue; 2051 } 2052 } 2053 } 2054 } 2055 if (removals.size() == 0) { 2056 break; 2057 } 2058 for (String locale : removals) { 2059 fluffup.remove(locale); 2060 } 2061 } 2062 } 2063 2064 // private static void addLanguageScript(Map<String, String> fluffup, LanguageTagParser parser) 2065 // { 2066 // // add script 2067 // Map<String, String> temp = new TreeMap<String, String>(); 2068 // while (true) { 2069 // temp.clear(); 2070 // for (String target : new TreeSet<String>(fluffup.values())) { 2071 // parser.set(target); 2072 // final String territory = parser.getRegion(); 2073 // if (territory.length() == 0) { 2074 // continue; 2075 // } 2076 // parser.setRegion(""); 2077 // String 
possibleSource = parser.toString(); 2078 // if (fluffup.containsKey(possibleSource)) { 2079 // continue; 2080 // } 2081 // String other = temp.get(possibleSource); 2082 // if (other != null) { 2083 // if (!target.equals(other)) { 2084 // System.out.println("**Failure with multiple sources in addLanguageScript: " 2085 // + possibleSource + "\t=>\t" + target + ", " + other); 2086 // } 2087 // continue; 2088 // } 2089 // temp.put(possibleSource, target); 2090 // if (SHOW_ADD) System.out.println("Adding:\t" + possibleSource + "\t=>\t" + target + 2091 // "\t\tLanguage-Script"); 2092 // } 2093 // if (temp.size() == 0) { 2094 // break; 2095 // } 2096 // fluffup.putAll(temp); 2097 // } 2098 // 2099 // } 2100 2101 // private static void addLanguageCountry(Map<String, String> fluffup, LanguageTagParser parser) 2102 // { 2103 // // add script 2104 // Map<String, String> temp = new TreeMap<String, String>(); 2105 // while (true) { 2106 // temp.clear(); 2107 // for (String target : new TreeSet<String>(fluffup.values())) { 2108 // parser.set(target); 2109 // String script = parser.getScript(); 2110 // if (script.length() == 0) { 2111 // continue; 2112 // } 2113 // parser.setScript(""); 2114 // String possibleSource = parser.toString(); 2115 // if (fluffup.containsKey(possibleSource)) { 2116 // continue; 2117 // } 2118 // String other = temp.get(possibleSource); 2119 // 2120 // if (other != null) { 2121 // if (!target.equals(other)) { 2122 // script = getScriptForLocale(possibleSource); 2123 // if (script == null) { 2124 // System.out.println("**Failure with multiple sources in addLanguageCountry: " 2125 // + possibleSource + "\t=>\t" + target + ", " + other); 2126 // continue; // error message in routine 2127 // } 2128 // parser.setScript(script); 2129 // target = parser.toString(); 2130 // } 2131 // } 2132 // 2133 // temp.put(possibleSource, target); 2134 // if (SHOW_ADD) System.out.println("Adding:\t" + possibleSource + "\t=>\t" + target + 2135 // "\t\tLanguageCountry"); 2136 
// } 2137 // if (temp.size() == 0) { 2138 // break; 2139 // } 2140 // fluffup.putAll(temp); 2141 // } 2142 // 2143 // } 2144 2145 // private static void addScript(Map<String, String> fluffup, LanguageTagParser parser) { 2146 // // add script 2147 // Map<String, String> temp = new TreeMap<String, String>(); 2148 // while (true) { 2149 // temp.clear(); 2150 // Set skipTarget = fluffup.keySet(); 2151 // for (String locale : fluffup.keySet()) { 2152 // String target = fluffup.get(locale); 2153 // parser.set(target); 2154 // if (parser.getScript().length() != 0) { 2155 // continue; 2156 // } 2157 // String script = getScriptForLocale(target); 2158 // 2159 // if (script == null) { 2160 // continue; // error message in routine 2161 // } 2162 // parser.setScript(script); 2163 // String furtherTarget = parser.toString(); 2164 // addIfNotIn(target, furtherTarget, temp, fluffup, "Script"); 2165 // } 2166 // if (temp.size() == 0) { 2167 // break; 2168 // } 2169 // fluffup.putAll(temp); 2170 // } 2171 // } 2172 2173 // private static String getScriptForLocale(String locale) { 2174 // String result = getScriptForLocale2(locale); 2175 // if (result != null) return result; 2176 // int pos = locale.indexOf('_'); 2177 // if (pos >= 0) { 2178 // result = getScriptForLocale2(locale.substring(0,pos)); 2179 // } 2180 // return result; 2181 // } 2182 2183 private static String UNKNOWN_SCRIPT = "Zzzz"; 2184 private static String UNKNOWN_REGION = "ZZ"; 2185 getScriptForLocale2(String locale)2186 private static String getScriptForLocale2(String locale) { 2187 String result = localeToScriptCache.get(locale); 2188 if (result != null) { 2189 return result; 2190 } 2191 if (locale.equals("ky")) { 2192 int debug = 0; 2193 } 2194 try { 2195 Map<Type, BasicLanguageData> data = supplementalData.getBasicLanguageDataMap(locale); 2196 if (data != null) { 2197 for (BasicLanguageData datum : data.values()) { 2198 final Set<String> scripts = datum.getScripts(); 2199 boolean isPrimary = datum.getType() == 
BasicLanguageData.Type.primary; 2200 if (scripts.size() != 1) { 2201 if (scripts.size() > 1 && isPrimary) { 2202 break; 2203 } 2204 continue; 2205 } 2206 String script = scripts.iterator().next(); 2207 if (isPrimary) { 2208 return result = script; 2209 } else if (result == null) { 2210 result = script; 2211 } 2212 } 2213 if (result != null) { 2214 return result; 2215 } 2216 } 2217 CLDRFile cldrFile; 2218 try { 2219 cldrFile = factory.make(locale, true); 2220 } catch (RuntimeException e) { 2221 result = FALLBACK_SCRIPTS.get(locale); 2222 if (result == null) { 2223 System.err.println( 2224 "***Failed to find script in L-S-R or MAX_ADDITIONS for: " 2225 + locale 2226 + "\t" 2227 + english.getName(locale)); 2228 return result = UNKNOWN_SCRIPT; 2229 } else { 2230 return result; 2231 } 2232 } 2233 UnicodeSet exemplars = getExemplarSet(cldrFile, ""); 2234 Set<String> CLDRScripts = getScriptsFromUnicodeSet(exemplars); 2235 CLDRScripts.remove(UNKNOWN_SCRIPT); 2236 if (CLDRScripts.size() == 1) { 2237 return result = CLDRScripts.iterator().next(); 2238 } else if (CLDRScripts.size() == 0) { 2239 System.out.println("**Failed to get script for:\t" + locale); 2240 return result = UNKNOWN_SCRIPT; 2241 } else { 2242 System.out.println( 2243 "**Failed, too many scripts for:\t" + locale + ", " + CLDRScripts); 2244 return result = UNKNOWN_SCRIPT; 2245 } 2246 } finally { 2247 if (result.equals(UNKNOWN_SCRIPT)) { 2248 String temp = LANGUAGE_OVERRIDES.get(locale); 2249 if (temp != null) { 2250 result = new LanguageTagParser().set(temp).getScript(); 2251 System.err.println( 2252 "***Warning, Getting script from LANGUAGE_OVERRIDES for " 2253 + locale 2254 + " => " 2255 + result); 2256 } 2257 } 2258 localeToScriptCache.put(locale, result); 2259 if (SHOW_ADD) 2260 System.out.println( 2261 "Script:\t" 2262 + locale 2263 + "\t" 2264 + english.getName(locale) 2265 + "\t=>\t" 2266 + result 2267 + "\t" 2268 + english.getName(CLDRFile.SCRIPT_NAME, result)); 2269 } 2270 } 2271 2272 // private 
static Map<String, String> closeMapping(Map<String, String> fluffup) { 2273 // if (SHOW_ADD) System.out.flush(); 2274 // Map<String,String> temp = new TreeMap<String,String>(); 2275 // while (true) { 2276 // temp.clear(); 2277 // for (String locale : fluffup.keySet()) { 2278 // String target = fluffup.get(locale); 2279 // if (target.equals("si_Sinh") || target.equals("zh-Hani")) { 2280 // System.out.println("????"); 2281 // } 2282 // String furtherTarget = fluffup.get(target); 2283 // if (furtherTarget == null) { 2284 // continue; 2285 // } 2286 // addIfNotIn(locale, furtherTarget, temp, null, "Close"); 2287 // } 2288 // if (temp.size() == 0) { 2289 // break; 2290 // } 2291 // fluffup.putAll(temp); 2292 // } 2293 // if (SHOW_ADD) System.out.flush(); 2294 // return temp; 2295 // } 2296 getScriptsFromUnicodeSet(UnicodeSet exemplars)2297 public static Set<String> getScriptsFromUnicodeSet(UnicodeSet exemplars) { 2298 // use bits first, since that's faster 2299 BitSet scriptBits = new BitSet(); 2300 boolean show = false; 2301 for (UnicodeSetIterator it = new UnicodeSetIterator(exemplars); it.next(); ) { 2302 if (show) System.out.println(Integer.toHexString(it.codepoint)); 2303 if (it.codepoint != UnicodeSetIterator.IS_STRING) { 2304 scriptBits.set(UScript.getScript(it.codepoint)); 2305 } else { 2306 int cp; 2307 for (int i = 0; i < it.string.length(); i += UTF16.getCharCount(cp)) { 2308 scriptBits.set(UScript.getScript(cp = UTF16.charAt(it.string, i))); 2309 } 2310 } 2311 } 2312 scriptBits.clear(UScript.COMMON); 2313 scriptBits.clear(UScript.INHERITED); 2314 Set<String> scripts = new TreeSet<>(); 2315 for (int j = 0; j < scriptBits.size(); ++j) { 2316 if (scriptBits.get(j)) { 2317 scripts.add(UScript.getShortName(j)); 2318 } 2319 } 2320 return scripts; 2321 } 2322 getExemplarSet(CLDRFile cldrfile, String type)2323 public static UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) { 2324 if (type.length() != 0) type = "[@type=\"" + type + "\"]"; 2325 String v = 
cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type); 2326 if (v == null) return new UnicodeSet(); 2327 return new UnicodeSet(v); 2328 } 2329 2330 // private static String[][] SpecialCases = { 2331 // { "zh_Hani", "zh_Hans_CN"}, 2332 // { "si_Sinh", "si_Sinh_LK"}, 2333 // { "ii", "ii_CN"}, // Sichuan Yi (Yi) 2334 // { "iu", "iu_CA"}, // Inuktitut (Unified Canadian Aboriginal Syllabics) 2335 // { LocaleNames.UND, "en"}, // English default 2336 // }; 2337 showDefaultContentDifferencesAndFix(Set<String> defaultLocaleContent)2338 static void showDefaultContentDifferencesAndFix(Set<String> defaultLocaleContent) { 2339 Set<String> errors = new LinkedHashSet<>(); 2340 Map<String, String> oldDefaultContent = 2341 SupplementalDataInfo.makeLocaleToDefaultContents( 2342 ConvertLanguageData.supplementalData.getDefaultContentLocales(), 2343 new TreeMap<String, String>(), 2344 errors); 2345 if (!errors.isEmpty()) { 2346 System.out.println(Joiner.on("\n").join(errors)); 2347 errors.clear(); 2348 } 2349 Map<String, String> newDefaultContent = 2350 SupplementalDataInfo.makeLocaleToDefaultContents( 2351 defaultLocaleContent, new TreeMap<String, String>(), errors); 2352 if (!errors.isEmpty()) { 2353 System.out.println("Default Content errors: " + Joiner.on("\n").join(errors)); 2354 errors.clear(); 2355 } 2356 Set<String> changes = 2357 compareMapsAndFixNew( 2358 "*WARNING* Default Content: ", 2359 oldDefaultContent, 2360 newDefaultContent, 2361 "ar", 2362 "ar_001"); 2363 System.out.println(Joiner.on("\n").join(changes)); 2364 defaultLocaleContent.clear(); 2365 defaultLocaleContent.addAll(newDefaultContent.values()); 2366 newDefaultContent = 2367 SupplementalDataInfo.makeLocaleToDefaultContents( 2368 defaultLocaleContent, new TreeMap<String, String>(), errors); 2369 if (!errors.isEmpty()) { 2370 System.out.println("***New Errors: " + Joiner.on("\n").join(errors)); 2371 } 2372 } 2373 compareMapsAndFixNew( String title, Map<String, String> oldContent, Map<String, 
String> newContent, String... allowedOverrideValues)2374 private static Set<String> compareMapsAndFixNew( 2375 String title, 2376 Map<String, String> oldContent, 2377 Map<String, String> newContent, 2378 String... allowedOverrideValues) { 2379 Map<String, String> allowedOverrideValuesTest = new HashMap<>(); 2380 for (int i = 0; i < allowedOverrideValues.length; i += 2) { 2381 allowedOverrideValuesTest.put(allowedOverrideValues[i], allowedOverrideValues[i + 1]); 2382 } 2383 Set<String> changes = new TreeSet<>(); 2384 for (String parent : 2385 Builder.with(new TreeSet<String>()) 2386 .addAll(newContent.keySet()) 2387 .addAll(oldContent.keySet()) 2388 .get()) { 2389 String oldValue = oldContent.get(parent); 2390 String newValue = newContent.get(parent); 2391 String overrideValue = allowedOverrideValuesTest.get(parent); 2392 if (overrideValue != null) { 2393 newContent.put(parent, overrideValue); 2394 newValue = overrideValue; 2395 } 2396 if (CldrUtility.equals(oldValue, newValue)) { 2397 continue; 2398 } 2399 String message; 2400 if (oldValue == null) { 2401 message = 2402 "Adding " 2403 + ConvertLanguageData.getLanguageCodeAndName(parent) 2404 + " => " 2405 + ConvertLanguageData.getLanguageCodeAndName(newValue); 2406 newContent.put(parent, newValue); 2407 } else if (newValue == null) { 2408 if (SUPPRESS_CHANGES) { 2409 message = 2410 "Suppressing removal of " 2411 + ConvertLanguageData.getLanguageCodeAndName(parent) 2412 + " => " 2413 + ConvertLanguageData.getLanguageCodeAndName(oldValue); 2414 newContent.put(parent, oldValue); 2415 } else { 2416 message = 2417 "Removing " 2418 + ConvertLanguageData.getLanguageCodeAndName(parent) 2419 + " => " 2420 + ConvertLanguageData.getLanguageCodeAndName(oldValue); 2421 newContent.remove(oldValue); 2422 } 2423 } else { 2424 if (SUPPRESS_CHANGES) { 2425 message = 2426 "Suppressing change of " 2427 + ConvertLanguageData.getLanguageCodeAndName(parent) 2428 + " => " 2429 + ConvertLanguageData.getLanguageCodeAndName(oldValue) 2430 + 
" to " 2431 + ConvertLanguageData.getLanguageCodeAndName(newValue); 2432 newContent.remove(newValue); 2433 newContent.put(parent, oldValue); 2434 } else { 2435 message = 2436 "Changing " 2437 + ConvertLanguageData.getLanguageCodeAndName(parent) 2438 + " => " 2439 + ConvertLanguageData.getLanguageCodeAndName(oldValue) 2440 + " to " 2441 + ConvertLanguageData.getLanguageCodeAndName(newValue); 2442 newContent.remove(oldValue); 2443 newContent.put(parent, newValue); 2444 } 2445 } 2446 changes.add(title + message); 2447 } 2448 return changes; 2449 } 2450 2451 public static class LocaleStringComparator implements Comparator<String> { 2452 LanguageTagParser ltp0 = new LanguageTagParser(); 2453 LanguageTagParser ltp1 = new LanguageTagParser(); 2454 2455 @Override compare(String arg0, String arg1)2456 public int compare(String arg0, String arg1) { 2457 ltp0.set(arg0); 2458 ltp1.set(arg1); 2459 String s0 = ltp0.getLanguage(); 2460 String s1 = ltp1.getLanguage(); 2461 int result = s0.compareTo(s1); 2462 if (result != 0) { 2463 return s0.equals(LocaleNames.UND) ? 1 : s1.equals(LocaleNames.UND) ? -1 : result; 2464 } 2465 s0 = ltp0.getScript(); 2466 s1 = ltp1.getScript(); 2467 result = s0.compareTo(s1); 2468 if (result != 0) { 2469 return result; 2470 } 2471 s0 = ltp0.getRegion(); 2472 s1 = ltp1.getRegion(); 2473 result = s0.compareTo(s1); 2474 if (result != 0) { 2475 return result; 2476 } 2477 return arg0.compareTo(arg1); // just in case 2478 } 2479 } 2480 } 2481