1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.Sets; 5 import com.ibm.icu.dev.test.TestFmwk; 6 import com.ibm.icu.impl.Relation; 7 import com.ibm.icu.impl.Row.R2; 8 import java.io.IOException; 9 import java.util.ArrayList; 10 import java.util.Arrays; 11 import java.util.Collections; 12 import java.util.HashMap; 13 import java.util.HashSet; 14 import java.util.LinkedHashSet; 15 import java.util.List; 16 import java.util.Map; 17 import java.util.Map.Entry; 18 import java.util.Set; 19 import java.util.TreeMap; 20 import java.util.TreeSet; 21 import java.util.regex.Matcher; 22 import org.unicode.cldr.draft.ScriptMetadata; 23 import org.unicode.cldr.draft.ScriptMetadata.Info; 24 import org.unicode.cldr.tool.GenerateMaximalLocales; 25 import org.unicode.cldr.tool.LikelySubtags; 26 import org.unicode.cldr.util.Builder; 27 import org.unicode.cldr.util.CLDRConfig; 28 import org.unicode.cldr.util.CLDRFile; 29 import org.unicode.cldr.util.CLDRLocale; 30 import org.unicode.cldr.util.ChainedMap; 31 import org.unicode.cldr.util.ChainedMap.M3; 32 import org.unicode.cldr.util.CldrUtility; 33 import org.unicode.cldr.util.Iso3166Data; 34 import org.unicode.cldr.util.LanguageTagParser; 35 import org.unicode.cldr.util.LocaleIDParser; 36 import org.unicode.cldr.util.PatternCache; 37 import org.unicode.cldr.util.StandardCodes; 38 import org.unicode.cldr.util.SupplementalDataInfo; 39 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 40 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 41 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 42 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 43 44 public class TestInheritance extends TestFmwk { 45 46 static CLDRConfig testInfo = CLDRConfig.getInstance(); 47 48 private static boolean DEBUG = CldrUtility.getProperty("DEBUG", false); 49 50 private static Matcher pathMatcher = 51 PatternCache.get(CldrUtility.getProperty("XPATH", ".*")).matcher(""); 52 main(String[] args)53 public static void main(String[] args) throws IOException { 54 new TestInheritance().run(args); 55 } 56 57 private static final SupplementalDataInfo dataInfo = SupplementalDataInfo.getInstance(); 58 private static final Set<String> defaultContents = dataInfo.getDefaultContentLocales(); 59 60 private static final boolean EXPECT_EQUALITY = false; 61 62 private static Set<String> availableLocales = testInfo.getFullCldrFactory().getAvailable(); 63 TestLocalesHaveOfficial()64 public void TestLocalesHaveOfficial() { 65 // If we have a language, we have all the region locales where the 66 // language is official 67 Set<String> SKIP_TERRITORIES = new HashSet<>(Arrays.asList("001", "150")); 68 SKIP_TERRITORIES.addAll(Iso3166Data.getRegionCodesNotForTranslation()); 69 for (Entry<String, R2<List<String>, String>> s : 70 dataInfo.getLocaleAliasInfo().get("territory").entrySet()) { 71 SKIP_TERRITORIES.add(s.getKey()); 72 } 73 74 LanguageTagParser ltp = new LanguageTagParser(); 75 76 Relation<String, String> languageLocalesSeen = 77 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 78 79 Set<String> testOrg = StandardCodes.make().getLocaleCoverageLocales("google"); 80 ChainedMap.M4<String, OfficialStatus, String, Boolean> languageToOfficialChildren = 81 ChainedMap.of( 82 new TreeMap<String, Object>(), 83 new TreeMap<OfficialStatus, Object>(), 84 new TreeMap<String, Object>(), 85 Boolean.class); 86 87 // gather the data 88 89 for (String language : dataInfo.getLanguagesForTerritoriesPopulationData()) { 90 for (String territory : dataInfo.getTerritoriesForPopulationData(language)) { 91 if (SKIP_TERRITORIES.contains(territory)) { 92 continue; 93 } 94 PopulationData data = 95 dataInfo.getLanguageAndTerritoryPopulationData(language, territory); 96 OfficialStatus status = data.getOfficialStatus(); 97 if (data.getOfficialStatus() != OfficialStatus.unknown) { 98 String locale = removeScript(language + "_" + territory); 99 String lang = removeScript(ltp.set(locale).getLanguage()); 100 languageToOfficialChildren.put(lang, status, locale, Boolean.TRUE); 101 languageLocalesSeen.put(lang, locale); 102 } 103 } 104 } 105 106 // flesh it out by adding 'clean' codes. 107 // also get the child locales in cldr. 108 109 Relation<String, String> languageToChildren = 110 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 111 for (String locale : testInfo.getCldrFactory().getAvailable()) { 112 String lang = ltp.set(locale).getLanguage(); 113 if (SKIP_TERRITORIES.contains(ltp.getRegion())) { 114 continue; 115 } 116 lang = removeScript(lang); 117 locale = removeScript(locale); 118 119 if (!lang.equals(locale)) { 120 languageToChildren.put(lang, locale); 121 Set<String> localesSeen = languageLocalesSeen.get(lang); 122 if (localesSeen == null || !localesSeen.contains(locale)) { 123 languageToOfficialChildren.put( 124 lang, OfficialStatus.unknown, locale, Boolean.TRUE); 125 } 126 } 127 } 128 129 for (Entry<String, Set<String>> languageAndChildren : languageToChildren.keyValuesSet()) { 130 String language = languageAndChildren.getKey(); 131 Set<String> children = languageAndChildren.getValue(); 132 M3<OfficialStatus, String, Boolean> officalStatusToChildren = 133 languageToOfficialChildren.get(language); 134 for (Entry<OfficialStatus, Map<String, Boolean>> entry : officalStatusToChildren) { 135 OfficialStatus status = entry.getKey(); 136 if (status != OfficialStatus.official 137 && status != OfficialStatus.de_facto_official) { 138 continue; 139 } 140 Set<String> officalChildren = entry.getValue().keySet(); 141 if (!children.containsAll(officalChildren)) { 142 Set<String> missing = new TreeSet<>(officalChildren); 143 missing.removeAll(children); 144 String message = 145 "Missing CLDR locales for " + status + " languages: " + missing; 146 errln(message); 147 } else { 148 logln( 149 "CLDR locales " 150 + children 151 + " cover " 152 + status 153 + " locales " 154 + officalChildren); 155 } 156 } 157 } 158 159 if (DEBUG) { 160 Set<String> languages = new TreeSet<>(languageToChildren.keySet()); 161 languages.addAll(languageToOfficialChildren.keySet()); 162 System.out.print("\ncode\tlanguage"); 163 for (OfficialStatus status : OfficialStatus.values()) { 164 System.out.print("\tNo\t" + status); 165 } 166 System.out.println(); 167 for (String language : languages) { 168 if (!testOrg.contains(language)) { 169 continue; 170 } 171 System.out.print(language + "\t" + testInfo.getEnglish().getName(language)); 172 173 M3<OfficialStatus, String, Boolean> officialChildren = 174 languageToOfficialChildren.get(language); 175 for (OfficialStatus status : OfficialStatus.values()) { 176 Map<String, Boolean> children = officialChildren.get(status); 177 if (children == null) { 178 System.out.print("\t" + 0 + "\t"); 179 } else { 180 System.out.print( 181 "\t" + children.size() + "\t" + show(children.keySet(), false)); 182 } 183 } 184 System.out.println(); 185 } 186 } 187 } 188 show(Set<String> joint, boolean showStatus)189 private String show(Set<String> joint, boolean showStatus) { 190 StringBuffer b = new StringBuffer(); 191 for (String s : joint) { 192 if (b.length() != 0) { 193 b.append(", "); 194 } 195 LanguageTagParser ltp = new LanguageTagParser().set(s); 196 String script = ltp.getScript(); 197 if (script.length() != 0) { 198 b.append(testInfo.getEnglish().getName(CLDRFile.SCRIPT_NAME, script)); 199 } 200 String region = ltp.getRegion(); 201 if (region.length() != 0) { 202 if (script.length() != 0) { 203 b.append("-"); 204 } 205 b.append(testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region)); 206 } 207 b.append(" [").append(s); 208 if (showStatus) { 209 PopulationData data = 210 dataInfo.getLanguageAndTerritoryPopulationData(ltp.getLanguage(), region); 211 if (data == null) { 212 data = 213 dataInfo.getLanguageAndTerritoryPopulationData( 214 ltp.getLanguageScript(), region); 215 } 216 b.append("; "); 217 b.append(data == null ? "?" : data.getOfficialStatus()); 218 } 219 b.append("]"); 220 } 221 return b.toString(); 222 } 223 removeScript(String lang)224 private String removeScript(String lang) { 225 if (!lang.contains("_")) { 226 return lang; 227 } 228 LanguageTagParser ltp = new LanguageTagParser().set(lang); 229 // String ls = ltp.getLanguageScript(); 230 // if (defaultContents.contains(ls)) { 231 ltp.setScript(""); 232 // } 233 return ltp.toString(); 234 } 235 TestLikelyAndDefaultConsistency()236 public void TestLikelyAndDefaultConsistency() { 237 LikelySubtags likelySubtags = new LikelySubtags(); 238 LanguageTagParser ltp = new LanguageTagParser(); 239 // find multiscript locales 240 Relation<String, String> base2scripts = 241 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 242 Map<String, String> parent2default = new TreeMap<>(); 243 Map<String, String> default2parent = new TreeMap<>(); 244 Relation<String, String> base2locales = 245 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 246 247 Set<String> knownMultiScriptLanguages = new HashSet<>(Arrays.asList("bm", "ha")); 248 // get multiscript locales 249 for (String localeID : availableLocales) { 250 String script = ltp.set(localeID).getScript(); 251 final String base = ltp.getLanguage(); 252 if (!availableLocales.contains(base) && !base.equals("und")) { 253 errln("Missing lang-subtag base " + base + " for: " + localeID); 254 } 255 base2locales.put(base, localeID); 256 if (!script.isEmpty() && !base.equals("en")) { // HACK for en 257 base2scripts.put(base, script); 258 } 259 if (script.isEmpty() && knownMultiScriptLanguages.contains(base)) { 260 base2scripts.put(base, dataInfo.getDefaultScript(base)); 261 } 262 } 263 264 // get default contents 265 for (String localeID : defaultContents) { 266 checkLocale(localeID, false); 267 String parent = 268 LocaleIDParser.getParent(localeID); // was using getSimpleParent, not sure why 269 parent2default.put(parent, localeID); 270 default2parent.put(localeID, parent); 271 // if (!available.contains(simpleParent)) { 272 // // verify that base language has locale in CLDR (we don't want 273 // others) 274 // errln("Default contents contains locale not in CLDR:\t" + 275 // simpleParent); 276 // } 277 } 278 279 // get likely 280 Map<String, String> likely2Maximized = likelySubtags.getToMaximized(); 281 for (Entry<String, String> likelyAndMaximized : likely2Maximized.entrySet()) { 282 checkLocale(likelyAndMaximized.getKey(), true); 283 checkLocale(likelyAndMaximized.getValue(), true); 284 } 285 Map<String, String> exceptionDcLikely = new HashMap<>(); 286 Map<String, String> exceptionLikelyDc = new HashMap<>(); 287 for (String[] s : 288 new String[][] { 289 {"ar_001", "ar_Arab_EG"}, {"nb", "no_Latn_NO"}, 290 }) { 291 exceptionDcLikely.put(s[0], s[1]); 292 exceptionLikelyDc.put(s[1], s[0]); 293 } 294 295 verifyDefaultContentsImplicationsForLikelySubtags( 296 ltp, parent2default, likely2Maximized, exceptionDcLikely); 297 298 verifyLikelySubtagsImplicationsForDefaultContents( 299 ltp, base2scripts, parent2default, likely2Maximized, exceptionLikelyDc); 300 301 verifyScriptsWithDefaultContents(ltp, base2scripts, parent2default, base2locales); 302 } 303 TestParentLocaleRelationships()304 public void TestParentLocaleRelationships() { 305 // Testing invariant relationships between locales - See 306 // http://unicode.org/cldr/trac/ticket/5758 307 308 /* Examples: 309 <parentLocale parent="no" locales="nb nn no_NO"/> 310 default content locales distinguish the child locale that has identical content, such as: 311 ebu_KE ee_GH el_GR en_Dsrt_US en_Shaw_GB en_US eo_001 es_ES et_EE eu_ES ewo_CM 312 */ 313 Matcher langScript = PatternCache.get("^[a-z]{2,3}_[A-Z][a-z]{3}$").matcher(""); 314 for (final String loc : availableLocales) { 315 // we only check locales of the form: lang_script 316 if (langScript.reset(loc).matches()) { 317 if (ALLOW_DIFFERENT_PARENT_LOCALE.contains(loc)) { 318 // Skip any in that list 319 continue; 320 } 321 String languageSubtag = loc.split("_")[0]; 322 String expectedParent = languageSubtag; 323 if (!defaultContents.contains(loc)) { 324 expectedParent = "root"; 325 } 326 String truncationParent = LocaleIDParser.getSimpleParent(loc); 327 String actualParent = LocaleIDParser.getParent(loc); 328 boolean hasExplicitParent = !actualParent.equals(truncationParent); 329 330 if (!actualParent.equals(expectedParent)) { 331 errln( 332 "Unexpected parent locale for locale " 333 + loc 334 + ". Expected: " 335 + expectedParent 336 + " Got: " 337 + actualParent 338 + " " 339 + ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE); 340 } 341 342 if (hasExplicitParent && defaultContents.contains(loc)) { 343 errln( 344 "Locale " 345 + loc 346 + " can't have an explicit parent AND be a default content locale"); 347 } 348 } 349 } 350 } 351 352 final String ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE = 353 "See ALLOW_DIFFERENT_PARENT_LOCALE in TestInheritance.java"; 354 public final Set<String> ALLOW_DIFFERENT_PARENT_LOCALE = 355 Collections.unmodifiableSet( 356 Sets.newHashSet( 357 // Update this if additional locales have explicit parents in a 358 // different language code 359 360 // Per CLDR-2698/14493 we allow nb,nn to have an explicit parent no 361 // which is a different language code. 362 "nn", 363 "nb", 364 // Per CLDR-15276 hi-Latn can have an explicit parent 365 "hi_Latn")); 366 TestParentLocaleInvariants()367 public void TestParentLocaleInvariants() { 368 // Testing invariant relationships in parent locales - See 369 // http://unicode.org/cldr/trac/ticket/7887 370 CLDRLocale cldrRoot = CLDRLocale.getInstance("root"); 371 LikelySubtags likely = new LikelySubtags(); 372 for (String loc : availableLocales) { 373 CLDRLocale cldrLoc = CLDRLocale.getInstance(loc); 374 CLDRLocale cldrParent = cldrLoc.getParent(); 375 if (cldrParent != null) { 376 CLDRLocale locLikely = CLDRLocale.getInstance(likely.maximize(loc)); 377 CLDRLocale parentLikely = 378 CLDRLocale.getInstance(likely.maximize(cldrParent.toString())); 379 final String locLang = cldrLoc.getLanguage(); 380 final String locScript = cldrLoc.getScript(); 381 final String locRegion = cldrLoc.getCountry(); 382 final String parentLang = cldrParent.getLanguage(); 383 final boolean parentIsRoot = cldrRoot.equals(cldrParent); 384 if (!parentIsRoot 385 && !ALLOW_DIFFERENT_PARENT_LOCALE.contains(loc) 386 && !locLang.equals(parentLang)) { 387 errln( 388 "Parent locale [" 389 + cldrParent 390 + "] for locale [" 391 + loc 392 + "] cannot be a different language code. " 393 + ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE); 394 } 395 if (!parentIsRoot && !locLikely.getScript().equals(parentLikely.getScript())) { 396 errln( 397 "Parent locale [" 398 + cldrParent 399 + "] for locale [" 400 + loc 401 + "] cannot have a different script code."); 402 } 403 String cldrTruncationParent = LocaleIDParser.getSimpleParent(loc); 404 boolean hasExplicitParent = !cldrTruncationParent.equals(cldrParent.toString()); 405 if (hasExplicitParent 406 && parentIsRoot 407 && locScript.length() == 0 408 && locRegion.length() == 0 409 && !ALLOW_DIFFERENT_PARENT_LOCALE.contains(loc)) { 410 errln( 411 "Base language locale [" 412 + loc 413 + "] cannot have an explicit parent (" 414 + cldrParent 415 + ") " 416 + ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE); 417 } 418 } 419 } 420 } 421 TestParentLocalesForCycles()422 public void TestParentLocalesForCycles() { 423 // Testing for cyclic relationships in parent locales - See 424 // http://unicode.org/cldr/trac/ticket/7887 425 for (String loc : availableLocales) { 426 String currentLoc = loc; 427 boolean foundError = false; 428 List<String> inheritanceChain = new ArrayList<>(Arrays.asList(loc)); 429 while (currentLoc != null && !foundError) { 430 currentLoc = LocaleIDParser.getParent(currentLoc); 431 if (inheritanceChain.contains(currentLoc)) { 432 foundError = true; 433 inheritanceChain.add(currentLoc); 434 errln( 435 "Inheritance chain for locale [" 436 + loc 437 + "] contains a cyclic relationship. " 438 + inheritanceChain.toString()); 439 } 440 inheritanceChain.add(currentLoc); 441 } 442 } 443 } 444 verifyScriptsWithDefaultContents( LanguageTagParser ltp, Relation<String, String> base2scripts, Map<String, String> parent2default, Relation<String, String> base2locales)445 private void verifyScriptsWithDefaultContents( 446 LanguageTagParser ltp, 447 Relation<String, String> base2scripts, 448 Map<String, String> parent2default, 449 Relation<String, String> base2locales) { 450 Set<String> skip = Builder.with(new HashSet<String>()).addAll("root", "und").freeze(); 451 Set<String> languagesWithOneOrLessLocaleScriptInCommon = 452 new HashSet<>(Arrays.asList("bm", "ha", "hi", "ms", "iu", "mn")); 453 Set<String> baseLanguagesWhoseDefaultContentHasNoRegion = 454 new HashSet<>(Arrays.asList("no")); 455 // for each base we have to have, 456 // if multiscript, we have default contents for base+script, 457 // base+script+region; 458 // otherwise base+region. 459 for (String base : base2locales.keySet()) { 460 if (skip.contains(base)) { 461 continue; 462 } 463 String defaultContent = parent2default.get(base); 464 // Set<String> likely = base2likely.get(base); 465 // if (likely == null) { 466 // errln("Missing likely subtags for: " + base + " " + 467 // suggestLikelySubtagFor(base)); 468 // } 469 if (defaultContent == null) { 470 errln("Missing default content for: " + base + " " + suggestLikelySubtagFor(base)); 471 continue; 472 } 473 Set<String> scripts = base2scripts.get(base); 474 ltp.set(defaultContent); 475 String script = ltp.getScript(); 476 String region = ltp.getRegion(); 477 if (scripts == null || languagesWithOneOrLessLocaleScriptInCommon.contains(base)) { 478 if (!script.isEmpty()) { 479 errln( 480 "Script should be empty in default content for: " 481 + base 482 + "," 483 + defaultContent); 484 } 485 if (region.isEmpty() 486 && !baseLanguagesWhoseDefaultContentHasNoRegion.contains(base)) { 487 errln( 488 "Region must not be empty in default content for: " 489 + base 490 + "," 491 + defaultContent); 492 } 493 } else { 494 if (script.isEmpty()) { 495 errln( 496 "Script should not be empty in default content for: " 497 + base 498 + "," 499 + defaultContent); 500 } 501 if (!region.isEmpty()) { 502 errln( 503 "Region should be empty in default content for: " 504 + base 505 + "," 506 + defaultContent); 507 } 508 String defaultContent2 = parent2default.get(defaultContent); 509 if (defaultContent2 == null) { 510 errln("Missing default content for: " + defaultContent); 511 continue; 512 } 513 ltp.set(defaultContent2); 514 region = ltp.getRegion(); 515 if (region.isEmpty()) { 516 errln( 517 "Region must not be empty in default content for: " 518 + base 519 + "," 520 + defaultContent); 521 } 522 } 523 } 524 } 525 verifyLikelySubtagsImplicationsForDefaultContents( LanguageTagParser ltp, Relation<String, String> base2scripts, Map<String, String> parent2default, Map<String, String> likely2Maximized, Map<String, String> exceptionLikelyDc)526 private void verifyLikelySubtagsImplicationsForDefaultContents( 527 LanguageTagParser ltp, 528 Relation<String, String> base2scripts, 529 Map<String, String> parent2default, 530 Map<String, String> likely2Maximized, 531 Map<String, String> exceptionLikelyDc) { 532 // Now check invariants for all LikelySubtags implications for Default 533 // Contents 534 // a) suppose likely max for la_Scrp => la_Scrp_RG 535 // Then default contents la_Scrp => la_Scrp_RG 536 // b) suppose likely max for la_RG => la_Scrp_RG 537 // Then we can draw no conclusions // was default contents la_Scrp => 538 // la_Scrp_RG 539 // c) suppose likely max for la => la_Scrp_RG 540 // Then default contents la => la_Scrp && la_Scrp => la_Scrp_RG 541 // or default contents la => la_RG && ! la_Scrp => la_Scrp_RG 542 543 TreeSet<String> additionalDefaultContents = new TreeSet<>(); 544 545 for (Entry<String, String> entry : likely2Maximized.entrySet()) { 546 String source = entry.getKey(); 547 String likelyMax = entry.getValue(); 548 String sourceLang = ltp.set(source).getLanguage(); 549 if (sourceLang.equals("und") || source.equals("zh_Hani") || source.equals("tl")) { 550 continue; 551 } 552 String sourceScript = ltp.getScript(); 553 String sourceRegion = ltp.getRegion(); 554 555 String likelyMaxLang = ltp.set(likelyMax).getLanguage(); 556 String likelyMaxScript = ltp.getScript(); 557 String likelyMaxRegion = ltp.getRegion(); 558 559 String dc = parent2default.get(source); 560 String possibleException = exceptionLikelyDc.get(likelyMax); 561 if (possibleException != null && possibleException.equals(dc)) { 562 continue; 563 } 564 String likelyLangScript = likelyMaxLang + "_" + likelyMaxScript; 565 String dcFromLangScript = parent2default.get(likelyLangScript); 566 567 boolean consistent = true; 568 String caseNumber = null; 569 if (consistent) { 570 if (!sourceScript.isEmpty()) { 571 caseNumber = "a"; 572 if (dc == null) { 573 if (EXPECT_EQUALITY) { 574 String expected = likelyMax; 575 errln( 576 "Default contents null for " 577 + source 578 + ", expected:\t" 579 + expected); 580 additionalDefaultContents.add(expected); 581 } 582 continue; 583 } 584 consistent = likelyMax.equals(dc); 585 } else if (!sourceRegion.isEmpty()) { // a 586 caseNumber = "b"; 587 // consistent = likelyMax.equals(dcFromLangScript); 588 } else { // c 589 caseNumber = "c"; 590 if (dc == null) { 591 if (EXPECT_EQUALITY) { 592 String expected = 593 base2scripts.get(source) == null 594 ? likelyMaxLang + "_" + likelyMaxRegion 595 : likelyMaxLang + "_" + likelyMaxScript; 596 errln( 597 "Default contents null for " 598 + source 599 + ", expected:\t" 600 + expected); 601 additionalDefaultContents.add(expected); 602 } 603 continue; 604 } 605 String dcScript = ltp.set(dc).getScript(); 606 consistent = 607 likelyLangScript.equals(dc) && likelyMax.equals(dcFromLangScript) 608 || dcScript.isEmpty() && !likelyMax.equals(dcFromLangScript); 609 // || dcScript.isEmpty() && dcRegion.equals(likelyMaxRegion) 610 // && dcFromLangScript == null; 611 } 612 } 613 if (!consistent) { 614 errln( 615 "default contents inconsistent with likely subtag: (" 616 + caseNumber 617 + ")" 618 + "\n\t" 619 + source 620 + " => (ls) " 621 + likelyMax 622 + "\n\t" 623 + source 624 + " => (dc) " 625 + dc 626 + "\n\t" 627 + likelyLangScript 628 + " => (dc) " 629 + dcFromLangScript); 630 } 631 } 632 if (additionalDefaultContents.size() != 0) { 633 errln( 634 "Suggested additions to supplementalMetadata/../defaultContent:\n" 635 + Joiner.on(" ").join(additionalDefaultContents)); 636 } 637 } 638 verifyDefaultContentsImplicationsForLikelySubtags( LanguageTagParser ltp, Map<String, String> parent2default, Map<String, String> likely2Maximized, Map<String, String> exceptionDcLikely)639 private void verifyDefaultContentsImplicationsForLikelySubtags( 640 LanguageTagParser ltp, 641 Map<String, String> parent2default, 642 Map<String, String> likely2Maximized, 643 Map<String, String> exceptionDcLikely) { 644 // Now check invariants for all Default Contents implications for 645 // LikelySubtags 646 // a) suppose default contents la => la_Scrp. 647 // Then the likely contents for la => la_Scrp_* 648 // b) suppose default contents la => la_RG. 649 // Then the likely contents for la => la_*_RG 650 // c) suppose default contents la_Scrp => la_Scrp_RG. 651 // Then the likely contents of la_Scrp => la_Scrp_RG OR likely contents 652 // for la => la_*_* 653 for (Entry<String, String> parentAndDefault : parent2default.entrySet()) { 654 String source = parentAndDefault.getKey(); 655 String dc = parentAndDefault.getValue(); 656 String likelyMax = likely2Maximized.get(source); 657 658 // skip special exceptions 659 String possibleException = exceptionDcLikely.get(dc); 660 if (possibleException != null && possibleException.equals(likelyMax)) { 661 continue; 662 } 663 664 String sourceLang = ltp.set(source).getLanguage(); 665 String sourceScript = ltp.getScript(); 666 // there cannot be a sourceRegion 667 668 String dcScript = ltp.set(dc).getScript(); 669 String dcRegion = ltp.getRegion(); 670 671 String likelyMaxLang = "", likelyMaxScript = "", likelyMaxRegion = ""; 672 if (likelyMax != null) { 673 likelyMaxLang = ltp.set(likelyMax).getLanguage(); 674 likelyMaxScript = ltp.getScript(); 675 likelyMaxRegion = ltp.getRegion(); 676 } 677 678 String likelyMax2 = likely2Maximized.get(sourceLang); 679 680 boolean consistent = true; 681 682 if (sourceScript.isEmpty()) { // a or b 683 if (!dcScript.isEmpty()) { // a 684 consistent = likelyMaxLang.equals(source) && likelyMaxScript.equals(dcScript); 685 } else { // b 686 consistent = likelyMaxLang.equals(source) && likelyMaxRegion.equals(dcRegion); 687 } 688 } else { // c 689 consistent = dc.equals(likelyMax) || likelyMax2 != null; 690 } 691 if (!consistent) { 692 errln( 693 "likely subtag inconsistent with default contents: " 694 + "\n\t" 695 + source 696 + " =>( dc) " 697 + dc 698 + "\n\t" 699 + source 700 + " => (ls) " 701 + likelyMax 702 + (source.equals(sourceLang) 703 ? "" 704 : "\n\t" + sourceLang + " => (ls) " + likelyMax2)); 705 } 706 } 707 } 708 709 /** 710 * Suggest a likely subtag 711 * 712 * @param base 713 * @return 714 */ suggestLikelySubtagFor(String base)715 static String suggestLikelySubtagFor(String base) { 716 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); 717 718 CLDRLocale loc = CLDRLocale.getInstance(base); 719 720 if (!loc.getLanguage().equals(base)) { 721 return " (no suggestion- not a simple language locale)"; // no 722 // suggestion 723 // unless 724 // just 725 // a 726 // language 727 // locale. 728 } 729 Set<BasicLanguageData> basicData = sdi.getBasicLanguageData(base); 730 731 for (BasicLanguageData bld : basicData) { 732 if (bld.getType() 733 == org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type.primary) { 734 Set<String> scripts = bld.getScripts(); 735 Set<String> territories = bld.getTerritories(); 736 737 if (scripts.size() == 1) { 738 if (territories.size() == 1) { 739 return createSuggestion( 740 loc, 741 CLDRLocale.getInstance( 742 base 743 + "_" 744 + scripts.iterator().next() 745 + "_" 746 + territories.iterator().next())); 747 } 748 } 749 return "(no suggestion - multiple scripts or territories)"; 750 } 751 } 752 return ("(no suggestion- no data)"); 753 } 754 755 /** Format and return a suggested likelysubtag */ createSuggestion(CLDRLocale loc, CLDRLocale toLoc)756 private static String createSuggestion(CLDRLocale loc, CLDRLocale toLoc) { 757 return " Suggest this to likelySubtags.xml: <likelySubtag from=\"" 758 + loc 759 + "\" to=\"" 760 + toLoc 761 + "\"/>\n" 762 + " <!--{ " 763 + loc.getDisplayName() 764 + "; ?; ? } => { " 765 + loc.getDisplayName() 766 + "; " 767 + toLoc.toULocale().getDisplayScript() 768 + "; " 769 + toLoc.toULocale().getDisplayCountry() 770 + " }-->"; 771 } 772 TestDeprecatedTerritoryDataLocaleIds()773 public void TestDeprecatedTerritoryDataLocaleIds() { 774 HashSet<String> checked = new HashSet<>(); 775 for (String language : dataInfo.getLanguagesForTerritoriesPopulationData()) { 776 checkLocale(language, false); // checks la_Scrp and la 777 for (String region : dataInfo.getTerritoriesForPopulationData(language)) { 778 if (!checked.contains(region)) { 779 checkValidCode(language + "_" + region, "territory", region, false); 780 checked.add(region); 781 } 782 } 783 } 784 for (String language : dataInfo.getBasicLanguageDataLanguages()) { 785 checkLocale(language, false); // checks la_Scrp and la 786 Set<BasicLanguageData> data = dataInfo.getBasicLanguageData(language); 787 for (BasicLanguageData datum : data) { 788 for (String script : datum.getScripts()) { 789 checkValidCode(language + "_" + script, "script", script, false); 790 checked.add(script); 791 } 792 for (String region : datum.getTerritories()) { 793 checkValidCode(language + "_" + region, "territory", region, false); 794 checked.add(region); 795 } 796 } 797 } 798 } 799 TestBasicLanguageDataAgainstScriptMetadata()800 public void TestBasicLanguageDataAgainstScriptMetadata() { 801 // the invariants are: 802 // if there is primary data, the script must be there 803 // otherwise it must be in the secondary 804 main: 805 for (String script : ScriptMetadata.getScripts()) { 806 Info info = ScriptMetadata.getInfo(script); 807 String language = info.likelyLanguage; 808 if (language.equals("und")) { 809 continue; 810 } 811 Map<Type, BasicLanguageData> data = dataInfo.getBasicLanguageDataMap(language); 812 if (data == null) { 813 logln( 814 "Warning: ScriptMetadata has " 815 + language 816 + " for " 817 + script 818 + "," 819 + " but " 820 + language 821 + " is missing in language_script.txt"); 822 continue; 823 } 824 for (BasicLanguageData entry : data.values()) { 825 if (entry.getScripts().contains(script)) { 826 continue main; 827 } 828 continue; 829 } 830 logln( 831 "Warning: ScriptMetadata has " 832 + language 833 + " for " 834 + script 835 + "," 836 + " but " 837 + language 838 + " doesn't have " 839 + script 840 + " in language_script.txt"); 841 } 842 } 843 TestCldrFileConsistency()844 public void TestCldrFileConsistency() { 845 boolean haveErrors = false; 846 for (String locale : testInfo.getCldrFactory().getAvailable()) { 847 CLDRFile cldrFileToCheck = testInfo.getCLDRFile(locale, false); 848 int errors = 0; 849 for (String path : cldrFileToCheck) { 850 if (!pathMatcher.reset(path).find()) { 851 continue; 852 } 853 String fullPath = cldrFileToCheck.getFullXPath(path); 854 if (fullPath == null) { 855 // try again, for debugging 856 fullPath = cldrFileToCheck.getFullXPath(path); 857 String value = cldrFileToCheck.getStringValue(path); 858 if (DEBUG) { 859 errln( 860 "Invalid full path\t" 861 + locale 862 + ", " 863 + path 864 + ", " 865 + fullPath 866 + ", " 867 + value); 868 } 869 errors++; 870 haveErrors = true; 871 } 872 } 873 if (errors != 0) { 874 errln(locale + (errors != 0 ? "\tinvalid getFullXPath() values:" + errors : "")); 875 } else { 876 logln(locale); 877 } 878 } 879 if (haveErrors && !DEBUG) { 880 errln("Use -DDEBUG to see details"); 881 } 882 } 883 884 static SupplementalDataInfo info = SupplementalDataInfo.getInstance(); 885 LanguageTagParser ltp = new LanguageTagParser(); 886 887 Matcher aliasMatcher = PatternCache.get("//ldml.*/alias.*").matcher(""); 888 minimize(Map<String, String> likelySubtags, String locale)889 private String minimize(Map<String, String> likelySubtags, String locale) { 890 String result = GenerateMaximalLocales.minimize(locale, likelySubtags, false); 891 if (result == null) { 892 LanguageTagParser ltp3 = new LanguageTagParser().set(locale); 893 List<String> variants = ltp3.getVariants(); 894 Map<String, String> extensions = ltp3.getExtensions(); 895 Set<String> emptySet = Collections.emptySet(); 896 ltp3.setVariants(emptySet); 897 Map<String, String> emptyMap = Collections.emptyMap(); 898 ltp3.setExtensions(emptyMap); 899 String newLocale = ltp3.toString(); 900 result = GenerateMaximalLocales.minimize(newLocale, likelySubtags, false); 901 if (result != null) { 902 ltp3.set(result); 903 ltp3.setVariants(variants); 904 ltp3.setExtensions(extensions); 905 result = ltp3.toString(); 906 } 907 } 908 return result; 909 } 910 maximize(Map<String, String> likelySubtags, String locale)911 private String maximize(Map<String, String> likelySubtags, String locale) { 912 String result = GenerateMaximalLocales.maximize(locale, likelySubtags); 913 if (result == null) { 914 LanguageTagParser ltp3 = new LanguageTagParser().set(locale); 915 List<String> variants = ltp3.getVariants(); 916 Map<String, String> extensions = ltp3.getExtensions(); 917 Set<String> emptySet = Collections.emptySet(); 918 ltp3.setVariants(emptySet); 919 Map<String, String> emptyMap = Collections.emptyMap(); 920 ltp3.setExtensions(emptyMap); 921 String newLocale = ltp3.toString(); 922 result = GenerateMaximalLocales.maximize(newLocale, likelySubtags); 923 if (result != null) { 924 ltp3.set(result); 925 ltp3.setVariants(variants); 926 ltp3.setExtensions(extensions); 927 result = ltp3.toString(); 928 } 929 } 930 return result; 931 } 932 933 // TODO move this into central utilities equals(CharSequence string, int codePoint)934 public static boolean equals(CharSequence string, int codePoint) { 935 if (string == null) { 936 return false; 937 } 938 switch (string.length()) { 939 case 1: 940 return codePoint == string.charAt(0); 941 case 2: 942 return codePoint >= 0x10000 && codePoint == Character.codePointAt(string, 0); 943 default: 944 return false; 945 } 946 } 947 948 // TODO move this into central utilities 949 950 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 951 private static final Map<String, Map<String, R2<List<String>, String>>> DEPRECATED_INFO = 952 dataInfo.getLocaleAliasInfo(); 953 checkLocale(String localeID, boolean allowDeprecated)954 private void checkLocale(String localeID, boolean allowDeprecated) { 955 // verify that the localeID is valid 956 LanguageTagParser ltp = new LanguageTagParser().set(localeID); 957 String language = ltp.getLanguage(); 958 String script = ltp.getScript(); 959 String region = ltp.getRegion(); 960 // TODO check variants, extensions also. 961 checkValidCode(localeID, "language", language, allowDeprecated); 962 checkValidCode(localeID, "script", script, allowDeprecated); 963 checkValidCode(localeID, "territory", region, allowDeprecated); 964 } 965 checkValidCode( String localeID, String subtagType, String subtag, boolean allowDeprecated)966 private void checkValidCode( 967 String localeID, String subtagType, String subtag, boolean allowDeprecated) { 968 if (subtagType.equals("language")) { 969 if (subtag.equals("und")) { 970 return; 971 } 972 } else { 973 if (subtag.isEmpty()) { 974 return; 975 } 976 } 977 if (!STANDARD_CODES.getAvailableCodes(subtagType).contains(subtag)) { 978 errln("Locale " + localeID + " contains illegal " + showCode(subtagType, subtag)); 979 } else if (!allowDeprecated) { 980 // "language" -> "sh" -> <{"sr_Latn"}, reason> 981 R2<List<String>, String> deprecatedInfo = DEPRECATED_INFO.get(subtagType).get(subtag); 982 if (deprecatedInfo != null) { 983 errln( 984 "Locale " 985 + localeID 986 + " contains deprecated " 987 + showCode(subtagType, subtag) 988 + " " 989 + deprecatedInfo.get1() 990 + "; suggest " 991 + showName(deprecatedInfo.get0(), subtagType)); 992 } 993 } 994 } 995 showName(List<String> deprecatedInfo, String subtagType)996 private String showName(List<String> deprecatedInfo, String subtagType) { 997 StringBuilder result = new StringBuilder(); 998 for (String s : deprecatedInfo) { 999 result.append(showName(subtagType, s)).append(" "); 1000 } 1001 return result.toString(); 1002 } 1003 showCode(String subtagType, String subtag)1004 private String showCode(String subtagType, String subtag) { 1005 return subtagType + " code: " + showName(subtagType, subtag); 1006 } 1007 showName(String subtagType, String subtag)1008 private String showName(String subtagType, String subtag) { 1009 return subtag + " (" + getName(subtagType, subtag) + ")"; 1010 } 1011 getName(String subtagType, String subtag)1012 private String getName(String subtagType, String subtag) { 1013 Map<String, String> data = STANDARD_CODES.getLangData(subtagType, subtag); 1014 if (data == null) { 1015 return "<no name>"; 1016 } 1017 return data.get("Description"); 1018 } 1019 1020 // TODO move this into central utilities equals(int codePoint, CharSequence string)1021 public static boolean equals(int codePoint, CharSequence string) { 1022 return equals(string, codePoint); 1023 } 1024 1025 // TODO move this into central utilities equals(Object a, Object b)1026 public static boolean equals(Object a, Object b) { 1027 return a == b ? true : a == null || b == null ? false : a.equals(b); 1028 } 1029 1030 // TODO move this into central utilities showDifferences(Map<K, V> a, Map<K, V> b)1031 private <K, V> String showDifferences(Map<K, V> a, Map<K, V> b) { 1032 StringBuilder result = new StringBuilder(); 1033 Set<K> keys = new LinkedHashSet<>(); 1034 keys.addAll(a.keySet()); 1035 keys.addAll(b.keySet()); 1036 for (K key : keys) { 1037 if (!a.containsKey(key)) { 1038 result.append(key).append("→‹").append(a.get(key)).append("›,∅; "); 1039 } else if (!b.containsKey(key)) { 1040 result.append(key).append("→∅,‹").append(b.get(key)).append("›; "); 1041 } else { 1042 V aKey = a.get(key); 1043 V bKey = b.get(key); 1044 if (!equals(aKey, bKey)) { 1045 result.append(key) 1046 .append("→‹") 1047 .append(a.get(key)) 1048 .append("›,‹") 1049 .append(b.get(key)) 1050 .append("›; "); 1051 } 1052 } 1053 } 1054 return result.toString(); 1055 } 1056 TestLanguageTagParser()1057 public void TestLanguageTagParser() { 1058 LanguageTagParser ltp = new LanguageTagParser(); 1059 ltp.set("en-Cyrl-US"); 1060 assertEquals(null, "en", ltp.getLanguage()); 1061 assertEquals(null, "en_Cyrl", ltp.getLanguageScript()); 1062 assertEquals(null, "Cyrl", ltp.getScript()); 1063 assertEquals(null, "US", ltp.getRegion()); 1064 try { 1065 ltp.set("$"); 1066 assertFalse("expected exception", true); 1067 } catch (Exception e) { 1068 logln(e.getMessage()); 1069 } 1070 } 1071 TestParentChain()1072 public void TestParentChain() { 1073 String[][] tests = { 1074 {"en_DE", "[en_150, en_001, en, root]"}, 1075 {"fr_CA", "[fr, root]"}, 1076 {"fr", "[root]"}, 1077 {"root", "[]"}, 1078 }; 1079 1080 for (String[] test : tests) { 1081 assertEquals(test[0], test[1], LocaleIDParser.getParentChain(test[0]).toString()); 1082 } 1083 } 1084 } 1085