1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.ImmutableSet; 5 import com.google.common.collect.Multimap; 6 import com.google.common.collect.TreeMultimap; 7 import com.ibm.icu.impl.Relation; 8 import com.ibm.icu.impl.Row; 9 import com.ibm.icu.impl.Row.R2; 10 import com.ibm.icu.impl.Row.R3; 11 import com.ibm.icu.impl.Utility; 12 import com.ibm.icu.impl.number.DecimalQuantity; 13 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; 14 import com.ibm.icu.lang.UCharacter; 15 import com.ibm.icu.lang.UCharacterEnums; 16 import com.ibm.icu.lang.UScript; 17 import com.ibm.icu.text.PluralRules; 18 import com.ibm.icu.text.PluralRules.DecimalQuantitySamples; 19 import com.ibm.icu.text.PluralRules.DecimalQuantitySamplesRange; 20 import com.ibm.icu.text.PluralRules.SampleType; 21 import com.ibm.icu.text.StringTransform; 22 import com.ibm.icu.text.UnicodeSet; 23 import com.ibm.icu.util.Output; 24 import com.ibm.icu.util.TimeZone; 25 import com.ibm.icu.util.ULocale; 26 import java.util.ArrayList; 27 import java.util.Arrays; 28 import java.util.Collection; 29 import java.util.Collections; 30 import java.util.Date; 31 import java.util.EnumMap; 32 import java.util.EnumSet; 33 import java.util.HashMap; 34 import java.util.HashSet; 35 import java.util.Iterator; 36 import java.util.LinkedHashMap; 37 import java.util.LinkedHashSet; 38 import java.util.List; 39 import java.util.Locale; 40 import java.util.Map; 41 import java.util.Map.Entry; 42 import java.util.Set; 43 import java.util.TreeMap; 44 import java.util.TreeSet; 45 import java.util.logging.Logger; 46 import java.util.regex.Matcher; 47 import java.util.regex.Pattern; 48 import java.util.stream.Collectors; 49 import org.unicode.cldr.draft.ScriptMetadata; 50 import org.unicode.cldr.test.CoverageLevel2; 51 import org.unicode.cldr.tool.LikelySubtags; 52 import org.unicode.cldr.tool.PluralMinimalPairs; 53 import org.unicode.cldr.tool.PluralRulesFactory; 54 import org.unicode.cldr.util.Builder; 55 import org.unicode.cldr.util.CLDRConfig; 56 import org.unicode.cldr.util.CLDRFile; 57 import org.unicode.cldr.util.CLDRFile.WinningChoice; 58 import org.unicode.cldr.util.CLDRLocale; 59 import org.unicode.cldr.util.CLDRURLS; 60 import org.unicode.cldr.util.CldrUtility; 61 import org.unicode.cldr.util.DateConstants; 62 import org.unicode.cldr.util.GrammarInfo; 63 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 64 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 65 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 66 import org.unicode.cldr.util.Iso3166Data; 67 import org.unicode.cldr.util.Iso639Data; 68 import org.unicode.cldr.util.Iso639Data.Scope; 69 import org.unicode.cldr.util.IsoCurrencyParser; 70 import org.unicode.cldr.util.LanguageTagCanonicalizer; 71 import org.unicode.cldr.util.LanguageTagParser; 72 import org.unicode.cldr.util.Level; 73 import org.unicode.cldr.util.LocaleNames; 74 import org.unicode.cldr.util.Organization; 75 import org.unicode.cldr.util.Pair; 76 import org.unicode.cldr.util.PluralRanges; 77 import org.unicode.cldr.util.PreferredAndAllowedHour; 78 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle; 79 import org.unicode.cldr.util.StandardCodes; 80 import org.unicode.cldr.util.StandardCodes.CodeType; 81 import org.unicode.cldr.util.StandardCodes.LstrType; 82 import org.unicode.cldr.util.SupplementalDataInfo; 83 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 84 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; 85 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 86 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 87 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 88 import org.unicode.cldr.util.SupplementalDataInfo.DateRange; 89 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange; 90 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 91 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 92 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 93 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 94 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 95 import org.unicode.cldr.util.SupplementalDataInfo.SampleList; 96 import org.unicode.cldr.util.Validity; 97 import org.unicode.cldr.util.Validity.Status; 98 99 public class TestSupplementalInfo extends TestFmwkPlus { 100 static CLDRConfig testInfo = CLDRConfig.getInstance(); 101 102 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 103 104 private static final SupplementalDataInfo SUPPLEMENTAL = testInfo.getSupplementalDataInfo(); 105 main(String[] args)106 public static void main(String[] args) { 107 new TestSupplementalInfo().run(args); 108 } 109 TestPluralSampleOrder()110 public void TestPluralSampleOrder() { 111 HashSet<PluralInfo> seen = new HashSet<>(); 112 for (String locale : SUPPLEMENTAL.getPluralLocales()) { 113 if (locale.equals(LocaleNames.ROOT)) { 114 continue; 115 } 116 PluralInfo pi = SUPPLEMENTAL.getPlurals(locale); 117 if (seen.contains(pi)) { 118 continue; 119 } 120 seen.add(pi); 121 for (SampleType s : SampleType.values()) { 122 for (Count c : pi.getCounts(s)) { 123 DecimalQuantitySamples sSamples = 124 pi.getPluralRules().getDecimalSamples(c.toString(), s); 125 if (sSamples == null) { 126 errln(locale + " no sample for " + c); 127 continue; 128 } 129 if (s == SampleType.DECIMAL) { 130 continue; // skip 131 } 132 DecimalQuantitySamplesRange lastSample = null; 133 for (DecimalQuantitySamplesRange sample : sSamples.getSamples()) { 134 if (lastSample != null) { 135 if (compare(lastSample.start, sample.start) > 0) { 136 errln( 137 locale 138 + ":" 139 + c 140 + ": out of order with " 141 + lastSample 142 + " > " 143 + sample); 144 } else if (false) { 145 logln( 146 locale 147 + ":" 148 + c 149 + ": in order with " 150 + lastSample 151 + " < " 152 + sample); 153 } 154 } 155 lastSample = sample; 156 } 157 } 158 } 159 } 160 } 161 compare(DecimalQuantity me, DecimalQuantity other)162 public static int compare(DecimalQuantity me, DecimalQuantity other) { 163 // We place exponent notation samples entirely after ones without exponent 164 if (me.getExponent() != other.getExponent()) { 165 return me.getExponent() < other.getExponent() ? -1 : 1; 166 } 167 168 return (int) (me.toDouble() - other.toDouble()); 169 } 170 TestPluralRanges()171 public void TestPluralRanges() { 172 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 173 Set<String> localesToTest = new TreeSet<>(SUPPLEMENTAL.getPluralRangesLocales()); 174 for (String locale : StandardCodes.make().getLocaleCoverageLocales("google")) { // superset 175 if (locale.equals("*") || locale.contains("_")) { 176 continue; 177 } 178 localesToTest.add(locale); 179 } 180 Set<String> modernLocales = 181 StandardCodes.make() 182 .getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN)); 183 184 Output<DecimalQuantity> maxSample = new Output<>(); 185 Output<DecimalQuantity> minSample = new Output<>(); 186 187 for (String locale : localesToTest) { 188 final String templateLine = 189 "Template for " 190 + ULocale.getDisplayName(locale, "en") 191 + " (" 192 + locale 193 + ") translators to fix:"; 194 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale); 195 Set<Count> counts = pluralInfo.getCounts(); 196 197 final PluralMinimalPairs samplePatterns = 198 PluralMinimalPairs.getInstance(new ULocale(locale).toString()); 199 200 // check that there are no null values 201 PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale); 202 if (pluralRanges == null) { 203 if (!modernLocales.contains(locale)) { 204 logln("Missing plural ranges for " + locale); 205 } else { 206 errOrLog( 207 CoverageIssue.error, 208 locale + "\tMissing plural ranges", 209 "Cldrbug:7839", 210 "Missing plural data for modern locales"); 211 StringBuilder failureCases = new StringBuilder(templateLine); 212 for (Count start : counts) { 213 for (Count end : counts) { 214 pluralInfo.rangeExists(start, end, minSample, maxSample); 215 final String rangeLine = 216 getRangeLine( 217 start, end, null, maxSample, minSample, samplePatterns); 218 failureCases.append("\n" + locale + "\t" + rangeLine); 219 } 220 } 221 errOrLog(CoverageIssue.warn, failureCases.toString()); 222 } 223 continue; 224 } 225 EnumSet<Count> found = EnumSet.noneOf(Count.class); 226 for (Count count : Count.values()) { 227 if (pluralRanges.isExplicitlySet(count) && !counts.contains(count)) { 228 assertTrue( 229 locale 230 + "\t pluralRanges categories must be valid for locale:\t" 231 + count 232 + " must be in " 233 + counts, 234 !pluralRanges.isExplicitlySet(count)); 235 } 236 for (Count end : Count.values()) { 237 Count result = pluralRanges.getExplicit(count, end); 238 if (result != null) { 239 found.add(result); 240 } 241 } 242 } 243 244 // check empty range results 245 if (found.isEmpty()) { 246 errOrLog( 247 CoverageIssue.error, 248 "Empty range results for " + locale, 249 "Cldrbug:7839", 250 "Missing plural data for modern locales"); 251 } else { 252 if (samplePatterns == null) { 253 errOrLog( 254 CoverageIssue.error, 255 locale + "\tMissing sample patterns", 256 "Cldrbug:7839", 257 "Missing plural data for modern locales"); 258 } else { 259 for (Count result : found) { 260 String samplePattern = 261 samplePatterns.get(PluralRules.PluralType.CARDINAL, result); 262 if (samplePattern != null && !samplePattern.contains("{0}")) { 263 errln( 264 "Plural Ranges cannot have results that don't use {0} in samples: " 265 + locale 266 + ", " 267 + result 268 + "\t«" 269 + samplePattern 270 + "»"); 271 } 272 } 273 } 274 if (isVerbose()) { 275 logln("Range results for " + locale + ":\t" + found); 276 } 277 } 278 279 // check for missing values 280 boolean failure = false; 281 StringBuilder failureCases = new StringBuilder(templateLine); 282 for (Count start : counts) { 283 for (Count end : counts) { 284 boolean needsValue = pluralInfo.rangeExists(start, end, minSample, maxSample); 285 Count explicitValue = pluralRanges.getExplicit(start, end); 286 final String rangeLine = 287 getRangeLine( 288 start, 289 end, 290 explicitValue, 291 maxSample, 292 minSample, 293 samplePatterns); 294 failureCases.append("\n" + locale + "\t" + rangeLine); 295 if (needsValue && explicitValue == null) { 296 errOrLog( 297 CoverageIssue.error, 298 locale + "\tNo explicit value for range: " + rangeLine, 299 "Cldrbug:7839", 300 "Missing plural data for modern locales"); 301 failure = true; 302 failureCases.append("\tError — need explicit result"); 303 } else if (!needsValue && explicitValue != null) { 304 errOrLog( 305 CoverageIssue.error, 306 locale 307 + "\tDoesn't need explicit value, but has one: " 308 + PluralRanges.showRange(start, end, explicitValue), 309 "Cldrbug:7839", 310 "Missing plural data for modern locales"); 311 failureCases.append("\tUnnecessary"); 312 failure = true; 313 } else { 314 failureCases.append("\tOK"); 315 } 316 } 317 } 318 if (failure) { 319 errOrLog(CoverageIssue.warn, failureCases.toString()); 320 } 321 } 322 } 323 getRangeLine( Count start, Count end, Count result, Output<DecimalQuantity> maxSample, Output<DecimalQuantity> minSample, PluralMinimalPairs samplePatterns)324 private String getRangeLine( 325 Count start, 326 Count end, 327 Count result, 328 Output<DecimalQuantity> maxSample, 329 Output<DecimalQuantity> minSample, 330 PluralMinimalPairs samplePatterns) { 331 final String range = minSample + "–" + maxSample; 332 String example = range; 333 if (samplePatterns != null) { 334 example = ""; 335 if (result != null) { 336 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result); 337 example += 338 "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»"; 339 } else { 340 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) { 341 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c); 342 example += 343 c 344 + ":«" 345 + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) 346 + "»" 347 + "?\tOR "; 348 } 349 example += " …"; 350 } 351 } 352 return start 353 + "\t" 354 + end 355 + "\t" 356 + (result == null ? "?" : result.toString()) 357 + "\t" 358 + example; 359 } 360 getRangeLine(Count count, PluralRules pluralRules, String pattern)361 private String getRangeLine(Count count, PluralRules pluralRules, String pattern) { 362 String sample = "?"; 363 DecimalQuantitySamples exampleList = 364 pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER); 365 if (exampleList == null) { 366 exampleList = 367 pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL); 368 } 369 DecimalQuantity sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList); 370 sample = sampleDecimal.toString(); 371 372 String example = 373 pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»"; 374 return count + "\t" + example; 375 } 376 TestPluralSamples()377 public void TestPluralSamples() { 378 String[][] test = { 379 {"en", "ordinal", "1", "one"}, 380 {"en", "ordinal", "2", "two"}, 381 {"en", "ordinal", "3", "few"}, 382 {"en", "ordinal", "4", "other"}, 383 {"sl", "cardinal", "2", "two"}, 384 }; 385 for (String[] row : test) { 386 checkPluralSamples(row); 387 } 388 } 389 TestPluralSamples2()390 public void TestPluralSamples2() { 391 PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL); 392 for (String locale : prf.getLocales()) { 393 if (locale.equals(LocaleNames.UND)) { 394 continue; 395 } 396 if (locale.equals("pl")) { 397 int debug = 0; 398 } 399 final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale); 400 for (PluralRules.PluralType type : PluralRules.PluralType.values()) { 401 PluralInfo rules = 402 SUPPLEMENTAL.getPlurals( 403 SupplementalDataInfo.PluralType.fromStandardType(type), 404 locale.toString()); 405 if (rules.getCounts().size() == 1) { 406 continue; // don't require rules for unary cases 407 } 408 Multimap<String, Count> sampleToCount = TreeMultimap.create(); 409 410 for (Count count : rules.getCounts()) { 411 String sample = samplePatterns.get(type, count); 412 if (sample == null) { 413 errOrLog( 414 CoverageIssue.error, 415 locale + "\t" + type + " \tmissing samples for " + count, 416 "cldrbug:7075", 417 "Missing ordinal minimal pairs"); 418 } else { 419 sampleToCount.put(sample, count); 420 PluralRules pRules = rules.getPluralRules(); 421 double unique = pRules.getUniqueKeywordValue(count.toString()); 422 if (unique == PluralRules.NO_UNIQUE_VALUE && !sample.contains("{0}")) { 423 errln( 424 "Missing {0} in sample: " 425 + locale 426 + ", " 427 + type 428 + ", " 429 + count 430 + " «" 431 + sample 432 + "»"); 433 } 434 } 435 } 436 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) { 437 if (entry.getValue().size() > 1) { 438 errln( 439 "Colliding minimal pair samples: " 440 + locale 441 + ", " 442 + type 443 + ", " 444 + entry.getValue() 445 + " «" 446 + entry.getKey() 447 + "»"); 448 } 449 } 450 } 451 } 452 } 453 TestCldrScriptCodes()454 public void TestCldrScriptCodes() { 455 Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes(); 456 457 Set<String> unicodeScripts = ScriptMetadata.getScripts(); 458 assertRelation( 459 "getCLDRScriptCodes contains Unicode Scripts", 460 true, 461 codes, 462 CONTAINS_ALL, 463 unicodeScripts); 464 465 ImmutableSet<String> allSpecials = 466 ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz"); 467 assertRelation( 468 "getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials); 469 470 ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore"); 471 assertRelation( 472 "getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos); 473 474 Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script); 475 for (Entry<Status, Set<String>> e : scripts.entrySet()) { 476 switch (e.getKey()) { 477 case regular: 478 case special: 479 case unknown: 480 assertRelation( 481 "getCLDRScriptCodes contains " + e.getKey(), 482 true, 483 codes, 484 CONTAINS_ALL, 485 e.getValue()); 486 break; 487 default: 488 break; // do nothin 489 } 490 } 491 } 492 checkPluralSamples(String... row)493 public void checkPluralSamples(String... row) { 494 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(PluralType.valueOf(row[1]), row[0]); 495 Count count = 496 pluralInfo.getCount(DecimalQuantity_DualStorageBCD.fromExponentString(row[2])); 497 assertEquals(String.join(", ", row), Count.valueOf(row[3]), count); 498 } 499 TestPluralLocales()500 public void TestPluralLocales() { 501 // get the unique rules 502 for (PluralType type : PluralType.values()) { 503 Relation<PluralInfo, String> pluralsToLocale = 504 Relation.of(new HashMap<PluralInfo, Set<String>>(), TreeSet.class); 505 for (String locale : new TreeSet<>(SUPPLEMENTAL.getPluralLocales(type))) { 506 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale); 507 pluralsToLocale.put(pluralInfo, locale); 508 } 509 510 String[][] equivalents = { 511 {"mo", "ro"}, 512 {"tl", "fil"}, 513 {"he", "iw"}, 514 {"in", "id"}, 515 {"jw", "jv"}, 516 {"ji", "yi"}, 517 {"sh", "sr"}, 518 }; 519 for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale.keyValuesSet()) { 520 PluralInfo pluralInfo2 = pluralInfoEntry.getKey(); 521 Set<String> locales = pluralInfoEntry.getValue(); 522 // check that equivalent locales are either both in or both out 523 for (String[] row : equivalents) { 524 assertEquals( 525 type + " must be equivalent: " + Arrays.asList(row), 526 locales.contains(row[0]), 527 locales.contains(row[1])); 528 } 529 // check that no rules contain 'within' 530 for (Count count : pluralInfo2.getCounts()) { 531 String rule = pluralInfo2.getRule(count); 532 if (rule == null) { 533 continue; 534 } 535 assertFalse( 536 "Rule '" 537 + rule 538 + "' for " 539 + Arrays.asList(locales) 540 + " doesn't contain 'within'", 541 rule.contains("within")); 542 } 543 } 544 } 545 } 546 TestDigitPluralCases()547 public void TestDigitPluralCases() { 548 String[][] tests = { 549 {"en", "one", "1", "1"}, 550 {"en", "one", "2", ""}, 551 {"en", "one", "3", ""}, 552 {"en", "one", "4", ""}, 553 {"en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …"}, 554 {"en", "other", "2", "10-99, 10.0, 10.1, 10.2, …"}, 555 {"en", "other", "3", "100-999, 100.0, 100.1, 100.2, …"}, 556 {"en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …"}, 557 {"hr", "one", "1", "1, 0.1, 2.10, 1.1, …"}, 558 {"hr", "one", "2", "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …"}, 559 {"hr", "one", "3", "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …"}, 560 {"hr", "one", "4", "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …"}, 561 {"hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …"}, 562 {"hr", "few", "2", "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …"}, 563 {"hr", "few", "3", "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …"}, 564 {"hr", "few", "4", "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …"}, 565 {"hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …"}, 566 {"hr", "other", "2", "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …"}, 567 {"hr", "other", "3", "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …"}, 568 { 569 "hr", 570 "other", 571 "4", 572 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …" 573 }, 574 }; 575 for (String[] row : tests) { 576 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 577 SampleList uset = 578 plurals.getSamples9999(Count.valueOf(row[1]), Integer.parseInt(row[2])); 579 assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3], uset.toString()); 580 } 581 } 582 TestDigitPluralCompleteness()583 public void TestDigitPluralCompleteness() { 584 String[][] exceptionStrings = { 585 // defaults 586 {"*", "zero", "0,00,000,0000"}, 587 {"*", "one", "0"}, 588 {"*", "two", "0,00,000,0000"}, 589 {"*", "few", "0,00,000,0000"}, 590 {"*", "many", "0,00,000,0000"}, 591 {"*", "other", "0,00,000,0000"}, 592 // others 593 {"mo", "other", "00,000,0000"}, // 594 {"ro", "other", "00,000,0000"}, // 595 {"cs", "few", "0"}, // j in 2..4 596 {"sk", "few", "0"}, // j in 2..4 597 {"da", "one", "0"}, // j is 1 or t is not 0 and n within 0..2 598 {"is", "one", "0,00,000,0000"}, // j is 1 or f is 1 599 {"sv", "one", "0"}, // j is 1 600 {"he", "two", "0"}, // j is 2 601 {"ru", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100 602 // is not 11 603 {"uk", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100 604 // is not 11 605 {"bs", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100 606 // is not 11 or f mod 10 is 607 // 1 and f mod 100 is not 11 608 {"hr", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100 609 // is not 11 or f mod 10 is 610 // 1 and f mod 100 is not 11 611 {"sh", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100 612 // is not 11 or f mod 10 is 613 // 1 and f mod 100 is not 11 614 {"sr", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100 615 // is not 11 or f mod 10 is 616 // 1 and f mod 100 is not 11 617 {"mk", "one", "0,00,000,0000"}, // j mod 10 is 1 or f mod 10 618 // is 1 619 {"sl", "one", "0,000,0000"}, // j mod 100 is 1 620 {"sl", "two", "0,000,0000"}, // j mod 100 is 2 621 {"he", "many", "00,000,0000"}, // j not in 0..10 and j mod 10 622 // is 0 623 {"tzm", "one", "0,00"}, // n in 0..1 or n in 11..99 624 {"gd", "one", "0,00"}, // n in 1,11 625 {"gd", "two", "0,00"}, // n in 2,12 626 {"shi", "few", "0,00"}, // n in 2..10 627 {"gd", "few", "0,00"}, // n in 3..10,13..19 628 {"ga", "few", "0"}, // n in 3..6 629 {"ga", "many", "0,00"}, // n in 7..10 630 {"ar", "zero", "0"}, // n is 0 631 {"blo", "zero", "0"}, // n = 0 632 {"cy", "zero", "0"}, // n is 0 633 {"ksh", "zero", "0"}, // n is 0 634 {"lag", "zero", "0"}, // n is 0 635 {"pt", "one", "0"}, // i = 1 and v = 0 or i = 0 and t = 1 636 {"pt_PT", "one", "0"}, // n = 1 and v = 0 637 {"ar", "two", "0"}, // n is 2 638 {"cy", "two", "0"}, // n is 2 639 {"ga", "two", "0"}, // n is 2 640 {"iu", "two", "0"}, // n is 2 641 {"naq", "two", "0"}, // n is 2 642 {"se", "two", "0"}, // n is 2 643 {"sma", "two", "0"}, // n is 2 644 {"smi", "two", "0"}, // n is 2 645 {"smj", "two", "0"}, // n is 2 646 {"smn", "two", "0"}, // n is 2 647 {"sms", "two", "0"}, // n is 2 648 {"cy", "few", "0"}, // n is 3 649 {"cy", "many", "0"}, // n is 6 650 {"br", "many", ""}, // n is not 0 and n mod 1000000 is 0 651 {"gv", "one", "0,00,000,0000"}, // n mod 10 is 1 652 {"be", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100 653 // is not 11 654 {"lv", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100 655 // is not 11 or v is 2 and f 656 // mod 10 is 1 and f mod 100 657 // is not 11 or v is not 2 658 // and f mod 10 is 1 659 {"br", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100 660 // not in 11,71,91 661 {"lt", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100 662 // not in 11..19 663 {"fil", "one", "0,00,000,0000"}, // v = 0 and i = 1,2,3 or v = 664 // 0 and i % 10 != 4,6,9 or 665 // v != 0 and f % 10 != 666 // 4,6,9 667 {"tl", "one", "0,00,000,0000"}, // v = 0 and i = 1,2,3 or v = 668 // 0 and i % 10 != 4,6,9 or 669 // v != 0 and f % 10 != 670 // 4,6,9 671 {"dsb", "one", "0,00,000,0000"}, // v = 0 and i % 100 = 1 or f 672 // % 100 = 1 673 {"kw", "many", "00,000,0000"}, // n != 1 and n % 100 = 1,21,41,61,81 674 {"kw", "zero", "0"}, // n = 0 675 {"mt", "two", "0"}, 676 {"fr", "many", ""}, // e is special 677 {"ca", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 678 {"es", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 679 {"it", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 680 {"pt", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 681 {"pt_PT", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 682 }; 683 // parse out the exceptions 684 Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>(); 685 Relation<Count, Integer> fallback = 686 Relation.of(new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class); 687 for (String[] row : exceptionStrings) { 688 Relation<Count, Integer> countToDigits; 689 if (row[0].equals("*")) { 690 countToDigits = fallback; 691 } else { 692 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]); 693 countToDigits = exceptions.get(plurals); 694 if (countToDigits == null) { 695 exceptions.put( 696 plurals, 697 countToDigits = 698 Relation.of( 699 new EnumMap<Count, Set<Integer>>(Count.class), 700 TreeSet.class)); 701 } 702 } 703 Count c = Count.valueOf(row[1]); 704 for (String digit : row[2].split(",")) { 705 // "99" is special, just to have the result be non-empty 706 countToDigits.put(c, digit.length()); 707 } 708 } 709 Set<PluralInfo> seen = new HashSet<>(); 710 Set<String> sorted = new TreeSet<>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 711 Relation<String, String> ruleToExceptions = 712 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 713 714 for (String locale : sorted) { 715 PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale); 716 if (seen.contains(plurals)) { // skip identicals 717 continue; 718 } 719 Relation<Count, Integer> countToDigits = exceptions.get(plurals); 720 if (countToDigits == null) { 721 countToDigits = fallback; 722 } 723 for (Count c : plurals.getCounts()) { 724 List<String> compose = new ArrayList<>(); 725 boolean needLine = false; 726 Set<Integer> digitSet = countToDigits.get(c); 727 if (digitSet == null) { 728 digitSet = fallback.get(c); 729 } 730 for (int digits = 1; digits < 5; ++digits) { 731 boolean expected = digitSet.contains(digits); 732 boolean hasSamples = plurals.hasSamples(c, digits); 733 if (hasSamples) { 734 compose.add(Utility.repeat("0", digits)); 735 } 736 if (!assertEquals(locale + ", " + digits + ", " + c, expected, hasSamples)) { 737 needLine = true; 738 } 739 } 740 if (needLine) { 741 String countRules = plurals.getPluralRules().getRules(c.toString()); 742 ruleToExceptions.put( 743 countRules == null ? "" : countRules, 744 "{\"" 745 + locale 746 + "\", \"" 747 + c 748 + "\", \"" 749 + Joiner.on(",").join(compose) 750 + "\"},"); 751 } 752 } 753 } 754 if (!ruleToExceptions.isEmpty()) { 755 System.out.println( 756 "To fix the above, review the following, then replace in TestDigitPluralCompleteness"); 757 for (Entry<String, String> entry : ruleToExceptions.entrySet()) { 758 System.out.println(entry.getValue() + "\t// " + entry.getKey()); 759 } 760 } 761 } 762 TestLikelyCode()763 public void TestLikelyCode() { 764 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 765 String[][] tests = { 766 {"it_AQ", "it_Latn_AQ"}, {"it_Arab", "it_Arab_IT"}, {"az_Cyrl", "az_Cyrl_AZ"}, 767 }; 768 for (String[] pair : tests) { 769 String newMax = LikelySubtags.maximize(pair[0], likely); 770 assertEquals("Likely", pair[1], newMax); 771 } 772 } 773 TestLikelySubtagCompleteness()774 public void TestLikelySubtagCompleteness() { 775 Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags(); 776 777 for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) { 778 if (!likely.containsKey(language)) { 779 logln("WARNING: No likely subtag for CLDR language code (" + language + ")"); 780 } 781 } 782 for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) { 783 if (!likely.containsKey("und_" + script) 784 && !script.equals("Latn") 785 && !script.equals("Zinh") 786 && !script.equals("Zyyy") 787 && ScriptMetadata.getInfo(script) != null 788 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION 789 && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) { 790 errln("No likely subtag for CLDR script code (und_" + script + ")"); 791 } 792 } 793 } 794 TestEquivalentLocales()795 public void TestEquivalentLocales() { 796 Set<Set<String>> seen = new HashSet<>(); 797 Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory().getAvailable()); 798 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet()); 799 toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values()); 800 toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales()); 801 LanguageTagParser ltp = new LanguageTagParser(); 802 main: 803 for (String locale : toTest) { 804 if (locale.startsWith(LocaleNames.UND) || locale.equals(LocaleNames.ROOT)) { 805 continue; 806 } 807 Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale); 808 if (seen.contains(s)) { 809 continue; 810 } 811 812 List<String> ss = new ArrayList<>(s); 813 String last = ss.get(ss.size() - 1); 814 ltp.set(last); 815 if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) { 816 continue; // skip variants for now. 817 } 818 String language = ltp.getLanguage(); 819 String script = ltp.getScript(); 820 String region = ltp.getRegion(); 821 if (!script.isEmpty() && !region.isEmpty()) { 822 String noScript = ltp.setScript("").toString(); 823 String noRegion = ltp.setScript(script).setRegion("").toString(); 824 switch (s.size()) { 825 case 1: // ok if already maximized and strange script/country, 826 // eg it_Arab_JA 827 continue main; 828 case 2: // ok if adds default country/script, eg {en_Cyrl, 829 // en_Cyrl_US} or {en_GB, en_Latn_GB} 830 String first = ss.get(0); 831 if (first.equals(noScript) || first.equals(noRegion)) { 832 continue main; 833 } 834 break; 835 case 3: // ok if different script in different country, eg 836 // {az_IR, az_Arab, az_Arab_IR} 837 if (noScript.equals(ss.get(0)) && noRegion.equals(ss.get(1))) { 838 continue main; 839 } 840 break; 841 case 4: // ok if all combinations, eg {en, en_US, en_Latn, 842 // en_Latn_US} 843 if (language.equals(ss.get(0)) 844 && noScript.equals(ss.get(1)) 845 && noRegion.equals(ss.get(2))) { 846 continue main; 847 } 848 break; 849 } 850 } 851 errln("Strange size or composition:\t" + s + " \t" + showLocaleParts(s)); 852 seen.add(s); 853 } 854 } 855 showLocaleParts(Set<String> s)856 private String showLocaleParts(Set<String> s) { 857 LanguageTagParser ltp = new LanguageTagParser(); 858 Set<String> b = new LinkedHashSet<>(); 859 for (String ss : s) { 860 ltp.set(ss); 861 addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b); 862 addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b); 863 addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b); 864 } 865 return Joiner.on("; ").join(b); 866 } 867 addName(int languageName, String code, Set<String> b)868 private void addName(int languageName, String code, Set<String> b) { 869 if (code.isEmpty()) { 870 return; 871 } 872 String name = testInfo.getEnglish().getName(languageName, code); 873 if (!code.equals(name)) { 874 b.add(code + "=" + name); 875 } 876 } 877 TestDefaultScriptCompleteness()878 public void TestDefaultScriptCompleteness() { 879 Relation<String, String> scriptToBase = 880 Relation.of(new LinkedHashMap<String, Set<String>>(), TreeSet.class); 881 main: 882 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 883 if (!locale.contains("_") && !LocaleNames.ROOT.equals(locale)) { 884 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale); 885 if (defaultScript != null) { 886 continue; 887 } 888 CLDRFile cldrFile = testInfo.getCLDRFile(locale, false); 889 UnicodeSet set = cldrFile.getExemplarSet("", WinningChoice.NORMAL); 890 for (String s : set) { 891 int script = UScript.getScript(s.codePointAt(0)); 892 if (script != UScript.UNKNOWN 893 && script != UScript.COMMON 894 && script != UScript.INHERITED) { 895 scriptToBase.put(UScript.getShortName(script), locale); 896 continue main; 897 } 898 } 899 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale); 900 } 901 } 902 if (scriptToBase.size() != 0) { 903 for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) { 904 errln("Default Scripts missing:\t" + entry.getKey() + "\t" + entry.getValue()); 905 } 906 } 907 } 908 TestTimeData()909 public void TestTimeData() { 910 Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL.getTimeData(); 911 Set<String> regionsSoFar = new HashSet<>(); 912 Set<String> current24only = new HashSet<>(); 913 Set<String> current12preferred = new HashSet<>(); 914 915 boolean haveWorld = false; 916 917 ImmutableSet<HourStyle> oldSchool = 918 ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k)); 919 920 for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) { 921 String region = e.getKey(); 922 if (region.equals("001")) { 923 haveWorld = true; 924 } 925 regionsSoFar.add(region); 926 PreferredAndAllowedHour preferredAndAllowedHour = e.getValue(); 927 assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred); 928 929 // find first h or H 930 HourStyle found = null; 931 932 for (HourStyle item : preferredAndAllowedHour.allowed) { 933 if (oldSchool.contains(item)) { 934 found = item; 935 if (item != preferredAndAllowedHour.preferred) { 936 String message = 937 "Inconsistent values for " 938 + region 939 + ": preferred=" 940 + preferredAndAllowedHour.preferred 941 + " but that isn't the first " 942 + oldSchool 943 + " in allowed: " 944 + preferredAndAllowedHour.allowed; 945 errln(message); 946 } 947 break; 948 } 949 } 950 if (found == null) { 951 errln( 952 region 953 + ": preferred " 954 + preferredAndAllowedHour.preferred 955 + " not in " 956 + preferredAndAllowedHour.allowed); 957 } 958 // final HourStyle firstAllowed = 959 // preferredAndAllowedHour.allowed.iterator().next(); 960 // if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == 961 // HourStyle.h 962 // || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed == 963 // HourStyle.hb 964 // || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed == 965 // HourStyle.H) { 966 // errln(region + ": allowed " + preferredAndAllowedHour.allowed 967 // + " starts with preferred " + preferredAndAllowedHour.preferred); 968 // } else if (isVerbose()) { 969 // logln(region + ": allowed " + preferredAndAllowedHour.allowed 970 // + " starts with preferred " + preferredAndAllowedHour.preferred); 971 // } 972 // for (HourStyle c : preferredAndAllowedHour.allowed) { 973 // if (!PreferredAndAllowedHour.HOURS.contains(c)) { 974 // errln(region + ": illegal character in " + 975 // preferredAndAllowedHour.allowed + ". It contains " + c 976 // + " which is not in " + PreferredAndAllowedHour.HOURS); 977 // } 978 // } 979 if (!preferredAndAllowedHour.allowed.contains(HourStyle.h) 980 && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) { 981 current24only.add(region); 982 } 983 if (preferredAndAllowedHour.preferred == HourStyle.h) { 984 current12preferred.add(region); 985 } 986 } 987 Set<String> missing = 988 new TreeSet<>(STANDARD_CODES.getGoodAvailableCodes(CodeType.territory)); 989 missing.removeAll(regionsSoFar); 990 for (Iterator<String> it = missing.iterator(); it.hasNext(); ) { 991 if (!StandardCodes.isCountry(it.next())) { 992 it.remove(); 993 } 994 } 995 996 // if we don't have 001, then we can't miss any regions 997 if (!missing.isEmpty()) { 998 if (haveWorld) { 999 logln("Implicit regions: " + missing); 1000 } else { 1001 errln("Missing regions: " + missing); 1002 } 1003 } 1004 1005 // The feedback gathered from our translators is that the following use 1006 // 24 hour time ONLY: 1007 Set<String> only24lang = 1008 new TreeSet<>( 1009 Arrays.asList( 1010 ("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, " 1011 + "fr, gl, he, is, id, it, no, pt, ro, ru, sr, sk, sl, sv, tr, hy") 1012 .split(",\\s*"))); 1013 // With the new preferences, this is changed 1014 Set<String> only24region = new TreeSet<>(); 1015 Set<String> either24or12region = new TreeSet<>(); 1016 1017 // get all countries where official or de-facto official 1018 // add them two one of two lists, based on the above list of languages 1019 for (String language : SUPPLEMENTAL.getLanguagesForTerritoriesPopulationData()) { 1020 boolean a24lang = only24lang.contains(language); 1021 for (String region : SUPPLEMENTAL.getTerritoriesForPopulationData(language)) { 1022 PopulationData pop = 1023 SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(language, region); 1024 if (pop.getOfficialStatus().compareTo(OfficialStatus.de_facto_official) < 0) { 1025 continue; 1026 } 1027 if (a24lang) { 1028 only24region.add(region); 1029 } else { 1030 either24or12region.add(region); 1031 } 1032 } 1033 } 1034 // if we have a case like CA, where en uses 12/24 but fr uses 24, remove 1035 // it for safety 1036 only24region.removeAll(either24or12region); 1037 // There are always exceptions... Remove SM (San Marino) and VA (Vatican), 1038 // since they allows 12/24 but the de facto langauge is Italian. 1039 only24region.remove("SM"); 1040 only24region.remove("VA"); 1041 // also remove all the regions where 'h' is preferred 1042 only24region.removeAll(current12preferred); 1043 // now verify 1044 if (!current24only.containsAll(only24region)) { 1045 Set<String> missing24only = new TreeSet<>(only24region); 1046 missing24only.removeAll(current24only); 1047 1048 errln( 1049 "24-hour-only doesn't include needed items:\n" 1050 + " add " 1051 + CldrUtility.join(missing24only, " ") 1052 + "\n\t\t" 1053 + CldrUtility.join( 1054 missing24only, 1055 "\n\t\t", 1056 new NameCodeTransform( 1057 testInfo.getEnglish(), CLDRFile.TERRITORY_NAME))); 1058 } 1059 } 1060 1061 public static class NameCodeTransform implements StringTransform { 1062 private final CLDRFile file; 1063 private final int codeType; 1064 NameCodeTransform(CLDRFile file, int code)1065 public NameCodeTransform(CLDRFile file, int code) { 1066 this.file = file; 1067 this.codeType = code; 1068 } 1069 1070 @Override transform(String code)1071 public String transform(String code) { 1072 return file.getName(codeType, code) + " [" + code + "]"; 1073 } 1074 } 1075 TestAliases()1076 public void TestAliases() { 1077 StandardCodes.make(); 1078 Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes.getLStreg(); 1079 Map<String, Map<String, R2<List<String>, String>>> aliases = 1080 SUPPLEMENTAL.getLocaleAliasInfo(); 1081 1082 for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases.entrySet()) { 1083 String type = typeMap.getKey(); 1084 Map<String, R2<List<String>, String>> codeReplacement = typeMap.getValue(); 1085 1086 Map<String, Map<String, String>> bcp47DataTypeData = 1087 bcp47Data.get(type.equals("territory") ? "region" : type); 1088 if (bcp47DataTypeData == null) { 1089 logln("skipping BCP47 test for " + type); 1090 } else { 1091 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData.entrySet()) { 1092 String code = codeData.getKey(); 1093 if (codeReplacement.containsKey(code) 1094 || codeReplacement.containsKey(code.toUpperCase(Locale.ENGLISH))) { 1095 continue; 1096 // TODO, check the value 1097 } 1098 Map<String, String> data = codeData.getValue(); 1099 if (data.containsKey("Deprecated") 1100 && SUPPLEMENTAL.getCLDRLanguageCodes().contains(code)) { 1101 errln( 1102 "supplementalMetadata.xml: alias is missing <languageAlias type=\"" 1103 + code 1104 + "\" ... /> " 1105 + "\t" 1106 + data); 1107 } 1108 } 1109 } 1110 1111 Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>(); 1112 Set<String> nullReplacements = new TreeSet<>(); 1113 for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement.entrySet()) { 1114 String code = codeRep.getKey(); 1115 List<String> replacements = codeRep.getValue().get0(); 1116 if (replacements == null) { 1117 nullReplacements.add(code); 1118 continue; 1119 } 1120 Set<String> fixedReplacements = new LinkedHashSet<>(); 1121 for (String replacement : replacements) { 1122 R2<List<String>, String> newReplacement = codeReplacement.get(replacement); 1123 if (newReplacement != null) { 1124 List<String> list = newReplacement.get0(); 1125 if (list != null) { 1126 fixedReplacements.addAll(list); 1127 } 1128 } else { 1129 fixedReplacements.add(replacement); 1130 } 1131 } 1132 List<String> fixedList = new ArrayList<>(fixedReplacements); 1133 if (!replacements.equals(fixedList)) { 1134 R3<String, List<String>, List<String>> row = 1135 Row.of(code, replacements, fixedList); 1136 System.out.println(row.toString()); 1137 failures.add(row); 1138 } 1139 } 1140 1141 if (failures.size() != 0) { 1142 for (R3<String, List<String>, List<String>> item : failures) { 1143 String code = item.get0(); 1144 List<String> oldReplacement = item.get1(); 1145 List<String> newReplacement = item.get2(); 1146 1147 errln( 1148 code 1149 + "\t=>\t" 1150 + oldReplacement 1151 + "\tshould be:\n\t" 1152 + "<" 1153 + type 1154 + "Alias type=\"" 1155 + code 1156 + "\" replacement=\"" 1157 + Joiner.on(" ").join(newReplacement) 1158 + "\" reason=\"XXX\"/> <!-- YYY -->\n"); 1159 } 1160 } 1161 if (nullReplacements.size() != 0) { 1162 logln("No Replacements\t" + type + "\t" + nullReplacements); 1163 } 1164 } 1165 } 1166 1167 static final List<String> oldRegions = 1168 Arrays.asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU".split(", ")); 1169 TestTerritoryContainment()1170 public void TestTerritoryContainment() { 1171 Relation<String, String> map = SUPPLEMENTAL.getTerritoryToContained(ContainmentStyle.all); 1172 Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore(); 1173 Set<String> mapItems = new LinkedHashSet<>(); 1174 // get all the items 1175 for (String item : map.keySet()) { 1176 mapItems.add(item); 1177 mapItems.addAll(map.getAll(item)); 1178 } 1179 Map<String, Map<String, String>> bcp47RegionData = StandardCodes.getLStreg().get("region"); 1180 1181 // verify that all regions are covered 1182 Set<String> bcp47Regions = new LinkedHashSet<>(bcp47RegionData.keySet()); 1183 bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the 1184 // unknown region... 1185 for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext(); ) { 1186 String region = it.next(); 1187 Map<String, String> data = bcp47RegionData.get(region); 1188 if (data.containsKey("Deprecated")) { 1189 logln("Removing deprecated " + region); 1190 it.remove(); 1191 } 1192 if ("Private use".equals(data.get("Description"))) { 1193 it.remove(); 1194 } 1195 } 1196 1197 if (!mapItems.equals(bcp47Regions)) { 1198 mapItems.removeAll(oldRegions); 1199 errlnDiff("containment items not in bcp47 regions: ", mapItems, bcp47Regions); 1200 errlnDiff("bcp47 regions not in containment items: ", bcp47Regions, mapItems); 1201 } 1202 1203 // verify that everything in the containment core can be reached 1204 // downwards from 001. 1205 1206 Map<String, Integer> from001 = 1207 getRecursiveContainment("001", map, new LinkedHashMap<String, Integer>(), 1); 1208 from001.put("001", 0); 1209 Set<String> keySet = from001.keySet(); 1210 for (String region : keySet) { 1211 logln( 1212 Utility.repeat("\t", from001.get(region)) 1213 + "\t" 1214 + region 1215 + "\t" 1216 + getRegionName(region)); 1217 } 1218 1219 // Populate mapItems with the core containment 1220 mapItems.clear(); 1221 for (String item : mapCore.keySet()) { 1222 mapItems.add(item); 1223 mapItems.addAll(mapCore.getAll(item)); 1224 } 1225 1226 if (!mapItems.equals(keySet)) { 1227 errlnDiff("containment core items that can't be reached from 001: ", mapItems, keySet); 1228 } 1229 } 1230 errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1231 private void errlnDiff(String title, Set<String> mapItems, Set<String> keySet) { 1232 Set<String> diff = new LinkedHashSet<>(mapItems); 1233 diff.removeAll(keySet); 1234 if (diff.size() != 0) { 1235 errln(title + diff); 1236 } 1237 } 1238 getRegionName(String region)1239 private String getRegionName(String region) { 1240 return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region); 1241 } 1242 getRecursiveContainment( String region, Relation<String, String> map, Map<String, Integer> result, int depth)1243 private Map<String, Integer> getRecursiveContainment( 1244 String region, Relation<String, String> map, Map<String, Integer> result, int depth) { 1245 Set<String> contained = map.getAll(region); 1246 if (contained == null) { 1247 return result; 1248 } 1249 for (String item : contained) { 1250 if (result.containsKey(item)) { 1251 logln("Duplicate containment " + item + "\t" + getRegionName(item)); 1252 continue; 1253 } 1254 result.put(item, depth); 1255 getRecursiveContainment(item, map, result, depth + 1); 1256 } 1257 return result; 1258 } 1259 TestMacrolanguages()1260 public void TestMacrolanguages() { 1261 Set<String> languageCodes = STANDARD_CODES.getAvailableCodes("language"); 1262 Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement = 1263 SUPPLEMENTAL.getLocaleAliasInfo(); 1264 Map<String, R2<List<String>, String>> tagToReplacement = 1265 typeToTagToReplacement.get("language"); 1266 1267 Relation<String, String> replacementToReplaced = 1268 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 1269 for (String language : tagToReplacement.keySet()) { 1270 List<String> replacements = tagToReplacement.get(language).get0(); 1271 if (replacements != null) { 1272 replacementToReplaced.putAll(replacements, language); 1273 } 1274 } 1275 replacementToReplaced.freeze(); 1276 1277 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes.getLStreg(); 1278 Map<String, Map<String, String>> lstregLanguageInfo = lstreg.get("language"); 1279 1280 Relation<Scope, String> scopeToCodes = 1281 Relation.of(new TreeMap<Scope, Set<String>>(), TreeSet.class); 1282 // the invariant is that every macrolanguage has exactly 1 encompassed 1283 // language that maps to it 1284 1285 main: 1286 for (String language : 1287 Builder.with(new TreeSet<String>()) 1288 .addAll(languageCodes) 1289 .addAll(Iso639Data.getAvailable()) 1290 .get()) { 1291 if (language.equals("no") || language.equals("sa") || language.equals("sh")) { 1292 continue; // special cases 1293 } 1294 Scope languageScope = getScope(language, lstregLanguageInfo); 1295 if (languageScope == Scope.Macrolanguage) { 1296 if (Iso639Data.getHeirarchy(language) != null) { 1297 continue main; // is real family 1298 } 1299 Set<String> replacements = replacementToReplaced.getAll(language); 1300 if (replacements == null || replacements.size() == 0) { 1301 scopeToCodes.put(languageScope, language); 1302 } else { 1303 // it still might be bad, if we don't have a mapping to a 1304 // regular language 1305 for (String replacement : replacements) { 1306 Scope replacementScope = getScope(replacement, lstregLanguageInfo); 1307 if (replacementScope == Scope.Individual) { 1308 continue main; 1309 } 1310 } 1311 scopeToCodes.put(languageScope, language); 1312 } 1313 } 1314 } 1315 // now show the items we found 1316 for (Scope scope : scopeToCodes.keySet()) { 1317 for (String language : scopeToCodes.getAll(scope)) { 1318 String name = testInfo.getEnglish().getName(language); 1319 if (name == null || name.equals(language)) { 1320 Set<String> set = Iso639Data.getNames(language); 1321 if (set != null) { 1322 name = set.iterator().next(); 1323 } else { 1324 Map<String, String> languageInfo = lstregLanguageInfo.get(language); 1325 if (languageInfo != null) { 1326 name = languageInfo.get("Description"); 1327 } 1328 } 1329 } 1330 errln(scope + "\t" + language + "\t" + name + "\t" + Iso639Data.getType(language)); 1331 } 1332 } 1333 } 1334 getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1335 private Scope getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo) { 1336 Scope languageScope = Iso639Data.getScope(language); 1337 Map<String, String> languageInfo = lstregLanguageInfo.get(language); 1338 if (languageInfo == null) { 1339 // System.out.println("Couldn't get lstreg info for " + language); 1340 } else { 1341 String lstregScope = languageInfo.get("Scope"); 1342 if (lstregScope != null) { 1343 Scope scope2 = Scope.fromString(lstregScope); 1344 if (languageScope != scope2) { 1345 // System.out.println("Mismatch in scope between LSTR and ISO 639:\t" 1346 // + scope2 + "\t" + 1347 // languageScope); 1348 languageScope = scope2; 1349 } 1350 } 1351 } 1352 return languageScope; 1353 } 1354 1355 static final boolean LOCALES_FIXED = true; 1356 TestPopulation()1357 public void TestPopulation() { 1358 Set<String> languages = SUPPLEMENTAL.getLanguagesForTerritoriesPopulationData(); 1359 Relation<String, String> baseToLanguages = 1360 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 1361 LanguageTagParser ltp = new LanguageTagParser(); 1362 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false); 1363 1364 for (String language : languages) { 1365 if (LOCALES_FIXED) { 1366 String canonicalForm = ltc.transform(language); 1367 if (!assertEquals("Canonical form", canonicalForm, language)) { 1368 int debug = 0; 1369 } 1370 } 1371 1372 String base = ltp.set(language).getLanguage(); 1373 String script = ltp.getScript(); 1374 baseToLanguages.put(base, language); 1375 1376 // add basic data, basically just for wo! 1377 // if there are primary scripts, they must include script (if not 1378 // empty) 1379 Set<String> primaryScripts = Collections.emptySet(); 1380 Set<String> secondaryScripts = Collections.emptySet(); 1381 Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL.getBasicLanguageDataMap(base); 1382 if (basicData != null) { 1383 BasicLanguageData s = basicData.get(BasicLanguageData.Type.primary); 1384 if (s != null) { 1385 primaryScripts = s.getScripts(); 1386 } 1387 s = basicData.get(BasicLanguageData.Type.secondary); 1388 if (s != null) { 1389 secondaryScripts = s.getScripts(); 1390 } 1391 } 1392 1393 // do some consistency tests; if there is a script, it must be in 1394 // primaryScripts or secondaryScripts 1395 if (!script.isEmpty() 1396 && !primaryScripts.contains(script) 1397 && !secondaryScripts.contains(script)) { 1398 errln( 1399 base 1400 + ": Script found in territory data (" 1401 + script 1402 + ") is not in primary scripts :\t" 1403 + primaryScripts 1404 + " and not in secondary scripts :\t" 1405 + secondaryScripts); 1406 } 1407 1408 // if there are multiple primary scripts, they will be in 1409 // baseToLanguages 1410 if (primaryScripts.size() > 1) { 1411 for (String script2 : primaryScripts) { 1412 baseToLanguages.put(base, base + "_" + script2); 1413 } 1414 } 1415 } 1416 1417 if (!LOCALES_FIXED) { 1418 // the invariants are that if we have a base, we must not have a script. 1419 // and if we don't have a base, we must have two items 1420 for (String base : baseToLanguages.keySet()) { 1421 Set<String> languagesForBase = baseToLanguages.getAll(base); 1422 if (languagesForBase.contains(base)) { 1423 if (languagesForBase.size() > 1) { 1424 errln("Cannot have base alone with other scripts:\t" + languagesForBase); 1425 } 1426 } else { 1427 if (languagesForBase.size() == 1) { 1428 errln("Cannot have only one script for language:\t" + languagesForBase); 1429 } 1430 } 1431 } 1432 } 1433 } 1434 TestCompleteness()1435 public void TestCompleteness() { 1436 if (SUPPLEMENTAL.getSkippedElements().size() > 0) { 1437 logln( 1438 "SupplementalDataInfo API doesn't support: " 1439 + SUPPLEMENTAL.getSkippedElements().toString()); 1440 } 1441 } 1442 1443 // these are settings for exceptional cases we want to allow 1444 private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW = 1445 new TreeSet<>(Arrays.asList("ILS", "NZD", "PGK", "TWD")); 1446 1447 // ok since there is no problem with confusion 1448 private static final Set<String> OK_TO_NOT_HAVE_OLD = 1449 new TreeSet<>( 1450 Arrays.asList( 1451 "ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM", "FRF", "GRD", "IEP", 1452 "ITL", "LUF", "MTL", "MTP", "NLG", "PTE", "YUM", "ARA", "BAD", "BGL", 1453 "BOP", "BRC", "BRN", "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE", 1454 "HRD", "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI", "PES", 1455 "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD", "YUN", "ZRZ", "GWE")); 1456 1457 private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(new Date().getYear() - 5, 1, 1); 1458 private Matcher oldMatcher = 1459 Pattern.compile("\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE) 1460 .matcher(""); 1461 private Matcher newMatcher = Pattern.compile("\\bnew\\b", Pattern.CASE_INSENSITIVE).matcher(""); 1462 1463 /** 1464 * Test that access to currency info in supplemental data is ok. At this point just a simple 1465 * test. 1466 * 1467 * @param args 1468 */ TestSupplementalCurrency()1469 public void TestSupplementalCurrency() { 1470 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1471 Set<String> currencyCodes = STANDARD_CODES.getGoodAvailableCodes("currency"); 1472 Set<String> oncomingCurrencyCodes = STANDARD_CODES.getOncomingCurrencies(); 1473 Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes = 1474 Relation.of( 1475 new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), TreeSet.class); 1476 Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes = 1477 Relation.of( 1478 new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), TreeSet.class); 1479 Relation<String, Pair<String, CurrencyDateInfo>> recentModernCurrencyCodes = 1480 Relation.of( 1481 new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), TreeSet.class); 1482 Set<String> territoriesWithoutModernCurrencies = 1483 new TreeSet<>(STANDARD_CODES.getGoodAvailableCodes("territory")); 1484 Map<String, Date> currencyFirstValid = new TreeMap<>(); 1485 Map<String, Date> currencyLastValid = new TreeMap<>(); 1486 territoriesWithoutModernCurrencies.remove("ZZ"); 1487 territoriesWithoutModernCurrencies.removeAll(Iso3166Data.getRegionCodesNotForTranslation()); 1488 1489 for (String territory : STANDARD_CODES.getGoodAvailableCodes("territory")) { 1490 /* "EU" behaves like a country for purposes of this test */ 1491 if ((SUPPLEMENTAL.getContained(territory) != null) && !territory.equals("EU")) { 1492 territoriesWithoutModernCurrencies.remove(territory); 1493 continue; 1494 } 1495 Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL.getCurrencyDateInfo(territory); 1496 if (currencyInfo == null) { 1497 continue; // error, but will pick up below. 1498 } 1499 for (CurrencyDateInfo dateInfo : currencyInfo) { 1500 final String currency = dateInfo.getCurrency(); 1501 final Date start = dateInfo.getStart(); 1502 final Date end = dateInfo.getEnd(); 1503 if (dateInfo.getErrors().length() != 0) { 1504 logln( 1505 "parsing " 1506 + territory 1507 + "\t" 1508 + dateInfo.toString() 1509 + "\t" 1510 + dateInfo.getErrors()); 1511 } 1512 Date firstValue = currencyFirstValid.get(currency); 1513 if (firstValue == null || firstValue.compareTo(start) < 0) { 1514 currencyFirstValid.put(currency, start); 1515 } 1516 Date lastValue = currencyLastValid.get(currency); 1517 if (lastValue == null || lastValue.compareTo(end) > 0) { 1518 currencyLastValid.put(currency, end); 1519 } 1520 if (start.compareTo(DateConstants.NOW) < 0 1521 && end.compareTo(DateConstants.NOW) >= 0) { // Non-tender is OK... 1522 modernCurrencyCodes.put(currency, new Pair<>(territory, dateInfo)); 1523 territoriesWithoutModernCurrencies.remove(territory); 1524 } else { 1525 nonModernCurrencyCodes.put(currency, new Pair<>(territory, dateInfo)); 1526 if (start.compareTo(DateConstants.NOW) < 0 1527 && end.compareTo(DateConstants.RECENT_HISTORY) >= 0) { 1528 // It was CLDR tender recently. 1529 recentModernCurrencyCodes.put(currency, new Pair<>(territory, dateInfo)); 1530 } 1531 } 1532 logln( 1533 territory 1534 + "\t" 1535 + dateInfo.toString() 1536 + "\t" 1537 + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, currency)); 1538 } 1539 } 1540 // fix up 1541 nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet()); 1542 Relation<String, String> isoCurrenciesToCountries = 1543 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class) 1544 .addAllInverted(isoCodes.getCountryToCodes()); 1545 // now print error messages 1546 logln("Modern Codes: " + modernCurrencyCodes.size() + "\t" + modernCurrencyCodes); 1547 Set<String> missing = new TreeSet<>(isoCurrenciesToCountries.keySet()); 1548 missing.removeAll(modernCurrencyCodes.keySet()); 1549 missing.removeAll(oncomingCurrencyCodes); 1550 Set<String> recentMissing = new TreeSet<>(missing); 1551 recentMissing.retainAll(recentModernCurrencyCodes.keySet()); 1552 if (recentMissing.size() != 0) { 1553 warnln( 1554 "WARNING: Codes in ISO 4217 and until-recently legal tender in CLDR. " 1555 + "(may need to update " 1556 + CLDRURLS.UPDATING_CURRENCY_CODES 1557 + " ): " 1558 + currencyDateRelationToString( 1559 recentModernCurrencyCodes, recentMissing)); 1560 missing.removeAll(recentMissing); // not errors 1561 } 1562 if (missing.size() != 0) { 1563 errln( 1564 "Codes in ISO 4217 but not current tender in CLDR " 1565 + "(may need to update as per" 1566 + CLDRURLS.UPDATING_CURRENCY_CODES 1567 + " ): " 1568 + currencyDateRelationToString(nonModernCurrencyCodes, missing)); 1569 } 1570 1571 for (String currency : modernCurrencyCodes.keySet()) { 1572 Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes.getAll(currency); 1573 final String name = testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, currency); 1574 1575 Set<String> isoCountries = isoCurrenciesToCountries.getAll(currency); 1576 if (isoCountries == null) { 1577 isoCountries = new TreeSet<>(); 1578 } 1579 1580 TreeSet<String> cldrCountries = new TreeSet<>(); 1581 for (Pair<String, CurrencyDateInfo> x : data) { 1582 cldrCountries.add(x.getFirst()); 1583 } 1584 if (!isoCountries.equals(cldrCountries)) { 1585 // TODO 17397: remove isKnownIssue and the if around errln when the logknown issue 1586 // goes away. 1587 final boolean skipKnownIssue = 1588 currency.equals("ANG") 1589 && isoCountries.isEmpty() 1590 && cldrCountries.equals(Set.of("CW", "SX")) 1591 && logKnownIssue("CLDR-17397", "Mismatched codes " + cldrCountries); 1592 if (!skipKnownIssue) { 1593 errln( 1594 "Mismatch between ISO and Cldr modern currencies for " 1595 + currency 1596 + "\tISO:" 1597 + isoCountries 1598 + "\tCLDR:" 1599 + cldrCountries); 1600 showCountries("iso-cldr", isoCountries, cldrCountries, missing); 1601 showCountries("cldr-iso", cldrCountries, isoCountries, missing); 1602 } 1603 } 1604 1605 if (oldMatcher.reset(name).find()) { 1606 errln( 1607 "Has 'old' in name but still used " 1608 + "\t" 1609 + currency 1610 + "\t" 1611 + name 1612 + "\t" 1613 + data); 1614 } 1615 if (newMatcher.reset(name).find() 1616 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1617 // find the first use. If older than 5 years, flag as error 1618 if (currencyFirstValid.get(currency).compareTo(LIMIT_FOR_NEW_CURRENCY) < 0) { 1619 errln( 1620 "Has 'new' in name but used since " 1621 + CurrencyDateInfo.formatDate(currencyFirstValid.get(currency)) 1622 + "\t" 1623 + currency 1624 + "\t" 1625 + name 1626 + "\t" 1627 + data); 1628 } else { 1629 logln( 1630 "Has 'new' in name but used since " 1631 + CurrencyDateInfo.formatDate(currencyFirstValid.get(currency)) 1632 + "\t" 1633 + currency 1634 + "\t" 1635 + name 1636 + "\t" 1637 + data); 1638 } 1639 } 1640 } 1641 logln( 1642 "Non-Modern Codes (with dates): " 1643 + nonModernCurrencyCodes.size() 1644 + "\t" 1645 + nonModernCurrencyCodes); 1646 for (String currency : nonModernCurrencyCodes.keySet()) { 1647 final String name = testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, currency); 1648 if (name == null) { 1649 errln("No English name for currency " + currency); 1650 continue; 1651 } 1652 if (newMatcher.reset(name).find() 1653 && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) { 1654 logln( 1655 "Has 'new' in name but NOT used since " 1656 + CurrencyDateInfo.formatDate(currencyLastValid.get(currency)) 1657 + "\t" 1658 + currency 1659 + "\t" 1660 + name 1661 + "\t" 1662 + nonModernCurrencyCodes.getAll(currency)); 1663 } else if (!oldMatcher.reset(name).find() && !OK_TO_NOT_HAVE_OLD.contains(currency)) { 1664 logln( 1665 "Doesn't have 'old' or date range in name but NOT used since " 1666 + CurrencyDateInfo.formatDate(currencyLastValid.get(currency)) 1667 + "\t" 1668 + currency 1669 + "\t" 1670 + name 1671 + "\t" 1672 + nonModernCurrencyCodes.getAll(currency)); 1673 for (Pair<String, CurrencyDateInfo> pair : 1674 nonModernCurrencyCodes.getAll(currency)) { 1675 final String territory = pair.getFirst(); 1676 Set<CurrencyDateInfo> currencyInfo = 1677 SUPPLEMENTAL.getCurrencyDateInfo(territory); 1678 for (CurrencyDateInfo dateInfo : currencyInfo) { 1679 if (dateInfo.getEnd().compareTo(DateConstants.NOW) < 0) { 1680 continue; 1681 } 1682 logln( 1683 "\tCurrencies used instead: " 1684 + territory 1685 + "\t" 1686 + dateInfo 1687 + "\t" 1688 + testInfo.getEnglish() 1689 .getName( 1690 CLDRFile.CURRENCY_NAME, 1691 dateInfo.getCurrency())); 1692 } 1693 } 1694 } 1695 } 1696 Set<String> remainder = new TreeSet<>(); 1697 remainder.addAll(currencyCodes); 1698 remainder.removeAll(nonModernCurrencyCodes.keySet()); 1699 // TODO make this an error, except for allowed exceptions. 1700 logln("Currencies without Territories: " + remainder); 1701 if (territoriesWithoutModernCurrencies.size() != 0) { 1702 errln("Modern territory missing currency: " + territoriesWithoutModernCurrencies); 1703 } 1704 } 1705 currencyDateRelationToString( Relation<String, Pair<String, CurrencyDateInfo>> allCodes, Set<String> filter)1706 private String currencyDateRelationToString( 1707 Relation<String, Pair<String, CurrencyDateInfo>> allCodes, Set<String> filter) { 1708 return allCodes.entrySet().stream() 1709 .filter(p -> filter.contains(p.getKey())) 1710 .map(p -> p.getValue().getSecond().toString()) 1711 .collect(Collectors.joining(", ")); 1712 } 1713 showCountries( final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1714 private void showCountries( 1715 final String title, 1716 Set<String> isoCountries, 1717 Set<String> cldrCountries, 1718 Set<String> missing) { 1719 missing.clear(); 1720 missing.addAll(isoCountries); 1721 missing.removeAll(cldrCountries); 1722 for (String country : missing) { 1723 logln("\t\tExtra in " + title + "\t" + country + " - " + getRegionName(country)); 1724 } 1725 } 1726 TestCurrencyDecimalPlaces()1727 public void TestCurrencyDecimalPlaces() { 1728 IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance(); 1729 Relation<String, IsoCurrencyParser.Data> codeList = isoCodes.getCodeList(); 1730 Set<String> currencyCodes = STANDARD_CODES.getGoodAvailableCodes("currency"); 1731 for (String cc : currencyCodes) { 1732 Set<IsoCurrencyParser.Data> d = codeList.get(cc); 1733 if (d != null) { 1734 for (IsoCurrencyParser.Data x : d) { 1735 CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc); 1736 if (cni.digits != x.getMinorUnit()) { 1737 logln( 1738 "Mismatch between ISO/CLDR for decimal places for currency => " 1739 + cc 1740 + ". ISO = " 1741 + x.getMinorUnit() 1742 + " CLDR = " 1743 + cni.digits); 1744 } 1745 } 1746 } 1747 } 1748 } 1749 1750 /** Verify that we have a default script for every CLDR base language */ TestDefaultScripts()1751 public void TestDefaultScripts() { 1752 SupplementalDataInfo supp = SUPPLEMENTAL; 1753 Map<String, String> likelyData = supp.getLikelySubtags(); 1754 Map<String, String> baseToDefaultContentScript = new HashMap<>(); 1755 for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) { 1756 String script = locale.getScript(); 1757 if (!script.isEmpty() && locale.getCountry().isEmpty()) { 1758 baseToDefaultContentScript.put(locale.getLanguage(), script); 1759 } 1760 } 1761 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 1762 if (LocaleNames.ROOT.equals(locale)) { 1763 continue; 1764 } 1765 if (!StandardCodes.isLocaleAtLeastBasic(locale)) { 1766 continue; 1767 } 1768 CLDRLocale loc = CLDRLocale.getInstance(locale); 1769 String baseLanguage = loc.getLanguage(); 1770 String defaultScript = supp.getDefaultScript(baseLanguage); 1771 1772 String defaultContentScript = baseToDefaultContentScript.get(baseLanguage); 1773 if (defaultContentScript != null) { 1774 assertEquals( 1775 loc + " defaultContentScript = default", 1776 defaultScript, 1777 defaultContentScript); 1778 } 1779 String likely = likelyData.get(baseLanguage); 1780 String likelyScript = 1781 likely == null ? null : CLDRLocale.getInstance(likely).getScript(); 1782 Map<Type, BasicLanguageData> scriptInfo = supp.getBasicLanguageDataMap(baseLanguage); 1783 if (scriptInfo == null) { 1784 if (StandardCodes.isLocaleAtLeastBasic(locale)) { 1785 errln(loc + ": has no BasicLanguageData"); 1786 } else { 1787 logln(loc + ": has no BasicLanguageData (not a basic loc)"); 1788 } 1789 } else { 1790 BasicLanguageData data = scriptInfo.get(Type.primary); 1791 if (data == null) { 1792 data = scriptInfo.get(Type.secondary); 1793 } 1794 if (data == null) { 1795 if (StandardCodes.isLocaleAtLeastBasic(locale)) { 1796 errln(loc + ": has no scripts in BasicLanguageData"); 1797 } else { 1798 logln(loc + ": has no scripts in BasicLanguageData (not a basic loc)"); 1799 } 1800 } else if (!data.getScripts().contains(defaultScript)) { 1801 if (StandardCodes.isLocaleAtLeastBasic(locale)) { 1802 errln( 1803 loc 1804 + ": " 1805 + defaultScript 1806 + " not in BasicLanguageData - check <languages> in supplementalData.xml and language_script.tsv " 1807 + data.getScripts()); 1808 } else { 1809 logln( 1810 loc 1811 + ": " 1812 + defaultScript 1813 + " not in BasicLanguageData - check <languages> in supplementalData.xml and language_script.tsv (not a basic loc) " 1814 + data.getScripts()); 1815 } 1816 } 1817 } 1818 1819 assertEquals(loc + " likely = default", defaultScript, likelyScript); 1820 1821 assertNotNull(loc + ": needs default script", defaultScript); 1822 1823 if (!loc.getScript().isEmpty()) { 1824 if (!loc.getScript().equals(defaultScript)) { 1825 assertNotEquals( 1826 locale + ": only include script if not default", 1827 loc.getScript(), 1828 defaultScript); 1829 } 1830 } 1831 } 1832 } 1833 1834 enum CoverageIssue { 1835 log, 1836 warn, 1837 error 1838 } 1839 TestPluralCompleteness()1840 public void TestPluralCompleteness() { 1841 // Set<String> cardinalLocales = new 1842 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal)); 1843 // Set<String> ordinalLocales = new 1844 // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal)); 1845 // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals = 1846 // PluralRulesFactory.getLocaleToSamplePatterns(); 1847 // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales(); 1848 // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale, 1849 // type).keySet()); 1850 // Map<ULocale, PluralRules> overrideCardinals = 1851 // PluralRulesFactory.getPluralOverrides(); 1852 // Set<ULocale> overrideCardinalLocales = new 1853 // HashSet<ULocale>(overrideCardinals.keySet()); 1854 1855 Set<String> testLocales = 1856 STANDARD_CODES.getLocaleCoverageLocales( 1857 Organization.google, EnumSet.of(Level.MODERN)); 1858 Set<String> allLocales = testInfo.getCldrFactory().getAvailable(); 1859 LanguageTagParser ltp = new LanguageTagParser(); 1860 for (String locale : allLocales) { 1861 // the only known case where plural rules depend on region or script 1862 // is pt_PT 1863 if (locale.equals(LocaleNames.ROOT)) { 1864 continue; 1865 } 1866 ltp.set(locale); 1867 if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) { 1868 continue; 1869 } 1870 CoverageIssue needsCoverage = 1871 testLocales.contains(locale) ? CoverageIssue.error : CoverageIssue.log; 1872 CoverageIssue needsCoverage2 = 1873 needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage; 1874 PluralRulesFactory prf = 1875 PluralRulesFactory.getInstance( 1876 CLDRConfig.getInstance().getSupplementalDataInfo()); 1877 1878 for (PluralType type : PluralType.values()) { 1879 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale, false); 1880 if (pluralInfo == null) { 1881 errOrLog( 1882 needsCoverage, 1883 locale + "\t" + type + " \tmissing plural rules", 1884 "Cldrbug:7839", 1885 "Missing plural data for modern locales"); 1886 continue; 1887 } 1888 Set<Count> counts = pluralInfo.getCounts(); 1889 // if (counts.size() == 1) { 1890 // continue; // skip checking samples 1891 // } 1892 HashSet<String> samples = new HashSet<>(); 1893 EnumSet<Count> countsWithNoSamples = EnumSet.noneOf(Count.class); 1894 Relation<String, Count> samplesToCounts = 1895 Relation.of(new HashMap(), LinkedHashSet.class); 1896 Set<Count> countsFound = prf.getSampleCounts(locale, type.standardType); 1897 StringBuilder failureCases = new StringBuilder(); 1898 for (Count count : counts) { 1899 String pattern = 1900 PluralRulesFactory.getSamplePattern(locale, type.standardType, count); 1901 final String rangeLine = 1902 getRangeLine(count, pluralInfo.getPluralRules(), pattern); 1903 failureCases 1904 .append('\n') 1905 .append(locale) 1906 .append('\t') 1907 .append(type) 1908 .append('\t') 1909 .append(rangeLine); 1910 if (countsFound == null || !countsFound.contains(count)) { 1911 countsWithNoSamples.add(count); 1912 } else { 1913 samplesToCounts.put(pattern, count); 1914 logln(locale + "\t" + type + "\t" + count + "\t" + pattern); 1915 } 1916 } 1917 if (!countsWithNoSamples.isEmpty()) { 1918 errOrLog( 1919 needsCoverage, 1920 locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples, 1921 "cldrbug:7075", 1922 "Missing ordinal minimal pairs"); 1923 errOrLog(needsCoverage2, failureCases.toString()); 1924 } 1925 for (Entry<String, Set<Count>> entry : samplesToCounts.keyValuesSet()) { 1926 if (entry.getValue().size() != 1) { 1927 errOrLog( 1928 needsCoverage, 1929 locale 1930 + "\t" 1931 + type 1932 + "\t duplicate samples: " 1933 + entry.getValue() 1934 + " => «" 1935 + entry.getKey() 1936 + "»", 1937 "cldrbug:7119", 1938 "Some duplicate minimal pairs"); 1939 errOrLog(needsCoverage2, failureCases.toString()); 1940 } 1941 } 1942 } 1943 } 1944 } 1945 errOrLog( CoverageIssue causeError, String message, String logTicket, String logComment)1946 public void errOrLog( 1947 CoverageIssue causeError, String message, String logTicket, String logComment) { 1948 switch (causeError) { 1949 case error: 1950 if (logTicket == null) { 1951 errln(message); 1952 break; 1953 } 1954 logKnownIssue(logTicket, logComment); 1955 // fall through 1956 case warn: 1957 warnln(message); 1958 break; 1959 case log: 1960 logln(message); 1961 break; 1962 } 1963 } 1964 errOrLog(CoverageIssue causeError, String message)1965 public void errOrLog(CoverageIssue causeError, String message) { 1966 errOrLog(causeError, message, null, null); 1967 } 1968 TestNumberingSystemDigits()1969 public void TestNumberingSystemDigits() { 1970 String[] knownExceptions = { 1971 "hanidec", // hanidec is not in codepoint order. 1972 }; 1973 List<String> knownExceptionList = Arrays.asList(knownExceptions); 1974 for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) { 1975 if (knownExceptionList.contains(ns)) { 1976 continue; 1977 } 1978 String digits = SUPPLEMENTAL.getDigits(ns); 1979 int previousChar = 0; 1980 int ch; 1981 1982 for (int i = 0; i < digits.length(); i += Character.charCount(ch)) { 1983 ch = digits.codePointAt(i); 1984 if (i > 0 && ch != previousChar + 1) { 1985 errln( 1986 "Digits for numbering system " 1987 + ns 1988 + " are not in code point order. Previous char = U+" 1989 + Utility.hex(previousChar, 4) 1990 + " Current char = U+" 1991 + Utility.hex(ch, 4)); 1992 break; 1993 } 1994 previousChar = ch; 1995 } 1996 } 1997 } 1998 TestNumberingSystemDigitCompleteness()1999 public void TestNumberingSystemDigitCompleteness() { 2000 List<Integer> unicodeDigits = new ArrayList<>(); 2001 for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) { 2002 if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) { 2003 unicodeDigits.add(cp); 2004 } 2005 } 2006 2007 for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) { 2008 String digits = SUPPLEMENTAL.getDigits(ns); 2009 int ch; 2010 2011 for (int i = 0; i < digits.length(); i += Character.charCount(ch)) { 2012 ch = digits.codePointAt(i); 2013 unicodeDigits.remove(Integer.valueOf(ch)); 2014 } 2015 } 2016 2017 if (unicodeDigits.size() > 0) { 2018 for (Integer i : unicodeDigits) { 2019 errln( 2020 "Unicode digit: " 2021 + UCharacter.getName(i) 2022 + " is not in any numbering system. Script = " 2023 + UScript.getShortName(UScript.getScript(i))); 2024 } 2025 } 2026 } 2027 TestMetazones()2028 public void TestMetazones() { 2029 Date goalMin = new Date(70, 0, 1); 2030 Date goalMax = new Date(300, 0, 2); 2031 ImmutableSet<String> knownTZWithoutMetazone = 2032 ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov"); 2033 for (String timezoneRaw : TimeZone.getAvailableIDs()) { 2034 String timezone = TimeZone.getCanonicalID(timezoneRaw); 2035 String region = TimeZone.getRegion(timezone); 2036 if (!timezone.equals(timezoneRaw) || "001".equals(region)) { 2037 continue; 2038 } 2039 if (knownTZWithoutMetazone.contains(timezone)) { 2040 continue; 2041 } 2042 final Set<MetaZoneRange> ranges = SUPPLEMENTAL.getMetaZoneRanges(timezone); 2043 2044 if (assertNotNull("metazones for " + timezone, ranges)) { 2045 long min = Long.MAX_VALUE; 2046 long max = Long.MIN_VALUE; 2047 for (MetaZoneRange range : ranges) { 2048 if (range.dateRange.from != DateRange.START_OF_TIME) { 2049 min = Math.min(min, range.dateRange.from); 2050 } 2051 if (range.dateRange.to != DateRange.END_OF_TIME) { 2052 max = Math.max(max, range.dateRange.to); 2053 } 2054 } 2055 assertRelation( 2056 timezone + " has metazone before 1970?", true, goalMin, LEQ, new Date(min)); 2057 assertRelation( 2058 timezone + " has metazone until way in the future?", 2059 true, 2060 goalMax, 2061 GEQ, 2062 new Date(max)); 2063 } 2064 } 2065 com.google.common.collect.Interners i; 2066 } 2067 Test9924()2068 public void Test9924() { 2069 Boolean b = org.unicode.cldr.unittest.TestSupplementalInfo.LOCALES_FIXED; 2070 PopulationData zhCNData = 2071 SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(b ? "zh" : "zh_Hans", "CN"); 2072 PopulationData yueCNData = 2073 SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN"); 2074 assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation()); 2075 } 2076 Test10765()2077 public void Test10765() { // 2078 Set<String> surveyToolLanguages = 2079 SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool 2080 Set<String> mainLanguages = new TreeSet<>(); 2081 LanguageTagParser ltp = new LanguageTagParser(); 2082 for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) { 2083 if (StandardCodes.isLocaleAtLeastBasic(locale)) { 2084 mainLanguages.add(ltp.set(locale).getLanguage()); 2085 } 2086 } 2087 // add special codes we want to see anyway 2088 mainLanguages.add(LocaleNames.UND); 2089 mainLanguages.add(LocaleNames.MUL); 2090 mainLanguages.add(LocaleNames.ZXX); 2091 2092 if (!mainLanguages.containsAll(surveyToolLanguages)) { 2093 CoverageLevel2 coverageLevel = 2094 CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale 2095 Set<String> temp = new TreeSet<>(surveyToolLanguages); 2096 temp.removeAll(mainLanguages); 2097 Set<String> modern = new TreeSet<>(); 2098 Set<String> comprehensive = new TreeSet<>(); 2099 for (String lang : temp) { 2100 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang)); 2101 if (level.compareTo(Level.MODERN) <= 0) { 2102 modern.add(lang); 2103 } else { 2104 comprehensive.add(lang); 2105 } 2106 } 2107 warnln( 2108 "«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : " 2109 + getNames(modern)); 2110 logln( 2111 "«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : " 2112 + getNames(comprehensive)); 2113 } 2114 if (!surveyToolLanguages.containsAll(mainLanguages)) { 2115 mainLanguages.removeAll(surveyToolLanguages); 2116 // TODO: See https://unicode-org.atlassian.net/browse/CLDR-14974 2117 // Currently there is a requirement that all locales in main/* are in 2118 // attributeValueValidity.xml 2119 assertEquals( 2120 "main/* languages missing from <variable id='$language'/> in attributeValueValidity.xml", 2121 Collections.EMPTY_SET, 2122 mainLanguages); 2123 } 2124 } 2125 getNames(Set<String> temp)2126 private Set<String> getNames(Set<String> temp) { 2127 Set<String> tempNames = new TreeSet<>(); 2128 for (String langCode : temp) { 2129 tempNames.add( 2130 testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode) 2131 + " (" 2132 + langCode 2133 + ")"); 2134 } 2135 return tempNames; 2136 } 2137 TestGrammarInfo()2138 public void TestGrammarInfo() { 2139 final Logger logger = getLogger(); 2140 Multimap<String, String> allValues = TreeMultimap.create(); 2141 for (String locale : SUPPLEMENTAL.hasGrammarInfo()) { 2142 if (locale.contentEquals("tr")) { 2143 int debug = 0; 2144 } 2145 GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale); 2146 for (GrammaticalTarget target : GrammaticalTarget.values()) { 2147 for (GrammaticalFeature feature : GrammaticalFeature.values()) { 2148 Collection<String> general = 2149 grammarInfo.get(target, feature, GrammaticalScope.general); 2150 for (GrammaticalScope scope : GrammaticalScope.values()) { 2151 Collection<String> units = grammarInfo.get(target, feature, scope); 2152 allValues.putAll(target + "/" + feature + "/" + scope, units); 2153 if (scope != GrammaticalScope.general) { 2154 assertTrue( 2155 general + " > " + scope + " " + units, 2156 general.containsAll(units)); 2157 } 2158 } 2159 } 2160 } 2161 logger.fine(grammarInfo.toString("\n" + locale + "\t")); 2162 } 2163 if (logger.isLoggable(java.util.logging.Level.FINE)) { // if level is at least FINE 2164 logger.fine(""); 2165 for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) { 2166 logger.fine(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue())); 2167 } 2168 } 2169 } 2170 } 2171