1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.HashMultimap; 5 import com.google.common.collect.ImmutableSet; 6 import com.google.common.collect.LinkedListMultimap; 7 import com.google.common.collect.Multimap; 8 import com.google.common.collect.TreeMultimap; 9 import com.ibm.icu.impl.Relation; 10 import com.ibm.icu.impl.Row; 11 import com.ibm.icu.impl.Row.R2; 12 import java.io.File; 13 import java.util.ArrayList; 14 import java.util.Arrays; 15 import java.util.Collection; 16 import java.util.EnumMap; 17 import java.util.EnumSet; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.LinkedHashMap; 21 import java.util.LinkedHashSet; 22 import java.util.List; 23 import java.util.Map; 24 import java.util.Map.Entry; 25 import java.util.Set; 26 import java.util.TreeMap; 27 import java.util.TreeSet; 28 import java.util.regex.Matcher; 29 import java.util.stream.Collectors; 30 import org.unicode.cldr.test.CoverageLevel2; 31 import org.unicode.cldr.test.ExampleGenerator; 32 import org.unicode.cldr.util.CLDRConfig; 33 import org.unicode.cldr.util.CLDRFile; 34 import org.unicode.cldr.util.CLDRFile.Status; 35 import org.unicode.cldr.util.CLDRLocale; 36 import org.unicode.cldr.util.CLDRPaths; 37 import org.unicode.cldr.util.CLDRURLS; 38 import org.unicode.cldr.util.CldrUtility; 39 import org.unicode.cldr.util.Containment; 40 import org.unicode.cldr.util.Counter; 41 import org.unicode.cldr.util.DtdData; 42 import org.unicode.cldr.util.DtdType; 43 import org.unicode.cldr.util.Emoji; 44 import org.unicode.cldr.util.Factory; 45 import org.unicode.cldr.util.GrammarInfo; 46 import org.unicode.cldr.util.GrammarInfo.CaseValues; 47 import org.unicode.cldr.util.GrammarInfo.GenderValues; 48 import org.unicode.cldr.util.Iso3166Data; 49 import org.unicode.cldr.util.LanguageTagParser; 50 import org.unicode.cldr.util.Level; 51 import org.unicode.cldr.util.Organization; 52 import org.unicode.cldr.util.Pair; 53 import org.unicode.cldr.util.PathDescription; 54 import org.unicode.cldr.util.PathHeader; 55 import org.unicode.cldr.util.PathHeader.PageId; 56 import org.unicode.cldr.util.PathHeader.SectionId; 57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 58 import org.unicode.cldr.util.PathHeader.Width; 59 import org.unicode.cldr.util.PathStarrer; 60 import org.unicode.cldr.util.PatternCache; 61 import org.unicode.cldr.util.PatternPlaceholders; 62 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo; 63 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus; 64 import org.unicode.cldr.util.StandardCodes; 65 import org.unicode.cldr.util.SupplementalDataInfo; 66 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 67 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 68 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 69 import org.unicode.cldr.util.With; 70 import org.unicode.cldr.util.XMLFileReader; 71 import org.unicode.cldr.util.XPathParts; 72 73 public class TestPathHeader extends TestFmwkPlus { 74 private static final DtdType DEBUG_DTD_TYPE = null; // DtdType.supplementalData; 75 private static final String COMMON_DIR = CLDRPaths.BASE_DIRECTORY + "common/"; 76 private static final boolean DEBUG = false; 77 main(String[] args)78 public static void main(String[] args) { 79 new TestPathHeader().run(args); 80 } 81 82 static final CLDRConfig info = CLDRConfig.getInstance(); 83 static final Factory factory = info.getCommonAndSeedAndMainAndAnnotationsFactory(); 84 static final CLDRFile english = factory.make("en", true); 85 static final SupplementalDataInfo supplemental = info.getSupplementalDataInfo(); 86 static PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english); 87 private EnumSet<PageId> badZonePages = EnumSet.of(PageId.UnknownT); 88 tempTestAnnotation()89 public void tempTestAnnotation() { 90 // NEW: <annotation cp="">face | grin</annotation> 91 // <annotation cp="" type="tts">grinning face</annotation> 92 93 final String path1 = "//ldml/annotations/annotation[@cp=\"\"]"; 94 PathHeader ph1 = pathHeaderFactory.fromPath(path1); 95 logln(ph1.toString() + "\t" + path1); 96 final String path2 = "//ldml/annotations/annotation[@cp=\"\"][@type=\"tts\"]"; 97 PathHeader ph2 = pathHeaderFactory.fromPath(path2); 98 logln(ph2.toString() + "\t" + path2); 99 final String path3 = "//ldml/annotations/annotation[@cp=\"\"]"; 100 PathHeader ph3 = pathHeaderFactory.fromPath(path2); 101 logln(ph3.toString() + "\t" + path3); 102 103 assertNotEquals("pathheader", ph1, ph2); 104 assertNotEquals("pathheader", ph1.toString(), ph2.toString()); 105 assertRelation("pathheader", true, ph1, TestFmwkPlus.LEQ, ph3); 106 assertRelation("pathheader", true, ph3, TestFmwkPlus.LEQ, ph2); 107 } 108 109 static final String[] MIN_LOCALES = { 110 "root", "en", "de", "ru", "ko" 111 }; // choose locales with range of case/gender structures 112 tempTestCompletenessLdmlDtd()113 public void tempTestCompletenessLdmlDtd() { 114 // List<String> failures = null; 115 pathHeaderFactory.clearCache(); 116 PathChecker pathChecker = new PathChecker(); 117 for (String directory : DtdType.ldml.directories) { 118 Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 119 Set<String> source = factory2.getAvailable(); 120 for (String file : getFilesToTest(source, MIN_LOCALES)) { 121 if (DEBUG) warnln(" TestCompletenessLdmlDtd: " + directory + ", " + file); 122 DtdData dtdData = null; 123 CLDRFile cldrFile = factory2.make(file, true); 124 for (String path : cldrFile.fullIterable()) { 125 pathChecker.checkPathHeader(cldrFile.getDtdData(), path); 126 } 127 } 128 } 129 Set<String> missing = pathHeaderFactory.getUnmatchedRegexes(); 130 if (missing.size() != 0) { 131 for (String e : missing) { 132 errln("Path Regex never matched:\t" + e); 133 } 134 } 135 if (!pathChecker.badHeaders.isEmpty()) { 136 System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL); 137 } 138 } 139 getFilesToTest(Collection<String> source, String... doFirst)140 private Collection<String> getFilesToTest(Collection<String> source, String... doFirst) { 141 LinkedHashSet<String> files = new LinkedHashSet<>(Arrays.asList(doFirst)); 142 files.retainAll(source); // put first 143 files.addAll(new HashSet<>(source)); // now add others semi-randomly 144 int max = Math.min(30, files.size()); 145 if (getInclusion() == 10 || files.size() <= max) { 146 return files; 147 } 148 ArrayList<String> shortFiles = new ArrayList<>(files); 149 if (getInclusion() > 5) { 150 max += (files.size() - 30) * (getInclusion() - 5) / 10; // use proportional amount 151 } 152 return shortFiles.subList(0, max); 153 } 154 TestCompleteness()155 public void TestCompleteness() { 156 PathHeader.Factory pathHeaderFactory2 = PathHeader.getFactory(english); 157 // List<String> failures = null; 158 pathHeaderFactory2.clearCache(); 159 Multimap<PathHeader.PageId, PathHeader.SectionId> pageUniqueness = TreeMultimap.create(); 160 Multimap<String, Pair<PathHeader.SectionId, PathHeader.PageId>> headerUniqueness = 161 TreeMultimap.create(); 162 Set<String> toTest; 163 switch (getInclusion()) { 164 default: 165 toTest = StandardCodes.make().getLocaleCoverageLocales(Organization.cldr); 166 break; 167 case 10: 168 toTest = factory.getAvailable(); 169 break; 170 } 171 toTest = ImmutableSet.<String>builder().add("en").addAll(toTest).build(); 172 Set<String> seenPaths = new HashSet<>(); 173 Set<String> localSeenPaths = new TreeSet<>(); 174 for (String locale : toTest) { 175 localSeenPaths.clear(); 176 for (String p : factory.make(locale, true).fullIterable()) { 177 if (p.startsWith("//ldml/identity/")) { 178 continue; 179 } 180 if (seenPaths.contains(p)) { 181 continue; 182 } 183 seenPaths.add(p); 184 localSeenPaths.add(p); 185 // if (p.contains("symbol[@alt") && failures == null) { 186 // PathHeader result = pathHeaderFactory2.fromPath(p, failures = new 187 // ArrayList<String>()); 188 // logln("Matching " + p + ": " + result + "\t" + 189 // result.getSurveyToolStatus()); 190 // for (String failure : failures) { 191 // logln("\t" + failure); 192 // } 193 // } 194 PathHeader ph; 195 try { 196 ph = pathHeaderFactory2.fromPath(p); 197 } catch (Exception e1) { 198 try { 199 ph = pathHeaderFactory2.fromPath(p); 200 } catch (Exception e2) { 201 throw new IllegalArgumentException(locale + ":\t" + p, e2); 202 } 203 } 204 if (ph == null) { 205 errln("Failed to create path from: " + p); 206 continue; 207 } 208 final SectionId sectionId = ph.getSectionId(); 209 if (sectionId != SectionId.Special) { 210 pageUniqueness.put(ph.getPageId(), sectionId); 211 headerUniqueness.put(ph.getHeader(), new Pair<>(sectionId, ph.getPageId())); 212 } 213 } 214 if (!localSeenPaths.isEmpty()) { 215 logln(locale + ": checked " + localSeenPaths.size() + " new paths"); 216 } 217 } 218 Set<String> missing = pathHeaderFactory2.getUnmatchedRegexes(); 219 if (missing.size() != 0) { 220 for (String e : missing) { 221 if (e.contains("//ldml/")) { 222 if (e.contains("//ldml/rbnf/") 223 || e.contains("//ldml/segmentations/") 224 || e.contains("//ldml/collations/")) { 225 continue; 226 } 227 logln("Path Regex never matched:\t" + e); 228 } 229 } 230 } 231 232 for (Entry<PageId, Collection<SectionId>> e : pageUniqueness.asMap().entrySet()) { 233 Collection<SectionId> values = e.getValue(); 234 if (values.size() != 1) { 235 warnln("Duplicate page in section: " + CldrUtility.toString(e)); 236 } 237 } 238 239 for (Entry<String, Collection<Pair<SectionId, PageId>>> e : 240 headerUniqueness.asMap().entrySet()) { 241 Collection<Pair<SectionId, PageId>> values = e.getValue(); 242 if (values.size() != 1) { 243 warnln("Duplicate header in (section,page): " + CldrUtility.toString(e)); 244 } 245 } 246 } 247 Test6170()248 public void Test6170() { 249 String p1 = 250 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"speed-kilometer-per-hour\"]/unitPattern[@count=\"other\"]"; 251 String p2 = 252 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"area-square-meter\"]/unitPattern[@count=\"other\"]"; 253 PathHeader ph1 = pathHeaderFactory.fromPath(p1); 254 PathHeader ph2 = pathHeaderFactory.fromPath(p2); 255 int comp12 = ph1.compareTo(ph2); 256 int comp21 = ph2.compareTo(ph1); 257 assertEquals("comp ph", comp12, -comp21); 258 } 259 TestVariant()260 public void TestVariant() { 261 PathHeader p1 = 262 pathHeaderFactory.fromPath( 263 "//ldml/localeDisplayNames/languages/language[@type=\"ug\"][@alt=\"variant\"]"); 264 PathHeader p2 = 265 pathHeaderFactory.fromPath( 266 "//ldml/localeDisplayNames/languages/language[@type=\"ug\"]"); 267 assertNotEquals("variants", p1, p2); 268 assertNotEquals("variants", p1.toString(), p2.toString()); 269 // Code Lists Languages Arabic Script ug-variant 270 } 271 Test4587()272 public void Test4587() { 273 String test = 274 "//ldml/dates/timeZoneNames/metazone[@type=\"Pacific/Wallis\"]/short/standard"; 275 PathHeader ph = pathHeaderFactory.fromPath(test); 276 if (ph == null) { 277 errln("Failure with " + test); 278 } else { 279 logln(ph + "\t" + test); 280 } 281 } 282 TestMiscPatterns()283 public void TestMiscPatterns() { 284 String test = 285 "//ldml/numbers/miscPatterns[@numberSystem=\"arab\"]/pattern[@type=\"atLeast\"]"; 286 PathHeader ph = pathHeaderFactory.fromPath(test); 287 assertNotNull("MiscPatterns path not found", ph); 288 if (false) System.out.println(english.getStringValue(test)); 289 } 290 TestPluralOrder()291 public void TestPluralOrder() { 292 Set<PathHeader> sorted = new TreeSet<>(); 293 for (String locale : new String[] {"ru", "ar", "ja"}) { 294 sorted.clear(); 295 CLDRFile cldrFile = info.getCLDRFile(locale, true); 296 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale); 297 for (String path : cldrFile.fullIterable()) { 298 if (!path.contains("@count")) { 299 continue; 300 } 301 Level level = coverageLevel.getLevel(path); 302 if (Level.MODERN.compareTo(level) < 0) { 303 continue; 304 } 305 PathHeader p = pathHeaderFactory.fromPath(path); 306 sorted.add(p); 307 } 308 for (PathHeader p : sorted) { 309 logln(locale + "\t" + p + "\t" + p.getOriginalPath()); 310 } 311 } 312 } 313 314 static final String APPEND_TIMEZONE = 315 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]"; 316 static final String APPEND_TIMEZONE_END = 317 "/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]"; 318 static final String BEFORE_PH = 319 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"ms\"]"; 320 static final String AFTER_PH = 321 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"d\"]/greatestDifference[@id=\"d\"]"; 322 TestAppendTimezone()323 public void TestAppendTimezone() { 324 CLDRFile cldrFile = info.getEnglish(); 325 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en"); 326 assertEquals( 327 "appendItem:Timezone", Level.MODERATE, coverageLevel.getLevel(APPEND_TIMEZONE)); 328 329 PathHeader ph = pathHeaderFactory.fromPath(APPEND_TIMEZONE); 330 assertEquals("appendItem:Timezone pathheader", "Timezone", ph.getCode()); 331 // check that they are in the right place (they weren't before!) 332 PathHeader phBefore = pathHeaderFactory.fromPath(BEFORE_PH); 333 PathHeader phAfter = pathHeaderFactory.fromPath(AFTER_PH); 334 assertTrue(phBefore, LEQ, ph); 335 assertTrue(ph, LEQ, phAfter); 336 337 PathDescription pathDescription = 338 new PathDescription( 339 supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE); 340 String description = pathDescription.getDescription(APPEND_TIMEZONE, "tempvalue", null); 341 assertTrue("appendItem:Timezone pathDescription", description.contains("“Timezone”")); 342 343 PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance(); 344 PlaceholderStatus status = patternPlaceholders.getStatus(APPEND_TIMEZONE); 345 assertEquals("appendItem:Timezone placeholders", PlaceholderStatus.REQUIRED, status); 346 347 Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders.get(APPEND_TIMEZONE); 348 PlaceholderInfo placeholderInfo2 = placeholderInfo.get("{1}"); 349 if (assertNotNull("appendItem:Timezone placeholders", placeholderInfo2)) { 350 assertEquals( 351 "appendItem:Timezone placeholders", 352 "APPEND_FIELD_FORMAT", 353 placeholderInfo2.name); 354 assertEquals( 355 "appendItem:Timezone placeholders", "Pacific Time", placeholderInfo2.example); 356 } 357 ExampleGenerator eg = new ExampleGenerator(cldrFile, cldrFile); 358 String example = 359 eg.getExampleHtml(APPEND_TIMEZONE, cldrFile.getStringValue(APPEND_TIMEZONE)); 360 String result = ExampleGenerator.simplify(example, false); 361 assertEquals("", "〖❬6:25:59 PM❭ ❬GMT❭〗", result); 362 } 363 TestOptional()364 public void TestOptional() { 365 if (true) return; 366 Map<PathHeader, String> sorted = new TreeMap<>(); 367 for (String locale : new String[] {"af"}) { 368 sorted.clear(); 369 CLDRFile cldrFile = info.getCLDRFile(locale, true); 370 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale); 371 for (String path : cldrFile.fullIterable()) { 372 Level level = coverageLevel.getLevel(path); 373 if (supplemental.isDeprecated(DtdType.ldml, path)) { 374 continue; 375 } 376 377 if (Level.COMPREHENSIVE.compareTo(level) != 0) { 378 continue; 379 } 380 381 PathHeader ph = pathHeaderFactory.fromPath(path); 382 if (ph == null || ph.shouldHide()) { 383 continue; 384 } 385 final SurveyToolStatus status = ph.getSurveyToolStatus(); 386 sorted.put(ph, locale + "\t" + status + "\t" + ph + "\t" + ph.getOriginalPath()); 387 } 388 Set<String> codes = new LinkedHashSet<>(); 389 PathHeader old = null; 390 String line = null; 391 for (Entry<PathHeader, String> s : sorted.entrySet()) { 392 PathHeader p = s.getKey(); 393 String v = s.getValue(); 394 if (old == null) { 395 line = v; 396 codes.add(p.getCode()); 397 } else if (p.getSectionId() == old.getSectionId() 398 && p.getPageId() == old.getPageId() 399 && p.getHeader().equals(old.getHeader())) { 400 codes.add(p.getCode()); 401 } else { 402 logln(line + "\t" + codes.toString()); 403 codes.clear(); 404 line = v; 405 codes.add(p.getCode()); 406 } 407 old = p; 408 } 409 logln(line + "\t" + codes.toString()); 410 } 411 } 412 TestPluralCanonicals()413 public void TestPluralCanonicals() { 414 Relation<String, String> data = 415 Relation.of(new LinkedHashMap<String, Set<String>>(), TreeSet.class); 416 for (String locale : factory.getAvailable()) { 417 if (locale.contains("_")) { 418 continue; 419 } 420 PluralInfo info = supplemental.getPlurals(PluralType.cardinal, locale); 421 Set<String> keywords = info.getCanonicalKeywords(); 422 data.put(keywords.toString(), locale); 423 } 424 for (Entry<String, Set<String>> entry : data.keyValuesSet()) { 425 logln(entry.getKey() + "\t" + entry.getValue()); 426 } 427 } 428 TestPluralPaths()429 public void TestPluralPaths() { 430 // do the following line once, when the file is opened 431 Set<String> filePaths = pathHeaderFactory.pathsForFile(english); 432 433 // check that English doesn't contain few or many 434 verifyContains(PageId.Duration, filePaths, "few", false); 435 verifyContains(PageId.C_NAmerica, filePaths, "many", false); 436 verifyContains(PageId.C_SAmerica, filePaths, "many", false); 437 verifyContains(PageId.C_NWEurope, filePaths, "many", false); 438 verifyContains(PageId.C_SEEurope, filePaths, "many", false); 439 verifyContains(PageId.C_NAfrica, filePaths, "many", false); 440 verifyContains(PageId.C_WAfrica, filePaths, "many", false); 441 verifyContains(PageId.C_SAfrica, filePaths, "many", false); 442 verifyContains(PageId.C_EAfrica, filePaths, "many", false); 443 verifyContains(PageId.C_CAsia, filePaths, "many", false); 444 verifyContains(PageId.C_WAsia, filePaths, "many", false); 445 verifyContains(PageId.C_SEAsia, filePaths, "many", false); 446 verifyContains(PageId.C_Oceania, filePaths, "many", false); 447 verifyContains(PageId.C_Unknown, filePaths, "many", false); 448 449 // check that Arabic does contain few and many 450 filePaths = pathHeaderFactory.pathsForFile(info.getCLDRFile("ar", true)); 451 452 verifyContains(PageId.Duration, filePaths, "few", true); 453 verifyContains(PageId.C_NAmerica, filePaths, "many", true); 454 verifyContains(PageId.C_SAmerica, filePaths, "many", true); 455 verifyContains(PageId.C_NWEurope, filePaths, "many", true); 456 verifyContains(PageId.C_SEEurope, filePaths, "many", true); 457 verifyContains(PageId.C_NAfrica, filePaths, "many", true); 458 verifyContains(PageId.C_WAfrica, filePaths, "many", true); 459 verifyContains(PageId.C_SAfrica, filePaths, "many", true); 460 verifyContains(PageId.C_EAfrica, filePaths, "many", true); 461 verifyContains(PageId.C_CAsia, filePaths, "many", true); 462 verifyContains(PageId.C_WAsia, filePaths, "many", true); 463 verifyContains(PageId.C_SEAsia, filePaths, "many", true); 464 verifyContains(PageId.C_Oceania, filePaths, "many", true); 465 verifyContains(PageId.C_Unknown, filePaths, "many", true); 466 } 467 TestCoverage()468 public void TestCoverage() { 469 Map<Row.R2<SectionId, PageId>, Counter<Level>> data = new TreeMap<>(); 470 CLDRFile cldrFile = english; 471 for (String path : cldrFile.fullIterable()) { 472 if (supplemental.isDeprecated(DtdType.ldml, path)) { 473 errln("Deprecated path in English: " + path); 474 continue; 475 } 476 Level level = supplemental.getCoverageLevel(path, cldrFile.getLocaleID()); 477 PathHeader p = pathHeaderFactory.fromPath(path); 478 SurveyToolStatus status = p.getSurveyToolStatus(); 479 480 boolean hideCoverage = level == Level.COMPREHENSIVE; 481 boolean hidePathHeader = p.shouldHide(); 482 if (hidePathHeader != hideCoverage) { 483 String message = "PathHeader: " + status + ", Coverage: " + level + ": " + path; 484 if (hidePathHeader && !hideCoverage) { 485 errln(message); 486 } else if (!hidePathHeader && hideCoverage) { 487 logln(message); 488 } 489 } 490 final R2<SectionId, PageId> key = Row.of(p.getSectionId(), p.getPageId()); 491 Counter<Level> counter = data.get(key); 492 if (counter == null) { 493 data.put(key, counter = new Counter<>()); 494 } 495 counter.add(level, 1); 496 } 497 StringBuffer b = new StringBuffer("\t"); 498 for (Level level : Level.values()) { 499 b.append("\t" + level); 500 } 501 logln(b.toString()); 502 for (Entry<R2<SectionId, PageId>, Counter<Level>> entry : data.entrySet()) { 503 b.setLength(0); 504 b.append(entry.getKey().get0() + "\t" + entry.getKey().get1()); 505 Counter<Level> counter = entry.getValue(); 506 long total = 0; 507 for (Level level : Level.values()) { 508 total += counter.getCount(level); 509 b.append("\t" + total); 510 } 511 logln(b.toString()); 512 } 513 } 514 Test00AFile()515 public void Test00AFile() { 516 final String localeId = "en"; 517 Counter<Level> counter = new Counter<>(); 518 Map<String, PathHeader> uniqueness = new HashMap<>(); 519 Set<String> alreadySeen = new HashSet<>(); 520 check(localeId, true, uniqueness, alreadySeen); 521 // check paths 522 for (Entry<SectionId, Set<PageId>> sectionAndPages : 523 PathHeader.Factory.getSectionIdsToPageIds().keyValuesSet()) { 524 final SectionId section = sectionAndPages.getKey(); 525 if (section == SectionId.Supplemental || section == SectionId.BCP47) { 526 continue; 527 } 528 logln(section.toString()); 529 for (PageId page : sectionAndPages.getValue()) { 530 final Set<String> cachedPaths = PathHeader.Factory.getCachedPaths(section, page); 531 if (cachedPaths == null) { 532 if (!badZonePages.contains(page) && page != PageId.Unknown) { 533 errln("Null pages for: " + section + "\t" + page); 534 } 535 } else if (section == SectionId.Special && page == PageId.Unknown) { 536 // skip 537 } else if (section == SectionId.Timezones && page == PageId.UnknownT) { 538 // skip 539 } else if (section == SectionId.Misc && page == PageId.Transforms) { 540 // skip 541 } else { 542 543 int count2 = cachedPaths.size(); 544 if (count2 == 0) { 545 warnln("Missing pages for: " + section + "\t" + page); 546 } else { 547 counter.clear(); 548 for (String s : cachedPaths) { 549 Level coverage = supplemental.getCoverageLevel(s, localeId); 550 counter.add(coverage, 1); 551 } 552 String countString = ""; 553 int total = 0; 554 for (Level item : Level.values()) { 555 long count = counter.get(item); 556 if (count != 0) { 557 if (!countString.isEmpty()) { 558 countString += ",\t+"; 559 } 560 total += count; 561 countString += item + "=" + total; 562 } 563 } 564 logln("\t" + page + "\t" + countString); 565 if (page.toString().startsWith("Unknown")) { 566 logln("\t\t" + cachedPaths); 567 } 568 } 569 } 570 } 571 } 572 } 573 TestMetazones()574 public void TestMetazones() { 575 576 CLDRFile nativeFile = info.getEnglish(); 577 Set<PathHeader> pathHeaders = getPathHeaders(nativeFile); 578 // String oldPage = ""; 579 String oldHeader = ""; 580 for (PathHeader entry : pathHeaders) { 581 final String page = entry.getPage(); 582 // if (!oldPage.equals(page)) { 583 // logln(page); 584 // oldPage = page; 585 // } 586 String header = entry.getHeader(); 587 if (!oldHeader.equals(header)) { 588 logln(page + "\t" + header); 589 oldHeader = header; 590 } 591 } 592 } 593 getPathHeaders(CLDRFile nativeFile)594 public Set<PathHeader> getPathHeaders(CLDRFile nativeFile) { 595 Set<PathHeader> pathHeaders = new TreeSet<>(); 596 for (String path : nativeFile.fullIterable()) { 597 PathHeader p = pathHeaderFactory.fromPath(path); 598 pathHeaders.add(p); 599 } 600 return pathHeaders; 601 } 602 verifyContains( PageId pageId, Set<String> filePaths, String substring, boolean contains)603 public void verifyContains( 604 PageId pageId, Set<String> filePaths, String substring, boolean contains) { 605 String path; 606 path = findOneContaining(allPaths(pageId, filePaths), substring); 607 if (contains) { 608 if (path == null) { 609 errln("No path contains <" + substring + ">"); 610 } 611 } else { 612 if (path != null) { 613 errln("Path contains <" + substring + ">\t" + path); 614 } 615 } 616 } 617 findOneContaining(Collection<String> allPaths, String substring)618 private String findOneContaining(Collection<String> allPaths, String substring) { 619 for (String path : allPaths) { 620 if (path.contains(substring)) { 621 return path; 622 } 623 } 624 return null; 625 } 626 allPaths(PageId pageId, Set<String> filePaths)627 public Set<String> allPaths(PageId pageId, Set<String> filePaths) { 628 Set<String> result = PathHeader.Factory.getCachedPaths(pageId.getSectionId(), pageId); 629 result.retainAll(filePaths); 630 return result; 631 } 632 TestUniqueness()633 public void TestUniqueness() { 634 Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 635 Set<String> source = factory2.getAvailable(); 636 for (String file : getFilesToTest(source, MIN_LOCALES)) { 637 CLDRFile nativeFile = factory2.make(file, true); 638 Map<PathHeader, String> headerToPath = new HashMap<>(); 639 Map<String, String> headerVisibleToPath = new HashMap<>(); 640 for (String path : nativeFile.fullIterable()) { 641 PathHeader p = pathHeaderFactory.fromPath(path); 642 if (p.getSectionId() == SectionId.Special) { 643 continue; 644 } 645 String old = headerToPath.get(p); 646 if (old == null) { 647 headerToPath.put(p, path); 648 } else if (!old.equals(path)) { 649 if (true) { // for debugging 650 pathHeaderFactory.clearCache(); 651 List<String> failuresOld = new ArrayList<>(); 652 pathHeaderFactory.fromPath(old, failuresOld); 653 List<String> failuresPath = new ArrayList<>(); 654 pathHeaderFactory.fromPath(path, failuresPath); 655 } 656 errln(file + " collision with path " + p + "\t" + old + "\t" + path); 657 } 658 final String visible = p.toString(); 659 old = headerVisibleToPath.get(visible); 660 if (old == null) { 661 headerVisibleToPath.put(visible, path); 662 } else if (!old.equals(path)) { 663 errln("Collision with path " + visible + "\t" + old + "\t" + path); 664 } 665 } 666 } 667 } 668 TestStatus()669 public void TestStatus() { 670 CLDRFile nativeFile = info.getEnglish(); 671 PathStarrer starrer = new PathStarrer(); 672 EnumMap<SurveyToolStatus, Relation<String, String>> info2 = 673 new EnumMap<>(SurveyToolStatus.class); 674 Set<String> nuked = new HashSet<>(); 675 Set<String> deprecatedStar = new HashSet<>(); 676 677 for (String path : nativeFile.fullIterable()) { 678 679 PathHeader p = pathHeaderFactory.fromPath(path); 680 final SurveyToolStatus surveyToolStatus = p.getSurveyToolStatus(); 681 682 if (p.getSectionId() == SectionId.Special 683 && surveyToolStatus == SurveyToolStatus.READ_WRITE) { 684 errln("SurveyToolStatus should not be " + surveyToolStatus + ": " + p); 685 } 686 687 String starred = starrer.set(path); 688 List<String> attr = starrer.getAttributes(); 689 if (surveyToolStatus != SurveyToolStatus.READ_WRITE) { 690 nuked.add(starred); 691 } 692 693 // check against deprecated 694 boolean isDeprecated = supplemental.isDeprecated(DtdType.ldml, path); 695 if (isDeprecated != (surveyToolStatus == SurveyToolStatus.DEPRECATED)) { 696 if (!deprecatedStar.contains(starred)) { 697 errln( 698 "Different from DtdData deprecated:\t" 699 + isDeprecated 700 + "\t" 701 + surveyToolStatus 702 + "\t" 703 + path); 704 deprecatedStar.add(starred); 705 } 706 } 707 708 Relation<String, String> data = info2.get(surveyToolStatus); 709 if (data == null) { 710 info2.put( 711 surveyToolStatus, 712 data = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class)); 713 } 714 data.put(starred, Joiner.on("|").join(attr)); 715 } 716 for (Entry<SurveyToolStatus, Relation<String, String>> entry : info2.entrySet()) { 717 final SurveyToolStatus status = entry.getKey(); 718 for (Entry<String, Set<String>> item : entry.getValue().keyValuesSet()) { 719 final String starred = item.getKey(); 720 if (status == SurveyToolStatus.READ_WRITE && !nuked.contains(starred)) { 721 continue; 722 } 723 logln(status + "\t" + starred + "\t" + item.getValue()); 724 } 725 } 726 } 727 TestPathsNotInEnglish()728 public void TestPathsNotInEnglish() { 729 Set<String> englishPaths = new HashSet<>(); 730 for (String path : english.fullIterable()) { 731 englishPaths.add(path); 732 } 733 Set<String> alreadySeen = new HashSet<>(englishPaths); 734 735 for (String locale : factory.getAvailable()) { 736 CLDRFile nativeFile = info.getCLDRFile(locale, false); 737 CoverageLevel2 coverageLevel2 = null; 738 for (String path : nativeFile.fullIterable()) { 739 if (alreadySeen.contains(path) || path.contains("@count")) { 740 continue; 741 } 742 if (coverageLevel2 == null) { 743 coverageLevel2 = CoverageLevel2.getInstance(locale); 744 } 745 Level level = coverageLevel2.getLevel(path); 746 if (Level.COMPREHENSIVE.compareTo(level) < 0) { 747 continue; 748 } 749 logln("Path not in English\t" + locale + "\t" + path); 750 alreadySeen.add(path); 751 } 752 } 753 } 754 TestPathDescriptionCompleteness()755 public void TestPathDescriptionCompleteness() { 756 PathDescription pathDescription = 757 new PathDescription( 758 supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE); 759 Matcher normal = 760 PatternCache.get("https://cldr.unicode.org/translation/[-a-zA-Z0-9_]").matcher(""); 761 // https://cldr.unicode.org/translation/plurals#TOC-Minimal-Pairs 762 Set<String> alreadySeen = new HashSet<>(); 763 PathStarrer starrer = new PathStarrer(); 764 765 checkPathDescriptionCompleteness( 766 pathDescription, 767 normal, 768 "//ldml/numbers/defaultNumberingSystem", 769 alreadySeen, 770 starrer); 771 for (PathHeader pathHeader : getPathHeaders(english)) { 772 if (pathHeader.shouldHide()) { 773 continue; 774 } 775 String path = pathHeader.getOriginalPath(); 776 checkPathDescriptionCompleteness(pathDescription, normal, path, alreadySeen, starrer); 777 } 778 } 779 checkPathDescriptionCompleteness( PathDescription pathDescription, Matcher normal, String path, Set<String> alreadySeen, PathStarrer starrer)780 public void checkPathDescriptionCompleteness( 781 PathDescription pathDescription, 782 Matcher normal, 783 String path, 784 Set<String> alreadySeen, 785 PathStarrer starrer) { 786 String value = english.getStringValue(path); 787 String description = pathDescription.getDescription(path, value, null); 788 String starred = starrer.set(path); 789 if (alreadySeen.contains(starred)) { 790 return; 791 } else if (description == null) { 792 errln("Path has no description:\t" + value + "\t" + path); 793 } else if (!description.contains("https://")) { 794 errln("Description has no URL:\t" + description + "\t" + value + "\t" + path); 795 } else if (!normal.reset(description).find()) { 796 errln( 797 "Description has generic URL, fix to be specific:\t" 798 + description 799 + "\t" 800 + value 801 + "\t" 802 + path); 803 } else if (description == PathDescription.MISSING_DESCRIPTION) { 804 errln("Fallback Description:\t" + value + "\t" + path); 805 } else { 806 return; 807 } 808 // Add if we had a problem, keeping us from being overwhelmed with 809 // errors. 810 alreadySeen.add(starred); 811 } 812 TestTerritoryOrder()813 public void TestTerritoryOrder() { 814 final Set<String> goodAvailableCodes = 815 StandardCodes.make().getGoodAvailableCodes("territory"); 816 Set<String> results = showContained("001", 0, new HashSet<>(goodAvailableCodes)); 817 results.remove("ZZ"); 818 results.removeAll(Iso3166Data.getRegionCodesNotForTranslation()); 819 for (String territory : results) { 820 String sub = Containment.getSubcontinent(territory); 821 String cont = Containment.getContinent(territory); 822 errln( 823 "Missing\t" 824 + getNameAndOrder(territory) 825 + "\t" 826 + getNameAndOrder(sub) 827 + "\t" 828 + getNameAndOrder(cont)); 829 } 830 } 831 showContained(String territory, int level, Set<String> soFar)832 private Set<String> showContained(String territory, int level, Set<String> soFar) { 833 if (!soFar.contains(territory)) { 834 return soFar; 835 } 836 soFar.remove(territory); 837 Set<String> contained = supplemental.getContained(territory); 838 if (contained == null) { 839 return soFar; 840 } 841 for (String containedItem : contained) { 842 logln( 843 level 844 + "\t" 845 + getNameAndOrder(territory) 846 + "\t" 847 + getNameAndOrder(containedItem)); 848 } 849 for (String containedItem : contained) { 850 showContained(containedItem, level + 1, soFar); 851 } 852 return soFar; 853 } 854 getNameAndOrder(String territory)855 private String getNameAndOrder(String territory) { 856 return territory 857 + "\t" 858 + english.getName(CLDRFile.TERRITORY_NAME, territory) 859 + "\t" 860 + Containment.getOrder(territory); 861 } 862 TestZCompleteness()863 public void TestZCompleteness() { 864 Map<String, PathHeader> uniqueness = new HashMap<>(); 865 Set<String> alreadySeen = new HashSet<>(); 866 LanguageTagParser ltp = new LanguageTagParser(); 867 int count = 0; 868 for (String locale : factory.getAvailable()) { 869 if (!ltp.set(locale).getRegion().isEmpty()) { 870 continue; 871 } 872 check(locale, false, uniqueness, alreadySeen); 873 ++count; 874 } 875 logln("Count:\t" + count); 876 } 877 check( String localeID, boolean resolved, Map<String, PathHeader> uniqueness, Set<String> alreadySeen)878 public void check( 879 String localeID, 880 boolean resolved, 881 Map<String, PathHeader> uniqueness, 882 Set<String> alreadySeen) { 883 CLDRFile nativeFile = info.getCLDRFile(localeID, resolved); 884 int count = 0; 885 for (String path : nativeFile) { 886 if (alreadySeen.contains(path)) { 887 continue; 888 } 889 alreadySeen.add(path); 890 final PathHeader pathHeader = pathHeaderFactory.fromPath(path); 891 ++count; 892 if (pathHeader == null) { 893 errln("Null pathheader for " + path); 894 } else { 895 String visible = pathHeader.toString(); 896 PathHeader old = uniqueness.get(visible); 897 if (pathHeader.getSectionId() == SectionId.Timezones) { 898 final PageId pageId = pathHeader.getPageId(); 899 if (badZonePages.contains(pageId) && !pathHeader.getCode().equals("Unknown")) { 900 String msg = "Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path; 901 if (!logKnownIssue( 902 "cldrbug:7802", "ICU/CLDR time zone data sync problem - " + msg)) { 903 errln("Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path); 904 } 905 } 906 } 907 if (old == null) { 908 if (pathHeader.getSection().equals("Special")) { 909 if (pathHeader.getSection().equals("Unknown")) { 910 errln( 911 "PathHeader has fallback: " 912 + visible 913 + "\t" 914 + pathHeader.getOriginalPath()); 915 // } else { 916 // logln("Special:\t" + visible + "\t" + 917 // pathHeader.getOriginalPath()); 918 } 919 } 920 uniqueness.put(visible, pathHeader); 921 } else if (!old.equals(pathHeader)) { 922 if (pathHeader.getSectionId() == SectionId.Special) { 923 logln( 924 "Special PathHeader not unique: " 925 + visible 926 + "\t" 927 + pathHeader.getOriginalPath() 928 + "\t" 929 + old.getOriginalPath()); 930 } else { 931 errln( 932 "PathHeader not unique: " 933 + visible 934 + "\t" 935 + pathHeader.getOriginalPath() 936 + "\t" 937 + old.getOriginalPath()); 938 } 939 } 940 } 941 } 942 logln(localeID + "\t" + count); 943 } 944 TestContainment()945 public void TestContainment() { 946 Map<String, Map<String, String>> metazoneToRegionToZone = 947 supplemental.getMetazoneToRegionToZone(); 948 Map<String, String> metazoneToContinent = supplemental.getMetazoneToContinentMap(); 949 for (String metazone : metazoneToRegionToZone.keySet()) { 950 Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone); 951 String worldZone = regionToZone.get("001"); 952 String territory = Containment.getRegionFromZone(worldZone); 953 if (territory == null) { 954 territory = "ZZ"; 955 } 956 String cont = Containment.getContinent(territory); 957 int order = Containment.getOrder(territory); 958 String sub = Containment.getSubcontinent(territory); 959 String revision = PathHeader.getMetazonePageTerritory(metazone); 960 String continent = metazoneToContinent.get(metazone); 961 if (continent == null) { 962 continent = "UnknownT"; 963 } 964 // Russia, Antarctica => territory 965 // in Australasia, Asia, S. America => subcontinent 966 // in N. America => N. America (grouping of 3 subcontinents) 967 // in everything else => continent 968 969 if (territory.equals("RU")) { 970 assertEquals("Russia special case", "RU", revision); 971 } else if (territory.equals("US")) { 972 assertEquals("N. America special case", "003", revision); 973 } else if (territory.equals("BR")) { 974 assertEquals("S. America special case", "005", revision); 975 } 976 if (isVerbose()) { 977 String name = english.getName(CLDRFile.TERRITORY_NAME, cont); 978 String name2 = english.getName(CLDRFile.TERRITORY_NAME, sub); 979 String name3 = english.getName(CLDRFile.TERRITORY_NAME, territory); 980 String name4 = english.getName(CLDRFile.TERRITORY_NAME, revision); 981 982 logln( 983 metazone + "\t" + continent + "\t" + name + "\t" + name2 + "\t" + name3 984 + "\t" + order + "\t" + name4); 985 } 986 } 987 } 988 TestZ()989 public void TestZ() { 990 PathStarrer pathStarrer = new PathStarrer(); 991 pathStarrer.setSubstitutionPattern("%A"); 992 993 Set<PathHeader> sorted = new TreeSet<>(); 994 Map<String, String> missing = new TreeMap<>(); 995 Map<String, String> skipped = new TreeMap<>(); 996 Map<String, String> collide = new TreeMap<>(); 997 998 logln("Traversing Paths"); 999 for (String path : english) { 1000 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1001 String value = english.getStringValue(path); 1002 if (pathHeader == null) { 1003 final String starred = pathStarrer.set(path); 1004 missing.put(starred, value + "\t" + path); 1005 continue; 1006 } 1007 if (pathHeader.getSection().equalsIgnoreCase("skip")) { 1008 final String starred = pathStarrer.set(path); 1009 skipped.put(starred, value + "\t" + path); 1010 continue; 1011 } 1012 sorted.add(pathHeader); 1013 } 1014 logln("\nConverted:\t" + sorted.size()); 1015 String lastHeader = ""; 1016 String lastPage = ""; 1017 String lastSection = ""; 1018 List<String> threeLevel = new ArrayList<>(); 1019 Status status = new Status(); 1020 CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en"); 1021 1022 for (PathHeader pathHeader : sorted) { 1023 String original = pathHeader.getOriginalPath(); 1024 if (!original.equals(status.pathWhereFound)) { 1025 continue; 1026 } 1027 if (!lastSection.equals(pathHeader.getSection())) { 1028 logln(""); 1029 threeLevel.add(pathHeader.getSection()); 1030 threeLevel.add("\t" + pathHeader.getPage()); 1031 threeLevel.add("\t\t" + pathHeader.getHeader()); 1032 lastSection = pathHeader.getSection(); 1033 lastPage = pathHeader.getPage(); 1034 lastHeader = pathHeader.getHeader(); 1035 } else if (!lastPage.equals(pathHeader.getPage())) { 1036 logln(""); 1037 threeLevel.add("\t" + pathHeader.getPage()); 1038 threeLevel.add("\t\t" + pathHeader.getHeader()); 1039 lastPage = pathHeader.getPage(); 1040 lastHeader = pathHeader.getHeader(); 1041 } else if (!lastHeader.equals(pathHeader.getHeader())) { 1042 logln(""); 1043 threeLevel.add("\t\t" + pathHeader.getHeader()); 1044 lastHeader = pathHeader.getHeader(); 1045 } 1046 logln( 1047 pathHeader 1048 + "\t" 1049 + coverageLevel2.getLevel(original) 1050 + "\t" 1051 + english.getStringValue(pathHeader.getOriginalPath()) 1052 + "\t" 1053 + pathHeader.getOriginalPath()); 1054 } 1055 if (collide.size() != 0) { 1056 errln("\nCollide:\t" + collide.size()); 1057 for (Entry<String, String> item : collide.entrySet()) { 1058 errln("\t" + item); 1059 } 1060 } 1061 if (missing.size() != 0) { 1062 errln("\nMissing:\t" + missing.size()); 1063 for (Entry<String, String> item : missing.entrySet()) { 1064 errln("\t" + item.getKey() + "\tvalue:\t" + item.getValue()); 1065 } 1066 } 1067 if (skipped.size() != 0) { 1068 errln("\nSkipped:\t" + skipped.size()); 1069 for (Entry<String, String> item : skipped.entrySet()) { 1070 errln("\t" + item); 1071 } 1072 } 1073 Counter<PathHeader.Factory.CounterData> counterData = 1074 pathHeaderFactory.getInternalCounter(); 1075 logln("\nInternal Counter:\t" + counterData.size()); 1076 for (PathHeader.Factory.CounterData item : counterData.keySet()) { 1077 logln( 1078 "\t" 1079 + counterData.getCount(item) 1080 + "\t" 1081 + item.get2() // externals 1082 + "\t" 1083 + item.get3() 1084 + "\t" 1085 + item.get0() // internals 1086 + "\t" 1087 + item.get1()); 1088 } 1089 logln("\nMenus/Headers:\t" + threeLevel.size()); 1090 for (String item : threeLevel) { 1091 logln(item); 1092 } 1093 LinkedHashMap<String, Set<String>> sectionsToPages = 1094 org.unicode.cldr.util.PathHeader.Factory.getSectionsToPages(); 1095 logln("\nMenus:\t" + sectionsToPages.size()); 1096 for (Entry<String, Set<String>> item : sectionsToPages.entrySet()) { 1097 final String section = item.getKey(); 1098 for (String page : item.getValue()) { 1099 logln("\t" + section + "\t" + page); 1100 int count = 0; 1101 for (String path : pathHeaderFactory.filterCldr(section, page, english)) { 1102 count += 1; // just count them. 1103 } 1104 logln("\t" + count); 1105 } 1106 } 1107 } 1108 1109 public static final Set<String> GERMAN_UNIT_ORDER = 1110 ImmutableSet.of( 1111 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]", 1112 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]", 1113 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]", 1114 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]", 1115 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]", 1116 "//ldml/units/unitLength[@type=\"narrrow\"]/unit[@type=\"volume-liter\"]", 1117 "//ldml/numbers/minimalPairs/caseMinimalPairs", 1118 "//ldml/numbers/minimalPairs/genderMinimalPairs"); 1119 TestOrder()1120 public void TestOrder() { 1121 String[] paths = { 1122 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"noon\"]", 1123 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"afternoon1\"]", 1124 }; 1125 PathHeader pathHeaderLast = null; 1126 for (String path : paths) { 1127 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1128 if (pathHeaderLast != null) { 1129 assertRelation("ordering", true, pathHeaderLast, LEQ, pathHeader); 1130 } 1131 pathHeaderLast = pathHeader; 1132 } 1133 CLDRFile german = factory.make("de", true); 1134 Multimap<PathHeader, String> pathHeaderToPaths = TreeMultimap.create(); 1135 for (String path : german.fullIterable()) { 1136 for (String prefix : GERMAN_UNIT_ORDER) { 1137 if (path.startsWith(prefix)) { 1138 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1139 pathHeaderToPaths.put(pathHeader, path); 1140 } 1141 } 1142 } 1143 String[] germanExpected = { 1144 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/gender", // Units 1145 // 1146 // Volume 1147 // liter 1148 // 1149 // long-gender 1150 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName", // Units Volume liter long-displayName 1151 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/displayName", // Units Volume liter short-displayName 1152 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units Volume liter long-per 1153 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units Volume liter short-per 1154 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units Volume liter long-one-nominative 1155 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", // Units Volume liter long-one-accusative 1156 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", // Units Volume liter long-one-genitive 1157 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", // Units Volume liter long-one-dative 1158 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units Volume liter long-other-nominative 1159 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", // Units Volume liter long-other-accusative 1160 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", // Units Volume liter long-other-genitive 1161 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", // Units Volume liter long-other-dative 1162 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units Volume liter short-one-nominative 1163 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units Volume liter short-other-nominative 1164 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"]", // Units Compound Units power2 long-one-nominative-masculine 1165 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", // Units Compound Units power2 long-one-nominative-feminine 1166 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units Compound Units power2 long-one-nominative-dgender 1167 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units Compound Units power2 long-one-accusative-masculine 1168 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units Compound Units power2 long-one-accusative-feminine 1169 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"accusative\"]", // Units Compound Units power2 long-one-accusative-dgender 1170 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units Compound Units power2 long-one-genitive-masculine 1171 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units Compound Units power2 long-one-genitive-feminine 1172 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"genitive\"]", // Units Compound Units power2 long-one-genitive-dgender 1173 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"dative\"]", // Units Compound Units power2 long-one-dative-masculine 1174 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"dative\"]", // Units Compound Units power2 long-one-dative-feminine 1175 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"dative\"]", // Units Compound Units power2 long-one-dative-dgender 1176 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"]", // Units Compound Units power2 long-other-nominative-masculine 1177 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", // Units Compound Units power2 long-other-nominative-feminine 1178 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units Compound Units power2 long-other-nominative-dgender 1179 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units Compound Units power2 long-other-accusative-masculine 1180 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units Compound Units power2 long-other-accusative-feminine 1181 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"accusative\"]", // Units Compound Units power2 long-other-accusative-dgender 1182 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units Compound Units power2 long-other-genitive-masculine 1183 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units Compound Units power2 long-other-genitive-feminine 1184 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"genitive\"]", // Units Compound Units power2 long-other-genitive-dgender 1185 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"dative\"]", // Units Compound Units power2 long-other-dative-masculine 1186 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"dative\"]", // Units Compound Units power2 long-other-dative-feminine 1187 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"dative\"]", // Units Compound Units power2 long-other-dative-dgender 1188 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units Compound Units power2 short-one-nominative-dgender 1189 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units Compound Units power2 short-other-nominative-dgender 1190 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units Compound Units power2 narrow-one-nominative-dgender 1191 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units Compound Units power2 narrow-other-nominative-dgender 1192 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"nominative\"]", // Miscellaneous 1193 // Minimal Pairs 1194 // Case 1195 // nominative 1196 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"accusative\"]", // Miscellaneous 1197 // Minimal Pairs 1198 // Case 1199 // accusative 1200 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"genitive\"]", // Miscellaneous 1201 // Minimal Pairs 1202 // Case genitive 1203 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"dative\"]", // Miscellaneous 1204 // Minimal Pairs 1205 // Case dative 1206 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"masculine\"]", // Miscellaneous Minimal Pairs Gender masculine 1207 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"feminine\"]", // Miscellaneous 1208 // Minimal 1209 // Pairs 1210 // Gender 1211 // feminine 1212 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"neuter\"]", // Miscellaneous 1213 // Minimal Pairs 1214 // Gender neuter 1215 1216 // we don't care about order here. 1217 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special Suppress compound-UnitPattern1-power2 long 1218 "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special Suppress compound-UnitPattern1-power2 narrow 1219 "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special Suppress compound-UnitPattern1-power2 short 1220 }; 1221 1222 int germanExpectedIndex = 0; 1223 int errorCount = 0; 1224 int item = 0; 1225 for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) { 1226 PathHeader ph = entry.getKey(); 1227 Collection<String> epaths = entry.getValue(); 1228 if (!assertEquals(entry.toString(), 1, epaths.size())) { 1229 ++errorCount; 1230 } 1231 if (!assertEquals( 1232 ++item + ") PathHeader order", 1233 germanExpected[germanExpectedIndex++], 1234 epaths.iterator().next())) { 1235 ++errorCount; 1236 } 1237 } 1238 if (errorCount != 0) { 1239 for (Entry<PathHeader, Collection<String>> entry : 1240 pathHeaderToPaths.asMap().entrySet()) { 1241 PathHeader ph = entry.getKey(); 1242 Collection<String> epaths = entry.getValue(); 1243 System.out.println( 1244 "\"" + epaths.iterator().next().replace("\"", "\\\"") + "\",\t// " + ph); 1245 } 1246 } 1247 } 1248 Test8414()1249 public void Test8414() { 1250 PathDescription pathDescription = 1251 new PathDescription( 1252 supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE); 1253 1254 String prefix = 1255 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\""; 1256 String suffix = "\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]"; 1257 1258 final String path0 = prefix + "format" + suffix; 1259 final String path1 = prefix + "stand-alone" + suffix; 1260 String v0 = english.getStringValue(path0); 1261 String v1 = english.getStringValue(path1); 1262 String p0 = pathDescription.getDescription(path0, v0, null); 1263 String p1 = pathDescription.getDescription(path1, v1, null); 1264 assertTrue("Check pd for format", p0.contains("in the morning")); 1265 assertTrue("Check pd for stand-alone", !p1.contains("in the morning")); 1266 } 1267 TestCompletenessNonLdmlDtd()1268 public void TestCompletenessNonLdmlDtd() { 1269 PathChecker pathChecker = new PathChecker(); 1270 Set<String> directories = new LinkedHashSet<>(); 1271 Multimap<String, String> pathValuePairs = LinkedListMultimap.create(); 1272 // get all the directories containing non-Ldml dtd files 1273 for (DtdType dtdType : DtdType.values()) { 1274 if (dtdType.getStatus() != DtdType.DtdStatus.active) { 1275 continue; 1276 } 1277 if (dtdType == DtdType.ldml 1278 || dtdType == DtdType.ldmlICU 1279 || dtdType == DtdType.keyboard3 1280 || dtdType == DtdType.keyboardTest3) { 1281 continue; 1282 } 1283 DtdData dtdData = DtdData.getInstance(dtdType); 1284 for (String dir : dtdType.directories) { 1285 if (DEBUG_DTD_TYPE != null && !DEBUG_DTD_TYPE.directories.contains(dir)) { 1286 continue; 1287 } 1288 File dir2 = new File(COMMON_DIR + dir); 1289 logln(dir2.getName()); 1290 for (String file : dir2.list()) { 1291 // don't need to restrict with getFilesToTest(Arrays.asList(dir2.list()), 1292 // "root", "en")) { 1293 if (!file.endsWith(".xml")) { 1294 continue; 1295 } 1296 if (DEBUG) warnln(" TestCompletenessNonLdmlDtd: " + dir + ", " + file); 1297 logln(" \t" + file); 1298 for (Pair<String, String> pathValue : 1299 XMLFileReader.loadPathValues( 1300 dir2 + "/" + file, 1301 new ArrayList<Pair<String, String>>(), 1302 true)) { 1303 final String path = pathValue.getFirst(); 1304 final String value = pathValue.getSecond(); 1305 // logln("\t\t" + path); 1306 if (path.startsWith("//supplementalData/unitPreferenceData/unitPreferences") 1307 && path.contains("skeleton")) { 1308 int debug = 0; 1309 } 1310 pathChecker.checkPathHeader(dtdData, path); 1311 } 1312 } 1313 } 1314 } 1315 if (!pathChecker.badHeaders.isEmpty()) { 1316 System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL); 1317 } 1318 } 1319 1320 private class PathChecker { 1321 PathHeader.Factory phf = pathHeaderFactory; 1322 PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A"); 1323 1324 Set<String> badHeaders = new TreeSet<>(); 1325 Map<PathHeader, PathHeader> goodHeaders = new HashMap<>(); 1326 Set<PathHeader> seenBad = new HashSet<>(); 1327 1328 { phf.clearCache()1329 phf.clearCache(); 1330 } 1331 checkPathHeader(DtdData dtdData, String rawPath)1332 public void checkPathHeader(DtdData dtdData, String rawPath) { 1333 XPathParts pathPlain = XPathParts.getFrozenInstance(rawPath); 1334 if (dtdData.isMetadata(pathPlain)) { 1335 return; 1336 } 1337 if (dtdData.isDeprecated(pathPlain)) { 1338 return; 1339 } 1340 Multimap<String, String> extras = HashMultimap.create(); 1341 Set<String> fixedPaths = dtdData.getRegularizedPaths(pathPlain, extras); 1342 if (fixedPaths != null) { 1343 for (String fixedPath : fixedPaths) { 1344 checkSubpath(fixedPath); 1345 } 1346 } 1347 for (String path : extras.keySet()) { 1348 checkSubpath(path); 1349 } 1350 } 1351 checkSubpath(String path)1352 public void checkSubpath(String path) { 1353 String message = ": Can't compute path header"; 1354 if (path.contentEquals( 1355 "//supplementalData/grammaticalData/grammaticalFeatures[@targets=\"nominal\"][@locales=\"it\"]/grammaticalGender/_values")) { 1356 int debug = 0; 1357 } 1358 PathHeader ph = null; 1359 try { 1360 ph = phf.fromPath(path); 1361 if (seenBad.contains(ph)) { 1362 return; 1363 } 1364 if (ph.getPageId() == PageId.Deprecated) { 1365 return; // don't care 1366 } 1367 if (ph.getPageId() != PageId.Unknown) { 1368 PathHeader old = goodHeaders.put(ph, ph); 1369 if (old != null && !path.equals(old.getOriginalPath())) { 1370 errln( 1371 "Duplicate path header for: " 1372 + ph 1373 + "\n\t\t " 1374 + path 1375 + "\n\t\t≠" 1376 + old.getOriginalPath()); 1377 seenBad.add(ph); 1378 } 1379 return; 1380 } 1381 // for debugging 1382 phf.clearCache(); 1383 List<String> failures = new ArrayList<>(); 1384 ph = phf.fromPath(path, failures); 1385 message = ": Unknown path header" + failures; 1386 } catch (Exception e) { 1387 message = ": Exception in path header: " + e.getMessage(); 1388 } 1389 String star = starrer.set(path); 1390 if (badHeaders.add(star)) { 1391 errln(star + message + ", " + ph); 1392 System.out.println( 1393 "\tNo match in PathHeader.txt for " 1394 + path 1395 + "\n\tYou get only one message for all paths matching " 1396 + star 1397 + "\n\tFor example, check to see if the field in PathHeader.txt is in PathHeader.PageId." 1398 + "\n\tIf not, either correct PathHeader.txt or add it to PageId" 1399 + "\n\tIf you have a value attribute, you will need extra _ characters. The value attribute will show at the end with prefixed _, eg [...]/_skeleton." 1400 + "If there can be a value for the path then that element will add _. "); 1401 } 1402 } 1403 } 1404 TestSupplementalItems()1405 public void TestSupplementalItems() { 1406 // <weekOfPreference ordering="weekOfYear weekOfMonth" locales="am az bs cs cy da el et 1407 // hi ky lt mk sk ta th"/> 1408 // logln(pathHeaderFactory.getRegexInfo()); 1409 CLDRFile supplementalFile = 1410 CLDRConfig.getInstance().getSupplementalFactory().make("supplementalData", false); 1411 List<String> failures = new ArrayList<>(); 1412 Multimap<String, String> pathValuePairs = LinkedListMultimap.create(); 1413 for (String test : With.in(supplementalFile.iterator("//supplementalData/weekData"))) { 1414 failures.clear(); 1415 XPathParts parts = XPathParts.getFrozenInstance(supplementalFile.getFullXPath(test)); 1416 supplementalFile.getDtdData().getRegularizedPaths(parts, pathValuePairs); 1417 for (Entry<String, Collection<String>> entry : pathValuePairs.asMap().entrySet()) { 1418 final String normalizedPath = entry.getKey(); 1419 final Collection<String> normalizedValue = entry.getValue(); 1420 PathHeader ph = pathHeaderFactory.fromPath(normalizedPath, failures); 1421 if (ph == null || ph.getSectionId() == SectionId.Special) { 1422 errln( 1423 "Failure with " 1424 + test 1425 + " => " 1426 + normalizedPath 1427 + " = " 1428 + normalizedValue); 1429 } else { 1430 logln(ph + "\t" + test + " = " + normalizedValue); 1431 } 1432 } 1433 } 1434 } 1435 test10232()1436 public void test10232() { 1437 String[][] tests = { 1438 {"MMM", "Formats - Flexible - Date Formats"}, 1439 {"dMM", "Formats - Flexible - Date Formats"}, 1440 {"h", "Formats - Flexible - 12 Hour Time Formats"}, 1441 {"hm", "Formats - Flexible - 12 Hour Time Formats"}, 1442 {"Ehm", "Formats - Flexible - 12 Hour Time Formats"}, 1443 {"H", "Formats - Flexible - 24 Hour Time Formats"}, 1444 {"Hm", "Formats - Flexible - 24 Hour Time Formats"}, 1445 {"EHm", "Formats - Flexible - 24 Hour Time Formats"}, 1446 }; 1447 for (String[] test : tests) { 1448 String path = 1449 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"" 1450 + test[0] 1451 + "\"]"; 1452 PathHeader pathHeader = pathHeaderFactory.fromPath(path); 1453 assertEquals( 1454 "flexible formats", 1455 test[1] + "|" + test[0], 1456 pathHeader.getHeader() + "|" + pathHeader.getCode()); 1457 } 1458 } 1459 1460 // Moved from TestAnnotations and generalized testPathHeaderSize()1461 public void testPathHeaderSize() { 1462 String locale = "ar"; // choose one with lots of plurals 1463 int maxSize = 1250; 1464 boolean showTable = false; // only printed if test fails or verbose 1465 1466 Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory(); 1467 CLDRFile english = factory.make(locale, true); 1468 1469 PathHeader.Factory phf = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish()); 1470 Counter<PageId> counterPageId = new Counter<>(); 1471 Counter<PageId> counterPageIdAll = new Counter<>(); 1472 for (String path : english) { 1473 Level level = 1474 CLDRConfig.getInstance() 1475 .getSupplementalDataInfo() 1476 .getCoverageLevel(path, locale); 1477 PathHeader ph = phf.fromPath(path); 1478 if (level.compareTo(Level.MODERN) <= 0) { 1479 counterPageId.add(ph.getPageId(), 1); 1480 } 1481 counterPageIdAll.add(ph.getPageId(), 1); 1482 } 1483 Set<R2<Long, PageId>> entrySetSortedByCount = 1484 counterPageId.getEntrySetSortedByCount(false, null); 1485 for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) { 1486 long size = sizeAndPageId.get0(); 1487 PageId pageId = sizeAndPageId.get1(); 1488 if (!assertTrue( 1489 pageId.getSectionId() 1490 + "/" 1491 + pageId 1492 + " size (" 1493 + size 1494 + ") < " 1495 + maxSize 1496 + "?", 1497 size < maxSize)) { 1498 showTable = true; 1499 } 1500 // System.out.println(pageId + "\t" + size); 1501 } 1502 if (showTable || isVerbose()) { 1503 for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) { 1504 PageId pageId = sizeAndPageId.get1(); 1505 System.out.println( 1506 pageId.getSectionId() 1507 + "\t" 1508 + pageId 1509 + "\t" 1510 + sizeAndPageId.get0() 1511 + "\t" 1512 + counterPageIdAll.get(pageId)); 1513 } 1514 } 1515 } 1516 TestCLDR_11454()1517 public void TestCLDR_11454() { 1518 PathHeader.Factory phf = PathHeader.getFactory(); 1519 PathHeader century = 1520 phf.fromPath( 1521 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-century\"]/displayName"); 1522 PathHeader decade = 1523 phf.fromPath( 1524 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-decade\"]/displayName"); 1525 assertEquals("Section", century.getSectionId(), decade.getSectionId()); 1526 assertEquals("Page", century.getPageId(), decade.getPageId()); 1527 } 1528 TestEmojiOrder()1529 public void TestEmojiOrder() { 1530 PathHeader.Factory phf = PathHeader.getFactory(); 1531 String[] desiredOrder = { 1532 "⚕", "⚕", "⚕", 1533 "⚖", "⚖", "⚖" 1534 }; 1535 List<PathHeader> pathHeaders = new ArrayList<>(); 1536 for (String emoji : desiredOrder) { 1537 String base = "//ldml/annotations/annotation[@cp=\"" + emoji + "\"]"; 1538 pathHeaders.add(phf.fromPath(base + "[@type=\"tts\"]")); 1539 pathHeaders.add(phf.fromPath(base)); 1540 logln( 1541 emoji 1542 + ": getEmojiMinorOrder=" 1543 + Emoji.getEmojiMinorOrder(Emoji.getMinorCategory(emoji)) 1544 + ", getEmojiToOrder=" 1545 + Emoji.getEmojiToOrder(emoji)); 1546 } 1547 PathHeader lastItem = null; 1548 for (PathHeader item : pathHeaders) { 1549 if (lastItem != null) { 1550 assertEquals("Section", lastItem.getSectionId(), item.getSectionId()); 1551 assertEquals("Page", lastItem.getPageId(), item.getPageId()); 1552 assertEquals("Header", lastItem.getHeader(), item.getHeader()); 1553 if (!assertTrue(lastItem + " < " + item, lastItem.compareTo(item) < 0)) { 1554 lastItem.compareTo(item); // for debugging 1555 } 1556 } 1557 lastItem = item; 1558 } 1559 } 1560 TestQuotes()1561 public void TestQuotes() { 1562 // quotes should never appear in result 1563 PathHeader.Factory phf = PathHeader.getFactory(); 1564 String[] tests = { 1565 "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"ig\"]/pluralRule[@count=\"other\"]", 1566 "//supplementalData/transforms/transform[@source=\"und-Khmr\"][@target=\"und-Latn\"]" 1567 }; 1568 for (String test : tests) { 1569 PathHeader trial = phf.fromPath(test); 1570 assertEquals("No quotes in pathheader", false, trial.toString().contains("\"")); 1571 } 1572 } 1573 /** 1574 * Make sure that the PathHeader sort order is consistent with the grammatical feature orders 1575 * "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName" 1576 * //ldml/units/unitLength[@type=\long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", 1577 * //ldml/units/unitLength[@type=\long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", 1578 */ TestUnitOrder()1579 public void TestUnitOrder() { 1580 PathHeader.Factory phf = PathHeader.getFactory(); 1581 List<PathHeader> expectedOrder = new ArrayList<>(); 1582 List<Width> widths = Arrays.asList(Width.LONG, Width.SHORT, Width.NARROW); 1583 List<CaseValues> cases = Arrays.asList(GrammarInfo.CaseValues.values()).subList(0, 3); 1584 List<GenderValues> genders = Arrays.asList(GrammarInfo.GenderValues.values()).subList(0, 3); 1585 1586 for (Width width : widths) { 1587 String path = 1588 "//ldml/units/unitLength[@type=\"" 1589 + width 1590 + "\"]/unit[@type=\"length-meter\"]/displayName"; 1591 expectedOrder.add(phf.fromPath(path)); 1592 } 1593 1594 for (Width width : widths) { 1595 for (Count count : Count.values()) { 1596 for (GrammarInfo.CaseValues gCase : cases) { 1597 if (width != Width.LONG && gCase != CaseValues.nominative) { 1598 break; 1599 } 1600 String path = 1601 "//ldml/units/unitLength[@type=\"" 1602 + width 1603 + "\"]/unit[@type=\"length-meter\"]/unitPattern[@count=\"" 1604 + count 1605 + (gCase == CaseValues.nominative ? "" : "\"][@case=\"" + gCase) 1606 + "\"]"; 1607 expectedOrder.add(phf.fromPath(path)); 1608 } 1609 } 1610 } 1611 for (Width width : widths) { 1612 for (Count count : Count.values()) { 1613 for (GrammarInfo.CaseValues gCase : cases) { 1614 if (width != Width.LONG && gCase != CaseValues.nominative) { 1615 break; 1616 } 1617 for (GrammarInfo.GenderValues gGender : genders) { 1618 if (width != Width.LONG && gGender != GenderValues.neuter) { 1619 break; 1620 } 1621 String path = 1622 "//ldml/units/unitLength[@type=\"" 1623 + width 1624 + "\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"" 1625 + count 1626 + (gGender == GenderValues.neuter 1627 ? "" 1628 : "\"][@gender=\"" + gGender) 1629 + (gCase == CaseValues.nominative 1630 ? "" 1631 : "\"][@case=\"" + gCase) 1632 + "\"]"; 1633 expectedOrder.add(phf.fromPath(path)); 1634 } 1635 } 1636 } 1637 } 1638 for (Count count : Count.values()) { 1639 String path = 1640 "//ldml/numbers/minimalPairs/ordinalMinimalPairs[@ordinal=\"" + count + "\"]"; 1641 expectedOrder.add(phf.fromPath(path)); 1642 } 1643 for (Count count : Count.values()) { 1644 String path = 1645 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"" + count + "\"]"; 1646 expectedOrder.add(phf.fromPath(path)); 1647 } 1648 for (GrammarInfo.CaseValues gCase : cases) { 1649 String path = "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"" + gCase + "\"]"; 1650 expectedOrder.add(phf.fromPath(path)); 1651 } 1652 for (GrammarInfo.GenderValues gGender : genders) { 1653 String path = 1654 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"" + gGender + "\"]"; 1655 expectedOrder.add(phf.fromPath(path)); 1656 } 1657 1658 PathHeader last = null; 1659 int item = 0; 1660 int errorCount = 0; 1661 for (PathHeader pathHeader : expectedOrder) { 1662 if (last != null) { 1663 if (!assertTrue( 1664 ++item + ")\t" + last + "\t<\t" + pathHeader, 1665 last.compareTo(pathHeader) < 0)) { 1666 errorCount++; 1667 last.compareTo(pathHeader); 1668 } 1669 } 1670 last = pathHeader; 1671 } 1672 if (errorCount != 0 || isVerbose()) { 1673 for (PathHeader pathHeader : expectedOrder) { 1674 System.out.println( 1675 "\"" 1676 + pathHeader.getOriginalPath().replace("\"", "\\\"") 1677 + "\",\t// " 1678 + pathHeader); 1679 } 1680 } 1681 } 1682 testPageSize()1683 public void testPageSize() { 1684 final long minError = 946; // above this, emit error 1685 final long minLog = 700; // otherwise above this, emit warning 1686 Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory(); 1687 List<String> locales = 1688 StandardCodes.make() 1689 .getLocaleCoverageLocales(Organization.cldr, ImmutableSet.of(Level.MODERN)) 1690 .stream() 1691 .filter(x -> CLDRLocale.getInstance(x).getCountry().isEmpty()) 1692 .collect(Collectors.toUnmodifiableList()); 1693 List<Counter<PageId>> counters = new ArrayList<>(); 1694 final String thresholdExplanation = "log/error thresholds are " + minLog + "/" + minError; 1695 for (String locale : locales) { 1696 CLDRFile cldrFile = factory.make(locale, false); 1697 PathHeader.Factory phf = PathHeader.getFactory(); 1698 Counter<PageId> c = new Counter<>(); 1699 counters.add(c); 1700 for (String path : cldrFile) { 1701 PathHeader ph = phf.fromPath(path); 1702 c.add(ph.getPageId(), 1); 1703 } 1704 for (PageId entry : c.getKeysetSortedByKey()) { 1705 long count = c.getCount(entry); 1706 if (count > minLog) { 1707 final String message = 1708 String.format( 1709 "%s\t%s\t%s\thas too many entries:\t%d\t(%s)", 1710 locale, 1711 entry.getSectionId().toString(), 1712 entry, 1713 count, 1714 thresholdExplanation); 1715 if (count > minError) { 1716 errln(message); 1717 } else { 1718 warnln(message); 1719 } 1720 } 1721 } 1722 } 1723 if (isVerbose()) { 1724 System.out.println(); 1725 Set<PageId> sorted = new TreeSet<>(); 1726 for (Counter<PageId> counter : counters) { 1727 sorted.addAll(counter.keySet()); 1728 } 1729 int i = 0; 1730 System.out.print("Order" + "\t" + "Section" + "\t" + "Page"); 1731 for (String c : locales) { 1732 System.out.print("\t" + c); 1733 } 1734 System.out.println(); 1735 1736 for (PageId entry : sorted) { 1737 System.out.print(++i + "\t" + entry.getSectionId() + "\t" + entry); 1738 for (Counter<PageId> c : counters) { 1739 System.out.print("\t" + c.get(entry)); 1740 } 1741 System.out.println(); 1742 } 1743 } 1744 } 1745 } 1746