xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestPathHeader.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.HashMultimap;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.LinkedListMultimap;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.TreeMultimap;
9 import com.ibm.icu.impl.Relation;
10 import com.ibm.icu.impl.Row;
11 import com.ibm.icu.impl.Row.R2;
12 import java.io.File;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.EnumMap;
17 import java.util.EnumSet;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.LinkedHashMap;
21 import java.util.LinkedHashSet;
22 import java.util.List;
23 import java.util.Map;
24 import java.util.Map.Entry;
25 import java.util.Set;
26 import java.util.TreeMap;
27 import java.util.TreeSet;
28 import java.util.regex.Matcher;
29 import java.util.stream.Collectors;
30 import org.unicode.cldr.test.CoverageLevel2;
31 import org.unicode.cldr.test.ExampleGenerator;
32 import org.unicode.cldr.util.CLDRConfig;
33 import org.unicode.cldr.util.CLDRFile;
34 import org.unicode.cldr.util.CLDRFile.Status;
35 import org.unicode.cldr.util.CLDRLocale;
36 import org.unicode.cldr.util.CLDRPaths;
37 import org.unicode.cldr.util.CLDRURLS;
38 import org.unicode.cldr.util.CldrUtility;
39 import org.unicode.cldr.util.Containment;
40 import org.unicode.cldr.util.Counter;
41 import org.unicode.cldr.util.DtdData;
42 import org.unicode.cldr.util.DtdType;
43 import org.unicode.cldr.util.Emoji;
44 import org.unicode.cldr.util.Factory;
45 import org.unicode.cldr.util.GrammarInfo;
46 import org.unicode.cldr.util.GrammarInfo.CaseValues;
47 import org.unicode.cldr.util.GrammarInfo.GenderValues;
48 import org.unicode.cldr.util.Iso3166Data;
49 import org.unicode.cldr.util.LanguageTagParser;
50 import org.unicode.cldr.util.Level;
51 import org.unicode.cldr.util.Organization;
52 import org.unicode.cldr.util.Pair;
53 import org.unicode.cldr.util.PathDescription;
54 import org.unicode.cldr.util.PathHeader;
55 import org.unicode.cldr.util.PathHeader.PageId;
56 import org.unicode.cldr.util.PathHeader.SectionId;
57 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
58 import org.unicode.cldr.util.PathHeader.Width;
59 import org.unicode.cldr.util.PathStarrer;
60 import org.unicode.cldr.util.PatternCache;
61 import org.unicode.cldr.util.PatternPlaceholders;
62 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderInfo;
63 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
64 import org.unicode.cldr.util.StandardCodes;
65 import org.unicode.cldr.util.SupplementalDataInfo;
66 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
67 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
68 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
69 import org.unicode.cldr.util.With;
70 import org.unicode.cldr.util.XMLFileReader;
71 import org.unicode.cldr.util.XPathParts;
72 
73 public class TestPathHeader extends TestFmwkPlus {
74     private static final DtdType DEBUG_DTD_TYPE = null; // DtdType.supplementalData;
75     private static final String COMMON_DIR = CLDRPaths.BASE_DIRECTORY + "common/"; // "common/" appended to the CLDR base directory
76     private static final boolean DEBUG = false; // when true, tempTestCompletenessLdmlDtd emits a warning for each (directory, file) it visits
77 
main(String[] args)78     public static void main(String[] args) {
79         new TestPathHeader().run(args);
80     }
81 
82     static final CLDRConfig info = CLDRConfig.getInstance(); // configuration object the other fixtures below are derived from
83     static final Factory factory = info.getCommonAndSeedAndMainAndAnnotationsFactory();
84     static final CLDRFile english = factory.make("en", true); // "en" file; the boolean presumably requests the resolved form — TODO confirm
85     static final SupplementalDataInfo supplemental = info.getSupplementalDataInfo();
86     static PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english); // PathHeader factory built from the English file; used by most tests here
87     private EnumSet<PageId> badZonePages = EnumSet.of(PageId.UnknownT); // pages for which Test00AFile accepts a null cached-path set
88 
tempTestAnnotation()89     public void tempTestAnnotation() { // builds PathHeaders for annotation paths and checks that they differ and how they order
90         // NEW:     <annotation cp="��">face | grin</annotation>
91         //          <annotation cp="��" type="tts">grinning face</annotation>
92 
93         final String path1 = "//ldml/annotations/annotation[@cp=\"��\"]";
94         PathHeader ph1 = pathHeaderFactory.fromPath(path1);
95         logln(ph1.toString() + "\t" + path1);
96         final String path2 = "//ldml/annotations/annotation[@cp=\"��\"][@type=\"tts\"]";
97         PathHeader ph2 = pathHeaderFactory.fromPath(path2);
98         logln(ph2.toString() + "\t" + path2);
99         final String path3 = "//ldml/annotations/annotation[@cp=\"��\"]";
100         PathHeader ph3 = pathHeaderFactory.fromPath(path2); // NOTE(review): built from path2 although path3 is what gets logged below — confirm whether path3 was intended
101         logln(ph3.toString() + "\t" + path3);
102 
103         assertNotEquals("pathheader", ph1, ph2); // the keyword path and the tts path must not share a PathHeader
104         assertNotEquals("pathheader", ph1.toString(), ph2.toString()); // ...nor the same string form
105         assertRelation("pathheader", true, ph1, TestFmwkPlus.LEQ, ph3); // expected order: ph1, then ph3, then ph2
106         assertRelation("pathheader", true, ph3, TestFmwkPlus.LEQ, ph2);
107     }
108 
109     static final String[] MIN_LOCALES = { // passed to getFilesToTest as the locales to place first in the sample
110         "root", "en", "de", "ru", "ko"
111     }; // choose locales with range of case/gender structures
112 
tempTestCompletenessLdmlDtd()113     public void tempTestCompletenessLdmlDtd() {
114         // List<String> failures = null;
115         pathHeaderFactory.clearCache();
116         PathChecker pathChecker = new PathChecker();
117         for (String directory : DtdType.ldml.directories) {
118             Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
119             Set<String> source = factory2.getAvailable();
120             for (String file : getFilesToTest(source, MIN_LOCALES)) {
121                 if (DEBUG) warnln(" TestCompletenessLdmlDtd: " + directory + ", " + file);
122                 DtdData dtdData = null;
123                 CLDRFile cldrFile = factory2.make(file, true);
124                 for (String path : cldrFile.fullIterable()) {
125                     pathChecker.checkPathHeader(cldrFile.getDtdData(), path);
126                 }
127             }
128         }
129         Set<String> missing = pathHeaderFactory.getUnmatchedRegexes();
130         if (missing.size() != 0) {
131             for (String e : missing) {
132                 errln("Path Regex never matched:\t" + e);
133             }
134         }
135         if (!pathChecker.badHeaders.isEmpty()) {
136             System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL);
137         }
138     }
139 
getFilesToTest(Collection<String> source, String... doFirst)140     private Collection<String> getFilesToTest(Collection<String> source, String... doFirst) {
141         LinkedHashSet<String> files = new LinkedHashSet<>(Arrays.asList(doFirst));
142         files.retainAll(source); // put first
143         files.addAll(new HashSet<>(source)); // now add others semi-randomly
144         int max = Math.min(30, files.size());
145         if (getInclusion() == 10 || files.size() <= max) {
146             return files;
147         }
148         ArrayList<String> shortFiles = new ArrayList<>(files);
149         if (getInclusion() > 5) {
150             max += (files.size() - 30) * (getInclusion() - 5) / 10; // use proportional amount
151         }
152         return shortFiles.subList(0, max);
153     }
154 
TestCompleteness()155     public void TestCompleteness() {
156         PathHeader.Factory pathHeaderFactory2 = PathHeader.getFactory(english);
157         // List<String> failures = null;
158         pathHeaderFactory2.clearCache();
159         Multimap<PathHeader.PageId, PathHeader.SectionId> pageUniqueness = TreeMultimap.create();
160         Multimap<String, Pair<PathHeader.SectionId, PathHeader.PageId>> headerUniqueness =
161                 TreeMultimap.create();
162         Set<String> toTest;
163         switch (getInclusion()) {
164             default:
165                 toTest = StandardCodes.make().getLocaleCoverageLocales(Organization.cldr);
166                 break;
167             case 10:
168                 toTest = factory.getAvailable();
169                 break;
170         }
171         toTest = ImmutableSet.<String>builder().add("en").addAll(toTest).build();
172         Set<String> seenPaths = new HashSet<>();
173         Set<String> localSeenPaths = new TreeSet<>();
174         for (String locale : toTest) {
175             localSeenPaths.clear();
176             for (String p : factory.make(locale, true).fullIterable()) {
177                 if (p.startsWith("//ldml/identity/")) {
178                     continue;
179                 }
180                 if (seenPaths.contains(p)) {
181                     continue;
182                 }
183                 seenPaths.add(p);
184                 localSeenPaths.add(p);
185                 // if (p.contains("symbol[@alt") && failures == null) {
186                 // PathHeader result = pathHeaderFactory2.fromPath(p, failures = new
187                 // ArrayList<String>());
188                 // logln("Matching " + p + ": " + result + "\t" +
189                 // result.getSurveyToolStatus());
190                 // for (String failure : failures) {
191                 // logln("\t" + failure);
192                 // }
193                 // }
194                 PathHeader ph;
195                 try {
196                     ph = pathHeaderFactory2.fromPath(p);
197                 } catch (Exception e1) {
198                     try {
199                         ph = pathHeaderFactory2.fromPath(p);
200                     } catch (Exception e2) {
201                         throw new IllegalArgumentException(locale + ":\t" + p, e2);
202                     }
203                 }
204                 if (ph == null) {
205                     errln("Failed to create path from: " + p);
206                     continue;
207                 }
208                 final SectionId sectionId = ph.getSectionId();
209                 if (sectionId != SectionId.Special) {
210                     pageUniqueness.put(ph.getPageId(), sectionId);
211                     headerUniqueness.put(ph.getHeader(), new Pair<>(sectionId, ph.getPageId()));
212                 }
213             }
214             if (!localSeenPaths.isEmpty()) {
215                 logln(locale + ": checked " + localSeenPaths.size() + " new paths");
216             }
217         }
218         Set<String> missing = pathHeaderFactory2.getUnmatchedRegexes();
219         if (missing.size() != 0) {
220             for (String e : missing) {
221                 if (e.contains("//ldml/")) {
222                     if (e.contains("//ldml/rbnf/")
223                             || e.contains("//ldml/segmentations/")
224                             || e.contains("//ldml/collations/")) {
225                         continue;
226                     }
227                     logln("Path Regex never matched:\t" + e);
228                 }
229             }
230         }
231 
232         for (Entry<PageId, Collection<SectionId>> e : pageUniqueness.asMap().entrySet()) {
233             Collection<SectionId> values = e.getValue();
234             if (values.size() != 1) {
235                 warnln("Duplicate page in section: " + CldrUtility.toString(e));
236             }
237         }
238 
239         for (Entry<String, Collection<Pair<SectionId, PageId>>> e :
240                 headerUniqueness.asMap().entrySet()) {
241             Collection<Pair<SectionId, PageId>> values = e.getValue();
242             if (values.size() != 1) {
243                 warnln("Duplicate header in (section,page): " + CldrUtility.toString(e));
244             }
245         }
246     }
247 
Test6170()248     public void Test6170() {
249         String p1 =
250                 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"speed-kilometer-per-hour\"]/unitPattern[@count=\"other\"]";
251         String p2 =
252                 "//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"area-square-meter\"]/unitPattern[@count=\"other\"]";
253         PathHeader ph1 = pathHeaderFactory.fromPath(p1);
254         PathHeader ph2 = pathHeaderFactory.fromPath(p2);
255         int comp12 = ph1.compareTo(ph2);
256         int comp21 = ph2.compareTo(ph1);
257         assertEquals("comp ph", comp12, -comp21);
258     }
259 
TestVariant()260     public void TestVariant() {
261         PathHeader p1 =
262                 pathHeaderFactory.fromPath(
263                         "//ldml/localeDisplayNames/languages/language[@type=\"ug\"][@alt=\"variant\"]");
264         PathHeader p2 =
265                 pathHeaderFactory.fromPath(
266                         "//ldml/localeDisplayNames/languages/language[@type=\"ug\"]");
267         assertNotEquals("variants", p1, p2);
268         assertNotEquals("variants", p1.toString(), p2.toString());
269         // Code Lists Languages Arabic Script ug-variant
270     }
271 
Test4587()272     public void Test4587() {
273         String test =
274                 "//ldml/dates/timeZoneNames/metazone[@type=\"Pacific/Wallis\"]/short/standard";
275         PathHeader ph = pathHeaderFactory.fromPath(test);
276         if (ph == null) {
277             errln("Failure with " + test);
278         } else {
279             logln(ph + "\t" + test);
280         }
281     }
282 
TestMiscPatterns()283     public void TestMiscPatterns() {
284         String test =
285                 "//ldml/numbers/miscPatterns[@numberSystem=\"arab\"]/pattern[@type=\"atLeast\"]";
286         PathHeader ph = pathHeaderFactory.fromPath(test);
287         assertNotNull("MiscPatterns path not found", ph);
288         if (false) System.out.println(english.getStringValue(test));
289     }
290 
TestPluralOrder()291     public void TestPluralOrder() {
292         Set<PathHeader> sorted = new TreeSet<>();
293         for (String locale : new String[] {"ru", "ar", "ja"}) {
294             sorted.clear();
295             CLDRFile cldrFile = info.getCLDRFile(locale, true);
296             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale);
297             for (String path : cldrFile.fullIterable()) {
298                 if (!path.contains("@count")) {
299                     continue;
300                 }
301                 Level level = coverageLevel.getLevel(path);
302                 if (Level.MODERN.compareTo(level) < 0) {
303                     continue;
304                 }
305                 PathHeader p = pathHeaderFactory.fromPath(path);
306                 sorted.add(p);
307             }
308             for (PathHeader p : sorted) {
309                 logln(locale + "\t" + p + "\t" + p.getOriginalPath());
310             }
311         }
312     }
313 
314     static final String APPEND_TIMEZONE = // the gregorian "Timezone" append item; the path TestAppendTimezone examines
315             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]";
316     static final String APPEND_TIMEZONE_END = // the tail of APPEND_TIMEZONE after the calendar element
317             "/dateTimeFormats/appendItems/appendItem[@request=\"Timezone\"]";
318     static final String BEFORE_PH = // TestAppendTimezone asserts this path's PathHeader is LEQ that of APPEND_TIMEZONE
319             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"ms\"]";
320     static final String AFTER_PH = // TestAppendTimezone asserts the PathHeader of APPEND_TIMEZONE is LEQ this path's
321             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"d\"]/greatestDifference[@id=\"d\"]";
322 
TestAppendTimezone()323     public void TestAppendTimezone() {
324         CLDRFile cldrFile = info.getEnglish();
325         CoverageLevel2 coverageLevel = CoverageLevel2.getInstance("en");
326         assertEquals(
327                 "appendItem:Timezone", Level.MODERATE, coverageLevel.getLevel(APPEND_TIMEZONE));
328 
329         PathHeader ph = pathHeaderFactory.fromPath(APPEND_TIMEZONE);
330         assertEquals("appendItem:Timezone pathheader", "Timezone", ph.getCode());
331         // check that they are in the right place (they weren't before!)
332         PathHeader phBefore = pathHeaderFactory.fromPath(BEFORE_PH);
333         PathHeader phAfter = pathHeaderFactory.fromPath(AFTER_PH);
334         assertTrue(phBefore, LEQ, ph);
335         assertTrue(ph, LEQ, phAfter);
336 
337         PathDescription pathDescription =
338                 new PathDescription(
339                         supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE);
340         String description = pathDescription.getDescription(APPEND_TIMEZONE, "tempvalue", null);
341         assertTrue("appendItem:Timezone pathDescription", description.contains("“Timezone”"));
342 
343         PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
344         PlaceholderStatus status = patternPlaceholders.getStatus(APPEND_TIMEZONE);
345         assertEquals("appendItem:Timezone placeholders", PlaceholderStatus.REQUIRED, status);
346 
347         Map<String, PlaceholderInfo> placeholderInfo = patternPlaceholders.get(APPEND_TIMEZONE);
348         PlaceholderInfo placeholderInfo2 = placeholderInfo.get("{1}");
349         if (assertNotNull("appendItem:Timezone placeholders", placeholderInfo2)) {
350             assertEquals(
351                     "appendItem:Timezone placeholders",
352                     "APPEND_FIELD_FORMAT",
353                     placeholderInfo2.name);
354             assertEquals(
355                     "appendItem:Timezone placeholders", "Pacific Time", placeholderInfo2.example);
356         }
357         ExampleGenerator eg = new ExampleGenerator(cldrFile, cldrFile);
358         String example =
359                 eg.getExampleHtml(APPEND_TIMEZONE, cldrFile.getStringValue(APPEND_TIMEZONE));
360         String result = ExampleGenerator.simplify(example, false);
361         assertEquals("", "〖❬6:25:59 PM❭ ❬GMT❭〗", result);
362     }
363 
TestOptional()364     public void TestOptional() {
365         if (true) return;
366         Map<PathHeader, String> sorted = new TreeMap<>();
367         for (String locale : new String[] {"af"}) {
368             sorted.clear();
369             CLDRFile cldrFile = info.getCLDRFile(locale, true);
370             CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(locale);
371             for (String path : cldrFile.fullIterable()) {
372                 Level level = coverageLevel.getLevel(path);
373                 if (supplemental.isDeprecated(DtdType.ldml, path)) {
374                     continue;
375                 }
376 
377                 if (Level.COMPREHENSIVE.compareTo(level) != 0) {
378                     continue;
379                 }
380 
381                 PathHeader ph = pathHeaderFactory.fromPath(path);
382                 if (ph == null || ph.shouldHide()) {
383                     continue;
384                 }
385                 final SurveyToolStatus status = ph.getSurveyToolStatus();
386                 sorted.put(ph, locale + "\t" + status + "\t" + ph + "\t" + ph.getOriginalPath());
387             }
388             Set<String> codes = new LinkedHashSet<>();
389             PathHeader old = null;
390             String line = null;
391             for (Entry<PathHeader, String> s : sorted.entrySet()) {
392                 PathHeader p = s.getKey();
393                 String v = s.getValue();
394                 if (old == null) {
395                     line = v;
396                     codes.add(p.getCode());
397                 } else if (p.getSectionId() == old.getSectionId()
398                         && p.getPageId() == old.getPageId()
399                         && p.getHeader().equals(old.getHeader())) {
400                     codes.add(p.getCode());
401                 } else {
402                     logln(line + "\t" + codes.toString());
403                     codes.clear();
404                     line = v;
405                     codes.add(p.getCode());
406                 }
407                 old = p;
408             }
409             logln(line + "\t" + codes.toString());
410         }
411     }
412 
TestPluralCanonicals()413     public void TestPluralCanonicals() {
414         Relation<String, String> data =
415                 Relation.of(new LinkedHashMap<String, Set<String>>(), TreeSet.class);
416         for (String locale : factory.getAvailable()) {
417             if (locale.contains("_")) {
418                 continue;
419             }
420             PluralInfo info = supplemental.getPlurals(PluralType.cardinal, locale);
421             Set<String> keywords = info.getCanonicalKeywords();
422             data.put(keywords.toString(), locale);
423         }
424         for (Entry<String, Set<String>> entry : data.keyValuesSet()) {
425             logln(entry.getKey() + "\t" + entry.getValue());
426         }
427     }
428 
TestPluralPaths()429     public void TestPluralPaths() {
430         // do the following line once, when the file is opened
431         Set<String> filePaths = pathHeaderFactory.pathsForFile(english);
432 
433         // check that English doesn't contain few or many
434         verifyContains(PageId.Duration, filePaths, "few", false);
435         verifyContains(PageId.C_NAmerica, filePaths, "many", false);
436         verifyContains(PageId.C_SAmerica, filePaths, "many", false);
437         verifyContains(PageId.C_NWEurope, filePaths, "many", false);
438         verifyContains(PageId.C_SEEurope, filePaths, "many", false);
439         verifyContains(PageId.C_NAfrica, filePaths, "many", false);
440         verifyContains(PageId.C_WAfrica, filePaths, "many", false);
441         verifyContains(PageId.C_SAfrica, filePaths, "many", false);
442         verifyContains(PageId.C_EAfrica, filePaths, "many", false);
443         verifyContains(PageId.C_CAsia, filePaths, "many", false);
444         verifyContains(PageId.C_WAsia, filePaths, "many", false);
445         verifyContains(PageId.C_SEAsia, filePaths, "many", false);
446         verifyContains(PageId.C_Oceania, filePaths, "many", false);
447         verifyContains(PageId.C_Unknown, filePaths, "many", false);
448 
449         // check that Arabic does contain few and many
450         filePaths = pathHeaderFactory.pathsForFile(info.getCLDRFile("ar", true));
451 
452         verifyContains(PageId.Duration, filePaths, "few", true);
453         verifyContains(PageId.C_NAmerica, filePaths, "many", true);
454         verifyContains(PageId.C_SAmerica, filePaths, "many", true);
455         verifyContains(PageId.C_NWEurope, filePaths, "many", true);
456         verifyContains(PageId.C_SEEurope, filePaths, "many", true);
457         verifyContains(PageId.C_NAfrica, filePaths, "many", true);
458         verifyContains(PageId.C_WAfrica, filePaths, "many", true);
459         verifyContains(PageId.C_SAfrica, filePaths, "many", true);
460         verifyContains(PageId.C_EAfrica, filePaths, "many", true);
461         verifyContains(PageId.C_CAsia, filePaths, "many", true);
462         verifyContains(PageId.C_WAsia, filePaths, "many", true);
463         verifyContains(PageId.C_SEAsia, filePaths, "many", true);
464         verifyContains(PageId.C_Oceania, filePaths, "many", true);
465         verifyContains(PageId.C_Unknown, filePaths, "many", true);
466     }
467 
TestCoverage()468     public void TestCoverage() {
469         Map<Row.R2<SectionId, PageId>, Counter<Level>> data = new TreeMap<>();
470         CLDRFile cldrFile = english;
471         for (String path : cldrFile.fullIterable()) {
472             if (supplemental.isDeprecated(DtdType.ldml, path)) {
473                 errln("Deprecated path in English: " + path);
474                 continue;
475             }
476             Level level = supplemental.getCoverageLevel(path, cldrFile.getLocaleID());
477             PathHeader p = pathHeaderFactory.fromPath(path);
478             SurveyToolStatus status = p.getSurveyToolStatus();
479 
480             boolean hideCoverage = level == Level.COMPREHENSIVE;
481             boolean hidePathHeader = p.shouldHide();
482             if (hidePathHeader != hideCoverage) {
483                 String message = "PathHeader: " + status + ", Coverage: " + level + ": " + path;
484                 if (hidePathHeader && !hideCoverage) {
485                     errln(message);
486                 } else if (!hidePathHeader && hideCoverage) {
487                     logln(message);
488                 }
489             }
490             final R2<SectionId, PageId> key = Row.of(p.getSectionId(), p.getPageId());
491             Counter<Level> counter = data.get(key);
492             if (counter == null) {
493                 data.put(key, counter = new Counter<>());
494             }
495             counter.add(level, 1);
496         }
497         StringBuffer b = new StringBuffer("\t");
498         for (Level level : Level.values()) {
499             b.append("\t" + level);
500         }
501         logln(b.toString());
502         for (Entry<R2<SectionId, PageId>, Counter<Level>> entry : data.entrySet()) {
503             b.setLength(0);
504             b.append(entry.getKey().get0() + "\t" + entry.getKey().get1());
505             Counter<Level> counter = entry.getValue();
506             long total = 0;
507             for (Level level : Level.values()) {
508                 total += counter.getCount(level);
509                 b.append("\t" + total);
510             }
511             logln(b.toString());
512         }
513     }
514 
Test00AFile()515     public void Test00AFile() {
516         final String localeId = "en";
517         Counter<Level> counter = new Counter<>();
518         Map<String, PathHeader> uniqueness = new HashMap<>();
519         Set<String> alreadySeen = new HashSet<>();
520         check(localeId, true, uniqueness, alreadySeen);
521         // check paths
522         for (Entry<SectionId, Set<PageId>> sectionAndPages :
523                 PathHeader.Factory.getSectionIdsToPageIds().keyValuesSet()) {
524             final SectionId section = sectionAndPages.getKey();
525             if (section == SectionId.Supplemental || section == SectionId.BCP47) {
526                 continue;
527             }
528             logln(section.toString());
529             for (PageId page : sectionAndPages.getValue()) {
530                 final Set<String> cachedPaths = PathHeader.Factory.getCachedPaths(section, page);
531                 if (cachedPaths == null) {
532                     if (!badZonePages.contains(page) && page != PageId.Unknown) {
533                         errln("Null pages for: " + section + "\t" + page);
534                     }
535                 } else if (section == SectionId.Special && page == PageId.Unknown) {
536                     // skip
537                 } else if (section == SectionId.Timezones && page == PageId.UnknownT) {
538                     // skip
539                 } else if (section == SectionId.Misc && page == PageId.Transforms) {
540                     // skip
541                 } else {
542 
543                     int count2 = cachedPaths.size();
544                     if (count2 == 0) {
545                         warnln("Missing pages for: " + section + "\t" + page);
546                     } else {
547                         counter.clear();
548                         for (String s : cachedPaths) {
549                             Level coverage = supplemental.getCoverageLevel(s, localeId);
550                             counter.add(coverage, 1);
551                         }
552                         String countString = "";
553                         int total = 0;
554                         for (Level item : Level.values()) {
555                             long count = counter.get(item);
556                             if (count != 0) {
557                                 if (!countString.isEmpty()) {
558                                     countString += ",\t+";
559                                 }
560                                 total += count;
561                                 countString += item + "=" + total;
562                             }
563                         }
564                         logln("\t" + page + "\t" + countString);
565                         if (page.toString().startsWith("Unknown")) {
566                             logln("\t\t" + cachedPaths);
567                         }
568                     }
569                 }
570             }
571         }
572     }
573 
TestMetazones()574     public void TestMetazones() {
575 
576         CLDRFile nativeFile = info.getEnglish();
577         Set<PathHeader> pathHeaders = getPathHeaders(nativeFile);
578         // String oldPage = "";
579         String oldHeader = "";
580         for (PathHeader entry : pathHeaders) {
581             final String page = entry.getPage();
582             // if (!oldPage.equals(page)) {
583             // logln(page);
584             // oldPage = page;
585             // }
586             String header = entry.getHeader();
587             if (!oldHeader.equals(header)) {
588                 logln(page + "\t" + header);
589                 oldHeader = header;
590             }
591         }
592     }
593 
getPathHeaders(CLDRFile nativeFile)594     public Set<PathHeader> getPathHeaders(CLDRFile nativeFile) {
595         Set<PathHeader> pathHeaders = new TreeSet<>();
596         for (String path : nativeFile.fullIterable()) {
597             PathHeader p = pathHeaderFactory.fromPath(path);
598             pathHeaders.add(p);
599         }
600         return pathHeaders;
601     }
602 
verifyContains( PageId pageId, Set<String> filePaths, String substring, boolean contains)603     public void verifyContains(
604             PageId pageId, Set<String> filePaths, String substring, boolean contains) {
605         String path;
606         path = findOneContaining(allPaths(pageId, filePaths), substring);
607         if (contains) {
608             if (path == null) {
609                 errln("No path contains <" + substring + ">");
610             }
611         } else {
612             if (path != null) {
613                 errln("Path contains <" + substring + ">\t" + path);
614             }
615         }
616     }
617 
findOneContaining(Collection<String> allPaths, String substring)618     private String findOneContaining(Collection<String> allPaths, String substring) {
619         for (String path : allPaths) {
620             if (path.contains(substring)) {
621                 return path;
622             }
623         }
624         return null;
625     }
626 
allPaths(PageId pageId, Set<String> filePaths)627     public Set<String> allPaths(PageId pageId, Set<String> filePaths) {
628         Set<String> result = PathHeader.Factory.getCachedPaths(pageId.getSectionId(), pageId);
629         result.retainAll(filePaths);
630         return result;
631     }
632 
TestUniqueness()633     public void TestUniqueness() {
634         Factory factory2 = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
635         Set<String> source = factory2.getAvailable();
636         for (String file : getFilesToTest(source, MIN_LOCALES)) {
637             CLDRFile nativeFile = factory2.make(file, true);
638             Map<PathHeader, String> headerToPath = new HashMap<>();
639             Map<String, String> headerVisibleToPath = new HashMap<>();
640             for (String path : nativeFile.fullIterable()) {
641                 PathHeader p = pathHeaderFactory.fromPath(path);
642                 if (p.getSectionId() == SectionId.Special) {
643                     continue;
644                 }
645                 String old = headerToPath.get(p);
646                 if (old == null) {
647                     headerToPath.put(p, path);
648                 } else if (!old.equals(path)) {
649                     if (true) { // for debugging
650                         pathHeaderFactory.clearCache();
651                         List<String> failuresOld = new ArrayList<>();
652                         pathHeaderFactory.fromPath(old, failuresOld);
653                         List<String> failuresPath = new ArrayList<>();
654                         pathHeaderFactory.fromPath(path, failuresPath);
655                     }
656                     errln(file + " collision with path " + p + "\t" + old + "\t" + path);
657                 }
658                 final String visible = p.toString();
659                 old = headerVisibleToPath.get(visible);
660                 if (old == null) {
661                     headerVisibleToPath.put(visible, path);
662                 } else if (!old.equals(path)) {
663                     errln("Collision with path " + visible + "\t" + old + "\t" + path);
664                 }
665             }
666         }
667     }
668 
TestStatus()669     public void TestStatus() {
670         CLDRFile nativeFile = info.getEnglish();
671         PathStarrer starrer = new PathStarrer();
672         EnumMap<SurveyToolStatus, Relation<String, String>> info2 =
673                 new EnumMap<>(SurveyToolStatus.class);
674         Set<String> nuked = new HashSet<>();
675         Set<String> deprecatedStar = new HashSet<>();
676 
677         for (String path : nativeFile.fullIterable()) {
678 
679             PathHeader p = pathHeaderFactory.fromPath(path);
680             final SurveyToolStatus surveyToolStatus = p.getSurveyToolStatus();
681 
682             if (p.getSectionId() == SectionId.Special
683                     && surveyToolStatus == SurveyToolStatus.READ_WRITE) {
684                 errln("SurveyToolStatus should not be " + surveyToolStatus + ": " + p);
685             }
686 
687             String starred = starrer.set(path);
688             List<String> attr = starrer.getAttributes();
689             if (surveyToolStatus != SurveyToolStatus.READ_WRITE) {
690                 nuked.add(starred);
691             }
692 
693             // check against deprecated
694             boolean isDeprecated = supplemental.isDeprecated(DtdType.ldml, path);
695             if (isDeprecated != (surveyToolStatus == SurveyToolStatus.DEPRECATED)) {
696                 if (!deprecatedStar.contains(starred)) {
697                     errln(
698                             "Different from DtdData deprecated:\t"
699                                     + isDeprecated
700                                     + "\t"
701                                     + surveyToolStatus
702                                     + "\t"
703                                     + path);
704                     deprecatedStar.add(starred);
705                 }
706             }
707 
708             Relation<String, String> data = info2.get(surveyToolStatus);
709             if (data == null) {
710                 info2.put(
711                         surveyToolStatus,
712                         data = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class));
713             }
714             data.put(starred, Joiner.on("|").join(attr));
715         }
716         for (Entry<SurveyToolStatus, Relation<String, String>> entry : info2.entrySet()) {
717             final SurveyToolStatus status = entry.getKey();
718             for (Entry<String, Set<String>> item : entry.getValue().keyValuesSet()) {
719                 final String starred = item.getKey();
720                 if (status == SurveyToolStatus.READ_WRITE && !nuked.contains(starred)) {
721                     continue;
722                 }
723                 logln(status + "\t" + starred + "\t" + item.getValue());
724             }
725         }
726     }
727 
TestPathsNotInEnglish()728     public void TestPathsNotInEnglish() {
729         Set<String> englishPaths = new HashSet<>();
730         for (String path : english.fullIterable()) {
731             englishPaths.add(path);
732         }
733         Set<String> alreadySeen = new HashSet<>(englishPaths);
734 
735         for (String locale : factory.getAvailable()) {
736             CLDRFile nativeFile = info.getCLDRFile(locale, false);
737             CoverageLevel2 coverageLevel2 = null;
738             for (String path : nativeFile.fullIterable()) {
739                 if (alreadySeen.contains(path) || path.contains("@count")) {
740                     continue;
741                 }
742                 if (coverageLevel2 == null) {
743                     coverageLevel2 = CoverageLevel2.getInstance(locale);
744                 }
745                 Level level = coverageLevel2.getLevel(path);
746                 if (Level.COMPREHENSIVE.compareTo(level) < 0) {
747                     continue;
748                 }
749                 logln("Path not in English\t" + locale + "\t" + path);
750                 alreadySeen.add(path);
751             }
752         }
753     }
754 
TestPathDescriptionCompleteness()755     public void TestPathDescriptionCompleteness() {
756         PathDescription pathDescription =
757                 new PathDescription(
758                         supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE);
759         Matcher normal =
760                 PatternCache.get("https://cldr.unicode.org/translation/[-a-zA-Z0-9_]").matcher("");
761         // https://cldr.unicode.org/translation/plurals#TOC-Minimal-Pairs
762         Set<String> alreadySeen = new HashSet<>();
763         PathStarrer starrer = new PathStarrer();
764 
765         checkPathDescriptionCompleteness(
766                 pathDescription,
767                 normal,
768                 "//ldml/numbers/defaultNumberingSystem",
769                 alreadySeen,
770                 starrer);
771         for (PathHeader pathHeader : getPathHeaders(english)) {
772             if (pathHeader.shouldHide()) {
773                 continue;
774             }
775             String path = pathHeader.getOriginalPath();
776             checkPathDescriptionCompleteness(pathDescription, normal, path, alreadySeen, starrer);
777         }
778     }
779 
checkPathDescriptionCompleteness( PathDescription pathDescription, Matcher normal, String path, Set<String> alreadySeen, PathStarrer starrer)780     public void checkPathDescriptionCompleteness(
781             PathDescription pathDescription,
782             Matcher normal,
783             String path,
784             Set<String> alreadySeen,
785             PathStarrer starrer) {
786         String value = english.getStringValue(path);
787         String description = pathDescription.getDescription(path, value, null);
788         String starred = starrer.set(path);
789         if (alreadySeen.contains(starred)) {
790             return;
791         } else if (description == null) {
792             errln("Path has no description:\t" + value + "\t" + path);
793         } else if (!description.contains("https://")) {
794             errln("Description has no URL:\t" + description + "\t" + value + "\t" + path);
795         } else if (!normal.reset(description).find()) {
796             errln(
797                     "Description has generic URL, fix to be specific:\t"
798                             + description
799                             + "\t"
800                             + value
801                             + "\t"
802                             + path);
803         } else if (description == PathDescription.MISSING_DESCRIPTION) {
804             errln("Fallback Description:\t" + value + "\t" + path);
805         } else {
806             return;
807         }
808         // Add if we had a problem, keeping us from being overwhelmed with
809         // errors.
810         alreadySeen.add(starred);
811     }
812 
TestTerritoryOrder()813     public void TestTerritoryOrder() {
814         final Set<String> goodAvailableCodes =
815                 StandardCodes.make().getGoodAvailableCodes("territory");
816         Set<String> results = showContained("001", 0, new HashSet<>(goodAvailableCodes));
817         results.remove("ZZ");
818         results.removeAll(Iso3166Data.getRegionCodesNotForTranslation());
819         for (String territory : results) {
820             String sub = Containment.getSubcontinent(territory);
821             String cont = Containment.getContinent(territory);
822             errln(
823                     "Missing\t"
824                             + getNameAndOrder(territory)
825                             + "\t"
826                             + getNameAndOrder(sub)
827                             + "\t"
828                             + getNameAndOrder(cont));
829         }
830     }
831 
showContained(String territory, int level, Set<String> soFar)832     private Set<String> showContained(String territory, int level, Set<String> soFar) {
833         if (!soFar.contains(territory)) {
834             return soFar;
835         }
836         soFar.remove(territory);
837         Set<String> contained = supplemental.getContained(territory);
838         if (contained == null) {
839             return soFar;
840         }
841         for (String containedItem : contained) {
842             logln(
843                     level
844                             + "\t"
845                             + getNameAndOrder(territory)
846                             + "\t"
847                             + getNameAndOrder(containedItem));
848         }
849         for (String containedItem : contained) {
850             showContained(containedItem, level + 1, soFar);
851         }
852         return soFar;
853     }
854 
getNameAndOrder(String territory)855     private String getNameAndOrder(String territory) {
856         return territory
857                 + "\t"
858                 + english.getName(CLDRFile.TERRITORY_NAME, territory)
859                 + "\t"
860                 + Containment.getOrder(territory);
861     }
862 
TestZCompleteness()863     public void TestZCompleteness() {
864         Map<String, PathHeader> uniqueness = new HashMap<>();
865         Set<String> alreadySeen = new HashSet<>();
866         LanguageTagParser ltp = new LanguageTagParser();
867         int count = 0;
868         for (String locale : factory.getAvailable()) {
869             if (!ltp.set(locale).getRegion().isEmpty()) {
870                 continue;
871             }
872             check(locale, false, uniqueness, alreadySeen);
873             ++count;
874         }
875         logln("Count:\t" + count);
876     }
877 
check( String localeID, boolean resolved, Map<String, PathHeader> uniqueness, Set<String> alreadySeen)878     public void check(
879             String localeID,
880             boolean resolved,
881             Map<String, PathHeader> uniqueness,
882             Set<String> alreadySeen) {
883         CLDRFile nativeFile = info.getCLDRFile(localeID, resolved);
884         int count = 0;
885         for (String path : nativeFile) {
886             if (alreadySeen.contains(path)) {
887                 continue;
888             }
889             alreadySeen.add(path);
890             final PathHeader pathHeader = pathHeaderFactory.fromPath(path);
891             ++count;
892             if (pathHeader == null) {
893                 errln("Null pathheader for " + path);
894             } else {
895                 String visible = pathHeader.toString();
896                 PathHeader old = uniqueness.get(visible);
897                 if (pathHeader.getSectionId() == SectionId.Timezones) {
898                     final PageId pageId = pathHeader.getPageId();
899                     if (badZonePages.contains(pageId) && !pathHeader.getCode().equals("Unknown")) {
900                         String msg = "Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path;
901                         if (!logKnownIssue(
902                                 "cldrbug:7802", "ICU/CLDR time zone data sync problem - " + msg)) {
903                             errln("Bad page ID:\t" + pageId + "\t" + pathHeader + "\t" + path);
904                         }
905                     }
906                 }
907                 if (old == null) {
908                     if (pathHeader.getSection().equals("Special")) {
909                         if (pathHeader.getSection().equals("Unknown")) {
910                             errln(
911                                     "PathHeader has fallback: "
912                                             + visible
913                                             + "\t"
914                                             + pathHeader.getOriginalPath());
915                             // } else {
916                             // logln("Special:\t" + visible + "\t" +
917                             // pathHeader.getOriginalPath());
918                         }
919                     }
920                     uniqueness.put(visible, pathHeader);
921                 } else if (!old.equals(pathHeader)) {
922                     if (pathHeader.getSectionId() == SectionId.Special) {
923                         logln(
924                                 "Special PathHeader not unique: "
925                                         + visible
926                                         + "\t"
927                                         + pathHeader.getOriginalPath()
928                                         + "\t"
929                                         + old.getOriginalPath());
930                     } else {
931                         errln(
932                                 "PathHeader not unique: "
933                                         + visible
934                                         + "\t"
935                                         + pathHeader.getOriginalPath()
936                                         + "\t"
937                                         + old.getOriginalPath());
938                     }
939                 }
940             }
941         }
942         logln(localeID + "\t" + count);
943     }
944 
TestContainment()945     public void TestContainment() {
946         Map<String, Map<String, String>> metazoneToRegionToZone =
947                 supplemental.getMetazoneToRegionToZone();
948         Map<String, String> metazoneToContinent = supplemental.getMetazoneToContinentMap();
949         for (String metazone : metazoneToRegionToZone.keySet()) {
950             Map<String, String> regionToZone = metazoneToRegionToZone.get(metazone);
951             String worldZone = regionToZone.get("001");
952             String territory = Containment.getRegionFromZone(worldZone);
953             if (territory == null) {
954                 territory = "ZZ";
955             }
956             String cont = Containment.getContinent(territory);
957             int order = Containment.getOrder(territory);
958             String sub = Containment.getSubcontinent(territory);
959             String revision = PathHeader.getMetazonePageTerritory(metazone);
960             String continent = metazoneToContinent.get(metazone);
961             if (continent == null) {
962                 continent = "UnknownT";
963             }
964             // Russia, Antarctica => territory
965             // in Australasia, Asia, S. America => subcontinent
966             // in N. America => N. America (grouping of 3 subcontinents)
967             // in everything else => continent
968 
969             if (territory.equals("RU")) {
970                 assertEquals("Russia special case", "RU", revision);
971             } else if (territory.equals("US")) {
972                 assertEquals("N. America special case", "003", revision);
973             } else if (territory.equals("BR")) {
974                 assertEquals("S. America special case", "005", revision);
975             }
976             if (isVerbose()) {
977                 String name = english.getName(CLDRFile.TERRITORY_NAME, cont);
978                 String name2 = english.getName(CLDRFile.TERRITORY_NAME, sub);
979                 String name3 = english.getName(CLDRFile.TERRITORY_NAME, territory);
980                 String name4 = english.getName(CLDRFile.TERRITORY_NAME, revision);
981 
982                 logln(
983                         metazone + "\t" + continent + "\t" + name + "\t" + name2 + "\t" + name3
984                                 + "\t" + order + "\t" + name4);
985             }
986         }
987     }
988 
TestZ()989     public void TestZ() {
990         PathStarrer pathStarrer = new PathStarrer();
991         pathStarrer.setSubstitutionPattern("%A");
992 
993         Set<PathHeader> sorted = new TreeSet<>();
994         Map<String, String> missing = new TreeMap<>();
995         Map<String, String> skipped = new TreeMap<>();
996         Map<String, String> collide = new TreeMap<>();
997 
998         logln("Traversing Paths");
999         for (String path : english) {
1000             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1001             String value = english.getStringValue(path);
1002             if (pathHeader == null) {
1003                 final String starred = pathStarrer.set(path);
1004                 missing.put(starred, value + "\t" + path);
1005                 continue;
1006             }
1007             if (pathHeader.getSection().equalsIgnoreCase("skip")) {
1008                 final String starred = pathStarrer.set(path);
1009                 skipped.put(starred, value + "\t" + path);
1010                 continue;
1011             }
1012             sorted.add(pathHeader);
1013         }
1014         logln("\nConverted:\t" + sorted.size());
1015         String lastHeader = "";
1016         String lastPage = "";
1017         String lastSection = "";
1018         List<String> threeLevel = new ArrayList<>();
1019         Status status = new Status();
1020         CoverageLevel2 coverageLevel2 = CoverageLevel2.getInstance("en");
1021 
1022         for (PathHeader pathHeader : sorted) {
1023             String original = pathHeader.getOriginalPath();
1024             if (!original.equals(status.pathWhereFound)) {
1025                 continue;
1026             }
1027             if (!lastSection.equals(pathHeader.getSection())) {
1028                 logln("");
1029                 threeLevel.add(pathHeader.getSection());
1030                 threeLevel.add("\t" + pathHeader.getPage());
1031                 threeLevel.add("\t\t" + pathHeader.getHeader());
1032                 lastSection = pathHeader.getSection();
1033                 lastPage = pathHeader.getPage();
1034                 lastHeader = pathHeader.getHeader();
1035             } else if (!lastPage.equals(pathHeader.getPage())) {
1036                 logln("");
1037                 threeLevel.add("\t" + pathHeader.getPage());
1038                 threeLevel.add("\t\t" + pathHeader.getHeader());
1039                 lastPage = pathHeader.getPage();
1040                 lastHeader = pathHeader.getHeader();
1041             } else if (!lastHeader.equals(pathHeader.getHeader())) {
1042                 logln("");
1043                 threeLevel.add("\t\t" + pathHeader.getHeader());
1044                 lastHeader = pathHeader.getHeader();
1045             }
1046             logln(
1047                     pathHeader
1048                             + "\t"
1049                             + coverageLevel2.getLevel(original)
1050                             + "\t"
1051                             + english.getStringValue(pathHeader.getOriginalPath())
1052                             + "\t"
1053                             + pathHeader.getOriginalPath());
1054         }
1055         if (collide.size() != 0) {
1056             errln("\nCollide:\t" + collide.size());
1057             for (Entry<String, String> item : collide.entrySet()) {
1058                 errln("\t" + item);
1059             }
1060         }
1061         if (missing.size() != 0) {
1062             errln("\nMissing:\t" + missing.size());
1063             for (Entry<String, String> item : missing.entrySet()) {
1064                 errln("\t" + item.getKey() + "\tvalue:\t" + item.getValue());
1065             }
1066         }
1067         if (skipped.size() != 0) {
1068             errln("\nSkipped:\t" + skipped.size());
1069             for (Entry<String, String> item : skipped.entrySet()) {
1070                 errln("\t" + item);
1071             }
1072         }
1073         Counter<PathHeader.Factory.CounterData> counterData =
1074                 pathHeaderFactory.getInternalCounter();
1075         logln("\nInternal Counter:\t" + counterData.size());
1076         for (PathHeader.Factory.CounterData item : counterData.keySet()) {
1077             logln(
1078                     "\t"
1079                             + counterData.getCount(item)
1080                             + "\t"
1081                             + item.get2() // externals
1082                             + "\t"
1083                             + item.get3()
1084                             + "\t"
1085                             + item.get0() // internals
1086                             + "\t"
1087                             + item.get1());
1088         }
1089         logln("\nMenus/Headers:\t" + threeLevel.size());
1090         for (String item : threeLevel) {
1091             logln(item);
1092         }
1093         LinkedHashMap<String, Set<String>> sectionsToPages =
1094                 org.unicode.cldr.util.PathHeader.Factory.getSectionsToPages();
1095         logln("\nMenus:\t" + sectionsToPages.size());
1096         for (Entry<String, Set<String>> item : sectionsToPages.entrySet()) {
1097             final String section = item.getKey();
1098             for (String page : item.getValue()) {
1099                 logln("\t" + section + "\t" + page);
1100                 int count = 0;
1101                 for (String path : pathHeaderFactory.filterCldr(section, page, english)) {
1102                     count += 1; // just count them.
1103                 }
1104                 logln("\t" + count);
1105             }
1106         }
1107     }
1108 
1109     public static final Set<String> GERMAN_UNIT_ORDER =
1110             ImmutableSet.of(
1111                     "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]",
1112                     "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]",
1113                     "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]",
1114                     "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]",
1115                     "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]",
1116                     "//ldml/units/unitLength[@type=\"narrrow\"]/unit[@type=\"volume-liter\"]",
1117                     "//ldml/numbers/minimalPairs/caseMinimalPairs",
1118                     "//ldml/numbers/minimalPairs/genderMinimalPairs");
1119 
TestOrder()1120     public void TestOrder() {
1121         String[] paths = {
1122             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"noon\"]",
1123             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"narrow\"]/dayPeriod[@type=\"afternoon1\"]",
1124         };
1125         PathHeader pathHeaderLast = null;
1126         for (String path : paths) {
1127             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1128             if (pathHeaderLast != null) {
1129                 assertRelation("ordering", true, pathHeaderLast, LEQ, pathHeader);
1130             }
1131             pathHeaderLast = pathHeader;
1132         }
1133         CLDRFile german = factory.make("de", true);
1134         Multimap<PathHeader, String> pathHeaderToPaths = TreeMultimap.create();
1135         for (String path : german.fullIterable()) {
1136             for (String prefix : GERMAN_UNIT_ORDER) {
1137                 if (path.startsWith(prefix)) {
1138                     PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1139                     pathHeaderToPaths.put(pathHeader, path);
1140                 }
1141             }
1142         }
1143         String[] germanExpected = {
1144             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/gender", // Units
1145             //
1146             // Volume
1147             //  liter
1148             //
1149             // long-gender
1150             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName", // Units    Volume  liter   long-displayName
1151             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/displayName", // Units    Volume  liter   short-displayName
1152             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units    Volume  liter   long-per
1153             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/perUnitPattern", // Units    Volume  liter   short-per
1154             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units    Volume  liter   long-one-nominative
1155             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", // Units    Volume  liter   long-one-accusative
1156             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", // Units    Volume  liter   long-one-genitive
1157             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", // Units    Volume  liter   long-one-dative
1158             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units    Volume  liter   long-other-nominative
1159             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", // Units    Volume  liter   long-other-accusative
1160             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", // Units    Volume  liter   long-other-genitive
1161             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", // Units    Volume  liter   long-other-dative
1162             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"]", // Units    Volume  liter   short-one-nominative
1163             "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"other\"]", // Units    Volume  liter   short-other-nominative
1164             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"]", // Units    Compound Units  power2  long-one-nominative-masculine
1165             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", // Units    Compound Units  power2  long-one-nominative-feminine
1166             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units    Compound Units  power2  long-one-nominative-dgender
1167             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-one-accusative-masculine
1168             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-one-accusative-feminine
1169             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-one-accusative-dgender
1170             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-one-genitive-masculine
1171             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-one-genitive-feminine
1172             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-one-genitive-dgender
1173             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"masculine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-one-dative-masculine
1174             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-one-dative-feminine
1175             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"dative\"]", // Units    Compound Units  power2  long-one-dative-dgender
1176             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"]", // Units    Compound Units  power2  long-other-nominative-masculine
1177             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", // Units    Compound Units  power2  long-other-nominative-feminine
1178             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units    Compound Units  power2  long-other-nominative-dgender
1179             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-other-accusative-masculine
1180             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-other-accusative-feminine
1181             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"accusative\"]", // Units    Compound Units  power2  long-other-accusative-dgender
1182             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-other-genitive-masculine
1183             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-other-genitive-feminine
1184             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"genitive\"]", // Units    Compound Units  power2  long-other-genitive-dgender
1185             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"masculine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-other-dative-masculine
1186             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"dative\"]", // Units    Compound Units  power2  long-other-dative-feminine
1187             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"dative\"]", // Units    Compound Units  power2  long-other-dative-dgender
1188             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units    Compound Units  power2  short-one-nominative-dgender
1189             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units    Compound Units  power2  short-other-nominative-dgender
1190             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", // Units    Compound Units  power2  narrow-one-nominative-dgender
1191             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", // Units    Compound Units  power2  narrow-other-nominative-dgender
1192             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"nominative\"]", // Miscellaneous
1193             //  Minimal Pairs
1194             //  Case
1195             // nominative
1196             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"accusative\"]", // Miscellaneous
1197             //  Minimal Pairs
1198             //  Case
1199             // accusative
1200             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"genitive\"]", // Miscellaneous
1201             // Minimal Pairs
1202             // Case    genitive
1203             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"dative\"]", // Miscellaneous
1204             // Minimal Pairs
1205             // Case    dative
1206             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"masculine\"]", // Miscellaneous    Minimal Pairs   Gender  masculine
1207             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"feminine\"]", // Miscellaneous
1208             //    Minimal
1209             // Pairs
1210             // Gender
1211             // feminine
1212             "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"neuter\"]", // Miscellaneous
1213             //  Minimal Pairs
1214             //  Gender  neuter
1215 
1216             // we don't care about order here.
1217             "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special  Suppress    compound-UnitPattern1-power2    long
1218             "//ldml/units/unitLength[@type=\"narrow\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special  Suppress    compound-UnitPattern1-power2    narrow
1219             "//ldml/units/unitLength[@type=\"short\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1", // Special  Suppress    compound-UnitPattern1-power2    short
1220         };
1221 
1222         int germanExpectedIndex = 0;
1223         int errorCount = 0;
1224         int item = 0;
1225         for (Entry<PathHeader, Collection<String>> entry : pathHeaderToPaths.asMap().entrySet()) {
1226             PathHeader ph = entry.getKey();
1227             Collection<String> epaths = entry.getValue();
1228             if (!assertEquals(entry.toString(), 1, epaths.size())) {
1229                 ++errorCount;
1230             }
1231             if (!assertEquals(
1232                     ++item + ") PathHeader order",
1233                     germanExpected[germanExpectedIndex++],
1234                     epaths.iterator().next())) {
1235                 ++errorCount;
1236             }
1237         }
1238         if (errorCount != 0) {
1239             for (Entry<PathHeader, Collection<String>> entry :
1240                     pathHeaderToPaths.asMap().entrySet()) {
1241                 PathHeader ph = entry.getKey();
1242                 Collection<String> epaths = entry.getValue();
1243                 System.out.println(
1244                         "\"" + epaths.iterator().next().replace("\"", "\\\"") + "\",\t// " + ph);
1245             }
1246         }
1247     }
1248 
Test8414()1249     public void Test8414() {
1250         PathDescription pathDescription =
1251                 new PathDescription(
1252                         supplemental, english, null, null, PathDescription.ErrorHandling.CONTINUE);
1253 
1254         String prefix =
1255                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"";
1256         String suffix = "\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]";
1257 
1258         final String path0 = prefix + "format" + suffix;
1259         final String path1 = prefix + "stand-alone" + suffix;
1260         String v0 = english.getStringValue(path0);
1261         String v1 = english.getStringValue(path1);
1262         String p0 = pathDescription.getDescription(path0, v0, null);
1263         String p1 = pathDescription.getDescription(path1, v1, null);
1264         assertTrue("Check pd for format", p0.contains("in the morning"));
1265         assertTrue("Check pd for stand-alone", !p1.contains("in the morning"));
1266     }
1267 
TestCompletenessNonLdmlDtd()1268     public void TestCompletenessNonLdmlDtd() {
1269         PathChecker pathChecker = new PathChecker();
1270         Set<String> directories = new LinkedHashSet<>();
1271         Multimap<String, String> pathValuePairs = LinkedListMultimap.create();
1272         // get all the directories containing non-Ldml dtd files
1273         for (DtdType dtdType : DtdType.values()) {
1274             if (dtdType.getStatus() != DtdType.DtdStatus.active) {
1275                 continue;
1276             }
1277             if (dtdType == DtdType.ldml
1278                     || dtdType == DtdType.ldmlICU
1279                     || dtdType == DtdType.keyboard3
1280                     || dtdType == DtdType.keyboardTest3) {
1281                 continue;
1282             }
1283             DtdData dtdData = DtdData.getInstance(dtdType);
1284             for (String dir : dtdType.directories) {
1285                 if (DEBUG_DTD_TYPE != null && !DEBUG_DTD_TYPE.directories.contains(dir)) {
1286                     continue;
1287                 }
1288                 File dir2 = new File(COMMON_DIR + dir);
1289                 logln(dir2.getName());
1290                 for (String file : dir2.list()) {
1291                     // don't need to restrict with getFilesToTest(Arrays.asList(dir2.list()),
1292                     // "root", "en")) {
1293                     if (!file.endsWith(".xml")) {
1294                         continue;
1295                     }
1296                     if (DEBUG) warnln(" TestCompletenessNonLdmlDtd: " + dir + ", " + file);
1297                     logln(" \t" + file);
1298                     for (Pair<String, String> pathValue :
1299                             XMLFileReader.loadPathValues(
1300                                     dir2 + "/" + file,
1301                                     new ArrayList<Pair<String, String>>(),
1302                                     true)) {
1303                         final String path = pathValue.getFirst();
1304                         final String value = pathValue.getSecond();
1305                         //                        logln("\t\t" + path);
1306                         if (path.startsWith("//supplementalData/unitPreferenceData/unitPreferences")
1307                                 && path.contains("skeleton")) {
1308                             int debug = 0;
1309                         }
1310                         pathChecker.checkPathHeader(dtdData, path);
1311                     }
1312                 }
1313             }
1314         }
1315         if (!pathChecker.badHeaders.isEmpty()) {
1316             System.out.println("For help with DTD updates: " + CLDRURLS.CLDR_UPDATINGDTD_URL);
1317         }
1318     }
1319 
1320     private class PathChecker {
1321         PathHeader.Factory phf = pathHeaderFactory;
1322         PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A");
1323 
1324         Set<String> badHeaders = new TreeSet<>();
1325         Map<PathHeader, PathHeader> goodHeaders = new HashMap<>();
1326         Set<PathHeader> seenBad = new HashSet<>();
1327 
1328         {
phf.clearCache()1329             phf.clearCache();
1330         }
1331 
checkPathHeader(DtdData dtdData, String rawPath)1332         public void checkPathHeader(DtdData dtdData, String rawPath) {
1333             XPathParts pathPlain = XPathParts.getFrozenInstance(rawPath);
1334             if (dtdData.isMetadata(pathPlain)) {
1335                 return;
1336             }
1337             if (dtdData.isDeprecated(pathPlain)) {
1338                 return;
1339             }
1340             Multimap<String, String> extras = HashMultimap.create();
1341             Set<String> fixedPaths = dtdData.getRegularizedPaths(pathPlain, extras);
1342             if (fixedPaths != null) {
1343                 for (String fixedPath : fixedPaths) {
1344                     checkSubpath(fixedPath);
1345                 }
1346             }
1347             for (String path : extras.keySet()) {
1348                 checkSubpath(path);
1349             }
1350         }
1351 
checkSubpath(String path)1352         public void checkSubpath(String path) {
1353             String message = ": Can't compute path header";
1354             if (path.contentEquals(
1355                     "//supplementalData/grammaticalData/grammaticalFeatures[@targets=\"nominal\"][@locales=\"it\"]/grammaticalGender/_values")) {
1356                 int debug = 0;
1357             }
1358             PathHeader ph = null;
1359             try {
1360                 ph = phf.fromPath(path);
1361                 if (seenBad.contains(ph)) {
1362                     return;
1363                 }
1364                 if (ph.getPageId() == PageId.Deprecated) {
1365                     return; // don't care
1366                 }
1367                 if (ph.getPageId() != PageId.Unknown) {
1368                     PathHeader old = goodHeaders.put(ph, ph);
1369                     if (old != null && !path.equals(old.getOriginalPath())) {
1370                         errln(
1371                                 "Duplicate path header for: "
1372                                         + ph
1373                                         + "\n\t\t "
1374                                         + path
1375                                         + "\n\t\t≠"
1376                                         + old.getOriginalPath());
1377                         seenBad.add(ph);
1378                     }
1379                     return;
1380                 }
1381                 // for debugging
1382                 phf.clearCache();
1383                 List<String> failures = new ArrayList<>();
1384                 ph = phf.fromPath(path, failures);
1385                 message = ": Unknown path header" + failures;
1386             } catch (Exception e) {
1387                 message = ": Exception in path header: " + e.getMessage();
1388             }
1389             String star = starrer.set(path);
1390             if (badHeaders.add(star)) {
1391                 errln(star + message + ", " + ph);
1392                 System.out.println(
1393                         "\tNo match in PathHeader.txt for "
1394                                 + path
1395                                 + "\n\tYou get only one message for all paths matching "
1396                                 + star
1397                                 + "\n\tFor example, check to see if the field in PathHeader.txt is in PathHeader.PageId."
1398                                 + "\n\tIf not, either correct PathHeader.txt or add it to PageId"
1399                                 + "\n\tIf you have a value attribute, you will need extra _ characters. The value attribute will show at the end with prefixed _, eg [...]/_skeleton."
1400                                 + "If there can be a value for the path then that element will add _. ");
1401             }
1402         }
1403     }
1404 
TestSupplementalItems()1405     public void TestSupplementalItems() {
1406         //      <weekOfPreference ordering="weekOfYear weekOfMonth" locales="am az bs cs cy da el et
1407         // hi ky lt mk sk ta th"/>
1408         // logln(pathHeaderFactory.getRegexInfo());
1409         CLDRFile supplementalFile =
1410                 CLDRConfig.getInstance().getSupplementalFactory().make("supplementalData", false);
1411         List<String> failures = new ArrayList<>();
1412         Multimap<String, String> pathValuePairs = LinkedListMultimap.create();
1413         for (String test : With.in(supplementalFile.iterator("//supplementalData/weekData"))) {
1414             failures.clear();
1415             XPathParts parts = XPathParts.getFrozenInstance(supplementalFile.getFullXPath(test));
1416             supplementalFile.getDtdData().getRegularizedPaths(parts, pathValuePairs);
1417             for (Entry<String, Collection<String>> entry : pathValuePairs.asMap().entrySet()) {
1418                 final String normalizedPath = entry.getKey();
1419                 final Collection<String> normalizedValue = entry.getValue();
1420                 PathHeader ph = pathHeaderFactory.fromPath(normalizedPath, failures);
1421                 if (ph == null || ph.getSectionId() == SectionId.Special) {
1422                     errln(
1423                             "Failure with "
1424                                     + test
1425                                     + " => "
1426                                     + normalizedPath
1427                                     + " = "
1428                                     + normalizedValue);
1429                 } else {
1430                     logln(ph + "\t" + test + " = " + normalizedValue);
1431                 }
1432             }
1433         }
1434     }
1435 
test10232()1436     public void test10232() {
1437         String[][] tests = {
1438             {"MMM", "Formats - Flexible - Date Formats"},
1439             {"dMM", "Formats - Flexible - Date Formats"},
1440             {"h", "Formats - Flexible - 12 Hour Time Formats"},
1441             {"hm", "Formats - Flexible - 12 Hour Time Formats"},
1442             {"Ehm", "Formats - Flexible - 12 Hour Time Formats"},
1443             {"H", "Formats - Flexible - 24 Hour Time Formats"},
1444             {"Hm", "Formats - Flexible - 24 Hour Time Formats"},
1445             {"EHm", "Formats - Flexible - 24 Hour Time Formats"},
1446         };
1447         for (String[] test : tests) {
1448             String path =
1449                     "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\""
1450                             + test[0]
1451                             + "\"]";
1452             PathHeader pathHeader = pathHeaderFactory.fromPath(path);
1453             assertEquals(
1454                     "flexible formats",
1455                     test[1] + "|" + test[0],
1456                     pathHeader.getHeader() + "|" + pathHeader.getCode());
1457         }
1458     }
1459 
1460     // Moved from TestAnnotations and generalized
testPathHeaderSize()1461     public void testPathHeaderSize() {
1462         String locale = "ar"; // choose one with lots of plurals
1463         int maxSize = 1250;
1464         boolean showTable = false; // only printed if test fails or verbose
1465 
1466         Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory();
1467         CLDRFile english = factory.make(locale, true);
1468 
1469         PathHeader.Factory phf = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish());
1470         Counter<PageId> counterPageId = new Counter<>();
1471         Counter<PageId> counterPageIdAll = new Counter<>();
1472         for (String path : english) {
1473             Level level =
1474                     CLDRConfig.getInstance()
1475                             .getSupplementalDataInfo()
1476                             .getCoverageLevel(path, locale);
1477             PathHeader ph = phf.fromPath(path);
1478             if (level.compareTo(Level.MODERN) <= 0) {
1479                 counterPageId.add(ph.getPageId(), 1);
1480             }
1481             counterPageIdAll.add(ph.getPageId(), 1);
1482         }
1483         Set<R2<Long, PageId>> entrySetSortedByCount =
1484                 counterPageId.getEntrySetSortedByCount(false, null);
1485         for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) {
1486             long size = sizeAndPageId.get0();
1487             PageId pageId = sizeAndPageId.get1();
1488             if (!assertTrue(
1489                     pageId.getSectionId()
1490                             + "/"
1491                             + pageId
1492                             + " size ("
1493                             + size
1494                             + ") < "
1495                             + maxSize
1496                             + "?",
1497                     size < maxSize)) {
1498                 showTable = true;
1499             }
1500             // System.out.println(pageId + "\t" + size);
1501         }
1502         if (showTable || isVerbose()) {
1503             for (R2<Long, PageId> sizeAndPageId : entrySetSortedByCount) {
1504                 PageId pageId = sizeAndPageId.get1();
1505                 System.out.println(
1506                         pageId.getSectionId()
1507                                 + "\t"
1508                                 + pageId
1509                                 + "\t"
1510                                 + sizeAndPageId.get0()
1511                                 + "\t"
1512                                 + counterPageIdAll.get(pageId));
1513             }
1514         }
1515     }
1516 
TestCLDR_11454()1517     public void TestCLDR_11454() {
1518         PathHeader.Factory phf = PathHeader.getFactory();
1519         PathHeader century =
1520                 phf.fromPath(
1521                         "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-century\"]/displayName");
1522         PathHeader decade =
1523                 phf.fromPath(
1524                         "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"duration-decade\"]/displayName");
1525         assertEquals("Section", century.getSectionId(), decade.getSectionId());
1526         assertEquals("Page", century.getPageId(), decade.getPageId());
1527     }
1528 
TestEmojiOrder()1529     public void TestEmojiOrder() {
1530         PathHeader.Factory phf = PathHeader.getFactory();
1531         String[] desiredOrder = {
1532             "��‍⚕", "��‍⚕", "��‍⚕",
1533             "��‍⚖", "��‍⚖", "��‍⚖"
1534         };
1535         List<PathHeader> pathHeaders = new ArrayList<>();
1536         for (String emoji : desiredOrder) {
1537             String base = "//ldml/annotations/annotation[@cp=\"" + emoji + "\"]";
1538             pathHeaders.add(phf.fromPath(base + "[@type=\"tts\"]"));
1539             pathHeaders.add(phf.fromPath(base));
1540             logln(
1541                     emoji
1542                             + ": getEmojiMinorOrder="
1543                             + Emoji.getEmojiMinorOrder(Emoji.getMinorCategory(emoji))
1544                             + ", getEmojiToOrder="
1545                             + Emoji.getEmojiToOrder(emoji));
1546         }
1547         PathHeader lastItem = null;
1548         for (PathHeader item : pathHeaders) {
1549             if (lastItem != null) {
1550                 assertEquals("Section", lastItem.getSectionId(), item.getSectionId());
1551                 assertEquals("Page", lastItem.getPageId(), item.getPageId());
1552                 assertEquals("Header", lastItem.getHeader(), item.getHeader());
1553                 if (!assertTrue(lastItem + " < " + item, lastItem.compareTo(item) < 0)) {
1554                     lastItem.compareTo(item); // for debugging
1555                 }
1556             }
1557             lastItem = item;
1558         }
1559     }
1560 
TestQuotes()1561     public void TestQuotes() {
1562         // quotes should never appear in result
1563         PathHeader.Factory phf = PathHeader.getFactory();
1564         String[] tests = {
1565             "//supplementalData/plurals[@type=\"ordinal\"]/pluralRules[@locales=\"ig\"]/pluralRule[@count=\"other\"]",
1566             "//supplementalData/transforms/transform[@source=\"und-Khmr\"][@target=\"und-Latn\"]"
1567         };
1568         for (String test : tests) {
1569             PathHeader trial = phf.fromPath(test);
1570             assertEquals("No quotes in pathheader", false, trial.toString().contains("\""));
1571         }
1572     }
1573     /**
1574      * Make sure that the PathHeader sort order is consistent with the grammatical feature orders
1575      * "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"volume-liter\"]/displayName"
1576      * //ldml/units/unitLength[@type=\long\"]/unit[@type=\"volume-liter\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]",
1577      * //ldml/units/unitLength[@type=\long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"accusative\"]",
1578      */
TestUnitOrder()1579     public void TestUnitOrder() {
1580         PathHeader.Factory phf = PathHeader.getFactory();
1581         List<PathHeader> expectedOrder = new ArrayList<>();
1582         List<Width> widths = Arrays.asList(Width.LONG, Width.SHORT, Width.NARROW);
1583         List<CaseValues> cases = Arrays.asList(GrammarInfo.CaseValues.values()).subList(0, 3);
1584         List<GenderValues> genders = Arrays.asList(GrammarInfo.GenderValues.values()).subList(0, 3);
1585 
1586         for (Width width : widths) {
1587             String path =
1588                     "//ldml/units/unitLength[@type=\""
1589                             + width
1590                             + "\"]/unit[@type=\"length-meter\"]/displayName";
1591             expectedOrder.add(phf.fromPath(path));
1592         }
1593 
1594         for (Width width : widths) {
1595             for (Count count : Count.values()) {
1596                 for (GrammarInfo.CaseValues gCase : cases) {
1597                     if (width != Width.LONG && gCase != CaseValues.nominative) {
1598                         break;
1599                     }
1600                     String path =
1601                             "//ldml/units/unitLength[@type=\""
1602                                     + width
1603                                     + "\"]/unit[@type=\"length-meter\"]/unitPattern[@count=\""
1604                                     + count
1605                                     + (gCase == CaseValues.nominative ? "" : "\"][@case=\"" + gCase)
1606                                     + "\"]";
1607                     expectedOrder.add(phf.fromPath(path));
1608                 }
1609             }
1610         }
1611         for (Width width : widths) {
1612             for (Count count : Count.values()) {
1613                 for (GrammarInfo.CaseValues gCase : cases) {
1614                     if (width != Width.LONG && gCase != CaseValues.nominative) {
1615                         break;
1616                     }
1617                     for (GrammarInfo.GenderValues gGender : genders) {
1618                         if (width != Width.LONG && gGender != GenderValues.neuter) {
1619                             break;
1620                         }
1621                         String path =
1622                                 "//ldml/units/unitLength[@type=\""
1623                                         + width
1624                                         + "\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\""
1625                                         + count
1626                                         + (gGender == GenderValues.neuter
1627                                                 ? ""
1628                                                 : "\"][@gender=\"" + gGender)
1629                                         + (gCase == CaseValues.nominative
1630                                                 ? ""
1631                                                 : "\"][@case=\"" + gCase)
1632                                         + "\"]";
1633                         expectedOrder.add(phf.fromPath(path));
1634                     }
1635                 }
1636             }
1637         }
1638         for (Count count : Count.values()) {
1639             String path =
1640                     "//ldml/numbers/minimalPairs/ordinalMinimalPairs[@ordinal=\"" + count + "\"]";
1641             expectedOrder.add(phf.fromPath(path));
1642         }
1643         for (Count count : Count.values()) {
1644             String path =
1645                     "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"" + count + "\"]";
1646             expectedOrder.add(phf.fromPath(path));
1647         }
1648         for (GrammarInfo.CaseValues gCase : cases) {
1649             String path = "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"" + gCase + "\"]";
1650             expectedOrder.add(phf.fromPath(path));
1651         }
1652         for (GrammarInfo.GenderValues gGender : genders) {
1653             String path =
1654                     "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"" + gGender + "\"]";
1655             expectedOrder.add(phf.fromPath(path));
1656         }
1657 
1658         PathHeader last = null;
1659         int item = 0;
1660         int errorCount = 0;
1661         for (PathHeader pathHeader : expectedOrder) {
1662             if (last != null) {
1663                 if (!assertTrue(
1664                         ++item + ")\t" + last + "\t<\t" + pathHeader,
1665                         last.compareTo(pathHeader) < 0)) {
1666                     errorCount++;
1667                     last.compareTo(pathHeader);
1668                 }
1669             }
1670             last = pathHeader;
1671         }
1672         if (errorCount != 0 || isVerbose()) {
1673             for (PathHeader pathHeader : expectedOrder) {
1674                 System.out.println(
1675                         "\""
1676                                 + pathHeader.getOriginalPath().replace("\"", "\\\"")
1677                                 + "\",\t// "
1678                                 + pathHeader);
1679             }
1680         }
1681     }
1682 
testPageSize()1683     public void testPageSize() {
1684         final long minError = 946; // above this, emit error
1685         final long minLog = 700; // otherwise above this, emit warning
1686         Factory factory = CLDRConfig.getInstance().getCommonAndSeedAndMainAndAnnotationsFactory();
1687         List<String> locales =
1688                 StandardCodes.make()
1689                         .getLocaleCoverageLocales(Organization.cldr, ImmutableSet.of(Level.MODERN))
1690                         .stream()
1691                         .filter(x -> CLDRLocale.getInstance(x).getCountry().isEmpty())
1692                         .collect(Collectors.toUnmodifiableList());
1693         List<Counter<PageId>> counters = new ArrayList<>();
1694         final String thresholdExplanation = "log/error thresholds are " + minLog + "/" + minError;
1695         for (String locale : locales) {
1696             CLDRFile cldrFile = factory.make(locale, false);
1697             PathHeader.Factory phf = PathHeader.getFactory();
1698             Counter<PageId> c = new Counter<>();
1699             counters.add(c);
1700             for (String path : cldrFile) {
1701                 PathHeader ph = phf.fromPath(path);
1702                 c.add(ph.getPageId(), 1);
1703             }
1704             for (PageId entry : c.getKeysetSortedByKey()) {
1705                 long count = c.getCount(entry);
1706                 if (count > minLog) {
1707                     final String message =
1708                             String.format(
1709                                     "%s\t%s\t%s\thas too many entries:\t%d\t(%s)",
1710                                     locale,
1711                                     entry.getSectionId().toString(),
1712                                     entry,
1713                                     count,
1714                                     thresholdExplanation);
1715                     if (count > minError) {
1716                         errln(message);
1717                     } else {
1718                         warnln(message);
1719                     }
1720                 }
1721             }
1722         }
1723         if (isVerbose()) {
1724             System.out.println();
1725             Set<PageId> sorted = new TreeSet<>();
1726             for (Counter<PageId> counter : counters) {
1727                 sorted.addAll(counter.keySet());
1728             }
1729             int i = 0;
1730             System.out.print("Order" + "\t" + "Section" + "\t" + "Page");
1731             for (String c : locales) {
1732                 System.out.print("\t" + c);
1733             }
1734             System.out.println();
1735 
1736             for (PageId entry : sorted) {
1737                 System.out.print(++i + "\t" + entry.getSectionId() + "\t" + entry);
1738                 for (Counter<PageId> c : counters) {
1739                     System.out.print("\t" + c.get(entry));
1740                 }
1741                 System.out.println();
1742             }
1743         }
1744     }
1745 }
1746