xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestSupplementalInfo.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableSet;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.TreeMultimap;
7 import com.ibm.icu.impl.Relation;
8 import com.ibm.icu.impl.Row;
9 import com.ibm.icu.impl.Row.R2;
10 import com.ibm.icu.impl.Row.R3;
11 import com.ibm.icu.impl.Utility;
12 import com.ibm.icu.impl.number.DecimalQuantity;
13 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
14 import com.ibm.icu.lang.UCharacter;
15 import com.ibm.icu.lang.UCharacterEnums;
16 import com.ibm.icu.lang.UScript;
17 import com.ibm.icu.text.PluralRules;
18 import com.ibm.icu.text.PluralRules.DecimalQuantitySamples;
19 import com.ibm.icu.text.PluralRules.DecimalQuantitySamplesRange;
20 import com.ibm.icu.text.PluralRules.SampleType;
21 import com.ibm.icu.text.StringTransform;
22 import com.ibm.icu.text.UnicodeSet;
23 import com.ibm.icu.util.Output;
24 import com.ibm.icu.util.TimeZone;
25 import com.ibm.icu.util.ULocale;
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.Date;
31 import java.util.EnumMap;
32 import java.util.EnumSet;
33 import java.util.HashMap;
34 import java.util.HashSet;
35 import java.util.Iterator;
36 import java.util.LinkedHashMap;
37 import java.util.LinkedHashSet;
38 import java.util.List;
39 import java.util.Locale;
40 import java.util.Map;
41 import java.util.Map.Entry;
42 import java.util.Set;
43 import java.util.TreeMap;
44 import java.util.TreeSet;
45 import java.util.logging.Logger;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48 import java.util.stream.Collectors;
49 import org.unicode.cldr.draft.ScriptMetadata;
50 import org.unicode.cldr.test.CoverageLevel2;
51 import org.unicode.cldr.tool.LikelySubtags;
52 import org.unicode.cldr.tool.PluralMinimalPairs;
53 import org.unicode.cldr.tool.PluralRulesFactory;
54 import org.unicode.cldr.util.Builder;
55 import org.unicode.cldr.util.CLDRConfig;
56 import org.unicode.cldr.util.CLDRFile;
57 import org.unicode.cldr.util.CLDRFile.WinningChoice;
58 import org.unicode.cldr.util.CLDRLocale;
59 import org.unicode.cldr.util.CLDRURLS;
60 import org.unicode.cldr.util.CldrUtility;
61 import org.unicode.cldr.util.DateConstants;
62 import org.unicode.cldr.util.GrammarInfo;
63 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
64 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
65 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
66 import org.unicode.cldr.util.Iso3166Data;
67 import org.unicode.cldr.util.Iso639Data;
68 import org.unicode.cldr.util.Iso639Data.Scope;
69 import org.unicode.cldr.util.IsoCurrencyParser;
70 import org.unicode.cldr.util.LanguageTagCanonicalizer;
71 import org.unicode.cldr.util.LanguageTagParser;
72 import org.unicode.cldr.util.Level;
73 import org.unicode.cldr.util.LocaleNames;
74 import org.unicode.cldr.util.Organization;
75 import org.unicode.cldr.util.Pair;
76 import org.unicode.cldr.util.PluralRanges;
77 import org.unicode.cldr.util.PreferredAndAllowedHour;
78 import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
79 import org.unicode.cldr.util.StandardCodes;
80 import org.unicode.cldr.util.StandardCodes.CodeType;
81 import org.unicode.cldr.util.StandardCodes.LstrType;
82 import org.unicode.cldr.util.SupplementalDataInfo;
83 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
84 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
85 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle;
86 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
87 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo;
88 import org.unicode.cldr.util.SupplementalDataInfo.DateRange;
89 import org.unicode.cldr.util.SupplementalDataInfo.MetaZoneRange;
90 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
91 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
92 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
93 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
94 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
95 import org.unicode.cldr.util.SupplementalDataInfo.SampleList;
96 import org.unicode.cldr.util.Validity;
97 import org.unicode.cldr.util.Validity.Status;
98 
99 public class TestSupplementalInfo extends TestFmwkPlus {
    /** Shared CLDR configuration from which this test obtains its supplemental data. */
    static CLDRConfig testInfo = CLDRConfig.getInstance();

    /** Standard code tables, created once via {@code StandardCodes.make()}. */
    private static final StandardCodes STANDARD_CODES = StandardCodes.make();

    /** Supplemental data obtained from {@code testInfo}; queried by the tests in this class. */
    private static final SupplementalDataInfo SUPPLEMENTAL = testInfo.getSupplementalDataInfo();
105 
main(String[] args)106     public static void main(String[] args) {
107         new TestSupplementalInfo().run(args);
108     }
109 
TestPluralSampleOrder()110     public void TestPluralSampleOrder() {
111         HashSet<PluralInfo> seen = new HashSet<>();
112         for (String locale : SUPPLEMENTAL.getPluralLocales()) {
113             if (locale.equals(LocaleNames.ROOT)) {
114                 continue;
115             }
116             PluralInfo pi = SUPPLEMENTAL.getPlurals(locale);
117             if (seen.contains(pi)) {
118                 continue;
119             }
120             seen.add(pi);
121             for (SampleType s : SampleType.values()) {
122                 for (Count c : pi.getCounts(s)) {
123                     DecimalQuantitySamples sSamples =
124                             pi.getPluralRules().getDecimalSamples(c.toString(), s);
125                     if (sSamples == null) {
126                         errln(locale + " no sample for " + c);
127                         continue;
128                     }
129                     if (s == SampleType.DECIMAL) {
130                         continue; // skip
131                     }
132                     DecimalQuantitySamplesRange lastSample = null;
133                     for (DecimalQuantitySamplesRange sample : sSamples.getSamples()) {
134                         if (lastSample != null) {
135                             if (compare(lastSample.start, sample.start) > 0) {
136                                 errln(
137                                         locale
138                                                 + ":"
139                                                 + c
140                                                 + ": out of order with "
141                                                 + lastSample
142                                                 + " > "
143                                                 + sample);
144                             } else if (false) {
145                                 logln(
146                                         locale
147                                                 + ":"
148                                                 + c
149                                                 + ": in order with "
150                                                 + lastSample
151                                                 + " < "
152                                                 + sample);
153                             }
154                         }
155                         lastSample = sample;
156                     }
157                 }
158             }
159         }
160     }
161 
compare(DecimalQuantity me, DecimalQuantity other)162     public static int compare(DecimalQuantity me, DecimalQuantity other) {
163         // We place exponent notation samples entirely after ones without exponent
164         if (me.getExponent() != other.getExponent()) {
165             return me.getExponent() < other.getExponent() ? -1 : 1;
166         }
167 
168         return (int) (me.toDouble() - other.toDouble());
169     }
170 
TestPluralRanges()171     public void TestPluralRanges() {
172         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
173         Set<String> localesToTest = new TreeSet<>(SUPPLEMENTAL.getPluralRangesLocales());
174         for (String locale : StandardCodes.make().getLocaleCoverageLocales("google")) { // superset
175             if (locale.equals("*") || locale.contains("_")) {
176                 continue;
177             }
178             localesToTest.add(locale);
179         }
180         Set<String> modernLocales =
181                 StandardCodes.make()
182                         .getLocaleCoverageLocales(Organization.cldr, EnumSet.of(Level.MODERN));
183 
184         Output<DecimalQuantity> maxSample = new Output<>();
185         Output<DecimalQuantity> minSample = new Output<>();
186 
187         for (String locale : localesToTest) {
188             final String templateLine =
189                     "Template for "
190                             + ULocale.getDisplayName(locale, "en")
191                             + " ("
192                             + locale
193                             + ") translators to fix:";
194             PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(locale);
195             Set<Count> counts = pluralInfo.getCounts();
196 
197             final PluralMinimalPairs samplePatterns =
198                     PluralMinimalPairs.getInstance(new ULocale(locale).toString());
199 
200             // check that there are no null values
201             PluralRanges pluralRanges = SUPPLEMENTAL.getPluralRanges(locale);
202             if (pluralRanges == null) {
203                 if (!modernLocales.contains(locale)) {
204                     logln("Missing plural ranges for " + locale);
205                 } else {
206                     errOrLog(
207                             CoverageIssue.error,
208                             locale + "\tMissing plural ranges",
209                             "Cldrbug:7839",
210                             "Missing plural data for modern locales");
211                     StringBuilder failureCases = new StringBuilder(templateLine);
212                     for (Count start : counts) {
213                         for (Count end : counts) {
214                             pluralInfo.rangeExists(start, end, minSample, maxSample);
215                             final String rangeLine =
216                                     getRangeLine(
217                                             start, end, null, maxSample, minSample, samplePatterns);
218                             failureCases.append("\n" + locale + "\t" + rangeLine);
219                         }
220                     }
221                     errOrLog(CoverageIssue.warn, failureCases.toString());
222                 }
223                 continue;
224             }
225             EnumSet<Count> found = EnumSet.noneOf(Count.class);
226             for (Count count : Count.values()) {
227                 if (pluralRanges.isExplicitlySet(count) && !counts.contains(count)) {
228                     assertTrue(
229                             locale
230                                     + "\t pluralRanges categories must be valid for locale:\t"
231                                     + count
232                                     + " must be in "
233                                     + counts,
234                             !pluralRanges.isExplicitlySet(count));
235                 }
236                 for (Count end : Count.values()) {
237                     Count result = pluralRanges.getExplicit(count, end);
238                     if (result != null) {
239                         found.add(result);
240                     }
241                 }
242             }
243 
244             // check empty range results
245             if (found.isEmpty()) {
246                 errOrLog(
247                         CoverageIssue.error,
248                         "Empty range results for " + locale,
249                         "Cldrbug:7839",
250                         "Missing plural data for modern locales");
251             } else {
252                 if (samplePatterns == null) {
253                     errOrLog(
254                             CoverageIssue.error,
255                             locale + "\tMissing sample patterns",
256                             "Cldrbug:7839",
257                             "Missing plural data for modern locales");
258                 } else {
259                     for (Count result : found) {
260                         String samplePattern =
261                                 samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
262                         if (samplePattern != null && !samplePattern.contains("{0}")) {
263                             errln(
264                                     "Plural Ranges cannot have results that don't use {0} in samples: "
265                                             + locale
266                                             + ", "
267                                             + result
268                                             + "\t«"
269                                             + samplePattern
270                                             + "»");
271                         }
272                     }
273                 }
274                 if (isVerbose()) {
275                     logln("Range results for " + locale + ":\t" + found);
276                 }
277             }
278 
279             // check for missing values
280             boolean failure = false;
281             StringBuilder failureCases = new StringBuilder(templateLine);
282             for (Count start : counts) {
283                 for (Count end : counts) {
284                     boolean needsValue = pluralInfo.rangeExists(start, end, minSample, maxSample);
285                     Count explicitValue = pluralRanges.getExplicit(start, end);
286                     final String rangeLine =
287                             getRangeLine(
288                                     start,
289                                     end,
290                                     explicitValue,
291                                     maxSample,
292                                     minSample,
293                                     samplePatterns);
294                     failureCases.append("\n" + locale + "\t" + rangeLine);
295                     if (needsValue && explicitValue == null) {
296                         errOrLog(
297                                 CoverageIssue.error,
298                                 locale + "\tNo explicit value for range: " + rangeLine,
299                                 "Cldrbug:7839",
300                                 "Missing plural data for modern locales");
301                         failure = true;
302                         failureCases.append("\tError — need explicit result");
303                     } else if (!needsValue && explicitValue != null) {
304                         errOrLog(
305                                 CoverageIssue.error,
306                                 locale
307                                         + "\tDoesn't need explicit value, but has one: "
308                                         + PluralRanges.showRange(start, end, explicitValue),
309                                 "Cldrbug:7839",
310                                 "Missing plural data for modern locales");
311                         failureCases.append("\tUnnecessary");
312                         failure = true;
313                     } else {
314                         failureCases.append("\tOK");
315                     }
316                 }
317             }
318             if (failure) {
319                 errOrLog(CoverageIssue.warn, failureCases.toString());
320             }
321         }
322     }
323 
getRangeLine( Count start, Count end, Count result, Output<DecimalQuantity> maxSample, Output<DecimalQuantity> minSample, PluralMinimalPairs samplePatterns)324     private String getRangeLine(
325             Count start,
326             Count end,
327             Count result,
328             Output<DecimalQuantity> maxSample,
329             Output<DecimalQuantity> minSample,
330             PluralMinimalPairs samplePatterns) {
331         final String range = minSample + "–" + maxSample;
332         String example = range;
333         if (samplePatterns != null) {
334             example = "";
335             if (result != null) {
336                 String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, result);
337                 example +=
338                         "«" + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range)) + "»";
339             } else {
340                 for (Count c : new TreeSet<>(Arrays.asList(start, end, Count.other))) {
341                     String pat = samplePatterns.get(PluralRules.PluralType.CARDINAL, c);
342                     example +=
343                             c
344                                     + ":«"
345                                     + (pat == null ? "MISSING-PATTERN" : pat.replace("{0}", range))
346                                     + "»"
347                                     + "?\tOR ";
348                 }
349                 example += " …";
350             }
351         }
352         return start
353                 + "\t"
354                 + end
355                 + "\t"
356                 + (result == null ? "?" : result.toString())
357                 + "\t"
358                 + example;
359     }
360 
getRangeLine(Count count, PluralRules pluralRules, String pattern)361     private String getRangeLine(Count count, PluralRules pluralRules, String pattern) {
362         String sample = "?";
363         DecimalQuantitySamples exampleList =
364                 pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.INTEGER);
365         if (exampleList == null) {
366             exampleList =
367                     pluralRules.getDecimalSamples(count.toString(), PluralRules.SampleType.DECIMAL);
368         }
369         DecimalQuantity sampleDecimal = PluralInfo.getNonZeroSampleIfPossible(exampleList);
370         sample = sampleDecimal.toString();
371 
372         String example =
373                 pattern == null ? "NO-SAMPLE!" : "«" + pattern.replace("{0}", sample) + "»";
374         return count + "\t" + example;
375     }
376 
TestPluralSamples()377     public void TestPluralSamples() {
378         String[][] test = {
379             {"en", "ordinal", "1", "one"},
380             {"en", "ordinal", "2", "two"},
381             {"en", "ordinal", "3", "few"},
382             {"en", "ordinal", "4", "other"},
383             {"sl", "cardinal", "2", "two"},
384         };
385         for (String[] row : test) {
386             checkPluralSamples(row);
387         }
388     }
389 
TestPluralSamples2()390     public void TestPluralSamples2() {
391         PluralRulesFactory prf = PluralRulesFactory.getInstance(SUPPLEMENTAL);
392         for (String locale : prf.getLocales()) {
393             if (locale.equals(LocaleNames.UND)) {
394                 continue;
395             }
396             if (locale.equals("pl")) {
397                 int debug = 0;
398             }
399             final PluralMinimalPairs samplePatterns = PluralMinimalPairs.getInstance(locale);
400             for (PluralRules.PluralType type : PluralRules.PluralType.values()) {
401                 PluralInfo rules =
402                         SUPPLEMENTAL.getPlurals(
403                                 SupplementalDataInfo.PluralType.fromStandardType(type),
404                                 locale.toString());
405                 if (rules.getCounts().size() == 1) {
406                     continue; // don't require rules for unary cases
407                 }
408                 Multimap<String, Count> sampleToCount = TreeMultimap.create();
409 
410                 for (Count count : rules.getCounts()) {
411                     String sample = samplePatterns.get(type, count);
412                     if (sample == null) {
413                         errOrLog(
414                                 CoverageIssue.error,
415                                 locale + "\t" + type + " \tmissing samples for " + count,
416                                 "cldrbug:7075",
417                                 "Missing ordinal minimal pairs");
418                     } else {
419                         sampleToCount.put(sample, count);
420                         PluralRules pRules = rules.getPluralRules();
421                         double unique = pRules.getUniqueKeywordValue(count.toString());
422                         if (unique == PluralRules.NO_UNIQUE_VALUE && !sample.contains("{0}")) {
423                             errln(
424                                     "Missing {0} in sample: "
425                                             + locale
426                                             + ", "
427                                             + type
428                                             + ", "
429                                             + count
430                                             + " «"
431                                             + sample
432                                             + "»");
433                         }
434                     }
435                 }
436                 for (Entry<String, Collection<Count>> entry : sampleToCount.asMap().entrySet()) {
437                     if (entry.getValue().size() > 1) {
438                         errln(
439                                 "Colliding minimal pair samples: "
440                                         + locale
441                                         + ", "
442                                         + type
443                                         + ", "
444                                         + entry.getValue()
445                                         + " «"
446                                         + entry.getKey()
447                                         + "»");
448                     }
449                 }
450             }
451         }
452     }
453 
TestCldrScriptCodes()454     public void TestCldrScriptCodes() {
455         Set<String> codes = SUPPLEMENTAL.getCLDRScriptCodes();
456 
457         Set<String> unicodeScripts = ScriptMetadata.getScripts();
458         assertRelation(
459                 "getCLDRScriptCodes contains Unicode Scripts",
460                 true,
461                 codes,
462                 CONTAINS_ALL,
463                 unicodeScripts);
464 
465         ImmutableSet<String> allSpecials =
466                 ImmutableSet.of("Zinh", "Zmth", "Zsye", "Zsym", "Zxxx", "Zyyy", "Zzzz");
467         assertRelation(
468                 "getCLDRScriptCodes contains allSpecials", true, codes, CONTAINS_ALL, allSpecials);
469 
470         ImmutableSet<String> allCompos = ImmutableSet.of("Hanb", "Hrkt", "Jamo", "Jpan", "Kore");
471         assertRelation(
472                 "getCLDRScriptCodes contains allCompos", true, codes, CONTAINS_ALL, allCompos);
473 
474         Map<Status, Set<String>> scripts = Validity.getInstance().getStatusToCodes(LstrType.script);
475         for (Entry<Status, Set<String>> e : scripts.entrySet()) {
476             switch (e.getKey()) {
477                 case regular:
478                 case special:
479                 case unknown:
480                     assertRelation(
481                             "getCLDRScriptCodes contains " + e.getKey(),
482                             true,
483                             codes,
484                             CONTAINS_ALL,
485                             e.getValue());
486                     break;
487                 default:
488                     break; // do nothin
489             }
490         }
491     }
492 
checkPluralSamples(String... row)493     public void checkPluralSamples(String... row) {
494         PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(PluralType.valueOf(row[1]), row[0]);
495         Count count =
496                 pluralInfo.getCount(DecimalQuantity_DualStorageBCD.fromExponentString(row[2]));
497         assertEquals(String.join(", ", row), Count.valueOf(row[3]), count);
498     }
499 
TestPluralLocales()500     public void TestPluralLocales() {
501         // get the unique rules
502         for (PluralType type : PluralType.values()) {
503             Relation<PluralInfo, String> pluralsToLocale =
504                     Relation.of(new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
505             for (String locale : new TreeSet<>(SUPPLEMENTAL.getPluralLocales(type))) {
506                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale);
507                 pluralsToLocale.put(pluralInfo, locale);
508             }
509 
510             String[][] equivalents = {
511                 {"mo", "ro"},
512                 {"tl", "fil"},
513                 {"he", "iw"},
514                 {"in", "id"},
515                 {"jw", "jv"},
516                 {"ji", "yi"},
517                 {"sh", "sr"},
518             };
519             for (Entry<PluralInfo, Set<String>> pluralInfoEntry : pluralsToLocale.keyValuesSet()) {
520                 PluralInfo pluralInfo2 = pluralInfoEntry.getKey();
521                 Set<String> locales = pluralInfoEntry.getValue();
522                 // check that equivalent locales are either both in or both out
523                 for (String[] row : equivalents) {
524                     assertEquals(
525                             type + " must be equivalent: " + Arrays.asList(row),
526                             locales.contains(row[0]),
527                             locales.contains(row[1]));
528                 }
529                 // check that no rules contain 'within'
530                 for (Count count : pluralInfo2.getCounts()) {
531                     String rule = pluralInfo2.getRule(count);
532                     if (rule == null) {
533                         continue;
534                     }
535                     assertFalse(
536                             "Rule '"
537                                     + rule
538                                     + "' for "
539                                     + Arrays.asList(locales)
540                                     + " doesn't contain 'within'",
541                             rule.contains("within"));
542                 }
543             }
544         }
545     }
546 
TestDigitPluralCases()547     public void TestDigitPluralCases() {
548         String[][] tests = {
549             {"en", "one", "1", "1"},
550             {"en", "one", "2", ""},
551             {"en", "one", "3", ""},
552             {"en", "one", "4", ""},
553             {"en", "other", "1", "0, 2-9, 0.0, 0.1, 0.2, …"},
554             {"en", "other", "2", "10-99, 10.0, 10.1, 10.2, …"},
555             {"en", "other", "3", "100-999, 100.0, 100.1, 100.2, …"},
556             {"en", "other", "4", "1000-9999, 1000.0, 1000.1, 1000.2, …"},
557             {"hr", "one", "1", "1, 0.1, 2.10, 1.1, …"},
558             {"hr", "one", "2", "21, 31, 41, 51, 61, 71, …, 10.1, 12.10, 11.1, …"},
559             {"hr", "one", "3", "101, 121, 131, 141, 151, 161, …, 100.1, 102.10, 101.1, …"},
560             {"hr", "one", "4", "1001, 1021, 1031, 1041, 1051, 1061, …, 1000.1, 1002.10, 1001.1, …"},
561             {"hr", "few", "1", "2-4, 0.2, 0.3, 0.4, …"},
562             {"hr", "few", "2", "22-24, 32-34, 42-44, …, 10.2, 10.3, 10.4, …"},
563             {"hr", "few", "3", "102-104, 122-124, 132-134, …, 100.2, 100.3, 100.4, …"},
564             {"hr", "few", "4", "1002-1004, 1022-1024, 1032-1034, …, 1000.2, 1000.3, 1000.4, …"},
565             {"hr", "other", "1", "0, 5-9, 0.0, 0.5, 0.6, …"},
566             {"hr", "other", "2", "10-20, 25-30, 35-40, …, 10.0, 10.5, 10.6, …"},
567             {"hr", "other", "3", "100, 105-120, 125-130, 135-140, …, 100.0, 100.5, 100.6, …"},
568             {
569                 "hr",
570                 "other",
571                 "4",
572                 "1000, 1005-1020, 1025-1030, 1035-1040, …, 1000.0, 1000.5, 1000.6, …"
573             },
574         };
575         for (String[] row : tests) {
576             PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
577             SampleList uset =
578                     plurals.getSamples9999(Count.valueOf(row[1]), Integer.parseInt(row[2]));
579             assertEquals(row[0] + ", " + row[1] + ", " + row[2], row[3], uset.toString());
580         }
581     }
582 
TestDigitPluralCompleteness()583     public void TestDigitPluralCompleteness() {
584         String[][] exceptionStrings = {
585             // defaults
586             {"*", "zero", "0,00,000,0000"},
587             {"*", "one", "0"},
588             {"*", "two", "0,00,000,0000"},
589             {"*", "few", "0,00,000,0000"},
590             {"*", "many", "0,00,000,0000"},
591             {"*", "other", "0,00,000,0000"},
592             // others
593             {"mo", "other", "00,000,0000"}, //
594             {"ro", "other", "00,000,0000"}, //
595             {"cs", "few", "0"}, // j in 2..4
596             {"sk", "few", "0"}, // j in 2..4
597             {"da", "one", "0"}, // j is 1 or t is not 0 and n within 0..2
598             {"is", "one", "0,00,000,0000"}, // j is 1 or f is 1
599             {"sv", "one", "0"}, // j is 1
600             {"he", "two", "0"}, // j is 2
601             {"ru", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100
602             // is not 11
603             {"uk", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100
604             // is not 11
605             {"bs", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100
606             // is not 11 or f mod 10 is
607             // 1 and f mod 100 is not 11
608             {"hr", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100
609             // is not 11 or f mod 10 is
610             // 1 and f mod 100 is not 11
611             {"sh", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100
612             // is not 11 or f mod 10 is
613             // 1 and f mod 100 is not 11
614             {"sr", "one", "0,00,000,0000"}, // j mod 10 is 1 and j mod 100
615             // is not 11 or f mod 10 is
616             // 1 and f mod 100 is not 11
617             {"mk", "one", "0,00,000,0000"}, // j mod 10 is 1 or f mod 10
618             // is 1
619             {"sl", "one", "0,000,0000"}, // j mod 100 is 1
620             {"sl", "two", "0,000,0000"}, // j mod 100 is 2
621             {"he", "many", "00,000,0000"}, // j not in 0..10 and j mod 10
622             // is 0
623             {"tzm", "one", "0,00"}, // n in 0..1 or n in 11..99
624             {"gd", "one", "0,00"}, // n in 1,11
625             {"gd", "two", "0,00"}, // n in 2,12
626             {"shi", "few", "0,00"}, // n in 2..10
627             {"gd", "few", "0,00"}, // n in 3..10,13..19
628             {"ga", "few", "0"}, // n in 3..6
629             {"ga", "many", "0,00"}, // n in 7..10
630             {"ar", "zero", "0"}, // n is 0
631             {"blo", "zero", "0"}, // n = 0
632             {"cy", "zero", "0"}, // n is 0
633             {"ksh", "zero", "0"}, // n is 0
634             {"lag", "zero", "0"}, // n is 0
635             {"pt", "one", "0"}, // i = 1 and v = 0 or i = 0 and t = 1
636             {"pt_PT", "one", "0"}, // n = 1 and v = 0
637             {"ar", "two", "0"}, // n is 2
638             {"cy", "two", "0"}, // n is 2
639             {"ga", "two", "0"}, // n is 2
640             {"iu", "two", "0"}, // n is 2
641             {"naq", "two", "0"}, // n is 2
642             {"se", "two", "0"}, // n is 2
643             {"sma", "two", "0"}, // n is 2
644             {"smi", "two", "0"}, // n is 2
645             {"smj", "two", "0"}, // n is 2
646             {"smn", "two", "0"}, // n is 2
647             {"sms", "two", "0"}, // n is 2
648             {"cy", "few", "0"}, // n is 3
649             {"cy", "many", "0"}, // n is 6
650             {"br", "many", ""}, // n is not 0 and n mod 1000000 is 0
651             {"gv", "one", "0,00,000,0000"}, // n mod 10 is 1
652             {"be", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100
653             // is not 11
654             {"lv", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100
655             // is not 11 or v is 2 and f
656             // mod 10 is 1 and f mod 100
657             // is not 11 or v is not 2
658             // and f mod 10 is 1
659             {"br", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100
660             // not in 11,71,91
661             {"lt", "one", "0,00,000,0000"}, // n mod 10 is 1 and n mod 100
662             // not in 11..19
663             {"fil", "one", "0,00,000,0000"}, // v = 0 and i = 1,2,3 or v =
664             // 0 and i % 10 != 4,6,9 or
665             // v != 0 and f % 10 !=
666             // 4,6,9
667             {"tl", "one", "0,00,000,0000"}, // v = 0 and i = 1,2,3 or v =
668             // 0 and i % 10 != 4,6,9 or
669             // v != 0 and f % 10 !=
670             // 4,6,9
671             {"dsb", "one", "0,00,000,0000"}, // v = 0 and i % 100 = 1 or f
672             // % 100 = 1
673             {"kw", "many", "00,000,0000"}, // n != 1 and n % 100 = 1,21,41,61,81
674             {"kw", "zero", "0"}, // n = 0
675             {"mt", "two", "0"},
676             {"fr", "many", ""}, // e is special
677             {"ca", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
678             {"es", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
679             {"it", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
680             {"pt", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
681             {"pt_PT", "many", ""}, // e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5
682         };
683         // parse out the exceptions
684         Map<PluralInfo, Relation<Count, Integer>> exceptions = new HashMap<>();
685         Relation<Count, Integer> fallback =
686                 Relation.of(new EnumMap<Count, Set<Integer>>(Count.class), TreeSet.class);
687         for (String[] row : exceptionStrings) {
688             Relation<Count, Integer> countToDigits;
689             if (row[0].equals("*")) {
690                 countToDigits = fallback;
691             } else {
692                 PluralInfo plurals = SUPPLEMENTAL.getPlurals(row[0]);
693                 countToDigits = exceptions.get(plurals);
694                 if (countToDigits == null) {
695                     exceptions.put(
696                             plurals,
697                             countToDigits =
698                                     Relation.of(
699                                             new EnumMap<Count, Set<Integer>>(Count.class),
700                                             TreeSet.class));
701                 }
702             }
703             Count c = Count.valueOf(row[1]);
704             for (String digit : row[2].split(",")) {
705                 // "99" is special, just to have the result be non-empty
706                 countToDigits.put(c, digit.length());
707             }
708         }
709         Set<PluralInfo> seen = new HashSet<>();
710         Set<String> sorted = new TreeSet<>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
711         Relation<String, String> ruleToExceptions =
712                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
713 
714         for (String locale : sorted) {
715             PluralInfo plurals = SUPPLEMENTAL.getPlurals(locale);
716             if (seen.contains(plurals)) { // skip identicals
717                 continue;
718             }
719             Relation<Count, Integer> countToDigits = exceptions.get(plurals);
720             if (countToDigits == null) {
721                 countToDigits = fallback;
722             }
723             for (Count c : plurals.getCounts()) {
724                 List<String> compose = new ArrayList<>();
725                 boolean needLine = false;
726                 Set<Integer> digitSet = countToDigits.get(c);
727                 if (digitSet == null) {
728                     digitSet = fallback.get(c);
729                 }
730                 for (int digits = 1; digits < 5; ++digits) {
731                     boolean expected = digitSet.contains(digits);
732                     boolean hasSamples = plurals.hasSamples(c, digits);
733                     if (hasSamples) {
734                         compose.add(Utility.repeat("0", digits));
735                     }
736                     if (!assertEquals(locale + ", " + digits + ", " + c, expected, hasSamples)) {
737                         needLine = true;
738                     }
739                 }
740                 if (needLine) {
741                     String countRules = plurals.getPluralRules().getRules(c.toString());
742                     ruleToExceptions.put(
743                             countRules == null ? "" : countRules,
744                             "{\""
745                                     + locale
746                                     + "\", \""
747                                     + c
748                                     + "\", \""
749                                     + Joiner.on(",").join(compose)
750                                     + "\"},");
751                 }
752             }
753         }
754         if (!ruleToExceptions.isEmpty()) {
755             System.out.println(
756                     "To fix the above, review the following, then replace in TestDigitPluralCompleteness");
757             for (Entry<String, String> entry : ruleToExceptions.entrySet()) {
758                 System.out.println(entry.getValue() + "\t// " + entry.getKey());
759             }
760         }
761     }
762 
TestLikelyCode()763     public void TestLikelyCode() {
764         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
765         String[][] tests = {
766             {"it_AQ", "it_Latn_AQ"}, {"it_Arab", "it_Arab_IT"}, {"az_Cyrl", "az_Cyrl_AZ"},
767         };
768         for (String[] pair : tests) {
769             String newMax = LikelySubtags.maximize(pair[0], likely);
770             assertEquals("Likely", pair[1], newMax);
771         }
772     }
773 
TestLikelySubtagCompleteness()774     public void TestLikelySubtagCompleteness() {
775         Map<String, String> likely = SUPPLEMENTAL.getLikelySubtags();
776 
777         for (String language : SUPPLEMENTAL.getCLDRLanguageCodes()) {
778             if (!likely.containsKey(language)) {
779                 logln("WARNING: No likely subtag for CLDR language code (" + language + ")");
780             }
781         }
782         for (String script : SUPPLEMENTAL.getCLDRScriptCodes()) {
783             if (!likely.containsKey("und_" + script)
784                     && !script.equals("Latn")
785                     && !script.equals("Zinh")
786                     && !script.equals("Zyyy")
787                     && ScriptMetadata.getInfo(script) != null
788                     && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.EXCLUSION
789                     && ScriptMetadata.getInfo(script).idUsage != ScriptMetadata.IdUsage.UNKNOWN) {
790                 errln("No likely subtag for CLDR script code (und_" + script + ")");
791             }
792         }
793     }
794 
TestEquivalentLocales()795     public void TestEquivalentLocales() {
796         Set<Set<String>> seen = new HashSet<>();
797         Set<String> toTest = new TreeSet<>(testInfo.getCldrFactory().getAvailable());
798         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().keySet());
799         toTest.addAll(SUPPLEMENTAL.getLikelySubtags().values());
800         toTest.addAll(SUPPLEMENTAL.getDefaultContentLocales());
801         LanguageTagParser ltp = new LanguageTagParser();
802         main:
803         for (String locale : toTest) {
804             if (locale.startsWith(LocaleNames.UND) || locale.equals(LocaleNames.ROOT)) {
805                 continue;
806             }
807             Set<String> s = SUPPLEMENTAL.getEquivalentsForLocale(locale);
808             if (seen.contains(s)) {
809                 continue;
810             }
811 
812             List<String> ss = new ArrayList<>(s);
813             String last = ss.get(ss.size() - 1);
814             ltp.set(last);
815             if (!ltp.getVariants().isEmpty() || !ltp.getExtensions().isEmpty()) {
816                 continue; // skip variants for now.
817             }
818             String language = ltp.getLanguage();
819             String script = ltp.getScript();
820             String region = ltp.getRegion();
821             if (!script.isEmpty() && !region.isEmpty()) {
822                 String noScript = ltp.setScript("").toString();
823                 String noRegion = ltp.setScript(script).setRegion("").toString();
824                 switch (s.size()) {
825                     case 1: // ok if already maximized and strange script/country,
826                         // eg it_Arab_JA
827                         continue main;
828                     case 2: // ok if adds default country/script, eg {en_Cyrl,
829                         // en_Cyrl_US} or {en_GB, en_Latn_GB}
830                         String first = ss.get(0);
831                         if (first.equals(noScript) || first.equals(noRegion)) {
832                             continue main;
833                         }
834                         break;
835                     case 3: // ok if different script in different country, eg
836                         // {az_IR, az_Arab, az_Arab_IR}
837                         if (noScript.equals(ss.get(0)) && noRegion.equals(ss.get(1))) {
838                             continue main;
839                         }
840                         break;
841                     case 4: // ok if all combinations, eg {en, en_US, en_Latn,
842                         // en_Latn_US}
843                         if (language.equals(ss.get(0))
844                                 && noScript.equals(ss.get(1))
845                                 && noRegion.equals(ss.get(2))) {
846                             continue main;
847                         }
848                         break;
849                 }
850             }
851             errln("Strange size or composition:\t" + s + " \t" + showLocaleParts(s));
852             seen.add(s);
853         }
854     }
855 
showLocaleParts(Set<String> s)856     private String showLocaleParts(Set<String> s) {
857         LanguageTagParser ltp = new LanguageTagParser();
858         Set<String> b = new LinkedHashSet<>();
859         for (String ss : s) {
860             ltp.set(ss);
861             addName(CLDRFile.LANGUAGE_NAME, ltp.getLanguage(), b);
862             addName(CLDRFile.SCRIPT_NAME, ltp.getScript(), b);
863             addName(CLDRFile.TERRITORY_NAME, ltp.getRegion(), b);
864         }
865         return Joiner.on("; ").join(b);
866     }
867 
addName(int languageName, String code, Set<String> b)868     private void addName(int languageName, String code, Set<String> b) {
869         if (code.isEmpty()) {
870             return;
871         }
872         String name = testInfo.getEnglish().getName(languageName, code);
873         if (!code.equals(name)) {
874             b.add(code + "=" + name);
875         }
876     }
877 
TestDefaultScriptCompleteness()878     public void TestDefaultScriptCompleteness() {
879         Relation<String, String> scriptToBase =
880                 Relation.of(new LinkedHashMap<String, Set<String>>(), TreeSet.class);
881         main:
882         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
883             if (!locale.contains("_") && !LocaleNames.ROOT.equals(locale)) {
884                 String defaultScript = SUPPLEMENTAL.getDefaultScript(locale);
885                 if (defaultScript != null) {
886                     continue;
887                 }
888                 CLDRFile cldrFile = testInfo.getCLDRFile(locale, false);
889                 UnicodeSet set = cldrFile.getExemplarSet("", WinningChoice.NORMAL);
890                 for (String s : set) {
891                     int script = UScript.getScript(s.codePointAt(0));
892                     if (script != UScript.UNKNOWN
893                             && script != UScript.COMMON
894                             && script != UScript.INHERITED) {
895                         scriptToBase.put(UScript.getShortName(script), locale);
896                         continue main;
897                     }
898                 }
899                 scriptToBase.put(UScript.getShortName(UScript.UNKNOWN), locale);
900             }
901         }
902         if (scriptToBase.size() != 0) {
903             for (Entry<String, Set<String>> entry : scriptToBase.keyValuesSet()) {
904                 errln("Default Scripts missing:\t" + entry.getKey() + "\t" + entry.getValue());
905             }
906         }
907     }
908 
TestTimeData()909     public void TestTimeData() {
910         Map<String, PreferredAndAllowedHour> timeData = SUPPLEMENTAL.getTimeData();
911         Set<String> regionsSoFar = new HashSet<>();
912         Set<String> current24only = new HashSet<>();
913         Set<String> current12preferred = new HashSet<>();
914 
915         boolean haveWorld = false;
916 
917         ImmutableSet<HourStyle> oldSchool =
918                 ImmutableSet.copyOf(EnumSet.of(HourStyle.H, HourStyle.h, HourStyle.K, HourStyle.k));
919 
920         for (Entry<String, PreferredAndAllowedHour> e : timeData.entrySet()) {
921             String region = e.getKey();
922             if (region.equals("001")) {
923                 haveWorld = true;
924             }
925             regionsSoFar.add(region);
926             PreferredAndAllowedHour preferredAndAllowedHour = e.getValue();
927             assertNotNull("Preferred must not be null", preferredAndAllowedHour.preferred);
928 
929             // find first h or H
930             HourStyle found = null;
931 
932             for (HourStyle item : preferredAndAllowedHour.allowed) {
933                 if (oldSchool.contains(item)) {
934                     found = item;
935                     if (item != preferredAndAllowedHour.preferred) {
936                         String message =
937                                 "Inconsistent values for "
938                                         + region
939                                         + ": preferred="
940                                         + preferredAndAllowedHour.preferred
941                                         + " but that isn't the first "
942                                         + oldSchool
943                                         + " in allowed: "
944                                         + preferredAndAllowedHour.allowed;
945                         errln(message);
946                     }
947                     break;
948                 }
949             }
950             if (found == null) {
951                 errln(
952                         region
953                                 + ": preferred "
954                                 + preferredAndAllowedHour.preferred
955                                 + " not in "
956                                 + preferredAndAllowedHour.allowed);
957             }
958             //            final HourStyle firstAllowed =
959             // preferredAndAllowedHour.allowed.iterator().next();
960             //            if (preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed ==
961             // HourStyle.h
962             //                || preferredAndAllowedHour.preferred == HourStyle.H && firstAllowed ==
963             // HourStyle.hb
964             //                || preferredAndAllowedHour.preferred == HourStyle.h && firstAllowed ==
965             // HourStyle.H) {
966             //                errln(region + ": allowed " + preferredAndAllowedHour.allowed
967             //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
968             //            } else if (isVerbose()) {
969             //                logln(region + ": allowed " + preferredAndAllowedHour.allowed
970             //                    + " starts with preferred " + preferredAndAllowedHour.preferred);
971             //            }
972             // for (HourStyle c : preferredAndAllowedHour.allowed) {
973             // if (!PreferredAndAllowedHour.HOURS.contains(c)) {
974             // errln(region + ": illegal character in " +
975             // preferredAndAllowedHour.allowed + ". It contains " + c
976             // + " which is not in " + PreferredAndAllowedHour.HOURS);
977             // }
978             // }
979             if (!preferredAndAllowedHour.allowed.contains(HourStyle.h)
980                     && !preferredAndAllowedHour.allowed.contains(HourStyle.hb)) {
981                 current24only.add(region);
982             }
983             if (preferredAndAllowedHour.preferred == HourStyle.h) {
984                 current12preferred.add(region);
985             }
986         }
987         Set<String> missing =
988                 new TreeSet<>(STANDARD_CODES.getGoodAvailableCodes(CodeType.territory));
989         missing.removeAll(regionsSoFar);
990         for (Iterator<String> it = missing.iterator(); it.hasNext(); ) {
991             if (!StandardCodes.isCountry(it.next())) {
992                 it.remove();
993             }
994         }
995 
996         // if we don't have 001, then we can't miss any regions
997         if (!missing.isEmpty()) {
998             if (haveWorld) {
999                 logln("Implicit regions: " + missing);
1000             } else {
1001                 errln("Missing regions: " + missing);
1002             }
1003         }
1004 
1005         // The feedback gathered from our translators is that the following use
1006         // 24 hour time ONLY:
1007         Set<String> only24lang =
1008                 new TreeSet<>(
1009                         Arrays.asList(
1010                                 ("sq, br, bu, ca, hr, cs, da, de, nl, et, eu, fi, "
1011                                                 + "fr, gl, he, is, id, it, no, pt, ro, ru, sr, sk, sl, sv, tr, hy")
1012                                         .split(",\\s*")));
1013         // With the new preferences, this is changed
1014         Set<String> only24region = new TreeSet<>();
1015         Set<String> either24or12region = new TreeSet<>();
1016 
1017         // get all countries where official or de-facto official
1018         // add them two one of two lists, based on the above list of languages
1019         for (String language : SUPPLEMENTAL.getLanguagesForTerritoriesPopulationData()) {
1020             boolean a24lang = only24lang.contains(language);
1021             for (String region : SUPPLEMENTAL.getTerritoriesForPopulationData(language)) {
1022                 PopulationData pop =
1023                         SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(language, region);
1024                 if (pop.getOfficialStatus().compareTo(OfficialStatus.de_facto_official) < 0) {
1025                     continue;
1026                 }
1027                 if (a24lang) {
1028                     only24region.add(region);
1029                 } else {
1030                     either24or12region.add(region);
1031                 }
1032             }
1033         }
1034         // if we have a case like CA, where en uses 12/24 but fr uses 24, remove
1035         // it for safety
1036         only24region.removeAll(either24or12region);
1037         // There are always exceptions... Remove SM (San Marino) and VA (Vatican),
1038         // since they allows 12/24 but the de facto langauge is Italian.
1039         only24region.remove("SM");
1040         only24region.remove("VA");
1041         // also remove all the regions where 'h' is preferred
1042         only24region.removeAll(current12preferred);
1043         // now verify
1044         if (!current24only.containsAll(only24region)) {
1045             Set<String> missing24only = new TreeSet<>(only24region);
1046             missing24only.removeAll(current24only);
1047 
1048             errln(
1049                     "24-hour-only doesn't include needed items:\n"
1050                             + " add "
1051                             + CldrUtility.join(missing24only, " ")
1052                             + "\n\t\t"
1053                             + CldrUtility.join(
1054                                     missing24only,
1055                                     "\n\t\t",
1056                                     new NameCodeTransform(
1057                                             testInfo.getEnglish(), CLDRFile.TERRITORY_NAME)));
1058         }
1059     }
1060 
1061     public static class NameCodeTransform implements StringTransform {
1062         private final CLDRFile file;
1063         private final int codeType;
1064 
NameCodeTransform(CLDRFile file, int code)1065         public NameCodeTransform(CLDRFile file, int code) {
1066             this.file = file;
1067             this.codeType = code;
1068         }
1069 
1070         @Override
transform(String code)1071         public String transform(String code) {
1072             return file.getName(codeType, code) + " [" + code + "]";
1073         }
1074     }
1075 
TestAliases()1076     public void TestAliases() {
1077         StandardCodes.make();
1078         Map<String, Map<String, Map<String, String>>> bcp47Data = StandardCodes.getLStreg();
1079         Map<String, Map<String, R2<List<String>, String>>> aliases =
1080                 SUPPLEMENTAL.getLocaleAliasInfo();
1081 
1082         for (Entry<String, Map<String, R2<List<String>, String>>> typeMap : aliases.entrySet()) {
1083             String type = typeMap.getKey();
1084             Map<String, R2<List<String>, String>> codeReplacement = typeMap.getValue();
1085 
1086             Map<String, Map<String, String>> bcp47DataTypeData =
1087                     bcp47Data.get(type.equals("territory") ? "region" : type);
1088             if (bcp47DataTypeData == null) {
1089                 logln("skipping BCP47 test for " + type);
1090             } else {
1091                 for (Entry<String, Map<String, String>> codeData : bcp47DataTypeData.entrySet()) {
1092                     String code = codeData.getKey();
1093                     if (codeReplacement.containsKey(code)
1094                             || codeReplacement.containsKey(code.toUpperCase(Locale.ENGLISH))) {
1095                         continue;
1096                         // TODO, check the value
1097                     }
1098                     Map<String, String> data = codeData.getValue();
1099                     if (data.containsKey("Deprecated")
1100                             && SUPPLEMENTAL.getCLDRLanguageCodes().contains(code)) {
1101                         errln(
1102                                 "supplementalMetadata.xml: alias is missing <languageAlias type=\""
1103                                         + code
1104                                         + "\" ... /> "
1105                                         + "\t"
1106                                         + data);
1107                     }
1108                 }
1109             }
1110 
1111             Set<R3<String, List<String>, List<String>>> failures = new LinkedHashSet<>();
1112             Set<String> nullReplacements = new TreeSet<>();
1113             for (Entry<String, R2<List<String>, String>> codeRep : codeReplacement.entrySet()) {
1114                 String code = codeRep.getKey();
1115                 List<String> replacements = codeRep.getValue().get0();
1116                 if (replacements == null) {
1117                     nullReplacements.add(code);
1118                     continue;
1119                 }
1120                 Set<String> fixedReplacements = new LinkedHashSet<>();
1121                 for (String replacement : replacements) {
1122                     R2<List<String>, String> newReplacement = codeReplacement.get(replacement);
1123                     if (newReplacement != null) {
1124                         List<String> list = newReplacement.get0();
1125                         if (list != null) {
1126                             fixedReplacements.addAll(list);
1127                         }
1128                     } else {
1129                         fixedReplacements.add(replacement);
1130                     }
1131                 }
1132                 List<String> fixedList = new ArrayList<>(fixedReplacements);
1133                 if (!replacements.equals(fixedList)) {
1134                     R3<String, List<String>, List<String>> row =
1135                             Row.of(code, replacements, fixedList);
1136                     System.out.println(row.toString());
1137                     failures.add(row);
1138                 }
1139             }
1140 
1141             if (failures.size() != 0) {
1142                 for (R3<String, List<String>, List<String>> item : failures) {
1143                     String code = item.get0();
1144                     List<String> oldReplacement = item.get1();
1145                     List<String> newReplacement = item.get2();
1146 
1147                     errln(
1148                             code
1149                                     + "\t=>\t"
1150                                     + oldReplacement
1151                                     + "\tshould be:\n\t"
1152                                     + "<"
1153                                     + type
1154                                     + "Alias type=\""
1155                                     + code
1156                                     + "\" replacement=\""
1157                                     + Joiner.on(" ").join(newReplacement)
1158                                     + "\" reason=\"XXX\"/> <!-- YYY -->\n");
1159                 }
1160             }
1161             if (nullReplacements.size() != 0) {
1162                 logln("No Replacements\t" + type + "\t" + nullReplacements);
1163             }
1164         }
1165     }
1166 
1167     static final List<String> oldRegions =
1168             Arrays.asList("NT, YD, QU, SU, DD, FX, ZR, AN, BU, TP, CS, YU".split(", "));
1169 
TestTerritoryContainment()1170     public void TestTerritoryContainment() {
1171         Relation<String, String> map = SUPPLEMENTAL.getTerritoryToContained(ContainmentStyle.all);
1172         Relation<String, String> mapCore = SUPPLEMENTAL.getContainmentCore();
1173         Set<String> mapItems = new LinkedHashSet<>();
1174         // get all the items
1175         for (String item : map.keySet()) {
1176             mapItems.add(item);
1177             mapItems.addAll(map.getAll(item));
1178         }
1179         Map<String, Map<String, String>> bcp47RegionData = StandardCodes.getLStreg().get("region");
1180 
1181         // verify that all regions are covered
1182         Set<String> bcp47Regions = new LinkedHashSet<>(bcp47RegionData.keySet());
1183         bcp47Regions.remove("ZZ"); // We don't care about ZZ since it is the
1184         // unknown region...
1185         for (Iterator<String> it = bcp47Regions.iterator(); it.hasNext(); ) {
1186             String region = it.next();
1187             Map<String, String> data = bcp47RegionData.get(region);
1188             if (data.containsKey("Deprecated")) {
1189                 logln("Removing deprecated " + region);
1190                 it.remove();
1191             }
1192             if ("Private use".equals(data.get("Description"))) {
1193                 it.remove();
1194             }
1195         }
1196 
1197         if (!mapItems.equals(bcp47Regions)) {
1198             mapItems.removeAll(oldRegions);
1199             errlnDiff("containment items not in bcp47 regions: ", mapItems, bcp47Regions);
1200             errlnDiff("bcp47 regions not in containment items: ", bcp47Regions, mapItems);
1201         }
1202 
1203         // verify that everything in the containment core can be reached
1204         // downwards from 001.
1205 
1206         Map<String, Integer> from001 =
1207                 getRecursiveContainment("001", map, new LinkedHashMap<String, Integer>(), 1);
1208         from001.put("001", 0);
1209         Set<String> keySet = from001.keySet();
1210         for (String region : keySet) {
1211             logln(
1212                     Utility.repeat("\t", from001.get(region))
1213                             + "\t"
1214                             + region
1215                             + "\t"
1216                             + getRegionName(region));
1217         }
1218 
1219         // Populate mapItems with the core containment
1220         mapItems.clear();
1221         for (String item : mapCore.keySet()) {
1222             mapItems.add(item);
1223             mapItems.addAll(mapCore.getAll(item));
1224         }
1225 
1226         if (!mapItems.equals(keySet)) {
1227             errlnDiff("containment core items that can't be reached from 001: ", mapItems, keySet);
1228         }
1229     }
1230 
errlnDiff(String title, Set<String> mapItems, Set<String> keySet)1231     private void errlnDiff(String title, Set<String> mapItems, Set<String> keySet) {
1232         Set<String> diff = new LinkedHashSet<>(mapItems);
1233         diff.removeAll(keySet);
1234         if (diff.size() != 0) {
1235             errln(title + diff);
1236         }
1237     }
1238 
getRegionName(String region)1239     private String getRegionName(String region) {
1240         return testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region);
1241     }
1242 
getRecursiveContainment( String region, Relation<String, String> map, Map<String, Integer> result, int depth)1243     private Map<String, Integer> getRecursiveContainment(
1244             String region, Relation<String, String> map, Map<String, Integer> result, int depth) {
1245         Set<String> contained = map.getAll(region);
1246         if (contained == null) {
1247             return result;
1248         }
1249         for (String item : contained) {
1250             if (result.containsKey(item)) {
1251                 logln("Duplicate containment " + item + "\t" + getRegionName(item));
1252                 continue;
1253             }
1254             result.put(item, depth);
1255             getRecursiveContainment(item, map, result, depth + 1);
1256         }
1257         return result;
1258     }
1259 
TestMacrolanguages()1260     public void TestMacrolanguages() {
1261         Set<String> languageCodes = STANDARD_CODES.getAvailableCodes("language");
1262         Map<String, Map<String, R2<List<String>, String>>> typeToTagToReplacement =
1263                 SUPPLEMENTAL.getLocaleAliasInfo();
1264         Map<String, R2<List<String>, String>> tagToReplacement =
1265                 typeToTagToReplacement.get("language");
1266 
1267         Relation<String, String> replacementToReplaced =
1268                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
1269         for (String language : tagToReplacement.keySet()) {
1270             List<String> replacements = tagToReplacement.get(language).get0();
1271             if (replacements != null) {
1272                 replacementToReplaced.putAll(replacements, language);
1273             }
1274         }
1275         replacementToReplaced.freeze();
1276 
1277         Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes.getLStreg();
1278         Map<String, Map<String, String>> lstregLanguageInfo = lstreg.get("language");
1279 
1280         Relation<Scope, String> scopeToCodes =
1281                 Relation.of(new TreeMap<Scope, Set<String>>(), TreeSet.class);
1282         // the invariant is that every macrolanguage has exactly 1 encompassed
1283         // language that maps to it
1284 
1285         main:
1286         for (String language :
1287                 Builder.with(new TreeSet<String>())
1288                         .addAll(languageCodes)
1289                         .addAll(Iso639Data.getAvailable())
1290                         .get()) {
1291             if (language.equals("no") || language.equals("sa") || language.equals("sh")) {
1292                 continue; // special cases
1293             }
1294             Scope languageScope = getScope(language, lstregLanguageInfo);
1295             if (languageScope == Scope.Macrolanguage) {
1296                 if (Iso639Data.getHeirarchy(language) != null) {
1297                     continue main; // is real family
1298                 }
1299                 Set<String> replacements = replacementToReplaced.getAll(language);
1300                 if (replacements == null || replacements.size() == 0) {
1301                     scopeToCodes.put(languageScope, language);
1302                 } else {
1303                     // it still might be bad, if we don't have a mapping to a
1304                     // regular language
1305                     for (String replacement : replacements) {
1306                         Scope replacementScope = getScope(replacement, lstregLanguageInfo);
1307                         if (replacementScope == Scope.Individual) {
1308                             continue main;
1309                         }
1310                     }
1311                     scopeToCodes.put(languageScope, language);
1312                 }
1313             }
1314         }
1315         // now show the items we found
1316         for (Scope scope : scopeToCodes.keySet()) {
1317             for (String language : scopeToCodes.getAll(scope)) {
1318                 String name = testInfo.getEnglish().getName(language);
1319                 if (name == null || name.equals(language)) {
1320                     Set<String> set = Iso639Data.getNames(language);
1321                     if (set != null) {
1322                         name = set.iterator().next();
1323                     } else {
1324                         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
1325                         if (languageInfo != null) {
1326                             name = languageInfo.get("Description");
1327                         }
1328                     }
1329                 }
1330                 errln(scope + "\t" + language + "\t" + name + "\t" + Iso639Data.getType(language));
1331             }
1332         }
1333     }
1334 
getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo)1335     private Scope getScope(String language, Map<String, Map<String, String>> lstregLanguageInfo) {
1336         Scope languageScope = Iso639Data.getScope(language);
1337         Map<String, String> languageInfo = lstregLanguageInfo.get(language);
1338         if (languageInfo == null) {
1339             // System.out.println("Couldn't get lstreg info for " + language);
1340         } else {
1341             String lstregScope = languageInfo.get("Scope");
1342             if (lstregScope != null) {
1343                 Scope scope2 = Scope.fromString(lstregScope);
1344                 if (languageScope != scope2) {
1345                     // System.out.println("Mismatch in scope between LSTR and ISO 639:\t"
1346                     // + scope2 + "\t" +
1347                     // languageScope);
1348                     languageScope = scope2;
1349                 }
1350             }
1351         }
1352         return languageScope;
1353     }
1354 
1355     static final boolean LOCALES_FIXED = true;
1356 
TestPopulation()1357     public void TestPopulation() {
1358         Set<String> languages = SUPPLEMENTAL.getLanguagesForTerritoriesPopulationData();
1359         Relation<String, String> baseToLanguages =
1360                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
1361         LanguageTagParser ltp = new LanguageTagParser();
1362         LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(false);
1363 
1364         for (String language : languages) {
1365             if (LOCALES_FIXED) {
1366                 String canonicalForm = ltc.transform(language);
1367                 if (!assertEquals("Canonical form", canonicalForm, language)) {
1368                     int debug = 0;
1369                 }
1370             }
1371 
1372             String base = ltp.set(language).getLanguage();
1373             String script = ltp.getScript();
1374             baseToLanguages.put(base, language);
1375 
1376             // add basic data, basically just for wo!
1377             // if there are primary scripts, they must include script (if not
1378             // empty)
1379             Set<String> primaryScripts = Collections.emptySet();
1380             Set<String> secondaryScripts = Collections.emptySet();
1381             Map<Type, BasicLanguageData> basicData = SUPPLEMENTAL.getBasicLanguageDataMap(base);
1382             if (basicData != null) {
1383                 BasicLanguageData s = basicData.get(BasicLanguageData.Type.primary);
1384                 if (s != null) {
1385                     primaryScripts = s.getScripts();
1386                 }
1387                 s = basicData.get(BasicLanguageData.Type.secondary);
1388                 if (s != null) {
1389                     secondaryScripts = s.getScripts();
1390                 }
1391             }
1392 
1393             // do some consistency tests; if there is a script, it must be in
1394             // primaryScripts or secondaryScripts
1395             if (!script.isEmpty()
1396                     && !primaryScripts.contains(script)
1397                     && !secondaryScripts.contains(script)) {
1398                 errln(
1399                         base
1400                                 + ": Script found in territory data ("
1401                                 + script
1402                                 + ") is not in primary scripts :\t"
1403                                 + primaryScripts
1404                                 + " and not in secondary scripts :\t"
1405                                 + secondaryScripts);
1406             }
1407 
1408             // if there are multiple primary scripts, they will be in
1409             // baseToLanguages
1410             if (primaryScripts.size() > 1) {
1411                 for (String script2 : primaryScripts) {
1412                     baseToLanguages.put(base, base + "_" + script2);
1413                 }
1414             }
1415         }
1416 
1417         if (!LOCALES_FIXED) {
1418             // the invariants are that if we have a base, we must not have a script.
1419             // and if we don't have a base, we must have two items
1420             for (String base : baseToLanguages.keySet()) {
1421                 Set<String> languagesForBase = baseToLanguages.getAll(base);
1422                 if (languagesForBase.contains(base)) {
1423                     if (languagesForBase.size() > 1) {
1424                         errln("Cannot have base alone with other scripts:\t" + languagesForBase);
1425                     }
1426                 } else {
1427                     if (languagesForBase.size() == 1) {
1428                         errln("Cannot have only one script for language:\t" + languagesForBase);
1429                     }
1430                 }
1431             }
1432         }
1433     }
1434 
TestCompleteness()1435     public void TestCompleteness() {
1436         if (SUPPLEMENTAL.getSkippedElements().size() > 0) {
1437             logln(
1438                     "SupplementalDataInfo API doesn't support: "
1439                             + SUPPLEMENTAL.getSkippedElements().toString());
1440         }
1441     }
1442 
1443     // these are settings for exceptional cases we want to allow
1444     private static final Set<String> EXCEPTION_CURRENCIES_WITH_NEW =
1445             new TreeSet<>(Arrays.asList("ILS", "NZD", "PGK", "TWD"));
1446 
1447     // ok since there is no problem with confusion
1448     private static final Set<String> OK_TO_NOT_HAVE_OLD =
1449             new TreeSet<>(
1450                     Arrays.asList(
1451                             "ADP", "ATS", "BEF", "CYP", "DEM", "ESP", "FIM", "FRF", "GRD", "IEP",
1452                             "ITL", "LUF", "MTL", "MTP", "NLG", "PTE", "YUM", "ARA", "BAD", "BGL",
1453                             "BOP", "BRC", "BRN", "BRR", "BUK", "CSK", "ECS", "GEK", "GNS", "GQE",
1454                             "HRD", "ILP", "LTT", "LVR", "MGF", "MLF", "MZE", "NIC", "PEI", "PES",
1455                             "SIT", "SRG", "SUR", "TJR", "TPE", "UAK", "YUD", "YUN", "ZRZ", "GWE"));
1456 
1457     private static final Date LIMIT_FOR_NEW_CURRENCY = new Date(new Date().getYear() - 5, 1, 1);
1458     private Matcher oldMatcher =
1459             Pattern.compile("\\bold\\b|\\([0-9]{4}-[0-9]{4}\\)", Pattern.CASE_INSENSITIVE)
1460                     .matcher("");
1461     private Matcher newMatcher = Pattern.compile("\\bnew\\b", Pattern.CASE_INSENSITIVE).matcher("");
1462 
1463     /**
1464      * Test that access to currency info in supplemental data is ok. At this point just a simple
1465      * test.
1466      *
1467      * @param args
1468      */
TestSupplementalCurrency()1469     public void TestSupplementalCurrency() {
1470         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1471         Set<String> currencyCodes = STANDARD_CODES.getGoodAvailableCodes("currency");
1472         Set<String> oncomingCurrencyCodes = STANDARD_CODES.getOncomingCurrencies();
1473         Relation<String, Pair<String, CurrencyDateInfo>> nonModernCurrencyCodes =
1474                 Relation.of(
1475                         new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), TreeSet.class);
1476         Relation<String, Pair<String, CurrencyDateInfo>> modernCurrencyCodes =
1477                 Relation.of(
1478                         new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), TreeSet.class);
1479         Relation<String, Pair<String, CurrencyDateInfo>> recentModernCurrencyCodes =
1480                 Relation.of(
1481                         new TreeMap<String, Set<Pair<String, CurrencyDateInfo>>>(), TreeSet.class);
1482         Set<String> territoriesWithoutModernCurrencies =
1483                 new TreeSet<>(STANDARD_CODES.getGoodAvailableCodes("territory"));
1484         Map<String, Date> currencyFirstValid = new TreeMap<>();
1485         Map<String, Date> currencyLastValid = new TreeMap<>();
1486         territoriesWithoutModernCurrencies.remove("ZZ");
1487         territoriesWithoutModernCurrencies.removeAll(Iso3166Data.getRegionCodesNotForTranslation());
1488 
1489         for (String territory : STANDARD_CODES.getGoodAvailableCodes("territory")) {
1490             /* "EU" behaves like a country for purposes of this test */
1491             if ((SUPPLEMENTAL.getContained(territory) != null) && !territory.equals("EU")) {
1492                 territoriesWithoutModernCurrencies.remove(territory);
1493                 continue;
1494             }
1495             Set<CurrencyDateInfo> currencyInfo = SUPPLEMENTAL.getCurrencyDateInfo(territory);
1496             if (currencyInfo == null) {
1497                 continue; // error, but will pick up below.
1498             }
1499             for (CurrencyDateInfo dateInfo : currencyInfo) {
1500                 final String currency = dateInfo.getCurrency();
1501                 final Date start = dateInfo.getStart();
1502                 final Date end = dateInfo.getEnd();
1503                 if (dateInfo.getErrors().length() != 0) {
1504                     logln(
1505                             "parsing "
1506                                     + territory
1507                                     + "\t"
1508                                     + dateInfo.toString()
1509                                     + "\t"
1510                                     + dateInfo.getErrors());
1511                 }
1512                 Date firstValue = currencyFirstValid.get(currency);
1513                 if (firstValue == null || firstValue.compareTo(start) < 0) {
1514                     currencyFirstValid.put(currency, start);
1515                 }
1516                 Date lastValue = currencyLastValid.get(currency);
1517                 if (lastValue == null || lastValue.compareTo(end) > 0) {
1518                     currencyLastValid.put(currency, end);
1519                 }
1520                 if (start.compareTo(DateConstants.NOW) < 0
1521                         && end.compareTo(DateConstants.NOW) >= 0) { // Non-tender is OK...
1522                     modernCurrencyCodes.put(currency, new Pair<>(territory, dateInfo));
1523                     territoriesWithoutModernCurrencies.remove(territory);
1524                 } else {
1525                     nonModernCurrencyCodes.put(currency, new Pair<>(territory, dateInfo));
1526                     if (start.compareTo(DateConstants.NOW) < 0
1527                             && end.compareTo(DateConstants.RECENT_HISTORY) >= 0) {
1528                         // It was CLDR tender recently.
1529                         recentModernCurrencyCodes.put(currency, new Pair<>(territory, dateInfo));
1530                     }
1531                 }
1532                 logln(
1533                         territory
1534                                 + "\t"
1535                                 + dateInfo.toString()
1536                                 + "\t"
1537                                 + testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, currency));
1538             }
1539         }
1540         // fix up
1541         nonModernCurrencyCodes.removeAll(modernCurrencyCodes.keySet());
1542         Relation<String, String> isoCurrenciesToCountries =
1543                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class)
1544                         .addAllInverted(isoCodes.getCountryToCodes());
1545         // now print error messages
1546         logln("Modern Codes: " + modernCurrencyCodes.size() + "\t" + modernCurrencyCodes);
1547         Set<String> missing = new TreeSet<>(isoCurrenciesToCountries.keySet());
1548         missing.removeAll(modernCurrencyCodes.keySet());
1549         missing.removeAll(oncomingCurrencyCodes);
1550         Set<String> recentMissing = new TreeSet<>(missing);
1551         recentMissing.retainAll(recentModernCurrencyCodes.keySet());
1552         if (recentMissing.size() != 0) {
1553             warnln(
1554                     "WARNING: Codes in ISO 4217 and until-recently legal tender in CLDR. "
1555                             + "(may need to update "
1556                             + CLDRURLS.UPDATING_CURRENCY_CODES
1557                             + " ): "
1558                             + currencyDateRelationToString(
1559                                     recentModernCurrencyCodes, recentMissing));
1560             missing.removeAll(recentMissing); // not errors
1561         }
1562         if (missing.size() != 0) {
1563             errln(
1564                     "Codes in ISO 4217 but not current tender in CLDR "
1565                             + "(may need to update as per"
1566                             + CLDRURLS.UPDATING_CURRENCY_CODES
1567                             + " ): "
1568                             + currencyDateRelationToString(nonModernCurrencyCodes, missing));
1569         }
1570 
1571         for (String currency : modernCurrencyCodes.keySet()) {
1572             Set<Pair<String, CurrencyDateInfo>> data = modernCurrencyCodes.getAll(currency);
1573             final String name = testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, currency);
1574 
1575             Set<String> isoCountries = isoCurrenciesToCountries.getAll(currency);
1576             if (isoCountries == null) {
1577                 isoCountries = new TreeSet<>();
1578             }
1579 
1580             TreeSet<String> cldrCountries = new TreeSet<>();
1581             for (Pair<String, CurrencyDateInfo> x : data) {
1582                 cldrCountries.add(x.getFirst());
1583             }
1584             if (!isoCountries.equals(cldrCountries)) {
1585                 // TODO 17397: remove isKnownIssue and the if around errln when the logknown issue
1586                 // goes away.
1587                 final boolean skipKnownIssue =
1588                         currency.equals("ANG")
1589                                 && isoCountries.isEmpty()
1590                                 && cldrCountries.equals(Set.of("CW", "SX"))
1591                                 && logKnownIssue("CLDR-17397", "Mismatched codes " + cldrCountries);
1592                 if (!skipKnownIssue) {
1593                     errln(
1594                             "Mismatch between ISO and Cldr modern currencies for "
1595                                     + currency
1596                                     + "\tISO:"
1597                                     + isoCountries
1598                                     + "\tCLDR:"
1599                                     + cldrCountries);
1600                     showCountries("iso-cldr", isoCountries, cldrCountries, missing);
1601                     showCountries("cldr-iso", cldrCountries, isoCountries, missing);
1602                 }
1603             }
1604 
1605             if (oldMatcher.reset(name).find()) {
1606                 errln(
1607                         "Has 'old' in name but still used "
1608                                 + "\t"
1609                                 + currency
1610                                 + "\t"
1611                                 + name
1612                                 + "\t"
1613                                 + data);
1614             }
1615             if (newMatcher.reset(name).find()
1616                     && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1617                 // find the first use. If older than 5 years, flag as error
1618                 if (currencyFirstValid.get(currency).compareTo(LIMIT_FOR_NEW_CURRENCY) < 0) {
1619                     errln(
1620                             "Has 'new' in name but used since "
1621                                     + CurrencyDateInfo.formatDate(currencyFirstValid.get(currency))
1622                                     + "\t"
1623                                     + currency
1624                                     + "\t"
1625                                     + name
1626                                     + "\t"
1627                                     + data);
1628                 } else {
1629                     logln(
1630                             "Has 'new' in name but used since "
1631                                     + CurrencyDateInfo.formatDate(currencyFirstValid.get(currency))
1632                                     + "\t"
1633                                     + currency
1634                                     + "\t"
1635                                     + name
1636                                     + "\t"
1637                                     + data);
1638                 }
1639             }
1640         }
1641         logln(
1642                 "Non-Modern Codes (with dates): "
1643                         + nonModernCurrencyCodes.size()
1644                         + "\t"
1645                         + nonModernCurrencyCodes);
1646         for (String currency : nonModernCurrencyCodes.keySet()) {
1647             final String name = testInfo.getEnglish().getName(CLDRFile.CURRENCY_NAME, currency);
1648             if (name == null) {
1649                 errln("No English name for currency " + currency);
1650                 continue;
1651             }
1652             if (newMatcher.reset(name).find()
1653                     && !EXCEPTION_CURRENCIES_WITH_NEW.contains(currency)) {
1654                 logln(
1655                         "Has 'new' in name but NOT used since "
1656                                 + CurrencyDateInfo.formatDate(currencyLastValid.get(currency))
1657                                 + "\t"
1658                                 + currency
1659                                 + "\t"
1660                                 + name
1661                                 + "\t"
1662                                 + nonModernCurrencyCodes.getAll(currency));
1663             } else if (!oldMatcher.reset(name).find() && !OK_TO_NOT_HAVE_OLD.contains(currency)) {
1664                 logln(
1665                         "Doesn't have 'old' or date range in name but NOT used since "
1666                                 + CurrencyDateInfo.formatDate(currencyLastValid.get(currency))
1667                                 + "\t"
1668                                 + currency
1669                                 + "\t"
1670                                 + name
1671                                 + "\t"
1672                                 + nonModernCurrencyCodes.getAll(currency));
1673                 for (Pair<String, CurrencyDateInfo> pair :
1674                         nonModernCurrencyCodes.getAll(currency)) {
1675                     final String territory = pair.getFirst();
1676                     Set<CurrencyDateInfo> currencyInfo =
1677                             SUPPLEMENTAL.getCurrencyDateInfo(territory);
1678                     for (CurrencyDateInfo dateInfo : currencyInfo) {
1679                         if (dateInfo.getEnd().compareTo(DateConstants.NOW) < 0) {
1680                             continue;
1681                         }
1682                         logln(
1683                                 "\tCurrencies used instead: "
1684                                         + territory
1685                                         + "\t"
1686                                         + dateInfo
1687                                         + "\t"
1688                                         + testInfo.getEnglish()
1689                                                 .getName(
1690                                                         CLDRFile.CURRENCY_NAME,
1691                                                         dateInfo.getCurrency()));
1692                     }
1693                 }
1694             }
1695         }
1696         Set<String> remainder = new TreeSet<>();
1697         remainder.addAll(currencyCodes);
1698         remainder.removeAll(nonModernCurrencyCodes.keySet());
1699         // TODO make this an error, except for allowed exceptions.
1700         logln("Currencies without Territories: " + remainder);
1701         if (territoriesWithoutModernCurrencies.size() != 0) {
1702             errln("Modern territory missing currency: " + territoriesWithoutModernCurrencies);
1703         }
1704     }
1705 
currencyDateRelationToString( Relation<String, Pair<String, CurrencyDateInfo>> allCodes, Set<String> filter)1706     private String currencyDateRelationToString(
1707             Relation<String, Pair<String, CurrencyDateInfo>> allCodes, Set<String> filter) {
1708         return allCodes.entrySet().stream()
1709                 .filter(p -> filter.contains(p.getKey()))
1710                 .map(p -> p.getValue().getSecond().toString())
1711                 .collect(Collectors.joining(", "));
1712     }
1713 
showCountries( final String title, Set<String> isoCountries, Set<String> cldrCountries, Set<String> missing)1714     private void showCountries(
1715             final String title,
1716             Set<String> isoCountries,
1717             Set<String> cldrCountries,
1718             Set<String> missing) {
1719         missing.clear();
1720         missing.addAll(isoCountries);
1721         missing.removeAll(cldrCountries);
1722         for (String country : missing) {
1723             logln("\t\tExtra in " + title + "\t" + country + " - " + getRegionName(country));
1724         }
1725     }
1726 
TestCurrencyDecimalPlaces()1727     public void TestCurrencyDecimalPlaces() {
1728         IsoCurrencyParser isoCodes = IsoCurrencyParser.getInstance();
1729         Relation<String, IsoCurrencyParser.Data> codeList = isoCodes.getCodeList();
1730         Set<String> currencyCodes = STANDARD_CODES.getGoodAvailableCodes("currency");
1731         for (String cc : currencyCodes) {
1732             Set<IsoCurrencyParser.Data> d = codeList.get(cc);
1733             if (d != null) {
1734                 for (IsoCurrencyParser.Data x : d) {
1735                     CurrencyNumberInfo cni = SUPPLEMENTAL.getCurrencyNumberInfo(cc);
1736                     if (cni.digits != x.getMinorUnit()) {
1737                         logln(
1738                                 "Mismatch between ISO/CLDR for decimal places for currency => "
1739                                         + cc
1740                                         + ". ISO = "
1741                                         + x.getMinorUnit()
1742                                         + " CLDR = "
1743                                         + cni.digits);
1744                     }
1745                 }
1746             }
1747         }
1748     }
1749 
1750     /** Verify that we have a default script for every CLDR base language */
TestDefaultScripts()1751     public void TestDefaultScripts() {
1752         SupplementalDataInfo supp = SUPPLEMENTAL;
1753         Map<String, String> likelyData = supp.getLikelySubtags();
1754         Map<String, String> baseToDefaultContentScript = new HashMap<>();
1755         for (CLDRLocale locale : supp.getDefaultContentCLDRLocales()) {
1756             String script = locale.getScript();
1757             if (!script.isEmpty() && locale.getCountry().isEmpty()) {
1758                 baseToDefaultContentScript.put(locale.getLanguage(), script);
1759             }
1760         }
1761         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
1762             if (LocaleNames.ROOT.equals(locale)) {
1763                 continue;
1764             }
1765             if (!StandardCodes.isLocaleAtLeastBasic(locale)) {
1766                 continue;
1767             }
1768             CLDRLocale loc = CLDRLocale.getInstance(locale);
1769             String baseLanguage = loc.getLanguage();
1770             String defaultScript = supp.getDefaultScript(baseLanguage);
1771 
1772             String defaultContentScript = baseToDefaultContentScript.get(baseLanguage);
1773             if (defaultContentScript != null) {
1774                 assertEquals(
1775                         loc + " defaultContentScript = default",
1776                         defaultScript,
1777                         defaultContentScript);
1778             }
1779             String likely = likelyData.get(baseLanguage);
1780             String likelyScript =
1781                     likely == null ? null : CLDRLocale.getInstance(likely).getScript();
1782             Map<Type, BasicLanguageData> scriptInfo = supp.getBasicLanguageDataMap(baseLanguage);
1783             if (scriptInfo == null) {
1784                 if (StandardCodes.isLocaleAtLeastBasic(locale)) {
1785                     errln(loc + ": has no BasicLanguageData");
1786                 } else {
1787                     logln(loc + ": has no BasicLanguageData (not a basic loc)");
1788                 }
1789             } else {
1790                 BasicLanguageData data = scriptInfo.get(Type.primary);
1791                 if (data == null) {
1792                     data = scriptInfo.get(Type.secondary);
1793                 }
1794                 if (data == null) {
1795                     if (StandardCodes.isLocaleAtLeastBasic(locale)) {
1796                         errln(loc + ": has no scripts in BasicLanguageData");
1797                     } else {
1798                         logln(loc + ": has no scripts in BasicLanguageData (not a basic loc)");
1799                     }
1800                 } else if (!data.getScripts().contains(defaultScript)) {
1801                     if (StandardCodes.isLocaleAtLeastBasic(locale)) {
1802                         errln(
1803                                 loc
1804                                         + ": "
1805                                         + defaultScript
1806                                         + " not in BasicLanguageData - check <languages> in supplementalData.xml and language_script.tsv  "
1807                                         + data.getScripts());
1808                     } else {
1809                         logln(
1810                                 loc
1811                                         + ": "
1812                                         + defaultScript
1813                                         + " not in BasicLanguageData - check <languages> in supplementalData.xml and language_script.tsv (not a basic loc) "
1814                                         + data.getScripts());
1815                     }
1816                 }
1817             }
1818 
1819             assertEquals(loc + " likely = default", defaultScript, likelyScript);
1820 
1821             assertNotNull(loc + ": needs default script", defaultScript);
1822 
1823             if (!loc.getScript().isEmpty()) {
1824                 if (!loc.getScript().equals(defaultScript)) {
1825                     assertNotEquals(
1826                             locale + ": only include script if not default",
1827                             loc.getScript(),
1828                             defaultScript);
1829                 }
1830             }
1831         }
1832     }
1833 
1834     enum CoverageIssue {
1835         log,
1836         warn,
1837         error
1838     }
1839 
TestPluralCompleteness()1840     public void TestPluralCompleteness() {
1841         // Set<String> cardinalLocales = new
1842         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.cardinal));
1843         // Set<String> ordinalLocales = new
1844         // TreeSet<String>(SUPPLEMENTAL.getPluralLocales(PluralType.ordinal));
1845         // Map<ULocale, PluralRulesFactory.SamplePatterns> sampleCardinals =
1846         // PluralRulesFactory.getLocaleToSamplePatterns();
1847         // Set<ULocale> sampleCardinalLocales = PluralRulesFactory.getLocales();
1848         // // new HashSet(PluralRulesFactory.getSampleCounts(uLocale,
1849         // type).keySet());
1850         // Map<ULocale, PluralRules> overrideCardinals =
1851         // PluralRulesFactory.getPluralOverrides();
1852         // Set<ULocale> overrideCardinalLocales = new
1853         // HashSet<ULocale>(overrideCardinals.keySet());
1854 
1855         Set<String> testLocales =
1856                 STANDARD_CODES.getLocaleCoverageLocales(
1857                         Organization.google, EnumSet.of(Level.MODERN));
1858         Set<String> allLocales = testInfo.getCldrFactory().getAvailable();
1859         LanguageTagParser ltp = new LanguageTagParser();
1860         for (String locale : allLocales) {
1861             // the only known case where plural rules depend on region or script
1862             // is pt_PT
1863             if (locale.equals(LocaleNames.ROOT)) {
1864                 continue;
1865             }
1866             ltp.set(locale);
1867             if (!ltp.getRegion().isEmpty() || !ltp.getScript().isEmpty()) {
1868                 continue;
1869             }
1870             CoverageIssue needsCoverage =
1871                     testLocales.contains(locale) ? CoverageIssue.error : CoverageIssue.log;
1872             CoverageIssue needsCoverage2 =
1873                     needsCoverage == CoverageIssue.error ? CoverageIssue.warn : needsCoverage;
1874             PluralRulesFactory prf =
1875                     PluralRulesFactory.getInstance(
1876                             CLDRConfig.getInstance().getSupplementalDataInfo());
1877 
1878             for (PluralType type : PluralType.values()) {
1879                 PluralInfo pluralInfo = SUPPLEMENTAL.getPlurals(type, locale, false);
1880                 if (pluralInfo == null) {
1881                     errOrLog(
1882                             needsCoverage,
1883                             locale + "\t" + type + " \tmissing plural rules",
1884                             "Cldrbug:7839",
1885                             "Missing plural data for modern locales");
1886                     continue;
1887                 }
1888                 Set<Count> counts = pluralInfo.getCounts();
1889                 // if (counts.size() == 1) {
1890                 // continue; // skip checking samples
1891                 // }
1892                 HashSet<String> samples = new HashSet<>();
1893                 EnumSet<Count> countsWithNoSamples = EnumSet.noneOf(Count.class);
1894                 Relation<String, Count> samplesToCounts =
1895                         Relation.of(new HashMap(), LinkedHashSet.class);
1896                 Set<Count> countsFound = prf.getSampleCounts(locale, type.standardType);
1897                 StringBuilder failureCases = new StringBuilder();
1898                 for (Count count : counts) {
1899                     String pattern =
1900                             PluralRulesFactory.getSamplePattern(locale, type.standardType, count);
1901                     final String rangeLine =
1902                             getRangeLine(count, pluralInfo.getPluralRules(), pattern);
1903                     failureCases
1904                             .append('\n')
1905                             .append(locale)
1906                             .append('\t')
1907                             .append(type)
1908                             .append('\t')
1909                             .append(rangeLine);
1910                     if (countsFound == null || !countsFound.contains(count)) {
1911                         countsWithNoSamples.add(count);
1912                     } else {
1913                         samplesToCounts.put(pattern, count);
1914                         logln(locale + "\t" + type + "\t" + count + "\t" + pattern);
1915                     }
1916                 }
1917                 if (!countsWithNoSamples.isEmpty()) {
1918                     errOrLog(
1919                             needsCoverage,
1920                             locale + "\t" + type + "\t missing samples:\t" + countsWithNoSamples,
1921                             "cldrbug:7075",
1922                             "Missing ordinal minimal pairs");
1923                     errOrLog(needsCoverage2, failureCases.toString());
1924                 }
1925                 for (Entry<String, Set<Count>> entry : samplesToCounts.keyValuesSet()) {
1926                     if (entry.getValue().size() != 1) {
1927                         errOrLog(
1928                                 needsCoverage,
1929                                 locale
1930                                         + "\t"
1931                                         + type
1932                                         + "\t duplicate samples: "
1933                                         + entry.getValue()
1934                                         + " => «"
1935                                         + entry.getKey()
1936                                         + "»",
1937                                 "cldrbug:7119",
1938                                 "Some duplicate minimal pairs");
1939                         errOrLog(needsCoverage2, failureCases.toString());
1940                     }
1941                 }
1942             }
1943         }
1944     }
1945 
errOrLog( CoverageIssue causeError, String message, String logTicket, String logComment)1946     public void errOrLog(
1947             CoverageIssue causeError, String message, String logTicket, String logComment) {
1948         switch (causeError) {
1949             case error:
1950                 if (logTicket == null) {
1951                     errln(message);
1952                     break;
1953                 }
1954                 logKnownIssue(logTicket, logComment);
1955                 // fall through
1956             case warn:
1957                 warnln(message);
1958                 break;
1959             case log:
1960                 logln(message);
1961                 break;
1962         }
1963     }
1964 
errOrLog(CoverageIssue causeError, String message)1965     public void errOrLog(CoverageIssue causeError, String message) {
1966         errOrLog(causeError, message, null, null);
1967     }
1968 
TestNumberingSystemDigits()1969     public void TestNumberingSystemDigits() {
1970         String[] knownExceptions = {
1971             "hanidec", // hanidec is not in codepoint order.
1972         };
1973         List<String> knownExceptionList = Arrays.asList(knownExceptions);
1974         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
1975             if (knownExceptionList.contains(ns)) {
1976                 continue;
1977             }
1978             String digits = SUPPLEMENTAL.getDigits(ns);
1979             int previousChar = 0;
1980             int ch;
1981 
1982             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
1983                 ch = digits.codePointAt(i);
1984                 if (i > 0 && ch != previousChar + 1) {
1985                     errln(
1986                             "Digits for numbering system "
1987                                     + ns
1988                                     + " are not in code point order. Previous char = U+"
1989                                     + Utility.hex(previousChar, 4)
1990                                     + " Current char = U+"
1991                                     + Utility.hex(ch, 4));
1992                     break;
1993                 }
1994                 previousChar = ch;
1995             }
1996         }
1997     }
1998 
TestNumberingSystemDigitCompleteness()1999     public void TestNumberingSystemDigitCompleteness() {
2000         List<Integer> unicodeDigits = new ArrayList<>();
2001         for (int cp = UCharacter.MIN_CODE_POINT; cp <= UCharacter.MAX_CODE_POINT; cp++) {
2002             if (UCharacter.getType(cp) == UCharacterEnums.ECharacterCategory.DECIMAL_DIGIT_NUMBER) {
2003                 unicodeDigits.add(cp);
2004             }
2005         }
2006 
2007         for (String ns : SUPPLEMENTAL.getNumericNumberingSystems()) {
2008             String digits = SUPPLEMENTAL.getDigits(ns);
2009             int ch;
2010 
2011             for (int i = 0; i < digits.length(); i += Character.charCount(ch)) {
2012                 ch = digits.codePointAt(i);
2013                 unicodeDigits.remove(Integer.valueOf(ch));
2014             }
2015         }
2016 
2017         if (unicodeDigits.size() > 0) {
2018             for (Integer i : unicodeDigits) {
2019                 errln(
2020                         "Unicode digit: "
2021                                 + UCharacter.getName(i)
2022                                 + " is not in any numbering system. Script = "
2023                                 + UScript.getShortName(UScript.getScript(i)));
2024             }
2025         }
2026     }
2027 
TestMetazones()2028     public void TestMetazones() {
2029         Date goalMin = new Date(70, 0, 1);
2030         Date goalMax = new Date(300, 0, 2);
2031         ImmutableSet<String> knownTZWithoutMetazone =
2032                 ImmutableSet.of("America/Montreal", "Asia/Barnaul", "Asia/Tomsk", "Europe/Kirov");
2033         for (String timezoneRaw : TimeZone.getAvailableIDs()) {
2034             String timezone = TimeZone.getCanonicalID(timezoneRaw);
2035             String region = TimeZone.getRegion(timezone);
2036             if (!timezone.equals(timezoneRaw) || "001".equals(region)) {
2037                 continue;
2038             }
2039             if (knownTZWithoutMetazone.contains(timezone)) {
2040                 continue;
2041             }
2042             final Set<MetaZoneRange> ranges = SUPPLEMENTAL.getMetaZoneRanges(timezone);
2043 
2044             if (assertNotNull("metazones for " + timezone, ranges)) {
2045                 long min = Long.MAX_VALUE;
2046                 long max = Long.MIN_VALUE;
2047                 for (MetaZoneRange range : ranges) {
2048                     if (range.dateRange.from != DateRange.START_OF_TIME) {
2049                         min = Math.min(min, range.dateRange.from);
2050                     }
2051                     if (range.dateRange.to != DateRange.END_OF_TIME) {
2052                         max = Math.max(max, range.dateRange.to);
2053                     }
2054                 }
2055                 assertRelation(
2056                         timezone + " has metazone before 1970?", true, goalMin, LEQ, new Date(min));
2057                 assertRelation(
2058                         timezone + " has metazone until way in the future?",
2059                         true,
2060                         goalMax,
2061                         GEQ,
2062                         new Date(max));
2063             }
2064         }
2065         com.google.common.collect.Interners i;
2066     }
2067 
Test9924()2068     public void Test9924() {
2069         Boolean b = org.unicode.cldr.unittest.TestSupplementalInfo.LOCALES_FIXED;
2070         PopulationData zhCNData =
2071                 SUPPLEMENTAL.getLanguageAndTerritoryPopulationData(b ? "zh" : "zh_Hans", "CN");
2072         PopulationData yueCNData =
2073                 SUPPLEMENTAL.getLanguageAndTerritoryPopulationData("yue_Hans", "CN");
2074         assertTrue("yue*10 < zh", yueCNData.getPopulation() < zhCNData.getPopulation());
2075     }
2076 
Test10765()2077     public void Test10765() { //
2078         Set<String> surveyToolLanguages =
2079                 SUPPLEMENTAL.getCLDRLanguageCodes(); // codes that show up in Survey Tool
2080         Set<String> mainLanguages = new TreeSet<>();
2081         LanguageTagParser ltp = new LanguageTagParser();
2082         for (String locale : testInfo.getCldrFactory().getAvailableLanguages()) {
2083             if (StandardCodes.isLocaleAtLeastBasic(locale)) {
2084                 mainLanguages.add(ltp.set(locale).getLanguage());
2085             }
2086         }
2087         // add special codes we want to see anyway
2088         mainLanguages.add(LocaleNames.UND);
2089         mainLanguages.add(LocaleNames.MUL);
2090         mainLanguages.add(LocaleNames.ZXX);
2091 
2092         if (!mainLanguages.containsAll(surveyToolLanguages)) {
2093             CoverageLevel2 coverageLevel =
2094                     CoverageLevel2.getInstance(SUPPLEMENTAL, "ja"); // pick "neutral" locale
2095             Set<String> temp = new TreeSet<>(surveyToolLanguages);
2096             temp.removeAll(mainLanguages);
2097             Set<String> modern = new TreeSet<>();
2098             Set<String> comprehensive = new TreeSet<>();
2099             for (String lang : temp) {
2100                 Level level = coverageLevel.getLevel(CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, lang));
2101                 if (level.compareTo(Level.MODERN) <= 0) {
2102                     modern.add(lang);
2103                 } else {
2104                     comprehensive.add(lang);
2105                 }
2106             }
2107             warnln(
2108                     "«Modern» Languages in <variable id='$language' type='choice'> that aren't in main/* : "
2109                             + getNames(modern));
2110             logln(
2111                     "«Comprehensive» Languages in <variable id='$language' type='choice'> that aren't in main/* : "
2112                             + getNames(comprehensive));
2113         }
2114         if (!surveyToolLanguages.containsAll(mainLanguages)) {
2115             mainLanguages.removeAll(surveyToolLanguages);
2116             // TODO: See https://unicode-org.atlassian.net/browse/CLDR-14974
2117             // Currently there is a requirement that all locales in main/* are in
2118             // attributeValueValidity.xml
2119             assertEquals(
2120                     "main/* languages missing from <variable id='$language'/> in attributeValueValidity.xml",
2121                     Collections.EMPTY_SET,
2122                     mainLanguages);
2123         }
2124     }
2125 
getNames(Set<String> temp)2126     private Set<String> getNames(Set<String> temp) {
2127         Set<String> tempNames = new TreeSet<>();
2128         for (String langCode : temp) {
2129             tempNames.add(
2130                     testInfo.getEnglish().getName(CLDRFile.LANGUAGE_NAME, langCode)
2131                             + " ("
2132                             + langCode
2133                             + ")");
2134         }
2135         return tempNames;
2136     }
2137 
TestGrammarInfo()2138     public void TestGrammarInfo() {
2139         final Logger logger = getLogger();
2140         Multimap<String, String> allValues = TreeMultimap.create();
2141         for (String locale : SUPPLEMENTAL.hasGrammarInfo()) {
2142             if (locale.contentEquals("tr")) {
2143                 int debug = 0;
2144             }
2145             GrammarInfo grammarInfo = SUPPLEMENTAL.getGrammarInfo(locale);
2146             for (GrammaticalTarget target : GrammaticalTarget.values()) {
2147                 for (GrammaticalFeature feature : GrammaticalFeature.values()) {
2148                     Collection<String> general =
2149                             grammarInfo.get(target, feature, GrammaticalScope.general);
2150                     for (GrammaticalScope scope : GrammaticalScope.values()) {
2151                         Collection<String> units = grammarInfo.get(target, feature, scope);
2152                         allValues.putAll(target + "/" + feature + "/" + scope, units);
2153                         if (scope != GrammaticalScope.general) {
2154                             assertTrue(
2155                                     general + " > " + scope + " " + units,
2156                                     general.containsAll(units));
2157                         }
2158                     }
2159                 }
2160             }
2161             logger.fine(grammarInfo.toString("\n" + locale + "\t"));
2162         }
2163         if (logger.isLoggable(java.util.logging.Level.FINE)) { // if level is at least FINE
2164             logger.fine("");
2165             for (Entry<String, Collection<String>> entry : allValues.asMap().entrySet()) {
2166                 logger.fine(entry.getKey() + "\t" + Joiner.on(", ").join(entry.getValue()));
2167             }
2168         }
2169     }
2170 }
2171