xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/test/CLDRTest.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  */
7 package org.unicode.cldr.test;
8 
9 import static org.unicode.cldr.util.PathUtilities.getNormalizedPath;
10 
11 import com.ibm.icu.dev.test.TestFmwk;
12 import com.ibm.icu.text.BreakIterator;
13 import com.ibm.icu.text.DecimalFormat;
14 import com.ibm.icu.text.NumberFormat;
15 import com.ibm.icu.text.UTF16;
16 import com.ibm.icu.text.UnicodeSet;
17 import com.ibm.icu.util.ULocale;
18 import java.io.File;
19 import java.io.IOException;
20 import java.io.PrintWriter;
21 import java.util.Arrays;
22 import java.util.Calendar;
23 import java.util.Collection;
24 import java.util.Date;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.TreeSet;
34 import org.unicode.cldr.draft.FileUtilities;
35 import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
36 import org.unicode.cldr.util.*;
37 import org.xml.sax.SAXException;
38 
39 /**
 * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
41  *
42  * <blockquote>
43  *
44  * -nothrow
45  *
46  * </blockquote>
47  *
48  * To run a particular set of tests, include their names, like
49  *
50  * <blockquote>
51  *
52  * -nothrow TestForIllegalAttributeValues TestMinimalLocalization
53  *
54  * </blockquote>
55  *
56  * To show more information (logln), add -verbose
57  *
 * <p>There are some Java system properties that can be used with the test. <br>
 * -DSHOW_FILES=<anything> shows all create/open of files. <br>
 * -DXML_MATCH=<regular expression> skips all locales that don't match the regular expression <br>
 * -DXML_MAIN_DIR=<filesystem directory> resets to a different main directory (e.g. not
 * cldr/common/main). For example, some of the tools generate into a locale directory like
 * -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\ so this can be used to check that directory.
 * <br>
 * -DXML_SKIP_DRAFT=<anything> skips draft locales; only the presence of the property is checked,
 * not its value
66  */
67 public class CLDRTest extends TestFmwk {
68     /** privates */
69     private static String MATCH;
70 
71     private static String MAIN_DIR;
72     private static boolean SKIP_DRAFT;
73     private Set<String> locales;
74     private Set<String> languageLocales;
75     private Factory cldrFactory;
76     private CLDRFile resolvedRoot;
77     private CLDRFile resolvedEnglish;
78     private final UnicodeSet commonAndInherited =
79             new UnicodeSet("[[:script=common:][:script=inherited:][:alphabetic=false:]]");
80     private static final String[] WIDTHS = {"narrow", "wide", "abbreviated", "short"};
81     private static final String[] MONTHORDAYS = {"day", "month"};
82     private Map<String, String> localeNameCache = new HashMap<>();
83     private CLDRFile english = null;
84 
85     private Set<String> surveyInfo = new TreeSet<>();
86 
87     /** TestFmwk boilerplate */
main(String[] args)88     public static void main(String[] args) throws Exception {
89         MATCH = System.getProperty("XML_MATCH");
90         if (MATCH == null) MATCH = ".*";
91         else System.out.println("Resetting MATCH:" + MATCH);
92         MAIN_DIR = System.getProperty("XML_MAIN_DIR");
93         if (MAIN_DIR == null) MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
94         else System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
95         SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
96         if (SKIP_DRAFT) System.out.println("Skipping Draft locales");
97 
98         double deltaTime = System.currentTimeMillis();
99         new CLDRTest().run(args);
100         deltaTime = System.currentTimeMillis() - deltaTime;
101         System.out.println("Seconds: " + deltaTime / 1000);
102     }
103 
TestZZZZHack()104     public void TestZZZZHack() throws IOException {
105         // hack to get file written at the end of run.
106         PrintWriter surveyFile =
107                 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt");
108         for (String s : surveyInfo) {
109             surveyFile.println(s);
110         }
111         surveyFile.close();
112     }
113 
114     /** TestFmwk boilerplate */
CLDRTest()115     public CLDRTest() throws SAXException, IOException {
116         // TODO parameterize the directory and filter
117         cldrFactory = Factory.make(MAIN_DIR, MATCH);
118         // CLDRKey.main(new String[]{"-mde.*"});
119         locales = cldrFactory.getAvailable();
120         languageLocales = cldrFactory.getAvailableLanguages();
121         resolvedRoot = cldrFactory.make(LocaleNames.ROOT, true);
122         /*
123          * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml");
124          * CLDRFile temp = (CLDRFile) resolvedRoot.clone();
125          * temp.write(out);
126          * out.close();
127          */
128         resolvedEnglish = cldrFactory.make("en", true);
129     }
130 
131     /** Check to make sure that the currency formats are kosher. */
TestCurrencyFormats()132     public void TestCurrencyFormats() {
133         // String decimal =
134         // "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
135         // String currency =
136         // "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
137         for (String locale : locales) {
138             boolean isPOSIX = locale.indexOf("POSIX") >= 0;
139             logln("Testing: " + locale);
140             CLDRFile item = cldrFactory.make(locale, false);
141             for (String xpath : item) {
142                 NumericType type = NumericType.getNumericType(xpath);
143                 if (type == NumericType.NOT_NUMERIC) continue;
144                 String value = item.getStringValue(xpath);
145                 // at this point, we only have currency formats
146                 String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
147                 if (!pattern.equals(value)) {
148                     String draft = "";
149                     if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0)
150                         draft = " [draft]";
151                     assertEquals(
152                             getLocaleAndName(locale) + draft + " " + type + " pattern incorrect",
153                             pattern,
154                             value);
155                 }
156             }
157         }
158     }
159 
160     /** Internal class */
161     private static class ValueCount {
162         int count = 1;
163         String value;
164         String fullxpath;
165     }
166 
167     /**
168      * Verify that if all the children of a language locale do not have the same value for the same
169      * key.
170      */
TestCommonChildren()171     public void TestCommonChildren() {
172         if (disableUntilLater("TestCommonChildren")) return;
173 
174         Map<String, ValueCount> currentValues = new TreeMap<>();
175         Set<String> okValues = new TreeSet<>();
176 
177         for (String parent : languageLocales) {
178             logln("Testing: " + parent);
179             currentValues.clear();
180             okValues.clear();
181             Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
182             for (String locale : availableWithParent) {
183                 logln("\tTesting: " + locale);
184                 CLDRFile item = cldrFactory.make(locale, false);
185                 // Walk through all the xpaths, adding to currentValues
186                 // Whenever two values for the same xpath are different, we remove from
187                 // currentValues, and add to
188                 // okValues
189                 for (String xpath : item) {
190                     if (okValues.contains(xpath)) continue;
191                     if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
192                     String v = item.getStringValue(xpath);
193                     ValueCount last = currentValues.get(xpath);
194                     if (last == null) {
195                         ValueCount vc = new ValueCount();
196                         vc.value = v;
197                         vc.fullxpath = item.getFullXPath(xpath);
198                         currentValues.put(xpath, vc);
199                     } else if (v.equals(last.value)) {
200                         last.count++;
201                     } else {
202                         okValues.add(xpath);
203                         currentValues.remove(xpath);
204                     }
205                 }
206                 // at the end, only the keys left in currentValues are (possibly) faulty
207                 // they are actually bad IFF either
208                 // (a) the count is equal to the total (thus all children are the same), or
209                 // (b) their value is the same as the parent's resolved value (thus all children are
210                 // the same or the
211                 // same
212                 // as the inherited parent value).
213             }
214             if (currentValues.size() == 0) continue;
215             int size = availableWithParent.size();
216             CLDRFile parentCLDR = cldrFactory.make(parent, true);
217             for (String xpath : currentValues.keySet()) {
218                 ValueCount vc = currentValues.get(xpath);
219                 if (vc.count == size
220                         || (vc.value.equals(parentCLDR.getStringValue(xpath))
221                                 && vc.fullxpath.equals(parentCLDR.getStringValue(xpath)))) {
222                     String draft = "";
223                     if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
224                     String count = (vc.count == size ? "" : vc.count + "/") + size;
225                     warnln(
226                             getLocaleAndName(parent)
227                                     + draft
228                                     + "\tall children ("
229                                     + count
230                                     + ") have same value for:\t"
231                                     + xpath
232                                     + ";\t"
233                                     + vc.value);
234                 }
235             }
236         }
237     }
238 
239     static String[] EXEMPLAR_SKIPS = {
240         "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars"
241     };
242 
243     /** Check that the exemplars include all characters in the data. */
TestThatExemplarsContainAll()244     public void TestThatExemplarsContainAll() {
245         UnicodeSet allExemplars = new UnicodeSet();
246         if (disableUntilLater("TestThatExemplarsContainAll")) return;
247         Set<String> counts = new TreeSet<>();
248         int totalCount = 0;
249         UnicodeSet localeMissing = new UnicodeSet();
250         for (String locale : locales) {
251             if (locale.equals(LocaleNames.ROOT)) continue;
252             CLDRFile resolved = cldrFactory.make(locale, false); // FIX LATER
253             UnicodeSet exemplars = getFixedExemplarSet(locale, resolved);
254             CLDRFile plain = cldrFactory.make(locale, false);
255             int count = 0;
256             localeMissing.clear();
257             file:
258             for (String xpath : plain) {
259                 for (int i = 0; i < EXEMPLAR_SKIPS.length; ++i) {
260                     if (xpath.indexOf(EXEMPLAR_SKIPS[i]) > 0) continue file; // skip some items.
261                 }
262                 if (SKIP_DRAFT) {
263                     String fullxpath = plain.getFullXPath(xpath);
264                     if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
265                 }
266                 if (xpath.startsWith("//ldml/posix/messages")) continue;
267                 String value = plain.getStringValue(xpath);
268                 allExemplars.addAll(value);
269                 if (!exemplars.containsAll(value)) {
270                     count++;
271                     UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
272                     localeMissing.addAll(missing);
273                     logln(
274                             getLocaleAndName(locale)
275                                     + "\t"
276                                     + xpath
277                                     + "\t<"
278                                     + value
279                                     + "> contains "
280                                     + missing
281                                     + ", not in exemplars");
282                     surveyInfo.add(
283                             locale
284                                     + "\t"
285                                     + xpath
286                                     + "\t'"
287                                     + value
288                                     + "' contains characters "
289                                     + missing.toPattern(false)
290                                     + ", which are not in exemplars");
291                 }
292             }
293             NumberFormat nf = new DecimalFormat("000");
294             if (count != 0) {
295                 totalCount += count;
296                 counts.add(
297                         nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
298             }
299             if (localeMissing.size() != 0) {
300                 errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
301             }
302         }
303         for (String c : counts) {
304             logln(c);
305         }
306         logln("Total Count: " + totalCount);
307         System.out.println("All exemplars: " + allExemplars.toPattern(true));
308     }
309 
310     // Get Date-Time in milliseconds
getDateTimeinMillis(int year, int month, int date)311     private static long getDateTimeinMillis(int year, int month, int date) {
312         Calendar cal = Calendar.getInstance();
313         cal.set(year, month, date);
314         return cal.getTimeInMillis();
315     }
316 
317     static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);
318 
319     /** */
disableUntilLater(String string)320     private boolean disableUntilLater(String string) {
321         if (new Date().getTime() >= disableDate) return false;
322         warnln("Disabling " + string + " until " + new Date(disableDate));
323         return true;
324     }
325 
326     /** Internal */
getFixedExemplarSet(String locale, CLDRFile cldrfile)327     private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
328         UnicodeSet exemplars = getExemplarSet(cldrfile, "");
329         if (exemplars.size() == 0) {
330             errln(getLocaleAndName(locale) + " has empty exemplar set");
331         }
332         exemplars.addAll(getExemplarSet(cldrfile, "standard"));
333         UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
334         if (exemplars.containsSome(auxiliary)) {
335             errln(
336                     getLocaleAndName(locale)
337                             + "Auxiliary & main exemplars should be disjoint, but overlap with "
338                             + new UnicodeSet(exemplars).retainAll(auxiliary)
339                             + ": change auxiliary to "
340                             + auxiliary.removeAll(exemplars));
341         }
342         exemplars.addAll(auxiliary);
343         exemplars.addAll(commonAndInherited);
344         return exemplars;
345     }
346 
347     /**
348      * @return Gets an exemplar set. Also verifies that the set contains no properties.
349      */
getExemplarSet(CLDRFile cldrfile, String type)350     public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
351         if (type.length() != 0) type = "[@type=\"" + type + "\"]";
352         String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
353         if (v == null) return new UnicodeSet();
354         String pattern = v;
355         if (pattern.indexOf("[:") >= 0 || pattern.indexOf("\\p{") > 0) {
356             errln(
357                     getLocaleName(cldrfile.getLocaleID())
358                             + " exemplar pattern contains property: "
359                             + pattern);
360         }
361         try {
362             UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
363             result.remove(0x20);
364             return result;
365         } catch (RuntimeException e) {
366             e.printStackTrace();
367             errln(
368                     getLocaleAndName(cldrfile.getLocaleID())
369                             + " has illegal exemplar set: <"
370                             + v
371                             + ">");
372             return new UnicodeSet();
373         }
374         // if (type.length() != 0) System.out.println("fetched set for " + type);
375     }
376 
getLocaleAndName(String locale)377     public String getLocaleAndName(String locale) {
378         return locale + " (" + getLocaleName(locale) + ")";
379     }
380 
381     /**
382      * @return the ID plus its localization (for language, script, and territory IDs only)
383      */
getIDAndLocalization(String id)384     public String getIDAndLocalization(String id) {
385         return id + " " + getLocalization(id);
386     }
387 
388     /**
389      * @return the localization (for language, script, and territory IDs only)
390      */
getLocalization(String id)391     public String getLocalization(String id) {
392         if (english == null) english = cldrFactory.make("en", true);
393         if (id.length() == 0) return "?";
394         // pick on basis of case
395         char ch = id.charAt(0);
396         if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
397         if (id.length() == 4 && 'A' <= ch && ch <= 'Z')
398             return getName(english, "scripts/script", id);
399         return getName(english, "territories/territory", id);
400     }
401 
402     /** Internal */
getIDAndLocalization(Set<String> missing)403     private String getIDAndLocalization(Set<String> missing) {
404         StringBuffer buffer = new StringBuffer();
405         for (String next : missing) {
406             if (buffer.length() != 0) buffer.append("; ");
407             buffer.append(getIDAndLocalization(next));
408         }
409         return buffer.toString();
410     }
411 
getLocaleName(String locale)412     public String getLocaleName(String locale) {
413         String name = localeNameCache.get(locale);
414         if (name != null) return name;
415         if (english == null) english = cldrFactory.make("en", true);
416         String result = english.getName(locale);
417         /*
418          * Collection c = Utility.splitList(locale, '_', false, null);
419          * String[] pieces = new String[c.size()];
420          * c.toArray(pieces);
421          * int i = 0;
422          * String result = getName(english, "languages/language", pieces[i++]);
423          * if (pieces[i].length() == 0) return result;
424          * if (pieces[i].length() == 4) {
425          * result += " " + getName(english, "scripts/script", pieces[i++]);
426          * }
427          * if (pieces[i].length() == 0) return result;
428          * result += " " + getName(english, "territories/territory", pieces[i++]);
429          * if (pieces[i].length() == 0) return result;
430          * result += " " + getName(english, "variant/variants", pieces[i++]);
431          */
432         localeNameCache.put(locale, result);
433         return result;
434     }
435 
436     /** Internal */
getName(CLDRFile english, String kind, String type)437     private String getName(CLDRFile english, String kind, String type) {
438         String v =
439                 english.getStringValue(
440                         "//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
441         if (v == null) return "<" + type + ">";
442         return v;
443     }
444 
445     /**
446      * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with
447      * aliases removed) or ISO 4217
448      *
449      * @throws IOException
450      */
TestForIllegalAttributeValues()451     public void TestForIllegalAttributeValues() {
452         // check for illegal attribute values that are not in the DTD
453         Map<String, Set<String>> result = new TreeMap<>();
454         Map<String, Set<String>> totalResult = new TreeMap<>();
455         for (String locale : locales) {
456             logln("Testing: " + locale);
457             CLDRFile item = cldrFactory.make(locale, false);
458             result.clear();
459             Set<String> xpathFailures = null; // don't collect
460             // XPathParts parts;
461             // String xpath;
462             // CLDRFile.StringValue value;
463             // String element;
464             // Map attributes;
465             checkAttributeValidity(item, result, xpathFailures);
466 
467             // now show
468             // String localeName = getLocaleAndName(locale);
469             for (Iterator<String> it3 = result.keySet().iterator(); it3.hasNext(); ) {
470                 String code = it3.next();
471                 Set<String> avalues = result.get(code);
472                 errln(
473                         getLocaleAndName(locale)
474                                 + "\tillegal attribute value for "
475                                 + code
476                                 + ", value:\t"
477                                 + show(avalues));
478                 Set<String> totalvalues = totalResult.get(code);
479                 if (totalvalues == null) totalResult.put(code, totalvalues = new TreeSet<>());
480                 totalvalues.addAll(avalues);
481             }
482         }
483         for (Iterator<String> it3 = totalResult.keySet().iterator(); it3.hasNext(); ) {
484             String code = it3.next();
485             Set<String> avalues = totalResult.get(code);
486             errln("All illegal attribute values for " + code + ", value:\t" + show(avalues));
487         }
488     }
489 
490     /**
491      * Tests whether the display names have any collisions, e.g. if in the fully resolved locale $
492      * is used for both USD and UAD.
493      */
TestDisplayNameCollisions()494     public void TestDisplayNameCollisions() {
495         if (disableUntilLater("TestDisplayNameCollisions")) return;
496 
497         Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
498         for (int i = 0; i < maps.length; ++i) {
499             maps[i] = new HashMap<>();
500         }
501         Set<String> collisions = new TreeSet<>();
502         for (Iterator<String> it = locales.iterator(); it.hasNext(); ) {
503             String locale = it.next();
504             CLDRFile item = cldrFactory.make(locale, true);
505             for (int i = 0; i < maps.length; ++i) {
506                 maps[i].clear();
507             }
508             collisions.clear();
509 
510             for (Iterator<String> it2 = item.iterator(); it2.hasNext(); ) {
511                 String xpath = it2.next();
512                 int nameType = CLDRFile.getNameType(xpath);
513                 if (nameType < 0) continue;
514                 String value = item.getStringValue(xpath);
515                 String xpath2 = maps[nameType].get(value);
516                 if (xpath2 == null) {
517                     maps[nameType].put(value, xpath);
518                     continue;
519                 }
520                 collisions.add(
521                         CLDRFile.getNameTypeName(nameType)
522                                 + "\t"
523                                 + value
524                                 + "\t"
525                                 + xpath
526                                 + "\t"
527                                 + xpath2);
528                 surveyInfo.add(
529                         locale
530                                 + "\t"
531                                 + xpath
532                                 + "\t'"
533                                 + value
534                                 + "' is a duplicate of what is in "
535                                 + xpath2);
536             }
537             String name = getLocaleAndName(locale) + "\t";
538             for (Iterator<String> it2 = collisions.iterator(); it2.hasNext(); ) {
539                 errln(name + it2.next());
540             }
541         }
542     }
543 
544     /**
545      * Checks the validity of attributes, based on StandardCodes. The invalid codes are added to
546      * badCodes, and the failing xpaths are added to xpathFailures.
547      *
548      * @param item
549      * @param badCodes
550      * @param xpathFailures
551      */
checkAttributeValidity( CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures)552     public static void checkAttributeValidity(
553             CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
554         for (Iterator<String> it2 = item.iterator(); it2.hasNext(); ) {
555             String xpath = it2.next();
556             XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath));
557             for (int i = 0; i < parts.size(); ++i) {
558                 if (parts.getAttributeCount(i) == 0) {
559                     continue;
560                 }
561                 String element = parts.getElement(i);
562                 Map<String, String> attributes = parts.getAttributes(i);
563                 for (Iterator<String> it3 = attributes.keySet().iterator(); it3.hasNext(); ) {
564                     String attribute = it3.next();
565                     String avalue = attributes.get(attribute);
566                     checkValidity(xpath, element, attribute, avalue, badCodes, xpathFailures);
567                 }
568             }
569         }
570     }
571 
572     /** Internal */
show(Collection<String> avalues)573     private String show(Collection<String> avalues) {
574         StringBuffer result = new StringBuffer("{");
575         boolean first = true;
576         for (Iterator<String> it3 = avalues.iterator(); it3.hasNext(); ) {
577             if (first) first = false;
578             else result.append(", ");
579             result.append(it3.next().toString());
580         }
581         result.append("}");
582         return result.toString();
583     }
584 
585     /** Internal function */
checkValidity( String xpath, String element, String attribute, String avalue, Map<String, Set<String>> results, Set<String> xpathsFailing)586     private static void checkValidity(
587             String xpath,
588             String element,
589             String attribute,
590             String avalue,
591             Map<String, Set<String>> results,
592             Set<String> xpathsFailing) {
593         StandardCodes codes = StandardCodes.make();
594         if (attribute.equals("type")) {
595             boolean checkReplacements = xpath.indexOf("/identity") < 0;
596             if (element.equals("currency"))
597                 checkCodes(
598                         xpath,
599                         "currency",
600                         avalue,
601                         codes,
602                         results,
603                         xpathsFailing,
604                         checkReplacements);
605             else if (element.equals("script"))
606                 checkCodes(
607                         xpath, "script", avalue, codes, results, xpathsFailing, checkReplacements);
608             else if (element.equals("territory"))
609                 checkCodes(
610                         xpath,
611                         "territory",
612                         avalue,
613                         codes,
614                         results,
615                         xpathsFailing,
616                         checkReplacements);
617             else if (element.equals("language"))
618                 checkCodes(
619                         xpath,
620                         "language",
621                         avalue,
622                         codes,
623                         results,
624                         xpathsFailing,
625                         checkReplacements);
626             else if (element.equals("zone"))
627                 checkCodes(xpath, "tzid", avalue, codes, results, xpathsFailing, checkReplacements);
628         }
629     }
630 
631     /**
632      * Internal function
633      *
634      * @param checkReplacements TODO
635      */
636     private static void checkCodes(
637             String xpath,
638             String code,
639             String avalue,
640             StandardCodes codes,
641             Map<String, Set<String>> results,
642             Set<String> xpathFailures,
643             boolean checkReplacements) {
644         // ok if code is found AND it has no replacement
645         if (codes.getData(code, avalue) != null
646                 && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;
647 
648         if (xpathFailures != null) xpathFailures.add(xpath);
649         if (results == null) return;
650         Set<String> s = results.get(code);
651         if (s == null) {
652             s = new TreeSet<>();
653             results.put(code, s);
654         }
655         s.add(avalue);
656     }
657 
658     /**
659      * Verify that a small set of locales (currently just English) has everything translated.
660      *
661      * @throws IOException
662      */
663     public void TestCompleteLocales() {
664         // just test English for now
665         if (english == null) english = cldrFactory.make("en", true);
666         checkTranslatedCodes(english);
667     }
668 
669     /**
670      * Tests that the file contains codes for all main display name ids: language, script,
671      * territory, tzid, currency.
672      */
673     private void checkTranslatedCodes(CLDRFile cldrfile) {
674         StandardCodes codes = StandardCodes.make();
675         checkTranslatedCode(
676                 cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
677         // can't check timezones for English.
678         // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
679         checkTranslatedCode(
680                 cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
681         checkTranslatedCode(
682                 cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
683         checkTranslatedCode(
684                 cldrfile,
685                 codes,
686                 "territory",
687                 "//ldml/localeDisplayNames/territories/territory",
688                 "");
689         checkTranslatedCode(
690                 cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
691     }
692 
693     private void checkTranslatedCode(
694             CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
695         Map<String, Set<String>> completionExceptions = getCompletionExceptions();
696         Set<String> codeItems = codes.getGoodAvailableCodes(type);
697         int count = 0;
698         Set<String> exceptions = completionExceptions.get(type);
699         for (String code : codeItems) {
700             String rfcname = codes.getData(type, code);
701             // if (rfcname.equals("ZZ")) continue;
702             ++count;
703             if (rfcname.equals("PRIVATE USE")) continue;
704             String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
705             String v = cldrfile.getStringValue(fullFragment);
706             if (v == null) {
707                 errln(
708                         "Missing translation for:\t<"
709                                 + type
710                                 + " type=\""
711                                 + code
712                                 + "\">"
713                                 + rfcname
714                                 + "</"
715                                 + type
716                                 + ">");
717                 continue;
718             }
719             String translation = v;
720             if (translation.equals(code)) {
721                 if (exceptions != null && exceptions.contains(code)) continue;
722                 errln(
723                         "Translation = code for:\t<"
724                                 + type
725                                 + " type=\""
726                                 + code
727                                 + "\">"
728                                 + rfcname
729                                 + "</"
730                                 + type
731                                 + ">");
732                 continue;
733             }
734         }
735         logln("Total " + type + ":\t" + count);
736     }
737 
738     private Map<String, Set<String>> theCompletionExceptions = null;
739 
740     private Map<String, Set<String>> getCompletionExceptions() {
741         if (theCompletionExceptions == null) {
742             theCompletionExceptions = new HashMap<>();
743             final Set<String> scriptExceptions = new HashSet<>();
744             scriptExceptions.add("Cham");
745             scriptExceptions.add("Modi");
746             scriptExceptions.add("Thai");
747             scriptExceptions.add("Toto");
748             theCompletionExceptions.put("script", scriptExceptions);
749         }
750         return theCompletionExceptions;
751     }
752 
753     // <territoryContainment><group type="001" contains="002 009 019 142 150"/>
754     // <languageData><language type="af" scripts="Latn" territories="ZA"/>
755     void getSupplementalData(
756             Map<String, Set<String>> language_scripts,
757             Map<String, Set<String>> language_territories,
758             Map<String, Set<String>> group_territory,
759             Map<String, Set<String>> territory_currencies,
760             Map<String, Map<String, String>> aliases) {
761 
762         boolean SHOW = false;
763         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
764         CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
765         for (Iterator<String> it = supp.iterator(); it.hasNext(); ) {
766             String path = it.next();
767             try {
768                 XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
769                 Map<String, String> m;
770                 String type = "";
771                 if (aliases != null && parts.findElement("alias") >= 0) {
772                     m = parts.findAttributes(type = "languageAlias");
773                     if (m == null) m = parts.findAttributes(type = "territoryAlias");
774                     if (m != null) {
775                         Map top = aliases.get(type);
776                         if (top == null) {
777                             aliases.put(type, top = new TreeMap());
778                         }
779                         top.put(m.get("type"), m.get("replacement"));
780                     }
781                 }
782                 if (territory_currencies != null) {
783                     m = parts.findAttributes("region");
784                     if (m != null) {
785                         String region = m.get("iso3166");
786                         Set s = territory_currencies.get(region);
787                         if (s == null) {
788                             territory_currencies.put(region, s = new LinkedHashSet());
789                         }
790                         m = parts.findAttributes("currency");
791                         if (m == null) {
792                             warnln("missing currency for region: " + path);
793                             continue;
794                         }
795                         String currency = m.get("iso4217");
796                         s.add(currency);
797                         m = parts.findAttributes("alternate");
798                         String alternate = m == null ? null : (String) m.get("iso4217");
799                         if (alternate != null) {
800                             s.add(alternate);
801                         }
802                         continue;
803                     }
804                 }
805                 m = parts.findAttributes("group");
806                 if (m != null) {
807                     if (group_territory == null) continue;
808                     type = m.get("type");
809                     String contains = m.get("contains");
810                     group_territory.put(
811                             type, new TreeSet(CldrUtility.splitList(contains, ' ', true)));
812                     continue;
813                 }
814                 m = parts.findAttributes("language");
815                 if (m == null) continue;
816                 String language = m.get("type");
817                 String scripts = m.get("scripts");
818                 if (scripts == null) language_scripts.put(language, new TreeSet<String>());
819                 else {
820                     language_scripts.put(
821                             language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true)));
822                     if (SHOW)
823                         System.out.println(
824                                 getIDAndLocalization(language)
825                                         + "\t\t"
826                                         + getIDAndLocalization(language_scripts.get(language)));
827                 }
828                 String territories = m.get("territories");
829                 if (territories == null) language_territories.put(language, new TreeSet<String>());
830                 else {
831                     language_territories.put(
832                             language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true)));
833                     if (SHOW)
834                         System.out.println(
835                                 getIDAndLocalization(language)
836                                         + "\t\t"
837                                         + getIDAndLocalization(language_territories.get(language)));
838                 }
839             } catch (RuntimeException e) {
840                 throw (IllegalArgumentException)
841                         new IllegalArgumentException("Failure with: " + path).initCause(e);
842             }
843         }
844     }
845 
846     /** Verify that the minimal localizations are present. */
TestMinimalLocalization()847     public void TestMinimalLocalization() throws IOException {
848         if (disableUntilLater("TestMinimalLocalization")) return;
849 
850         boolean testDraft = false;
851         Map<String, Set<String>> language_scripts = new HashMap<>();
852         Map<String, Set<String>> language_territories = new HashMap<>();
853         getSupplementalData(language_scripts, language_territories, null, null, null);
854         LanguageTagParser localIDParser = new LanguageTagParser();
855         // see
856         // http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
857         int[] failureCount = new int[1];
858         int[] warningCount = new int[1];
859         for (Iterator<String> it = languageLocales.iterator(); it.hasNext(); ) {
860             String locale = it.next();
861             if (locale.equals(LocaleNames.ROOT)) continue;
862             // if (!locale.equals("zh_Hant")) continue;
863 
864             CLDRFile item = cldrFactory.make(locale, true);
865             if (!testDraft && item.isDraft()) {
866                 logln(getLocaleAndName(locale) + "\tskipping draft");
867                 continue;
868             }
869             UnicodeSet exemplars = getFixedExemplarSet(locale, item);
870             CLDRFile missing = SimpleFactory.makeFile(locale);
871             failureCount[0] = 0;
872             warningCount[0] = 0;
873             localIDParser.set(locale);
874             String language = localIDParser.getLanguage();
875             logln("Testing: " + locale);
876             // languages
877             Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES);
878             languages.add(language);
879             // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
880             // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6
881 
882             checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);
883 
884             /*
885              * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
886              * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
887              * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
888              */
889 
890             Set<String> scripts = new TreeSet<>();
891             scripts.add("Latn");
892             Set<String> others = language_scripts.get(language);
893             if (others != null) scripts.addAll(others);
894             checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);
895 
896             Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES);
897             others = language_territories.get(language);
898             if (others != null) countries.addAll(others);
899             checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);
900 
901             Set<String> currencies = new TreeSet<>();
902             StandardCodes sc = StandardCodes.make();
903             for (Iterator<String> it2 = countries.iterator(); it2.hasNext(); ) {
904                 String country = it2.next();
905                 Set<String> countryCurrencies = sc.getMainCurrencies(country);
906                 if (countryCurrencies == null) {
907                     errln("Internal Error: no currencies for " + country + ", locale: " + locale);
908                 } else {
909                     currencies.addAll(countryCurrencies);
910                 }
911             }
912             checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
913             checkForItems(
914                     item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);
915 
916             // context=format and width=wide; context=stand-alone & width=abbreviated
917             Set<String> months = new TreeSet<>();
918             for (int i = 1; i <= 12; ++i) months.add(i + "");
919             Set<String> days =
920                     new TreeSet<>(
921                             Arrays.asList(
922                                     new String[] {
923                                         "sun", "mon", "tue", "wed", "thu", "fri", "sat"
924                                     }));
925             for (int i = -7; i < 0; ++i) {
926                 checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
927             }
928 
929             String filename = "missing_" + locale + ".xml";
930             if (failureCount[0] > 0 || warningCount[0] > 0) {
931                 PrintWriter out =
932                         FileUtilities.openUTF8Writer(
933                                 CLDRPaths.GEN_DIRECTORY + "missing/", filename);
934                 missing.write(out);
935                 out.close();
936                 // String s = getIDAndLocalization(missing);
937                 String message =
938                         "missing localizations, creating file"
939                                 + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename);
940                 if (failureCount[0] > 0) warnln(getLocaleAndName(locale) + "\t" + message);
941                 else logln(getLocaleAndName(locale) + "\tpossibly " + message);
942             } else {
943                 new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
944             }
945         }
946     }
947 
948     /** Internal */
getDateKey(String monthOrDay, String width, String code)949     private String getDateKey(String monthOrDay, String width, String code) {
950         // String context = width.equals("narrow") ? "format" : "stand-alone";
951         return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
952                 + monthOrDay
953                 + "s/"
954                 + monthOrDay
955                 + "Context[@type=\"format\"]/"
956                 + monthOrDay
957                 + "Width[@type=\""
958                 + width
959                 + "\"]/"
960                 + monthOrDay
961                 + "[@type=\""
962                 + code
963                 + "\"]";
964     }
965 
966     /** Internal */
getDateKey(int type, String code)967     private String getDateKey(int type, String code) {
968         // type is 6..4 for months abbrev..narrow, 3..0 for days short..narrow
969         int monthOrDayType = 0, widthType = type;
970         if (type >= 4) {
971             monthOrDayType = 1;
972             widthType -= 4;
973         }
974         return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
975     }
976 
977     /**
978      * @param item
979      * @param codes
980      * @param missing
981      * @param exemplarTest TODO TODO
982      */
checkForItems( CLDRFile item, Set<String> codes, int type, CLDRFile missing, int failureCount[], UnicodeSet exemplarTest)983     private void checkForItems(
984             CLDRFile item,
985             Set<String> codes,
986             int type,
987             CLDRFile missing,
988             int failureCount[],
989             UnicodeSet exemplarTest) {
990         // check codes
991         for (Iterator<String> it2 = codes.iterator(); it2.hasNext(); ) {
992             String code = it2.next();
993             String key;
994             if (type >= 0) {
995                 key = CLDRFile.getKey(type, code);
996             } else {
997                 key = getDateKey(-type - 1, code);
998             }
999             String v = item.getStringValue(key);
1000             String rootValue = resolvedRoot.getStringValue(key);
1001             if (v == null
1002                     || v.equals(rootValue)
1003                             && (exemplarTest == null || !exemplarTest.containsAll(rootValue))) {
1004                 String englishValue = resolvedEnglish.getStringValue(key);
1005                 String transValue;
1006                 if (englishValue != null) {
1007                     transValue = englishValue;
1008                 } else {
1009                     transValue = code;
1010                 }
1011                 missing.add(key, "TODO " + transValue);
1012                 failureCount[0]++;
1013             } else {
1014                 logln("\t" + code + "\t" + v);
1015             }
1016         }
1017     }
1018 
1019     /*
1020      * void showTestStr() {
1021      * LocaleIDParser lparser = new LocaleIDParser();
1022      * Collection s = split(teststr,',', true, new ArrayList());
1023      * for (Iterator it = s.iterator(); it.hasNext();) {
1024      * String item = (String)it.next();
1025      * lparser.set(item.replace('?', '_'));
1026      * String region = lparser.getRegion();
1027      * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
1028      * //System.out.print(getLocalization(region) + ", ");
1029      * }
1030      * }
1031      * static String teststr =
1032      * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
1033      * ;
1034      */
1035 
1036     CldrUtility.CollectionTransform EnglishName =
1037             new CldrUtility.CollectionTransform() {
1038                 @Override
1039                 public Object transform(Object source) {
1040                     // TODO Auto-generated method stub
1041                     return getLocalization(source.toString()) + " (" + source + ")";
1042                 }
1043             };
1044 
1045     CldrUtility.CollectionTransform EnglishCurrencyName =
1046             new CldrUtility.CollectionTransform() {
1047                 @Override
1048                 public Object transform(Object source) {
1049                     if (english == null) english = cldrFactory.make("en", true);
1050                     return english.getName("currency", source.toString()) + " (" + source + ")";
1051                 }
1052             };
1053 
1054     /** Tests that the supplemental data is well-formed. */
TestSupplementalData()1055     public void TestSupplementalData() {
1056         Map<String, Set<String>> language_scripts = new TreeMap<>();
1057         Map<String, Set<String>> language_territories = new TreeMap<>();
1058         Map<String, Set<String>> groups = new TreeMap<>();
1059         Map<String, Set<String>> territory_currencies = new TreeMap<>();
1060         Map<String, Map<String, String>> aliases = new TreeMap<>();
1061         getSupplementalData(
1062                 language_scripts, language_territories, groups, territory_currencies, aliases);
1063         Set<String> sTerritories = new TreeSet<>();
1064         for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext(); ) {
1065             sTerritories.addAll(it.next());
1066         }
1067         StandardCodes sc = StandardCodes.make();
1068         Set<String> fullTerritories = sc.getAvailableCodes("territory");
1069         Set<String> fullLanguages = sc.getAvailableCodes("language");
1070 
1071         Set<String> allLanguages = new TreeSet<>(language_scripts.keySet());
1072         allLanguages.addAll(language_territories.keySet());
1073         for (Iterator<String> it = allLanguages.iterator(); it.hasNext(); ) {
1074             Object language = it.next();
1075             Set<String> scripts = language_scripts.get(language);
1076             Set<String> territories = language_territories.get(language);
1077             logln(
1078                     EnglishName.transform(language)
1079                             + " scripts: "
1080                             + EnglishName.transform(scripts)
1081                             + " territories: "
1082                             + EnglishName.transform(territories));
1083         }
1084 
1085         Map<String, String> changedLanguage = new TreeMap<>();
1086         for (Iterator<String> it = fullLanguages.iterator(); it.hasNext(); ) {
1087             String code = it.next();
1088             List<String> data = sc.getFullData("language", code);
1089             if (data.size() < 3) {
1090                 System.out.println("data problem: " + data);
1091                 continue;
1092             }
1093             String replacement = data.get(2);
1094             if (!replacement.equals("")) {
1095                 if (!replacement.equals("--")) changedLanguage.put(code, replacement);
1096                 continue;
1097             }
1098         }
1099 
1100         // remove private use, deprecated, groups
1101         Set<String> standardTerritories = new TreeSet<>();
1102         Map<String, String> changedTerritory = new TreeMap<>();
1103         for (Iterator<String> it = fullTerritories.iterator(); it.hasNext(); ) {
1104             String code = it.next();
1105             if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ")
1106             List<String> data = sc.getFullData("territory", code);
1107             if (data.get(0).equals("PRIVATE USE")) continue;
1108             if (!data.get(2).equals("")) {
1109                 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2));
1110                 continue;
1111             }
1112             standardTerritories.add(code);
1113         }
1114         standardTerritories.removeAll(groups.keySet());
1115 
1116         if (!standardTerritories.containsAll(sTerritories)) {
1117             TreeSet<String> extras = new TreeSet<>(sTerritories);
1118             extras.removeAll(standardTerritories);
1119             errln(
1120                     "Supplemental Language Territories contain illegal values: "
1121                             + EnglishName.transform(extras));
1122         }
1123         if (!sTerritories.containsAll(standardTerritories)) {
1124             TreeSet<String> extras = new TreeSet<>(standardTerritories);
1125             extras.removeAll(sTerritories);
1126             warnln("Missing Language Territories: " + EnglishName.transform(extras));
1127         }
1128 
1129         // now test currencies
1130         logln("Check that no illegal territories are used");
1131         if (!standardTerritories.containsAll(territory_currencies.keySet())) {
1132             TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet());
1133             extras.removeAll(fullTerritories);
1134             if (extras.size() != 0)
1135                 errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras));
1136             extras = new TreeSet<>(territory_currencies.keySet());
1137             extras.retainAll(fullTerritories);
1138             extras.removeAll(standardTerritories);
1139             if (extras.size() != 0)
1140                 warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras));
1141         }
1142         logln("Check that no territories are missing");
1143         if (!territory_currencies.keySet().containsAll(standardTerritories)) {
1144             TreeSet<String> extras = new TreeSet<>(standardTerritories);
1145             extras.removeAll(territory_currencies.keySet());
1146             errln("Currency info -- Missing Territories: " + EnglishName.transform(extras));
1147         }
1148         Set<String> currencies = new TreeSet<>();
1149         for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext(); ) {
1150             currencies.addAll(it.next());
1151         }
1152         logln("Check that no illegal currencies are used");
1153         Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency"));
1154         // first remove non-ISO
1155         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext(); ) {
1156             String code = it.next();
1157             List<String> data = sc.getFullData("currency", code);
1158             if ("X".equals(data.get(3))) it.remove();
1159         }
1160         if (!legalCurrencies.containsAll(currencies)) {
1161             TreeSet<String> extras = new TreeSet<>(currencies);
1162             extras.removeAll(legalCurrencies);
1163             errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras));
1164         }
1165         logln("Check that there are no missing currencies");
1166         if (!currencies.containsAll(legalCurrencies)) {
1167             TreeSet<String> extras = new TreeSet<>(legalCurrencies);
1168             extras.removeAll(currencies);
1169             Map<String, Set<String>> failures = new TreeMap<>();
1170             for (Iterator<String> it = extras.iterator(); it.hasNext(); ) {
1171                 String code = it.next();
1172                 List<String> data = sc.getFullData("currency", code);
1173                 if (data.get(1).equals("ZZ")) continue;
1174                 String type = data.get(3) + "/" + data.get(1);
1175                 Set<String> s = failures.get(type);
1176                 if (s == null) failures.put(type, s = new TreeSet<>());
1177                 s.add(code);
1178             }
1179             for (Iterator<String> it = failures.keySet().iterator(); it.hasNext(); ) {
1180                 String type = it.next();
1181                 Set<String> s = failures.get(type);
1182                 warnln(
1183                         "Currency info -- Missing Currencies: "
1184                                 + type
1185                                 + "\t \u2192 "
1186                                 + EnglishCurrencyName.transform(s));
1187             }
1188         }
1189         logln("Missing English currency names");
1190         for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext(); ) {
1191             String currency = it.next();
1192             String name = english.getName("currency", currency);
1193             if (name == null) {
1194                 String standardName = sc.getFullData("currency", currency).get(0);
1195                 logln("\t\t\t<currency type=\"" + currency + "\">");
1196                 logln("\t\t\t\t<displayName>" + standardName + "</displayName>");
1197                 logln("\t\t\t</currency>");
1198             }
1199         }
1200         logln("Check Aliases");
1201         for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext(); ) {
1202             // the first part of the mapping had better not be in the standardTerritories
1203             String key = it.next();
1204             Map<String, String> submap = aliases.get(key);
1205             if (key.equals("territoryAlias")) {
1206                 checkEqual(key, submap, changedTerritory);
1207             } else if (key.equals("languageAlias")) {
1208                 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext(); ) {
1209                     String k = it2.next();
1210                     String value = submap.get(k);
1211                     if (value.indexOf("_") >= 0) it2.remove();
1212                 }
1213                 checkEqual(key, submap, changedLanguage);
1214             }
1215         }
1216     }
1217 
1218     /** */
checkEqual(String title, Map map1, Map map2)1219     private void checkEqual(String title, Map map1, Map map2) {
1220         Set foo = new TreeSet(map1.keySet());
1221         foo.removeAll(map2.keySet());
1222         if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo);
1223         foo = new TreeSet(map2.keySet());
1224         foo.removeAll(map1.keySet());
1225         if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo);
1226         foo = map2.keySet();
1227         foo.retainAll(map1.keySet());
1228         for (Iterator it = foo.iterator(); it.hasNext(); ) {
1229             Object key = it.next();
1230             Object result1 = map1.get(key);
1231             Object result2 = map2.get(key);
1232             if (!result1.equals(result2))
1233                 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2);
1234         }
1235     }
1236 
1237     /** Test that the zone ids are well-formed. */
TestZones()1238     public void TestZones() {
1239         StandardCodes sc = StandardCodes.make();
1240 
1241         Map<String, String> defaultNames = new TreeMap();
1242         Map<String, String> old_new = sc.getZoneLinkold_new();
1243         Set<String> core = sc.getZoneData().keySet();
1244         logln("Checking for collisions with last field");
1245         for (Iterator<String> it = core.iterator(); it.hasNext(); ) {
1246             String currentItem = it.next();
1247             String defaultName = TimezoneFormatter.getFallbackName(currentItem);
1248             String fullName = defaultNames.get(defaultName);
1249             if (fullName == null) defaultNames.put(defaultName, currentItem);
1250             else {
1251                 errln("Collision between: " + currentItem + " AND " + fullName);
1252             }
1253         }
1254 
1255         logln("Checking that all links are TO canonical zones");
1256         Set<String> s = new TreeSet<>(old_new.values());
1257         s.removeAll(core);
1258         if (s.size() != 0) {
1259             errln("Links go TO zones that are not canonical! " + s);
1260         }
1261 
1262         logln("Checking that no links are FROM canonical zones");
1263         s = new TreeSet<>(core);
1264         s.retainAll(old_new.keySet());
1265         if (s.size() != 0) {
1266             errln("Links go FROM zones that are canonical! " + s);
1267         }
1268 
1269         logln("Checking that the zones with rule data are all canonical");
1270         Set<String> zonesWithRules = sc.getZone_rules().keySet();
1271         s.clear();
1272         s.addAll(zonesWithRules);
1273         s.removeAll(core);
1274         if (s.size() != 0) logln("Zones with rules that are not canonical: " + s);
1275 
1276         logln("Checking that the rule data are all canonical");
1277         s.clear();
1278         s.addAll(core);
1279         s.removeAll(zonesWithRules);
1280         s.removeAll(old_new.keySet());
1281         if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s);
1282 
1283         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext(); ) {
1284             String oldItem = it.next();
1285             logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem));
1286         }
1287         Map<String, Set<String>> new_old = new TreeMap<>();
1288         for (Iterator<String> it = core.iterator(); it.hasNext(); ) {
1289             new_old.put(it.next(), new TreeSet<String>());
1290         }
1291         for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext(); ) {
1292             String oldItem = it.next();
1293             String newItem = old_new.get(oldItem);
1294             Set<String> oldItems = new_old.get(newItem);
1295             if (oldItems == null) { // try recursing
1296                 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem);
1297                 continue;
1298                 // new_old.put(oldOne, oldItems = new TreeSet());
1299             }
1300             oldItems.add(oldItem);
1301         }
1302         for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext(); ) {
1303             String newOne = it.next();
1304             Set<String> oldItems = new_old.get(newOne);
1305             logln(newOne + "\t" + oldItems);
1306         }
1307     }
1308 
TestNarrowForms()1309     public void TestNarrowForms() {
1310         if (disableUntilLater("TestMinimalLocalization")) return;
1311 
1312         for (Iterator<String> it = locales.iterator(); it.hasNext(); ) {
1313             String locale = it.next();
1314             logln("Testing: " + getLocaleAndName(locale));
1315             BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale));
1316             CLDRFile item = cldrFactory.make(locale, false);
1317             // Walk through all the xpaths, adding to currentValues
1318             // Whenever two values for the same xpath are different, we remove from currentValues,
1319             // and add to okValues
1320             for (Iterator<String> it2 = item.iterator(); it2.hasNext(); ) {
1321                 String xpath = it2.next();
1322                 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) {
1323                     String value = item.getStringValue(xpath);
1324                     // logln("\tTesting: " + value + "\t path: " + xpath);
1325                     int end = getXGraphemeClusterBoundary(bi, value, 0);
1326                     if (end == value.length()) continue;
1327                     errln(
1328                             getLocaleAndName(locale)
1329                                     + "\tillegal narrow value "
1330                                     + value
1331                                     + "\t path: "
1332                                     + xpath);
1333                     surveyInfo.add(
1334                             locale
1335                                     + "\t"
1336                                     + xpath
1337                                     + "\t'"
1338                                     + value
1339                                     + "' is too wide for a \"narrow\" value.");
1340                 }
1341             }
1342         }
1343     }
1344 
1345     static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]");
1346     static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]");
1347 
getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1348     private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) {
1349         if (value.length() <= 1) return 1;
1350 
1351         bi.setText(value);
1352         if (start != 0) bi.preceding(start + 1); // backup one
1353         int current = bi.next();
1354         // link any digits
1355         if (DIGIT.contains(UTF16.charAt(value, current - 1))) {
1356             current = DIGIT.findIn(value, current, true);
1357         }
1358         // continue collecting any additional characters that are M or grapheme extend
1359         return XGRAPHEME.findIn(value, current, true);
1360     }
1361 }
1362