xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/test/TestSupplementalData.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.test;
2 
import com.ibm.icu.impl.Relation;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
26 
27 public class TestSupplementalData {
28     static CLDRFile english;
29     private static SupplementalDataInfo supplementalData;
30     private static StandardCodes sc;
31 
main(String[] args)32     public static void main(String[] args) throws IOException {
33         // genData();
34         // if (true) return;
35         Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
36         english = cldrFactory.make("en", true);
37         root = cldrFactory.make("root", true);
38         supplementalData = SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
39         sc = StandardCodes.make();
40 
41         checkPlurals();
42 
43         System.out.println("Skipped Elements: " + supplementalData.getSkippedElements());
44         checkAgainstLanguageScript();
45         checkTerritoryMapping();
46 
47         checkTelephoneCodeData();
48     }
49 
checkPlurals()50     private static void checkPlurals() {
51         Relation<PluralInfo, String> pluralsToLocale =
52                 Relation.<PluralInfo, String>of(
53                         new HashMap<PluralInfo, Set<String>>(), TreeSet.class);
54         for (String locale : new TreeSet<>(supplementalData.getPluralLocales())) {
55             PluralInfo pluralInfo = supplementalData.getPlurals(locale);
56             System.out.println(locale + ":\t" + pluralInfo);
57             pluralsToLocale.put(pluralInfo, locale);
58         }
59         String locale = "en_US";
60         PluralInfo pluralInfo = supplementalData.getPlurals(locale);
61         System.out.println(locale + ":\t" + pluralInfo);
62 
63         for (PluralInfo pluralInfo2 : pluralsToLocale.keySet()) {
64             System.out.println("Locales: \t" + pluralsToLocale.getAll(pluralInfo2));
65             final Map<Count, String> typeToExamples = pluralInfo2.getCountToStringExamplesMap();
66             for (Count type : typeToExamples.keySet()) {
67                 System.out.println(
68                         "\tPlural Code: \t" + type + " \t=>\t" + typeToExamples.get(type));
69             }
70             System.out.println();
71         }
72     }
73 
checkTelephoneCodeData()74     private static void checkTelephoneCodeData() {
75         System.out.println("==== territories for telephoneCodeData ====");
76         System.out.println(supplementalData.getTerritoriesForTelephoneCodeInfo());
77         System.out.println("==== telephone code data for 001 ====");
78         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("001"));
79         System.out.println("==== telephone code data for US ====");
80         System.out.println(supplementalData.getTelephoneCodeInfoForTerritory("US"));
81         System.out.println("==== all telephoneCodeData ====");
82         System.out.println(supplementalData.getTerritoryToTelephoneCodeInfo());
83     }
84 
85     static Matcher numericTerritory = PatternCache.get("[0-9]{3}").matcher("");
86     private static CLDRFile root;
87 
checkTerritoryMapping()88     private static void checkTerritoryMapping() {
89         Relation<String, String> alpha3 = supplementalData.getAlpha3TerritoryMapping();
90         Set<String> temp = new TreeSet<>(sc.getAvailableCodes("territory"));
91         for (Iterator<String> it = temp.iterator(); it.hasNext(); ) {
92             String code = it.next();
93             if (numericTerritory.reset(code).matches()) {
94                 it.remove();
95                 continue;
96             }
97             // if (sc.getFullData("territory", code).get(0).equals("PRIVATE USE")) {
98             // it.remove();
99             // continue;
100             // }
101         }
102         showAnyDifferences("alpha3", alpha3.keySet(), "sc", temp);
103     }
104 
showAnyDifferences( String title, Set<String> set, String title2, Set<String> set2)105     private static void showAnyDifferences(
106             String title, Set<String> set, String title2, Set<String> set2) {
107         if (!set.equals(set2)) {
108             showFirstMinusSecond("Failure " + title + "-" + title2 + ": ", set, set2);
109             showFirstMinusSecond("Failure " + title2 + "-" + title + ": ", set2, set);
110         }
111     }
112 
showFirstMinusSecond( String title, Set<String> name, Set<String> availableCodes)113     private static void showFirstMinusSecond(
114             String title, Set<String> name, Set<String> availableCodes) {
115         Set<String> temp = getFirstMinusSecond(name, availableCodes);
116         if (!temp.isEmpty()) {
117             System.out.println(title + getFirstMinusSecond(name, availableCodes));
118         }
119     }
120 
getFirstMinusSecond(Set<String> name, Set<String> availableCodes)121     private static Set<String> getFirstMinusSecond(Set<String> name, Set<String> availableCodes) {
122         Set<String> temp = new TreeSet<>(name);
123         temp.removeAll(availableCodes);
124         return temp;
125     }
126 
checkAgainstLanguageScript()127     static void checkAgainstLanguageScript() {
128         Relation<String, String> otherTerritoryToLanguages =
129                 Relation.<String, String>of(
130                         new TreeMap<String, Set<String>>(), TreeSet.class, null);
131         // get other language data
132         for (String language : sc.getGoodAvailableCodes("language")) {
133             Set<BasicLanguageData> newLanguageData =
134                     supplementalData.getBasicLanguageData(language);
135             if (newLanguageData != null) {
136                 for (BasicLanguageData languageData : newLanguageData) {
137                     Set<String> territories = new TreeSet<>(languageData.getTerritories());
138                     territories.addAll(languageData.getTerritories());
139                     if (territories != null) {
140                         Set<String> scripts = new TreeSet<>(languageData.getScripts());
141                         scripts.addAll(languageData.getScripts());
142                         if (scripts == null || scripts.size() < 2) {
143                             otherTerritoryToLanguages.putAll(territories, language);
144                         } else {
145                             for (String script : scripts) {
146                                 otherTerritoryToLanguages.putAll(
147                                         territories, language + "_" + script);
148                             }
149                         }
150                     }
151                 }
152             }
153         }
154         // compare them, listing differences
155         for (String territory : sc.getGoodAvailableCodes("territory")) {
156             Set<String> languages = supplementalData.getTerritoryToLanguages(territory);
157             Set<String> otherLanguages = otherTerritoryToLanguages.getAll(territory);
158             if (otherLanguages == null) otherLanguages = Collections.emptySet();
159             if (!Objects.equals(languages, otherLanguages)) {
160                 Set<String> languagesLeftover = new TreeSet<>(languages);
161                 languagesLeftover.removeAll(otherLanguages);
162                 Set<String> otherLanguagesLeftover = new TreeSet<>(otherLanguages);
163                 otherLanguagesLeftover.removeAll(languages);
164                 String territoryString = english.getName(CLDRFile.TERRITORY_NAME, territory);
165                 if (otherLanguagesLeftover.size() != 0) {
166                     for (String other : otherLanguagesLeftover) {
167                         String name = english.getName(other);
168                         System.out.println(
169                                 territoryString + "\t" + territory + "\t" + name + "\t" + other);
170                     }
171                 }
172             }
173         }
174     }
175 
176     /**
177      * Temporary function to transform data
178      *
179      * @throws IOException
180      */
genData()181     public static void genData() throws IOException {
182         BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt");
183         Set<Pair> sorted = new TreeSet<>();
184         while (true) {
185             String line = codes.readLine();
186             if (line == null) break;
187             line = line.split("#")[0].trim();
188             if (line.length() == 0) continue;
189             String[] sourceValues = line.split("\\s+");
190             String[] values = new String[5];
191             for (int i = 0; i < values.length; ++i) {
192                 if (i >= sourceValues.length || sourceValues[i].equals("-")) values[i] = null;
193                 else values[i] = sourceValues[i];
194             }
195             String alpha2 = values[0];
196             String numeric = values[1];
197             String alpha3 = values[2];
198             String internet = values[3];
199             if (internet != null) {
200                 internet = internet.replace("/", " ");
201             }
202             if (internet != null) internet = internet.toUpperCase();
203             String fips10 = values[4];
204             Pair item =
205                     new Pair(
206                             alpha2,
207                             new Pair(numeric, new Pair(alpha3, new Pair(fips10, internet))));
208             sorted.add(item);
209         }
210         for (Pair item : sorted) {
211             // <territoryCodes type="CM" numeric="120" alpha3="CMR"/>
212             System.out.print("<territoryCodes");
213             Comparable first = item.getFirst();
214             showNonNull("type", first, null);
215             item = (Pair) item.getSecond();
216             showNonNull("numeric", item.getFirst(), null);
217             item = (Pair) item.getSecond();
218             showNonNull("alpha3", item.getFirst(), null);
219             item = (Pair) item.getSecond();
220             showNonNull("fips10", item.getFirst(), first);
221             showNonNull("internet", item.getSecond(), first);
222             System.out.println("/>");
223         }
224         codes.close();
225     }
226 
showNonNull(String title, Object first, Object noDup)227     private static void showNonNull(String title, Object first, Object noDup) {
228         if (first != null && !first.equals(noDup)) {
229             System.out.print(" " + title + "=\"" + first + "\"");
230         }
231     }
232 }
233