xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowRegionalVariants.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Objects;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.text.UnicodeSet;
6 import java.io.IOException;
7 import java.io.PrintWriter;
8 import java.util.ArrayList;
9 import java.util.Arrays;
10 import java.util.HashSet;
11 import java.util.LinkedHashMap;
12 import java.util.LinkedHashSet;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import org.unicode.cldr.draft.FileUtilities;
18 import org.unicode.cldr.tool.Option.Options;
19 import org.unicode.cldr.util.CLDRConfig;
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.CLDRFile.DraftStatus;
22 import org.unicode.cldr.util.CLDRLocale;
23 import org.unicode.cldr.util.CLDRPaths;
24 import org.unicode.cldr.util.ChainedMap;
25 import org.unicode.cldr.util.ChainedMap.M3;
26 import org.unicode.cldr.util.ChainedMap.M4;
27 import org.unicode.cldr.util.Counter;
28 import org.unicode.cldr.util.Factory;
29 import org.unicode.cldr.util.LanguageTagParser;
30 import org.unicode.cldr.util.PathHeader;
31 import org.unicode.cldr.util.PathHeader.SectionId;
32 import org.unicode.cldr.util.StandardCodes;
33 import org.unicode.cldr.util.SupplementalDataInfo;
34 
35 public class ShowRegionalVariants {
36     private static String MY_DIR;
37 
38     private static final boolean SKIP_SUPPRESSED_PATHS = true;
39 
40     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
41     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
42             CONFIG.getSupplementalDataInfo();
43     private static final Factory FACTORY = CONFIG.getCldrFactory();
44     private static final CLDRFile ENGLISH = CONFIG.getEnglish();
45     private static final CLDRLocale ROOT = CLDRLocale.getInstance("root");
46     // private static final CLDRLocale en_US_POSIX = CLDRLocale.getInstance("en_US_POSIX");
47     private static final CLDRLocale SWISS_HIGH_GERMAN = CLDRLocale.getInstance("de_CH");
48 
49     static final Options myOptions = new Options();
50 
51     enum MyOptions {
52         targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/regional/", "target output file."),
53         ;
54 
55         // boilderplate
56         final Option option;
57 
MyOptions(String argumentPattern, String defaultArgument, String helpText)58         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
59             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
60         }
61     }
62 
main(String[] args)63     public static void main(String[] args) throws IOException {
64         myOptions.parse(MyOptions.targetDir, args, true);
65 
66         MY_DIR = MyOptions.targetDir.option.getValue();
67 
68         Set<String> coverageLocales = StandardCodes.make().getLocaleCoverageLocales("cldr");
69         Set<String> dc = new HashSet<>(SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales());
70         Set<String> skipLocales = new HashSet<>(Arrays.asList("root", "en_US_POSIX", "sr_Latn"));
71 
72         Relation<CLDRLocale, CLDRLocale> parentToChildren =
73                 Relation.of(new TreeMap<CLDRLocale, Set<CLDRLocale>>(), TreeSet.class);
74         // first, collect all locales for lookup by parents.
75 
76         for (String locale : FACTORY.getAvailable()) {
77             if (skipLocales.contains(locale.toString()) || dc.contains(locale.toString())) {
78                 continue;
79             }
80             CLDRLocale loc = CLDRLocale.getInstance(locale);
81 
82             if (!coverageLocales.contains(loc.getLanguage())) {
83                 continue;
84             }
85             CLDRLocale parent = null;
86             for (CLDRLocale current = loc; ; current = parent) {
87                 parent = current.getParent();
88                 if (!dc.contains(parent.toString())) { // skip over default content
89                     break;
90                 }
91             }
92             if (ROOT.equals(parent)) {
93                 continue;
94             } else if ("root".equals(parent.toString())) {
95                 throw new IllegalArgumentException("CLDRLocale failure");
96             }
97             parentToChildren.put(parent, loc);
98         }
99 
100         // show inheritance
101         System.out.println("Locale Name\tCode\tRegion\tInherits from\tCode");
102         showInheritance(parentToChildren);
103 
104         // next find out the unique items in children
105         Relation<String, String> valueToAncestors =
106                 Relation.of(new LinkedHashMap<String, Set<String>>(), LinkedHashSet.class);
107 
108         int count = 0;
109 
110         try (PrintWriter grandSummary = FileUtilities.openUTF8Writer(MY_DIR, "GrandSummary.txt");
111                 PrintWriter summary = FileUtilities.openUTF8Writer(MY_DIR, "Summary.txt");
112                 PrintWriter detailFile = FileUtilities.openUTF8Writer(MY_DIR, "details.txt"); ) {
113             grandSummary.println("Parent\tName\tTotal Diff Count\tChildren");
114             summary.println("Parent\tName\tDiff Count\tChild\tChild Name");
115             detailFile.println(
116                     "№\tBase\tParent Locales I\tParent Locales II\tChild Locales\tEnglish value\tParent value I\tParent value II\tChild value\tCorrected Child value\tComments\tFix Parent value?\tSection\tPage\tHeader\tCode");
117             PathHeader.Factory phf = PathHeader.getFactory(ENGLISH);
118             String lastBase = "";
119             for (Entry<CLDRLocale, Set<CLDRLocale>> item : parentToChildren.keyValuesSet()) {
120                 CLDRLocale parent = item.getKey();
121                 String base = parent.getLanguage();
122 
123                 CLDRFile parentFile =
124                         FACTORY.make(parent.toString(), true, DraftStatus.contributed);
125                 M4<PathHeader, String, CLDRLocale, Boolean> pathToValuesToLocales =
126                         ChainedMap.of(
127                                 new TreeMap<PathHeader, Object>(),
128                                 new TreeMap<String, Object>(),
129                                 new TreeMap<CLDRLocale, Object>(),
130                                 Boolean.class);
131 
132                 Counter<CLDRLocale> childDiffs = new Counter<>();
133 
134                 for (CLDRLocale child : item.getValue()) {
135                     // childDiffs.add(child, 0); // make sure it shows up
136                     String childString = child.toString();
137                     CLDRFile childFile = FACTORY.make(childString, false, DraftStatus.contributed);
138                     for (String path : childFile) {
139                         if (SKIP_SUPPRESSED_PATHS) {
140                             if (path.contains("/currency") && path.contains("/symbol")) {
141                                 continue;
142                             }
143                         }
144                         String childValue = childFile.getStringValue(path);
145                         if (childValue == null) {
146                             continue;
147                         }
148                         String parentValue = parentFile.getStringValue(path);
149                         if (parentValue == null) {
150                             parentValue = "∅∅∅";
151                         }
152                         if (!Objects.equal(childValue, parentValue)) {
153                             if (SKIP_SUPPRESSED_PATHS) {
154                                 if ("∅∅∅".equals(childValue) || "∅∅∅".equals(parentValue)) {
155                                     continue; // skip suppressed paths
156                                 }
157                             }
158                             if (parentValue != null) {
159                                 if (child.equals(SWISS_HIGH_GERMAN)) {
160                                     String norm = parentValue.replace("ß", "ss");
161                                     if (childValue.equals(norm)) {
162                                         continue;
163                                     }
164                                 } else if (base.equals("en")) {
165                                     if (sameExceptEnd(childValue, "re", parentValue, "er")
166                                             || sameExceptEnd(
167                                                     childValue, "res", parentValue, "ers")) {
168                                         continue;
169                                     }
170                                 }
171                             }
172                             PathHeader pheader = phf.fromPath(path);
173                             if (SectionId.Special == pheader.getSectionId()) {
174                                 continue;
175                             }
176                             pathToValuesToLocales.put(pheader, childValue, child, Boolean.TRUE);
177                             childDiffs.add(child, 1);
178                         }
179                     }
180                 }
181 
182                 long totalChildDiffs = childDiffs.getTotal();
183                 if (totalChildDiffs == 0) {
184                     continue;
185                 }
186 
187                 if (!base.equals(lastBase)) {
188                     detailFile.println();
189                     //                    if (detailFile != null) {
190                     //                        detailFile.close();
191                     //                    }
192                     //                    detailFile = FileUtilities.openUTF8Writer(MY_DIR,
193                     // "detail-" + base + ".txt");
194                     //
195                     // detailFile.println("Section\tPage\tHeader\tCode\tLocales\tvalue\tParent
196                     // Locales\tvalue\tParent Locales\tvalue");
197                     //                    lastBase = base;
198                 }
199 
200                 grandSummary.println(
201                         parent
202                                 + "\t"
203                                 + ENGLISH.getName(parent.toString())
204                                 + "\t"
205                                 + totalChildDiffs
206                                 + "\t"
207                                 + item.getValue());
208                 for (CLDRLocale s : childDiffs.getKeysetSortedByKey()) {
209                     long childDiffValue = childDiffs.get(s);
210                     if (childDiffValue == 0) {
211                         continue;
212                     }
213                     summary.println(
214                             parent
215                                     + "\t"
216                                     + ENGLISH.getName(parent.toString())
217                                     + "\t"
218                                     + childDiffValue
219                                     + "\t"
220                                     + s
221                                     + "\t"
222                                     + ENGLISH.getName(s.toString()));
223                 }
224 
225                 ArrayList<CLDRFile> parentChain = new ArrayList<>();
226                 for (CLDRLocale current = parent; ; ) {
227                     parentChain.add(FACTORY.make(current.toString(), true));
228                     CLDRLocale grand = current.getParent();
229                     if (ROOT.equals(grand)) {
230                         break;
231                     }
232                     current = grand;
233                 }
234 
235                 for (PathHeader ph : pathToValuesToLocales.keySet()) {
236                     M3<String, CLDRLocale, Boolean> values = pathToValuesToLocales.get(ph);
237                     valueToAncestors.clear();
238                     for (String value : values.keySet()) {
239                         Set<CLDRLocale> childLocales = values.get(value).keySet();
240                         String englishValue = ENGLISH.getStringValue(ph.getOriginalPath());
241                         String originalPath = ph.getOriginalPath();
242                         for (CLDRFile grand : parentChain) {
243                             valueToAncestors.put(
244                                     quote(grand.getStringValue(originalPath)), grand.getLocaleID());
245                         }
246                         Set<Entry<String, Set<String>>> keyValuesSet =
247                                 valueToAncestors.keyValuesSet();
248                         final int countParents = keyValuesSet.size();
249                         if (countParents < 1 || countParents > 2) {
250                             throw new IllegalArgumentException("Too few/many parents");
251                         }
252 
253                         // // №  Base    Parent Locales I    Parent Locales II   Child Locales
254                         // English value   Parent value I  Parent value II Child value
255                         // Corrected Child value   Comments    Fix Parent value?   Section Page
256                         // Header  Code
257 
258                         detailFile.print(++count + "\t" + base);
259 
260                         for (Entry<String, Set<String>> entry : keyValuesSet) {
261                             detailFile.print("\t" + entry.getValue());
262                         }
263                         if (countParents == 1) {
264                             detailFile.print("\t");
265                         }
266                         detailFile.print("" + "\t" + childLocales + "\t" + quote(englishValue));
267                         for (Entry<String, Set<String>> entry : keyValuesSet) {
268                             detailFile.print("\t" + entry.getKey());
269                         }
270                         if (countParents == 1) {
271                             detailFile.print("\t");
272                         }
273                         detailFile.print(
274                                 ""
275                                         + "\t"
276                                         + quote(value)
277                                         + "\t"
278                                         + ""
279                                         + "\t"
280                                         + ""
281                                         + "\t"
282                                         + ""
283                                         + "\t"
284                                         + ph);
285                         detailFile.println();
286                     }
287                 }
288             }
289         }
290         System.out.println("DONE");
291         //        if (detailFile != null) {
292         //            detailFile.close();
293         //        }
294     }
295 
showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren)296     private static void showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren) {
297         Set<CLDRLocale> values = parentToChildren.values();
298         Set<CLDRLocale> topParents = new TreeSet<>(parentToChildren.keySet());
299         topParents.removeAll(values);
300         showInheritance(topParents, "", parentToChildren);
301     }
302 
showInheritance( Set<CLDRLocale> topParents, String prefix, Relation<CLDRLocale, CLDRLocale> parentToChildren)303     private static void showInheritance(
304             Set<CLDRLocale> topParents,
305             String prefix,
306             Relation<CLDRLocale, CLDRLocale> parentToChildren) {
307         for (CLDRLocale locale : topParents) {
308             String current = nameForLocale(locale) + "\t" + prefix;
309             System.out.println(current);
310             Set<CLDRLocale> newChildren = parentToChildren.get(locale);
311             if (newChildren == null) {
312                 continue;
313             }
314             showInheritance(newChildren, current, parentToChildren);
315         }
316     }
317 
318     static final LikelySubtags LS = new LikelySubtags();
319 
nameForLocale(CLDRLocale key)320     private static String nameForLocale(CLDRLocale key) {
321         String country = key.getCountry();
322         if (country.isEmpty()) {
323             String max = LS.maximize(key.toString());
324             LanguageTagParser ltp = new LanguageTagParser().set(max);
325             country = "(" + ltp.getRegion() + ")";
326         }
327         return ENGLISH.getName(key.toString(), false, CLDRFile.SHORT_ALTS)
328                 + "\t"
329                 + key
330                 + "\t"
331                 + country;
332     }
333 
sameExceptEnd( String childValue, String childEnding, String parentValue, String parentEnding)334     private static boolean sameExceptEnd(
335             String childValue, String childEnding, String parentValue, String parentEnding) {
336         if (childValue.endsWith(childEnding)
337                 && parentValue.endsWith(parentEnding)
338                 && childValue
339                         .substring(0, childValue.length() - childEnding.length())
340                         .equals(
341                                 parentValue.substring(
342                                         0, parentValue.length() - parentEnding.length()))) {
343             return true;
344         }
345         return false;
346     }
347 
348     static final UnicodeSet SPREAD_SHEET_SENSITIVE =
349             new UnicodeSet().add('=').add('+').add('0', '9');
350 
quote(String value)351     private static String quote(String value) {
352         if (value == null || value.isEmpty()) {
353             return "∅∅∅";
354         }
355         int first = value.codePointAt(0);
356         return SPREAD_SHEET_SENSITIVE.contains(first) ? "'" + value : value;
357     }
358 }
359