xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateEnums.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.ImmutableMap;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.impl.Utility;
6 import com.ibm.icu.text.Collator;
7 import com.ibm.icu.text.DateFormat;
8 import com.ibm.icu.text.DecimalFormat;
9 import com.ibm.icu.text.NumberFormat;
10 import com.ibm.icu.text.SimpleDateFormat;
11 import com.ibm.icu.text.Transliterator;
12 import com.ibm.icu.util.ULocale;
13 import java.io.BufferedReader;
14 import java.io.IOException;
15 import java.io.PrintWriter;
16 import java.text.ParseException;
17 import java.util.Arrays;
18 import java.util.Collection;
19 import java.util.Comparator;
20 import java.util.Date;
21 import java.util.HashMap;
22 import java.util.Iterator;
23 import java.util.List;
24 import java.util.Locale;
25 import java.util.Map;
26 import java.util.Set;
27 import java.util.TreeMap;
28 import java.util.TreeSet;
29 import org.unicode.cldr.util.CLDRFile;
30 import org.unicode.cldr.util.CLDRPaths;
31 import org.unicode.cldr.util.CldrUtility;
32 import org.unicode.cldr.util.Factory;
33 import org.unicode.cldr.util.Iso639Data;
34 import org.unicode.cldr.util.Iso639Data.Scope;
35 import org.unicode.cldr.util.Iso639Data.Type;
36 import org.unicode.cldr.util.Log;
37 import org.unicode.cldr.util.StandardCodes;
38 import org.unicode.cldr.util.StandardCodes.LstrType;
39 import org.unicode.cldr.util.SupplementalDataInfo;
40 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
41 import org.unicode.cldr.util.Validity;
42 import org.unicode.cldr.util.Validity.Status;
43 import org.unicode.cldr.util.XPathParts;
44 
45 public class GenerateEnums {
    // Indent handed to showGeneratedCommentStart/End around the currency, script and region enum
    // listings.
    private static final String CODE_INDENT = "  ";

    // Indent used for the generated add(...) lines in region_converters.txt.
    private static final String DATA_INDENT = "    ";

    // Indent used for the wrapped language-code string literal in language_enum.txt; it also
    // reduces the available line width there.
    private static final String LIST_INDENT = "              ";

    private StandardCodes sc = StandardCodes.make();

    private Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");

    //    private Factory supplementalFactory = Factory.make(
    //        CLDRPaths.SUPPLEMENTAL_DIRECTORY, ".*");

    // Region codes gathered by loadCLDRData(); compared against the registry codes and against
    // the containment tree.
    private Set<String> cldrCodes = new TreeSet<>();

    // private Map enum_canonical = new TreeMap();
    // Region code -> alpha-3 code, filled by loadCLDRData().
    private Map<String, String> enum_alpha3 = new TreeMap<>();

    // Region (enum) name -> three-digit numeric code, filled by loadCLDRData().
    private Map<String, String> enum_UN = new TreeMap<>();

    // private Map enum_FIPS10 = new TreeMap();

    // private Map enum_TLD = new TreeMap();

    // English data file; source of the display names printed next to the codes.
    private CLDRFile english = factory.make("en", false);

    private CLDRFile supplementalMetadata = factory.make("supplementalMetadata", false);

    // Iterated for the currencyData/region and territoryContainment/group paths.
    private CLDRFile supplementalData = factory.make("supplementalData", false);

    // Currency code -> regions in which that currency has no "to" date, or a "to" date that is
    // not before the time loadCLDRData() ran. Null until loadCLDRData() has been called.
    private Relation<String, String> unlimitedCurrencyCodes;

    // Replaced by the supplementalDataInfo script codes in loadCLDRData().
    private Set<String> scripts = new TreeSet<>();

    // Replaced by the supplementalDataInfo language codes in loadCLDRData().
    private Set<String> languages = new TreeSet<>();
main(String[] args)82     public static void main(String[] args) throws IOException {
83         GenerateEnums gen = new GenerateEnums();
84         gen.showLanguageInfo();
85         gen.loadCLDRData();
86         gen.showCounts();
87         gen.showCurrencies();
88         gen.showLanguages();
89         gen.showScripts();
90         gen.showRegionCodeInfo();
91         System.out.println("DONE");
92     }
93 
showCounts()94     private void showCounts() {
95         System.out.format(
96                 "Language Subtags: %s" + CldrUtility.LINE_SEPARATOR,
97                 sc.getGoodAvailableCodes("language").size());
98         System.out.format(
99                 "Script Subtags: %s" + CldrUtility.LINE_SEPARATOR,
100                 sc.getGoodAvailableCodes("script").size());
101         System.out.format(
102                 "Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR,
103                 sc.getGoodAvailableCodes("territory").size());
104     }
105 
showCurrencies()106     private void showCurrencies() throws IOException {
107         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
108         Log.println();
109         Log.println("Currency Data");
110         Log.println();
111         showGeneratedCommentStart(CODE_INDENT);
112         compareSets(
113                 "currencies from sup.data", currencyCodes, "valid currencies", validCurrencyCodes);
114         Set<String> unused = new TreeSet<>(validCurrencyCodes);
115         unused.removeAll(currencyCodes);
116         showCurrencies(currencyCodes);
117         Log.println();
118         showCurrencies(unused);
119         Map<String, String> sorted = new TreeMap<>(Collator.getInstance(ULocale.ENGLISH));
120         for (String code : validCurrencyCodes) {
121             if (unused.contains(code) && !code.equals("CLF"))
122                 continue; // we include CLF for compatibility
123             sorted.put(getName(code), code);
124         }
125         int lineLength =
126                 "  /** Belgian Franc */                                            BEF,".length();
127         for (String name : sorted.keySet()) {
128             printRow(Log.getLog(), sorted.get(name), name, "currency", null, lineLength);
129         }
130         showGeneratedCommentEnd(CODE_INDENT);
131         Log.close();
132     }
133 
getName(String code)134     private String getName(String code) {
135         String result = english.getName(CLDRFile.CURRENCY_NAME, code);
136         if (result == null) {
137             result = code;
138             System.out.println("Failed to find: " + code);
139         }
140         return result;
141     }
142 
showCurrencies(Set<String> both)143     private void showCurrencies(Set<String> both) {
144         // /** Afghani */ AFN,
145         for (Iterator<String> it = both.iterator(); it.hasNext(); ) {
146             String code = it.next();
147             String englishName = getName(code);
148             if (englishName == null) {}
149             Set<String> regions = unlimitedCurrencyCodes.getAll(code);
150             System.out.println(
151                     code
152                             + "\t"
153                             + englishName
154                             + "\t"
155                             + (validCurrencyCodes.contains(code)
156                                     ? currencyCodes.contains(code) ? "" : "valid-only"
157                                     : "supp-only")
158                             + "\t"
159                             + (regions != null ? regions : "unused"));
160         }
161     }
162 
showScripts()163     private void showScripts() throws IOException {
164         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
165         Log.println();
166         Log.println("Script Data");
167         Log.println();
168 
169         showGeneratedCommentStart(CODE_INDENT);
170         Map<String, String> code_replacements = new TreeMap<>();
171         int len = "  /** Arabic */                                        Arab,".length();
172         for (Iterator<String> it = scripts.iterator(); it.hasNext(); ) {
173             String code = it.next();
174             String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
175             if (englishName == null) continue;
176             printRow(Log.getLog(), code, null, "script", code_replacements, len);
177             // Log.println(" /**" + englishName + "*/ " + code + ",");
178         }
179         showGeneratedCommentEnd(CODE_INDENT);
180         Log.close();
181     }
182 
showLanguageInfo()183     private void showLanguageInfo() throws IOException {
184         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
185         System.out.println();
186         System.out.println("Language Converter");
187         System.out.println();
188         StringBuilder buffer = new StringBuilder();
189         // language information
190         for (String language : sc.getAvailableCodes("language")) {
191             Scope scope = Iso639Data.getScope(language);
192             if (scope == Scope.PrivateUse) {
193                 continue;
194             }
195             buffer.setLength(0);
196             String alpha3 = Iso639Data.toAlpha3(language);
197             if (alpha3 != null) {
198                 buffer.append(".add(\"" + alpha3 + "\")");
199             }
200             Type type = Iso639Data.getType(language);
201             if (type != Type.Living) {
202                 buffer.append(".add(Type." + type + ")");
203             }
204             if (scope != Scope.Individual) {
205                 buffer.append(".add(Scope." + scope + ")");
206             }
207             if (buffer.length() > 0) {
208                 Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
209             }
210         }
211         Log.close();
212     }
213 
showLanguages()214     private void showLanguages() throws IOException {
215         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
216         System.out.println();
217         System.out.println("Language Data");
218         System.out.println();
219 
220         for (Iterator<String> it = languages.iterator(); it.hasNext(); ) {
221             String code = it.next();
222             String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
223             if (englishName == null) continue;
224             System.out.println("     /**" + englishName + "*/    " + code + ",");
225         }
226 
227         showGeneratedCommentStart(LIST_INDENT);
228         /*
229          * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
230          * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
231          * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
232          */
233         StringBuffer buffer = new StringBuffer();
234         int lineLimit = 70 - LIST_INDENT.length();
235         char lastChar = 0;
236         for (Iterator<String> it = languages.iterator(); it.hasNext(); ) {
237             String code = it.next();
238             if (code.equals("root")) {
239                 continue;
240             }
241             if (code.charAt(0) != lastChar || buffer.length() + 1 + code.length() > lineLimit) {
242                 if (buffer.length() != 0) Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
243                 buffer.setLength(0);
244                 lastChar = code.charAt(0);
245             }
246             buffer.append(code).append(' ');
247         }
248         // remove the very last space
249         if (buffer.charAt(buffer.length() - 1) == ' ') {
250             buffer.setLength(buffer.length() - 1);
251         }
252         Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
253 
254         showGeneratedCommentEnd(LIST_INDENT);
255         Log.close();
256     }
257 
258     @SuppressWarnings("rawtypes")
join(Collection collection, String separator)259     private Object join(Collection collection, String separator) {
260         if (collection == null) return null;
261         StringBuffer result = new StringBuffer();
262         boolean first = true;
263         for (Iterator it = collection.iterator(); it.hasNext(); ) {
264             if (first) first = false;
265             else result.append(separator);
266             result.append(it.next());
267         }
268         return result.toString();
269     }
270 
    // Zero-padded three-digit formatter; loadCLDRData() uses it to normalize the numeric codes
    // read from UnMacroRegions.txt.
    static NumberFormat threeDigit = new DecimalFormat("000");
272 
loadCLDRData()273     public void loadCLDRData() throws IOException {
274         // BufferedReader codes = Utility.getUTF8Data("territory_codes.txt");
275         // while (true) {
276         // String line = codes.readLine();
277         // if (line == null)
278         // break;
279         // line = line.split("#")[0].trim();
280         // if (line.length() == 0)
281         // continue;
282         // String[] sourceValues = line.split("\\s+");
283         // String[] values = new String[5];
284         // for (int i = 0; i < values.length; ++i) {
285         // if (i >= sourceValues.length || sourceValues[i].equals("-"))
286         // values[i] = null;
287         // else
288         // values[i] = sourceValues[i];
289         // }
290         // String alpha2 = values[0];
291         // cldrCodes.add(alpha2);
292         // if (isPrivateUseRegion(alpha2))
293         // continue;
294         // String numeric = values[1];
295         // String alpha3 = values[2];
296         // String internet = values[3];
297         // if (internet != null)
298         // internet = internet.toUpperCase();
299         // String fips10 = values[4];
300         // String enumValue = enumName(alpha2);
301         // enum_alpha3.put(enumValue, alpha3);
302         // enum_UN.put(enumValue, numeric);
303         // enum_FIPS10.put(enumValue, fips10);
304         // enum_TLD.put(enumValue, internet);
305         // }
306         // codes.close();
307         DecimalFormat threeDigits = new DecimalFormat("000");
308         for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
309             cldrCodes.add(value);
310             if (isPrivateUseRegion(value)) continue;
311             enum_UN.put(
312                     value,
313                     threeDigits.format(
314                             supplementalDataInfo
315                                     .getNumericTerritoryMapping()
316                                     .getAll(value)
317                                     .iterator()
318                                     .next()));
319         }
320         for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
321             cldrCodes.add(value);
322             if (isPrivateUseRegion(value)) continue;
323             enum_alpha3.put(
324                     value,
325                     supplementalDataInfo
326                             .getAlpha3TerritoryMapping()
327                             .getAll(value)
328                             .iterator()
329                             .next());
330         }
331 
332         BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt");
333         Map<String, String> macro_name = new TreeMap<>();
334         while (true) {
335             String line = codes.readLine();
336             if (line == null) break;
337             line = line.trim();
338             if (line.length() == 0) continue;
339             if (line.charAt(0) < '0' || line.charAt(0) > '9') {
340                 System.out.println("GenerateEnums: Skipping: " + line);
341                 continue;
342             }
343             String[] sourceValues = line.split("\\s+");
344             int code = Integer.parseInt(sourceValues[0]);
345             String codeName = threeDigit.format(code);
346             macro_name.put(codeName, line);
347         }
348         codes.close();
349         //        String values =
350         // supplementalDataInfo.getValidityInfo().get("$territory").get1().trim();
351         Map<Status, Set<String>> validRegions =
352                 Validity.getInstance().getStatusToCodes(LstrType.region);
353         Set<String> regions = new TreeSet<>();
354         regions.addAll(validRegions.get(Status.regular));
355         regions.addAll(validRegions.get(Status.macroregion));
356         //        String[] validTerritories = values.split("\\s+");
357         //        for (int i = 0; i < validTerritories.length; ++i) {
358         for (String region : regions) {
359             if (corrigendum.contains(region)) {
360                 System.out.println("Skipping " + region + "\t\t" + getEnglishName(region));
361                 continue; // exception, corrigendum
362             }
363             if (isPrivateUseRegion(region)) continue;
364             if (region.charAt(0) < 'A') { // numeric
365                 enum_UN.put(enumName(region), region);
366                 cldrCodes.add(region);
367             } else {
368                 if (enum_alpha3.get(region) == null) {
369                     System.out.println("Missing alpha3 for: " + region);
370                 }
371             }
372         }
373         checkDuplicates(enum_UN);
374         checkDuplicates(enum_alpha3);
375         Set<String> availableCodes = new TreeSet<>(sc.getAvailableCodes("territory"));
376         compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
377         Set<String> missing = new TreeSet<>(availableCodes);
378         missing.removeAll(cldrCodes);
379         // don't care list: "003"
380         // missing.remove("003");
381         // missing.remove("172");
382         // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
383         missing.remove("EA");
384         missing.remove("EZ");
385         missing.remove("IC");
386         missing.remove("QU");
387         missing.remove("UN");
388         missing.remove("CQ");
389 
390         if (missing.size() != 0) {
391             throw new IllegalArgumentException("Codes in Registry but not in CLDR: " + missing);
392         }
393 
394         Set<String> UNValues = new TreeSet<>(enum_UN.values());
395 
396         for (Iterator<String> it = macro_name.keySet().iterator(); it.hasNext(); ) {
397             Object key = it.next();
398             Object value = macro_name.get(key);
399             if (!UNValues.contains(key)) {
400                 System.out.println("Macro " + key + "\t" + value);
401             }
402         }
403 
404         for (Iterator<String> it = enum_UN.keySet().iterator(); it.hasNext(); ) {
405             String region = it.next();
406             String englishName = getEnglishName(region);
407             if (englishName == null) {
408                 englishName = "NULL"; // for debugging\
409             }
410             String rfcName = getRFC3066Name(region);
411             if (!englishName.equals(rfcName)) {
412                 System.out.println(
413                         "Different names: {\""
414                                 + region
415                                 + "\",\t\""
416                                 + englishName
417                                 + " ("
418                                 + rfcName
419                                 + ")\"},");
420             }
421         }
422 
423         getContainment();
424 
425         DateFormat[] simpleFormats = {
426             new SimpleDateFormat("yyyy-MM-dd"),
427             new SimpleDateFormat("yyyy-MM"),
428             new SimpleDateFormat("yyyy"),
429         };
430         Date today = new Date();
431         Date longAgo = new Date(1000 - 1900, 1, 1);
432         currencyCodes = new TreeSet<>();
433         unlimitedCurrencyCodes =
434                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
435         for (Iterator<String> it =
436                         supplementalData.iterator("//supplementalData/currencyData/region");
437                 it.hasNext(); ) {
438             String path = it.next();
439             XPathParts parts = XPathParts.getFrozenInstance(path);
440             String region = parts.findAttributeValue("region", "iso3166");
441             String code = parts.findAttributeValue("currency", "iso4217");
442             String to = parts.findAttributeValue("currency", "to");
443             main:
444             if (to == null) {
445                 unlimitedCurrencyCodes.put(code, region);
446             } else {
447                 for (int i = 0; i < simpleFormats.length; ++i) {
448                     try {
449                         Date foo = simpleFormats[i].parse(to);
450                         if (foo.compareTo(longAgo) < 0) {
451                             System.out.println("Date Error: can't parse " + to);
452                             break main;
453                         } else if (foo.compareTo(today) >= 0) {
454                             unlimitedCurrencyCodes.put(code, region);
455                         }
456                         break main;
457                     } catch (ParseException e) {
458                     }
459                 }
460                 System.out.println("Date Error: can't parse " + to);
461             }
462             currencyCodes.add(code);
463         }
464 
465         validCurrencyCodes = new TreeSet<>();
466         Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
467         for (String code : bcp47CurrencyCodes) {
468             validCurrencyCodes.add(code.toUpperCase());
469         }
470 
471         scripts = supplementalDataInfo.getCLDRScriptCodes();
472         languages = supplementalDataInfo.getCLDRLanguageCodes();
473 
474         // Set availableCodes = new TreeSet(sc.getAvailableCodes("territory"));
475         // availableCodes.add("003");
476         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
477         // String code = (String) next())
478         // canonicalRegion_UN.put(alpha2, numeric);
479         // }
480 
481         // for (Iterator it = availableCodes.iterator(); it.hasNext();) {
482         // String code = (String)it.next();
483         // RegionCode region = map_id_canonical_RFC.get(code);
484         // if (region != null) continue; // skip others
485         // region = new RegionCode(code);
486         // map_id_canonical_RFC.put(code,region);
487         // map_canonical_id_RFC.put(region,code);
488         // if ("A".compareTo(code) > 0) {
489         // map_id_canonical_UN.put(code,region);
490         // map_canonical_id_UN.put(region,code);
491         // } else {
492         // map_id_canonical_A2.put(code,region);
493         // map_canonical_id_A2.put(region,code);
494         // }
495         // }
496         // for (Iterator it = goodAvailableCodes.iterator(); it.hasNext();) {
497         // String code = (String)it.next();
498         // good.add(getInstance(code));
499         // }
500     }
501 
getContainment()502     public void getContainment() {
503         // <group type="001" contains="002 009 019 142 150"/> <!--World -->
504         for (Iterator<String> it =
505                         supplementalData.iterator("//supplementalData/territoryContainment/group");
506                 it.hasNext(); ) {
507             String path = it.next();
508             String fullPath = supplementalData.getFullXPath(path);
509             XPathParts parts = XPathParts.getFrozenInstance(fullPath);
510             String container = parts.getAttributeValue(parts.size() - 1, "type");
511             final String containedString = parts.getAttributeValue(-1, "contains");
512             List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
513             containment.put(container, contained);
514         }
515         // fix recursiveContainment.
516         // for (String region : (Collection<String>)containment.keySet()) {
517         // Set temp = new LinkedHashSet();
518         // addContains(region, temp);
519         // recursiveContainment.put(region, temp);
520         // }
521         Set<String> startingFromWorld = new TreeSet<>();
522         addContains("001", startingFromWorld);
523         compareSets("World", startingFromWorld, "CLDR", cldrCodes);
524         // generateContains();
525     }
526 
generateContains()527     private void generateContains() {
528 
529         for (String region : containment.keySet()) {
530             List<String> plain = containment.get(region);
531             // Collection recursive = (Collection)recursiveContainment.get(region);
532 
533             String setAsString = CldrUtility.join(plain, " ");
534             // String setAsString2 = recursive.equals(plain) ? "" : ", " +
535             // Utility.join(recursive," ");
536             Log.println("\t\tadd(\"" + region + "\", \"" + setAsString + "\");");
537         }
538     }
539 
    // Containing region code -> the region codes listed in its "contains" attribute.
    // Filled by getContainment().
    Map<String, List<String>> containment = new TreeMap<>();

    // Map recursiveContainment = new TreeMap();
543 
addContains(String string, Set<String> startingFromWorld)544     private void addContains(String string, Set<String> startingFromWorld) {
545         startingFromWorld.add(string);
546         List<String> contained = containment.get(string);
547         if (contained == null) return;
548         for (Iterator<String> it = contained.iterator(); it.hasNext(); ) {
549             addContains(it.next(), startingFromWorld);
550         }
551     }
552 
553     @SuppressWarnings("rawtypes")
compareSets(String name, Set availableCodes, String name2, Set cldrCodes)554     private void compareSets(String name, Set availableCodes, String name2, Set cldrCodes) {
555         Set temp = new TreeSet();
556         temp.addAll(availableCodes);
557         temp.removeAll(cldrCodes);
558         System.out.println("In " + name + " but not in " + name2 + ": " + temp);
559         temp.clear();
560         temp.addAll(cldrCodes);
561         temp.removeAll(availableCodes);
562         System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
563     }
564 
565     @SuppressWarnings("rawtypes")
checkDuplicates(Map m)566     private void checkDuplicates(Map m) {
567         Map backMap = new HashMap();
568         for (Iterator it = m.keySet().iterator(); it.hasNext(); ) {
569             Object key = it.next();
570             Object o = m.get(key);
571             Object otherKey = backMap.get(o);
572             if (otherKey != null)
573                 System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t" + o);
574             else backMap.put(o, key);
575         }
576     }
577 
    // Region codes that loadCLDRData() reports as "Skipping" and leaves out of the enum maps.
    Set<String> corrigendum =
            new TreeSet<>(Arrays.asList(new String[] {"QE", "833", "830", "172"})); // 003, 419

    // Alternative English names keyed by region code.
    // NOTE(review): not read in the part of the file reviewed here; presumably consulted by the
    // name lookup helpers. Confirm.
    private ImmutableMap<String, String> extraNames =
            ImmutableMap.<String, String>builder()
                    .put("BU", "Burma")
                    .put("TP", "East Timor")
                    .put("YU", "Yugoslavia")
                    .put("ZR", "Zaire")
                    .put("CD", "Congo (Kinshasa, Democratic Republic)")
                    .put("CI", "Ivory Coast (Cote d'Ivoire)")
                    .put("FM", "Micronesia (Federated States)")
                    .put("TL", "East Timor (Timor-Leste)")
                    // .put("155", "Western Europe")
                    .build();

    // Every currency code found under //supplementalData/currencyData/region.
    // Null until loadCLDRData() has run.
    private Set<String> currencyCodes;

    // Upper-cased values of the BCP 47 "cu" key. Null until loadCLDRData() has run.
    private Set<String> validCurrencyCodes;

    // Shared supplemental data, loaded from the supplemental directory.
    static SupplementalDataInfo supplementalDataInfo =
            SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
600 
601     /**
602      * Get the RegionCode Enum
603      *
604      * @throws IOException
605      */
showRegionCodeInfo()606     private void showRegionCodeInfo() throws IOException {
607         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
608         System.out.println();
609         System.out.println("Data for RegionCode");
610         System.out.println();
611         showGeneratedCommentStart(CODE_INDENT);
612 
613         Set<String> reordered = new TreeSet<>(new LengthFirstComparator());
614         reordered.addAll(enum_UN.keySet());
615         Map<String, String> code_replacements = new TreeMap<>();
616         int len = "  /** Polynesia */                                    UN061,".length();
617         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
618             String region = it.next();
619             printRow(Log.getLog(), region, null, "territory", code_replacements, len);
620         }
621         showGeneratedCommentEnd(CODE_INDENT);
622         Log.close();
623 
624         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
625         Log.println();
626         Log.println("Data for ISO Region Codes");
627         Log.println();
628         for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
629             if (territory.equals("ZZ")) {
630                 continue;
631             }
632             PopulationData popData = supplementalDataInfo.getPopulationDataForTerritory(territory);
633             // to("ak").add(Scope.Macrolanguage).add("aka");
634             Log.formatln(
635                     "    addRegion(RegionCode.%s, %s, %s, %s) // %s",
636                     territory,
637                     format(popData.getPopulation()),
638                     format(popData.getLiteratePopulation() / popData.getPopulation()),
639                     format(popData.getGdp()),
640                     english.getName("territory", territory));
641             // remove all the ISO 639-3 until they are part of BCP 47
642             // we need to remove in earlier pass so we have the count
643             Set<String> languages = new TreeSet<>();
644             for (String language :
645                     supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) {
646                 if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
647                     continue;
648                 }
649                 popData =
650                         supplementalDataInfo.getLanguageAndTerritoryPopulationData(
651                                 language, territory);
652                 if (popData.getPopulation() == 0
653                         || Double.isNaN(
654                                 popData.getLiteratePopulation() / popData.getPopulation())) {
655                     continue;
656                 }
657                 languages.add(language);
658             }
659             int count = languages.size();
660             for (String language : languages) {
661                 --count; // we need to know the last one
662                 popData =
663                         supplementalDataInfo.getLanguageAndTerritoryPopulationData(
664                                 language, territory);
665                 Log.formatln(
666                         "    .addLanguage(\"%s\", %s, %s)%s // %s",
667                         language,
668                         format(popData.getPopulation()),
669                         format(popData.getLiteratePopulation() / popData.getPopulation()),
670                         (count == 0 ? ";" : ""),
671                         english.getName(language));
672             }
673         }
674         Log.close();
675 
676         Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
677         Log.println();
678         Log.println("Data for ISO Region Codes");
679         Log.println();
680         showGeneratedCommentStart(DATA_INDENT);
681         // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are
682         // containees
683         reordered = new TreeSet<>(new DeprecatedAndLengthFirstComparator("territory"));
684         reordered.addAll(enum_UN.keySet());
685         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
686             String region = it.next();
687             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
688             // UN
689             // name
690             // int un = Integer.parseInt((String) enum_UN.get(region)); // get around
691             // dumb octal
692             // syntax
693             String isoCode = enum_alpha3.get(region);
694             if (isoCode == null) continue;
695             Log.println(
696                     DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode." + region + ");");
697         }
698         doAliases(code_replacements);
699         showGeneratedCommentEnd(DATA_INDENT);
700         Log.println();
701         Log.println("Data for M.49 Region Codes");
702         Log.println();
703         showGeneratedCommentStart(DATA_INDENT);
704 
705         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
706             String region = it.next();
707             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
708             // UN
709             // name
710             int un = Integer.parseInt(enum_UN.get(region), 10); // get
711             // around
712             // dumb
713             // octal
714             // syntax
715             Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region + ");");
716         }
717         doAliases(code_replacements);
718 
719         System.out.println("Plain list");
720         for (Iterator<String> it = reordered.iterator(); it.hasNext(); ) {
721             String region = it.next();
722             // String cldrName = region.length() < 5 ? region : region.substring(2); // fix
723             // UN
724             // name
725             String newCode = code_replacements.get(region);
726             if (newCode != null) continue;
727 
728             int un = Integer.parseInt(enum_UN.get(region), 10); // get
729             // around
730             // dumb
731             // octal
732             // syntax
733             System.out.println(un + "\t" + region + "\t" + english.getName("territory", region));
734         }
735 
736         showGeneratedCommentEnd(DATA_INDENT);
737 
738         getContainment();
739         Log.close();
740     }
741 
742     static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);
743 
744     static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);
745 
746     static {
747         nf.setMaximumFractionDigits(3);
748         sf.setMaximumFractionDigits(3);
749         nf.setGroupingUsed(false);
750     }
751 
format(double value)752     private String format(double value) {
753         double newValue = CldrUtility.roundToDecimals(value, 3);
754         String option1 = nf.format(newValue);
755         String option2 = sf.format(value);
756         return option1.length() <= option2.length() ? option1 : option2;
757     }
758 
doAliases(Map<String, String> code_replacements)759     private void doAliases(Map<String, String> code_replacements) {
760         for (String code : code_replacements.keySet()) {
761             String newCode = code_replacements.get(code);
762             if (newCode.length() == 0) newCode = "ZZ";
763             Log.println(
764                     DATA_INDENT + "addAlias(" + "RegionCode." + code + ", \"" + newCode + "\");");
765         }
766     }
767 
showGeneratedCommentEnd(String indent)768     private void showGeneratedCommentEnd(String indent) {
769         Log.println(indent + "/* End of generated code. */");
770     }
771 
showGeneratedCommentStart(String indent)772     private void showGeneratedCommentStart(String indent) {
773         Log.println(indent + "/*");
774         Log.println(indent + " * The following information is generated from a tool,");
775         Log.println(indent + " * as described on");
776         Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
777         Log.println(indent + " * Do not edit manually.");
778         Log.println(indent + " * Start of generated code.");
779         Log.println(indent + " */");
780     }
781 
782     public static final class LengthFirstComparator implements Comparator<Object> {
783         @Override
compare(Object a, Object b)784         public int compare(Object a, Object b) {
785             String as = a.toString();
786             String bs = b.toString();
787             if (as.length() < bs.length()) return -1;
788             if (as.length() > bs.length()) return 1;
789             return as.compareTo(bs);
790         }
791     }
792 
793     public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
794         String type;
795 
DeprecatedAndLengthFirstComparator(String type)796         DeprecatedAndLengthFirstComparator(String type) {
797             this.type = type;
798         }
799 
800         @Override
compare(Object a, Object b)801         public int compare(Object a, Object b) {
802             String as = a.toString();
803             String bs = b.toString();
804             String ar = getDeprecatedReplacement(type, as);
805             String br = getDeprecatedReplacement(type, bs);
806             // put the deprecated ones first, eg those that aren't null
807             if (ar != null) {
808                 if (br == null) return -1;
809             }
810             if (br != null) {
811                 if (ar == null) return 1;
812             }
813             // now check the length
814             if (as.length() < bs.length()) return -1;
815             if (as.length() > bs.length()) return 1;
816             return as.compareTo(bs);
817         }
818     }
819 
820     /**
821      * Returns null if not deprecated, otherwise "" if there is no replacement, otherwise the
822      * replacement.
823      *
824      * @return
825      */
getDeprecatedReplacement(String type, String cldrTypeValue)826     public String getDeprecatedReplacement(String type, String cldrTypeValue) {
827         if (type.equals("currency")) {
828             return null;
829         }
830         String path =
831                 supplementalMetadata.getFullXPath(
832                         "//supplementalData/metadata/alias/"
833                                 + type
834                                 + "Alias[@type=\""
835                                 + cldrTypeValue
836                                 + "\"]",
837                         true);
838         if (path == null) {
839             return null;
840         }
841         XPathParts parts = XPathParts.getFrozenInstance(path);
842         String replacement = parts.findAttributeValue("territoryAlias", "replacement");
843         if (replacement == null) {
844             return "";
845         }
846         return replacement;
847     }
848 
    /**
     * Transliterator applied to the resolved English name in {@code printRow} before it is
     * written into a generated comment. Its single rule maps the characters ’ and ʻ to the empty
     * string, i.e. removes them.
     */
    static Transliterator doFallbacks =
            Transliterator.createFromRules("id", "[’ʻ] > ''; ", Transliterator.FORWARD);
851 
printRow( PrintWriter out, String codeName, String englishName, String type, Map<String, String> code_replacements, int lineLength)852     private void printRow(
853             PrintWriter out,
854             String codeName,
855             String englishName,
856             String type,
857             Map<String, String> code_replacements,
858             int lineLength) {
859         // int numeric = Integer.parseInt((String) enum_UN.get(codeName));
860         // String alpha3 = (String) enum_alpha3.get(codeName);
861         String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix
862         // UN
863         // name
864         String replacement = getDeprecatedReplacement(type, cldrName);
865 
866         String resolvedEnglishName =
867                 englishName != null
868                         ? englishName
869                         : type.equals("territory")
870                                 ? getEnglishName(codeName)
871                                 : type.equals("currency")
872                                         ? getName(codeName)
873                                         : english.getName(CLDRFile.SCRIPT_NAME, codeName);
874         resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);
875 
876         String prefix = CODE_INDENT + "/** " + resolvedEnglishName; // + " - " +
877         // threeDigit.format(numeric);
878         String printedCodeName = codeName;
879         if (replacement != null) {
880             code_replacements.put(codeName, replacement);
881             out.println(prefix);
882             prefix =
883                     CODE_INDENT
884                             + " * @deprecated"
885                             + (replacement.length() == 0 ? "" : " see " + replacement);
886             printedCodeName = "@Deprecated " + printedCodeName;
887         }
888         prefix += " */";
889 
890         if (codeName.equals("UN001")) {
891             out.println();
892         }
893         if (prefix.length() > lineLength - (printedCodeName.length() + 1)) {
894             // break at last space
895             int lastFit = prefix.lastIndexOf(' ', lineLength - (printedCodeName.length() + 1) - 2);
896             out.println(prefix.substring(0, lastFit));
897             prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
898         }
899         out.print(prefix);
900         out.print(
901                 Utility.repeat(
902                         " ", (lineLength - (prefix.length() + printedCodeName.length() + 1))));
903         out.println(printedCodeName + ",");
904     }
905 
getEnglishName(String codeName)906     private String getEnglishName(String codeName) {
907         if (codeName.length() > 3) codeName = codeName.substring(2); // fix UN name
908         String name = extraNames.get(codeName);
909         if (name != null) return name;
910         name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
911         if (name != null) return name;
912         return codeName;
913     }
914 
getRFC3066Name(String codeName)915     private String getRFC3066Name(String codeName) {
916         if (codeName.length() > 2) codeName = codeName.substring(2); // fix UN name
917         List<String> list = sc.getFullData("territory", codeName);
918         if (list == null) return null;
919         return list.get(0);
920     }
921 
enumName(String codeName)922     private String enumName(String codeName) {
923         return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
924     }
925 
quote(Object input)926     static String quote(Object input) {
927         if (input != null) return '"' + input.toString().trim() + '"';
928         return null;
929     }
930 
isPrivateUseRegion(String codeName)931     static boolean isPrivateUseRegion(String codeName) {
932         // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
933         if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
934             return false;
935         } else if (codeName.equals("AA") || codeName.equals("ZZ")) {
936             return true;
937         } else if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
938             return true;
939         } else if (codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0) {
940             return true;
941         }
942         return false;
943     }
944     /*
945      * <reset before="tertiary">ウ</reset> <x><context>ウ</context><t>ヽ</t></x>
946      * <x><context>ウ</context><i>ヽ</i></x>
947      *
948      * <x><context>う</context><i>ゝ</i></x> <x><context>ゥ</context><i>ヽ</i></x>
949      * <x><context>ゥ</context><i>ヽ</i></x> <x><context>ぅ</context><i>ゝ</i></x>
950      * <x><context>ヴ</context><i>ヽ</i></x>
951      *
952      * <x><context>ゔ</context><i>ゝ</i></x> <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x>
953      * <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> <x><context>う</context><i>ゞ</i><extend>゙</extend></x>
954      *
955      * <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x>
956      * <x><context>ぅ</context><i>ゞ</i><extend>゙</extend></x> <x><context>ヴ</context><i>ヾ</i><extend>゙</extend></x>
957      *
958      * <x><context>ゔ</context><i>ゞ</i><extend>゙</extend></x>
959      */
960 }
961