xref: /aosp_15_r20/external/cldr/tools/cldr-rdf/src/main/java/org/unicode/cldr/tool/GenerateSubdivisions.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.HashMultimap;
4 import com.google.common.collect.Multimap;
5 import com.ibm.icu.impl.Relation;
6 import com.ibm.icu.impl.Row.R2;
7 import java.io.IOException;
8 import java.io.PrintWriter;
9 import java.util.Arrays;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeSet;
16 import org.unicode.cldr.draft.FileUtilities;
17 import org.unicode.cldr.tool.SubdivisionNode.SubDivisionExtractor;
18 import org.unicode.cldr.tool.SubdivisionNode.SubdivisionSet;
19 import org.unicode.cldr.util.CLDRConfig;
20 import org.unicode.cldr.util.CLDRPaths;
21 import org.unicode.cldr.util.StandardCodes.LstrType;
22 import org.unicode.cldr.util.SupplementalDataInfo;
23 import org.unicode.cldr.util.Validity;
24 import org.unicode.cldr.util.Validity.Status;
25 
26 public class GenerateSubdivisions {
27     private static final String ISO_COUNTRY_CODES =
28             CLDRPaths.CLDR_PRIVATE_DIRECTORY + "iso_country_codes/";
29     static final String ISO_SUBDIVISION_CODES = ISO_COUNTRY_CODES + "iso_country_codes.xml";
30 
31     // TODO: consider whether to use the last archive directory to generate
32     // There are pros and cons.
33     // Pros are that we don't introduce "fake" deprecated elements that are introduced and
34     // deprecated during the 6 month CLDR cycle
35     // Cons are that we may have to repeat work
36 
37     static final class SubdivisionInfo {
38         static final SupplementalDataInfo SDI_LAST =
39                 SupplementalDataInfo.getInstance(
40                         CLDRPaths.LAST_RELEASE_DIRECTORY + "common/supplemental/");
41 
42         static final Map<String, R2<List<String>, String>> SUBDIVISION_ALIASES_FORMER =
43                 SDI_LAST.getLocaleAliasInfo().get("subdivision");
44 
45         static final SubdivisionNames SUBDIVISION_NAMES_ENGLISH_FORMER =
46                 new SubdivisionNames("en", "main", "subdivisions");
47 
48         static final Validity VALIDITY_FORMER =
49                 Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");
50 
51         static final Relation<String, String> formerRegionToSubdivisions =
52                 Relation.of(
53                         new HashMap<String, Set<String>>(),
54                         TreeSet.class,
55                         CLDRConfig.getInstance().getComparatorRoot());
56 
57         static {
58             Map<Status, Set<String>> oldSubdivisionData =
59                     VALIDITY_FORMER.getStatusToCodes(LstrType.subdivision);
60             for (Entry<Status, Set<String>> e : oldSubdivisionData.entrySet()) {
61                 final Status status = e.getKey();
62                 if (status != Status.unknown) { // special is a hack
63                     for (String sdCode : e.getValue()) {
64                         final String region = SubdivisionNames.getRegionFromSubdivision(sdCode);
formerRegionToSubdivisions.put(region, sdCode)65                         formerRegionToSubdivisions.put(region, sdCode);
66                     }
67                 }
68             }
formerRegionToSubdivisions.freeze()69             formerRegionToSubdivisions.freeze();
70         }
71 
72         static final Multimap<String, String> subdivisionIdToOld = HashMultimap.create();
73 
74         static {
75             for (Entry<String, R2<List<String>, String>> entry :
76                     SUBDIVISION_ALIASES_FORMER.entrySet()) {
77                 String oldId = entry.getKey();
78                 for (String newId : entry.getValue().get0()) {
subdivisionIdToOld.put(newId, oldId)79                     subdivisionIdToOld.put(newId, oldId);
80                 }
81             }
82         }
83     }
84 
main(String[] args)85     public static void main(String[] args) throws IOException {
86         CLDRConfig.getInstance().getSupplementalDataInfo();
87         // TODO Restructure so that this call is done first to process the iso data
88         // then the extraction uses that data.
89         // also restructure the SubdivisionInfo to not be static
90         boolean preprocess = args.length > 0;
91         if (preprocess) {
92             for (String source :
93                     Arrays.asList(
94                             "2015-05-04_iso_country_code_ALL_xml",
95                             "2016-01-13_iso_country_code_ALL_xml",
96                             "2016-12-09_iso_country_code_ALL_xml",
97                             "2017-02-12_iso_country_code_ALL_xml",
98                             "2017-09-15_iso_country_code_ALL_xml",
99                             "2018-02-20_iso_country_code_ALL_xml",
100                             "2018-09-02_iso_country_code_ALL_xml",
101                             "2019-02-26_iso_country_code_ALL_xml",
102                             "2020-03-05_iso_country_code_ALL_xml",
103                             "2020-09-09_iso_country_code_ALL_xml",
104                             "2021-09-14_iso_country_code_ALL_xml",
105                             "2022-02-22_iso_country_code_ALL_xml",
106                             "2022-03-18_iso_country_code_ALL_xml",
107                             "2022-08-26_iso_country_code_ALL_xml")) {
108                 SubdivisionSet sdset1 =
109                         new SubdivisionSet(
110                                 CLDRPaths.CLDR_PRIVATE_DIRECTORY
111                                         + source
112                                         + "/iso_country_codes.xml");
113                 try (PrintWriter pw =
114                         FileUtilities.openUTF8Writer(
115                                 CLDRPaths.GEN_DIRECTORY, "subdivision/" + source + ".txt")) {
116                     sdset1.print(pw);
117                 }
118             }
119             return;
120         }
121 
122         SubdivisionSet sdset1 = new SubdivisionSet(GenerateSubdivisions.ISO_SUBDIVISION_CODES);
123         SubDivisionExtractor sdset =
124                 new SubDivisionExtractor(
125                         sdset1,
126                         SubdivisionInfo.VALIDITY_FORMER,
127                         SubdivisionInfo.SUBDIVISION_ALIASES_FORMER,
128                         SubdivisionInfo.formerRegionToSubdivisions);
129 
130         try (PrintWriter pw =
131                 FileUtilities.openUTF8Writer(
132                         CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisions.xml")) {
133             sdset.printXml(pw);
134         }
135         try (PrintWriter pw =
136                 FileUtilities.openUTF8Writer(
137                         CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisionAliases.txt")) {
138             sdset.printAliases(pw);
139         }
140         try (PrintWriter pw =
141                 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.xml")) {
142             sdset.printEnglish(pw);
143         }
144         try (PrintWriter pw =
145                 FileUtilities.openUTF8Writer(
146                         CLDRPaths.GEN_DIRECTORY, "subdivision/categories.txt")) {
147             sdset.printSamples(pw);
148         }
149         try (PrintWriter pw =
150                 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.txt")) {
151             sdset.printEnglishComp(pw);
152         }
153         try (PrintWriter pw =
154                 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en-full.txt")) {
155             sdset.printEnglishCompFull(pw);
156         }
157         try (PrintWriter pw =
158                 FileUtilities.openUTF8Writer(
159                         CLDRPaths.GEN_DIRECTORY, "subdivision/missing-mid.txt")) {
160             sdset.printMissingMIDs(pw);
161         }
162     }
163 }
164