1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.HashMultimap; 4 import com.google.common.collect.Multimap; 5 import com.ibm.icu.impl.Relation; 6 import com.ibm.icu.impl.Row.R2; 7 import java.io.IOException; 8 import java.io.PrintWriter; 9 import java.util.Arrays; 10 import java.util.HashMap; 11 import java.util.List; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeSet; 16 import org.unicode.cldr.draft.FileUtilities; 17 import org.unicode.cldr.tool.SubdivisionNode.SubDivisionExtractor; 18 import org.unicode.cldr.tool.SubdivisionNode.SubdivisionSet; 19 import org.unicode.cldr.util.CLDRConfig; 20 import org.unicode.cldr.util.CLDRPaths; 21 import org.unicode.cldr.util.StandardCodes.LstrType; 22 import org.unicode.cldr.util.SupplementalDataInfo; 23 import org.unicode.cldr.util.Validity; 24 import org.unicode.cldr.util.Validity.Status; 25 26 public class GenerateSubdivisions { 27 private static final String ISO_COUNTRY_CODES = 28 CLDRPaths.CLDR_PRIVATE_DIRECTORY + "iso_country_codes/"; 29 static final String ISO_SUBDIVISION_CODES = ISO_COUNTRY_CODES + "iso_country_codes.xml"; 30 31 // TODO: consider whether to use the last archive directory to generate 32 // There are pros and cons. 33 // Pros are that we don't introduce "fake" deprecated elements that are introduced and 34 // deprecated during the 6 month CLDR cycle 35 // Cons are that we may have to repeat work 36 37 static final class SubdivisionInfo { 38 static final SupplementalDataInfo SDI_LAST = 39 SupplementalDataInfo.getInstance( 40 CLDRPaths.LAST_RELEASE_DIRECTORY + "common/supplemental/"); 41 42 static final Map<String, R2<List<String>, String>> SUBDIVISION_ALIASES_FORMER = 43 SDI_LAST.getLocaleAliasInfo().get("subdivision"); 44 45 static final SubdivisionNames SUBDIVISION_NAMES_ENGLISH_FORMER = 46 new SubdivisionNames("en", "main", "subdivisions"); 47 48 static final Validity VALIDITY_FORMER = 49 Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/"); 50 51 static final Relation<String, String> formerRegionToSubdivisions = 52 Relation.of( 53 new HashMap<String, Set<String>>(), 54 TreeSet.class, 55 CLDRConfig.getInstance().getComparatorRoot()); 56 57 static { 58 Map<Status, Set<String>> oldSubdivisionData = 59 VALIDITY_FORMER.getStatusToCodes(LstrType.subdivision); 60 for (Entry<Status, Set<String>> e : oldSubdivisionData.entrySet()) { 61 final Status status = e.getKey(); 62 if (status != Status.unknown) { // special is a hack 63 for (String sdCode : e.getValue()) { 64 final String region = SubdivisionNames.getRegionFromSubdivision(sdCode); formerRegionToSubdivisions.put(region, sdCode)65 formerRegionToSubdivisions.put(region, sdCode); 66 } 67 } 68 } formerRegionToSubdivisions.freeze()69 formerRegionToSubdivisions.freeze(); 70 } 71 72 static final Multimap<String, String> subdivisionIdToOld = HashMultimap.create(); 73 74 static { 75 for (Entry<String, R2<List<String>, String>> entry : 76 SUBDIVISION_ALIASES_FORMER.entrySet()) { 77 String oldId = entry.getKey(); 78 for (String newId : entry.getValue().get0()) { subdivisionIdToOld.put(newId, oldId)79 subdivisionIdToOld.put(newId, oldId); 80 } 81 } 82 } 83 } 84 main(String[] args)85 public static void main(String[] args) throws IOException { 86 CLDRConfig.getInstance().getSupplementalDataInfo(); 87 // TODO Restructure so that this call is done first to process the iso data 88 // then the extraction uses that data. 89 // also restructure the SubdivisionInfo to not be static 90 boolean preprocess = args.length > 0; 91 if (preprocess) { 92 for (String source : 93 Arrays.asList( 94 "2015-05-04_iso_country_code_ALL_xml", 95 "2016-01-13_iso_country_code_ALL_xml", 96 "2016-12-09_iso_country_code_ALL_xml", 97 "2017-02-12_iso_country_code_ALL_xml", 98 "2017-09-15_iso_country_code_ALL_xml", 99 "2018-02-20_iso_country_code_ALL_xml", 100 "2018-09-02_iso_country_code_ALL_xml", 101 "2019-02-26_iso_country_code_ALL_xml", 102 "2020-03-05_iso_country_code_ALL_xml", 103 "2020-09-09_iso_country_code_ALL_xml", 104 "2021-09-14_iso_country_code_ALL_xml", 105 "2022-02-22_iso_country_code_ALL_xml", 106 "2022-03-18_iso_country_code_ALL_xml", 107 "2022-08-26_iso_country_code_ALL_xml")) { 108 SubdivisionSet sdset1 = 109 new SubdivisionSet( 110 CLDRPaths.CLDR_PRIVATE_DIRECTORY 111 + source 112 + "/iso_country_codes.xml"); 113 try (PrintWriter pw = 114 FileUtilities.openUTF8Writer( 115 CLDRPaths.GEN_DIRECTORY, "subdivision/" + source + ".txt")) { 116 sdset1.print(pw); 117 } 118 } 119 return; 120 } 121 122 SubdivisionSet sdset1 = new SubdivisionSet(GenerateSubdivisions.ISO_SUBDIVISION_CODES); 123 SubDivisionExtractor sdset = 124 new SubDivisionExtractor( 125 sdset1, 126 SubdivisionInfo.VALIDITY_FORMER, 127 SubdivisionInfo.SUBDIVISION_ALIASES_FORMER, 128 SubdivisionInfo.formerRegionToSubdivisions); 129 130 try (PrintWriter pw = 131 FileUtilities.openUTF8Writer( 132 CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisions.xml")) { 133 sdset.printXml(pw); 134 } 135 try (PrintWriter pw = 136 FileUtilities.openUTF8Writer( 137 CLDRPaths.GEN_DIRECTORY, "subdivision/subdivisionAliases.txt")) { 138 sdset.printAliases(pw); 139 } 140 try (PrintWriter pw = 141 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.xml")) { 142 sdset.printEnglish(pw); 143 } 144 try (PrintWriter pw = 145 FileUtilities.openUTF8Writer( 146 CLDRPaths.GEN_DIRECTORY, "subdivision/categories.txt")) { 147 sdset.printSamples(pw); 148 } 149 try (PrintWriter pw = 150 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en.txt")) { 151 sdset.printEnglishComp(pw); 152 } 153 try (PrintWriter pw = 154 FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "subdivision/en-full.txt")) { 155 sdset.printEnglishCompFull(pw); 156 } 157 try (PrintWriter pw = 158 FileUtilities.openUTF8Writer( 159 CLDRPaths.GEN_DIRECTORY, "subdivision/missing-mid.txt")) { 160 sdset.printMissingMIDs(pw); 161 } 162 } 163 } 164