package org.unicode.cldr.tool;

import com.google.common.collect.ImmutableMap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.util.ULocale;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.Iso639Data.Scope;
import org.unicode.cldr.util.Iso639Data.Type;
import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;
import org.unicode.cldr.util.XPathParts;

/**
 * Command-line tool that writes enum-style source fragments for currencies, languages, scripts
 * and regions into text files under {@code CLDRPaths.GEN_DIRECTORY + "/enum/"}, and prints
 * diagnostic comparisons of the underlying code sets to {@code System.out}.
 *
 * <p>{@link #main(String[])} runs the steps in a fixed order; {@link #loadCLDRData()} must run
 * before any of the {@code show*} steps that read the currency, script, language or region
 * fields it populates.
 */
public class GenerateEnums {
    // NOTE(review): these three indents are all a single space as received, and the
    // "sample row" literals used to compute line lengths below contain only single spaces.
    // That looks like whitespace collapsed in transit -- confirm the intended widths against
    // the upstream file before relying on the generated layout.
    private static final String CODE_INDENT = " ";

    private static final String DATA_INDENT = " ";

    private static final String LIST_INDENT = " ";

    /** Formats an M.49 numeric code as exactly three digits. */
    static NumberFormat threeDigit = new DecimalFormat("000");

    static SupplementalDataInfo supplementalDataInfo =
            SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);

    static NumberFormat nf = NumberFormat.getInstance(Locale.ENGLISH);

    static NumberFormat sf = NumberFormat.getScientificInstance(Locale.ENGLISH);

    static {
        nf.setMaximumFractionDigits(3);
        sf.setMaximumFractionDigits(3);
        nf.setGroupingUsed(false);
    }

    /** Removes the two apostrophe-like characters listed in the rule from names. */
    static Transliterator doFallbacks =
            Transliterator.createFromRules("id", "[’ʻ] > ''; ", Transliterator.FORWARD);

    private final StandardCodes sc = StandardCodes.make();

    private final Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");

    /** Every region code seen while loading the numeric, alpha3 and validity data. */
    private final Set<String> cldrCodes = new TreeSet<>();

    /** Region code to ISO alpha-3 code. */
    private final Map<String, String> enum_alpha3 = new TreeMap<>();

    /** Region enum name to three-digit numeric code. */
    private final Map<String, String> enum_UN = new TreeMap<>();

    private final CLDRFile english = factory.make("en", false);

    private final CLDRFile supplementalMetadata = factory.make("supplementalMetadata", false);

    private final CLDRFile supplementalData = factory.make("supplementalData", false);

    /** Currency code to the regions where it has no end date, or one not yet reached. */
    private Relation<String, String> unlimitedCurrencyCodes;

    private Set<String> scripts = new TreeSet<>();

    private Set<String> languages = new TreeSet<>();

    /** Container region to its directly contained regions; filled by {@link #getContainment()}. */
    Map<String, List<String>> containment = new TreeMap<>();

    /** Region codes that {@link #loadCLDRData()} deliberately skips. */
    Set<String> corrigendum =
            new TreeSet<>(Arrays.asList(new String[] {"QE", "833", "830", "172"})); // 003, 419

    /** English names that take precedence over the CLDR English territory names. */
    private final ImmutableMap<String, String> extraNames =
            ImmutableMap.<String, String>builder()
                    .put("BU", "Burma")
                    .put("TP", "East Timor")
                    .put("YU", "Yugoslavia")
                    .put("ZR", "Zaire")
                    .put("CD", "Congo (Kinshasa, Democratic Republic)")
                    .put("CI", "Ivory Coast (Cote d'Ivoire)")
                    .put("FM", "Micronesia (Federated States)")
                    .put("TL", "East Timor (Timor-Leste)")
                    // .put("155", "Western Europe")
                    .build();

    /** Currency codes found in the supplemental currencyData; set by loadCLDRData(). */
    private Set<String> currencyCodes;

    /** Upper-cased BCP 47 "cu" key values; set by loadCLDRData(). */
    private Set<String> validCurrencyCodes;

    /**
     * Runs every generation step in order and prints {@code DONE} when finished.
     *
     * @param args ignored
     * @throws IOException if a data file cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        GenerateEnums gen = new GenerateEnums();
        gen.showLanguageInfo();
        gen.loadCLDRData();
        gen.showCounts();
        gen.showCurrencies();
        gen.showLanguages();
        gen.showScripts();
        gen.showRegionCodeInfo();
        System.out.println("DONE");
    }

    /** Prints the number of good language, script and territory subtags to System.out. */
    private void showCounts() {
        System.out.format(
                "Language Subtags: %s" + CldrUtility.LINE_SEPARATOR,
                sc.getGoodAvailableCodes("language").size());
        System.out.format(
                "Script Subtags: %s" + CldrUtility.LINE_SEPARATOR,
                sc.getGoodAvailableCodes("script").size());
        System.out.format(
                "Territory Subtags: %s" + CldrUtility.LINE_SEPARATOR,
                sc.getGoodAvailableCodes("territory").size());
    }

    /**
     * Writes {@code currency_enum.txt}: one row per valid currency, ordered by English name.
     * Valid currencies absent from the supplemental currency data are omitted, except CLF.
     */
    private void showCurrencies() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/currency_enum.txt");
        Log.println();
        Log.println("Currency Data");
        Log.println();
        showGeneratedCommentStart(CODE_INDENT);
        compareSets(
                "currencies from sup.data", currencyCodes, "valid currencies", validCurrencyCodes);
        Set<String> unused = new TreeSet<>(validCurrencyCodes);
        unused.removeAll(currencyCodes);
        showCurrencies(currencyCodes);
        Log.println();
        showCurrencies(unused);

        Map<String, String> nameToCode = new TreeMap<>(Collator.getInstance(ULocale.ENGLISH));
        for (String code : validCurrencyCodes) {
            if (unused.contains(code) && !code.equals("CLF")) {
                continue; // we include CLF for compatibility
            }
            nameToCode.put(getName(code), code);
        }
        int lineLength = " /** Belgian Franc */ BEF,".length();
        for (Map.Entry<String, String> entry : nameToCode.entrySet()) {
            printRow(
                    Log.getLog(), entry.getValue(), entry.getKey(), "currency", null, lineLength);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
    }

    /**
     * Returns the English name of a currency, falling back to the code itself (and reporting
     * the miss on System.out) when there is none.
     */
    private String getName(String code) {
        String result = english.getName(CLDRFile.CURRENCY_NAME, code);
        if (result == null) {
            result = code;
            System.out.println("Failed to find: " + code);
        }
        return result;
    }

    /**
     * Prints one tab-separated diagnostic line per currency: code, English name, whether it
     * is only in the valid set or only in the supplemental data, and its current regions.
     */
    private void showCurrencies(Set<String> both) {
        for (String code : both) {
            String englishName = getName(code);
            Set<String> regions = unlimitedCurrencyCodes.getAll(code);
            System.out.println(
                    code
                            + "\t"
                            + englishName
                            + "\t"
                            + (validCurrencyCodes.contains(code)
                                    ? currencyCodes.contains(code) ? "" : "valid-only"
                                    : "supp-only")
                            + "\t"
                            + (regions != null ? regions : "unused"));
        }
    }

    /** Writes {@code script_enum.txt}: one row per CLDR script code that has an English name. */
    private void showScripts() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/script_enum.txt");
        Log.println();
        Log.println("Script Data");
        Log.println();

        showGeneratedCommentStart(CODE_INDENT);
        Map<String, String> code_replacements = new TreeMap<>();
        int len = " /** Arabic */ Arab,".length();
        for (String code : scripts) {
            String englishName = english.getName(CLDRFile.SCRIPT_NAME, code);
            if (englishName == null) {
                continue;
            }
            printRow(Log.getLog(), code, null, "script", code_replacements, len);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
    }

    /**
     * Writes {@code language_info.txt}: for each non-private-use language, a {@code to(...)}
     * line listing its alpha-3 code, its type when not Living, and its scope when not
     * Individual. Languages with nothing to add produce no line.
     */
    private void showLanguageInfo() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_info.txt");
        System.out.println();
        System.out.println("Language Converter");
        System.out.println();
        StringBuilder buffer = new StringBuilder();
        for (String language : sc.getAvailableCodes("language")) {
            Scope scope = Iso639Data.getScope(language);
            if (scope == Scope.PrivateUse) {
                continue;
            }
            buffer.setLength(0);
            String alpha3 = Iso639Data.toAlpha3(language);
            if (alpha3 != null) {
                buffer.append(".add(\"").append(alpha3).append("\")");
            }
            Type type = Iso639Data.getType(language);
            if (type != Type.Living) {
                buffer.append(".add(Type.").append(type).append(")");
            }
            if (scope != Scope.Individual) {
                buffer.append(".add(Scope.").append(scope).append(")");
            }
            if (buffer.length() > 0) {
                Log.println("\t\tto(\"" + language + "\")" + buffer + ";");
            }
        }
        Log.close();
    }

    /**
     * Prints each named language to System.out and writes {@code language_enum.txt}, a list of
     * string-concatenation operands holding the language codes. A new operand starts whenever
     * the first letter changes or the line limit would be exceeded; "root" is left out.
     */
    private void showLanguages() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/language_enum.txt");
        System.out.println();
        System.out.println("Language Data");
        System.out.println();

        for (String code : languages) {
            String englishName = english.getName(CLDRFile.LANGUAGE_NAME, code);
            if (englishName == null) {
                continue;
            }
            System.out.println(" /**" + englishName + "*/ " + code + ",");
        }

        showGeneratedCommentStart(LIST_INDENT);
        /*
         * get the form: "anp frr frs gsw krl zxx aa ab ace ach ada ady ae af afa
         * afh" + " ain ak akk ale alg alt am an ang apa ar arc arn arp art arw" + "
         * as ast ath aus av awa ay az ba bad bai bal ban bas bat be"
         */
        StringBuilder buffer = new StringBuilder();
        int lineLimit = 70 - LIST_INDENT.length();
        char lastChar = 0;
        for (String code : languages) {
            if (code.equals("root")) {
                continue;
            }
            if (code.charAt(0) != lastChar || buffer.length() + 1 + code.length() > lineLimit) {
                if (buffer.length() != 0) {
                    Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
                }
                buffer.setLength(0);
                lastChar = code.charAt(0);
            }
            buffer.append(code).append(' ');
        }
        // The buffer is empty when there were no languages (or only "root"); in that case
        // there is no trailing chunk to trim or emit.
        if (buffer.length() > 0) {
            // remove the very last space
            if (buffer.charAt(buffer.length() - 1) == ' ') {
                buffer.setLength(buffer.length() - 1);
            }
            Log.println(LIST_INDENT + "+ \"" + buffer + "\"");
        }

        showGeneratedCommentEnd(LIST_INDENT);
        Log.close();
    }

    /**
     * Joins the elements of a collection with a separator.
     *
     * @return the joined string, or null when {@code collection} is null
     */
    private Object join(Collection<?> collection, String separator) {
        if (collection == null) {
            return null;
        }
        StringBuilder result = new StringBuilder();
        boolean first = true;
        for (Object item : collection) {
            if (first) {
                first = false;
            } else {
                result.append(separator);
            }
            result.append(item);
        }
        return result.toString();
    }

    /**
     * Populates the region, currency, script and language fields from the CLDR data and prints
     * consistency diagnostics along the way.
     *
     * @throws IOException if {@code UnMacroRegions.txt} cannot be read
     * @throws IllegalArgumentException if the registry lists territory codes that were not
     *     found in CLDR (apart from a fixed list of known exceptions)
     */
    public void loadCLDRData() throws IOException {
        loadTerritoryCodeMappings();
        Map<String, String> macroNames = readMacroRegionNames();
        addValidRegions();
        checkDuplicates(enum_UN);
        checkDuplicates(enum_alpha3);
        checkRegistryCoverage();
        reportUnknownMacroRegions(macroNames);
        reportNameDifferences();

        getContainment();

        loadCurrencyData();

        scripts = supplementalDataInfo.getCLDRScriptCodes();
        languages = supplementalDataInfo.getCLDRLanguageCodes();
    }

    /** Fills enum_UN and enum_alpha3 from the numeric and alpha3 territory mappings. */
    private void loadTerritoryCodeMappings() {
        DecimalFormat threeDigits = new DecimalFormat("000");
        for (String value : supplementalDataInfo.getNumericTerritoryMapping().keySet()) {
            cldrCodes.add(value);
            if (isPrivateUseRegion(value)) {
                continue;
            }
            enum_UN.put(
                    value,
                    threeDigits.format(
                            supplementalDataInfo
                                    .getNumericTerritoryMapping()
                                    .getAll(value)
                                    .iterator()
                                    .next()));
        }
        for (String value : supplementalDataInfo.getAlpha3TerritoryMapping().keySet()) {
            cldrCodes.add(value);
            if (isPrivateUseRegion(value)) {
                continue;
            }
            enum_alpha3.put(
                    value,
                    supplementalDataInfo
                            .getAlpha3TerritoryMapping()
                            .getAll(value)
                            .iterator()
                            .next());
        }
    }

    /**
     * Reads {@code UnMacroRegions.txt} and maps each three-digit code to its full source line.
     * Blank lines are ignored; lines not starting with a digit are reported and skipped.
     */
    private Map<String, String> readMacroRegionNames() throws IOException {
        Map<String, String> macroNames = new TreeMap<>();
        // try-with-resources so the reader is closed even if a line fails to parse
        try (BufferedReader codes = CldrUtility.getUTF8Data("UnMacroRegions.txt")) {
            String line;
            while ((line = codes.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                char first = line.charAt(0);
                if (first < '0' || first > '9') {
                    System.out.println("GenerateEnums: Skipping: " + line);
                    continue;
                }
                int code = Integer.parseInt(line.split("\\s+")[0]);
                macroNames.put(threeDigit.format(code), line);
            }
        }
        return macroNames;
    }

    /**
     * Walks the regular and macroregion codes from the validity data: numeric ones are added
     * to enum_UN under their enum name, alphabetic ones are checked for an alpha3 mapping.
     */
    private void addValidRegions() {
        Map<Status, Set<String>> validRegions =
                Validity.getInstance().getStatusToCodes(LstrType.region);
        Set<String> regions = new TreeSet<>();
        regions.addAll(validRegions.get(Status.regular));
        regions.addAll(validRegions.get(Status.macroregion));
        for (String region : regions) {
            if (corrigendum.contains(region)) {
                System.out.println("Skipping " + region + "\t\t" + getEnglishName(region));
                continue; // exception, corrigendum
            }
            if (isPrivateUseRegion(region)) {
                continue;
            }
            if (region.charAt(0) < 'A') { // numeric
                enum_UN.put(enumName(region), region);
                cldrCodes.add(region);
            } else if (enum_alpha3.get(region) == null) {
                System.out.println("Missing alpha3 for: " + region);
            }
        }
    }

    /** Compares registry territory codes with cldrCodes and fails on unexpected gaps. */
    private void checkRegistryCoverage() {
        Set<String> availableCodes = new TreeSet<>(sc.getAvailableCodes("territory"));
        compareSets("RFC 4646", availableCodes, "CLDR", cldrCodes);
        Set<String> missing = new TreeSet<>(availableCodes);
        missing.removeAll(cldrCodes);
        // Remove the following. They don't have numeric or alpha3 codes so they can't be found.
        missing.remove("EA");
        missing.remove("EZ");
        missing.remove("IC");
        missing.remove("QU");
        missing.remove("UN");
        missing.remove("CQ");

        if (missing.size() != 0) {
            throw new IllegalArgumentException("Codes in Registry but not in CLDR: " + missing);
        }
    }

    /** Prints every macro-region code from the file that is not a value in enum_UN. */
    private void reportUnknownMacroRegions(Map<String, String> macroNames) {
        Set<String> unValues = new TreeSet<>(enum_UN.values());
        for (Map.Entry<String, String> entry : macroNames.entrySet()) {
            if (!unValues.contains(entry.getKey())) {
                System.out.println("Macro " + entry.getKey() + "\t" + entry.getValue());
            }
        }
    }

    /** Prints every region whose English name differs from its registry name. */
    private void reportNameDifferences() {
        for (String region : enum_UN.keySet()) {
            String englishName = getEnglishName(region);
            if (englishName == null) {
                englishName = "NULL"; // for debugging
            }
            String rfcName = getRFC3066Name(region);
            if (!englishName.equals(rfcName)) {
                System.out.println(
                        "Different names: {\""
                                + region
                                + "\",\t\""
                                + englishName
                                + " ("
                                + rfcName
                                + ")\"},");
            }
        }
    }

    /**
     * Fills currencyCodes, unlimitedCurrencyCodes and validCurrencyCodes. A currency counts as
     * unlimited for a region when it has no "to" date or the date is today or later.
     */
    private void loadCurrencyData() {
        DateFormat[] endDateFormats = {
            new SimpleDateFormat("yyyy-MM-dd"),
            new SimpleDateFormat("yyyy-MM"),
            new SimpleDateFormat("yyyy"),
        };
        Date today = new Date();
        // Sanity floor for parsed dates. Kept on the deprecated constructor so the cut-off
        // (local time, year 1000, month index 1) stays exactly what it was.
        @SuppressWarnings("deprecation")
        Date longAgo = new Date(1000 - 1900, 1, 1);
        currencyCodes = new TreeSet<>();
        unlimitedCurrencyCodes =
                Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null);
        Iterator<String> paths =
                supplementalData.iterator("//supplementalData/currencyData/region");
        while (paths.hasNext()) {
            XPathParts parts = XPathParts.getFrozenInstance(paths.next());
            String region = parts.findAttributeValue("region", "iso3166");
            String code = parts.findAttributeValue("currency", "iso4217");
            String to = parts.findAttributeValue("currency", "to");
            if (to == null) {
                unlimitedCurrencyCodes.put(code, region);
            } else {
                Date end = parseWithFirstMatchingFormat(to, endDateFormats);
                if (end == null || end.compareTo(longAgo) < 0) {
                    System.out.println("Date Error: can't parse " + to);
                } else if (end.compareTo(today) >= 0) {
                    unlimitedCurrencyCodes.put(code, region);
                }
            }
            currencyCodes.add(code);
        }

        validCurrencyCodes = new TreeSet<>();
        Set<String> bcp47CurrencyCodes = supplementalDataInfo.getBcp47Keys().getAll("cu");
        for (String code : bcp47CurrencyCodes) {
            // Locale.ROOT: the default-locale overload maps 'i' to dotted 'İ' under Turkish.
            validCurrencyCodes.add(code.toUpperCase(Locale.ROOT));
        }
    }

    /**
     * Parses {@code text} with the first format that accepts it.
     *
     * @return the parsed date, or null when no format matches
     */
    private static Date parseWithFirstMatchingFormat(String text, DateFormat[] formats) {
        for (DateFormat format : formats) {
            try {
                return format.parse(text);
            } catch (ParseException ignored) {
                // not in this format; fall through to the next one
            }
        }
        return null;
    }

    /**
     * Rebuilds {@link #containment} from the territoryContainment groups and prints how the
     * set of regions reachable from "001" compares with cldrCodes.
     */
    public void getContainment() {
        // <group type="001" contains="002 009 019 142 150"/> <!--World -->
        Iterator<String> paths =
                supplementalData.iterator("//supplementalData/territoryContainment/group");
        while (paths.hasNext()) {
            String fullPath = supplementalData.getFullXPath(paths.next());
            XPathParts parts = XPathParts.getFrozenInstance(fullPath);
            String container = parts.getAttributeValue(parts.size() - 1, "type");
            final String containedString = parts.getAttributeValue(-1, "contains");
            List<String> contained = Arrays.asList(containedString.trim().split("\\s+"));
            containment.put(container, contained);
        }
        Set<String> startingFromWorld = new TreeSet<>();
        addContains("001", startingFromWorld);
        compareSets("World", startingFromWorld, "CLDR", cldrCodes);
    }

    /** Writes one {@code add("container", "members...")} line per containment entry. */
    private void generateContains() {
        for (Map.Entry<String, List<String>> entry : containment.entrySet()) {
            String setAsString = CldrUtility.join(entry.getValue(), " ");
            Log.println("\t\tadd(\"" + entry.getKey() + "\", \"" + setAsString + "\");");
        }
    }

    /**
     * Adds {@code string} and everything it transitively contains to the given set.
     *
     * <p>A region already in the set is not walked again; the resulting set is the same, but
     * shared sub-trees are visited once and a cyclic containment entry cannot recurse forever.
     */
    private void addContains(String string, Set<String> startingFromWorld) {
        if (!startingFromWorld.add(string)) {
            return;
        }
        List<String> contained = containment.get(string);
        if (contained == null) {
            return;
        }
        for (String child : contained) {
            addContains(child, startingFromWorld);
        }
    }

    /** Prints the two one-sided differences between the named sets to System.out. */
    private void compareSets(
            String name, Set<String> availableCodes, String name2, Set<String> cldrCodes) {
        Set<String> temp = new TreeSet<>(availableCodes);
        temp.removeAll(cldrCodes);
        System.out.println("In " + name + " but not in " + name2 + ": " + temp);
        temp.clear();
        temp.addAll(cldrCodes);
        temp.removeAll(availableCodes);
        System.out.println("Not in " + name + " but in " + name2 + ": " + temp);
    }

    /** Prints a line for every value of the map that is shared by more than one key. */
    private <K, V> void checkDuplicates(Map<K, V> m) {
        Map<V, K> backMap = new HashMap<>();
        for (Map.Entry<K, V> entry : m.entrySet()) {
            K key = entry.getKey();
            V value = entry.getValue();
            K otherKey = backMap.get(value);
            if (otherKey != null) {
                System.out.println("Collision with: " + key + ",\t" + otherKey + ",\t" + value);
            } else {
                backMap.put(value, key);
            }
        }
    }

    /**
     * Writes the three region output files: {@code region_enum.txt}, {@code region_info.txt}
     * and {@code region_converters.txt}.
     *
     * @throws IOException if an output file cannot be opened
     */
    private void showRegionCodeInfo() throws IOException {
        Map<String, String> code_replacements = writeRegionEnum();
        writeRegionInfo();
        writeRegionConverters(code_replacements);
    }

    /**
     * Writes {@code region_enum.txt}, shortest codes first.
     *
     * @return the deprecated region codes mapped to their replacements, as collected by
     *     {@code printRow}
     */
    private Map<String, String> writeRegionEnum() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_enum.txt");
        System.out.println();
        System.out.println("Data for RegionCode");
        System.out.println();
        showGeneratedCommentStart(CODE_INDENT);

        Set<String> reordered = new TreeSet<>(new LengthFirstComparator());
        reordered.addAll(enum_UN.keySet());
        Map<String, String> code_replacements = new TreeMap<>();
        int len = " /** Polynesia */ UN061,".length();
        for (String region : reordered) {
            printRow(Log.getLog(), region, null, "territory", code_replacements, len);
        }
        showGeneratedCommentEnd(CODE_INDENT);
        Log.close();
        return code_replacements;
    }

    /**
     * Writes {@code region_info.txt}: an {@code addRegion} line per territory with population
     * data (except ZZ), followed by an {@code addLanguage} line for each of its languages that
     * is not sourced from ISO 639-3 and has a non-zero population and a defined literacy ratio.
     */
    private void writeRegionInfo() throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_info.txt");
        Log.println();
        Log.println("Data for ISO Region Codes");
        Log.println();
        for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) {
            if (territory.equals("ZZ")) {
                continue;
            }
            PopulationData popData = supplementalDataInfo.getPopulationDataForTerritory(territory);
            Log.formatln(
                    " addRegion(RegionCode.%s, %s, %s, %s) // %s",
                    territory,
                    format(popData.getPopulation()),
                    format(popData.getLiteratePopulation() / popData.getPopulation()),
                    format(popData.getGdp()),
                    english.getName("territory", territory));
            // remove all the ISO 639-3 until they are part of BCP 47
            // we need to remove in earlier pass so we have the count
            Set<String> keptLanguages = new TreeSet<>();
            for (String language :
                    supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(territory)) {
                if (Iso639Data.getSource(language) == Iso639Data.Source.ISO_639_3) {
                    continue;
                }
                popData =
                        supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                                language, territory);
                if (popData.getPopulation() == 0
                        || Double.isNaN(
                                popData.getLiteratePopulation() / popData.getPopulation())) {
                    continue;
                }
                keptLanguages.add(language);
            }
            int count = keptLanguages.size();
            for (String language : keptLanguages) {
                --count; // we need to know the last one
                popData =
                        supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                                language, territory);
                Log.formatln(
                        " .addLanguage(\"%s\", %s, %s)%s // %s",
                        language,
                        format(popData.getPopulation()),
                        format(popData.getLiteratePopulation() / popData.getPopulation()),
                        (count == 0 ? ";" : ""),
                        english.getName(language));
            }
        }
        Log.close();
    }

    /**
     * Writes {@code region_converters.txt} (alpha3 and M.49 numeric {@code add} lines, each
     * followed by the alias lines), prints a plain list of the non-deprecated regions to
     * System.out, and refreshes the containment data before closing the log.
     */
    private void writeRegionConverters(Map<String, String> code_replacements) throws IOException {
        Log.setLog(CLDRPaths.GEN_DIRECTORY + "/enum/region_converters.txt");
        Log.println();
        Log.println("Data for ISO Region Codes");
        Log.println();
        showGeneratedCommentStart(DATA_INDENT);
        // addInfo(RegionCode.US, 840, "USA", "US", "US/XX", ....); ... are containees
        Set<String> reordered = new TreeSet<>(new DeprecatedAndLengthFirstComparator("territory"));
        reordered.addAll(enum_UN.keySet());
        for (String region : reordered) {
            String isoCode = enum_alpha3.get(region);
            if (isoCode == null) {
                continue;
            }
            Log.println(
                    DATA_INDENT + "add(" + quote(isoCode) + ", " + "RegionCode." + region + ");");
        }
        doAliases(code_replacements);
        showGeneratedCommentEnd(DATA_INDENT);
        Log.println();
        Log.println("Data for M.49 Region Codes");
        Log.println();
        showGeneratedCommentStart(DATA_INDENT);

        for (String region : reordered) {
            // parse so the number is written without leading zeros (no octal-looking literal)
            int un = Integer.parseInt(enum_UN.get(region), 10);
            Log.println(DATA_INDENT + "add(" + un + ", " + "RegionCode." + region + ");");
        }
        doAliases(code_replacements);

        System.out.println("Plain list");
        for (String region : reordered) {
            if (code_replacements.get(region) != null) {
                continue;
            }
            int un = Integer.parseInt(enum_UN.get(region), 10);
            System.out.println(un + "\t" + region + "\t" + english.getName("territory", region));
        }

        showGeneratedCommentEnd(DATA_INDENT);

        getContainment();
        Log.close();
    }

    /**
     * Formats a number as the shorter of its plain form (rounded to three decimals) and its
     * scientific form; the plain form wins ties.
     */
    private String format(double value) {
        double newValue = CldrUtility.roundToDecimals(value, 3);
        String option1 = nf.format(newValue);
        String option2 = sf.format(value);
        return option1.length() <= option2.length() ? option1 : option2;
    }

    /** Writes an {@code addAlias} line per replacement; an empty replacement becomes "ZZ". */
    private void doAliases(Map<String, String> code_replacements) {
        for (Map.Entry<String, String> entry : code_replacements.entrySet()) {
            String newCode = entry.getValue();
            if (newCode.length() == 0) {
                newCode = "ZZ";
            }
            Log.println(
                    DATA_INDENT
                            + "addAlias("
                            + "RegionCode."
                            + entry.getKey()
                            + ", \""
                            + newCode
                            + "\");");
        }
    }

    /** Writes the comment that closes a generated section. */
    private void showGeneratedCommentEnd(String indent) {
        Log.println(indent + "/* End of generated code. */");
    }

    /** Writes the comment that opens a generated section. */
    private void showGeneratedCommentStart(String indent) {
        Log.println(indent + "/*");
        Log.println(indent + " * The following information is generated from a tool,");
        Log.println(indent + " * as described on");
        Log.println(indent + " * http://wiki/Main/InternationalIdentifierUpdates.");
        Log.println(indent + " * Do not edit manually.");
        Log.println(indent + " * Start of generated code.");
        Log.println(indent + " */");
    }

    /** Orders by string length first, then by natural string order. */
    public static final class LengthFirstComparator implements Comparator<Object> {
        @Override
        public int compare(Object a, Object b) {
            String as = a.toString();
            String bs = b.toString();
            int byLength = Integer.compare(as.length(), bs.length());
            return byLength != 0 ? byLength : as.compareTo(bs);
        }
    }

    /** Orders deprecated codes first, then by string length, then by natural string order. */
    public final class DeprecatedAndLengthFirstComparator implements Comparator<Object> {
        String type;

        DeprecatedAndLengthFirstComparator(String type) {
            this.type = type;
        }

        @Override
        public int compare(Object a, Object b) {
            String as = a.toString();
            String bs = b.toString();
            boolean aDeprecated = getDeprecatedReplacement(type, as) != null;
            boolean bDeprecated = getDeprecatedReplacement(type, bs) != null;
            // put the deprecated ones first, eg those that aren't null
            if (aDeprecated != bDeprecated) {
                return aDeprecated ? -1 : 1;
            }
            // now check the length
            int byLength = Integer.compare(as.length(), bs.length());
            return byLength != 0 ? byLength : as.compareTo(bs);
        }
    }

    /**
     * Looks up the alias entry for a code in the supplemental metadata.
     *
     * @param type the code type; it selects the {@code <type>Alias} element ("currency" is
     *     never treated as deprecated)
     * @param cldrTypeValue the code to look up
     * @return null if not deprecated, otherwise "" if there is no replacement, otherwise the
     *     replacement
     */
    public String getDeprecatedReplacement(String type, String cldrTypeValue) {
        if (type.equals("currency")) {
            return null;
        }
        String aliasElement = type + "Alias";
        String path =
                supplementalMetadata.getFullXPath(
                        "//supplementalData/metadata/alias/"
                                + aliasElement
                                + "[@type=\""
                                + cldrTypeValue
                                + "\"]",
                        true);
        if (path == null) {
            return null;
        }
        XPathParts parts = XPathParts.getFrozenInstance(path);
        // Read from the same element the path was built for. This used to be hard-coded to
        // "territoryAlias", so no other type could ever report a replacement.
        String replacement = parts.findAttributeValue(aliasElement, "replacement");
        if (replacement == null) {
            return "";
        }
        return replacement;
    }

    /**
     * Writes one enum row: a Javadoc comment with the English name (plus a deprecation note
     * when the code has an alias entry), padding, then the code and a comma. A comment too
     * long for {@code lineLength} is broken at its last fitting space.
     *
     * @param out destination writer
     * @param codeName enum constant name; names of five or more characters are treated as
     *     "UN"-prefixed when looking up deprecation
     * @param englishName name to print, or null to look it up by {@code type}
     * @param type "territory", "currency", or anything else for scripts
     * @param code_replacements receives codeName to replacement for deprecated codes; may be
     *     null when the caller does not collect them
     * @param lineLength target width of the row
     */
    private void printRow(
            PrintWriter out,
            String codeName,
            String englishName,
            String type,
            Map<String, String> code_replacements,
            int lineLength) {
        String cldrName = codeName.length() < 5 ? codeName : codeName.substring(2); // fix UN name
        String replacement = getDeprecatedReplacement(type, cldrName);

        String resolvedEnglishName;
        if (englishName != null) {
            resolvedEnglishName = englishName;
        } else if (type.equals("territory")) {
            resolvedEnglishName = getEnglishName(codeName);
        } else if (type.equals("currency")) {
            resolvedEnglishName = getName(codeName);
        } else {
            resolvedEnglishName = english.getName(CLDRFile.SCRIPT_NAME, codeName);
        }
        resolvedEnglishName = doFallbacks.transliterate(resolvedEnglishName);

        String prefix = CODE_INDENT + "/** " + resolvedEnglishName;
        String printedCodeName = codeName;
        if (replacement != null) {
            if (code_replacements != null) {
                code_replacements.put(codeName, replacement);
            }
            out.println(prefix);
            prefix =
                    CODE_INDENT
                            + " * @deprecated"
                            + (replacement.length() == 0 ? "" : " see " + replacement);
            printedCodeName = "@Deprecated " + printedCodeName;
        }
        prefix += " */";

        if (codeName.equals("UN001")) {
            out.println();
        }
        int available = lineLength - (printedCodeName.length() + 1);
        if (prefix.length() > available) {
            // break at last space; when none fits, leave the comment on one (over-long) line
            int lastFit = prefix.lastIndexOf(' ', available - 2);
            if (lastFit >= 0) {
                out.println(prefix.substring(0, lastFit));
                prefix = CODE_INDENT + " *" + prefix.substring(lastFit);
            }
        }
        out.print(prefix);
        out.print(
                Utility.repeat(
                        " ", (lineLength - (prefix.length() + printedCodeName.length() + 1))));
        out.println(printedCodeName + ",");
    }

    /** Drops the two-character "UN" prefix from enum names longer than three characters. */
    private static String stripUnPrefix(String codeName) {
        return codeName.length() > 3 ? codeName.substring(2) : codeName;
    }

    /**
     * Returns the English name of a region: the extraNames override if present, else the CLDR
     * English territory name, else the code itself.
     */
    private String getEnglishName(String codeName) {
        codeName = stripUnPrefix(codeName); // fix UN name
        String name = extraNames.get(codeName);
        if (name != null) {
            return name;
        }
        name = english.getName(CLDRFile.TERRITORY_NAME, codeName);
        if (name != null) {
            return name;
        }
        return codeName;
    }

    /** Returns the first registry data field for a territory, or null when it has none. */
    private String getRFC3066Name(String codeName) {
        // Same prefix rule as getEnglishName; the former "> 2" test would have cut a bare
        // three-character code down to its last character.
        codeName = stripUnPrefix(codeName); // fix UN name
        List<String> list = sc.getFullData("territory", codeName);
        if (list == null) {
            return null;
        }
        return list.get(0);
    }

    /** Prefixes codes that start with a character below 'A' (numeric codes) with "UN". */
    private String enumName(String codeName) {
        return codeName.charAt(0) < 'A' ? "UN" + codeName : codeName;
    }

    /**
     * Returns the trimmed string form of {@code input} wrapped in double quotes, or null when
     * {@code input} is null.
     */
    static String quote(Object input) {
        if (input != null) {
            return '"' + input.toString().trim() + '"';
        }
        return null;
    }

    /**
     * Tells whether a region code is treated as private use here: AA, QM..QZ and XA..XZ,
     * except that EU, QO and ZZ are always reported as not private use.
     */
    static boolean isPrivateUseRegion(String codeName) {
        // AA, QM..QZ, XA..XZ, ZZ - CLDR codes
        if (codeName.equals("EU") || codeName.equals("QO") || codeName.equals("ZZ")) {
            return false;
        }
        if (codeName.equals("AA")) {
            return true;
        }
        if (codeName.compareTo("QM") >= 0 && codeName.compareTo("QZ") <= 0) {
            return true;
        }
        return codeName.compareTo("XA") >= 0 && codeName.compareTo("XZ") <= 0;
    }
    /*
     * <reset before="tertiary">ウ</reset> <x><context>ウ</context><t>ヽ</t></x>
     * <x><context>ウ</context><i>ヽ</i></x>
     *
     * <x><context>う</context><i>ゝ</i></x> <x><context>ゥ</context><i>ヽ</i></x>
     * <x><context>ゥ</context><i>ヽ</i></x> <x><context>ぅ</context><i>ゝ</i></x>
     * <x><context>ヴ</context><i>ヽ</i></x>
     *
     * <x><context>ゔ</context><i>ゝ</i></x> <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x>
     * <x><context>ウ</context><i>ヾ</i><extend>゙</extend></x> <x><context>う</context><i>ゞ</i><extend>゙</extend></x>
     *
     * <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x> <x><context>ゥ</context><i>ヾ</i><extend>゙</extend></x>
     * <x><context>ぅ</context><i>ゞ</i><extend>゙</extend></x> <x><context>ヴ</context><i>ヾ</i><extend>゙</extend></x>
     *
     * <x><context>ゔ</context><i>ゞ</i><extend>゙</extend></x>
     */
}