1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import com.ibm.icu.impl.Relation; 12 import com.ibm.icu.lang.UCharacter; 13 import com.ibm.icu.text.UnicodeSet; 14 import com.ibm.icu.util.Output; 15 import java.io.BufferedReader; 16 import java.util.ArrayList; 17 import java.util.Arrays; 18 import java.util.Collections; 19 import java.util.Comparator; 20 import java.util.EnumMap; 21 import java.util.EnumSet; 22 import java.util.HashMap; 23 import java.util.HashSet; 24 import java.util.Iterator; 25 import java.util.LinkedHashMap; 26 import java.util.LinkedHashSet; 27 import java.util.List; 28 import java.util.Locale; 29 import java.util.Map; 30 import java.util.Map.Entry; 31 import java.util.Set; 32 import java.util.TreeMap; 33 import java.util.TreeSet; 34 import java.util.regex.Pattern; 35 import org.unicode.cldr.draft.ScriptMetadata; 36 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 37 import org.unicode.cldr.util.Iso639Data.Type; 38 import org.unicode.cldr.util.ZoneParser.ZoneLine; 39 40 /** Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson tzids */ 41 public class StandardCodes { 42 43 /** 44 * Convenient for testing whether a locale is at least at Basic level 45 * 46 * @param locale 47 * @return 48 */ isLocaleAtLeastBasic(String locale)49 public static boolean isLocaleAtLeastBasic(String locale) { 50 return CalculatedCoverageLevels.getInstance().isLocaleAtLeastBasic(locale); 51 } 52 53 public enum CodeType { 54 language, 55 script, 56 territory, 57 extlang, 58 legacy, 59 redundant, 60 variant, 61 currency, 62 tzid; 63 from(String name)64 public static CodeType from(String name) { 65 if ("region".equals(name)) { 66 return territory; 67 } 68 return CodeType.valueOf(name); 69 } 70 } 71 72 private static final Set<CodeType> TypeSet = 73 Collections.unmodifiableSet(EnumSet.allOf(CodeType.class)); 74 75 private static final Set<String> TypeStringSet; 76 77 static { 78 LinkedHashSet<String> foo = new LinkedHashSet<>(); 79 for (CodeType x : CodeType.values()) { x.toString()80 foo.add(x.toString()); 81 } 82 TypeStringSet = Collections.unmodifiableSet(foo); 83 } 84 85 public static final String DESCRIPTION_SEPARATOR = "\u25AA"; 86 87 public static final String NO_COUNTRY = "001"; 88 89 private EnumMap<CodeType, Map<String, List<String>>> type_code_data = 90 new EnumMap<>(CodeType.class); 91 92 private EnumMap<CodeType, Map<String, List<String>>> type_name_codes = 93 new EnumMap<>(CodeType.class); 94 95 private EnumMap<CodeType, Map<String, String>> type_code_preferred = 96 new EnumMap<>(CodeType.class); 97 98 private Map<String, Set<String>> country_modernCurrency = new TreeMap<>(); 99 100 private Map<CodeType, Set<String>> goodCodes = new TreeMap<>(); 101 102 private static final boolean DEBUG = false; 103 104 private static final class StandardCodesHelper { 105 static final StandardCodes SINGLETON = new StandardCodes(); 106 } 107 /** Get the singleton copy of the standard codes. */ make()108 public static synchronized StandardCodes make() { 109 return StandardCodesHelper.SINGLETON; 110 } 111 112 /** 113 * The data is the name in the case of RFC3066 codes, and the country code in the case of TZIDs 114 * and ISO currency codes. If the country code is missing, uses ZZ. 115 */ getData(String type, String code)116 public String getData(String type, String code) { 117 Map<String, List<String>> code_data = getCodeData(type); 118 if (code_data == null) return null; 119 List<String> list = code_data.get(code); 120 if (list == null) return null; 121 return list.get(0); 122 } 123 124 /** 125 * @return the full data for the type and code For the data in lstreg, it is description | date 126 * | canonical_value | recommended_prefix # comments 127 */ getFullData(String type, String code)128 public List<String> getFullData(String type, String code) { 129 Map<String, List<String>> code_data = getCodeData(type); 130 if (code_data == null) return null; 131 return code_data.get(code); 132 } 133 134 /** 135 * @return the full data for the type and code For the data in lstreg, it is description | date 136 * | canonical_value | recommended_prefix # comments 137 */ getFullData(CodeType type, String code)138 public List<String> getFullData(CodeType type, String code) { 139 Map<String, List<String>> code_data = type_code_data.get(type); 140 if (code_data == null) return null; 141 return code_data.get(code); 142 } 143 getCodeData(String type)144 private Map<String, List<String>> getCodeData(String type) { 145 return getCodeData(CodeType.from(type)); 146 } 147 getCodeData(CodeType type)148 private Map<String, List<String>> getCodeData(CodeType type) { 149 return type_code_data.get(type); 150 } 151 getCodes(CodeType type)152 public Set<String> getCodes(CodeType type) { 153 return type_code_data.get(type).keySet(); 154 } 155 156 /** 157 * Get at the language registry values, as a Map from label to value. 158 * 159 * @param type 160 * @param code 161 * @return 162 */ getLangData(String type, String code)163 public Map<String, String> getLangData(String type, String code) { 164 try { 165 if (type.equals("territory")) type = "region"; 166 else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH); 167 return (Map) ((Map) getLStreg().get(type)).get(code); 168 } catch (RuntimeException e) { 169 return null; 170 } 171 } 172 173 /** Return a replacement code, if available. If not, return null. */ getReplacement(String type, String code)174 public String getReplacement(String type, String code) { 175 if (type.equals("currency")) return null; // no replacement codes for currencies 176 List<String> data = getFullData(type, code); 177 if (data == null) return null; 178 // if available, the replacement is a non-empty value other than --, in 179 // position 2. 180 if (data.size() < 3) return null; 181 String replacement = data.get(2); 182 if (!replacement.equals("") && !replacement.equals("--")) return replacement; 183 return null; 184 } 185 186 /** 187 * Return the list of codes that have the same data. For example, returns all currency codes for 188 * a country. If there is a preferred one, it is first. 189 * 190 * @param type 191 * @param data 192 * @return 193 */ 194 @Deprecated getCodes(String type, String data)195 public List<String> getCodes(String type, String data) { 196 return getCodes(CodeType.from(type), data); 197 } 198 199 /** 200 * Return the list of codes that have the same data. For example, returns all currency codes for 201 * a country. If there is a preferred one, it is first. 202 */ getCodes(CodeType type, String data)203 public List<String> getCodes(CodeType type, String data) { 204 Map<String, List<String>> data_codes = type_name_codes.get(type); 205 if (data_codes == null) return null; 206 return Collections.unmodifiableList(data_codes.get(data)); 207 } 208 209 /** Where there is a preferred code, return it. */ 210 @Deprecated getPreferred(String type, String code)211 public String getPreferred(String type, String code) { 212 return getPreferred(CodeType.from(type), code); 213 } 214 215 /** Where there is a preferred code, return it. */ getPreferred(CodeType type, String code)216 public String getPreferred(CodeType type, String code) { 217 Map<String, String> code_preferred = type_code_preferred.get(type); 218 if (code_preferred == null) return code; 219 String newCode = code_preferred.get(code); 220 if (newCode == null) return code; 221 return newCode; 222 } 223 224 /** Get all the available types */ getAvailableTypes()225 public Set<String> getAvailableTypes() { 226 return TypeStringSet; 227 } 228 229 /** Get all the available types */ getAvailableTypesEnum()230 public Set<CodeType> getAvailableTypesEnum() { 231 return TypeSet; 232 } 233 234 /** 235 * Get all the available codes for a given type 236 * 237 * @param type 238 * @return 239 */ getAvailableCodes(String type)240 public Set<String> getAvailableCodes(String type) { 241 return getAvailableCodes(CodeType.from(type)); 242 } 243 244 /** 245 * Get all the available codes for a given type 246 * 247 * @param type 248 * @return 249 */ getAvailableCodes(CodeType type)250 public Set<String> getAvailableCodes(CodeType type) { 251 Map<String, List<String>> code_name = type_code_data.get(type); 252 return Collections.unmodifiableSet(code_name.keySet()); 253 } 254 getGoodAvailableCodes(String stringType)255 public Set<String> getGoodAvailableCodes(String stringType) { 256 return getGoodAvailableCodes(CodeType.from(stringType)); 257 } 258 259 /** 260 * Get all the available "real" codes for a given type, excluding private use, but including 261 * some deprecated codes. Use SupplementalDataInfo getLocaleAliases to exclude others. 262 * 263 * @param type 264 * @return 265 */ getGoodAvailableCodes(CodeType type)266 public Set<String> getGoodAvailableCodes(CodeType type) { 267 Set<String> result = goodCodes.get(type); 268 if (result == null) { 269 synchronized (goodCodes) { 270 Map<String, List<String>> code_name = getCodeData(type); 271 SupplementalDataInfo sd = SupplementalDataInfo.getInstance(); 272 if (code_name == null) return null; 273 result = new TreeSet<>(code_name.keySet()); 274 switch (type) { 275 case currency: 276 break; // nothing special 277 case language: 278 return sd.getCLDRLanguageCodes(); 279 case script: 280 return sd.getCLDRScriptCodes(); 281 case tzid: 282 return sd.getCLDRTimezoneCodes(); 283 default: 284 for (Iterator<String> it = result.iterator(); it.hasNext(); ) { 285 String code = it.next(); 286 if (code.equals(LocaleNames.ROOT) || code.equals("QO")) continue; 287 List<String> data = getFullData(type, code); 288 if (data.size() < 3) { 289 if (DEBUG) System.out.println(code + "\t" + data); 290 } 291 if ("PRIVATE USE".equalsIgnoreCase(data.get(0)) 292 || (!data.get(2).equals("") && !data.get(2).equals("--"))) { 293 // System.out.println("Removing: " + code); 294 it.remove(); 295 } 296 } 297 } 298 result = Collections.unmodifiableSet(result); 299 goodCodes.put(type, result); 300 } 301 } 302 return result; 303 } 304 305 private static Set<String> GOOD_COUNTRIES; 306 getGoodCountries()307 public Set<String> getGoodCountries() { 308 synchronized (goodCodes) { 309 if (GOOD_COUNTRIES == null) { 310 Set<String> temp = new LinkedHashSet<>(); 311 for (String s : getGoodAvailableCodes(CodeType.territory)) { 312 if (isCountry(s)) { 313 temp.add(s); 314 } 315 } 316 GOOD_COUNTRIES = Collections.unmodifiableSet(temp); 317 } 318 } 319 return GOOD_COUNTRIES; 320 } 321 322 /** Gets the modern currency. */ getMainCurrencies(String countryCode)323 public Set<String> getMainCurrencies(String countryCode) { 324 return country_modernCurrency.get(countryCode); 325 } 326 327 // /** 328 // * Get rid of this 329 // * 330 // * @param type 331 // * @return 332 // * @throws IOException 333 // * @deprecated 334 // */ 335 // public String getEffectiveLocaleType(String type) throws IOException { 336 // if ((type != null) && 337 // (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) { 338 // return type; 339 // } else { 340 // return null; // the default.. for now.. 341 // } 342 // } 343 344 static Comparator caseless = 345 new Comparator() { 346 347 @Override 348 public int compare(Object arg0, Object arg1) { 349 String s1 = (String) arg0; 350 String s2 = (String) arg1; 351 return s1.compareToIgnoreCase(s2); 352 } 353 }; 354 355 /** Used for Locales.txt to mean "all" */ 356 public static final String ALL_LOCALES = "*"; 357 358 /** 359 * Returns locales according to status. It returns a Map of Maps, key 1 is either IBM or Java 360 * (perhaps more later), key 2 is the Level. 361 * 362 * @deprecated 363 */ 364 @Deprecated getLocaleTypes()365 public Map<Organization, Map<String, Level>> getLocaleTypes() { 366 synchronized (StandardCodes.class) { 367 return loadPlatformLocaleStatus().platform_locale_level; 368 } 369 } 370 371 /** 372 * Return map of locales to levels 373 * 374 * @param org 375 * @return 376 */ getLocaleToLevel(Organization org)377 public Map<String, Level> getLocaleToLevel(Organization org) { 378 return getLocaleTypes().get(org); 379 } 380 381 /** returns the highest level in the hierarchy, not including root. */ getHighestLocaleCoverageLevel(String organization, String locale)382 public Level getHighestLocaleCoverageLevel(String organization, String locale) { 383 // first get parent 384 final String parentId = LocaleIDParser.getParent(locale); 385 Level parentLevel = Level.UNDETERMINED; 386 if (parentId != null && !parentId.equals("root")) { 387 parentLevel = getHighestLocaleCoverageLevel(organization, parentId); // recurse 388 } 389 final Level ourLevel = getLocaleCoverageLevel(organization, locale); 390 if (parentLevel.getLevel() > ourLevel.getLevel()) { 391 // if parentLevel is higher 392 return parentLevel; 393 } else { 394 return ourLevel; 395 } 396 } 397 getLocaleCoverageLevel(String organization, String desiredLocale)398 public Level getLocaleCoverageLevel(String organization, String desiredLocale) { 399 return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale); 400 } 401 getLocaleCoverageLevel(Organization organization, String desiredLocale)402 public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) { 403 return getLocaleCoverageLevel( 404 organization, desiredLocale, new Output<LocaleCoverageType>()); 405 } 406 407 public enum LocaleCoverageType { 408 explicit, 409 parent, 410 star, 411 undetermined 412 } 413 414 /** 415 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if 416 * information is missing. A locale of "*" in the data means "everything else". 417 */ getLocaleCoverageLevel( Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)418 public Level getLocaleCoverageLevel( 419 Organization organization, 420 String desiredLocale, 421 Output<LocaleCoverageType> coverageType) { 422 coverageType.value = LocaleCoverageType.undetermined; 423 if (organization == null) { 424 return Level.UNDETERMINED; 425 } 426 Map<String, Level> locale_status = 427 loadPlatformLocaleStatus().platform_locale_level.get(organization); 428 if (locale_status == null) { 429 return Level.UNDETERMINED; 430 } 431 // see if there is a parent 432 String originalLocale = desiredLocale; 433 while (desiredLocale != null) { 434 Level status = locale_status.get(desiredLocale); 435 if (status != null && status != Level.UNDETERMINED) { 436 coverageType.value = 437 originalLocale == desiredLocale 438 ? LocaleCoverageType.explicit 439 : LocaleCoverageType.parent; 440 return status; 441 } 442 desiredLocale = LocaleIDParser.getParent(desiredLocale); 443 } 444 Level status = locale_status.get(ALL_LOCALES); 445 if (status != null && status != Level.UNDETERMINED) { 446 coverageType.value = LocaleCoverageType.star; 447 return status; 448 } 449 return Level.UNDETERMINED; 450 } 451 452 /** 453 * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if 454 * information is missing. 455 */ getDefaultLocaleCoverageLevel(Organization organization)456 public Level getDefaultLocaleCoverageLevel(Organization organization) { 457 return getLocaleCoverageLevel(organization, ALL_LOCALES); 458 } 459 getLocaleCoverageOrganizations()460 public Set<Organization> getLocaleCoverageOrganizations() { 461 return loadPlatformLocaleStatus().platform_locale_level.keySet(); 462 } 463 getLocaleCoverageOrganizationStrings()464 public Set<String> getLocaleCoverageOrganizationStrings() { 465 return loadPlatformLocaleStatus().platform_locale_levelString.keySet(); 466 } 467 getLocaleCoverageLocales(String organization)468 public Set<String> getLocaleCoverageLocales(String organization) { 469 return getLocaleCoverageLocales(Organization.fromString(organization)); 470 } 471 getLocaleCoverageLocales(Organization organization)472 public Set<String> getLocaleCoverageLocales(Organization organization) { 473 return loadPlatformLocaleStatus().platform_locale_level.get(organization).keySet(); 474 } 475 getLocalesToLevelsFor(Organization organization)476 public Map<String, Level> getLocalesToLevelsFor(Organization organization) { 477 return loadPlatformLocaleStatus().platform_locale_level.get(organization); 478 } 479 getLevelsToLocalesFor(Organization organization)480 public Relation<Level, String> getLevelsToLocalesFor(Organization organization) { 481 return loadPlatformLocaleStatus().platform_level_locale.get(organization); 482 } 483 getLocaleCoverageLocales(Organization organization, Set<Level> choice)484 public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) { 485 Set<String> result = new LinkedHashSet<>(); 486 for (String locale : getLocaleCoverageLocales(organization)) { 487 if (choice.contains(getLocaleCoverageLevel(organization, locale))) { 488 result.add(locale); 489 } 490 } 491 return result; 492 } 493 494 /** 495 * "The target coverage level is set to: - The CLDR Org coverage level if it exists, - Otherise, 496 * the maximum of all the coverage levels for that locale across all Organizations (max Modern) 497 * in Locales.txt, if there is at least one. - Otherwise Basic. - That makes the number the same 498 * for all Organizations, which makes communicating the values less prone to misinterpretation, 499 * and gives all the vetters and managers a common metric for that locale. 500 */ getTargetCoverageLevel(String localeId)501 public Level getTargetCoverageLevel(String localeId) { 502 Level level; 503 504 // First, try CLDR locale 505 level = getLocaleCoverageLevel(Organization.cldr, localeId); 506 if (level != Level.UNDETERMINED) { 507 return level; 508 } 509 510 // Next, Find maximum coverage level 511 for (final Organization o : Organization.values()) { 512 if (o == Organization.cldr 513 || // Already handled, above 514 o == Organization.unaffiliated 515 || o == Organization.surveytool) { 516 continue; // Skip some 'special' orgs 517 } 518 final Output<StandardCodes.LocaleCoverageType> outputType = new Output<>(); 519 final Level orgLevel = getLocaleCoverageLevel(o, localeId, outputType); 520 if (outputType.value == StandardCodes.LocaleCoverageType.undetermined 521 || outputType.value == StandardCodes.LocaleCoverageType.star) { 522 // Skip undetermined or star 523 continue; 524 } 525 // Pin the level to MODERN 526 final Level pinnedOrgLevel = Level.min(Level.MODERN, orgLevel); 527 // Accumulate the maxiumum org level (up to MODERN) 528 level = Level.max(level, pinnedOrgLevel); 529 } 530 if (level != Level.UNDETERMINED) { 531 return level; 532 } 533 534 // Otherwise, BASIC 535 level = Level.BASIC; 536 return level; 537 } 538 539 private static final class LocalesTxtHelper { 540 static LocalesTxtHelper SINGLETON = new LocalesTxtHelper(); 541 542 public LocalesTxtReader reader; 543 LocalesTxtHelper()544 LocalesTxtHelper() { 545 reader = new LocalesTxtReader().read(StandardCodes.make()); // circular dependency 546 } 547 } 548 549 /** 550 * Get the 'platform locale status' (aka Locales.txt) Note, do not call this from the 551 * StandardCodes constructor! 552 * 553 * @return 554 */ loadPlatformLocaleStatus()555 private LocalesTxtReader loadPlatformLocaleStatus() { 556 return LocalesTxtHelper.SINGLETON.reader; 557 } 558 validate(LocaleIDParser parser)559 String validate(LocaleIDParser parser) { 560 String message = ""; 561 String lang = parser.getLanguage(); 562 if (lang.length() == 0) { 563 message += ", Missing language"; 564 } else if (!getAvailableCodes("language").contains(lang)) { 565 message += ", Invalid language code: " + lang; 566 } 567 String script = parser.getScript(); 568 if (script.length() != 0 && !getAvailableCodes("script").contains(script)) { 569 message += ", Invalid script code: " + script; 570 } 571 String territory = parser.getRegion(); 572 if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) { 573 message += ", Invalid territory code: " + lang; 574 } 575 return message.length() == 0 ? message : message.substring(2); 576 } 577 578 /** 579 * Ascertain that the given locale in in the given group specified by the organization 580 * 581 * @param locale 582 * @param group 583 * @param org 584 * @return boolean 585 */ isLocaleInGroup(String locale, String group, Organization org)586 public boolean isLocaleInGroup(String locale, String group, Organization org) { 587 return group.equals(getGroup(locale, org)); 588 } 589 isLocaleInGroup(String locale, String group, String org)590 public boolean isLocaleInGroup(String locale, String group, String org) { 591 return isLocaleInGroup(locale, group, Organization.fromString(org)); 592 } 593 getGroup(String locale, String org)594 public String getGroup(String locale, String org) { 595 return getGroup(locale, Organization.fromString(org)); 596 } 597 598 /** 599 * Gets the coverage group given a locale and org 600 * 601 * @param locale 602 * @param org 603 * @return group if availble, null if not 604 */ getGroup(String locale, Organization org)605 private String getGroup(String locale, Organization org) { 606 Level l = getLocaleCoverageLevel(org, locale); 607 if (l.equals(Level.UNDETERMINED)) { 608 return null; 609 } else { 610 return l.toString(); 611 } 612 } 613 614 // ========== PRIVATES ========== 615 StandardCodes()616 private StandardCodes() { 617 String[] files = {"ISO4217.txt"}; // , "TZID.txt" 618 type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>()); 619 add(CodeType.language, "root", "Root"); 620 String originalLine = null; 621 for (int fileIndex = 0; fileIndex < files.length; ++fileIndex) { 622 try { 623 BufferedReader lstreg = CldrUtility.getUTF8Data(files[fileIndex]); 624 while (true) { 625 String line = originalLine = lstreg.readLine(); 626 if (line == null) break; 627 if (line.startsWith("\uFEFF")) { 628 line = line.substring(1); 629 } 630 line = line.trim(); 631 int commentPos = line.indexOf('#'); 632 String comment = ""; 633 if (commentPos >= 0) { 634 comment = line.substring(commentPos + 1).trim(); 635 line = line.substring(0, commentPos); 636 } 637 if (line.length() == 0) continue; 638 List<String> pieces = 639 CldrUtility.splitList(line, '|', true, new ArrayList<String>()); 640 CodeType type = CodeType.from(pieces.get(0)); 641 pieces.remove(0); 642 643 String code = pieces.get(0); 644 pieces.remove(0); 645 if (type.equals("date")) { 646 continue; 647 } 648 649 String oldName = pieces.get(0); 650 int pos = oldName.indexOf(';'); 651 if (pos >= 0) { 652 oldName = oldName.substring(0, pos).trim(); 653 pieces.set(0, oldName); 654 } 655 656 List<String> data = pieces; 657 if (comment.indexOf("deprecated") >= 0) { 658 // System.out.println(originalLine); 659 if (data.get(2).toString().length() == 0) { 660 data.set(2, "--"); 661 } 662 } 663 if (oldName.equalsIgnoreCase("PRIVATE USE")) { 664 int separatorPos = code.indexOf(".."); 665 if (separatorPos < 0) { 666 add(type, code, data); 667 } else { 668 String current = code.substring(0, separatorPos); 669 String end = code.substring(separatorPos + 2); 670 // System.out.println(">>" + code + "\t" + current + "\t" + end); 671 for (; current.compareTo(end) <= 0; current = nextAlpha(current)) { 672 // System.out.println(">" + current); 673 add(type, current, data); 674 } 675 } 676 continue; 677 } 678 if (!type.equals("tzid")) { 679 add(type, code, data); 680 if (type.equals("currency")) { 681 // currency | TPE | Timor Escudo | TP | EAST TIMOR | O 682 if (data.get(3).equals("C")) { 683 String country = data.get(1); 684 Set<String> codes = country_modernCurrency.get(country); 685 if (codes == null) { 686 country_modernCurrency.put(country, codes = new TreeSet<>()); 687 } 688 codes.add(code); 689 } 690 } 691 continue; 692 } 693 // type = tzid 694 // List codes = (List) Utility.splitList(code, ',', true, new 695 // ArrayList()); 696 String preferred = null; 697 for (int i = 0; i < pieces.size(); ++i) { 698 code = pieces.get(i); 699 add(type, code, data); 700 if (preferred == null) preferred = code; 701 else { 702 Map<String, String> code_preferred = type_code_preferred.get(type); 703 code_preferred.put(code, preferred); 704 } 705 } 706 } 707 lstreg.close(); 708 } catch (Exception e) { 709 System.err.println( 710 "WARNING: " 711 + files[fileIndex] 712 + " may be a corrupted UTF-8 file. Please check."); 713 throw (IllegalArgumentException) 714 new IllegalArgumentException( 715 "Can't read " + files[fileIndex] + "\t" + originalLine) 716 .initCause(e); 717 } 718 country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency); 719 } 720 721 // data is: description | date | canonical_value | recommended_prefix # 722 // comments 723 // HACK, just rework 724 725 Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg(); 726 // languageRegistry = CldrUtility.protectCollection(languageRegistry); 727 728 for (String type : languageRegistry.keySet()) { 729 CodeType type2 = CodeType.from(type); 730 Map<String, Map<String, String>> m = languageRegistry.get(type); 731 for (String code : m.keySet()) { 732 Map<String, String> mm = m.get(code); 733 List<String> data = new ArrayList<>(0); 734 data.add(mm.get("Description")); 735 data.add(mm.get("Added")); 736 String pref = mm.get("Preferred-Value"); 737 if (pref == null) { 738 pref = mm.get("Deprecated"); 739 if (pref == null) pref = ""; 740 else pref = "deprecated"; 741 } 742 data.add(pref); 743 if (type.equals("variant")) { 744 code = code.toUpperCase(); 745 } 746 // data.add(mm.get("Recommended_Prefix")); 747 // {"region", "BQ", "Description", "British Antarctic Territory", 748 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 749 add(type2, code, data); 750 } 751 } 752 753 Map<String, List<String>> m = getZoneData(); 754 for (Iterator<String> it = m.keySet().iterator(); it.hasNext(); ) { 755 String code = it.next(); 756 add(CodeType.tzid, code, m.get(code).toString()); 757 } 758 } 759 760 /** 761 * @param current 762 * @return 763 */ nextAlpha(String current)764 private static String nextAlpha(String current) { 765 // Don't care that this is inefficient 766 int value = 0; 767 for (int i = 0; i < current.length(); ++i) { 768 char c = current.charAt(i); 769 c -= c < 'a' ? 'A' : 'a'; 770 value = value * 26 + c; 771 } 772 value += 1; 773 String result = ""; 774 for (int i = 0; i < current.length(); ++i) { 775 result = (char) ((value % 26) + 'A') + result; 776 value = value / 26; 777 } 778 if (UCharacter.toLowerCase(current).equals(current)) { 779 result = UCharacter.toLowerCase(result); 780 } else if (UCharacter.toUpperCase(current).equals(current)) { 781 // do nothing 782 } else { 783 result = UCharacter.toTitleCase(result, null); 784 } 785 return result; 786 } 787 788 /** 789 * @param type 790 * @param string2 791 * @param string3 792 */ 793 private void add(CodeType type, String string2, String string3) { 794 List<String> l = new ArrayList<>(); 795 l.add(string3); 796 add(type, string2, l); 797 } 798 799 private void add(CodeType type, String code, List<String> otherData) { 800 // hack 801 if (type == CodeType.script) { 802 if (code.equals("Qaai")) { 803 otherData = new ArrayList<>(otherData); 804 otherData.set(0, "Inherited"); 805 } else if (code.equals("Zyyy")) { 806 otherData = new ArrayList<>(otherData); 807 otherData.set(0, "Common"); 808 } 809 } 810 811 // assume name is the first item 812 813 String name = otherData.get(0); 814 815 // add to main list 816 Map<String, List<String>> code_data = getCodeData(type); 817 if (code_data == null) { 818 code_data = new TreeMap<>(); 819 type_code_data.put(type, code_data); 820 } 821 List<String> lastData = code_data.get(code); 822 if (lastData != null) { 823 lastData.addAll(otherData); 824 } else { 825 code_data.put(code, otherData); 826 } 827 828 // now add mapping from name to codes 829 Map<String, List<String>> name_codes = type_name_codes.get(type); 830 if (name_codes == null) { 831 name_codes = new TreeMap<>(); 832 type_name_codes.put(type, name_codes); 833 } 834 List<String> codes = name_codes.get(name); 835 if (codes == null) { 836 codes = new ArrayList<>(); 837 name_codes.put(name, codes); 838 } 839 codes.add(code); 840 } 841 842 private Map<String, List<String>> WorldBankInfo; 843 844 public Map<String, List<String>> getWorldBankInfo() { 845 if (WorldBankInfo == null) { 846 List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false); 847 WorldBankInfo = new HashMap<>(); 848 for (String line : temp) { 849 List<String> row = CldrUtility.splitList(line, ';', true); 850 String key = row.get(0); 851 row.remove(0); 852 WorldBankInfo.put(key, row); 853 } 854 WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo); 855 } 856 return WorldBankInfo; 857 } 858 859 Set<String> moribundLanguages; 860 861 public Set<String> getMoribundLanguages() { 862 if (moribundLanguages == null) { 863 List<String> temp = fillFromCommaFile("moribund_languages.txt", true); 864 moribundLanguages = new TreeSet<>(); 865 moribundLanguages.addAll(temp); 866 moribundLanguages = CldrUtility.protectCollection(moribundLanguages); 867 } 868 return moribundLanguages; 869 } 870 871 // produces a list of the 'clean' lines 872 private List<String> fillFromCommaFile(String filename, boolean trim) { 873 try { 874 List<String> result = new ArrayList<>(); 875 String line; 876 BufferedReader lstreg = CldrUtility.getUTF8Data(filename); 877 while (true) { 878 line = lstreg.readLine(); 879 if (line == null) break; 880 int commentPos = line.indexOf('#'); 881 if (commentPos >= 0) { 882 line = line.substring(0, commentPos); 883 } 884 if (trim) { 885 line = line.trim(); 886 } 887 if (line.length() == 0) continue; 888 result.add(line); 889 } 890 return result; 891 } catch (Exception e) { 892 throw (RuntimeException) 893 new IllegalArgumentException("Can't process file: data/" + filename) 894 .initCause(e); 895 } 896 } 897 898 // return a complex map. language -> arn -> {"Comments" -> "x", 899 // "Description->y,...} 900 static String[][] extras = { 901 {"language", "root", "Description", "Root", "CLDR", "True"}, 902 // { "language", "cch", "Description", "Atsam", "CLDR", "True" }, 903 // { "language", "kaj", "Description", "Jju", "CLDR", "True" }, 904 // { "language", "kcg", "Description", "Tyap", "CLDR", "True" }, 905 // { "language", "kfo", "Description", "Koro", "CLDR", "True" }, 906 // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" }, 907 // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" }, 908 // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" }, 909 // { "region", "003", "Description", "North America", "CLDR", "True" }, 910 // { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True", 911 // "Preferred-Value", "POLYTON" }, 912 {"variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True"}, 913 {"variant", "SAAHO", "Description", "Dialect", "CLDR", "True"}, 914 {"variant", "POSIX", "Description", "Computer-Style", "CLDR", "True"}, 915 // {"region", "172", "Description", "Commonwealth of Independent States", 916 // "CLDR", "True"}, 917 // { "region", "", "Description", "European Union", "CLDR", "True" }, 918 {"region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True"}, 919 {"region", "QO", "Description", "Outlying Oceania", "CLDR", "True"}, 920 {"region", "XK", "Description", "Kosovo", "CLDR", "True"}, 921 {"script", "Qaai", "Description", "Inherited", "CLDR", "True"}, 922 // {"region", "003", "Description", "North America", "CLDR", "True"}, 923 // {"region", "062", "Description", "South-central Asia", "CLDR", "True"}, 924 // {"region", "200", "Description", "Czechoslovakia", "CLDR", "True"}, 925 // {"region", "830", "Description", "Channel Islands", "CLDR", "True"}, 926 // {"region", "833", "Description", "Isle of Man", "CLDR", "True"}, 927 928 // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi 929 // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"}, 930 // {"region", "SU", "Description", "Union of Soviet Socialist Republics", 931 // "CLDR", "True", "Deprecated", "True"}, 932 // {"region", "BQ", "Description", "British Antarctic Territory", 933 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"}, 934 // {"region", "CT", "Description", "Canton and Enderbury Islands", 935 // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"}, 936 // {"region", "FQ", "Description", "French Southern and Antarctic Territories 937 // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"}, 938 // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM", 939 // "CLDR", "True", "Deprecated", "True"}, 940 // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM", 941 // "CLDR", "True", "Deprecated", "True"}, 942 // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value", 943 // "AQ", "CLDR", "True", "Deprecated", "True"}, 944 // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided 945 // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True", 946 // "Deprecated", "True"}, 947 // {"region", "PU", "Description", "U.S. Miscellaneous Pacific Islands", 948 // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"}, 949 // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value", 950 // "PA", "CLDR", "True", "Deprecated", "True"}, 951 // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN", 952 // "CLDR", "True", "Deprecated", "True"}, 953 // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM", 954 // "CLDR", "True", "Deprecated", "True"}, 955 }; 956 957 static final String registryName = 958 CldrUtility.getProperty("registry", "language-subtag-registry"); 959 960 public enum LstrType { 961 language( 962 LocaleNames.UND, 963 LocaleNames.ZXX, 964 LocaleNames.MUL, 965 LocaleNames.MIS, 966 LocaleNames.ROOT), 967 script("Zzzz", "Zsym", "Zxxx", "Zmth"), 968 region("ZZ"), 969 variant(), 970 extension(), 971 extlang(true, false), 972 legacy(true, false), 973 redundant(true, false), 974 /** specialized codes for validity; TODO: rename LstrType * */ 975 currency(false, true, "XXX"), 976 subdivision(false, true), 977 unit(false, true), 978 usage(false, true), 979 zone(false, true); 980 981 public final Set<String> specials; 982 public final String unknown; 983 public final boolean isLstr; 984 public final boolean isUnicode; 985 986 private LstrType(String... unknownValue) { 987 this(true, true, unknownValue); 988 } 989 990 private LstrType(boolean lstr, boolean unicode, String... unknownValue) { 991 unknown = unknownValue.length == 0 ? null : unknownValue[0]; 992 LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue)); 993 if (unknown != null) { 994 set.remove(unknown); 995 } 996 specials = Collections.unmodifiableSet(set); 997 isLstr = lstr; 998 isUnicode = unicode; 999 } 1000 1001 // 1002 static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}"); 1003 1004 boolean isWellFormed(String candidate) { 1005 switch (this) { 1006 case subdivision: 1007 return WELLFORMED.matcher(candidate).matches(); 1008 default: 1009 throw new UnsupportedOperationException(); 1010 } 1011 } 1012 1013 /** Generate compatibility string, returning 'territory' instead of 'region', etc. */ 1014 public String toCompatString() { 1015 switch (this) { 1016 case region: 1017 return "territory"; 1018 case legacy: 1019 return "language"; 1020 case redundant: 1021 return "language"; 1022 default: 1023 return toString(); 1024 } 1025 } 1026 1027 /** Create LstrType from string, allowing the compat string 'territory'. */ 1028 public static LstrType fromString(String rawType) { 1029 try { 1030 return valueOf(rawType); 1031 } catch (IllegalArgumentException e) { 1032 if ("territory".equals(rawType)) { 1033 return region; 1034 } 1035 throw e; 1036 } 1037 } 1038 } 1039 1040 public enum LstrField { 1041 Type, 1042 Subtag, 1043 Description, 1044 Added, 1045 Scope, 1046 Tag, 1047 Suppress_Script, 1048 Macrolanguage, 1049 Deprecated, 1050 Preferred_Value, 1051 Comments, 1052 Prefix, 1053 CLDR; 1054 1055 public static LstrField from(String s) { 1056 return LstrField.valueOf(s.trim().replace("-", "_")); 1057 } 1058 } 1059 1060 static Map<String, Map<String, Map<String, String>>> LSTREG; 1061 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM; 1062 static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW; 1063 1064 /** 1065 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ... 1066 * <br> 1067 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated 1068 * by DESCRIPTION_SEPARATOR. 1069 * 1070 * @return 1071 */ 1072 public static Map<String, Map<String, Map<String, String>>> getLStreg() { 1073 if (LSTREG == null) { 1074 initLstr(); 1075 } 1076 return LSTREG; 1077 } 1078 1079 /** 1080 * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ... 1081 * <br> 1082 * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated 1083 * by DESCRIPTION_SEPARATOR. 1084 * 1085 * @return 1086 */ 1087 public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() { 1088 if (LSTREG_ENUM == null) { 1089 initLstr(); 1090 } 1091 return LSTREG_ENUM; 1092 } 1093 1094 public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() { 1095 if (LSTREG_ENUM == null) { 1096 initLstr(); 1097 } 1098 return LSTREG_RAW; 1099 } 1100 1101 private static void initLstr() { 1102 Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<>(); 1103 1104 int lineNumber = 1; 1105 1106 Set<String> funnyTags = new TreeSet<>(); 1107 String line; 1108 try { 1109 BufferedReader lstreg = CldrUtility.getUTF8Data(registryName); 1110 LstrType lastType = null; 1111 String lastTag = null; 1112 Map<String, Map<LstrField, String>> subtagData = null; 1113 Map<LstrField, String> currentData = null; 1114 LstrField lastLabel = null; 1115 String lastRest = null; 1116 boolean inRealContent = false; 1117 // Map<String, String> translitCache = new HashMap<String, String>(); 1118 for (; ; ++lineNumber) { 1119 line = lstreg.readLine(); 1120 if (line == null) break; 1121 if (line.length() == 0) continue; // skip blanks 1122 if (line.startsWith("File-Date: ")) { 1123 if (DEBUG) System.out.println("Language Subtag Registry: " + line); 1124 inRealContent = true; 1125 continue; 1126 } 1127 if (!inRealContent) { 1128 // skip until we get to real content 1129 continue; 1130 } 1131 // skip cruft 1132 if (line.startsWith("Internet-Draft")) { 1133 continue; 1134 } 1135 if (line.startsWith("Ewell")) { 1136 continue; 1137 } 1138 if (line.startsWith("\f")) { 1139 continue; 1140 } 1141 if (line.startsWith("4. Security Considerations")) { 1142 break; 1143 } 1144 1145 if (line.startsWith("%%")) 1146 continue; // skip separators (ok, since data starts with Type: 1147 if (line.startsWith(" ")) { 1148 currentData.put(lastLabel, lastRest + " " + line.trim()); 1149 continue; 1150 } 1151 1152 /* 1153 * Type: language Subtag: aa Description: Afar Added: 2005-10-16 1154 * Suppress-Script: Latn 1155 */ 1156 int pos2 = line.indexOf(':'); 1157 LstrField label = LstrField.from(line.substring(0, pos2)); 1158 String rest = line.substring(pos2 + 1).trim(); 1159 if (label == LstrField.Type) { 1160 lastType = 1161 rest.equals("grandfathered") 1162 ? LstrType.legacy 1163 : LstrType.fromString(rest); 1164 subtagData = CldrUtility.get(result2, lastType); 1165 if (subtagData == null) { 1166 result2.put(lastType, subtagData = new TreeMap<>()); 1167 } 1168 } else if (label == LstrField.Subtag || label == LstrField.Tag) { 1169 lastTag = rest; 1170 String endTag = null; 1171 // Subtag: qaa..qtz 1172 int pos = lastTag.indexOf(".."); 1173 if (pos >= 0) { 1174 endTag = lastTag.substring(pos + 2); 1175 lastTag = lastTag.substring(0, pos); 1176 } 1177 currentData = new TreeMap<>(); 1178 if (endTag == null) { 1179 putSubtagData(lastTag, subtagData, currentData); 1180 languageCount.add(lastType, 1); 1181 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType + 1182 // "\t" + lastTag); 1183 } else { 1184 for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) { 1185 // System.out.println(">" + current); 1186 putSubtagData(lastTag, subtagData, currentData); 1187 languageCount.add(lastType, 1); 1188 // System.out.println(languageCount.getCount(lastType) + "\t" + lastType 1189 // + "\t" + lastTag); 1190 } 1191 } 1192 // label.equalsIgnoreCase("Added") || label.equalsIgnoreCase("Suppress-Script")) 1193 // { 1194 // skip 1195 // } else if (pieces.length < 2) { 1196 // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line); 1197 } else { 1198 lastLabel = label; 1199 // The following code was removed because in the standard tests (TestAll) both 1200 // lastRest and rest were always equal. 1201 // if(!translitCache.containsKey(rest)) { 1202 // lastRest = 1203 // TransliteratorUtilities.fromXML.transliterate(rest); 1204 // translitCache.put(rest, lastRest); 1205 // if (!lastRest.equals(rest)) { 1206 // System.out.println(System.currentTimeMillis()+" 1207 // initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'"); 1208 // } 1209 // } else { 1210 // lastRest = translitCache.get(rest); 1211 // } 1212 lastRest = rest; 1213 String oldValue = CldrUtility.get(currentData, lastLabel); 1214 if (oldValue != null) { 1215 lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest; 1216 } 1217 currentData.put(lastLabel, lastRest); 1218 } 1219 } 1220 } catch (Exception e) { 1221 throw (RuntimeException) 1222 new IllegalArgumentException( 1223 "Can't process file: data/" 1224 + registryName 1225 + ";\t at line " 1226 + lineNumber) 1227 .initCause(e); 1228 } finally { 1229 if (!funnyTags.isEmpty()) { 1230 if (DEBUG) System.out.println("Funny tags: " + funnyTags); 1231 } 1232 } 1233 // copy raw 1234 Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<>(); 1235 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) { 1236 LstrType key1 = entry1.getKey(); 1237 TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<>(); rawLstreg.put(key1, raw1)1238 rawLstreg.put(key1, raw1); 1239 for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) { 1240 String key2 = entry2.getKey(); 1241 final Map<LstrField, String> value2 = entry2.getValue(); 1242 TreeMap<LstrField, String> raw2 = new TreeMap<>(); 1243 raw2.putAll(value2); raw1.put(key2, raw2)1244 raw1.put(key2, raw2); 1245 } 1246 } 1247 LSTREG_RAW = CldrUtility.protectCollection(rawLstreg); 1248 1249 // add extras 1250 for (int i = 0; i < extras.length; ++i) { 1251 Map<String, Map<LstrField, String>> subtagData = 1252 CldrUtility.get(result2, LstrType.fromString(extras[i][0])); 1253 if (subtagData == null) { LstrType.fromString(extras[i][0])1254 result2.put(LstrType.fromString(extras[i][0]), subtagData = new TreeMap<>()); 1255 } 1256 Map<LstrField, String> labelData = new TreeMap<>(); 1257 for (int j = 2; j < extras[i].length; j += 2) { LstrField.from(extras[i][j])1258 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]); 1259 } 1260 Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]); 1261 if (old != null) { 1262 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) { 1263 throw new IllegalArgumentException( 1264 "REPLACING data for " 1265 + extras[i][1] 1266 + "\t" 1267 + old 1268 + "\twith" 1269 + labelData); 1270 } 1271 } 1272 if (false) { 1273 System.out.println( 1274 (old != null ? "REPLACING" + "\t" + old : "ADDING") 1275 + " data for " 1276 + extras[i][1] 1277 + "\twith" 1278 + labelData); 1279 } subtagData.put(extras[i][1], labelData)1280 subtagData.put(extras[i][1], labelData); 1281 } 1282 // build compatibility map 1283 Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<>(); 1284 for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) { 1285 Map<String, Map<String, String>> copy2 = new LinkedHashMap<>(); 1286 result.put(entry.getKey().toString(), copy2); 1287 for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) { 1288 Map<String, String> copy3 = new LinkedHashMap<>(); entry2.getKey()1289 copy2.put(entry2.getKey(), copy3); 1290 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) { entry3.getValue()1291 copy3.put(entry3.getKey().toString(), entry3.getValue()); 1292 } 1293 } 1294 } 1295 LSTREG = CldrUtility.protectCollection(result); 1296 LSTREG_ENUM = CldrUtility.protectCollection(result2); 1297 } 1298 1299 private static <K, K2, V> Map<K2, V> putSubtagData( 1300 K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) { 1301 Map<K2, V> oldData = subtagData.get(lastTag); 1302 if (oldData != null) { 1303 if (oldData.get("CLDR") != null) { 1304 System.out.println("overriding: " + lastTag + ", " + oldData); 1305 } else { 1306 throw new IllegalArgumentException("Duplicate tag: " + lastTag); 1307 } 1308 } 1309 return subtagData.put(lastTag, currentData); 1310 } 1311 1312 static Counter<LstrType> languageCount = new Counter<>(); 1313 1314 public static Counter<LstrType> getLanguageCount() { 1315 return languageCount; 1316 } 1317 1318 ZoneParser zoneParser = new ZoneParser(); 1319 1320 // static public final Set<String> MODERN_SCRIPTS = Collections 1321 // .unmodifiableSet(new TreeSet( 1322 // // "Bali " + 1323 // // "Bugi " + 1324 // // "Copt " + 1325 // // "Hano " + 1326 // // "Osma " + 1327 // // "Qaai " + 1328 // // "Sylo " + 1329 // // "Syrc " + 1330 // // "Tagb " + 1331 // // "Tglg " + 1332 // Arrays 1333 // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor 1334 // Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr 1335 // Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii" 1336 // .split("\\s+")))); 1337 1338 // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments 1339 1340 /** 1341 * @deprecated 1342 */ 1343 @Deprecated 1344 public Map<String, List<ZoneLine>> getZone_rules() { 1345 return zoneParser.getZone_rules(); 1346 } 1347 1348 /** 1349 * @deprecated 1350 */ 1351 @Deprecated 1352 public Map<String, List<String>> getZoneData() { 1353 return zoneParser.getZoneData(); 1354 } 1355 1356 /** 1357 * @deprecated 1358 */ 1359 @Deprecated 1360 public Set<String> getCanonicalTimeZones() { 1361 return zoneParser.getZoneData().keySet(); 1362 } 1363 1364 /** 1365 * @deprecated 1366 */ 1367 @Deprecated 1368 public Map<String, Set<String>> getCountryToZoneSet() { 1369 return zoneParser.getCountryToZoneSet(); 1370 } 1371 1372 /** 1373 * @deprecated 1374 */ 1375 @Deprecated 1376 public List<String> getDeprecatedZoneIDs() { 1377 return zoneParser.getDeprecatedZoneIDs(); 1378 } 1379 1380 /** 1381 * @deprecated 1382 */ 1383 @Deprecated 1384 public Comparator<String> getTZIDComparator() { 1385 return zoneParser.getTZIDComparator(); 1386 } 1387 1388 /** 1389 * @deprecated 1390 */ 1391 @Deprecated 1392 public Map<String, Set<String>> getZoneLinkNew_OldSet() { 1393 return zoneParser.getZoneLinkNew_OldSet(); 1394 } 1395 1396 /** 1397 * @deprecated 1398 */ 1399 @Deprecated 1400 public Map<String, String> getZoneLinkold_new() { 1401 return zoneParser.getZoneLinkold_new(); 1402 } 1403 1404 /** 1405 * @deprecated 1406 */ 1407 @Deprecated 1408 public Map getZoneRuleID_rules() { 1409 return zoneParser.getZoneRuleID_rules(); 1410 } 1411 1412 /** 1413 * @deprecated 1414 */ 1415 @Deprecated 1416 public Map<String, String> getZoneToCounty() { 1417 return zoneParser.getZoneToCounty(); 1418 } 1419 1420 /** 1421 * @deprecated 1422 */ 1423 @Deprecated 1424 public String getZoneVersion() { 1425 return zoneParser.getVersion(); 1426 } 1427 1428 public static String fixLanguageTag(String languageSubtag) { 1429 if (languageSubtag.equals("mo")) { // fix special cases 1430 return "ro"; 1431 } 1432 return languageSubtag; 1433 } 1434 1435 public boolean isModernLanguage(String languageCode) { 1436 if (getMoribundLanguages().contains(languageCode)) return false; 1437 Type type = Iso639Data.getType(languageCode); 1438 if (type == Type.Living) return true; 1439 if (languageCode.equals("eo")) return true; // exception for Esperanto 1440 // Scope scope = Iso639Data.getScope(languageCode); 1441 // if (scope == Scope.Collection) return false; 1442 return false; 1443 } 1444 1445 public static boolean isScriptModern(String script) { 1446 ScriptMetadata.Info info = ScriptMetadata.getInfo(script); 1447 if (info == null) { 1448 if (false) throw new IllegalArgumentException("No script metadata for: " + script); 1449 return false; 1450 } 1451 IdUsage idUsage = info.idUsage; 1452 return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN; 1453 } 1454 1455 static final Pattern whitespace = PatternCache.get("\\s+"); 1456 static Set<String> filteredCurrencies = null; 1457 1458 public Set<String> getSurveyToolDisplayCodes(String type) { 1459 return getGoodAvailableCodes(type); 1460 } 1461 1462 static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze(); 1463 1464 /** 1465 * Quick check for whether valid country. Not complete: should use Validity 1466 * 1467 * @param territory 1468 * @return 1469 */ 1470 public static boolean isCountry(String territory) { 1471 switch (territory) { 1472 case "ZZ": 1473 case "QO": 1474 case "EU": 1475 case "UN": 1476 case "EZ": 1477 return false; 1478 default: 1479 return territory.length() == 2 && COUNTRY.containsAll(territory); 1480 } 1481 } 1482 1483 public boolean isLstregPrivateUse(String type, String code) { 1484 Map<String, String> lStregData = getLStreg().get(type).get(code); 1485 return lStregData.get("Description").equalsIgnoreCase("private use"); 1486 } 1487 1488 public boolean isLstregDeprecated(String type, String code) { 1489 Map<String, String> lStregData = getLStreg().get(type).get(code); 1490 return lStregData.get("Deprecated") != null; 1491 } 1492 1493 /** get prospective currencies. Only needed for a few tests */ 1494 public Set<String> getOncomingCurrencies() { 1495 Set<String> result = new HashSet<>(); 1496 for (Entry<String, List<String>> entry : getCodeData(CodeType.currency).entrySet()) { 1497 if (entry.getValue().get(3).equals("P")) { 1498 result.add(entry.getKey()); 1499 } 1500 } 1501 return result; 1502 } 1503 } 1504