package org.unicode.cldr.util;

import com.google.common.base.Joiner;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUUncheckedIOException;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Static lookup tables keyed by the first column of {@code territory_codes.txt}, giving the
 * numeric, alpha-3, internet and FIPS-10 values found in the remaining columns of that file.
 *
 * <p>All tables are filled once in the static initializer and then wrapped in unmodifiable views.
 * Entries from {@code tlds-alpha-by-domain.txt} that no territory row refers to are collected
 * under the key {@code "ZZ"} and are also available through {@link #getOtherInternet()}.
 */
public class IsoRegionData {
    static Map<String, String> _numeric = new HashMap<>();
    static Map<String, String> _alpha3 = new HashMap<>();
    static Map<String, String> _fips10 = new HashMap<>();
    static Map<String, String> _internet = new HashMap<>();
    static Set<String> other_internet = new TreeSet<>();
    static Set<String> available = new HashSet<>();

    // Only referenced by the disabled Punycode handling in loadTopLevelDomains().
    static final UnicodeSet NMTOKEN =
            new UnicodeSet(
                            "[\\-.0-\\:A-Z_a-z\\u00B7\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u037D\\u037F-\\u1FFF\\u200C\\u200D\\u203F\\u2040\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD\\U00010000-\\U000EFFFF]")
                    .freeze();

    static {
        try {
            // Order matters: the territory rows are validated against, and subtracted from,
            // the domain list loaded first.
            loadTopLevelDomains();
            loadTerritoryCodes();
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
        // Whatever is left was not claimed by any territory row.
        _internet.put("ZZ", Joiner.on(" ").join(other_internet));

        other_internet = Collections.unmodifiableSet(other_internet);

        available.addAll(_numeric.keySet());
        available.addAll(_alpha3.keySet());
        available.addAll(_fips10.keySet());
        available.addAll(_internet.keySet());

        _numeric = Collections.unmodifiableMap(_numeric);
        _alpha3 = Collections.unmodifiableMap(_alpha3);
        _fips10 = Collections.unmodifiableMap(_fips10);
        _internet = Collections.unmodifiableMap(_internet);
        available = Collections.unmodifiableSet(available);
    }

    /**
     * Removes everything from the first {@code '#'} onward and trims the remainder.
     *
     * <p>Unlike {@code line.split("#")[0]}, this does not fail on a line that consists only of
     * {@code '#'} characters (for which {@code split} returns an empty array).
     */
    private static String stripComment(String line) {
        int hash = line.indexOf('#');
        return (hash < 0 ? line : line.substring(0, hash)).trim();
    }

    /**
     * Adds every non-empty, non-comment line of {@code tlds-alpha-by-domain.txt} to {@link
     * #other_internet}. The reader is closed even if reading fails.
     */
    private static void loadTopLevelDomains() throws IOException {
        try (BufferedReader codes = CldrUtility.getUTF8Data("tlds-alpha-by-domain.txt")) {
            String line;
            while ((line = codes.readLine()) != null) {
                line = stripComment(line);
                if (line.isEmpty()) {
                    continue;
                }
                // if (line.startsWith("XN--")) {
                // try {
                // line = Punycode.decode(line.substring(4), null).toString();
                // if (!NMTOKEN.containsAll(line)) {
                // System.err.println("!NMTOKEN:" + line);
                // continue;
                // }
                // } catch (StringPrepParseException e) {
                // throw new IllegalArgumentException(e);
                // }
                // }
                other_internet.add(line);
            }
        }
    }

    /**
     * Reads {@code territory_codes.txt} into the four lookup maps and removes every internet
     * value it mentions from {@link #other_internet}. The reader is closed even if reading fails.
     *
     * <p>File format, as described in the data file's header:
     *
     * <pre>
     * # RFC3066; UN Numeric; ISO3166 Alpha-3, internet, FIPS-10
     * # whitespace delimited: - for empty
     * # See http://unstats.un.org/unsd/methods/m49/m49regin.htm
     * # and http://www.iso.org/iso/en/prods-services/iso3166ma/01whats-new/index.html
     * # See also http://www.cia.gov/cia/publications/factbook/appendix/appendix-d.html
     * # and http://data.iana.org/TLD/tlds-alpha-by-domain.txt for the latest domains
     * # and http://www.iana.org/cctld/cctld-whois.htm
     * # and https://www.icmwg.org/ccwg/documents/ISO3166-FIPS10-A2-Mapping/3166-1-A2--to-FIPS10-A2-mapping.htm
     * # for FIPS: http://earth-info.nga.mil/gns/html/fips_files.html
     * RS 688 SRB rs RB
     * </pre>
     *
     * @throws IllegalArgumentException if any internet value is not present in the domain list
     *     loaded by {@link #loadTopLevelDomains()}
     */
    private static void loadTerritoryCodes() throws IOException {
        Set<String> errors = new LinkedHashSet<>();
        try (BufferedReader codes = CldrUtility.getUTF8Data("territory_codes.txt")) {
            String line;
            while ((line = codes.readLine()) != null) {
                line = stripComment(line);
                if (line.isEmpty()) {
                    continue;
                }
                // Normalize to exactly five columns; missing columns and "-" both become null.
                String[] sourceValues = line.split("\\s+");
                String[] values = new String[5];
                for (int i = 0; i < values.length; ++i) {
                    if (i < sourceValues.length && !sourceValues[i].equals("-")) {
                        values[i] = sourceValues[i];
                    }
                }
                String alpha2 = values[0];
                String numeric = values[1];
                String alpha3 = values[2];
                String internet = values[3];
                String fips10 = values[4];

                if (internet != null) {
                    // Locale.ROOT keeps the mapping locale-independent (e.g. "it" -> "IT" even
                    // when the default locale is Turkish).
                    internet = internet.toUpperCase(Locale.ROOT);
                    // A territory may list several domains separated by '/'.
                    Set<String> internetStrings =
                            new LinkedHashSet<>(Arrays.asList(internet.split("/")));
                    if (!other_internet.containsAll(internetStrings)) {
                        errors.addAll(internetStrings);
                        errors.removeAll(other_internet);
                    }
                    other_internet.removeAll(internetStrings);
                    internet = Joiner.on(" ").join(internetStrings);
                }

                _numeric.put(alpha2, numeric);
                _alpha3.put(alpha2, alpha3);
                _fips10.put(alpha2, fips10);
                _internet.put(alpha2, internet);
            }
        }
        if (!errors.isEmpty()) {
            throw new IllegalArgumentException("Internet values illegal: " + errors);
        }
    }

    /**
     * Returns the second-column ("UN Numeric") value recorded for the given code.
     *
     * @param countryCodeAlpha2 a first-column code from {@code territory_codes.txt}
     * @return the recorded value, or {@code null} if the code is unknown or the column was empty
     */
    public static String getNumeric(String countryCodeAlpha2) {
        return _numeric.get(countryCodeAlpha2);
    }

    /**
     * Returns the third-column ("ISO3166 Alpha-3") value recorded for the given code.
     *
     * @param countryCodeAlpha2 a first-column code from {@code territory_codes.txt}
     * @return the recorded value, or {@code null} if the code is unknown or the column was empty
     */
    public static String get_alpha3(String countryCodeAlpha2) {
        return _alpha3.get(countryCodeAlpha2);
    }

    /**
     * Returns the fifth-column ("FIPS-10") value recorded for the given code.
     *
     * @param countryCodeAlpha2 a first-column code from {@code territory_codes.txt}
     * @return the recorded value, or {@code null} if the code is unknown or the column was empty
     */
    public static String get_fips10(String countryCodeAlpha2) {
        return _fips10.get(countryCodeAlpha2);
    }

    /**
     * Returns the upper-cased internet value(s) recorded for the given code, joined by single
     * spaces when there is more than one. For {@code "ZZ"} this is the space-joined list of all
     * domains not referenced by any territory row.
     *
     * @param countryCodeAlpha2 a first-column code from {@code territory_codes.txt}, or "ZZ"
     * @return the recorded value, or {@code null} if the code is unknown or the column was empty
     */
    public static String get_internet(String countryCodeAlpha2) {
        return _internet.get(countryCodeAlpha2);
    }

    /**
     * Returns the entries of {@code tlds-alpha-by-domain.txt} that no territory row referenced.
     *
     * @return an unmodifiable, sorted set
     */
    public static Set<String> getOtherInternet() {
        return other_internet;
    }

    /**
     * Returns the union of the keys of all four lookup tables (this includes "ZZ").
     *
     * @return an unmodifiable set
     */
    public static Set<String> getAvailable() {
        return available;
    }
}