1 package org.unicode.cldr.tool; 2 3 import com.ibm.icu.impl.Row.R2; 4 import com.ibm.icu.util.ULocale; 5 import java.util.Collections; 6 import java.util.HashMap; 7 import java.util.HashSet; 8 import java.util.LinkedHashMap; 9 import java.util.LinkedHashSet; 10 import java.util.List; 11 import java.util.Locale; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 import org.unicode.cldr.util.Builder; 18 import org.unicode.cldr.util.CLDRConfig; 19 import org.unicode.cldr.util.CldrUtility; 20 import org.unicode.cldr.util.LanguageTagParser; 21 import org.unicode.cldr.util.StandardCodes; 22 import org.unicode.cldr.util.StringIterables; 23 24 public class LanguageCodeConverter { 25 private static Map<String, String> languageNameToCode = new TreeMap<>(); 26 private static Set<String> exceptionCodes = new TreeSet<>(); 27 private static Set<String> parseErrors = new LinkedHashSet<>(); 28 29 private static Map<String, R2<List<String>, String>> languageAliases = 30 CLDRConfig.getInstance().getSupplementalDataInfo().getLocaleAliasInfo().get("language"); 31 32 /** 33 * Public only for testing. 34 * 35 * @internal 36 */ 37 public static final Map<String, String> GOOGLE_CLDR = 38 Builder.with(new LinkedHashMap<String, String>()) // preserve order 39 .put("iw", "he") 40 .put("jw", "jv") 41 // .put("nb", "no") 42 .put("tl", "fil") 43 .put("pt-BR", "pt") 44 .put("xx-bork", "x_bork") 45 .put("xx-elmer", "x_elmer") 46 .put("xx-hacker", "x_hacker") 47 .put("xx-pirate", "x_pirate") 48 .put("xx-klingon", "tlh") 49 .put("zh-CN", "zh") 50 .put("zh-TW", "zh_Hant") 51 .put("zh-HK", "zh_Hant_HK") 52 .put("sit-NP", "lif") 53 .put("ut", "und") 54 .put("un", "und") 55 .put("xx", "und") 56 57 // .put("sh", "fil") 58 .freeze(); 59 60 /** 61 * Public only for testing. 62 * 63 * @internal 64 */ 65 public static final Map<String, String> CLDR_GOOGLE = 66 Builder.with(new HashMap<String, String>()).putAllTransposed(GOOGLE_CLDR).freeze(); 67 68 /** 69 * Public only for testing. 70 * 71 * @internal 72 */ 73 public static final Map<String, String> EXTRA_SCRIPTS = 74 Builder.with(new HashMap<String, String>()) 75 .on("crs", "pcm", "tlh") 76 .put("Latn") 77 .freeze(); 78 79 static { 80 // Reads the CLDR copy of 81 // http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry 82 Map<String, Map<String, Map<String, String>>> lstreg = StandardCodes.getLStreg(); 83 Map<String, Map<String, String>> languages = lstreg.get("language"); 84 Set<String> validCodes = new HashSet<>(); 85 86 for (Entry<String, Map<String, String>> codeInfo : languages.entrySet()) { 87 String code = codeInfo.getKey(); 88 R2<List<String>, String> replacement = languageAliases.get(code); 89 // Returns "sh" -> <{"sr_Latn"}, reason> 90 if (replacement != null) { 91 List<String> replacements = replacement.get0(); 92 if (replacements.size() != 1) { 93 continue; 94 } 95 code = replacements.get(0); 96 if (code.contains("_")) { 97 continue; 98 } 99 } 100 // if (languageAliases.containsKey(code)) { 101 // continue; 102 // } 103 final Map<String, String> info = codeInfo.getValue(); 104 String deprecated = info.get("Deprecated"); 105 if (deprecated != null) { 106 continue; 107 } 108 String name = info.get("Description"); 109 if (name.equals("Private use")) { 110 continue; 111 } 112 validCodes.add(code); 113 if (name.contains(StandardCodes.DESCRIPTION_SEPARATOR)) { 114 for (String namePart : name.split(StandardCodes.DESCRIPTION_SEPARATOR)) { 115 addNameToCode("lstr", code, namePart); 116 } 117 } else { 118 addNameToCode("lstr", code, name); 119 } 120 } 121 122 // CLDRFile english; // = testInfo.getEnglish(); 123 for (String code : validCodes) { 124 String icuName = ULocale.getDisplayName(code, "en"); 125 addNameToCode("cldr", code, icuName); 126 // if (languageAliases.containsKey(code)) { 127 // continue; 128 // } 129 // String cldrName = english.getName("language", code); 130 // if (cldrName != null && !cldrName.equals("private-use")) { 131 // addNameToCode("cldr", code, cldrName); 132 // } 133 } 134 // add exceptions 135 LanguageTagParser ltp = new LanguageTagParser(); 136 for (String line : 137 StringIterables.in( 138 CldrUtility.getUTF8Data("external/alternate_language_names.txt"))) { 139 String[] parts = CldrUtility.cleanSemiFields(line); 140 if (parts == null || parts.length == 0) continue; 141 String code = parts[0]; 142 if (!validCodes.contains(code)) { 143 if (code.equals("*OMIT")) { 144 parseErrors.add("Skipping " + line); 145 continue; 146 } 147 String base = ltp.set(code).getLanguage(); 148 if (!validCodes.contains(base)) { 149 R2<List<String>, String> alias = languageAliases.get(base); 150 if (alias != null) { 151 code = alias.get0().get(0); 152 } else { 153 parseErrors.add("Skipping " + line); 154 continue; 155 } 156 } 157 } toUnderbarLocale(code)158 exceptionCodes.add(toUnderbarLocale(code)); 159 if (parts.length < 2) { 160 continue; 161 } 162 String name = parts[1]; 163 if (parts.length > 2) { 164 name += ";" + parts[2]; // HACK 165 } 166 addNameToCode("exception", code, name); 167 } 168 for (String cldr : GOOGLE_CLDR.values()) { 169 String goodCode = toUnderbarLocale(cldr); 170 exceptionCodes.add(goodCode); 171 } 172 languageNameToCode = Collections.unmodifiableMap(languageNameToCode); 173 exceptionCodes = Collections.unmodifiableSet(exceptionCodes); 174 parseErrors = Collections.unmodifiableSet(parseErrors); 175 } 176 addNameToCode(final String type, final String code, String name)177 private static void addNameToCode(final String type, final String code, String name) { 178 if (code.equals("mru") && name.equals("mru")) { 179 // mru=Mono (Cameroon) 180 // mro=Mru 181 // Ignore the CLDR mapping of the code to itself, 182 // to avoid clobbering the mapping of the real name Mru to the real code mro. 183 return; 184 } 185 name = name.toLowerCase(Locale.ENGLISH); 186 String oldCode = languageNameToCode.get(name); 187 if (oldCode != null) { 188 if (!oldCode.equals(code)) { 189 parseErrors.add( 190 "Name Collision! " 191 + type 192 + ": " 193 + name 194 + " <" 195 + oldCode 196 + ", " 197 + code 198 + ">"); 199 } else { 200 return; 201 } 202 } 203 languageNameToCode.put(name, code); 204 } 205 toGoogleLocaleId(String localeId)206 public static String toGoogleLocaleId(String localeId) { 207 // TODO fix to do languages, etc. field by field 208 localeId = localeId.replace("-", "_"); 209 String result = CLDR_GOOGLE.get(localeId); 210 result = result == null ? localeId : result; 211 return result.replace("_", "-"); 212 } 213 fromGoogleLocaleId(String localeId)214 public static String fromGoogleLocaleId(String localeId) { 215 localeId = localeId.replace("_", "-"); 216 // TODO fix to do languages, etc. field by field 217 String result = GOOGLE_CLDR.get(localeId); 218 result = result == null ? localeId : result; 219 return result.replace("-", "_"); 220 } 221 toUnderbarLocale(String localeId)222 public static String toUnderbarLocale(String localeId) { 223 return localeId.replace("-", "_"); 224 } 225 toHyphenLocale(String localeId)226 public static String toHyphenLocale(String localeId) { 227 return localeId.replace("_", "-"); 228 } 229 getCodeForName(String languageName)230 public static String getCodeForName(String languageName) { 231 return languageNameToCode.get(languageName.toLowerCase(Locale.ENGLISH)); 232 } 233 getExceptionCodes()234 public static Set<String> getExceptionCodes() { 235 return exceptionCodes; 236 } 237 getParseErrors()238 public static Set<String> getParseErrors() { 239 return parseErrors; 240 } 241 getLanguageNameToCode()242 public static Map<String, String> getLanguageNameToCode() { 243 return languageNameToCode; 244 } 245 } 246