1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Joiner; 4 import com.ibm.icu.util.ULocale; 5 import java.util.Collections; 6 import java.util.Comparator; 7 import java.util.LinkedHashMap; 8 import java.util.Map; 9 import java.util.Set; 10 import java.util.TreeMap; 11 import org.unicode.cldr.util.ChainedMap.M3; 12 13 public enum LanguageGroup { 14 root("und"), 15 germanic("gem"), 16 celtic("cel"), 17 romance("roa"), 18 slavic("sla"), 19 baltic("bat"), 20 indic("inc"), 21 iranian("ira"), 22 other_indo("ine_001"), 23 caucasian("cau"), 24 dravidian("dra"), 25 uralic("urj"), 26 cjk("und_Hani"), 27 sino_tibetan("sit"), 28 tai("tai"), 29 austronesian("map"), 30 turkic("trk"), 31 afroasiatic("afa"), 32 austroasiatic("aav"), 33 niger_congo("nic"), 34 east_sudanic("sdv"), 35 songhay("son"), 36 american("und_019"), 37 art("art"), 38 other("und_001"); 39 40 public final String iso; 41 LanguageGroup(String iso)42 LanguageGroup(String iso) { 43 this.iso = iso; 44 } 45 46 static final Map<ULocale, LanguageGroup> LANGUAGE_GROUP; 47 static final M3<LanguageGroup, ULocale, Integer> GROUP_LANGUAGE = 48 ChainedMap.of( 49 new TreeMap<LanguageGroup, Object>(), 50 new LinkedHashMap<ULocale, Object>(), 51 Integer.class); 52 add( Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages)53 private static void add( 54 Map<ULocale, LanguageGroup> map, LanguageGroup group, String... baseLanguages) { 55 Map<ULocale, Integer> soFar = GROUP_LANGUAGE.get(group); 56 int count = soFar == null ? 0 : soFar.size(); 57 for (String s : baseLanguages) { 58 ULocale loc = new ULocale(s); 59 if (map.put(loc, group) != null) { 60 throw new IllegalArgumentException("duplicate: " + s + ", " + group); 61 } 62 GROUP_LANGUAGE.put(group, loc, count); 63 ++count; 64 } 65 } 66 67 static { 68 LinkedHashMap<ULocale, LanguageGroup> temp = new LinkedHashMap<>(); 69 LANGUAGE_GROUP = Collections.unmodifiableMap(temp); add(temp, root, "root")70 add(temp, root, "root"); add( temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da", "no", "nb", "nn", "fo", "is", "yi", "nds")71 add( 72 temp, germanic, "en", "fy", "nl", "af", "de", "gsw", "wae", "ksh", "lb", "sv", "da", 73 "no", "nb", "nn", "fo", "is", "yi", "nds"); add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br")74 add(temp, celtic, "ga", "gd", "cy", "gv", "kw", "br"); add( temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro", "fur", "an", "co", "oc", "sc", "scn", "wa")75 add( 76 temp, romance, "fr", "pt", "gl", "es", "ca", "ast", "it", "rm", "ro", "fur", "an", 77 "co", "oc", "sc", "scn", "wa"); add( temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", "uk", "dsb", "hsb", "cu", "szl")78 add( 79 temp, slavic, "pl", "cs", "sk", "sl", "hr", "bs", "mk", "sr", "bg", "ru", "be", 80 "uk", "dsb", "hsb", "cu", "szl"); add(temp, baltic, "lt", "lv", "prg")81 add(temp, baltic, "lt", "lv", "prg"); add( temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", "si", "kok", "ks", "mai", "doi", "dv", "sa", "trw")82 add( 83 temp, indic, "ur", "hi", "gu", "sd", "bn", "as", "ccp", "or", "mr", "ne", "pa", 84 "si", "kok", "ks", "mai", "doi", "dv", "sa", "trw"); add(temp, iranian, "fa", "ps", "ku", "os", "ckb", "lrc", "mzn", "tg", "bgn", "sdh")85 add(temp, iranian, "fa", "ps", "ku", "os", "ckb", "lrc", "mzn", "tg", "bgn", "sdh"); add(temp, other_indo, "el", "hy", "sq")86 add(temp, other_indo, "el", "hy", "sq"); add(temp, dravidian, "ta", "te", "ml", "kn")87 add(temp, dravidian, "ta", "te", "ml", "kn"); add(temp, cjk, "zh", "yue", "ja", "ko")88 add(temp, cjk, "zh", "yue", "ja", "ko"); add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug", "sah", "tt", "ba", "cv")89 add(temp, turkic, "tr", "az", "tk", "kk", "ky", "uz", "ug", "sah", "tt", "ba", "cv"); add(temp, uralic, "hu", "fi", "et", "se", "smn", "myv", "sma", "smj", "sms")90 add(temp, uralic, "hu", "fi", "et", "se", "smn", "myv", "sma", "smj", "sms"); add( temp, afroasiatic, "ar", "mt", "he", "om", "so", "ha", "am", "tzm", "zgh", "aa", "kab", "shi", "ssy", "ti", "byn", "gez", "sid", "syr", "tig", "wal")91 add( 92 temp, 93 afroasiatic, 94 "ar", 95 "mt", 96 "he", 97 "om", 98 "so", 99 "ha", 100 "am", 101 "tzm", 102 "zgh", 103 "aa", 104 "kab", 105 "shi", 106 "ssy", 107 "ti", 108 "byn", 109 "gez", 110 "sid", 111 "syr", 112 "tig", 113 "wal"); add(temp, tai, "th", "lo", "blt")114 add(temp, tai, "th", "lo", "blt"); add( temp, austronesian, "id", "ms", "jv", "fil", "haw", "mg", "to", "ceb", "mi", "su", "trv")115 add( 116 temp, 117 austronesian, 118 "id", 119 "ms", 120 "jv", 121 "fil", 122 "haw", 123 "mg", 124 "to", 125 "ceb", 126 "mi", 127 "su", 128 "trv"); add(temp, austroasiatic, "vi", "km", "sat")129 add(temp, austroasiatic, "vi", "km", "sat"); add( temp, niger_congo, "sw", "swc", "yo", "ig", "ff", "sn", "zu", "wo", "xh", "agq", "ak", "asa", "bas", "bem", "bez", "bm", "cgg", "dua", "dyo", "ebu", "ee", "ewo", "guz", "jgo", "kam", "ki", "kkj", "ksb", "ksf", "lag", "lg", "ln", "lu", "luy", "mua", "nd", "nnh", "nr", "nyn", "rn", "rof", "rw", "sbp", "sg", "ss", "tn", "ts", "vai", "ve", "dav", "jmc", "kde", "mer", "mgh", "mgo", "nmg", "nso", "rwk", "seh", "vun", "xog", "yav", "bss", "cch", "gaa", "kaj", "kcg", "ken", "kpe", "nqo", "ny", "st")130 add( 131 temp, 132 niger_congo, 133 "sw", 134 "swc", 135 "yo", 136 "ig", 137 "ff", 138 "sn", 139 "zu", 140 "wo", 141 "xh", 142 "agq", 143 "ak", 144 "asa", 145 "bas", 146 "bem", 147 "bez", 148 "bm", 149 "cgg", 150 "dua", 151 "dyo", 152 "ebu", 153 "ee", 154 "ewo", 155 "guz", 156 "jgo", 157 "kam", 158 "ki", 159 "kkj", 160 "ksb", 161 "ksf", 162 "lag", 163 "lg", 164 "ln", 165 "lu", 166 "luy", 167 "mua", 168 "nd", 169 "nnh", 170 "nr", 171 "nyn", 172 "rn", 173 "rof", 174 "rw", 175 "sbp", 176 "sg", 177 "ss", 178 "tn", 179 "ts", 180 "vai", 181 "ve", 182 "dav", 183 "jmc", 184 "kde", 185 "mer", 186 "mgh", 187 "mgo", 188 "nmg", 189 "nso", 190 "rwk", 191 "seh", 192 "vun", 193 "xog", 194 "yav", 195 "bss", 196 "cch", 197 "gaa", 198 "kaj", 199 "kcg", 200 "ken", 201 "kpe", 202 "nqo", 203 "ny", 204 "st"); add( temp, american, "chr", "kl", "lkt", "qu", "arn", "cad", "cic", "gn", "iu", "moh", "mus", "nv", "osa", "quc", "nci")205 add( 206 temp, american, "chr", "kl", "lkt", "qu", "arn", "cad", "cic", "gn", "iu", "moh", 207 "mus", "nv", "osa", "quc", "nci"); add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln")208 add(temp, east_sudanic, "luo", "mas", "nus", "saq", "teo", "kln"); add(temp, sino_tibetan, "my", "bo", "brx", "dz", "ii", "mni")209 add(temp, sino_tibetan, "my", "bo", "brx", "dz", "ii", "mni"); add(temp, songhay, "dje", "khq", "ses", "twq")210 add(temp, songhay, "dje", "khq", "ses", "twq"); add(temp, caucasian, "ka", "ce")211 add(temp, caucasian, "ka", "ce"); add(temp, other, "eu", "mn", "naq", "pcm", "kea", "mfe", "wbp")212 add(temp, other, "eu", "mn", "naq", "pcm", "kea", "mfe", "wbp"); add(temp, art, "eo", "vo", "ia", "io", "jbo")213 add(temp, art, "eo", "vo", "ia", "io", "jbo"); 214 // GROUP_LANGUAGE.freeze(); 215 } 216 get(ULocale locale)217 public static LanguageGroup get(ULocale locale) { 218 return CldrUtility.ifNull( 219 LANGUAGE_GROUP.get(new ULocale(locale.getLanguage())), LanguageGroup.other); 220 } 221 getExplicit()222 public static Set<ULocale> getExplicit() { 223 return Collections.unmodifiableSet(LANGUAGE_GROUP.keySet()); 224 } 225 getLocales(LanguageGroup group)226 public static Set<ULocale> getLocales(LanguageGroup group) { 227 return Collections.unmodifiableSet(GROUP_LANGUAGE.get(group).keySet()); 228 } 229 230 /** 231 * return position in group, or -1 if in no group 232 * 233 * @param locale 234 * @return 235 */ rankInGroup(ULocale locale)236 public static int rankInGroup(ULocale locale) { 237 locale = new ULocale(locale.getLanguage()); 238 LanguageGroup group = LANGUAGE_GROUP.get(locale); 239 if (group == null) { 240 return Integer.MAX_VALUE; 241 } 242 return GROUP_LANGUAGE.get(group).get(locale); 243 } 244 245 public static Comparator<ULocale> COMPARATOR = 246 new Comparator<ULocale>() { 247 @Override 248 public int compare(ULocale o1, ULocale o2) { 249 LanguageGroup group1 = get(o1); 250 LanguageGroup group2 = get(o2); 251 int diff = group1.ordinal() - group2.ordinal(); 252 if (diff != 0) return diff; 253 int r1 = rankInGroup(o1); 254 int r2 = rankInGroup(o2); 255 diff = r1 - r2; 256 return diff != 0 ? diff : o1.compareTo(o2); 257 } 258 }; 259 main(String[] args)260 public static void main(String[] args) { 261 CLDRFile english = CLDRConfig.getInstance().getEnglish(); 262 System.out.print( 263 "<supplementalData>\n" 264 + "\t<version number=\"$Revision:$\"/>\n" 265 + "\t<languageGroups>\n"); 266 for (LanguageGroup languageGroup : LanguageGroup.values()) { 267 Set<ULocale> locales = LanguageGroup.getLocales(languageGroup); 268 String englishName = languageGroup.getName(english); 269 System.out.print( 270 "\t\t<languageGroup id=\"" 271 + languageGroup.iso 272 + "\" code=\"" 273 + Joiner.on(", ").join(locales) 274 + "\"/>\t<!-- " 275 + englishName 276 + " -->\n"); 277 } 278 System.out.print("\t</languageGroups>" + "\n<supplementalData>\n"); 279 } 280 getName(CLDRFile cldrFile)281 public String getName(CLDRFile cldrFile) { 282 String prefix = ""; 283 LanguageTagParser ltp = new LanguageTagParser().set(iso); 284 switch (ltp.getRegion()) { 285 case "001": 286 if (ltp.getLanguage().equals("und")) { 287 return "Other"; 288 } 289 prefix = "Other "; 290 break; 291 case "": 292 break; 293 default: 294 return cldrFile.getName(CLDRFile.TERRITORY_NAME, ltp.getRegion()); 295 } 296 switch (ltp.getScript()) { 297 case "Hani": 298 return "CJK"; 299 case "": 300 break; 301 default: 302 throw new IllegalArgumentException("Need to fix code: " + ltp.getScript()); 303 } 304 return prefix 305 + cldrFile.getName(ltp.getLanguage()) 306 .replace(" [Other]", "") 307 .replace(" languages", ""); 308 } 309 310 @Override toString()311 public String toString() { 312 return getName(CLDRConfig.getInstance().getEnglish()); 313 } 314 } 315