1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import com.google.common.collect.ImmutableList; 12 import com.ibm.icu.impl.Utility; 13 import com.ibm.icu.text.UnicodeSet; 14 import java.util.ArrayList; 15 import java.util.Collection; 16 import java.util.EnumSet; 17 import java.util.Iterator; 18 import java.util.List; 19 import java.util.Set; 20 import java.util.TreeSet; 21 import org.unicode.cldr.util.SupplementalDataInfo.ParentLocaleComponent; 22 23 public class LocaleIDParser { 24 /** 25 * @return Returns the language. 26 */ getLanguage()27 public String getLanguage() { 28 return language; 29 } 30 31 /** 32 * @return Returns the language. 33 */ getLanguageScript()34 public String getLanguageScript() { 35 if (script.length() != 0) return language + "_" + script; 36 return language; 37 } 38 getLanguageScript(Collection<String> in)39 public static Set<String> getLanguageScript(Collection<String> in) { 40 return getLanguageScript(in, null); 41 } 42 getLanguageScript(Collection<String> in, Set<String> output)43 public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) { 44 if (output == null) output = new TreeSet<>(); 45 LocaleIDParser lparser = new LocaleIDParser(); 46 for (Iterator<String> it = in.iterator(); it.hasNext(); ) { 47 output.add(lparser.set(it.next()).getLanguageScript()); 48 } 49 return output; 50 } 51 52 /** 53 * @return Returns the region. 54 */ getRegion()55 public String getRegion() { 56 return region; 57 } 58 59 /** 60 * @return Returns the script. 61 */ getScript()62 public String getScript() { 63 return script; 64 } 65 66 /** 67 * @return Returns the variants. 68 */ getVariants()69 public String[] getVariants() { 70 return variants.clone(); 71 } 72 73 // TODO, update to RFC3066 74 // http://www.inter-locale.com/ID/draft-phillips-langtags-08.html 75 private String language; 76 private String script; 77 private String region; 78 private String[] variants; 79 80 static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]"); 81 static final UnicodeSet digits = new UnicodeSet("[0-9]"); 82 set(String localeID)83 public LocaleIDParser set(String localeID) { 84 region = script = ""; 85 variants = new String[0]; 86 87 String[] pieces = new String[100]; // fix limitation later 88 Utility.split(localeID, '_', pieces); 89 int i = 0; 90 language = pieces[i++]; 91 if (i >= pieces.length) return this; 92 if (pieces[i].length() == 4) { 93 script = pieces[i++]; 94 if (i >= pieces.length) return this; 95 } 96 if (pieces[i].length() == 2 && letters.containsAll(pieces[i]) 97 || pieces[i].length() == 3 && digits.containsAll(pieces[i])) { 98 region = pieces[i++]; 99 if (i >= pieces.length) return this; 100 } 101 List<String> al = new ArrayList<>(); 102 while (i < pieces.length && pieces[i].length() > 0) { 103 al.add(pieces[i++]); 104 } 105 variants = new String[al.size()]; 106 al.toArray(variants); 107 return this; 108 } 109 110 /** 111 * Get the parent of a locale. If the input is "root", then return null. For example, if 112 * localeName is "fr_CA", return "fr". 113 * 114 * <p>Only works on canonical locale names (right casing, etc.)! 115 * 116 * <p>Formerly this function returned an empty string when localeName was "_VETTING". Now it 117 * returns "root" where it would have returned an empty string. TODO: explain "__VETTING", 118 * somehow related to SUMMARY_LOCALE. Note that CLDRLocale.process() changes "__" to "_" before 119 * this function is called. Reference: https://unicode-org.atlassian.net/browse/CLDR-13133 120 */ getParent(String localeId)121 public static final String getParent(String localeId) { 122 return getParent(localeId, ParentLocaleComponent.main); 123 } 124 125 /** 126 * Get the parent of a locale. If the input is "root", then return null. For example, if 127 * localeId is "fr_CA", return "fr". There is a different inheritance chain for certain 128 * supplemental data elements. 129 * 130 * @param localeId Only works on canonical locale names (right casing, etc.)! 131 * @param component picks the component that indicates the inheritance chain. Is either the 132 * standard ('main') used for all ldml-dtd items, or is one of the particular elements in 133 * supplemental data that has a different inheritance, such as collations or plurals 134 */ getParent(String localeId, ParentLocaleComponent component)135 public static String getParent(String localeId, ParentLocaleComponent component) { 136 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); 137 String explicitParent = sdi.getExplicitParentLocale(localeId, component); 138 if (explicitParent != null) { 139 return explicitParent; 140 } 141 int pos = localeId.lastIndexOf('_'); 142 if (pos >= 0) { 143 String truncated = localeId.substring(0, pos); 144 // if the final item is a script, and it is not the default content, then go directly to 145 // root 146 int pos2 = getScriptPosition(localeId); 147 boolean skipNonLikely = sdi.parentLocalesSkipNonLikely(component); 148 if (pos2 > 0 && skipNonLikely) { 149 String script = localeId.substring(pos + 1); 150 String defaultScript = sdi.getDefaultScript(truncated); 151 if (!script.equals(defaultScript)) { 152 return "root"; 153 } 154 } 155 if (truncated.length() == 0) { 156 return "root"; 157 } 158 return truncated; 159 } 160 if (localeId.equals("root")) { 161 return null; 162 } 163 return "root"; 164 } 165 166 /** 167 * Return the base language subtag: en_US => en, en_Latn_US => en, en => en, root => root 168 * 169 * @param localeID 170 * @return 171 */ getSimpleBaseLanguage(String localeID)172 public static String getSimpleBaseLanguage(String localeID) { 173 int pos = localeID.indexOf('_'); 174 if (pos >= 0) { 175 return localeID.substring(0, pos); 176 } 177 return localeID; 178 } 179 180 /** 181 * If the locale consists of baseLanguage+script, return the position of the separator, 182 * otherwise -1. 183 * 184 * @param s 185 */ getScriptPosition(String locale)186 public static int getScriptPosition(String locale) { 187 int pos = locale.indexOf('_'); 188 if (pos >= 0 && pos + 5 == locale.length()) { 189 int pos2 = locale.indexOf('_', pos + 1); 190 if (pos2 < 0) { 191 return pos; 192 } 193 } 194 return -1; 195 } 196 197 /** 198 * Utility to get the simple parent of a locale. If the input is "root", then the output is 199 * null. This method is similar to the getParent() method above, except that it does NOT pay any 200 * attention to the explicit parent locales information. Thus, getParent("zh_Hant") will return 201 * "root", but getSimpleParent("zh_Hant") would return "zh". 202 */ getSimpleParent(String localeName)203 public static String getSimpleParent(String localeName) { 204 int pos = localeName.lastIndexOf('_'); 205 if (pos >= 0) { 206 return localeName.substring(0, pos); 207 } 208 if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) return null; 209 return "root"; 210 } 211 setLanguage(String language)212 public LocaleIDParser setLanguage(String language) { 213 this.language = language; 214 return this; 215 } 216 setRegion(String region)217 public LocaleIDParser setRegion(String region) { 218 this.region = region; 219 return this; 220 } 221 setScript(String script)222 public LocaleIDParser setScript(String script) { 223 this.script = script; 224 return this; 225 } 226 setVariants(String[] variants)227 public LocaleIDParser setVariants(String[] variants) { 228 this.variants = variants.clone(); 229 return this; 230 } 231 232 public enum Level { 233 Language, 234 Script, 235 Region, 236 Variants, 237 Other 238 } 239 240 /** 241 * Returns an int mask indicating the level 242 * 243 * @return (2 if script is present) + (4 if region is present) + (8 if region is present) 244 */ getLevels()245 public Set<Level> getLevels() { 246 EnumSet<Level> result = EnumSet.of(Level.Language); 247 if (getScript().length() != 0) result.add(Level.Script); 248 if (getRegion().length() != 0) result.add(Level.Region); 249 if (getVariants().length != 0) result.add(Level.Variants); 250 return result; 251 } 252 getSiblings(Set<String> set)253 public Set<String> getSiblings(Set<String> set) { 254 Set<Level> myLevel = getLevels(); 255 String localeID = toString(); 256 String parentID = getParent(localeID); 257 258 String prefix = (parentID == null || "root".equals(parentID)) ? "" : parentID + "_"; 259 Set<String> siblings = new TreeSet<>(); 260 for (String id : set) { 261 if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) { 262 siblings.add(id); 263 } 264 } 265 set(localeID); // leave in known state 266 return siblings; 267 } 268 269 @Override toString()270 public String toString() { 271 StringBuffer result = new StringBuffer(language); 272 if (script.length() != 0) result.append('_').append(script); 273 if (region.length() != 0) result.append('_').append(region); 274 if (variants != null) { 275 for (int i = 0; i < variants.length; ++i) { 276 result.append('_').append(variants[i]); 277 } 278 } 279 return result.toString(); 280 } 281 282 public static final ImmutableList<String> FALLBACK_CHAIN = ImmutableList.of(); 283 public static final ImmutableList<String> ROOT_PARENT_CHAIN = 284 ImmutableList.of(XMLSource.ROOT_ID); 285 286 /** 287 * Return localeIds getParent chain. Return null if there is none (localeID == root or 288 * code-fallback). Note: an L1 locale will have exactly 1 element, and be identical to 289 * ROOT_PARENT_CHAIN. TODO optimize by caching the chains Returns a 290 */ getParentChain(String localeID)291 public static List<String> getParentChain(String localeID) { 292 if (XMLSource.ROOT_ID.equals(localeID)) { 293 return FALLBACK_CHAIN; 294 } 295 List<String> result = null; 296 while (true) { 297 String parent = getParent(localeID); 298 if (parent.equals(XMLSource.ROOT_ID)) { 299 if (result == null) { 300 return ROOT_PARENT_CHAIN; 301 } else { 302 result.addAll(ROOT_PARENT_CHAIN); 303 return ImmutableList.copyOf(result); 304 } 305 } 306 if (result == null) { 307 result = new ArrayList<>(); 308 } 309 result.add(parent); 310 localeID = parent; 311 } 312 } 313 isL1(String localeId)314 public static boolean isL1(String localeId) { 315 return XMLSource.ROOT_ID.equals(getParent(localeId)); 316 } 317 } 318