1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.HashMultimap; 5 import com.google.common.collect.ImmutableMap; 6 import com.google.common.collect.ImmutableMultimap; 7 import com.google.common.collect.Multimap; 8 import com.ibm.icu.impl.Row; 9 import com.ibm.icu.impl.Row.R2; 10 import com.ibm.icu.lang.UCharacter; 11 import java.util.Collection; 12 import java.util.List; 13 import java.util.Map; 14 import java.util.Map.Entry; 15 import java.util.Set; 16 import java.util.function.Function; 17 import java.util.regex.Pattern; 18 import java.util.stream.Collectors; 19 import org.unicode.cldr.util.StandardCodes.LstrField; 20 import org.unicode.cldr.util.StandardCodes.LstrType; 21 22 public class LocaleValidator { 23 static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance(); 24 25 static final Validity VALIDITY = Validity.getInstance(); 26 static final Set<LstrType> FIELD_ALLOWS_EMPTY = Set.of(LstrType.script, LstrType.region); 27 // Map<LstrType, Map<String, Map<LstrField, String>>> 28 static final Map<String, Validity.Status> VALID_VARIANTS = 29 ImmutableMap.copyOf( 30 StandardCodes.getEnumLstreg().get(LstrType.variant).entrySet().stream() 31 .collect( 32 Collectors.toMap( 33 x -> x.getKey(), 34 y -> 35 y.getValue().get(LstrField.Deprecated) == null 36 ? Validity.Status.regular 37 : Validity.Status.deprecated))); 38 39 private static final Map<String, Validity.Status> KR_REORDER = 40 SupplementalDataInfo.getInstance().getBcp47Keys().get("kr").stream() 41 .filter(x -> !x.equals("REORDER_CODE")) 42 .collect( 43 Collectors.toMap( 44 Function.identity(), 45 y -> { 46 String temp = 47 SupplementalDataInfo.getInstance() 48 .getBcp47Deprecated() 49 .get(Row.of("kr", y)); 50 return "false".equals(temp) 51 ? Validity.Status.regular 52 : Validity.Status.deprecated; 53 })); 54 private static final Map<String, Validity.Status> LOWERCASE_SCRIPT = 55 VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream() 56 .collect( 57 Collectors.toMap( 58 x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue())); 59 60 private static final Map<String, Validity.Status> LOWERCASE_REGION = 61 VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream() 62 .collect( 63 Collectors.toMap( 64 x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue())); 65 66 public static class AllowedMatch { 67 final Pattern key; 68 final Pattern value; 69 final Validity.Status status; 70 AllowedMatch(String code)71 public AllowedMatch(String code) { 72 this(code, null, null); 73 } 74 AllowedMatch(String code, String value)75 public AllowedMatch(String code, String value) { 76 this(code, value, null); 77 } 78 AllowedMatch(String code, String value, Validity.Status status)79 public AllowedMatch(String code, String value, Validity.Status status) { 80 this.key = code == null ? null : Pattern.compile(code); 81 this.value = value == null ? null : Pattern.compile(value); 82 this.status = status; 83 } 84 matches(String key0, String value0, Validity.Status status)85 public boolean matches(String key0, String value0, Validity.Status status) { 86 return (key == null || key.matcher(key0).matches()) 87 && (value == null 88 || value.matcher(value0).matches() 89 && (status == null || status == status)); 90 } 91 92 @Override toString()93 public String toString() { 94 return key + "→" + value; 95 } 96 } 97 98 public static class AllowedValid { 99 100 private final Set<Validity.Status> allowedStatus; // allowed without exception 101 private final Multimap<LstrType, AllowedMatch> allowedExceptions; 102 isAllowed(Validity.Status status)103 boolean isAllowed(Validity.Status status) { 104 return allowedStatus.contains(status); 105 } 106 107 /** Only called if isAllowed is not true */ isAllowed(LstrType lstrType, String key, String value, Validity.Status status)108 boolean isAllowed(LstrType lstrType, String key, String value, Validity.Status status) { 109 Collection<AllowedMatch> allowedMatches = allowedExceptions.get(lstrType); 110 if (allowedMatches == null) { 111 return false; 112 } 113 for (AllowedMatch allowedMatch : allowedMatches) { 114 if (allowedMatch.matches(key, value, status)) return true; 115 } 116 return false; 117 } 118 AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions)119 public AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions) { 120 this.allowedStatus = 121 allowedStatus == null 122 ? Set.of(Validity.Status.regular) 123 : Set.copyOf(allowedStatus); 124 Multimap<LstrType, AllowedMatch> allowed = HashMultimap.create(); 125 if (allowedExceptions != null) { 126 for (int i = 0; i < allowedExceptions.length; i += 2) { 127 allowed.put( 128 (LstrType) allowedExceptions[i], 129 (AllowedMatch) allowedExceptions[i + 1]); 130 } 131 } 132 this.allowedExceptions = ImmutableMultimap.copyOf(allowed); 133 } 134 135 @Override toString()136 public String toString() { 137 return allowedStatus + " " + allowedExceptions; 138 } 139 } 140 141 /** 142 * @return true iff the component validates 143 */ isValid( LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors)144 public static boolean isValid( 145 LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors) { 146 if (errors != null) { 147 errors.clear(); 148 } 149 if (allowed == null) { 150 allowed = new AllowedValid(null, null); 151 } 152 if (ltp.isLegacy() && allowed.isAllowed(Validity.Status.deprecated)) { 153 return true; // don't need further checking, since we already did so when parsing 154 } 155 if (Validation.abort 156 == validates(LstrType.language, ltp.getLanguage(), null, allowed, errors)) { 157 return false; 158 } 159 if (Validation.abort 160 == validates(LstrType.script, ltp.getScript(), null, allowed, errors)) { 161 return false; 162 } 163 if (Validation.abort 164 == validates(LstrType.region, ltp.getRegion(), null, allowed, errors)) { 165 return false; 166 } 167 for (String variant : ltp.getVariants()) { 168 if (Validation.abort == validates(LstrType.variant, variant, null, allowed, errors)) { 169 return false; 170 } 171 } 172 for (Entry<String, List<String>> entry : ltp.getLocaleExtensionsDetailed().entrySet()) { 173 if (Validation.abort 174 == validates( 175 LstrType.extension, 176 entry.getKey(), 177 entry.getValue(), 178 allowed, 179 errors)) { 180 return false; 181 } 182 } 183 for (Entry<String, List<String>> entry : ltp.getExtensionsDetailed().entrySet()) { 184 if (Validation.abort 185 == validates( 186 LstrType.extension, 187 entry.getKey(), 188 entry.getValue(), 189 allowed, 190 errors)) { 191 return false; 192 } 193 } 194 return errors.isEmpty(); // if we didn't abort, then we recorded errors in the set 195 } 196 197 private enum Validation { 198 abort, 199 keepOn 200 } 201 /** 202 * Returns true if it doesn't validate and errors == null (allows for fast rejection) 203 * 204 * @param type 205 * @param values TODO 206 * @param subtag 207 * @return true if the subtag is empty, or it is an allowed status 208 */ validates( LstrType type, String field, List<String> values, LocaleValidator.AllowedValid allowed, Set<String> errors)209 private static LocaleValidator.Validation validates( 210 LstrType type, 211 String field, 212 List<String> values, 213 LocaleValidator.AllowedValid allowed, 214 Set<String> errors) { 215 Validity.Status status; 216 switch (type) { 217 case language: 218 case script: 219 case region: 220 status = VALIDITY.getCodeToStatus(type).get(field); 221 if (status == null) { 222 status = Validity.Status.invalid; 223 } 224 if (allowed.isAllowed(status) 225 || allowed.isAllowed(type, field, null, null) 226 || field.length() == 0) { 227 return Validation.keepOn; 228 } 229 break; 230 case variant: 231 status = VALID_VARIANTS.get(field); 232 if (status == null) { 233 status = Validity.Status.invalid; 234 } 235 if (allowed.isAllowed(status)) { 236 return Validation.keepOn; 237 } 238 break; 239 case extension: 240 switch (field.length()) { 241 case 1: 242 switch (field) { 243 case "t": // value is an LSRV 244 String lsvr = Joiner.on("-").join(values); 245 status = Validity.Status.invalid; 246 try { 247 LanguageTagParser ltp2 = new LanguageTagParser().set(lsvr); 248 if (isValid(ltp2, allowed, errors)) { 249 return Validation.keepOn; 250 } 251 } catch (Exception e) { 252 if (errors != null) { 253 errors.add( 254 String.format( 255 "Disallowed %s=%s, tlang=%s, status=%s", 256 type, lsvr, field, status)); 257 return Validation.keepOn; 258 } 259 } 260 return Validation.abort; 261 case "x": // private use, everything is valid 262 status = Validity.Status.private_use; 263 break; 264 case "u": // value is an attribute, none currently valid 265 status = Validity.Status.invalid; 266 break; 267 default: 268 status = Validity.Status.invalid; 269 break; 270 } 271 break; 272 case 2: 273 // field is a tkey or a ukey, based on last char 274 String tOrU = field.charAt(1) < 'A' ? "t" : "u"; 275 Set<String> subtypes = SDI.getBcp47Keys().get(field); 276 if (subtypes == null) { 277 status = Validity.Status.invalid; 278 } else { 279 String subtype = Joiner.on("-").join(values); 280 final Map<R2<String, String>, String> bcp47Deprecated = 281 SDI.getBcp47Deprecated(); 282 if ("true".equals(bcp47Deprecated.get(Row.of(field, subtype)))) { 283 status = Validity.Status.deprecated; 284 } else { 285 if (subtypes.contains(subtype)) { 286 status = Validity.Status.regular; 287 } else { 288 boolean mapUnknownToRegular = false; 289 fieldSwitch: 290 switch (field) { 291 case "x0": 292 status = Validity.Status.deprecated; 293 break; 294 case "dx": 295 status = 296 checkSpecials( 297 type, 298 field, 299 values, 300 allowed, 301 LOWERCASE_SCRIPT); 302 break; 303 case "kr": 304 status = 305 checkSpecials( 306 type, 307 field, 308 values, 309 allowed, 310 LOWERCASE_SCRIPT, 311 KR_REORDER); 312 break; 313 case "rg": 314 mapUnknownToRegular = true; 315 case "sd": 316 status = 317 checkSpecials( 318 type, 319 field, 320 values, 321 allowed, 322 VALIDITY.getCodeToStatus( 323 LstrType.subdivision)); 324 break; 325 case "vt": 326 status = Validity.Status.invalid; 327 if (values.isEmpty()) { 328 break fieldSwitch; 329 } 330 for (String value : values) { 331 try { 332 int intValue = Integer.parseInt(value, 16); 333 if (intValue < 0 334 || intValue > 0x10FFFF 335 || (Character.MIN_SURROGATE <= intValue 336 && intValue 337 <= Character 338 .MAX_SURROGATE)) { 339 break fieldSwitch; 340 } 341 } catch (NumberFormatException e) { 342 break fieldSwitch; 343 } 344 } 345 status = Validity.Status.regular; 346 break; 347 default: 348 status = Validity.Status.invalid; 349 break; 350 } 351 if (mapUnknownToRegular == true 352 && status == Validity.Status.unknown) { 353 status = Validity.Status.regular; 354 } 355 } 356 } 357 if (allowed.isAllowed(status) 358 || allowed.isAllowed( 359 LstrType.extension, field, subtype, status)) { 360 return Validation.keepOn; 361 } else if (errors == null) { 362 return Validation.abort; 363 } 364 errors.add( 365 String.format( 366 "Disallowed %s=%s=%s, status=%s", 367 type, field, subtype, status)); 368 return Validation.keepOn; 369 } 370 break; 371 default: 372 status = Validity.Status.invalid; 373 break; 374 } 375 break; 376 default: 377 status = null; 378 break; 379 } 380 if (errors == null) { 381 return Validation.abort; 382 } 383 errors.add(String.format("Disallowed %s=%s, status=%s", type, field, status)); 384 return Validation.keepOn; 385 } 386 387 public static Validity.Status checkSpecials( 388 LstrType type, 389 String field, 390 List<String> values, 391 LocaleValidator.AllowedValid allowed, 392 Map<String, Validity.Status>... validityMaps) { 393 if (values.size() > 1 394 && (field.equals("sd") || field.equals("rg"))) { // TODO generalize this 395 return Validity.Status.invalid; 396 } 397 Validity.Status best = null; 398 for (String value : values) { 399 Validity.Status status = null; 400 for (Map<String, Validity.Status> validityMap : validityMaps) { 401 status = validityMap.get(value); 402 if (status != null) { 403 break; 404 } 405 } 406 if (status == null) { 407 return Validity.Status.invalid; 408 } 409 if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, status)) { 410 if (best == null) { 411 best = status; 412 } 413 } else { 414 return status; 415 } 416 } 417 return best == null ? Validity.Status.invalid : best; 418 } 419 420 public Validity.Status checkRegion( 421 LstrType type, 422 String field, 423 List<String> values, 424 LocaleValidator.AllowedValid allowed) { 425 Validity.Status best = null; 426 for (String value : values) { 427 String value2 = UCharacter.toTitleCase(value, null); 428 Validity.Status status = VALIDITY.getCodeToStatus(LstrType.script).get(value2); 429 if (status == null) { 430 return Validity.Status.invalid; 431 } 432 if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, null)) { 433 if (best == null) { 434 best = status; 435 } 436 } else { 437 return status; 438 } 439 } 440 return best == null ? Validity.Status.invalid : best; 441 } 442 } 443