package org.unicode.cldr.util;

import com.google.common.base.Splitter;
import com.google.common.collect.ComparisonChain;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUException;
import com.ibm.icu.util.Output;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.unicode.cldr.util.LanguageInfo.CldrDir;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;

/**
 * Checks attribute values against {@link MatcherPattern}s.
 *
 * <p>The class initializer builds a table of named matcher "variables" (all starting with {@code
 * $}) from the BCP 47 keys, the {@link Validity} code lists, the available languages and the
 * validity variables in the supplemental data. It then reads the {@code <attributeValues>} entries
 * from the supplemental data and records, per DTD type / element / attribute, which matcher
 * applies. {@link #check(DtdData, String, String, String, Output)} is the lookup entry point.
 */
public class AttributeValueValidity {

    /** Outcome of checking one attribute value. */
    public enum Status {
        ok,
        deprecated,
        illegal,
        noTest
    }

    /** Keys for value sets supplied at runtime via {@link #setLocaleSpecifics(EnumMap)}. */
    public enum LocaleSpecific {
        pluralCardinal,
        pluralOrdinal,
        dayPeriodFormat,
        dayPeriodSelection
    }

    static final Splitter BAR = Splitter.on('|').trimResults().omitEmptyStrings();
    static final Splitter SPACE =
            Splitter.on(PatternCache.get("\\s+")).trimResults().omitEmptyStrings();

    private static final Set<DtdType> ALL_DTDs =
            Collections.unmodifiableSet(EnumSet.allOf(DtdType.class));

    private static final SupplementalDataInfo supplementalData =
            CLDRConfig.getInstance().getSupplementalDataInfo();

    /** DTD type → element name → attribute name → matcher. */
    private static final Map<DtdType, Map<String, Map<String, MatcherPattern>>>
            dtd_element_attribute_validity = new EnumMap<>(DtdType.class);

    /** Attribute name → matcher, for entries that list no elements. */
    private static final Map<String, MatcherPattern> common_attribute_validity =
            new LinkedHashMap<>();

    /** Variable name (with leading {@code $}) → matcher. */
    private static final Map<String, MatcherPattern> variables = new LinkedHashMap<>();

    /** Sentinel used for entries whose type is {@code TODO}; compared by identity. */
    private static final RegexMatcher NOT_DONE_YET = new RegexMatcher(".*", Pattern.COMMENTS);

    /** Entries for which no matcher could be built, with their raw value. */
    private static final Map<AttributeValidityInfo, String> failures = new LinkedHashMap<>();

    static {
        // Order matters: later steps resolve variables registered by earlier ones.
        addBcp47Variables();
        addValidityVariables();
        addLanguageVariables();
        addSupplementalVariables();
        addAttributeValidity();
    }

    /** Registers {@code $_bcp47_<key>}, {@code $_bcp47_keys} and {@code $_bcp47_value}. */
    private static void addBcp47Variables() {
        Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
        Set<String> bcp47Keys = new LinkedHashSet<>();
        Set<String> bcp47Values = new LinkedHashSet<>();
        for (Entry<String, Set<String>> keyValues :
                supplementalData.getBcp47Keys().keyValuesSet()) {
            Set<String> fullValues = new TreeSet<>();
            String key = keyValues.getKey();
            bcp47Keys.add(key);

            for (String value : keyValues.getValue()) {
                if (key.equals("cu")) {
                    // Currency codes are in upper case. Locale.ROOT keeps the result
                    // independent of the default locale (e.g. Turkish dotted I).
                    fullValues.add(value.toUpperCase(Locale.ROOT));
                } else {
                    fullValues.add(value);
                }
                R2<String, String> keyValue = R2.of(key, value);
                Set<String> valueAliases = bcp47Aliases.getAll(keyValue);
                if (valueAliases != null) {
                    fullValues.addAll(valueAliases);
                }
            }
            // Special case exception for generic calendar, since we don't want to expose it
            // in bcp47
            if (key.equals("ca")) {
                fullValues.add("generic");
            }
            fullValues = Collections.unmodifiableSet(fullValues);
            addCollectionVariable("$_bcp47_" + key, fullValues);

            // add aliased keys
            Set<String> keyAliases = bcp47Aliases.getAll(Row.of(key, ""));
            if (keyAliases != null) {
                for (String aliasKey : keyAliases) {
                    bcp47Keys.add(aliasKey);
                    addCollectionVariable("$_bcp47_" + aliasKey, fullValues);
                }
            }
            bcp47Values.addAll(fullValues);
        }
        bcp47Keys.add("x"); // special-case private use
        bcp47Keys.add("x0"); // special-case, has no subtypes
        addCollectionVariable("$_bcp47_keys", bcp47Keys);
        addCollectionVariable("$_bcp47_value", bcp47Values);
    }

    /**
     * Registers, for every {@link LstrType} that has validity data, the variables {@code
     * $_<type>}, {@code $_<type>_<status>} and {@code $_<type>_plus} (regular, special and unknown
     * codes); subdivisions additionally get {@code _prefix} and {@code _suffix}.
     */
    private static void addValidityVariables() {
        Validity validity = Validity.getInstance();
        for (LstrType key : LstrType.values()) {
            final Map<Validity.Status, Set<String>> statusToCodes = validity.getStatusToCodes(key);
            if (statusToCodes == null) {
                continue;
            }
            String keyName = "$_" + key;
            Set<String> all = new LinkedHashSet<>();
            Set<String> prefix = new LinkedHashSet<>();
            Set<String> suffix = new LinkedHashSet<>();
            Set<String> regularAndUnknown = new LinkedHashSet<>();
            for (Entry<Validity.Status, Set<String>> statusAndCodes : statusToCodes.entrySet()) {
                Validity.Status status = statusAndCodes.getKey();
                Set<String> validItems = statusAndCodes.getValue();
                if (key == LstrType.variant) { // uppercased in CLDR
                    // Accept both the original and the upper-cased spelling.
                    Set<String> withUpper = new LinkedHashSet<>(validItems);
                    for (String item : validItems) {
                        withUpper.add(item.toUpperCase(Locale.ROOT));
                    }
                    validItems = withUpper;
                } else if (key == LstrType.subdivision) {
                    for (String item : validItems) {
                        if (item.contains("-")) {
                            List<String> parts = Splitter.on('-').splitToList(item);
                            prefix.add(parts.get(0));
                            suffix.add(parts.get(1));
                        } else {
                            // A leading character below 'A' (e.g. a digit) means a
                            // 3-character prefix, otherwise a 2-character one.
                            int prefixWidth = item.charAt(0) < 'A' ? 3 : 2;
                            prefix.add(item.substring(0, prefixWidth));
                            suffix.add(item.substring(prefixWidth));
                        }
                    }
                }
                all.addAll(validItems);
                if (status == Validity.Status.regular
                        || status == Validity.Status.special
                        || status == Validity.Status.unknown) {
                    regularAndUnknown.addAll(validItems);
                }
                addCollectionVariable(keyName + "_" + status, validItems);
            }
            if (key == LstrType.subdivision) {
                addCollectionVariable(keyName + "_prefix", prefix);
                addCollectionVariable(keyName + "_suffix", suffix);
            }
            addCollectionVariable(keyName, all);
            addCollectionVariable(keyName + "_plus", regularAndUnknown);
        }
    }

    /**
     * Registers {@code $_language_main}, {@code $_language_coverage}, {@code
     * $_language_large_official} and their union {@code $_language_cldr}.
     */
    private static void addLanguageVariables() {
        Set<String> main = new LinkedHashSet<>();
        main.addAll(StandardCodes.LstrType.language.specials);
        Set<String> coverage = new LinkedHashSet<>();
        Set<String> large_official = new LinkedHashSet<>();
        final LocaleIDParser lip = new LocaleIDParser();

        for (String language : LanguageInfo.getAvailable()) {
            LanguageInfo info = LanguageInfo.get(language);
            CldrDir cldrDir = info.getCldrDir();
            String base = lip.set(language).getLanguage();
            if (cldrDir == CldrDir.main || cldrDir == CldrDir.base) {
                main.add(base);
            }
            if (info.getCldrLevel() == Level.MODERN) {
                coverage.add(base);
            }
            if (info.getLiteratePopulation() > 1000000 && !info.getStatusToRegions().isEmpty()) {
                large_official.add(base);
            }
        }
        addCollectionVariable("$_language_main", main);
        addCollectionVariable("$_language_coverage", coverage);
        addCollectionVariable("$_language_large_official", large_official);

        Set<String> cldrLang = new TreeSet<>(main);
        cldrLang.addAll(coverage);
        cldrLang.addAll(large_official);
        // Previously large_official was registered here and the union was discarded.
        addCollectionVariable("$_language_cldr", cldrLang);
    }

    /** Registers the validity variables declared in the supplemental data. */
    private static void addSupplementalVariables() {
        Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
        for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
            String id = item.getKey();
            String type = item.getValue().get0();
            String value = item.getValue().get1();
            MatcherPattern mp = getMatcherPattern2(type, value);
            if (mp == null) {
                throw new IllegalArgumentException(
                        "Unsupported matcher type '" + type + "' for variable " + id);
            }
            variables.put(id, mp);
        }
    }

    /**
     * Reads the {@code <attributeValues>} entries from the supplemental data and fills {@link
     * #common_attribute_validity} and {@link #dtd_element_attribute_validity}. Entries for which
     * no matcher can be built are recorded in {@link #failures}.
     *
     * @throws ICUException if an entry names an element or attribute that its DTD does not define
     */
    private static void addAttributeValidity() {
        Map<AttributeValidityInfo, String> rawAttributeValueInfo =
                supplementalData.getAttributeValidity();
        for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
            AttributeValidityInfo item = entry.getKey();
            String value = entry.getValue();
            MatcherPattern mp = getMatcherPattern2(item.getType(), value);
            if (mp == null) {
                failures.put(item, value);
                continue;
            }
            Set<DtdType> dtds = item.getDtds();
            if (dtds == null) {
                dtds = ALL_DTDs;
            }
            // <attributeValues dtds="supplementalData" elements="currency"
            // attributes="before from to">$currencyDate</attributeValues>
            Set<String> attributeList = item.getAttributes();
            Set<String> elementList = item.getElements();

            for (DtdType dtdType : dtds) {
                DtdData data = DtdData.getInstance(dtdType);
                Map<String, Map<String, MatcherPattern>> element_attribute_validity =
                        dtd_element_attribute_validity.get(dtdType);
                if (element_attribute_validity == null) {
                    element_attribute_validity = new TreeMap<>();
                    dtd_element_attribute_validity.put(dtdType, element_attribute_validity);
                }

                if (elementList.isEmpty()) {
                    addAttributes(attributeList, common_attribute_validity, mp);
                    continue;
                }
                for (String element : elementList) {
                    requireDeclaredInDtd(dtdType, data, element, attributeList);
                    Map<String, MatcherPattern> attribute_validity =
                            element_attribute_validity.get(element);
                    if (attribute_validity == null) {
                        attribute_validity = new TreeMap<>();
                        element_attribute_validity.put(element, attribute_validity);
                    }
                    addAttributes(attributeList, attribute_validity, mp);
                }
            }
        }
    }

    /** Throws unless {@code element} and each of {@code attributeList} exist in the DTD. */
    private static void requireDeclaredInDtd(
            DtdType dtdType, DtdData data, String element, Set<String> attributeList) {
        DtdData.Element elementInfo = data.getElementFromName().get(element);
        if (elementInfo == null) {
            throw new ICUException(
                    "Illegal <attributeValues>, element not valid: "
                            + dtdType
                            + ", element: "
                            + element);
        }
        for (String attribute : attributeList) {
            if (elementInfo.getAttributeNamed(attribute) == null) {
                throw new ICUException(
                        "Illegal <attributeValues>, attribute not valid: "
                                + dtdType
                                + ", element: "
                                + element
                                + ", attribute: "
                                + attribute);
            }
        }
    }

    /** Registers {@code name} as a variable matching exactly the strings in {@code validItems}. */
    private static void addCollectionVariable(String name, Set<String> validItems) {
        variables.put(name, new CollectionMatcher(validItems));
    }

    /**
     * Lists the non-deprecated attributes of {@code dtdType} that neither enumerate their values
     * in the DTD nor have an element-specific matcher registered.
     *
     * @param dtdType the DTD to inspect; {@link DtdType#ldmlICU} always yields an empty result
     * @return attribute name → template {@code <attributeValues>} lines, one per element
     */
    public static Relation<String, String> getAllPossibleMissing(DtdType dtdType) {
        Relation<String, String> missing =
                Relation.of(new TreeMap<String, Set<String>>(), LinkedHashSet.class);

        if (dtdType == DtdType.ldmlICU) {
            return missing;
        }

        DtdData dtdData2 = DtdData.getInstance(dtdType);
        Map<String, Map<String, MatcherPattern>> element_attribute_validity =
                CldrUtility.ifNull(
                        dtd_element_attribute_validity.get(dtdType),
                        Collections.<String, Map<String, MatcherPattern>>emptyMap());

        for (DtdData.Element element : dtdData2.getElements()) {
            if (element.isDeprecated()) {
                continue;
            }
            Map<String, MatcherPattern> attribute_validity =
                    CldrUtility.ifNull(
                            element_attribute_validity.get(element.name),
                            Collections.<String, MatcherPattern>emptyMap());
            for (DtdData.Attribute attribute : element.getAttributes().keySet()) {
                boolean needsEntry =
                        !attribute.isDeprecated()
                                && attribute.values.isEmpty()
                                && attribute_validity.get(attribute.name) == null;
                if (!needsEntry) {
                    continue;
                }
                // <attributeValues attributes="alt" type="choice">$alt</attributeValues>
                // <attributeValues dtds="supplementalData" elements="character"
                // attributes="value" type="regex">.</attributeValues>
                missing.put(
                        attribute.name,
                        new AttributeValueSpec(dtdType, element.name, attribute.name, "$xxx")
                                .toString());
            }
        }
        return missing;
    }

    /** A predicate over attribute values that can also describe itself. */
    public abstract static class MatcherPattern {

        /**
         * Tests a value.
         *
         * @param value the attribute value to test
         * @param reason if non-null, receives an explanation when the value does not match
         * @return true if the value is acceptable
         */
        public abstract boolean matches(String value, Output<String> reason);

        /** Returns {@link #_getPattern()}, cut to {@link #MAX_STRING} chars plus an ellipsis. */
        public String getPattern() {
            String temp = _getPattern();
            return temp.length() <= MAX_STRING ? temp : temp.substring(0, MAX_STRING) + "…";
        }

        /** Returns the full, untruncated description of what this matcher accepts. */
        public abstract String _getPattern();

        @Override
        public String toString() {
            return getClass().getName() + "\t" + getPattern();
        }
    }

    /** The values accepted for the {@code type} of a validity entry. */
    enum MatcherTypes {
        single,
        choice,
        list,
        unicodeSet,
        unicodeSetOrString,
        regex,
        locale,
        bcp47,
        subdivision,
        localeSpecific,
        TODO;
    }

    /**
     * Builds the matcher for one validity entry.
     *
     * @param type a {@link MatcherTypes} name, or null for {@code single}
     * @param value the entry text; if it starts with {@code $} (and the type is not {@code TODO})
     *     it is resolved as a {@code |}-separated list of variable names
     * @return the matcher, or null if the type has no matcher implementation here
     * @throws IllegalArgumentException if {@code type} is not a {@link MatcherTypes} name
     * @throws NullPointerException if a referenced variable is not registered
     */
    private static MatcherPattern getMatcherPattern2(String type, String value) {
        final MatcherTypes matcherType =
                type == null ? MatcherTypes.single : MatcherTypes.valueOf(type);

        if (matcherType != MatcherTypes.TODO && value.startsWith("$")) {
            // getVariable never returns null; unknown names throw inside it.
            return getVariable(matcherType, value);
        }

        switch (matcherType) {
            case single:
                return new CollectionMatcher(Collections.singleton(value.trim()));
            case choice:
                return new CollectionMatcher(SPACE.splitToList(value));
            case unicodeSet:
                return new UnicodeSetMatcher(new UnicodeSet(value));
            case unicodeSetOrString:
                return new UnicodeSetOrStringMatcher(new UnicodeSet(value));
            case regex:
                // Pattern.COMMENTS to get whitespace
                return new RegexMatcher(value, Pattern.COMMENTS);
            case locale:
                return value.equals("all") ? LocaleMatcher.ALL_LANGUAGES : LocaleMatcher.REGULAR;
            case localeSpecific:
                return LocaleSpecificMatcher.getInstance(value);
            case TODO:
                return NOT_DONE_YET;
            case list:
                return new ListMatcher(new CollectionMatcher(SPACE.splitToList(value)));
            default:
                return null;
        }
    }

    /**
     * Resolves a {@code |}-separated list of variable names into one matcher, wrapped in a {@link
     * ListMatcher} when {@code matcherType} is {@code list}.
     */
    private static MatcherPattern getVariable(final MatcherTypes matcherType, String value) {
        List<String> names = BAR.splitToList(value);
        MatcherPattern[] alternatives = new MatcherPattern[names.size()];
        for (int i = 0; i < alternatives.length; ++i) {
            alternatives[i] = getNonNullVariable(names.get(i));
        }
        MatcherPattern result =
                alternatives.length == 1 ? alternatives[0] : new OrMatcher(alternatives);
        if (matcherType == MatcherTypes.list) {
            result = new ListMatcher(result);
        }
        return result;
    }

    /**
     * Registers {@code mp} for each attribute; if an attribute already has a matcher, the new
     * entry accepts what either matcher accepts.
     */
    private static void addAttributes(
            Set<String> attributes,
            Map<String, MatcherPattern> attribute_validity,
            MatcherPattern mp) {
        for (String attribute : attributes) {
            MatcherPattern old = attribute_validity.get(attribute);
            // Use a per-attribute local: reassigning mp would carry this attribute's
            // previous matcher over into the following attributes.
            MatcherPattern combined = old == null ? mp : new OrMatcher(old, mp);
            attribute_validity.put(attribute, combined);
        }
    }

    /** Accepts values that match a regular expression in full. */
    public static class RegexMatcher extends MatcherPattern {

        private final Pattern pattern;

        public RegexMatcher(String pattern, int flags) {
            this.pattern = Pattern.compile(pattern, flags);
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            // A fresh Matcher per call: Matcher is mutable, the compiled Pattern is not.
            java.util.regex.Matcher matcher = pattern.matcher(value);
            boolean result = matcher.matches();
            if (!result && reason != null) {
                reason.value = RegexUtilities.showMismatch(matcher, value);
            }
            return result;
        }

        @Override
        public String _getPattern() {
            return pattern.pattern();
        }
    }

    private static EnumMap<LocaleSpecific, Set<String>> LOCALE_SPECIFIC = null;

    /** WARNING, not thread-safe. Needs cleanup * */
    public static void setLocaleSpecifics(EnumMap<LocaleSpecific, Set<String>> newValues) {
        LOCALE_SPECIFIC = newValues;
    }

    /**
     * Accepts the values currently set for one {@link LocaleSpecific} key via {@link
     * #setLocaleSpecifics(EnumMap)}. The set is looked up on every call.
     */
    public static class LocaleSpecificMatcher extends MatcherPattern {
        final LocaleSpecific ls;

        public LocaleSpecificMatcher(LocaleSpecific ls) {
            this.ls = ls;
        }

        /**
         * @throws IllegalArgumentException if {@code value} is not a {@link LocaleSpecific} name
         */
        public static LocaleSpecificMatcher getInstance(String value) {
            return new LocaleSpecificMatcher(LocaleSpecific.valueOf(value));
        }

        public boolean matches(String value) {
            return currentValues().contains(value);
        }

        // Same value as the enclosing class's MAX_STRING; kept for compatibility.
        static final int MAX_STRING = 64;

        @Override
        public boolean matches(String value, Output<String> reason) {
            boolean result = currentValues().contains(value);
            if (!result && reason != null) {
                reason.value = "∉ " + getPattern();
            }
            return result;
        }

        @Override
        public String _getPattern() {
            return currentValues().toString();
        }

        /**
         * @throws NullPointerException if no values have been set, or none for this key
         */
        private Set<String> currentValues() {
            Map<LocaleSpecific, Set<String>> current =
                    Objects.requireNonNull(
                            LOCALE_SPECIFIC, "setLocaleSpecifics() has not been called");
            return Objects.requireNonNull(current.get(ls), "No locale-specific values for " + ls);
        }
    }

    static final int MAX_STRING = 64;

    /** Accepts exactly the strings in a fixed collection (copied at construction). */
    public static class CollectionMatcher extends MatcherPattern {
        private final Collection<String> collection;

        public CollectionMatcher(Collection<String> collection) {
            this.collection = Collections.unmodifiableCollection(new LinkedHashSet<>(collection));
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            boolean result = collection.contains(value);
            if (!result && reason != null) {
                reason.value = "∉ " + getPattern();
            }
            return result;
        }

        @Override
        public String _getPattern() {
            return collection.toString();
        }
    }

    /** Accepts UnicodeSet patterns whose contents are all inside a fixed UnicodeSet. */
    public static class UnicodeSetMatcher extends MatcherPattern {
        private final UnicodeSet collection;

        public UnicodeSetMatcher(UnicodeSet collection) {
            this.collection = collection.freeze();
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            boolean result = false;
            try {
                UnicodeSet valueSet = new UnicodeSet(value);
                result = collection.containsAll(valueSet);
                if (!result && reason != null) {
                    reason.value = "∉ " + getPattern();
                }
            } catch (RuntimeException e) {
                // The value is not a parseable UnicodeSet pattern: report, don't propagate.
                if (reason != null) {
                    reason.value = " illegal pattern " + getPattern() + ": " + value;
                }
            }
            return result;
        }

        @Override
        public String _getPattern() {
            return collection.toPattern(false);
        }
    }

    /**
     * Like {@link UnicodeSetMatcher} for values that resemble a UnicodeSet pattern; any other
     * value is accepted if the fixed set contains it as a string.
     */
    public static class UnicodeSetOrStringMatcher extends MatcherPattern {
        private final UnicodeSet collection;

        public UnicodeSetOrStringMatcher(UnicodeSet collection) {
            this.collection = collection.freeze();
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            boolean result = false;
            if (UnicodeSet.resemblesPattern(value, 0)) {
                try {
                    UnicodeSet valueSet = new UnicodeSet(value);
                    result = collection.containsAll(valueSet);
                    if (!result && reason != null) {
                        reason.value = "∉ " + getPattern();
                    }
                } catch (RuntimeException e) {
                    // Looked like a pattern but did not parse: report, don't propagate.
                    if (reason != null) {
                        reason.value = " illegal pattern " + getPattern() + ": " + value;
                    }
                }
            } else {
                result = collection.contains(value);
                if (!result && reason != null) {
                    reason.value = "∉ " + getPattern();
                }
            }
            return result;
        }

        @Override
        public String _getPattern() {
            return collection.toPattern(false);
        }
    }

    /** Accepts a value if any operand accepts it; operands are tried in order. */
    public static class OrMatcher extends MatcherPattern {
        private final MatcherPattern[] operands;

        /**
         * @throws NullPointerException if any operand is null
         */
        public OrMatcher(MatcherPattern... operands) {
            for (MatcherPattern operand : operands) {
                Objects.requireNonNull(operand, "operand");
            }
            // Copy so later changes to the caller's array cannot affect this matcher.
            this.operands = operands.clone();
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            StringBuilder fullReason = reason == null ? null : new StringBuilder();
            for (MatcherPattern operand : operands) {
                if (operand.matches(value, reason)) {
                    return true;
                }
                if (fullReason != null) {
                    if (fullReason.length() != 0) {
                        fullReason.append("&");
                    }
                    fullReason.append(reason.value);
                }
            }
            if (fullReason != null) {
                reason.value = fullReason.toString();
            }
            return false;
        }

        @Override
        public String _getPattern() {
            StringBuilder result = new StringBuilder();
            for (MatcherPattern operand : operands) {
                if (result.length() != 0) {
                    result.append('|');
                }
                result.append(operand._getPattern());
            }
            return result.toString();
        }
    }

    /**
     * Accepts a whitespace-separated list whose every item is accepted by another matcher. A value
     * with no items is accepted.
     */
    public static class ListMatcher extends MatcherPattern {
        private final MatcherPattern other;

        public ListMatcher(MatcherPattern other) {
            this.other = other;
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            for (String valueItem : SPACE.splitToList(value)) {
                if (!other.matches(valueItem, reason)) {
                    if (reason != null) {
                        reason.value = "«" + valueItem + "» ∉ " + other.getPattern();
                    }
                    return false;
                }
            }
            return true;
        }

        @Override
        public String _getPattern() {
            return "List of " + other._getPattern();
        }
    }

    /**
     * Accepts locale IDs whose language, script, region and variants are each accepted by the
     * corresponding variable. Empty script and region fields are not checked.
     */
    public static class LocaleMatcher extends MatcherPattern {
        final MatcherPattern language;
        final MatcherPattern script = getNonNullVariable("$_script");
        final MatcherPattern territory = getNonNullVariable("$_region");
        final MatcherPattern variant = getNonNullVariable("$_variant");
        // NOTE(review): one parser instance is reused by every call on this matcher, and
        // REGULAR / ALL_LANGUAGES are shared — confirm callers are single-threaded.
        final LocaleIDParser lip = new LocaleIDParser();

        public static LocaleMatcher REGULAR = new LocaleMatcher("$_language_plus");
        public static LocaleMatcher ALL_LANGUAGES = new LocaleMatcher("$_language");

        private LocaleMatcher(String variable) {
            language = getNonNullVariable(variable);
        }

        @Override
        public boolean matches(String value, Output<String> reason) {
            lip.set(value);
            if (!language.matches(lip.getLanguage(), reason)) {
                return fail(reason, "invalid base language");
            }
            String scriptField = lip.getScript();
            if (scriptField.length() != 0 && !script.matches(scriptField, reason)) {
                return fail(reason, "invalid script");
            }
            String regionField = lip.getRegion();
            if (regionField.length() != 0 && !territory.matches(regionField, reason)) {
                return fail(reason, "invalid region");
            }
            for (String variantField : lip.getVariants()) {
                if (!variant.matches(variantField, reason)) {
                    return fail(reason, "invalid variant");
                }
            }
            return true;
        }

        /** Stores {@code message} in {@code reason} if it is non-null; always returns false. */
        private static boolean fail(Output<String> reason, String message) {
            if (reason != null) {
                reason.value = message;
            }
            return false;
        }

        @Override
        public String _getPattern() {
            return "Unicode_Language_Subtag";
        }
    }

    /** Immutable (DTD type, element, attribute, value) tuple. */
    public static final class AttributeValueSpec implements Comparable<AttributeValueSpec> {
        public AttributeValueSpec(
                DtdType type, String element, String attribute, String attributeValue) {
            this.type = type;
            this.element = element;
            this.attribute = attribute;
            this.attributeValue = attributeValue;
        }

        public final DtdType type;
        public final String element;
        public final String attribute;
        public final String attributeValue;

        @Override
        public int hashCode() {
            return Objects.hash(type, element, attribute, attributeValue);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            // Also rejects null, as the equals contract requires.
            if (!(obj instanceof AttributeValueSpec)) {
                return false;
            }
            AttributeValueSpec other = (AttributeValueSpec) obj;
            return Objects.equals(type, other.type)
                    && Objects.equals(element, other.element)
                    && Objects.equals(attribute, other.attribute)
                    && Objects.equals(attributeValue, other.attributeValue);
        }

        @Override
        public int compareTo(AttributeValueSpec other) {
            return ComparisonChain.start()
                    .compare(type, other.type)
                    .compare(element, other.element)
                    .compare(attribute, other.attribute)
                    .compare(attributeValue, other.attributeValue)
                    .result();
        }

        /** Renders this spec as an {@code <attributeValues>} element with type {@code TODO}. */
        @Override
        public String toString() {
            return "<attributeValues"
                    + " dtds='"
                    + type
                    + "\'"
                    + " elements='"
                    + element
                    + "\'"
                    + " attributes='"
                    + attribute
                    + "\'"
                    + " type='TODO\'>"
                    + attributeValue
                    + "</attributeValues>";
        }
    }

    /**
     * Checks a value against the matcher registered for {@code attribute} in one map.
     *
     * @param attribute_validity attribute name → matcher; may be null
     * @param element the element name (not used by the lookup)
     * @param attribute the attribute name
     * @param attributeValue the value to test
     * @param reason if non-null, receives an explanation when the value is illegal
     * @return {@link Status#noTest} if there is no map or no matcher, otherwise {@link Status#ok}
     *     or {@link Status#illegal}
     */
    private static Status check(
            Map<String, MatcherPattern> attribute_validity,
            String element,
            String attribute,
            String attributeValue,
            Output<String> reason) {

        if (attribute_validity == null) {
            return Status.noTest;
        }
        MatcherPattern matcherPattern = attribute_validity.get(attribute);
        if (matcherPattern == null) {
            return Status.noTest;
        }
        return matcherPattern.matches(attributeValue, reason) ? Status.ok : Status.illegal;
    }

    /**
     * Checks one attribute value. Deprecation is tested first, then the matchers that apply to
     * all elements, and only if none of those covers the attribute, the matchers registered for
     * this DTD type and element.
     *
     * @param dtdData the DTD the element belongs to
     * @param element the element name
     * @param attribute the attribute name
     * @param attributeValue the value to test
     * @param reason if non-null, receives an explanation when the value is illegal
     * @return the resulting status; {@link Status#noTest} if no matcher covers the attribute
     */
    public static Status check(
            DtdData dtdData,
            String element,
            String attribute,
            String attributeValue,
            Output<String> reason) {
        if (dtdData.isDeprecated(element, attribute, attributeValue)) {
            return Status.deprecated;
        }
        Status commonStatus =
                check(common_attribute_validity, element, attribute, attributeValue, reason);
        if (commonStatus != Status.noTest) {
            return commonStatus;
        }

        final Map<String, Map<String, MatcherPattern>> element_attribute_validity =
                dtd_element_attribute_validity.get(dtdData.dtdType);
        if (element_attribute_validity == null) {
            return Status.noTest;
        }
        // A missing per-element map is handled by the private overload (returns noTest).
        return check(
                element_attribute_validity.get(element),
                element,
                attribute,
                attributeValue,
                reason);
    }

    /** Returns the (DTD type, element, attribute) triples whose matcher is the TODO sentinel. */
    public static Set<R3<DtdType, String, String>> getTodoTests() {
        Set<Row.R3<DtdType, String, String>> result = new LinkedHashSet<>();
        for (Entry<DtdType, Map<String, Map<String, MatcherPattern>>> byDtd :
                dtd_element_attribute_validity.entrySet()) {
            for (Entry<String, Map<String, MatcherPattern>> byElement :
                    byDtd.getValue().entrySet()) {
                for (Entry<String, MatcherPattern> byAttribute : byElement.getValue().entrySet()) {
                    if (byAttribute.getValue() == NOT_DONE_YET) {
                        result.add(
                                Row.of(byDtd.getKey(), byElement.getKey(), byAttribute.getKey()));
                    }
                }
            }
        }
        return result;
    }

    /** Returns a read-only view of the entries for which no matcher could be built. */
    public static Map<AttributeValidityInfo, String> getReadFailures() {
        return Collections.unmodifiableMap(failures);
    }

    /** Returns the matcher registered under {@code variable}, or null if there is none. */
    public static MatcherPattern getMatcherPattern(String variable) {
        return variables.get(variable);
    }

    /**
     * Returns the matcher registered under {@code variable}.
     *
     * @throws NullPointerException if no such variable is registered
     */
    private static MatcherPattern getNonNullVariable(String variable) {
        MatcherPattern result = variables.get(variable);
        if (result == null) {
            throw new NullPointerException("Unknown variable: " + variable);
        }
        return result;
    }

    /** Returns a read-only view of the registered variable names. */
    public static Set<String> getMatcherPatternIds() {
        return Collections.unmodifiableSet(variables.keySet());
    }

    /** Prints, for every DTD type, the result of {@link #getAllPossibleMissing(DtdType)}. */
    public static void main(String[] args) {
        for (DtdType type : DtdType.values()) {
            Relation<String, String> missing = getAllPossibleMissing(type);
            for (Entry<String, String> x : missing.keyValueSet()) {
                System.out.println(type + "\t" + CldrUtility.toString(x));
            }
        }
    }
}