1 package org.unicode.cldr.util; 2 3 import com.google.common.base.CharMatcher; 4 import com.google.common.base.Joiner; 5 import com.google.common.base.Splitter; 6 import com.google.common.collect.ImmutableMultimap; 7 import com.google.common.collect.ImmutableSet; 8 import com.google.common.collect.ImmutableSet.Builder; 9 import com.google.common.collect.ImmutableSetMultimap; 10 import com.google.common.collect.Multimap; 11 import com.google.common.collect.TreeMultimap; 12 import com.ibm.icu.impl.Relation; 13 import com.ibm.icu.text.Transform; 14 import java.io.File; 15 import java.io.StringReader; 16 import java.util.ArrayList; 17 import java.util.Arrays; 18 import java.util.Collection; 19 import java.util.Collections; 20 import java.util.Comparator; 21 import java.util.HashMap; 22 import java.util.HashSet; 23 import java.util.Iterator; 24 import java.util.LinkedHashMap; 25 import java.util.LinkedHashSet; 26 import java.util.List; 27 import java.util.Locale; 28 import java.util.Map; 29 import java.util.Map.Entry; 30 import java.util.Set; 31 import java.util.TreeMap; 32 import java.util.concurrent.ConcurrentHashMap; 33 import java.util.concurrent.ConcurrentMap; 34 import java.util.regex.Pattern; 35 import org.unicode.cldr.util.DtdData.Element.ValueConstraint; 36 import org.unicode.cldr.util.MatchValue.LiteralMatchValue; 37 import org.unicode.cldr.util.personname.PersonNameFormatter; 38 39 /** 40 * An immutable object that contains the structure of a DTD. 41 * 42 * @author markdavis 43 */ 44 public class DtdData extends XMLFileReader.SimpleHandler { 45 private static final String COMMENT_PREFIX = System.lineSeparator() + " "; 46 private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false); 47 private static final boolean USE_SYNTHESIZED = false; 48 49 private static final boolean DEBUG = false; 50 private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]"); 51 52 private final Relation<String, Attribute> nameToAttributes = 53 Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class); 54 private Map<String, Element> nameToElement = new HashMap<>(); 55 private MapComparator<String> elementComparator; 56 private MapComparator<String> attributeComparator; 57 58 // TODO Make this data driven. See https://unicode-org.atlassian.net/browse/CLDR-17321 59 public static final Multimap<DtdType, String> HACK_PCDATA_ALLOWS_EMPTY = 60 ImmutableMultimap.<DtdType, String>builder() 61 .putAll( 62 DtdType.ldml, 63 "nameOrderLocales", 64 "foreignSpaceReplacement", 65 "nativeSpaceReplacement", 66 "language", 67 "script", 68 "region", 69 "variant", 70 "territory") 71 .putAll(DtdType.supplementalData, "variable", "attributeValues") 72 .build(); 73 74 public final Element ROOT; 75 public final Element PCDATA = elementFrom("#PCDATA"); 76 public final Element ANY = elementFrom("ANY"); 77 public final DtdType dtdType; 78 public final String version; 79 private Element lastElement; 80 private Attribute lastAttribute; 81 private Set<String> preCommentCache; 82 private DtdComparator dtdComparator; 83 84 public enum AttributeStatus { 85 distinguished("§d"), 86 value("§v"), 87 metadata("§m︎"); 88 public final String shortName; 89 AttributeStatus(String shortName)90 AttributeStatus(String shortName) { 91 this.shortName = shortName; 92 } 93 getShortName(AttributeStatus status)94 public static String getShortName(AttributeStatus status) { 95 return status == null ? "" : status.shortName; 96 } 97 } 98 99 public enum Mode { 100 REQUIRED("#REQUIRED"), 101 OPTIONAL("#IMPLIED"), 102 FIXED("#FIXED"), 103 NULL("null"); 104 105 public final String source; 106 Mode(String s)107 Mode(String s) { 108 source = s; 109 } 110 forString(String mode)111 public static Mode forString(String mode) { 112 for (Mode value : Mode.values()) { 113 if (value.source.equals(mode)) { 114 return value; 115 } 116 } 117 if (mode == null) { 118 return NULL; 119 } 120 throw new IllegalArgumentException(mode); 121 } 122 } 123 124 public enum AttributeType { 125 CDATA, 126 ID, 127 IDREF, 128 IDREFS, 129 ENTITY, 130 ENTITIES, 131 NMTOKEN, 132 NMTOKENS, 133 ENUMERATED_TYPE 134 } 135 136 static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED = 137 ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping"); 138 139 public static class Attribute implements Named { 140 private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", "); 141 public static final String AUG_TRAIL = "⟫"; 142 public static final String AUG_LEAD = "⟪"; 143 public static final String ENUM_TRAIL = "⟩"; 144 public static final String ENUM_LEAD = "⟨"; 145 public static final Pattern LEAD_TRAIL = 146 Pattern.compile( 147 "(.*[" 148 + AUG_LEAD 149 + ENUM_LEAD 150 + "])(.*)([" 151 + AUG_TRAIL 152 + ENUM_TRAIL 153 + "].*)"); 154 public final String name; 155 public final Element element; 156 public final Mode mode; 157 public final String defaultValue; 158 public final AttributeType type; 159 public final Map<String, Integer> values; // immutable 160 private final Set<String> commentsPre; 161 private Set<String> commentsPost; 162 private boolean isDeprecatedAttribute; 163 private boolean attributeAllowsUEscape = false; 164 public AttributeStatus attributeStatus = 165 AttributeStatus.distinguished; // default unless reset by annotations, or for xml: 166 // attributes 167 private Set<String> deprecatedValues = Collections.emptySet(); 168 public MatchValue matchValue; 169 private final Comparator<String> attributeValueComparator; 170 Attribute( DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)171 private Attribute( 172 DtdType dtdType, 173 Element element2, 174 String aName, 175 Mode mode2, 176 String[] split, 177 String value2, 178 Set<String> firstComment) { 179 commentsPre = firstComment; 180 element = element2; 181 name = aName.intern(); 182 if (name.equals("draft") // normally never permitted on elements with children, but 183 // special cases... 184 && dtdType == DtdType.ldml 185 && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) { 186 int elementChildrenCount = element.getChildren().size(); 187 if (elementChildrenCount > 1 188 || elementChildrenCount == 1 189 && !element.getChildren() 190 .keySet() 191 .iterator() 192 .next() 193 .getName() 194 .equals("cp")) { 195 isDeprecatedAttribute = true; 196 if (DEBUG) { 197 System.out.println(element.getName() + ":" + element.getChildren()); 198 } 199 } 200 } else if (name.startsWith("xml:")) { 201 attributeStatus = AttributeStatus.metadata; 202 } 203 mode = mode2; 204 defaultValue = value2 == null ? null : value2.intern(); 205 AttributeType _type = AttributeType.ENUMERATED_TYPE; 206 Map<String, Integer> _values = Collections.emptyMap(); 207 if (split.length == 1) { 208 try { 209 _type = AttributeType.valueOf(split[0]); 210 } catch (Exception e) { 211 } 212 } 213 type = _type; 214 215 if (_type == AttributeType.ENUMERATED_TYPE) { 216 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>(); 217 for (String part : split) { 218 if (part.length() != 0) { 219 temp.put(part.intern(), temp.size()); 220 } 221 } 222 _values = Collections.unmodifiableMap(temp); 223 } 224 values = _values; 225 attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name); 226 } 227 228 @Override toString()229 public String toString() { 230 return element.name + ":" + name; 231 } 232 getSampleValue()233 public String getSampleValue() { 234 return type == AttributeType.ENUMERATED_TYPE 235 ? (values.containsKey("year") ? "year" : values.keySet().iterator().next()) 236 : matchValue != null ? matchValue.getSample() : MatchValue.DEFAULT_SAMPLE; 237 } 238 appendDtdString(StringBuilder b)239 public StringBuilder appendDtdString(StringBuilder b) { 240 Attribute a = this; 241 b.append("<!ATTLIST " + element.name + " " + a.name); 242 boolean first; 243 if (a.type == AttributeType.ENUMERATED_TYPE) { 244 b.append(" ("); 245 first = true; 246 for (String s : a.values.keySet()) { 247 if (deprecatedValues.contains(s)) { 248 continue; 249 } 250 if (first) { 251 first = false; 252 } else { 253 b.append(" | "); 254 } 255 b.append(s); 256 } 257 b.append(")"); 258 } else { 259 b.append(' ').append(a.type); 260 } 261 if (a.mode != Mode.NULL) { 262 b.append(" ").append(a.mode.source); 263 } 264 if (a.defaultValue != null) { 265 b.append(" \"").append(a.defaultValue).append('"'); 266 } 267 b.append(" >"); 268 return b; 269 } 270 features()271 public String features() { 272 return (type == AttributeType.ENUMERATED_TYPE 273 ? values.keySet().toString() 274 : type.toString()) 275 + (mode == Mode.NULL ? "" : ", mode=" + mode) 276 + (defaultValue == null ? "" : ", default=" + defaultValue); 277 } 278 279 @Override getName()280 public String getName() { 281 return name; 282 } 283 284 private static Splitter COMMA = Splitter.on(',').trimResults(); 285 addComment(String commentIn)286 public void addComment(String commentIn) { 287 if (commentIn.startsWith("@")) { 288 switch (commentIn) { 289 case "@METADATA": 290 attributeStatus = AttributeStatus.metadata; 291 break; 292 case "@VALUE": 293 attributeStatus = AttributeStatus.value; 294 break; 295 case "@DEPRECATED": 296 isDeprecatedAttribute = true; 297 break; 298 case "@ALLOWS_UESC": 299 attributeAllowsUEscape = true; 300 break; 301 302 default: 303 int colonPos = commentIn.indexOf(':'); 304 if (colonPos < 0) { 305 throw new IllegalArgumentException( 306 element.name 307 + " " 308 + name 309 + "= : Unrecognized ATTLIST annotation: " 310 + commentIn); 311 } 312 String command = commentIn.substring(0, colonPos); 313 String argument = commentIn.substring(colonPos + 1); 314 switch (command) { 315 case "@DEPRECATED": 316 deprecatedValues = 317 Collections.unmodifiableSet( 318 new HashSet<>(COMMA.splitToList(argument))); 319 break; 320 case "@MATCH": 321 if (matchValue != null) { 322 throw new IllegalArgumentException( 323 element.name 324 + " " 325 + name 326 + "= : Conflicting @MATCH: " 327 + matchValue.getName() 328 + " & " 329 + argument); 330 } 331 matchValue = MatchValue.of(argument); 332 break; 333 default: 334 throw new IllegalArgumentException( 335 element.name 336 + " " 337 + name 338 + "= : Unrecognized ATTLIST annotation: " 339 + commentIn); 340 } 341 } 342 return; 343 } 344 commentsPost = addUnmodifiable(commentsPost, commentIn.trim()); 345 } 346 347 /** Special version of identity; only considers name and name of element */ 348 @Override equals(Object obj)349 public boolean equals(Object obj) { 350 if (!(obj instanceof Attribute)) { 351 return false; 352 } 353 Attribute that = (Attribute) obj; 354 return name.equals(that.name) 355 && element.name.equals( 356 that.element.name) // don't use plain element: circularity 357 // not relevant to identity 358 // && Objects.equals(comment, that.comment) 359 // && mode.equals(that.mode) 360 // && Objects.equals(defaultValue, that.defaultValue) 361 // && type.equals(that.type) 362 // && values.equals(that.values) 363 ; 364 } 365 366 /** Special version of identity; only considers name and name of element */ 367 @Override hashCode()368 public int hashCode() { 369 return name.hashCode() * 37 370 + element.name.hashCode() // don't use plain element: circularity 371 // not relevant to identity 372 // ) * 37 + Objects.hashCode(comment)) * 37 373 // + mode.hashCode()) * 37 374 // + Objects.hashCode(defaultValue)) * 37 375 // + type.hashCode()) * 37 376 // + values.hashCode() 377 ; 378 } 379 isDeprecated()380 public boolean isDeprecated() { 381 return isDeprecatedAttribute; 382 } 383 allowsUEscape()384 public boolean allowsUEscape() { 385 return attributeAllowsUEscape; 386 } 387 isDeprecatedValue(String value)388 public boolean isDeprecatedValue(String value) { 389 return deprecatedValues.contains(value); 390 } 391 getStatus()392 public AttributeStatus getStatus() { 393 return attributeStatus; 394 } 395 getValueStatus(String value)396 public ValueStatus getValueStatus(String value) { 397 return deprecatedValues.contains(value) 398 ? ValueStatus.invalid 399 : type == AttributeType.ENUMERATED_TYPE 400 ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid) 401 : matchValue == null 402 ? ValueStatus.unknown 403 : matchValue.is(value) 404 ? ValueStatus.valid 405 : ValueStatus.invalid; 406 } 407 getMatchString()408 public String getMatchString() { 409 return type == AttributeType.ENUMERATED_TYPE 410 ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL 411 : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL : ""; 412 } 413 getMatchLiterals()414 public Set<String> getMatchLiterals() { 415 if (type == AttributeType.ENUMERATED_TYPE) { 416 return values.keySet(); 417 } else if (matchValue != null && matchValue instanceof LiteralMatchValue) { 418 return ((LiteralMatchValue) matchValue).getItems(); 419 } 420 return null; 421 } 422 getMatchingName(Map<Attribute, Integer> attributes)423 public Attribute getMatchingName(Map<Attribute, Integer> attributes) { 424 for (Attribute attribute : attributes.keySet()) { 425 if (name.equals(attribute.getName())) { 426 return attribute; 427 } 428 } 429 return null; 430 } 431 } 432 433 public enum ValueStatus { 434 invalid, 435 unknown, 436 valid 437 } 438 DtdData(DtdType type, String version)439 private DtdData(DtdType type, String version) { 440 this.dtdType = type; 441 this.ROOT = elementFrom(type.rootElement()); 442 this.version = version; 443 } 444 addAttribute(String eName, String aName, String type, String mode, String value)445 private void addAttribute(String eName, String aName, String type, String mode, String value) { 446 Attribute a = 447 new Attribute( 448 dtdType, 449 nameToElement.get(eName), 450 aName, 451 Mode.forString(mode), 452 FILLER.split(type), 453 value, 454 preCommentCache); 455 preCommentCache = null; 456 getAttributesFromName().put(aName, a); 457 CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size()); 458 lastElement = null; 459 lastAttribute = a; 460 } 461 462 public enum ElementType { 463 EMPTY, 464 ANY, 465 PCDATA("(#PCDATA)"), 466 CHILDREN; 467 public final String source; 468 ElementType(String s)469 private ElementType(String s) { 470 source = s; 471 } 472 ElementType()473 private ElementType() { 474 source = name(); 475 } 476 } 477 478 interface Named { getName()479 String getName(); 480 } 481 482 public enum ElementStatus { 483 regular, 484 metadata 485 } 486 487 public static class Element implements Named { 488 public enum ValueConstraint { 489 empty, 490 nonempty, 491 any 492 } 493 494 public final String name; 495 private String rawModel; 496 private ElementType type; 497 private final Map<Element, Integer> children = new LinkedHashMap<>(); 498 private final Map<Attribute, Integer> attributes = new LinkedHashMap<>(); 499 private Set<String> commentsPre; 500 private Set<String> commentsPost; 501 private String model; 502 private boolean isOrderedElement; 503 private boolean isDeprecatedElement; 504 private boolean isTechPreviewElement; 505 private ElementStatus elementStatus = ElementStatus.regular; 506 private ValueConstraint valueConstraint = ValueConstraint.nonempty; 507 Element(String name2)508 private Element(String name2) { 509 name = name2.intern(); 510 } 511 setChildren(DtdData dtdData, String model, Set<String> precomments)512 private void setChildren(DtdData dtdData, String model, Set<String> precomments) { 513 this.commentsPre = precomments; 514 rawModel = model; 515 this.model = clean(model); 516 valueConstraint = ValueConstraint.empty; 517 if (model.equals("EMPTY")) { 518 type = ElementType.EMPTY; 519 return; 520 } 521 type = ElementType.CHILDREN; 522 for (String part : FILLER.split(model)) { 523 if (part.length() != 0) { 524 if (part.equals("#PCDATA")) { 525 type = ElementType.PCDATA; 526 if (HACK_PCDATA_ALLOWS_EMPTY.get(dtdData.dtdType).contains(name)) { 527 // TODO move to @ annotation in .dtd file 528 valueConstraint = ValueConstraint.any; 529 } else { 530 valueConstraint = ValueConstraint.nonempty; 531 } 532 } else if (part.equals("ANY")) { 533 type = ElementType.ANY; 534 } else { 535 CldrUtility.putNew(children, dtdData.elementFrom(part), children.size()); 536 } 537 } 538 } 539 if ((type == ElementType.CHILDREN) == (children.size() == 0) 540 && !model.startsWith("(#PCDATA|cp")) { 541 throw new IllegalArgumentException( 542 "CLDR does not permit Mixed content. " + name + ":" + model); 543 } 544 } 545 546 static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)"); 547 static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])"); 548 clean(String model2)549 private String clean(String model2) { 550 // (x) -> ( x ); 551 // x,y -> x, y 552 // x|y -> x | y 553 String result = CLEANER1.matcher(model2).replaceAll("$1 "); 554 result = CLEANER2.matcher(result).replaceAll(" $1"); 555 return result.equals(model2) ? model2 : result; // for debugging 556 } 557 containsAttribute(String string)558 public boolean containsAttribute(String string) { 559 for (Attribute a : attributes.keySet()) { 560 if (a.name.equals(string)) { 561 return true; 562 } 563 } 564 return false; 565 } 566 567 @Override toString()568 public String toString() { 569 return name; 570 } 571 toDtdString()572 public String toDtdString() { 573 return "<!ELEMENT " + name + " " + getRawModel() + " >"; 574 } 575 getType()576 public ElementType getType() { 577 return type; 578 } 579 getChildren()580 public Map<Element, Integer> getChildren() { 581 return Collections.unmodifiableMap(children); 582 } 583 getAttributes()584 public Map<Attribute, Integer> getAttributes() { 585 return Collections.unmodifiableMap(attributes); 586 } 587 588 @Override getName()589 public String getName() { 590 return name; 591 } 592 getChildNamed(String string)593 public Element getChildNamed(String string) { 594 for (Element e : children.keySet()) { 595 if (e.name.equals(string)) { 596 return e; 597 } 598 } 599 return null; 600 } 601 getAttributeNamed(String string)602 public Attribute getAttributeNamed(String string) { 603 for (Attribute a : attributes.keySet()) { 604 if (a.name.equals(string)) { 605 return a; 606 } 607 } 608 return null; 609 } 610 addComment(String addition)611 public void addComment(String addition) { 612 if (addition.startsWith("@")) { 613 // there are exactly 4 cases: deprecated, ordered, techPreview and metadata 614 switch (addition) { 615 case "@ORDERED": 616 isOrderedElement = true; 617 break; 618 case "@DEPRECATED": 619 isDeprecatedElement = true; 620 break; 621 case "@METADATA": 622 elementStatus = ElementStatus.metadata; 623 break; 624 case "@TECHPREVIEW": 625 isTechPreviewElement = true; 626 break; 627 default: 628 if (addition.startsWith("@MATCH") || addition.startsWith("@VALUE")) { 629 // Try to catch this case 630 throw new IllegalArgumentException( 631 name 632 + ": Unrecognized ELEMENT annotation (this isn't ATTLIST!): " 633 + addition); 634 } else { 635 throw new IllegalArgumentException( 636 name + ": Unrecognized ELEMENT annotation: " + addition); 637 } 638 } 639 return; 640 } 641 commentsPost = addUnmodifiable(commentsPost, addition.trim()); 642 } 643 644 /** Special version of equals. Only the name is considered in the identity. */ 645 @Override equals(Object obj)646 public boolean equals(Object obj) { 647 if (!(obj instanceof Element)) { 648 return false; 649 } 650 Element that = (Element) obj; 651 return name.equals(that.name) 652 // not relevant to the identity of the object 653 // && Objects.equals(comment, that.comment) 654 // && type == that.type 655 // && attributes.equals(that.attributes) 656 // && children.equals(that.children) 657 ; 658 } 659 660 /** Special version of hashcode. Only the name is considered in the identity. */ 661 @Override hashCode()662 public int hashCode() { 663 return name.hashCode() 664 // not relevant to the identity of the object 665 // * 37 + Objects.hashCode(comment) 666 // ) * 37 + Objects.hashCode(type) 667 // ) * 37 + attributes.hashCode() 668 // ) * 37 + children.hashCode() 669 ; 670 } 671 isDeprecated()672 public boolean isDeprecated() { 673 return isDeprecatedElement; 674 } 675 isOrdered()676 public boolean isOrdered() { 677 return isOrderedElement; 678 } 679 isTechPreview()680 public boolean isTechPreview() { 681 return isTechPreviewElement; 682 } 683 getElementStatus()684 public ElementStatus getElementStatus() { 685 return elementStatus; 686 } 687 getValueConstraint()688 public ValueConstraint getValueConstraint() { 689 return valueConstraint; 690 } 691 692 /** 693 * @return the rawModel 694 */ getRawModel()695 public String getRawModel() { 696 return rawModel; 697 } 698 } 699 elementFrom(String name)700 private Element elementFrom(String name) { 701 Element result = nameToElement.get(name); 702 if (result == null) { 703 nameToElement.put(name, result = new Element(name)); 704 } 705 return result; 706 } 707 addElement(String name2, String model)708 private void addElement(String name2, String model) { 709 Element element = elementFrom(name2); 710 element.setChildren(this, model, preCommentCache); 711 preCommentCache = null; 712 lastElement = element; 713 lastAttribute = null; 714 } 715 addComment(String comment)716 private void addComment(String comment) { 717 comment = comment.trim(); 718 if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky" 719 if (comment.startsWith("@")) { 720 throw new IllegalArgumentException( 721 "@ annotation comment must follow element or attribute, without intervening # comment"); 722 } 723 preCommentCache = addUnmodifiable(preCommentCache, comment); 724 } else if (lastElement != null) { 725 lastElement.addComment(comment); 726 } else if (lastAttribute != null) { 727 lastAttribute.addComment(comment); 728 } else { 729 if (comment.startsWith("@")) { 730 throw new IllegalArgumentException( 731 "@ annotation comment must follow element or attribute, without intervening # comment"); 732 } 733 preCommentCache = addUnmodifiable(preCommentCache, comment); 734 } 735 } 736 737 // TODO hide this 738 /** 739 * @deprecated 740 */ 741 @Deprecated 742 @Override handleElementDecl(String name, String model)743 public void handleElementDecl(String name, String model) { 744 if (SHOW_ALL) { 745 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, 746 // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, 747 // listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?, 748 // references?, special*))) > 749 System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >"); 750 } 751 addElement(name, model); 752 } 753 754 // TODO hide this 755 /** 756 * @deprecated 757 */ 758 @Deprecated 759 @Override handleStartDtd(String name, String publicId, String systemId)760 public void handleStartDtd(String name, String publicId, String systemId) { 761 DtdType explicitDtdType = DtdType.valueOf(name); 762 if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) { 763 throw new IllegalArgumentException("Mismatch in dtdTypes"); 764 } 765 } 766 767 /** 768 * @deprecated 769 */ 770 @Deprecated 771 @Override handleAttributeDecl( String eName, String aName, String type, String mode, String value)772 public void handleAttributeDecl( 773 String eName, String aName, String type, String mode, String value) { 774 if (SHOW_ALL) { 775 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | 776 // false ) #IMPLIED > 777 // <!ATTLIST version number CDATA #REQUIRED > 778 // <!ATTLIST version cldrVersion CDATA #FIXED "27" > 779 780 System.out.println( 781 "<!ATTLIST " 782 + eName 783 + " " 784 + aName 785 + " " 786 + type 787 + " " 788 + mode 789 + (value == null ? "" : " \"" + value + "\"") 790 + " >"); 791 } 792 // HACK for 1.1.1 793 if (eName.equals("draft")) { 794 eName = "week"; 795 } 796 addAttribute(eName, aName, type, mode, value); 797 } 798 799 /** 800 * @deprecated 801 */ 802 @Deprecated 803 @Override handleComment(String path, String comment)804 public void handleComment(String path, String comment) { 805 if (comment.contains("Copyright")) { 806 // Zap the copyright comment, replace it with the current one. 807 comment = CldrUtility.getCopyrightString(); 808 } 809 if (SHOW_ALL) { 810 // <!-- true and false are deprecated. --> 811 System.out.println("<!-- " + comment.trim() + " -->"); 812 } 813 addComment(comment); 814 } 815 816 // TODO hide this 817 /** 818 * @deprecated 819 */ 820 @Deprecated 821 @Override handleEndDtd()822 public void handleEndDtd() { 823 throw new XMLFileReader.AbortException(); 824 } 825 826 /** 827 * Note that it always gets the trunk version 828 * 829 * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead 830 */ 831 @Deprecated getInstance(DtdType type)832 public static DtdData getInstance(DtdType type) { 833 return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory()); 834 } 835 836 /** Special form using version, used only by tests, etc. */ getInstance(DtdType type, String version)837 public static DtdData getInstance(DtdType type, String version) { 838 // Map out versions that had no DTD 839 if (version != null) { 840 switch (version) { 841 case "1.1.1": 842 version = "1.1"; 843 break; 844 case "1.4.1": 845 version = "1.4"; 846 break; 847 case "1.5.1": 848 version = "1.5.0.1"; 849 break; 850 default: 851 } 852 } 853 File directory = 854 version == null 855 ? CLDRConfig.getInstance().getCldrBaseDirectory() 856 : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version); 857 858 return getInstance(type, version, directory); 859 } 860 861 private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE = 862 new ConcurrentHashMap<>(); 863 864 /** 865 * Normal version of DtdData Get a DtdData, given the CLDR root directory. 866 * 867 * @param type which DtdType to return 868 * @param directory the CLDR Root directory, which contains the "common" directory. 869 * @return 870 */ getInstance(DtdType type, File directory)871 public static DtdData getInstance(DtdType type, File directory) { 872 Pair<DtdType, File> key = new Pair<>(type, directory); 873 DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory)); 874 return data; 875 } 876 getInstance(DtdType type, String version, File directory)877 private static DtdData getInstance(DtdType type, String version, File directory) { 878 DtdData simpleHandler = new DtdData(type, version); 879 XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler); 880 if (type != type.rootType) { 881 // read the real first, then add onto it. 882 readFile(type.rootType, xfr, directory); 883 } 884 readFile(type, xfr, directory); 885 // HACK 886 if (type == DtdType.ldmlICU) { 887 Element special = simpleHandler.nameToElement.get("special"); 888 for (String extraElementName : 889 Arrays.asList( 890 "icu:breakIteratorData", 891 "icu:UCARules", 892 "icu:scripts", 893 "icu:transforms", 894 "icu:ruleBasedNumberFormats", 895 "icu:isLeapMonth", 896 "icu:version", 897 "icu:breakDictionaryData", 898 "icu:depends")) { 899 Element extraElement = simpleHandler.nameToElement.get(extraElementName); 900 special.children.put(extraElement, special.children.size()); 901 } 902 } 903 if (simpleHandler.ROOT.children.size() == 0) { 904 throw new IllegalArgumentException( 905 "Internal Error: DtdData.getInstance(" 906 + type 907 + ", ...): readFile() failed to return any children!"); 908 // should never happen 909 } 910 simpleHandler.finish(); 911 simpleHandler.freeze(); 912 return simpleHandler; 913 } 914 finish()915 private void finish() { 916 dtdComparator = new DtdComparator(); 917 } 918 readFile(DtdType type, XMLFileReader xfr, File directory)919 public static void readFile(DtdType type, XMLFileReader xfr, File directory) { 920 File file = new File(directory, type.dtdPath); 921 StringReader s = 922 new StringReader( 923 "<?xml version='1.0' encoding='UTF-8' ?>" 924 + "<!DOCTYPE " 925 + type 926 + " SYSTEM '" 927 + file.getAbsolutePath() 928 + "'>"); 929 try { 930 xfr.read(type.toString(), s, -1, true); // DTD_TYPE_TO_FILE.get(type) 931 } catch (IllegalArgumentException iae) { 932 // rethrow 933 throw new IllegalArgumentException("Error while reading " + type, iae); 934 } 935 } 936 freeze()937 private void freeze() { 938 if (version == null) { // only generate for new versions 939 MergeLists<String> elementMergeList = new MergeLists<>(); 940 elementMergeList.add(dtdType.toString()); 941 MergeLists<String> attributeMergeList = new MergeLists<>(); 942 attributeMergeList.add("_q"); 943 944 for (Element element : nameToElement.values()) { 945 if (element.children.size() > 0) { 946 Collection<String> names = getNames(element.children.keySet()); 947 elementMergeList.add(names); 948 if (DEBUG) { 949 System.out.println(element.getName() + "\t→\t" + names); 950 } 951 } 952 if (element.attributes.size() > 0) { 953 Collection<String> names = getNames(element.attributes.keySet()); 954 attributeMergeList.add(names); 955 if (DEBUG) { 956 System.out.println(element.getName() + "\t→\t@" + names); 957 } 958 } 959 } 960 List<String> elementList = elementMergeList.merge(); 961 List<String> attributeList = attributeMergeList.merge(); 962 if (DEBUG) { 963 System.out.println("Element Ordering:\t" + elementList); 964 System.out.println("Attribute Ordering:\t" + attributeList); 965 } 966 elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze(); 967 attributeComparator = 968 new MapComparator<>(attributeList).setErrorOnMissing(true).freeze(); 969 } 970 nameToAttributes.freeze(); 971 nameToElement = Collections.unmodifiableMap(nameToElement); 972 } 973 getNames(Collection<? extends Named> keySet)974 private Collection<String> getNames(Collection<? extends Named> keySet) { 975 List<String> result = new ArrayList<>(); 976 for (Named e : keySet) { 977 result.add(e.getName()); 978 } 979 return result; 980 } 981 982 public enum DtdItem { 983 ELEMENT, 984 ATTRIBUTE, 985 ATTRIBUTE_VALUE 986 } 987 988 public interface AttributeValueComparator { compare(String element, String attribute, String value1, String value2)989 public int compare(String element, String attribute, String value1, String value2); 990 } 991 getDtdComparator(AttributeValueComparator avc)992 public Comparator<String> getDtdComparator(AttributeValueComparator avc) { 993 return dtdComparator; 994 } 995 getDtdComparator()996 public DtdComparator getDtdComparator() { 997 return dtdComparator; 998 } 999 1000 public class DtdComparator implements Comparator<String> { 1001 @Override compare(String path1, String path2)1002 public int compare(String path1, String path2) { 1003 XPathParts a = XPathParts.getFrozenInstance(path1); 1004 XPathParts b = XPathParts.getFrozenInstance(path2); 1005 return xpathComparator(a, b); 1006 } 1007 xpathComparator(XPathParts a, XPathParts b)1008 public int xpathComparator(XPathParts a, XPathParts b) { 1009 // there must always be at least one element 1010 String baseA = a.getElement(0); 1011 String baseB = b.getElement(0); 1012 if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) { 1013 throw new IllegalArgumentException( 1014 "Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB); 1015 } 1016 int min = Math.min(a.size(), b.size()); 1017 Element parent = ROOT; 1018 Element elementA; 1019 for (int i = 1; i < min; ++i, parent = elementA) { 1020 // add extra test for "fake" elements, used in diffing. they always start with _ 1021 String elementRawA = a.getElement(i); 1022 String elementRawB = b.getElement(i); 1023 if (elementRawA.startsWith("_")) { 1024 return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1; 1025 } else if (elementRawB.startsWith("_")) { 1026 return 1; 1027 } 1028 // 1029 elementA = nameToElement.get(elementRawA); 1030 Element elementB = nameToElement.get(elementRawB); 1031 if (elementA != elementB) { 1032 int aa = parent.children.get(elementA); 1033 int bb = parent.children.get(elementB); 1034 return aa - bb; 1035 } 1036 int countA = a.getAttributeCount(i); 1037 int countB = b.getAttributeCount(i); 1038 if (countA == 0 && countB == 0) { 1039 continue; 1040 } 1041 // we have two ways to compare the attributes. One based on the dtd, 1042 // and one based on explicit comparators 1043 1044 // at this point the elements are the same and correspond to elementA 1045 // in the dtd 1046 1047 // Handle the special added elements 1048 String aqValue = a.getAttributeValue(i, "_q"); 1049 if (aqValue != null) { 1050 String bqValue = b.getAttributeValue(i, "_q"); 1051 if (!aqValue.equals(bqValue)) { 1052 int aValue = Integer.parseInt(aqValue); 1053 int bValue = Integer.parseInt(bqValue); 1054 return aValue - bValue; 1055 } 1056 --countA; 1057 --countB; 1058 } 1059 1060 attributes: 1061 for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) { 1062 Attribute main = attr.getKey(); 1063 String valueA = a.getAttributeValue(i, main.name); 1064 String valueB = b.getAttributeValue(i, main.name); 1065 if (valueA == null) { 1066 if (valueB != null) { 1067 return -1; 1068 } 1069 } else if (valueB == null) { 1070 return 1; 1071 } else if (valueA.equals(valueB)) { 1072 --countA; 1073 --countB; 1074 if (countA == 0 && countB == 0) { 1075 break attributes; 1076 } 1077 continue; // TODO 1078 } else if (main.attributeValueComparator != null) { 1079 return main.attributeValueComparator.compare(valueA, valueB); 1080 } else if (main.values.size() != 0) { 1081 int aa = main.values.get(valueA); 1082 int bb = main.values.get(valueB); 1083 return aa - bb; 1084 } else { 1085 return valueA.compareTo(valueB); 1086 } 1087 } 1088 if (countA != 0 || countB != 0) { 1089 throw new IllegalArgumentException(); 1090 } 1091 } 1092 return a.size() - b.size(); 1093 } 1094 } 1095 getAttributeComparator()1096 public MapComparator<String> getAttributeComparator() { 1097 return attributeComparator; 1098 } 1099 getElementComparator()1100 public MapComparator<String> getElementComparator() { 1101 return elementComparator; 1102 } 1103 getAttributesFromName()1104 public Relation<String, Attribute> getAttributesFromName() { 1105 return nameToAttributes; 1106 } 1107 getElementFromName()1108 public Map<String, Element> getElementFromName() { 1109 return nameToElement; 1110 } 1111 1112 @Override toString()1113 public String toString() { 1114 StringBuilder b = new StringBuilder(); 1115 // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?, 1116 // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?, 1117 // listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?, 1118 // special*))) > 1119 // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false 1120 // ) #IMPLIED > <!-- true and false are deprecated. --> 1121 Seen seen = new Seen(dtdType); 1122 seen.seenElements.add(ANY); 1123 seen.seenElements.add(PCDATA); 1124 toString(ROOT, b, seen); 1125 1126 // Hack for ldmlIcu: catch the items that are not mentioned in the original 1127 int currentEnd = b.length(); 1128 for (Element e : nameToElement.values()) { 1129 toString(e, b, seen); 1130 } 1131 if (currentEnd != b.length()) { 1132 b.insert( 1133 currentEnd, 1134 System.lineSeparator() 1135 + System.lineSeparator() 1136 + "<!-- Elements not reachable from root! -->" 1137 + System.lineSeparator()); 1138 } 1139 return b.toString(); 1140 } 1141 1142 static final class Seen { 1143 Set<Element> seenElements = new HashSet<>(); 1144 Set<Attribute> seenAttributes = new HashSet<>(); 1145 Seen(DtdType dtdType)1146 public Seen(DtdType dtdType) { 1147 if (dtdType.rootType == dtdType) { 1148 return; 1149 } 1150 DtdData otherData = DtdData.getInstance(dtdType.rootType); 1151 walk(otherData, otherData.ROOT); 1152 seenElements.remove(otherData.nameToElement.get("special")); 1153 } 1154 walk(DtdData otherData, Element current)1155 private void walk(DtdData otherData, Element current) { 1156 seenElements.add(current); 1157 seenAttributes.addAll(current.attributes.keySet()); 1158 for (Element e : current.children.keySet()) { 1159 walk(otherData, e); 1160 } 1161 } 1162 } 1163 getDescendents(Element start, Set<Element> toAddTo)1164 public Set<Element> getDescendents(Element start, Set<Element> toAddTo) { 1165 if (!toAddTo.contains(start)) { 1166 toAddTo.add(start); 1167 for (Element e : start.children.keySet()) { 1168 getDescendents(e, toAddTo); 1169 } 1170 } 1171 return toAddTo; 1172 } 1173 toString(Element current, StringBuilder b, Seen seen)1174 private void toString(Element current, StringBuilder b, Seen seen) { 1175 boolean first = true; 1176 if (seen.seenElements.contains(current)) { 1177 return; 1178 } 1179 seen.seenElements.add(current); 1180 boolean elementDeprecated = isDeprecated(current.name, "*", "*"); 1181 1182 showComments(b, current.commentsPre, true); 1183 b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >"); 1184 if (USE_SYNTHESIZED) { 1185 Element aliasElement = getElementFromName().get("alias"); 1186 // b.append(current.rawChildren); 1187 if (!current.children.isEmpty()) { 1188 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet()); 1189 boolean hasAlias = aliasElement != null && elements.remove(aliasElement); 1190 // boolean hasSpecial = specialElement != null && elements.remove(specialElement); 1191 if (hasAlias) { 1192 b.append("(alias |"); 1193 } 1194 b.append("("); 1195 // <!ELEMENT transformNames ( alias | (transformName | special)* ) > 1196 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) > 1197 1198 for (Element e : elements) { 1199 if (first) { 1200 first = false; 1201 } else { 1202 b.append(", "); 1203 } 1204 b.append(e.name); 1205 if (e.type != ElementType.PCDATA) { 1206 b.append("*"); 1207 } 1208 } 1209 if (hasAlias) { 1210 b.append(")"); 1211 } 1212 b.append(")"); 1213 } else { 1214 b.append(current.type == null ? "???" : current.type.source); 1215 } 1216 b.append(">"); 1217 } 1218 showComments(b, current.commentsPost, false); 1219 if (isOrdered(current.name)) { 1220 b.append(COMMENT_PREFIX + "<!--@ORDERED-->"); 1221 } 1222 if (isTechPreview(current.name)) { 1223 b.append(COMMENT_PREFIX + "<!--@TECHPREVIEW-->"); 1224 } 1225 if (current.getElementStatus() != ElementStatus.regular) { 1226 b.append( 1227 COMMENT_PREFIX 1228 + "<!--@" 1229 + current.getElementStatus().toString().toUpperCase(Locale.ROOT) 1230 + "-->"); 1231 } 1232 if (elementDeprecated) { 1233 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1234 } 1235 1236 LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>(); 1237 1238 for (Attribute a : current.attributes.keySet()) { 1239 if (seen.seenAttributes.contains(a)) { 1240 continue; 1241 } 1242 seen.seenAttributes.add(a); 1243 boolean attributeDeprecated = 1244 elementDeprecated || isDeprecated(current.name, a.name, "*"); 1245 boolean attributeUEscaped = allowsUEscape(current.name, a.name, "*"); 1246 deprecatedValues.clear(); 1247 1248 showComments(b, a.commentsPre, true); 1249 b.append("\n<!ATTLIST " + current.name + " " + a.name); 1250 if (a.type == AttributeType.ENUMERATED_TYPE) { 1251 b.append(" ("); 1252 first = true; 1253 for (String s : a.values.keySet()) { 1254 if (first) { 1255 first = false; 1256 } else { 1257 b.append(" | "); 1258 } 1259 b.append(s); 1260 if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) { 1261 deprecatedValues.add(s); 1262 } 1263 } 1264 b.append(")"); 1265 } else { 1266 b.append(' ').append(a.type); 1267 } 1268 if (a.mode != Mode.NULL) { 1269 b.append(" ").append(a.mode.source); 1270 } 1271 if (a.defaultValue != null) { 1272 b.append(" \"").append(a.defaultValue).append('"'); 1273 } 1274 b.append(" >"); 1275 showComments(b, a.commentsPost, false); 1276 // if (attributeDeprecated != deprecatedComment) { 1277 // System.out.println("*** BAD DEPRECATION ***" + a); 1278 // } 1279 if (a.matchValue != null) { 1280 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->"); 1281 } 1282 if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) { 1283 b.append(COMMENT_PREFIX + "<!--@METADATA-->"); 1284 } else if (!isDistinguishing(current.name, a.name)) { 1285 b.append(COMMENT_PREFIX + "<!--@VALUE-->"); 1286 } 1287 if (attributeDeprecated) { 1288 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->"); 1289 } else if (!deprecatedValues.isEmpty()) { 1290 b.append( 1291 COMMENT_PREFIX 1292 + "<!--@DEPRECATED:" 1293 + Joiner.on(", ").join(deprecatedValues) 1294 + "-->"); 1295 } 1296 if (attributeUEscaped) { 1297 b.append(COMMENT_PREFIX + "<!--@ALLOWS_UESC-->"); 1298 } 1299 } 1300 if (current.children.size() > 0) { 1301 for (Element e : current.children.keySet()) { 1302 toString(e, b, seen); 1303 } 1304 } 1305 } 1306 showComments(StringBuilder b, Set<String> comments, boolean separate)1307 private void showComments(StringBuilder b, Set<String> comments, boolean separate) { 1308 if (comments == null) { 1309 return; 1310 } 1311 if (separate && b.length() != 0) { 1312 b.append(System.lineSeparator()); 1313 } 1314 for (String c : comments) { 1315 boolean deprecatedComment = false; // the following served its purpose... 1316 // c.toLowerCase(Locale.ENGLISH).contains("deprecat"); 1317 if (!deprecatedComment) { 1318 if (separate) { 1319 // special handling for very first comment 1320 if (b.length() == 0) { 1321 b.append("<!--") 1322 .append(System.lineSeparator()) 1323 .append(c) 1324 .append(System.lineSeparator()) 1325 .append("-->"); 1326 continue; 1327 } 1328 b.append(System.lineSeparator()); 1329 } else { 1330 b.append(COMMENT_PREFIX); 1331 } 1332 b.append("<!-- ").append(c).append(" -->"); 1333 } 1334 } 1335 } 1336 removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1337 public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) { 1338 for (Iterator<T> it = elements.iterator(); it.hasNext(); ) { 1339 T item = it.next(); 1340 if (matcher.transform(item) == Boolean.TRUE) { 1341 it.remove(); 1342 return item; 1343 } 1344 } 1345 return null; 1346 } 1347 getElements()1348 public Set<Element> getElements() { 1349 return new LinkedHashSet<>(nameToElement.values()); 1350 } 1351 getAttributes()1352 public Set<Attribute> getAttributes() { 1353 return new LinkedHashSet<>(nameToAttributes.values()); 1354 } 1355 isDistinguishing(String elementName, String attribute)1356 public boolean isDistinguishing(String elementName, String attribute) { 1357 return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished; 1358 } 1359 1360 static final Set<String> METADATA = 1361 new HashSet<>(Arrays.asList("references", "standard", "draft")); 1362 addUnmodifiable(Set<String> comment, String addition)1363 static final Set<String> addUnmodifiable(Set<String> comment, String addition) { 1364 if (comment == null) { 1365 return Collections.singleton(addition); 1366 } else { 1367 comment = new LinkedHashSet<>(comment); 1368 comment.add(addition); 1369 return Collections.unmodifiableSet(comment); 1370 } 1371 } 1372 1373 public class IllegalByDtdException extends RuntimeException { 1374 private static final long serialVersionUID = 1L; 1375 public final String elementName; 1376 public final String attributeName; 1377 public final String attributeValue; 1378 IllegalByDtdException( String elementName, String attributeName, String attributeValue)1379 public IllegalByDtdException( 1380 String elementName, String attributeName, String attributeValue) { 1381 this.elementName = elementName; 1382 this.attributeName = attributeName; 1383 this.attributeValue = attributeValue; 1384 } 1385 1386 @Override getMessage()1387 public String getMessage() { 1388 return "Dtd " 1389 + dtdType 1390 + " doesn’t allow " 1391 + "element=" 1392 + elementName 1393 + (attributeName == null ? "" : ", attribute: " + attributeName) 1394 + (attributeValue == null ? "" : ", attributeValue: " + attributeValue); 1395 } 1396 } 1397 1398 // @SuppressWarnings("unused") isDeprecated(String elementName, String attributeName, String attributeValue)1399 public boolean isDeprecated(String elementName, String attributeName, String attributeValue) { 1400 Element element = getElementThrowingIfNull(elementName, null, null); 1401 if (element.isDeprecatedElement) { 1402 return true; 1403 } 1404 if ("*".equals(attributeName) || "_q".equals(attributeName)) { 1405 return false; 1406 } 1407 Attribute attribute = element.getAttributeNamed(attributeName); 1408 if (attribute == null) { 1409 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1410 } else if (attribute.isDeprecatedAttribute) { 1411 return true; 1412 } 1413 return attribute.deprecatedValues.contains( 1414 attributeValue); // don't need special test for "*" 1415 } 1416 allowsUEscape(String elementName, String attributeName, String attributeValue)1417 public boolean allowsUEscape(String elementName, String attributeName, String attributeValue) { 1418 Element element = getElementThrowingIfNull(elementName, null, null); 1419 Attribute attribute = element.getAttributeNamed(attributeName); 1420 if (attribute == null) { 1421 throw new IllegalByDtdException(elementName, attributeName, attributeValue); 1422 } else if (attribute.allowsUEscape()) { 1423 return true; 1424 } 1425 return false; 1426 } 1427 1428 /** 1429 * Returns whether an element (specified by its full name) is ordered. This method understands 1430 * all elements in the DTDs used (including the ICU extensions), but will throw 1431 * IllegalByDtdException for unknown elements. See CLDR-8614 for more background. 1432 */ isOrdered(String elementName)1433 public boolean isOrdered(String elementName) { 1434 Element element = getElementThrowingIfNull(elementName, null, null); 1435 return element.isOrdered(); 1436 } 1437 getElementThrowingIfNull( String elementName, String attributeName, String value)1438 public Element getElementThrowingIfNull( 1439 String elementName, String attributeName, String value) { 1440 Element element = nameToElement.get(elementName); 1441 if (element == null) { 1442 throw new IllegalByDtdException(elementName, attributeName, value); 1443 } 1444 return element; 1445 } 1446 1447 /** 1448 * Returns whether an element (specified by its full name) is a tech preview. This method 1449 * understands all elements in the DTDs used (including the ICU extensions), but will throw 1450 * IllegalByDtdException for unknown elements. See CLDR-8614 for more background. 1451 */ isTechPreview(String elementName)1452 public boolean isTechPreview(String elementName) { 1453 Element element = getElementThrowingIfNull(elementName, null, null); 1454 return element.isTechPreview(); 1455 } 1456 getAttributeStatus(String elementName, String attributeName)1457 public AttributeStatus getAttributeStatus(String elementName, String attributeName) { 1458 if ("_q".equals(attributeName)) { 1459 return AttributeStatus.distinguished; // special case 1460 } 1461 Element element = nameToElement.get(elementName); 1462 if (element == null) { 1463 if (elementName.startsWith("icu:")) { 1464 return AttributeStatus.distinguished; 1465 } 1466 throw new IllegalByDtdException(elementName, attributeName, null); 1467 } 1468 Attribute attribute = element.getAttributeNamed(attributeName); 1469 if (attribute == null) { 1470 if (elementName.startsWith("icu:")) { 1471 return AttributeStatus.distinguished; 1472 } 1473 throw new IllegalByDtdException(elementName, attributeName, null); 1474 } 1475 return attribute.attributeStatus; 1476 } 1477 1478 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1479 private static MapComparator<String> valueOrdering = 1480 new MapComparator<String>().setErrorOnMissing(false).freeze(); 1481 1482 static MapComparator<String> dayValueOrder = 1483 new MapComparator<String>() 1484 .add("sun", "mon", "tue", "wed", "thu", "fri", "sat") 1485 .freeze(); 1486 static MapComparator<String> dayPeriodOrder = 1487 new MapComparator<String>() 1488 .add( 1489 "midnight", 1490 "am", 1491 "noon", 1492 "pm", 1493 "morning1", 1494 "morning2", 1495 "afternoon1", 1496 "afternoon2", 1497 "evening1", 1498 "evening2", 1499 "night1", 1500 "night2", 1501 // The ones on the following line are no longer used actively. Can be 1502 // removed later? 1503 "earlyMorning", 1504 "morning", 1505 "midDay", 1506 "afternoon", 1507 "evening", 1508 "night", 1509 "weeHours") 1510 .freeze(); 1511 static MapComparator<String> dateTimeFormatOrder = 1512 new MapComparator<String>().add("standard", "atTime").freeze(); 1513 static MapComparator<String> listPatternOrder = 1514 new MapComparator<String>().add("start", "middle", "end", "2", "3").freeze(); 1515 static MapComparator<String> widthOrder = 1516 new MapComparator<String>() 1517 .add("abbreviated", "narrow", "short", "wide", "all") 1518 .freeze(); 1519 static MapComparator<String> lengthOrder = 1520 new MapComparator<String>().add("full", "long", "medium", "short").freeze(); 1521 static MapComparator<String> dateFieldOrder = 1522 new MapComparator<String>() 1523 .add( 1524 "era", 1525 "era-short", 1526 "era-narrow", 1527 "year", 1528 "year-short", 1529 "year-narrow", 1530 "quarter", 1531 "quarter-short", 1532 "quarter-narrow", 1533 "month", 1534 "month-short", 1535 "month-narrow", 1536 "week", 1537 "week-short", 1538 "week-narrow", 1539 "weekOfMonth", 1540 "weekOfMonth-short", 1541 "weekOfMonth-narrow", 1542 "day", 1543 "day-short", 1544 "day-narrow", 1545 "dayOfYear", 1546 "dayOfYear-short", 1547 "dayOfYear-narrow", 1548 "weekday", 1549 "weekday-short", 1550 "weekday-narrow", 1551 "weekdayOfMonth", 1552 "weekdayOfMonth-short", 1553 "weekdayOfMonth-narrow", 1554 "sun", 1555 "sun-short", 1556 "sun-narrow", 1557 "mon", 1558 "mon-short", 1559 "mon-narrow", 1560 "tue", 1561 "tue-short", 1562 "tue-narrow", 1563 "wed", 1564 "wed-short", 1565 "wed-narrow", 1566 "thu", 1567 "thu-short", 1568 "thu-narrow", 1569 "fri", 1570 "fri-short", 1571 "fri-narrow", 1572 "sat", 1573 "sat-short", 1574 "sat-narrow", 1575 "dayperiod-short", 1576 "dayperiod", 1577 "dayperiod-narrow", 1578 "hour", 1579 "hour-short", 1580 "hour-narrow", 1581 "minute", 1582 "minute-short", 1583 "minute-narrow", 1584 "second", 1585 "second-short", 1586 "second-narrow", 1587 "zone", 1588 "zone-short", 1589 "zone-narrow") 1590 .freeze(); 1591 static MapComparator<String> nameFieldOrder = 1592 new MapComparator<String>().add(PersonNameFormatter.ModifiedField.ALL_SAMPLES).freeze(); 1593 static MapComparator<String> orderValueOrder = 1594 new MapComparator<String>() 1595 .add(PersonNameFormatter.Order.ALL, Object::toString) 1596 .freeze(); 1597 static MapComparator<String> lengthValueOrder = 1598 new MapComparator<String>() 1599 .add(PersonNameFormatter.Length.ALL, Object::toString) 1600 .freeze(); 1601 static MapComparator<String> usageValueOrder = 1602 new MapComparator<String>() 1603 .add(PersonNameFormatter.Usage.ALL, Object::toString) 1604 .freeze(); 1605 static MapComparator<String> formalityValueOrder = 1606 new MapComparator<String>() 1607 .add(PersonNameFormatter.Formality.ALL, Object::toString) 1608 .freeze(); 1609 static MapComparator<String> sampleNameItemOrder = 1610 new MapComparator<String>() 1611 .add(PersonNameFormatter.SampleType.ALL, Object::toString) 1612 .freeze(); 1613 1614 // TODO We could build most of the above from the dtd data for literal values. That way they 1615 // would always be 1616 // in sync. 1617 getUnitOrder()1618 public static MapComparator<String> getUnitOrder() { 1619 return UnitOrderHolder.INSTANCE; 1620 } 1621 1622 private static final class UnitOrderHolder { 1623 private static final MapComparator<String> INSTANCE = 1624 // new MapComparator<String>() 1625 // 1626 // .add(Validity.getInstance().getCodeToStatus(LstrType.unit).keySet()) 1627 // .freeze(); 1628 // } 1629 new MapComparator<>( 1630 Arrays.asList( 1631 "acceleration-g-force", 1632 "acceleration-meter-per-square-second", 1633 "acceleration-meter-per-second-squared", // deprecated 1634 "angle-revolution", 1635 "angle-radian", 1636 "angle-degree", 1637 "angle-arc-minute", 1638 "angle-arc-second", 1639 "area-square-kilometer", 1640 "area-hectare", 1641 "area-square-meter", 1642 "area-square-centimeter", 1643 "area-square-mile", 1644 "area-acre", 1645 "area-square-yard", 1646 "area-square-foot", 1647 "area-square-inch", 1648 "area-dunam", 1649 "concentr-karat", 1650 "proportion-karat", // deprecated 1651 "concentr-milligram-ofglucose-per-deciliter", 1652 "concentr-milligram-per-deciliter", 1653 "concentr-millimole-per-liter", 1654 "concentr-item", 1655 "concentr-portion", 1656 "concentr-permillion", 1657 "concentr-part-per-million", // deprecated 1658 "concentr-percent", 1659 "concentr-permille", 1660 "concentr-permyriad", 1661 "concentr-mole", 1662 "concentr-ofglucose", 1663 "consumption-liter-per-kilometer", 1664 "consumption-liter-per-100-kilometer", 1665 "consumption-liter-per-100kilometers", // deprecated 1666 "consumption-mile-per-gallon", 1667 "consumption-mile-per-gallon-imperial", 1668 "digital-petabyte", 1669 "digital-terabyte", 1670 "digital-terabit", 1671 "digital-gigabyte", 1672 "digital-gigabit", 1673 "digital-megabyte", 1674 "digital-megabit", 1675 "digital-kilobyte", 1676 "digital-kilobit", 1677 "digital-byte", 1678 "digital-bit", 1679 "duration-century", 1680 "duration-decade", 1681 "duration-year", 1682 "duration-year-person", 1683 "duration-quarter", 1684 "duration-month", 1685 "duration-month-person", 1686 "duration-week", 1687 "duration-week-person", 1688 "duration-day", 1689 "duration-day-person", 1690 "duration-hour", 1691 "duration-minute", 1692 "duration-second", 1693 "duration-millisecond", 1694 "duration-microsecond", 1695 "duration-nanosecond", 1696 "electric-ampere", 1697 "electric-milliampere", 1698 "electric-ohm", 1699 "electric-volt", 1700 "energy-kilocalorie", 1701 "energy-calorie", 1702 "energy-foodcalorie", 1703 "energy-kilojoule", 1704 "energy-joule", 1705 "energy-kilowatt-hour", 1706 "energy-electronvolt", 1707 "energy-british-thermal-unit", 1708 "energy-therm-us", 1709 "force-pound-force", 1710 "force-newton", 1711 "force-kilowatt-hour-per-100-kilometer", 1712 "frequency-gigahertz", 1713 "frequency-megahertz", 1714 "frequency-kilohertz", 1715 "frequency-hertz", 1716 "graphics-em", 1717 "graphics-pixel", 1718 "graphics-megapixel", 1719 "graphics-pixel-per-centimeter", 1720 "graphics-pixel-per-inch", 1721 "graphics-dot-per-centimeter", 1722 "graphics-dot-per-inch", 1723 "graphics-dot", 1724 "length-earth-radius", 1725 "length-100-kilometer", 1726 "length-kilometer", 1727 "length-meter", 1728 "length-decimeter", 1729 "length-centimeter", 1730 "length-millimeter", 1731 "length-micrometer", 1732 "length-nanometer", 1733 "length-picometer", 1734 "length-mile", 1735 "length-yard", 1736 "length-foot", 1737 "length-inch", 1738 "length-parsec", 1739 "length-light-year", 1740 "length-astronomical-unit", 1741 "length-furlong", 1742 "length-fathom", 1743 "length-nautical-mile", 1744 "length-mile-scandinavian", 1745 "length-point", 1746 "length-solar-radius", 1747 "light-lux", 1748 "light-candela", 1749 "light-lumen", 1750 "light-solar-luminosity", 1751 "mass-tonne", 1752 "mass-metric-ton", 1753 "mass-kilogram", 1754 "mass-gram", 1755 "mass-milligram", 1756 "mass-microgram", 1757 "mass-ton", 1758 "mass-stone", 1759 "mass-pound", 1760 "mass-ounce", 1761 "mass-ounce-troy", 1762 "mass-carat", 1763 "mass-dalton", 1764 "mass-earth-mass", 1765 "mass-solar-mass", 1766 "mass-grain", 1767 "power-gigawatt", 1768 "power-megawatt", 1769 "power-kilowatt", 1770 "power-watt", 1771 "power-milliwatt", 1772 "power-horsepower", 1773 "pressure-millimeter-ofhg", 1774 "pressure-millimeter-of-mercury", // deprecated 1775 "pressure-ofhg", 1776 "pressure-pound-force-per-square-inch", 1777 "pressure-pound-per-square-inch", // deprecated 1778 "pressure-inch-ofhg", 1779 "pressure-inch-hg", // deprecated 1780 "pressure-bar", 1781 "pressure-millibar", 1782 "pressure-atmosphere", 1783 "pressure-pascal", 1784 "pressure-hectopascal", 1785 "pressure-kilopascal", 1786 "pressure-megapascal", 1787 "speed-kilometer-per-hour", 1788 "speed-meter-per-second", 1789 "speed-mile-per-hour", 1790 "speed-knot", 1791 "speed-beaufort", 1792 "temperature-generic", 1793 "temperature-celsius", 1794 "temperature-fahrenheit", 1795 "temperature-kelvin", 1796 "torque-pound-force-foot", 1797 "torque-pound-foot", // deprecated 1798 "torque-newton-meter", 1799 "volume-cubic-kilometer", 1800 "volume-cubic-meter", 1801 "volume-cubic-centimeter", 1802 "volume-cubic-mile", 1803 "volume-cubic-yard", 1804 "volume-cubic-foot", 1805 "volume-cubic-inch", 1806 "volume-megaliter", 1807 "volume-hectoliter", 1808 "volume-liter", 1809 "volume-deciliter", 1810 "volume-centiliter", 1811 "volume-milliliter", 1812 "volume-pint-metric", 1813 "volume-cup-metric", 1814 "volume-acre-foot", 1815 "volume-bushel", 1816 "volume-gallon", 1817 "volume-gallon-imperial", 1818 "volume-quart", 1819 "volume-pint", 1820 "volume-pint-imperial", 1821 "volume-cup", 1822 "volume-fluid-ounce", 1823 "volume-fluid-ounce-imperial", 1824 "volume-tablespoon", 1825 "volume-teaspoon", 1826 "volume-barrel", 1827 "volume-dessert-spoon", 1828 "volume-dessert-spoon-imperial", 1829 "volume-drop", 1830 "volume-dram", 1831 "volume-jigger", 1832 "volume-pinch", 1833 "volume-quart-imperial", 1834 "angle-steradian", 1835 "concentr-katal", 1836 "electric-coulomb", 1837 "electric-farad", 1838 "electric-henry", 1839 "electric-siemens", 1840 "energy-calorie-it", 1841 "energy-british-thermal-unit-it", 1842 "energy-becquerel", 1843 "energy-sievert", 1844 "energy-gray", 1845 "force-kilogram-force", 1846 "length-rod", 1847 "length-chain", 1848 "magnetic-tesla", 1849 "magnetic-weber", 1850 "temperature-rankine", 1851 "duration-fortnight", 1852 "mass-slug", 1853 "pressure-gasoline-energy-density", 1854 "length-rin", 1855 "length-sun", 1856 "length-shaku-length", 1857 "length-shaku-cloth", 1858 "length-ken", 1859 "length-jo-jp", 1860 "length-ri-jp", 1861 "area-bu-jp", 1862 "area-se-jp", 1863 "area-cho", 1864 "volume-kosaji", 1865 "volume-osaji", 1866 "volume-cup-jp", 1867 "volume-shaku", 1868 "volume-sai", 1869 "volume-to-jp", 1870 "volume-koku", 1871 "mass-fun")) 1872 .freeze(); 1873 } 1874 1875 static MapComparator<String> countValueOrder = 1876 new MapComparator<String>() 1877 .add("0", "1", "zero", "one", "two", "few", "many", "other") 1878 .freeze(); 1879 static MapComparator<String> unitLengthOrder = 1880 new MapComparator<String>().add("long", "short", "narrow").freeze(); 1881 static MapComparator<String> currencyFormatOrder = 1882 new MapComparator<String>().add("standard", "accounting").freeze(); 1883 static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator(); 1884 1885 static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator(); 1886 1887 // Hack for US 1888 static final Comparator<String> UNICODE_SET_COMPARATOR = 1889 new Comparator<>() { 1890 @Override 1891 public int compare(String o1, String o2) { 1892 if (o1.contains("{")) { 1893 o1 = o1.replace("{", ""); 1894 } 1895 if (o2.contains("{")) { 1896 o2 = o2.replace("{", ""); 1897 } 1898 return COMP.compare(o1, o2); 1899 } 1900 }; 1901 getAttributeValueComparator(String element, String attribute)1902 public static Comparator<String> getAttributeValueComparator(String element, String attribute) { 1903 return getAttributeValueComparator(DtdType.ldml, element, attribute); 1904 } 1905 getAttributeValueComparator( DtdType type, String element, String attribute)1906 static Comparator<String> getAttributeValueComparator( 1907 DtdType type, String element, String attribute) { 1908 // The default is a map comparator, which compares numbers as numbers, and strings with UCA 1909 Comparator<String> comp = valueOrdering; 1910 if (type != DtdType.ldml && type != DtdType.ldmlICU) { 1911 return comp; 1912 } 1913 if (attribute.equals("day")) { // && (element.startsWith("weekend") 1914 comp = dayValueOrder; 1915 } else if (attribute.equals("type")) { 1916 if (element.endsWith("FormatLength")) { 1917 comp = lengthOrder; 1918 } else if (element.endsWith("Width")) { 1919 comp = widthOrder; 1920 } else if (element.equals("day")) { 1921 comp = dayValueOrder; 1922 } else if (element.equals("field")) { 1923 comp = dateFieldOrder; 1924 } else if (element.equals("zone")) { 1925 comp = zoneOrder; 1926 } else if (element.equals("listPatternPart")) { 1927 comp = listPatternOrder; 1928 } else if (element.equals("currencyFormat")) { 1929 comp = currencyFormatOrder; 1930 } else if (element.equals("unitLength")) { 1931 comp = unitLengthOrder; 1932 } else if (element.equals("unit")) { 1933 comp = getUnitOrder(); 1934 } else if (element.equals("dayPeriod")) { 1935 comp = dayPeriodOrder; 1936 } else if (element.equals("dateTimeFormat")) { 1937 comp = dateTimeFormatOrder; 1938 } else if (element.equals("nameField")) { 1939 comp = nameFieldOrder; 1940 } 1941 } else if (attribute.equals("order") && element.equals("personName")) { 1942 comp = orderValueOrder; 1943 } else if (attribute.equals("length") && element.equals("personName")) { 1944 comp = lengthValueOrder; 1945 } else if (attribute.equals("usage") && element.equals("personName")) { 1946 comp = usageValueOrder; 1947 } else if (attribute.equals("formality")) { 1948 comp = formalityValueOrder; 1949 } else if (attribute.equals("item") && element.equals("sampleName")) { 1950 comp = sampleNameItemOrder; 1951 } else if (attribute.equals("count") && !element.equals("minDays")) { 1952 comp = countValueOrder; 1953 } else if (attribute.equals("cp") && element.equals("annotation")) { 1954 comp = UNICODE_SET_COMPARATOR; 1955 } 1956 return comp; 1957 } 1958 1959 /** Comparator for attributes in CLDR files */ 1960 private static AttributeValueComparator ldmlAvc = 1961 new AttributeValueComparator() { 1962 @Override 1963 public int compare(String element, String attribute, String value1, String value2) { 1964 Comparator<String> comp = getAttributeValueComparator(element, attribute); 1965 return comp.compare(value1, value2); 1966 } 1967 }; 1968 hasValue(String elementName)1969 public boolean hasValue(String elementName) { 1970 return nameToElement.get(elementName).type == ElementType.PCDATA; 1971 } 1972 isMetadata(XPathParts pathPlain)1973 public boolean isMetadata(XPathParts pathPlain) { 1974 for (String s : pathPlain.getElements()) { 1975 Element e = getElementFromName().get(s); 1976 if (e.elementStatus == ElementStatus.metadata) { 1977 return true; 1978 } 1979 } 1980 return false; 1981 } 1982 isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1983 public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) { 1984 // TODO Don't use hard-coded list; instead add to DTD annotations 1985 final String element1 = pathPlain.getElement(1); 1986 final String element2 = pathPlain.getElement(2); 1987 final String elementN = pathPlain.getElement(-1); 1988 switch (dtdType2) { 1989 case ldml: 1990 switch (element1) { 1991 case "generation": 1992 case "metadata": 1993 return true; 1994 } 1995 break; 1996 case ldmlBCP47: 1997 switch (element1) { 1998 case "generation": 1999 case "version": 2000 return true; 2001 } 2002 break; 2003 //// 2004 // supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment 2005 case supplementalData: 2006 // these are NOT under /metadata/ but are actually metadata 2007 switch (element1) { 2008 case "generation": 2009 case "version": 2010 case "validity": 2011 case "references": 2012 case "coverageLevels": 2013 return true; 2014 case "transforms": 2015 return elementN.equals("comment"); 2016 case "metadata": 2017 // these ARE under /metadata/, but many others under /metadata/ are NOT 2018 // actually metadata. 2019 switch (element2) { 2020 case "validity": 2021 case "serialElements": 2022 case "suppress": 2023 case "distinguishing": 2024 case "blocking": 2025 case "casingData": 2026 return true; 2027 } 2028 break; 2029 } 2030 break; 2031 default: 2032 } 2033 return false; 2034 } 2035 isDeprecated(XPathParts pathPlain)2036 public boolean isDeprecated(XPathParts pathPlain) { 2037 for (int i = 0; i < pathPlain.size(); ++i) { 2038 String elementName = pathPlain.getElement(i); 2039 if (isDeprecated(elementName, "*", null)) { 2040 return true; 2041 } 2042 for (String attribute : pathPlain.getAttributeKeys(i)) { 2043 String attributeValue = pathPlain.getAttributeValue(i, attribute); 2044 if (isDeprecated(elementName, attribute, attributeValue)) { 2045 return true; 2046 } 2047 } 2048 } 2049 return false; 2050 } 2051 2052 public static final Splitter SPACE_SPLITTER = 2053 Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings(); 2054 public static final Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings(); 2055 public static final Splitter CR_SPLITTER = 2056 Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings(); 2057 2058 private static class XPathPartsSet { 2059 private final Set<XPathParts> list = new LinkedHashSet<>(); 2060 addElement(String element)2061 private void addElement(String element) { 2062 if (list.isEmpty()) { 2063 list.add(new XPathParts().addElement(element)); 2064 } else { 2065 for (XPathParts item : list) { 2066 item.addElement(element); 2067 } 2068 } 2069 } 2070 addAttribute(String attribute, String attributeValue)2071 private void addAttribute(String attribute, String attributeValue) { 2072 for (XPathParts item : list) { 2073 item.addAttribute(attribute, attributeValue); 2074 } 2075 } 2076 setElement(int i, String string)2077 private void setElement(int i, String string) { 2078 for (XPathParts item : list) { 2079 item.setElement(i, string); 2080 } 2081 } 2082 addAttributes(String attribute, List<String> attributeValues)2083 private void addAttributes(String attribute, List<String> attributeValues) { 2084 if (attributeValues.size() == 1) { 2085 addAttribute(attribute, attributeValues.iterator().next()); 2086 } else { 2087 // duplicate all the items in the list with the given values 2088 Set<XPathParts> newList = new LinkedHashSet<>(); 2089 for (XPathParts item : list) { 2090 for (String attributeValue : attributeValues) { 2091 XPathParts newItem = item.cloneAsThawed(); 2092 newItem.addAttribute(attribute, attributeValue); 2093 newList.add(newItem); 2094 } 2095 } 2096 list.clear(); 2097 list.addAll(newList); 2098 } 2099 } 2100 toStrings()2101 private ImmutableSet<String> toStrings() { 2102 Builder<String> result = new ImmutableSet.Builder<>(); 2103 2104 for (XPathParts item : list) { 2105 result.add(item.toString()); 2106 } 2107 return result.build(); 2108 } 2109 2110 @Override toString()2111 public String toString() { 2112 return list.toString(); 2113 } 2114 } 2115 getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)2116 public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) { 2117 extras.clear(); 2118 Map<String, String> valueAttributes = new HashMap<>(); 2119 XPathPartsSet pathResult = new XPathPartsSet(); 2120 String element = null; 2121 for (int i = 0; i < pathPlain.size(); ++i) { 2122 element = pathPlain.getElement(i); 2123 pathResult.addElement(element); 2124 valueAttributes.clear(); 2125 for (String attribute : pathPlain.getAttributeKeys(i)) { 2126 AttributeStatus status = getAttributeStatus(element, attribute); 2127 final String attributeValue = pathPlain.getAttributeValue(i, attribute); 2128 switch (status) { 2129 case distinguished: 2130 AttributeType attrType = getAttributeType(element, attribute); 2131 if (attrType == AttributeType.NMTOKENS) { 2132 pathResult.addAttributes( 2133 attribute, SPACE_SPLITTER.splitToList(attributeValue)); 2134 } else { 2135 pathResult.addAttribute(attribute, attributeValue); 2136 } 2137 break; 2138 case value: 2139 valueAttributes.put(attribute, attributeValue); 2140 break; 2141 case metadata: 2142 break; 2143 } 2144 } 2145 if (!valueAttributes.isEmpty()) { 2146 boolean hasValue = hasValue(element); 2147 // if it doesn't have a value, we construct new child elements, with _ prefix 2148 // if it does have a value, we have to play a further trick, since 2149 // we can't have a value and child elements at the same level. 2150 // So we use a _ suffix on the element. 2151 if (hasValue) { 2152 pathResult.setElement(i, element + "_"); 2153 } else { 2154 int debug = 0; 2155 } 2156 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) { 2157 final String attribute = attributeAndValue.getKey(); 2158 final String attributeValue = attributeAndValue.getValue(); 2159 2160 Set<String> pathsShort = pathResult.toStrings(); 2161 AttributeType attrType = getAttributeType(element, attribute); 2162 for (String pathShort : pathsShort) { 2163 pathShort += "/_" + attribute; 2164 if (attrType == AttributeType.NMTOKENS) { 2165 for (String valuePart : SPACE_SPLITTER.split(attributeValue)) { 2166 extras.put(pathShort, valuePart); 2167 } 2168 } else { 2169 extras.put(pathShort, attributeValue); 2170 } 2171 } 2172 } 2173 if (hasValue) { 2174 pathResult.setElement(i, element); // restore 2175 } 2176 } 2177 } 2178 // Only add the path if it could have a value, looking at the last element 2179 if (!hasValue(element)) { 2180 return null; 2181 } 2182 return pathResult.toStrings(); 2183 } 2184 getAttributeType(String elementName, String attributeName)2185 public AttributeType getAttributeType(String elementName, String attributeName) { 2186 Attribute attr = getAttribute(elementName, attributeName); 2187 return (attr != null) ? attr.type : null; 2188 } 2189 getAttribute(String elementName, String attributeName)2190 public Attribute getAttribute(String elementName, String attributeName) { 2191 Element element = nameToElement.get(elementName); 2192 return (element != null) ? element.getAttributeNamed(attributeName) : null; 2193 } 2194 2195 // TODO: add support for following to DTD annotations, and rework API 2196 2197 static final Set<String> SPACED_VALUES = ImmutableSet.of("idValidity", "languageGroup"); 2198 getValueSplitter(XPathParts pathPlain)2199 public static Splitter getValueSplitter(XPathParts pathPlain) { 2200 if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) { 2201 return SPACE_SPLITTER; 2202 } else if (pathPlain.getElement(-1).equals("annotation") 2203 && !pathPlain.getAttributeKeys(-1).contains("tts")) { 2204 return BAR_SPLITTER; 2205 } 2206 return CR_SPLITTER; 2207 } 2208 isComment(XPathParts pathPlain, String line)2209 public static boolean isComment(XPathParts pathPlain, String line) { 2210 if (pathPlain.contains("transform")) { 2211 if (line.startsWith("#")) { 2212 return true; 2213 } 2214 } 2215 return false; 2216 } 2217 isExtraSplit(String extraPath)2218 public static boolean isExtraSplit(String extraPath) { 2219 if (extraPath.endsWith("/_type") 2220 && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { 2221 return true; 2222 } 2223 return false; 2224 } 2225 2226 /** Return the value status for an EAV */ getValueStatus(String elementName, String attributeName, String value)2227 public ValueStatus getValueStatus(String elementName, String attributeName, String value) { 2228 Element element = nameToElement.get(elementName); 2229 if (element == null) { 2230 return ValueStatus.invalid; 2231 } 2232 Attribute attr = element.getAttributeNamed(attributeName); 2233 if (attr == null) { 2234 return ValueStatus.invalid; 2235 } 2236 return attr.getValueStatus(value); 2237 } 2238 2239 /** Return element-attribute pairs with non-enumerated values, for quick checks. */ getNonEnumerated(Map<String, String> matchValues)2240 public Multimap<String, String> getNonEnumerated(Map<String, String> matchValues) { 2241 Multimap<String, String> nonEnumeratedElementToAttribute = 2242 TreeMultimap.create(); // make tree for ease of debugging 2243 for (Entry<String, Element> entry : nameToElement.entrySet()) { 2244 Element element = entry.getValue(); 2245 for (Attribute attribute : element.attributes.keySet()) { 2246 if (attribute.type != AttributeType.ENUMERATED_TYPE) { 2247 String elementName = element.getName(); 2248 String attrName = attribute.getName(); 2249 nonEnumeratedElementToAttribute.put(elementName, attrName); 2250 if (attribute.matchValue != null) { 2251 matchValues.put( 2252 elementName + "\t" + attrName, attribute.matchValue.getName()); 2253 } 2254 } 2255 } 2256 } 2257 return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute); 2258 } 2259 2260 /** Get the value constraint on the last element in a path */ getValueConstraint(String xpath)2261 public static ValueConstraint getValueConstraint(String xpath) { 2262 return getElement(xpath, -1).getValueConstraint(); 2263 } 2264 2265 /** Get an element from a path and element index. */ getElement(String xpath, int elementIndex)2266 public static Element getElement(String xpath, int elementIndex) { 2267 XPathParts parts = XPathParts.getFrozenInstance(xpath); 2268 return DtdData.getInstance(DtdType.valueOf(parts.getElement(0))) 2269 .getElementFromName() 2270 .get(parts.getElement(elementIndex)); 2271 } 2272 } 2273