xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.CharMatcher;
4 import com.google.common.base.Joiner;
5 import com.google.common.base.Splitter;
6 import com.google.common.collect.ImmutableMultimap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.ImmutableSet.Builder;
9 import com.google.common.collect.ImmutableSetMultimap;
10 import com.google.common.collect.Multimap;
11 import com.google.common.collect.TreeMultimap;
12 import com.ibm.icu.impl.Relation;
13 import com.ibm.icu.text.Transform;
14 import java.io.File;
15 import java.io.StringReader;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collection;
19 import java.util.Collections;
20 import java.util.Comparator;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.Iterator;
24 import java.util.LinkedHashMap;
25 import java.util.LinkedHashSet;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.Map;
29 import java.util.Map.Entry;
30 import java.util.Set;
31 import java.util.TreeMap;
32 import java.util.concurrent.ConcurrentHashMap;
33 import java.util.concurrent.ConcurrentMap;
34 import java.util.regex.Pattern;
35 import org.unicode.cldr.util.DtdData.Element.ValueConstraint;
36 import org.unicode.cldr.util.MatchValue.LiteralMatchValue;
37 import org.unicode.cldr.util.personname.PersonNameFormatter;
38 
39 /**
40  * An immutable object that contains the structure of a DTD.
41  *
42  * @author markdavis
43  */
44 public class DtdData extends XMLFileReader.SimpleHandler {
    /** Platform line separator followed by four spaces. */
    private static final String COMMENT_PREFIX = System.lineSeparator() + "    ";

    /** Read from the {@code show_all} property (default false); echoes declarations as parsed. */
    private static final boolean SHOW_ALL = CldrUtility.getProperty("show_all", false);

    private static final boolean USE_SYNTHESIZED = false;

    /** Compile-time switch for diagnostic printing. */
    private static final boolean DEBUG = false;

    /**
     * Matches any single character that is not an ASCII letter, digit, or one of {@code - # _ :}.
     * Used to split content models and attribute type strings into their name tokens.
     */
    private static final Pattern FILLER = PatternCache.get("[^-a-zA-Z0-9#_:]");

    /** Attribute name to every attribute with that name; keys sorted, values in insertion order. */
    private final Relation<String, Attribute> nameToAttributes =
            Relation.of(new TreeMap<String, Set<Attribute>>(), LinkedHashSet.class);

    /** Element name to its single {@link Element} instance; populated by {@code elementFrom}. */
    private Map<String, Element> nameToElement = new HashMap<>();

    private MapComparator<String> elementComparator;
    private MapComparator<String> attributeComparator;

    // TODO Make this data driven. See https://unicode-org.atlassian.net/browse/CLDR-17321
    /**
     * Per DTD type, the names of {@code #PCDATA} elements whose value constraint is {@code any}
     * instead of the {@code nonempty} that other {@code #PCDATA} elements receive.
     */
    public static final Multimap<DtdType, String> HACK_PCDATA_ALLOWS_EMPTY =
            ImmutableMultimap.<DtdType, String>builder()
                    .putAll(
                            DtdType.ldml,
                            "nameOrderLocales",
                            "foreignSpaceReplacement",
                            "nativeSpaceReplacement",
                            "language",
                            "script",
                            "region",
                            "variant",
                            "territory")
                    .putAll(DtdType.supplementalData, "variable", "attributeValues")
                    .build();

    /** Element named by the DTD type's root element name; assigned in the constructor. */
    public final Element ROOT;

    /** Element registered under the name {@code #PCDATA}. */
    public final Element PCDATA = elementFrom("#PCDATA");

    /** Element registered under the name {@code ANY}. */
    public final Element ANY = elementFrom("ANY");

    public final DtdType dtdType;
    public final String version;

    /** Most recently declared element, or null if an attribute was declared more recently. */
    private Element lastElement;

    /** Most recently declared attribute, or null if an element was declared more recently. */
    private Attribute lastAttribute;

    /** Comments collected ahead of the next declaration; reset to null once consumed. */
    private Set<String> preCommentCache;

    private DtdComparator dtdComparator;
83 
84     public enum AttributeStatus {
85         distinguished("§d"),
86         value("§v"),
87         metadata("§m︎");
88         public final String shortName;
89 
AttributeStatus(String shortName)90         AttributeStatus(String shortName) {
91             this.shortName = shortName;
92         }
93 
getShortName(AttributeStatus status)94         public static String getShortName(AttributeStatus status) {
95             return status == null ? "" : status.shortName;
96         }
97     }
98 
99     public enum Mode {
100         REQUIRED("#REQUIRED"),
101         OPTIONAL("#IMPLIED"),
102         FIXED("#FIXED"),
103         NULL("null");
104 
105         public final String source;
106 
Mode(String s)107         Mode(String s) {
108             source = s;
109         }
110 
forString(String mode)111         public static Mode forString(String mode) {
112             for (Mode value : Mode.values()) {
113                 if (value.source.equals(mode)) {
114                     return value;
115                 }
116             }
117             if (mode == null) {
118                 return NULL;
119             }
120             throw new IllegalArgumentException(mode);
121         }
122     }
123 
124     public enum AttributeType {
125         CDATA,
126         ID,
127         IDREF,
128         IDREFS,
129         ENTITY,
130         ENTITIES,
131         NMTOKEN,
132         NMTOKENS,
133         ENUMERATED_TYPE
134     }
135 
    /**
     * Names of ldml elements that are exempt from the rule in the {@link Attribute} constructor
     * that marks a {@code draft} attribute as deprecated when its element has children.
     */
    static final Set<String> DRAFT_ON_NON_LEAF_ALLOWED =
            ImmutableSet.of("collation", "transform", "unitPreferenceData", "rulesetGrouping");
138 
139     public static class Attribute implements Named {
140         private static final Joiner JOINER_COMMA_SPACE = Joiner.on(", ");
141         public static final String AUG_TRAIL = "⟫";
142         public static final String AUG_LEAD = "⟪";
143         public static final String ENUM_TRAIL = "⟩";
144         public static final String ENUM_LEAD = "⟨";
145         public static final Pattern LEAD_TRAIL =
146                 Pattern.compile(
147                         "(.*["
148                                 + AUG_LEAD
149                                 + ENUM_LEAD
150                                 + "])(.*)(["
151                                 + AUG_TRAIL
152                                 + ENUM_TRAIL
153                                 + "].*)");
154         public final String name;
155         public final Element element;
156         public final Mode mode;
157         public final String defaultValue;
158         public final AttributeType type;
159         public final Map<String, Integer> values; // immutable
160         private final Set<String> commentsPre;
161         private Set<String> commentsPost;
162         private boolean isDeprecatedAttribute;
163         private boolean attributeAllowsUEscape = false;
164         public AttributeStatus attributeStatus =
165                 AttributeStatus.distinguished; // default unless reset by annotations, or for xml:
166         // attributes
167         private Set<String> deprecatedValues = Collections.emptySet();
168         public MatchValue matchValue;
169         private final Comparator<String> attributeValueComparator;
170 
Attribute( DtdType dtdType, Element element2, String aName, Mode mode2, String[] split, String value2, Set<String> firstComment)171         private Attribute(
172                 DtdType dtdType,
173                 Element element2,
174                 String aName,
175                 Mode mode2,
176                 String[] split,
177                 String value2,
178                 Set<String> firstComment) {
179             commentsPre = firstComment;
180             element = element2;
181             name = aName.intern();
182             if (name.equals("draft") // normally never permitted on elements with children, but
183                     // special cases...
184                     && dtdType == DtdType.ldml
185                     && !DRAFT_ON_NON_LEAF_ALLOWED.contains(element.getName())) {
186                 int elementChildrenCount = element.getChildren().size();
187                 if (elementChildrenCount > 1
188                         || elementChildrenCount == 1
189                                 && !element.getChildren()
190                                         .keySet()
191                                         .iterator()
192                                         .next()
193                                         .getName()
194                                         .equals("cp")) {
195                     isDeprecatedAttribute = true;
196                     if (DEBUG) {
197                         System.out.println(element.getName() + ":" + element.getChildren());
198                     }
199                 }
200             } else if (name.startsWith("xml:")) {
201                 attributeStatus = AttributeStatus.metadata;
202             }
203             mode = mode2;
204             defaultValue = value2 == null ? null : value2.intern();
205             AttributeType _type = AttributeType.ENUMERATED_TYPE;
206             Map<String, Integer> _values = Collections.emptyMap();
207             if (split.length == 1) {
208                 try {
209                     _type = AttributeType.valueOf(split[0]);
210                 } catch (Exception e) {
211                 }
212             }
213             type = _type;
214 
215             if (_type == AttributeType.ENUMERATED_TYPE) {
216                 LinkedHashMap<String, Integer> temp = new LinkedHashMap<>();
217                 for (String part : split) {
218                     if (part.length() != 0) {
219                         temp.put(part.intern(), temp.size());
220                     }
221                 }
222                 _values = Collections.unmodifiableMap(temp);
223             }
224             values = _values;
225             attributeValueComparator = getAttributeValueComparator(dtdType, element.name, name);
226         }
227 
228         @Override
toString()229         public String toString() {
230             return element.name + ":" + name;
231         }
232 
getSampleValue()233         public String getSampleValue() {
234             return type == AttributeType.ENUMERATED_TYPE
235                     ? (values.containsKey("year") ? "year" : values.keySet().iterator().next())
236                     : matchValue != null ? matchValue.getSample() : MatchValue.DEFAULT_SAMPLE;
237         }
238 
appendDtdString(StringBuilder b)239         public StringBuilder appendDtdString(StringBuilder b) {
240             Attribute a = this;
241             b.append("<!ATTLIST " + element.name + " " + a.name);
242             boolean first;
243             if (a.type == AttributeType.ENUMERATED_TYPE) {
244                 b.append(" (");
245                 first = true;
246                 for (String s : a.values.keySet()) {
247                     if (deprecatedValues.contains(s)) {
248                         continue;
249                     }
250                     if (first) {
251                         first = false;
252                     } else {
253                         b.append(" | ");
254                     }
255                     b.append(s);
256                 }
257                 b.append(")");
258             } else {
259                 b.append(' ').append(a.type);
260             }
261             if (a.mode != Mode.NULL) {
262                 b.append(" ").append(a.mode.source);
263             }
264             if (a.defaultValue != null) {
265                 b.append(" \"").append(a.defaultValue).append('"');
266             }
267             b.append(" >");
268             return b;
269         }
270 
features()271         public String features() {
272             return (type == AttributeType.ENUMERATED_TYPE
273                             ? values.keySet().toString()
274                             : type.toString())
275                     + (mode == Mode.NULL ? "" : ", mode=" + mode)
276                     + (defaultValue == null ? "" : ", default=" + defaultValue);
277         }
278 
279         @Override
getName()280         public String getName() {
281             return name;
282         }
283 
284         private static Splitter COMMA = Splitter.on(',').trimResults();
285 
addComment(String commentIn)286         public void addComment(String commentIn) {
287             if (commentIn.startsWith("@")) {
288                 switch (commentIn) {
289                     case "@METADATA":
290                         attributeStatus = AttributeStatus.metadata;
291                         break;
292                     case "@VALUE":
293                         attributeStatus = AttributeStatus.value;
294                         break;
295                     case "@DEPRECATED":
296                         isDeprecatedAttribute = true;
297                         break;
298                     case "@ALLOWS_UESC":
299                         attributeAllowsUEscape = true;
300                         break;
301 
302                     default:
303                         int colonPos = commentIn.indexOf(':');
304                         if (colonPos < 0) {
305                             throw new IllegalArgumentException(
306                                     element.name
307                                             + " "
308                                             + name
309                                             + "= : Unrecognized ATTLIST annotation: "
310                                             + commentIn);
311                         }
312                         String command = commentIn.substring(0, colonPos);
313                         String argument = commentIn.substring(colonPos + 1);
314                         switch (command) {
315                             case "@DEPRECATED":
316                                 deprecatedValues =
317                                         Collections.unmodifiableSet(
318                                                 new HashSet<>(COMMA.splitToList(argument)));
319                                 break;
320                             case "@MATCH":
321                                 if (matchValue != null) {
322                                     throw new IllegalArgumentException(
323                                             element.name
324                                                     + " "
325                                                     + name
326                                                     + "= : Conflicting @MATCH: "
327                                                     + matchValue.getName()
328                                                     + " & "
329                                                     + argument);
330                                 }
331                                 matchValue = MatchValue.of(argument);
332                                 break;
333                             default:
334                                 throw new IllegalArgumentException(
335                                         element.name
336                                                 + " "
337                                                 + name
338                                                 + "= : Unrecognized ATTLIST annotation: "
339                                                 + commentIn);
340                         }
341                 }
342                 return;
343             }
344             commentsPost = addUnmodifiable(commentsPost, commentIn.trim());
345         }
346 
347         /** Special version of identity; only considers name and name of element */
348         @Override
equals(Object obj)349         public boolean equals(Object obj) {
350             if (!(obj instanceof Attribute)) {
351                 return false;
352             }
353             Attribute that = (Attribute) obj;
354             return name.equals(that.name)
355                     && element.name.equals(
356                             that.element.name) // don't use plain element: circularity
357             // not relevant to identity
358             //                && Objects.equals(comment, that.comment)
359             //                && mode.equals(that.mode)
360             //                && Objects.equals(defaultValue, that.defaultValue)
361             //                && type.equals(that.type)
362             //                && values.equals(that.values)
363             ;
364         }
365 
366         /** Special version of identity; only considers name and name of element */
367         @Override
hashCode()368         public int hashCode() {
369             return name.hashCode() * 37
370                     + element.name.hashCode() // don't use plain element: circularity
371             // not relevant to identity
372             //                ) * 37 + Objects.hashCode(comment)) * 37
373             //                + mode.hashCode()) * 37
374             //                + Objects.hashCode(defaultValue)) * 37
375             //                + type.hashCode()) * 37
376             //                + values.hashCode()
377             ;
378         }
379 
isDeprecated()380         public boolean isDeprecated() {
381             return isDeprecatedAttribute;
382         }
383 
allowsUEscape()384         public boolean allowsUEscape() {
385             return attributeAllowsUEscape;
386         }
387 
isDeprecatedValue(String value)388         public boolean isDeprecatedValue(String value) {
389             return deprecatedValues.contains(value);
390         }
391 
getStatus()392         public AttributeStatus getStatus() {
393             return attributeStatus;
394         }
395 
getValueStatus(String value)396         public ValueStatus getValueStatus(String value) {
397             return deprecatedValues.contains(value)
398                     ? ValueStatus.invalid
399                     : type == AttributeType.ENUMERATED_TYPE
400                             ? (values.containsKey(value) ? ValueStatus.valid : ValueStatus.invalid)
401                             : matchValue == null
402                                     ? ValueStatus.unknown
403                                     : matchValue.is(value)
404                                             ? ValueStatus.valid
405                                             : ValueStatus.invalid;
406         }
407 
getMatchString()408         public String getMatchString() {
409             return type == AttributeType.ENUMERATED_TYPE
410                     ? ENUM_LEAD + JOINER_COMMA_SPACE.join(values.keySet()) + ENUM_TRAIL
411                     : matchValue != null ? AUG_LEAD + matchValue.toString() + AUG_TRAIL : "";
412         }
413 
getMatchLiterals()414         public Set<String> getMatchLiterals() {
415             if (type == AttributeType.ENUMERATED_TYPE) {
416                 return values.keySet();
417             } else if (matchValue != null && matchValue instanceof LiteralMatchValue) {
418                 return ((LiteralMatchValue) matchValue).getItems();
419             }
420             return null;
421         }
422 
getMatchingName(Map<Attribute, Integer> attributes)423         public Attribute getMatchingName(Map<Attribute, Integer> attributes) {
424             for (Attribute attribute : attributes.keySet()) {
425                 if (name.equals(attribute.getName())) {
426                     return attribute;
427                 }
428             }
429             return null;
430         }
431     }
432 
433     public enum ValueStatus {
434         invalid,
435         unknown,
436         valid
437     }
438 
DtdData(DtdType type, String version)439     private DtdData(DtdType type, String version) {
440         this.dtdType = type;
441         this.ROOT = elementFrom(type.rootElement());
442         this.version = version;
443     }
444 
addAttribute(String eName, String aName, String type, String mode, String value)445     private void addAttribute(String eName, String aName, String type, String mode, String value) {
446         Attribute a =
447                 new Attribute(
448                         dtdType,
449                         nameToElement.get(eName),
450                         aName,
451                         Mode.forString(mode),
452                         FILLER.split(type),
453                         value,
454                         preCommentCache);
455         preCommentCache = null;
456         getAttributesFromName().put(aName, a);
457         CldrUtility.putNew(a.element.attributes, a, a.element.attributes.size());
458         lastElement = null;
459         lastAttribute = a;
460     }
461 
462     public enum ElementType {
463         EMPTY,
464         ANY,
465         PCDATA("(#PCDATA)"),
466         CHILDREN;
467         public final String source;
468 
ElementType(String s)469         private ElementType(String s) {
470             source = s;
471         }
472 
ElementType()473         private ElementType() {
474             source = name();
475         }
476     }
477 
478     interface Named {
getName()479         String getName();
480     }
481 
482     public enum ElementStatus {
483         regular,
484         metadata
485     }
486 
487     public static class Element implements Named {
488         public enum ValueConstraint {
489             empty,
490             nonempty,
491             any
492         }
493 
494         public final String name;
495         private String rawModel;
496         private ElementType type;
497         private final Map<Element, Integer> children = new LinkedHashMap<>();
498         private final Map<Attribute, Integer> attributes = new LinkedHashMap<>();
499         private Set<String> commentsPre;
500         private Set<String> commentsPost;
501         private String model;
502         private boolean isOrderedElement;
503         private boolean isDeprecatedElement;
504         private boolean isTechPreviewElement;
505         private ElementStatus elementStatus = ElementStatus.regular;
506         private ValueConstraint valueConstraint = ValueConstraint.nonempty;
507 
Element(String name2)508         private Element(String name2) {
509             name = name2.intern();
510         }
511 
setChildren(DtdData dtdData, String model, Set<String> precomments)512         private void setChildren(DtdData dtdData, String model, Set<String> precomments) {
513             this.commentsPre = precomments;
514             rawModel = model;
515             this.model = clean(model);
516             valueConstraint = ValueConstraint.empty;
517             if (model.equals("EMPTY")) {
518                 type = ElementType.EMPTY;
519                 return;
520             }
521             type = ElementType.CHILDREN;
522             for (String part : FILLER.split(model)) {
523                 if (part.length() != 0) {
524                     if (part.equals("#PCDATA")) {
525                         type = ElementType.PCDATA;
526                         if (HACK_PCDATA_ALLOWS_EMPTY.get(dtdData.dtdType).contains(name)) {
527                             // TODO move to @ annotation in .dtd file
528                             valueConstraint = ValueConstraint.any;
529                         } else {
530                             valueConstraint = ValueConstraint.nonempty;
531                         }
532                     } else if (part.equals("ANY")) {
533                         type = ElementType.ANY;
534                     } else {
535                         CldrUtility.putNew(children, dtdData.elementFrom(part), children.size());
536                     }
537                 }
538             }
539             if ((type == ElementType.CHILDREN) == (children.size() == 0)
540                     && !model.startsWith("(#PCDATA|cp")) {
541                 throw new IllegalArgumentException(
542                         "CLDR does not permit Mixed content. " + name + ":" + model);
543             }
544         }
545 
546         static final Pattern CLEANER1 = PatternCache.get("([,|(])(?=\\S)");
547         static final Pattern CLEANER2 = PatternCache.get("(?=\\S)([|)])");
548 
clean(String model2)549         private String clean(String model2) {
550             // (x) -> ( x );
551             // x,y -> x, y
552             // x|y -> x | y
553             String result = CLEANER1.matcher(model2).replaceAll("$1 ");
554             result = CLEANER2.matcher(result).replaceAll(" $1");
555             return result.equals(model2) ? model2 : result; // for debugging
556         }
557 
containsAttribute(String string)558         public boolean containsAttribute(String string) {
559             for (Attribute a : attributes.keySet()) {
560                 if (a.name.equals(string)) {
561                     return true;
562                 }
563             }
564             return false;
565         }
566 
567         @Override
toString()568         public String toString() {
569             return name;
570         }
571 
toDtdString()572         public String toDtdString() {
573             return "<!ELEMENT " + name + " " + getRawModel() + " >";
574         }
575 
getType()576         public ElementType getType() {
577             return type;
578         }
579 
getChildren()580         public Map<Element, Integer> getChildren() {
581             return Collections.unmodifiableMap(children);
582         }
583 
getAttributes()584         public Map<Attribute, Integer> getAttributes() {
585             return Collections.unmodifiableMap(attributes);
586         }
587 
588         @Override
getName()589         public String getName() {
590             return name;
591         }
592 
getChildNamed(String string)593         public Element getChildNamed(String string) {
594             for (Element e : children.keySet()) {
595                 if (e.name.equals(string)) {
596                     return e;
597                 }
598             }
599             return null;
600         }
601 
getAttributeNamed(String string)602         public Attribute getAttributeNamed(String string) {
603             for (Attribute a : attributes.keySet()) {
604                 if (a.name.equals(string)) {
605                     return a;
606                 }
607             }
608             return null;
609         }
610 
addComment(String addition)611         public void addComment(String addition) {
612             if (addition.startsWith("@")) {
613                 // there are exactly 4 cases: deprecated, ordered, techPreview and metadata
614                 switch (addition) {
615                     case "@ORDERED":
616                         isOrderedElement = true;
617                         break;
618                     case "@DEPRECATED":
619                         isDeprecatedElement = true;
620                         break;
621                     case "@METADATA":
622                         elementStatus = ElementStatus.metadata;
623                         break;
624                     case "@TECHPREVIEW":
625                         isTechPreviewElement = true;
626                         break;
627                     default:
628                         if (addition.startsWith("@MATCH") || addition.startsWith("@VALUE")) {
629                             // Try to catch this case
630                             throw new IllegalArgumentException(
631                                     name
632                                             + ": Unrecognized ELEMENT annotation (this isn't ATTLIST!): "
633                                             + addition);
634                         } else {
635                             throw new IllegalArgumentException(
636                                     name + ": Unrecognized ELEMENT annotation: " + addition);
637                         }
638                 }
639                 return;
640             }
641             commentsPost = addUnmodifiable(commentsPost, addition.trim());
642         }
643 
644         /** Special version of equals. Only the name is considered in the identity. */
645         @Override
equals(Object obj)646         public boolean equals(Object obj) {
647             if (!(obj instanceof Element)) {
648                 return false;
649             }
650             Element that = (Element) obj;
651             return name.equals(that.name)
652             // not relevant to the identity of the object
653             //                && Objects.equals(comment, that.comment)
654             //                && type == that.type
655             //                && attributes.equals(that.attributes)
656             //                && children.equals(that.children)
657             ;
658         }
659 
660         /** Special version of hashcode. Only the name is considered in the identity. */
661         @Override
hashCode()662         public int hashCode() {
663             return name.hashCode()
664             // not relevant to the identity of the object
665             // * 37 + Objects.hashCode(comment)
666             // ) * 37 + Objects.hashCode(type)
667             //                ) * 37 + attributes.hashCode()
668             //                ) * 37 + children.hashCode()
669             ;
670         }
671 
isDeprecated()672         public boolean isDeprecated() {
673             return isDeprecatedElement;
674         }
675 
isOrdered()676         public boolean isOrdered() {
677             return isOrderedElement;
678         }
679 
isTechPreview()680         public boolean isTechPreview() {
681             return isTechPreviewElement;
682         }
683 
getElementStatus()684         public ElementStatus getElementStatus() {
685             return elementStatus;
686         }
687 
getValueConstraint()688         public ValueConstraint getValueConstraint() {
689             return valueConstraint;
690         }
691 
692         /**
693          * @return the rawModel
694          */
getRawModel()695         public String getRawModel() {
696             return rawModel;
697         }
698     }
699 
elementFrom(String name)700     private Element elementFrom(String name) {
701         Element result = nameToElement.get(name);
702         if (result == null) {
703             nameToElement.put(name, result = new Element(name));
704         }
705         return result;
706     }
707 
addElement(String name2, String model)708     private void addElement(String name2, String model) {
709         Element element = elementFrom(name2);
710         element.setChildren(this, model, preCommentCache);
711         preCommentCache = null;
712         lastElement = element;
713         lastAttribute = null;
714     }
715 
addComment(String comment)716     private void addComment(String comment) {
717         comment = comment.trim();
718         if (preCommentCache != null || comment.startsWith("#")) { // the precomments are "sticky"
719             if (comment.startsWith("@")) {
720                 throw new IllegalArgumentException(
721                         "@ annotation comment must follow element or attribute, without intervening # comment");
722             }
723             preCommentCache = addUnmodifiable(preCommentCache, comment);
724         } else if (lastElement != null) {
725             lastElement.addComment(comment);
726         } else if (lastAttribute != null) {
727             lastAttribute.addComment(comment);
728         } else {
729             if (comment.startsWith("@")) {
730                 throw new IllegalArgumentException(
731                         "@ annotation comment must follow element or attribute, without intervening # comment");
732             }
733             preCommentCache = addUnmodifiable(preCommentCache, comment);
734         }
735     }
736 
737     // TODO hide this
738     /**
739      * @deprecated
740      */
741     @Deprecated
742     @Override
handleElementDecl(String name, String model)743     public void handleElementDecl(String name, String model) {
744         if (SHOW_ALL) {
745             // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?,
746             // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?,
747             // listPatterns?, collations?, posix?, segmentations?, rbnf?, annotations?, metadata?,
748             // references?, special*))) >
749             System.out.println(System.lineSeparator() + "<!ELEMENT " + name + " " + model + " >");
750         }
751         addElement(name, model);
752     }
753 
754     // TODO hide this
755     /**
756      * @deprecated
757      */
758     @Deprecated
759     @Override
handleStartDtd(String name, String publicId, String systemId)760     public void handleStartDtd(String name, String publicId, String systemId) {
761         DtdType explicitDtdType = DtdType.valueOf(name);
762         if (explicitDtdType != dtdType && explicitDtdType != dtdType.rootType) {
763             throw new IllegalArgumentException("Mismatch in dtdTypes");
764         }
765     }
766 
767     /**
768      * @deprecated
769      */
770     @Deprecated
771     @Override
handleAttributeDecl( String eName, String aName, String type, String mode, String value)772     public void handleAttributeDecl(
773             String eName, String aName, String type, String mode, String value) {
774         if (SHOW_ALL) {
775             // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true |
776             // false ) #IMPLIED >
777             // <!ATTLIST version number CDATA #REQUIRED >
778             // <!ATTLIST version cldrVersion CDATA #FIXED "27" >
779 
780             System.out.println(
781                     "<!ATTLIST "
782                             + eName
783                             + " "
784                             + aName
785                             + " "
786                             + type
787                             + " "
788                             + mode
789                             + (value == null ? "" : " \"" + value + "\"")
790                             + " >");
791         }
792         // HACK for 1.1.1
793         if (eName.equals("draft")) {
794             eName = "week";
795         }
796         addAttribute(eName, aName, type, mode, value);
797     }
798 
799     /**
800      * @deprecated
801      */
802     @Deprecated
803     @Override
handleComment(String path, String comment)804     public void handleComment(String path, String comment) {
805         if (comment.contains("Copyright")) {
806             // Zap the copyright comment, replace it with the current one.
807             comment = CldrUtility.getCopyrightString();
808         }
809         if (SHOW_ALL) {
810             // <!-- true and false are deprecated. -->
811             System.out.println("<!-- " + comment.trim() + " -->");
812         }
813         addComment(comment);
814     }
815 
816     // TODO hide this
817     /**
818      * @deprecated
819      */
820     @Deprecated
821     @Override
handleEndDtd()822     public void handleEndDtd() {
823         throw new XMLFileReader.AbortException();
824     }
825 
826     /**
827      * Note that it always gets the trunk version
828      *
829      * @deprecated depends on static config, use {@link DtdData#getInstance(DtdType, File)} instead
830      */
831     @Deprecated
getInstance(DtdType type)832     public static DtdData getInstance(DtdType type) {
833         return getInstance(type, CLDRConfig.getInstance().getCldrBaseDirectory());
834     }
835 
836     /** Special form using version, used only by tests, etc. */
getInstance(DtdType type, String version)837     public static DtdData getInstance(DtdType type, String version) {
838         // Map out versions that had no DTD
839         if (version != null) {
840             switch (version) {
841                 case "1.1.1":
842                     version = "1.1";
843                     break;
844                 case "1.4.1":
845                     version = "1.4";
846                     break;
847                 case "1.5.1":
848                     version = "1.5.0.1";
849                     break;
850                 default:
851             }
852         }
853         File directory =
854                 version == null
855                         ? CLDRConfig.getInstance().getCldrBaseDirectory()
856                         : new File(CLDRPaths.ARCHIVE_DIRECTORY + "/cldr-" + version);
857 
858         return getInstance(type, version, directory);
859     }
860 
861     private static final ConcurrentMap<Pair<DtdType, File>, DtdData> CACHE =
862             new ConcurrentHashMap<>();
863 
864     /**
865      * Normal version of DtdData Get a DtdData, given the CLDR root directory.
866      *
867      * @param type which DtdType to return
868      * @param directory the CLDR Root directory, which contains the "common" directory.
869      * @return
870      */
getInstance(DtdType type, File directory)871     public static DtdData getInstance(DtdType type, File directory) {
872         Pair<DtdType, File> key = new Pair<>(type, directory);
873         DtdData data = CACHE.computeIfAbsent(key, k -> getInstance(type, null, directory));
874         return data;
875     }
876 
getInstance(DtdType type, String version, File directory)877     private static DtdData getInstance(DtdType type, String version, File directory) {
878         DtdData simpleHandler = new DtdData(type, version);
879         XMLFileReader xfr = new XMLFileReader().setHandler(simpleHandler);
880         if (type != type.rootType) {
881             // read the real first, then add onto it.
882             readFile(type.rootType, xfr, directory);
883         }
884         readFile(type, xfr, directory);
885         // HACK
886         if (type == DtdType.ldmlICU) {
887             Element special = simpleHandler.nameToElement.get("special");
888             for (String extraElementName :
889                     Arrays.asList(
890                             "icu:breakIteratorData",
891                             "icu:UCARules",
892                             "icu:scripts",
893                             "icu:transforms",
894                             "icu:ruleBasedNumberFormats",
895                             "icu:isLeapMonth",
896                             "icu:version",
897                             "icu:breakDictionaryData",
898                             "icu:depends")) {
899                 Element extraElement = simpleHandler.nameToElement.get(extraElementName);
900                 special.children.put(extraElement, special.children.size());
901             }
902         }
903         if (simpleHandler.ROOT.children.size() == 0) {
904             throw new IllegalArgumentException(
905                     "Internal Error: DtdData.getInstance("
906                             + type
907                             + ", ...): readFile() failed to return any children!");
908             // should never happen
909         }
910         simpleHandler.finish();
911         simpleHandler.freeze();
912         return simpleHandler;
913     }
914 
finish()915     private void finish() {
916         dtdComparator = new DtdComparator();
917     }
918 
readFile(DtdType type, XMLFileReader xfr, File directory)919     public static void readFile(DtdType type, XMLFileReader xfr, File directory) {
920         File file = new File(directory, type.dtdPath);
921         StringReader s =
922                 new StringReader(
923                         "<?xml version='1.0' encoding='UTF-8' ?>"
924                                 + "<!DOCTYPE "
925                                 + type
926                                 + " SYSTEM '"
927                                 + file.getAbsolutePath()
928                                 + "'>");
929         try {
930             xfr.read(type.toString(), s, -1, true); //  DTD_TYPE_TO_FILE.get(type)
931         } catch (IllegalArgumentException iae) {
932             // rethrow
933             throw new IllegalArgumentException("Error while reading " + type, iae);
934         }
935     }
936 
freeze()937     private void freeze() {
938         if (version == null) { // only generate for new versions
939             MergeLists<String> elementMergeList = new MergeLists<>();
940             elementMergeList.add(dtdType.toString());
941             MergeLists<String> attributeMergeList = new MergeLists<>();
942             attributeMergeList.add("_q");
943 
944             for (Element element : nameToElement.values()) {
945                 if (element.children.size() > 0) {
946                     Collection<String> names = getNames(element.children.keySet());
947                     elementMergeList.add(names);
948                     if (DEBUG) {
949                         System.out.println(element.getName() + "\t→\t" + names);
950                     }
951                 }
952                 if (element.attributes.size() > 0) {
953                     Collection<String> names = getNames(element.attributes.keySet());
954                     attributeMergeList.add(names);
955                     if (DEBUG) {
956                         System.out.println(element.getName() + "\t→\t@" + names);
957                     }
958                 }
959             }
960             List<String> elementList = elementMergeList.merge();
961             List<String> attributeList = attributeMergeList.merge();
962             if (DEBUG) {
963                 System.out.println("Element Ordering:\t" + elementList);
964                 System.out.println("Attribute Ordering:\t" + attributeList);
965             }
966             elementComparator = new MapComparator<>(elementList).setErrorOnMissing(true).freeze();
967             attributeComparator =
968                     new MapComparator<>(attributeList).setErrorOnMissing(true).freeze();
969         }
970         nameToAttributes.freeze();
971         nameToElement = Collections.unmodifiableMap(nameToElement);
972     }
973 
getNames(Collection<? extends Named> keySet)974     private Collection<String> getNames(Collection<? extends Named> keySet) {
975         List<String> result = new ArrayList<>();
976         for (Named e : keySet) {
977             result.add(e.getName());
978         }
979         return result;
980     }
981 
982     public enum DtdItem {
983         ELEMENT,
984         ATTRIBUTE,
985         ATTRIBUTE_VALUE
986     }
987 
988     public interface AttributeValueComparator {
compare(String element, String attribute, String value1, String value2)989         public int compare(String element, String attribute, String value1, String value2);
990     }
991 
getDtdComparator(AttributeValueComparator avc)992     public Comparator<String> getDtdComparator(AttributeValueComparator avc) {
993         return dtdComparator;
994     }
995 
getDtdComparator()996     public DtdComparator getDtdComparator() {
997         return dtdComparator;
998     }
999 
1000     public class DtdComparator implements Comparator<String> {
1001         @Override
compare(String path1, String path2)1002         public int compare(String path1, String path2) {
1003             XPathParts a = XPathParts.getFrozenInstance(path1);
1004             XPathParts b = XPathParts.getFrozenInstance(path2);
1005             return xpathComparator(a, b);
1006         }
1007 
xpathComparator(XPathParts a, XPathParts b)1008         public int xpathComparator(XPathParts a, XPathParts b) {
1009             // there must always be at least one element
1010             String baseA = a.getElement(0);
1011             String baseB = b.getElement(0);
1012             if (!ROOT.name.equals(baseA) || !ROOT.name.equals(baseB)) {
1013                 throw new IllegalArgumentException(
1014                         "Comparing different DTDs: " + ROOT.name + ", " + baseA + ", " + baseB);
1015             }
1016             int min = Math.min(a.size(), b.size());
1017             Element parent = ROOT;
1018             Element elementA;
1019             for (int i = 1; i < min; ++i, parent = elementA) {
1020                 // add extra test for "fake" elements, used in diffing. they always start with _
1021                 String elementRawA = a.getElement(i);
1022                 String elementRawB = b.getElement(i);
1023                 if (elementRawA.startsWith("_")) {
1024                     return elementRawB.startsWith("_") ? elementRawA.compareTo(elementRawB) : -1;
1025                 } else if (elementRawB.startsWith("_")) {
1026                     return 1;
1027                 }
1028                 //
1029                 elementA = nameToElement.get(elementRawA);
1030                 Element elementB = nameToElement.get(elementRawB);
1031                 if (elementA != elementB) {
1032                     int aa = parent.children.get(elementA);
1033                     int bb = parent.children.get(elementB);
1034                     return aa - bb;
1035                 }
1036                 int countA = a.getAttributeCount(i);
1037                 int countB = b.getAttributeCount(i);
1038                 if (countA == 0 && countB == 0) {
1039                     continue;
1040                 }
1041                 // we have two ways to compare the attributes. One based on the dtd,
1042                 // and one based on explicit comparators
1043 
1044                 // at this point the elements are the same and correspond to elementA
1045                 // in the dtd
1046 
1047                 // Handle the special added elements
1048                 String aqValue = a.getAttributeValue(i, "_q");
1049                 if (aqValue != null) {
1050                     String bqValue = b.getAttributeValue(i, "_q");
1051                     if (!aqValue.equals(bqValue)) {
1052                         int aValue = Integer.parseInt(aqValue);
1053                         int bValue = Integer.parseInt(bqValue);
1054                         return aValue - bValue;
1055                     }
1056                     --countA;
1057                     --countB;
1058                 }
1059 
1060                 attributes:
1061                 for (Entry<Attribute, Integer> attr : elementA.attributes.entrySet()) {
1062                     Attribute main = attr.getKey();
1063                     String valueA = a.getAttributeValue(i, main.name);
1064                     String valueB = b.getAttributeValue(i, main.name);
1065                     if (valueA == null) {
1066                         if (valueB != null) {
1067                             return -1;
1068                         }
1069                     } else if (valueB == null) {
1070                         return 1;
1071                     } else if (valueA.equals(valueB)) {
1072                         --countA;
1073                         --countB;
1074                         if (countA == 0 && countB == 0) {
1075                             break attributes;
1076                         }
1077                         continue; // TODO
1078                     } else if (main.attributeValueComparator != null) {
1079                         return main.attributeValueComparator.compare(valueA, valueB);
1080                     } else if (main.values.size() != 0) {
1081                         int aa = main.values.get(valueA);
1082                         int bb = main.values.get(valueB);
1083                         return aa - bb;
1084                     } else {
1085                         return valueA.compareTo(valueB);
1086                     }
1087                 }
1088                 if (countA != 0 || countB != 0) {
1089                     throw new IllegalArgumentException();
1090                 }
1091             }
1092             return a.size() - b.size();
1093         }
1094     }
1095 
getAttributeComparator()1096     public MapComparator<String> getAttributeComparator() {
1097         return attributeComparator;
1098     }
1099 
getElementComparator()1100     public MapComparator<String> getElementComparator() {
1101         return elementComparator;
1102     }
1103 
getAttributesFromName()1104     public Relation<String, Attribute> getAttributesFromName() {
1105         return nameToAttributes;
1106     }
1107 
getElementFromName()1108     public Map<String, Element> getElementFromName() {
1109         return nameToElement;
1110     }
1111 
1112     @Override
toString()1113     public String toString() {
1114         StringBuilder b = new StringBuilder();
1115         // <!ELEMENT ldml (identity, (alias | (fallback*, localeDisplayNames?, layout?,
1116         // contextTransforms?, characters?, delimiters?, measurement?, dates?, numbers?, units?,
1117         // listPatterns?, collations?, posix?, segmentations?, rbnf?, metadata?, references?,
1118         // special*))) >
1119         // <!ATTLIST ldml draft ( approved | contributed | provisional | unconfirmed | true | false
1120         // ) #IMPLIED > <!-- true and false are deprecated. -->
1121         Seen seen = new Seen(dtdType);
1122         seen.seenElements.add(ANY);
1123         seen.seenElements.add(PCDATA);
1124         toString(ROOT, b, seen);
1125 
1126         // Hack for ldmlIcu: catch the items that are not mentioned in the original
1127         int currentEnd = b.length();
1128         for (Element e : nameToElement.values()) {
1129             toString(e, b, seen);
1130         }
1131         if (currentEnd != b.length()) {
1132             b.insert(
1133                     currentEnd,
1134                     System.lineSeparator()
1135                             + System.lineSeparator()
1136                             + "<!-- Elements not reachable from root! -->"
1137                             + System.lineSeparator());
1138         }
1139         return b.toString();
1140     }
1141 
1142     static final class Seen {
1143         Set<Element> seenElements = new HashSet<>();
1144         Set<Attribute> seenAttributes = new HashSet<>();
1145 
Seen(DtdType dtdType)1146         public Seen(DtdType dtdType) {
1147             if (dtdType.rootType == dtdType) {
1148                 return;
1149             }
1150             DtdData otherData = DtdData.getInstance(dtdType.rootType);
1151             walk(otherData, otherData.ROOT);
1152             seenElements.remove(otherData.nameToElement.get("special"));
1153         }
1154 
walk(DtdData otherData, Element current)1155         private void walk(DtdData otherData, Element current) {
1156             seenElements.add(current);
1157             seenAttributes.addAll(current.attributes.keySet());
1158             for (Element e : current.children.keySet()) {
1159                 walk(otherData, e);
1160             }
1161         }
1162     }
1163 
getDescendents(Element start, Set<Element> toAddTo)1164     public Set<Element> getDescendents(Element start, Set<Element> toAddTo) {
1165         if (!toAddTo.contains(start)) {
1166             toAddTo.add(start);
1167             for (Element e : start.children.keySet()) {
1168                 getDescendents(e, toAddTo);
1169             }
1170         }
1171         return toAddTo;
1172     }
1173 
toString(Element current, StringBuilder b, Seen seen)1174     private void toString(Element current, StringBuilder b, Seen seen) {
1175         boolean first = true;
1176         if (seen.seenElements.contains(current)) {
1177             return;
1178         }
1179         seen.seenElements.add(current);
1180         boolean elementDeprecated = isDeprecated(current.name, "*", "*");
1181 
1182         showComments(b, current.commentsPre, true);
1183         b.append("\n\n<!ELEMENT " + current.name + " " + current.model + " >");
1184         if (USE_SYNTHESIZED) {
1185             Element aliasElement = getElementFromName().get("alias");
1186             // b.append(current.rawChildren);
1187             if (!current.children.isEmpty()) {
1188                 LinkedHashSet<Element> elements = new LinkedHashSet<>(current.children.keySet());
1189                 boolean hasAlias = aliasElement != null && elements.remove(aliasElement);
1190                 // boolean hasSpecial = specialElement != null && elements.remove(specialElement);
1191                 if (hasAlias) {
1192                     b.append("(alias |");
1193                 }
1194                 b.append("(");
1195                 // <!ELEMENT transformNames ( alias | (transformName | special)* ) >
1196                 // <!ELEMENT layout ( alias | (orientation*, inList*, inText*, special*) ) >
1197 
1198                 for (Element e : elements) {
1199                     if (first) {
1200                         first = false;
1201                     } else {
1202                         b.append(", ");
1203                     }
1204                     b.append(e.name);
1205                     if (e.type != ElementType.PCDATA) {
1206                         b.append("*");
1207                     }
1208                 }
1209                 if (hasAlias) {
1210                     b.append(")");
1211                 }
1212                 b.append(")");
1213             } else {
1214                 b.append(current.type == null ? "???" : current.type.source);
1215             }
1216             b.append(">");
1217         }
1218         showComments(b, current.commentsPost, false);
1219         if (isOrdered(current.name)) {
1220             b.append(COMMENT_PREFIX + "<!--@ORDERED-->");
1221         }
1222         if (isTechPreview(current.name)) {
1223             b.append(COMMENT_PREFIX + "<!--@TECHPREVIEW-->");
1224         }
1225         if (current.getElementStatus() != ElementStatus.regular) {
1226             b.append(
1227                     COMMENT_PREFIX
1228                             + "<!--@"
1229                             + current.getElementStatus().toString().toUpperCase(Locale.ROOT)
1230                             + "-->");
1231         }
1232         if (elementDeprecated) {
1233             b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1234         }
1235 
1236         LinkedHashSet<String> deprecatedValues = new LinkedHashSet<>();
1237 
1238         for (Attribute a : current.attributes.keySet()) {
1239             if (seen.seenAttributes.contains(a)) {
1240                 continue;
1241             }
1242             seen.seenAttributes.add(a);
1243             boolean attributeDeprecated =
1244                     elementDeprecated || isDeprecated(current.name, a.name, "*");
1245             boolean attributeUEscaped = allowsUEscape(current.name, a.name, "*");
1246             deprecatedValues.clear();
1247 
1248             showComments(b, a.commentsPre, true);
1249             b.append("\n<!ATTLIST " + current.name + " " + a.name);
1250             if (a.type == AttributeType.ENUMERATED_TYPE) {
1251                 b.append(" (");
1252                 first = true;
1253                 for (String s : a.values.keySet()) {
1254                     if (first) {
1255                         first = false;
1256                     } else {
1257                         b.append(" | ");
1258                     }
1259                     b.append(s);
1260                     if (!attributeDeprecated && isDeprecated(current.name, a.name, s)) {
1261                         deprecatedValues.add(s);
1262                     }
1263                 }
1264                 b.append(")");
1265             } else {
1266                 b.append(' ').append(a.type);
1267             }
1268             if (a.mode != Mode.NULL) {
1269                 b.append(" ").append(a.mode.source);
1270             }
1271             if (a.defaultValue != null) {
1272                 b.append(" \"").append(a.defaultValue).append('"');
1273             }
1274             b.append(" >");
1275             showComments(b, a.commentsPost, false);
1276             //            if (attributeDeprecated != deprecatedComment) {
1277             //                System.out.println("*** BAD DEPRECATION ***" + a);
1278             //            }
1279             if (a.matchValue != null) {
1280                 b.append(COMMENT_PREFIX + "<!--@MATCH:" + a.matchValue.getName() + "-->");
1281             }
1282             if (METADATA.contains(a.name) || a.attributeStatus == AttributeStatus.metadata) {
1283                 b.append(COMMENT_PREFIX + "<!--@METADATA-->");
1284             } else if (!isDistinguishing(current.name, a.name)) {
1285                 b.append(COMMENT_PREFIX + "<!--@VALUE-->");
1286             }
1287             if (attributeDeprecated) {
1288                 b.append(COMMENT_PREFIX + "<!--@DEPRECATED-->");
1289             } else if (!deprecatedValues.isEmpty()) {
1290                 b.append(
1291                         COMMENT_PREFIX
1292                                 + "<!--@DEPRECATED:"
1293                                 + Joiner.on(", ").join(deprecatedValues)
1294                                 + "-->");
1295             }
1296             if (attributeUEscaped) {
1297                 b.append(COMMENT_PREFIX + "<!--@ALLOWS_UESC-->");
1298             }
1299         }
1300         if (current.children.size() > 0) {
1301             for (Element e : current.children.keySet()) {
1302                 toString(e, b, seen);
1303             }
1304         }
1305     }
1306 
showComments(StringBuilder b, Set<String> comments, boolean separate)1307     private void showComments(StringBuilder b, Set<String> comments, boolean separate) {
1308         if (comments == null) {
1309             return;
1310         }
1311         if (separate && b.length() != 0) {
1312             b.append(System.lineSeparator());
1313         }
1314         for (String c : comments) {
1315             boolean deprecatedComment = false; // the following served its purpose...
1316             // c.toLowerCase(Locale.ENGLISH).contains("deprecat");
1317             if (!deprecatedComment) {
1318                 if (separate) {
1319                     // special handling for very first comment
1320                     if (b.length() == 0) {
1321                         b.append("<!--")
1322                                 .append(System.lineSeparator())
1323                                 .append(c)
1324                                 .append(System.lineSeparator())
1325                                 .append("-->");
1326                         continue;
1327                     }
1328                     b.append(System.lineSeparator());
1329                 } else {
1330                     b.append(COMMENT_PREFIX);
1331                 }
1332                 b.append("<!-- ").append(c).append(" -->");
1333             }
1334         }
1335     }
1336 
removeFirst(Collection<T> elements, Transform<T, Boolean> matcher)1337     public static <T> T removeFirst(Collection<T> elements, Transform<T, Boolean> matcher) {
1338         for (Iterator<T> it = elements.iterator(); it.hasNext(); ) {
1339             T item = it.next();
1340             if (matcher.transform(item) == Boolean.TRUE) {
1341                 it.remove();
1342                 return item;
1343             }
1344         }
1345         return null;
1346     }
1347 
getElements()1348     public Set<Element> getElements() {
1349         return new LinkedHashSet<>(nameToElement.values());
1350     }
1351 
getAttributes()1352     public Set<Attribute> getAttributes() {
1353         return new LinkedHashSet<>(nameToAttributes.values());
1354     }
1355 
isDistinguishing(String elementName, String attribute)1356     public boolean isDistinguishing(String elementName, String attribute) {
1357         return getAttributeStatus(elementName, attribute) == AttributeStatus.distinguished;
1358     }
1359 
1360     static final Set<String> METADATA =
1361             new HashSet<>(Arrays.asList("references", "standard", "draft"));
1362 
addUnmodifiable(Set<String> comment, String addition)1363     static final Set<String> addUnmodifiable(Set<String> comment, String addition) {
1364         if (comment == null) {
1365             return Collections.singleton(addition);
1366         } else {
1367             comment = new LinkedHashSet<>(comment);
1368             comment.add(addition);
1369             return Collections.unmodifiableSet(comment);
1370         }
1371     }
1372 
1373     public class IllegalByDtdException extends RuntimeException {
1374         private static final long serialVersionUID = 1L;
1375         public final String elementName;
1376         public final String attributeName;
1377         public final String attributeValue;
1378 
IllegalByDtdException( String elementName, String attributeName, String attributeValue)1379         public IllegalByDtdException(
1380                 String elementName, String attributeName, String attributeValue) {
1381             this.elementName = elementName;
1382             this.attributeName = attributeName;
1383             this.attributeValue = attributeValue;
1384         }
1385 
1386         @Override
getMessage()1387         public String getMessage() {
1388             return "Dtd "
1389                     + dtdType
1390                     + " doesn’t allow "
1391                     + "element="
1392                     + elementName
1393                     + (attributeName == null ? "" : ", attribute: " + attributeName)
1394                     + (attributeValue == null ? "" : ", attributeValue: " + attributeValue);
1395         }
1396     }
1397 
1398     // @SuppressWarnings("unused")
isDeprecated(String elementName, String attributeName, String attributeValue)1399     public boolean isDeprecated(String elementName, String attributeName, String attributeValue) {
1400         Element element = getElementThrowingIfNull(elementName, null, null);
1401         if (element.isDeprecatedElement) {
1402             return true;
1403         }
1404         if ("*".equals(attributeName) || "_q".equals(attributeName)) {
1405             return false;
1406         }
1407         Attribute attribute = element.getAttributeNamed(attributeName);
1408         if (attribute == null) {
1409             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1410         } else if (attribute.isDeprecatedAttribute) {
1411             return true;
1412         }
1413         return attribute.deprecatedValues.contains(
1414                 attributeValue); // don't need special test for "*"
1415     }
1416 
allowsUEscape(String elementName, String attributeName, String attributeValue)1417     public boolean allowsUEscape(String elementName, String attributeName, String attributeValue) {
1418         Element element = getElementThrowingIfNull(elementName, null, null);
1419         Attribute attribute = element.getAttributeNamed(attributeName);
1420         if (attribute == null) {
1421             throw new IllegalByDtdException(elementName, attributeName, attributeValue);
1422         } else if (attribute.allowsUEscape()) {
1423             return true;
1424         }
1425         return false;
1426     }
1427 
1428     /**
1429      * Returns whether an element (specified by its full name) is ordered. This method understands
1430      * all elements in the DTDs used (including the ICU extensions), but will throw
1431      * IllegalByDtdException for unknown elements. See CLDR-8614 for more background.
1432      */
isOrdered(String elementName)1433     public boolean isOrdered(String elementName) {
1434         Element element = getElementThrowingIfNull(elementName, null, null);
1435         return element.isOrdered();
1436     }
1437 
getElementThrowingIfNull( String elementName, String attributeName, String value)1438     public Element getElementThrowingIfNull(
1439             String elementName, String attributeName, String value) {
1440         Element element = nameToElement.get(elementName);
1441         if (element == null) {
1442             throw new IllegalByDtdException(elementName, attributeName, value);
1443         }
1444         return element;
1445     }
1446 
1447     /**
1448      * Returns whether an element (specified by its full name) is a tech preview. This method
1449      * understands all elements in the DTDs used (including the ICU extensions), but will throw
1450      * IllegalByDtdException for unknown elements. See CLDR-8614 for more background.
1451      */
isTechPreview(String elementName)1452     public boolean isTechPreview(String elementName) {
1453         Element element = getElementThrowingIfNull(elementName, null, null);
1454         return element.isTechPreview();
1455     }
1456 
getAttributeStatus(String elementName, String attributeName)1457     public AttributeStatus getAttributeStatus(String elementName, String attributeName) {
1458         if ("_q".equals(attributeName)) {
1459             return AttributeStatus.distinguished; // special case
1460         }
1461         Element element = nameToElement.get(elementName);
1462         if (element == null) {
1463             if (elementName.startsWith("icu:")) {
1464                 return AttributeStatus.distinguished;
1465             }
1466             throw new IllegalByDtdException(elementName, attributeName, null);
1467         }
1468         Attribute attribute = element.getAttributeNamed(attributeName);
1469         if (attribute == null) {
1470             if (elementName.startsWith("icu:")) {
1471                 return AttributeStatus.distinguished;
1472             }
1473             throw new IllegalByDtdException(elementName, attributeName, null);
1474         }
1475         return attribute.attributeStatus;
1476     }
1477 
1478     // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1479     private static MapComparator<String> valueOrdering =
1480             new MapComparator<String>().setErrorOnMissing(false).freeze();
1481 
1482     static MapComparator<String> dayValueOrder =
1483             new MapComparator<String>()
1484                     .add("sun", "mon", "tue", "wed", "thu", "fri", "sat")
1485                     .freeze();
1486     static MapComparator<String> dayPeriodOrder =
1487             new MapComparator<String>()
1488                     .add(
1489                             "midnight",
1490                             "am",
1491                             "noon",
1492                             "pm",
1493                             "morning1",
1494                             "morning2",
1495                             "afternoon1",
1496                             "afternoon2",
1497                             "evening1",
1498                             "evening2",
1499                             "night1",
1500                             "night2",
1501                             // The ones on the following line are no longer used actively. Can be
1502                             // removed later?
1503                             "earlyMorning",
1504                             "morning",
1505                             "midDay",
1506                             "afternoon",
1507                             "evening",
1508                             "night",
1509                             "weeHours")
1510                     .freeze();
1511     static MapComparator<String> dateTimeFormatOrder =
1512             new MapComparator<String>().add("standard", "atTime").freeze();
1513     static MapComparator<String> listPatternOrder =
1514             new MapComparator<String>().add("start", "middle", "end", "2", "3").freeze();
1515     static MapComparator<String> widthOrder =
1516             new MapComparator<String>()
1517                     .add("abbreviated", "narrow", "short", "wide", "all")
1518                     .freeze();
1519     static MapComparator<String> lengthOrder =
1520             new MapComparator<String>().add("full", "long", "medium", "short").freeze();
1521     static MapComparator<String> dateFieldOrder =
1522             new MapComparator<String>()
1523                     .add(
1524                             "era",
1525                             "era-short",
1526                             "era-narrow",
1527                             "year",
1528                             "year-short",
1529                             "year-narrow",
1530                             "quarter",
1531                             "quarter-short",
1532                             "quarter-narrow",
1533                             "month",
1534                             "month-short",
1535                             "month-narrow",
1536                             "week",
1537                             "week-short",
1538                             "week-narrow",
1539                             "weekOfMonth",
1540                             "weekOfMonth-short",
1541                             "weekOfMonth-narrow",
1542                             "day",
1543                             "day-short",
1544                             "day-narrow",
1545                             "dayOfYear",
1546                             "dayOfYear-short",
1547                             "dayOfYear-narrow",
1548                             "weekday",
1549                             "weekday-short",
1550                             "weekday-narrow",
1551                             "weekdayOfMonth",
1552                             "weekdayOfMonth-short",
1553                             "weekdayOfMonth-narrow",
1554                             "sun",
1555                             "sun-short",
1556                             "sun-narrow",
1557                             "mon",
1558                             "mon-short",
1559                             "mon-narrow",
1560                             "tue",
1561                             "tue-short",
1562                             "tue-narrow",
1563                             "wed",
1564                             "wed-short",
1565                             "wed-narrow",
1566                             "thu",
1567                             "thu-short",
1568                             "thu-narrow",
1569                             "fri",
1570                             "fri-short",
1571                             "fri-narrow",
1572                             "sat",
1573                             "sat-short",
1574                             "sat-narrow",
1575                             "dayperiod-short",
1576                             "dayperiod",
1577                             "dayperiod-narrow",
1578                             "hour",
1579                             "hour-short",
1580                             "hour-narrow",
1581                             "minute",
1582                             "minute-short",
1583                             "minute-narrow",
1584                             "second",
1585                             "second-short",
1586                             "second-narrow",
1587                             "zone",
1588                             "zone-short",
1589                             "zone-narrow")
1590                     .freeze();
    // Orderings for person-name attribute values. Each one is filled from a constant exposed by
    // PersonNameFormatter; all but the first pass Object::toString alongside the constant.

    // Used for the "type" attribute of "nameField".
    static MapComparator<String> nameFieldOrder =
            new MapComparator<String>().add(PersonNameFormatter.ModifiedField.ALL_SAMPLES).freeze();
    // Used for the "order" attribute of "personName".
    static MapComparator<String> orderValueOrder =
            new MapComparator<String>()
                    .add(PersonNameFormatter.Order.ALL, Object::toString)
                    .freeze();
    // Used for the "length" attribute of "personName".
    static MapComparator<String> lengthValueOrder =
            new MapComparator<String>()
                    .add(PersonNameFormatter.Length.ALL, Object::toString)
                    .freeze();
    // Used for the "usage" attribute of "personName".
    static MapComparator<String> usageValueOrder =
            new MapComparator<String>()
                    .add(PersonNameFormatter.Usage.ALL, Object::toString)
                    .freeze();
    // Used for the "formality" attribute, whatever the element.
    static MapComparator<String> formalityValueOrder =
            new MapComparator<String>()
                    .add(PersonNameFormatter.Formality.ALL, Object::toString)
                    .freeze();
    // Used for the "item" attribute of "sampleName".
    static MapComparator<String> sampleNameItemOrder =
            new MapComparator<String>()
                    .add(PersonNameFormatter.SampleType.ALL, Object::toString)
                    .freeze();
1613 
    // TODO We could build most of the comparators above from the DTD data for literal values.
    // That way they would always be in sync.
1617 
    /**
     * Returns the shared comparator that orders unit identifiers.
     *
     * @return the single instance held by {@code UnitOrderHolder}
     */
    public static MapComparator<String> getUnitOrder() {
        return UnitOrderHolder.INSTANCE;
    }
1621 
1622     private static final class UnitOrderHolder {
1623         private static final MapComparator<String> INSTANCE =
1624                 //                new MapComparator<String>()
1625                 //
1626                 // .add(Validity.getInstance().getCodeToStatus(LstrType.unit).keySet())
1627                 //                        .freeze();
1628                 //    }
1629                 new MapComparator<>(
1630                                 Arrays.asList(
1631                                         "acceleration-g-force",
1632                                         "acceleration-meter-per-square-second",
1633                                         "acceleration-meter-per-second-squared", // deprecated
1634                                         "angle-revolution",
1635                                         "angle-radian",
1636                                         "angle-degree",
1637                                         "angle-arc-minute",
1638                                         "angle-arc-second",
1639                                         "area-square-kilometer",
1640                                         "area-hectare",
1641                                         "area-square-meter",
1642                                         "area-square-centimeter",
1643                                         "area-square-mile",
1644                                         "area-acre",
1645                                         "area-square-yard",
1646                                         "area-square-foot",
1647                                         "area-square-inch",
1648                                         "area-dunam",
1649                                         "concentr-karat",
1650                                         "proportion-karat", // deprecated
1651                                         "concentr-milligram-ofglucose-per-deciliter",
1652                                         "concentr-milligram-per-deciliter",
1653                                         "concentr-millimole-per-liter",
1654                                         "concentr-item",
1655                                         "concentr-portion",
1656                                         "concentr-permillion",
1657                                         "concentr-part-per-million", // deprecated
1658                                         "concentr-percent",
1659                                         "concentr-permille",
1660                                         "concentr-permyriad",
1661                                         "concentr-mole",
1662                                         "concentr-ofglucose",
1663                                         "consumption-liter-per-kilometer",
1664                                         "consumption-liter-per-100-kilometer",
1665                                         "consumption-liter-per-100kilometers", // deprecated
1666                                         "consumption-mile-per-gallon",
1667                                         "consumption-mile-per-gallon-imperial",
1668                                         "digital-petabyte",
1669                                         "digital-terabyte",
1670                                         "digital-terabit",
1671                                         "digital-gigabyte",
1672                                         "digital-gigabit",
1673                                         "digital-megabyte",
1674                                         "digital-megabit",
1675                                         "digital-kilobyte",
1676                                         "digital-kilobit",
1677                                         "digital-byte",
1678                                         "digital-bit",
1679                                         "duration-century",
1680                                         "duration-decade",
1681                                         "duration-year",
1682                                         "duration-year-person",
1683                                         "duration-quarter",
1684                                         "duration-month",
1685                                         "duration-month-person",
1686                                         "duration-week",
1687                                         "duration-week-person",
1688                                         "duration-day",
1689                                         "duration-day-person",
1690                                         "duration-hour",
1691                                         "duration-minute",
1692                                         "duration-second",
1693                                         "duration-millisecond",
1694                                         "duration-microsecond",
1695                                         "duration-nanosecond",
1696                                         "electric-ampere",
1697                                         "electric-milliampere",
1698                                         "electric-ohm",
1699                                         "electric-volt",
1700                                         "energy-kilocalorie",
1701                                         "energy-calorie",
1702                                         "energy-foodcalorie",
1703                                         "energy-kilojoule",
1704                                         "energy-joule",
1705                                         "energy-kilowatt-hour",
1706                                         "energy-electronvolt",
1707                                         "energy-british-thermal-unit",
1708                                         "energy-therm-us",
1709                                         "force-pound-force",
1710                                         "force-newton",
1711                                         "force-kilowatt-hour-per-100-kilometer",
1712                                         "frequency-gigahertz",
1713                                         "frequency-megahertz",
1714                                         "frequency-kilohertz",
1715                                         "frequency-hertz",
1716                                         "graphics-em",
1717                                         "graphics-pixel",
1718                                         "graphics-megapixel",
1719                                         "graphics-pixel-per-centimeter",
1720                                         "graphics-pixel-per-inch",
1721                                         "graphics-dot-per-centimeter",
1722                                         "graphics-dot-per-inch",
1723                                         "graphics-dot",
1724                                         "length-earth-radius",
1725                                         "length-100-kilometer",
1726                                         "length-kilometer",
1727                                         "length-meter",
1728                                         "length-decimeter",
1729                                         "length-centimeter",
1730                                         "length-millimeter",
1731                                         "length-micrometer",
1732                                         "length-nanometer",
1733                                         "length-picometer",
1734                                         "length-mile",
1735                                         "length-yard",
1736                                         "length-foot",
1737                                         "length-inch",
1738                                         "length-parsec",
1739                                         "length-light-year",
1740                                         "length-astronomical-unit",
1741                                         "length-furlong",
1742                                         "length-fathom",
1743                                         "length-nautical-mile",
1744                                         "length-mile-scandinavian",
1745                                         "length-point",
1746                                         "length-solar-radius",
1747                                         "light-lux",
1748                                         "light-candela",
1749                                         "light-lumen",
1750                                         "light-solar-luminosity",
1751                                         "mass-tonne",
1752                                         "mass-metric-ton",
1753                                         "mass-kilogram",
1754                                         "mass-gram",
1755                                         "mass-milligram",
1756                                         "mass-microgram",
1757                                         "mass-ton",
1758                                         "mass-stone",
1759                                         "mass-pound",
1760                                         "mass-ounce",
1761                                         "mass-ounce-troy",
1762                                         "mass-carat",
1763                                         "mass-dalton",
1764                                         "mass-earth-mass",
1765                                         "mass-solar-mass",
1766                                         "mass-grain",
1767                                         "power-gigawatt",
1768                                         "power-megawatt",
1769                                         "power-kilowatt",
1770                                         "power-watt",
1771                                         "power-milliwatt",
1772                                         "power-horsepower",
1773                                         "pressure-millimeter-ofhg",
1774                                         "pressure-millimeter-of-mercury", // deprecated
1775                                         "pressure-ofhg",
1776                                         "pressure-pound-force-per-square-inch",
1777                                         "pressure-pound-per-square-inch", // deprecated
1778                                         "pressure-inch-ofhg",
1779                                         "pressure-inch-hg", // deprecated
1780                                         "pressure-bar",
1781                                         "pressure-millibar",
1782                                         "pressure-atmosphere",
1783                                         "pressure-pascal",
1784                                         "pressure-hectopascal",
1785                                         "pressure-kilopascal",
1786                                         "pressure-megapascal",
1787                                         "speed-kilometer-per-hour",
1788                                         "speed-meter-per-second",
1789                                         "speed-mile-per-hour",
1790                                         "speed-knot",
1791                                         "speed-beaufort",
1792                                         "temperature-generic",
1793                                         "temperature-celsius",
1794                                         "temperature-fahrenheit",
1795                                         "temperature-kelvin",
1796                                         "torque-pound-force-foot",
1797                                         "torque-pound-foot", // deprecated
1798                                         "torque-newton-meter",
1799                                         "volume-cubic-kilometer",
1800                                         "volume-cubic-meter",
1801                                         "volume-cubic-centimeter",
1802                                         "volume-cubic-mile",
1803                                         "volume-cubic-yard",
1804                                         "volume-cubic-foot",
1805                                         "volume-cubic-inch",
1806                                         "volume-megaliter",
1807                                         "volume-hectoliter",
1808                                         "volume-liter",
1809                                         "volume-deciliter",
1810                                         "volume-centiliter",
1811                                         "volume-milliliter",
1812                                         "volume-pint-metric",
1813                                         "volume-cup-metric",
1814                                         "volume-acre-foot",
1815                                         "volume-bushel",
1816                                         "volume-gallon",
1817                                         "volume-gallon-imperial",
1818                                         "volume-quart",
1819                                         "volume-pint",
1820                                         "volume-pint-imperial",
1821                                         "volume-cup",
1822                                         "volume-fluid-ounce",
1823                                         "volume-fluid-ounce-imperial",
1824                                         "volume-tablespoon",
1825                                         "volume-teaspoon",
1826                                         "volume-barrel",
1827                                         "volume-dessert-spoon",
1828                                         "volume-dessert-spoon-imperial",
1829                                         "volume-drop",
1830                                         "volume-dram",
1831                                         "volume-jigger",
1832                                         "volume-pinch",
1833                                         "volume-quart-imperial",
1834                                         "angle-steradian",
1835                                         "concentr-katal",
1836                                         "electric-coulomb",
1837                                         "electric-farad",
1838                                         "electric-henry",
1839                                         "electric-siemens",
1840                                         "energy-calorie-it",
1841                                         "energy-british-thermal-unit-it",
1842                                         "energy-becquerel",
1843                                         "energy-sievert",
1844                                         "energy-gray",
1845                                         "force-kilogram-force",
1846                                         "length-rod",
1847                                         "length-chain",
1848                                         "magnetic-tesla",
1849                                         "magnetic-weber",
1850                                         "temperature-rankine",
1851                                         "duration-fortnight",
1852                                         "mass-slug",
1853                                         "pressure-gasoline-energy-density",
1854                                         "length-rin",
1855                                         "length-sun",
1856                                         "length-shaku-length",
1857                                         "length-shaku-cloth",
1858                                         "length-ken",
1859                                         "length-jo-jp",
1860                                         "length-ri-jp",
1861                                         "area-bu-jp",
1862                                         "area-se-jp",
1863                                         "area-cho",
1864                                         "volume-kosaji",
1865                                         "volume-osaji",
1866                                         "volume-cup-jp",
1867                                         "volume-shaku",
1868                                         "volume-sai",
1869                                         "volume-to-jp",
1870                                         "volume-koku",
1871                                         "mass-fun"))
1872                         .freeze();
1873     }
1874 
    // Ordering for "count" attribute values: the explicit "0" and "1" first, then the named
    // categories from "zero" to "other".
    static MapComparator<String> countValueOrder =
            new MapComparator<String>()
                    .add("0", "1", "zero", "one", "two", "few", "many", "other")
                    .freeze();
    // Ordering for the "type" attribute of "unitLength".
    static MapComparator<String> unitLengthOrder =
            new MapComparator<String>().add("long", "short", "narrow").freeze();
    // Ordering for the "type" attribute of "currencyFormat".
    static MapComparator<String> currencyFormatOrder =
            new MapComparator<String>().add("standard", "accounting").freeze();
    // Ordering for the "type" attribute of "zone", supplied by StandardCodes.
    static Comparator<String> zoneOrder = StandardCodes.make().getTZIDComparator();
1884 
1885     static final Comparator<String> COMP = (Comparator) CLDRConfig.getInstance().getCollator();
1886 
1887     // Hack for US
1888     static final Comparator<String> UNICODE_SET_COMPARATOR =
1889             new Comparator<>() {
1890                 @Override
1891                 public int compare(String o1, String o2) {
1892                     if (o1.contains("{")) {
1893                         o1 = o1.replace("{", "");
1894                     }
1895                     if (o2.contains("{")) {
1896                         o2 = o2.replace("{", "");
1897                     }
1898                     return COMP.compare(o1, o2);
1899                 }
1900             };
1901 
    /**
     * Returns the comparator used to order values of the given attribute on the given element,
     * for the {@code DtdType.ldml} DTD type.
     *
     * @param element element name
     * @param attribute attribute name
     * @return the comparator chosen by the three-argument overload for {@code DtdType.ldml}
     */
    public static Comparator<String> getAttributeValueComparator(String element, String attribute) {
        return getAttributeValueComparator(DtdType.ldml, element, attribute);
    }
1905 
getAttributeValueComparator( DtdType type, String element, String attribute)1906     static Comparator<String> getAttributeValueComparator(
1907             DtdType type, String element, String attribute) {
1908         // The default is a map comparator, which compares numbers as numbers, and strings with UCA
1909         Comparator<String> comp = valueOrdering;
1910         if (type != DtdType.ldml && type != DtdType.ldmlICU) {
1911             return comp;
1912         }
1913         if (attribute.equals("day")) { // && (element.startsWith("weekend")
1914             comp = dayValueOrder;
1915         } else if (attribute.equals("type")) {
1916             if (element.endsWith("FormatLength")) {
1917                 comp = lengthOrder;
1918             } else if (element.endsWith("Width")) {
1919                 comp = widthOrder;
1920             } else if (element.equals("day")) {
1921                 comp = dayValueOrder;
1922             } else if (element.equals("field")) {
1923                 comp = dateFieldOrder;
1924             } else if (element.equals("zone")) {
1925                 comp = zoneOrder;
1926             } else if (element.equals("listPatternPart")) {
1927                 comp = listPatternOrder;
1928             } else if (element.equals("currencyFormat")) {
1929                 comp = currencyFormatOrder;
1930             } else if (element.equals("unitLength")) {
1931                 comp = unitLengthOrder;
1932             } else if (element.equals("unit")) {
1933                 comp = getUnitOrder();
1934             } else if (element.equals("dayPeriod")) {
1935                 comp = dayPeriodOrder;
1936             } else if (element.equals("dateTimeFormat")) {
1937                 comp = dateTimeFormatOrder;
1938             } else if (element.equals("nameField")) {
1939                 comp = nameFieldOrder;
1940             }
1941         } else if (attribute.equals("order") && element.equals("personName")) {
1942             comp = orderValueOrder;
1943         } else if (attribute.equals("length") && element.equals("personName")) {
1944             comp = lengthValueOrder;
1945         } else if (attribute.equals("usage") && element.equals("personName")) {
1946             comp = usageValueOrder;
1947         } else if (attribute.equals("formality")) {
1948             comp = formalityValueOrder;
1949         } else if (attribute.equals("item") && element.equals("sampleName")) {
1950             comp = sampleNameItemOrder;
1951         } else if (attribute.equals("count") && !element.equals("minDays")) {
1952             comp = countValueOrder;
1953         } else if (attribute.equals("cp") && element.equals("annotation")) {
1954             comp = UNICODE_SET_COMPARATOR;
1955         }
1956         return comp;
1957     }
1958 
1959     /** Comparator for attributes in CLDR files */
1960     private static AttributeValueComparator ldmlAvc =
1961             new AttributeValueComparator() {
1962                 @Override
1963                 public int compare(String element, String attribute, String value1, String value2) {
1964                     Comparator<String> comp = getAttributeValueComparator(element, attribute);
1965                     return comp.compare(value1, value2);
1966                 }
1967             };
1968 
hasValue(String elementName)1969     public boolean hasValue(String elementName) {
1970         return nameToElement.get(elementName).type == ElementType.PCDATA;
1971     }
1972 
isMetadata(XPathParts pathPlain)1973     public boolean isMetadata(XPathParts pathPlain) {
1974         for (String s : pathPlain.getElements()) {
1975             Element e = getElementFromName().get(s);
1976             if (e.elementStatus == ElementStatus.metadata) {
1977                 return true;
1978             }
1979         }
1980         return false;
1981     }
1982 
isMetadataOld(DtdType dtdType2, XPathParts pathPlain)1983     public static boolean isMetadataOld(DtdType dtdType2, XPathParts pathPlain) {
1984         // TODO Don't use hard-coded list; instead add to DTD annotations
1985         final String element1 = pathPlain.getElement(1);
1986         final String element2 = pathPlain.getElement(2);
1987         final String elementN = pathPlain.getElement(-1);
1988         switch (dtdType2) {
1989             case ldml:
1990                 switch (element1) {
1991                     case "generation":
1992                     case "metadata":
1993                         return true;
1994                 }
1995                 break;
1996             case ldmlBCP47:
1997                 switch (element1) {
1998                     case "generation":
1999                     case "version":
2000                         return true;
2001                 }
2002                 break;
2003                 ////
2004                 // supplementalData/transforms/transform[@source="am"][@target="am_FONIPA"][@direction="forward"]/comment
2005             case supplementalData:
2006                 // these are NOT under /metadata/ but are actually metadata
2007                 switch (element1) {
2008                     case "generation":
2009                     case "version":
2010                     case "validity":
2011                     case "references":
2012                     case "coverageLevels":
2013                         return true;
2014                     case "transforms":
2015                         return elementN.equals("comment");
2016                     case "metadata":
2017                         // these ARE under /metadata/, but many others under /metadata/ are NOT
2018                         // actually metadata.
2019                         switch (element2) {
2020                             case "validity":
2021                             case "serialElements":
2022                             case "suppress":
2023                             case "distinguishing":
2024                             case "blocking":
2025                             case "casingData":
2026                                 return true;
2027                         }
2028                         break;
2029                 }
2030                 break;
2031             default:
2032         }
2033         return false;
2034     }
2035 
isDeprecated(XPathParts pathPlain)2036     public boolean isDeprecated(XPathParts pathPlain) {
2037         for (int i = 0; i < pathPlain.size(); ++i) {
2038             String elementName = pathPlain.getElement(i);
2039             if (isDeprecated(elementName, "*", null)) {
2040                 return true;
2041             }
2042             for (String attribute : pathPlain.getAttributeKeys(i)) {
2043                 String attributeValue = pathPlain.getAttributeValue(i, attribute);
2044                 if (isDeprecated(elementName, attribute, attributeValue)) {
2045                     return true;
2046                 }
2047             }
2048         }
2049         return false;
2050     }
2051 
    /** Splits on any whitespace character, trimming each piece and omitting empty pieces. */
    public static final Splitter SPACE_SPLITTER =
            Splitter.on(CharMatcher.whitespace()).trimResults().omitEmptyStrings();
    /** Splits on the vertical bar character, trimming each piece and omitting empty pieces. */
    public static final Splitter BAR_SPLITTER = Splitter.on('|').trimResults().omitEmptyStrings();
    /**
     * Splits on line-feed and carriage-return characters, trimming each piece and omitting empty
     * pieces.
     */
    public static final Splitter CR_SPLITTER =
            Splitter.on(CharMatcher.anyOf("\n\r")).trimResults().omitEmptyStrings();
2057 
2058     private static class XPathPartsSet {
2059         private final Set<XPathParts> list = new LinkedHashSet<>();
2060 
addElement(String element)2061         private void addElement(String element) {
2062             if (list.isEmpty()) {
2063                 list.add(new XPathParts().addElement(element));
2064             } else {
2065                 for (XPathParts item : list) {
2066                     item.addElement(element);
2067                 }
2068             }
2069         }
2070 
addAttribute(String attribute, String attributeValue)2071         private void addAttribute(String attribute, String attributeValue) {
2072             for (XPathParts item : list) {
2073                 item.addAttribute(attribute, attributeValue);
2074             }
2075         }
2076 
setElement(int i, String string)2077         private void setElement(int i, String string) {
2078             for (XPathParts item : list) {
2079                 item.setElement(i, string);
2080             }
2081         }
2082 
addAttributes(String attribute, List<String> attributeValues)2083         private void addAttributes(String attribute, List<String> attributeValues) {
2084             if (attributeValues.size() == 1) {
2085                 addAttribute(attribute, attributeValues.iterator().next());
2086             } else {
2087                 // duplicate all the items in the list with the given values
2088                 Set<XPathParts> newList = new LinkedHashSet<>();
2089                 for (XPathParts item : list) {
2090                     for (String attributeValue : attributeValues) {
2091                         XPathParts newItem = item.cloneAsThawed();
2092                         newItem.addAttribute(attribute, attributeValue);
2093                         newList.add(newItem);
2094                     }
2095                 }
2096                 list.clear();
2097                 list.addAll(newList);
2098             }
2099         }
2100 
toStrings()2101         private ImmutableSet<String> toStrings() {
2102             Builder<String> result = new ImmutableSet.Builder<>();
2103 
2104             for (XPathParts item : list) {
2105                 result.add(item.toString());
2106             }
2107             return result.build();
2108         }
2109 
2110         @Override
toString()2111         public String toString() {
2112             return list.toString();
2113         }
2114     }
2115 
getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras)2116     public Set<String> getRegularizedPaths(XPathParts pathPlain, Multimap<String, String> extras) {
2117         extras.clear();
2118         Map<String, String> valueAttributes = new HashMap<>();
2119         XPathPartsSet pathResult = new XPathPartsSet();
2120         String element = null;
2121         for (int i = 0; i < pathPlain.size(); ++i) {
2122             element = pathPlain.getElement(i);
2123             pathResult.addElement(element);
2124             valueAttributes.clear();
2125             for (String attribute : pathPlain.getAttributeKeys(i)) {
2126                 AttributeStatus status = getAttributeStatus(element, attribute);
2127                 final String attributeValue = pathPlain.getAttributeValue(i, attribute);
2128                 switch (status) {
2129                     case distinguished:
2130                         AttributeType attrType = getAttributeType(element, attribute);
2131                         if (attrType == AttributeType.NMTOKENS) {
2132                             pathResult.addAttributes(
2133                                     attribute, SPACE_SPLITTER.splitToList(attributeValue));
2134                         } else {
2135                             pathResult.addAttribute(attribute, attributeValue);
2136                         }
2137                         break;
2138                     case value:
2139                         valueAttributes.put(attribute, attributeValue);
2140                         break;
2141                     case metadata:
2142                         break;
2143                 }
2144             }
2145             if (!valueAttributes.isEmpty()) {
2146                 boolean hasValue = hasValue(element);
2147                 // if it doesn't have a value, we construct new child elements, with _ prefix
2148                 // if it does have a value, we have to play a further trick, since
2149                 // we can't have a value and child elements at the same level.
2150                 // So we use a _ suffix on the element.
2151                 if (hasValue) {
2152                     pathResult.setElement(i, element + "_");
2153                 } else {
2154                     int debug = 0;
2155                 }
2156                 for (Entry<String, String> attributeAndValue : valueAttributes.entrySet()) {
2157                     final String attribute = attributeAndValue.getKey();
2158                     final String attributeValue = attributeAndValue.getValue();
2159 
2160                     Set<String> pathsShort = pathResult.toStrings();
2161                     AttributeType attrType = getAttributeType(element, attribute);
2162                     for (String pathShort : pathsShort) {
2163                         pathShort += "/_" + attribute;
2164                         if (attrType == AttributeType.NMTOKENS) {
2165                             for (String valuePart : SPACE_SPLITTER.split(attributeValue)) {
2166                                 extras.put(pathShort, valuePart);
2167                             }
2168                         } else {
2169                             extras.put(pathShort, attributeValue);
2170                         }
2171                     }
2172                 }
2173                 if (hasValue) {
2174                     pathResult.setElement(i, element); // restore
2175                 }
2176             }
2177         }
2178         // Only add the path if it could have a value, looking at the last element
2179         if (!hasValue(element)) {
2180             return null;
2181         }
2182         return pathResult.toStrings();
2183     }
2184 
getAttributeType(String elementName, String attributeName)2185     public AttributeType getAttributeType(String elementName, String attributeName) {
2186         Attribute attr = getAttribute(elementName, attributeName);
2187         return (attr != null) ? attr.type : null;
2188     }
2189 
getAttribute(String elementName, String attributeName)2190     public Attribute getAttribute(String elementName, String attributeName) {
2191         Element element = nameToElement.get(elementName);
2192         return (element != null) ? element.getAttributeNamed(attributeName) : null;
2193     }
2194 
    // TODO: add support for the following to the DTD annotations, and rework the API
2196 
    /**
     * Element names whose presence anywhere in a path makes {@link #getValueSplitter} return
     * {@link #SPACE_SPLITTER}.
     */
    static final Set<String> SPACED_VALUES = ImmutableSet.of("idValidity", "languageGroup");
2198 
getValueSplitter(XPathParts pathPlain)2199     public static Splitter getValueSplitter(XPathParts pathPlain) {
2200         if (!Collections.disjoint(pathPlain.getElements(), SPACED_VALUES)) {
2201             return SPACE_SPLITTER;
2202         } else if (pathPlain.getElement(-1).equals("annotation")
2203                 && !pathPlain.getAttributeKeys(-1).contains("tts")) {
2204             return BAR_SPLITTER;
2205         }
2206         return CR_SPLITTER;
2207     }
2208 
isComment(XPathParts pathPlain, String line)2209     public static boolean isComment(XPathParts pathPlain, String line) {
2210         if (pathPlain.contains("transform")) {
2211             if (line.startsWith("#")) {
2212                 return true;
2213             }
2214         }
2215         return false;
2216     }
2217 
isExtraSplit(String extraPath)2218     public static boolean isExtraSplit(String extraPath) {
2219         if (extraPath.endsWith("/_type")
2220                 && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
2221             return true;
2222         }
2223         return false;
2224     }
2225 
2226     /** Return the value status for an EAV */
getValueStatus(String elementName, String attributeName, String value)2227     public ValueStatus getValueStatus(String elementName, String attributeName, String value) {
2228         Element element = nameToElement.get(elementName);
2229         if (element == null) {
2230             return ValueStatus.invalid;
2231         }
2232         Attribute attr = element.getAttributeNamed(attributeName);
2233         if (attr == null) {
2234             return ValueStatus.invalid;
2235         }
2236         return attr.getValueStatus(value);
2237     }
2238 
2239     /** Return element-attribute pairs with non-enumerated values, for quick checks. */
getNonEnumerated(Map<String, String> matchValues)2240     public Multimap<String, String> getNonEnumerated(Map<String, String> matchValues) {
2241         Multimap<String, String> nonEnumeratedElementToAttribute =
2242                 TreeMultimap.create(); // make tree for ease of debugging
2243         for (Entry<String, Element> entry : nameToElement.entrySet()) {
2244             Element element = entry.getValue();
2245             for (Attribute attribute : element.attributes.keySet()) {
2246                 if (attribute.type != AttributeType.ENUMERATED_TYPE) {
2247                     String elementName = element.getName();
2248                     String attrName = attribute.getName();
2249                     nonEnumeratedElementToAttribute.put(elementName, attrName);
2250                     if (attribute.matchValue != null) {
2251                         matchValues.put(
2252                                 elementName + "\t" + attrName, attribute.matchValue.getName());
2253                     }
2254                 }
2255             }
2256         }
2257         return ImmutableSetMultimap.copyOf(nonEnumeratedElementToAttribute);
2258     }
2259 
2260     /** Get the value constraint on the last element in a path */
getValueConstraint(String xpath)2261     public static ValueConstraint getValueConstraint(String xpath) {
2262         return getElement(xpath, -1).getValueConstraint();
2263     }
2264 
2265     /** Get an element from a path and element index. */
getElement(String xpath, int elementIndex)2266     public static Element getElement(String xpath, int elementIndex) {
2267         XPathParts parts = XPathParts.getFrozenInstance(xpath);
2268         return DtdData.getInstance(DtdType.valueOf(parts.getElement(0)))
2269                 .getElementFromName()
2270                 .get(parts.getElement(elementIndex));
2271     }
2272 }
2273