xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/AttributeValueValidity.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Splitter;
4 import com.google.common.collect.ComparisonChain;
5 import com.ibm.icu.impl.Relation;
6 import com.ibm.icu.impl.Row;
7 import com.ibm.icu.impl.Row.R2;
8 import com.ibm.icu.impl.Row.R3;
9 import com.ibm.icu.text.UnicodeSet;
10 import com.ibm.icu.util.ICUException;
11 import com.ibm.icu.util.Output;
12 import java.util.Collection;
13 import java.util.Collections;
14 import java.util.EnumMap;
15 import java.util.EnumSet;
16 import java.util.LinkedHashMap;
17 import java.util.LinkedHashSet;
18 import java.util.List;
19 import java.util.Locale;
20 import java.util.Map;
21 import java.util.Map.Entry;
22 import java.util.Objects;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Pattern;
27 import org.unicode.cldr.util.LanguageInfo.CldrDir;
28 import org.unicode.cldr.util.StandardCodes.LstrType;
29 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
30 
31 public class AttributeValueValidity {
32 
33     public enum Status {
34         ok,
35         deprecated,
36         illegal,
37         noTest
38     }
39 
40     public enum LocaleSpecific {
41         pluralCardinal,
42         pluralOrdinal,
43         dayPeriodFormat,
44         dayPeriodSelection
45     }
46 
47     static final Splitter BAR = Splitter.on('|').trimResults().omitEmptyStrings();
48     static final Splitter SPACE =
49             Splitter.on(PatternCache.get("\\s+")).trimResults().omitEmptyStrings();
50 
51     private static final Set<DtdType> ALL_DTDs =
52             Collections.unmodifiableSet(EnumSet.allOf(DtdType.class));
53 
54     private static final SupplementalDataInfo supplementalData =
55             CLDRConfig.getInstance().getSupplementalDataInfo();
56 
57     private static Map<DtdType, Map<String, Map<String, MatcherPattern>>>
58             dtd_element_attribute_validity = new EnumMap<>(DtdType.class);
59     private static Map<String, MatcherPattern> common_attribute_validity = new LinkedHashMap<>();
60     private static Map<String, MatcherPattern> variables = new LinkedHashMap<>();
61     private static final RegexMatcher NOT_DONE_YET = new RegexMatcher(".*", Pattern.COMMENTS);
62     private static final Map<AttributeValidityInfo, String> failures = new LinkedHashMap<>();
63     private static final boolean DEBUG = false;
64 
65     static {
66         Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
67         Set<String> bcp47Keys = new LinkedHashSet<>();
68         Set<String> bcp47Values = new LinkedHashSet<>();
69         for (Entry<String, Set<String>> keyValues :
70                 supplementalData.getBcp47Keys().keyValuesSet()) {
71             Set<String> fullValues = new TreeSet<>();
72             String key = keyValues.getKey();
73             bcp47Keys.add(key);
74 
75             Set<String> rawValues = keyValues.getValue();
76 
77             for (String value : rawValues) {
78                 if (key.equals("cu")) { // Currency codes are in upper case.
value.toUpperCase()79                     fullValues.add(value.toUpperCase());
80                 } else {
81                     fullValues.add(value);
82                 }
83                 R2<String, String> keyValue = R2.of(key, value);
84                 Set<String> aliases = bcp47Aliases.getAll(keyValue);
85                 if (aliases != null) {
86                     fullValues.addAll(aliases);
87                 }
88             }
89             // Special case exception for generic calendar, since we don't want to expose it in
90             // bcp47
91             if (key.equals("ca")) {
92                 fullValues.add("generic");
93             }
94             fullValues = Collections.unmodifiableSet(fullValues);
95             addCollectionVariable("$_bcp47_" + key, fullValues);
96 
97             // add aliased keys
98             Set<String> aliases = supplementalData.getBcp47Aliases().getAll(Row.of(key, ""));
99             if (aliases != null) {
100                 for (String aliasKey : aliases) {
101                     bcp47Keys.add(aliasKey);
102                     addCollectionVariable("$_bcp47_" + aliasKey, fullValues);
103                 }
104             }
105             bcp47Values.addAll(fullValues);
106         }
107         bcp47Keys.add("x"); // special-case private use
108         bcp47Keys.add("x0"); // special-case, has no subtypes
109         addCollectionVariable("$_bcp47_keys", bcp47Keys);
110         addCollectionVariable("$_bcp47_value", bcp47Values);
111 
112         Validity validity = Validity.getInstance();
113         for (LstrType key : LstrType.values()) {
114             final Map<Validity.Status, Set<String>> statusToCodes = validity.getStatusToCodes(key);
115             if (statusToCodes == null) {
116                 continue;
117             }
118             String keyName = "$_" + key;
119             Set<String> all = new LinkedHashSet<>();
120             Set<String> prefix = new LinkedHashSet<>();
121             Set<String> suffix = new LinkedHashSet<>();
122             Set<String> regularAndUnknown = new LinkedHashSet<>();
123             for (Entry<Validity.Status, Set<String>> item2 : statusToCodes.entrySet()) {
124                 Validity.Status status = item2.getKey();
125                 Set<String> validItems = item2.getValue();
126                 if (key == LstrType.variant) { // uppercased in CLDR
127                     Set<String> temp2 = new LinkedHashSet<>(validItems);
128                     for (String item : validItems) {
item.toUpperCase(Locale.ROOT)129                         temp2.add(item.toUpperCase(Locale.ROOT));
130                     }
131                     validItems = temp2;
132                 } else if (key == LstrType.subdivision) {
133                     for (String item : validItems) {
134                         if (item.contains("-")) {
135                             List<String> parts = Splitter.on('-').splitToList(item);
136                             prefix.add(parts.get(0));
137                             suffix.add(parts.get(1));
138                         } else {
139                             int prefixWidth = item.charAt(0) < 'A' ? 3 : 2;
140                             prefix.add(item.substring(0, prefixWidth));
141                             suffix.add(item.substring(prefixWidth));
142                         }
143                     }
144                 }
145                 all.addAll(validItems);
146                 if (status == Validity.Status.regular
147                         || status == Validity.Status.special
148                         || status == Validity.Status.unknown) {
149                     regularAndUnknown.addAll(validItems);
150                 }
151                 addCollectionVariable(keyName + "_" + status, validItems);
152                 //                MatcherPattern m = new MatcherPattern(key.toString(),
153                 // validItems.toString(), new CollectionMatcher(validItems));
154                 //                variables.put(keyName+"_"+status, m);
155             }
156             if (key == LstrType.subdivision) {
157                 addCollectionVariable(keyName + "_prefix", prefix);
158                 addCollectionVariable(keyName + "_suffix", suffix);
159             }
160             addCollectionVariable(keyName, all);
161             addCollectionVariable(keyName + "_plus", regularAndUnknown);
162 
163             //            MatcherPattern m = new MatcherPattern(key.toString(), all.toString(), new
164             // CollectionMatcher(all));
165             //            variables.put(keyName, m);
166             //            MatcherPattern m2 = new MatcherPattern(key.toString(),
167             // regularAndUnknown.toString(), new CollectionMatcher(regularAndUnknown));
168             //            variables.put(keyName + "_plus", m2);
169         }
170 
171         Set<String> main = new LinkedHashSet<>();
172         main.addAll(StandardCodes.LstrType.language.specials);
173         Set<String> coverage = new LinkedHashSet<>();
174         Set<String> large_official = new LinkedHashSet<>();
175         final LocaleIDParser lip = new LocaleIDParser();
176 
177         for (String language : LanguageInfo.getAvailable()) {
178             LanguageInfo info = LanguageInfo.get(language);
179             CldrDir cldrDir = info.getCldrDir();
180             String base = lip.set(language).getLanguage();
181             if (cldrDir == CldrDir.main || cldrDir == CldrDir.base) {
182                 main.add(base);
183             }
184             if (info.getCldrLevel() == Level.MODERN) {
185                 coverage.add(base);
186             }
187             if (info.getLiteratePopulation() > 1000000 && !info.getStatusToRegions().isEmpty()) {
188                 large_official.add(base);
189             }
190         }
191         addCollectionVariable("$_language_main", main);
192         addCollectionVariable("$_language_coverage", coverage);
193         addCollectionVariable("$_language_large_official", large_official);
194         Set<String> cldrLang = new TreeSet<>(main);
195         cldrLang.addAll(coverage);
196         cldrLang.addAll(large_official);
197         addCollectionVariable("$_language_cldr", large_official);
198         // System.out.println("\ncldrLang:\n" + Joiner.on(' ').join(cldrLang));
199 
200         Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
201         for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
202             String id = item.getKey();
203             String type = item.getValue().get0();
204             String value = item.getValue().get1();
205             MatcherPattern mp = getMatcherPattern2(type, value);
206             if (mp != null) {
variables.put(id, mp)207                 variables.put(id, mp);
208                 // variableReplacer.add(id, value);
209             } else {
210                 throw new IllegalArgumentException("Duplicate element " + mp);
211             }
212         }
213         // System.out.println("Variables: " + variables.keySet());
214 
215         Map<AttributeValidityInfo, String> rawAttributeValueInfo =
216                 supplementalData.getAttributeValidity();
217         int x = 0;
218         for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
219             AttributeValidityInfo item = entry.getKey();
220             String value = entry.getValue();
221             // System.out.println(item);
222             MatcherPattern mp = getMatcherPattern2(item.getType(), value);
223             if (mp == null) {
item.getType()224                 getMatcherPattern2(item.getType(), value); // for debugging
failures.put(item, value)225                 failures.put(item, value);
226                 continue;
227             }
228             Set<DtdType> dtds = item.getDtds();
229             if (dtds == null) {
230                 dtds = ALL_DTDs;
231             }
232             for (DtdType dtdType : dtds) {
233                 DtdData data = DtdData.getInstance(dtdType);
234                 Map<String, Map<String, MatcherPattern>> element_attribute_validity =
235                         dtd_element_attribute_validity.get(dtdType);
236                 if (element_attribute_validity == null) {
dtd_element_attribute_validity.put( dtdType, element_attribute_validity = new TreeMap<>())237                     dtd_element_attribute_validity.put(
238                             dtdType, element_attribute_validity = new TreeMap<>());
239                 }
240 
241                 //             <attributeValues dtds="supplementalData" elements="currency"
242                 // attributes="before from to">$currencyDate</attributeValues>
243 
244                 Set<String> attributeList = item.getAttributes();
245                 Set<String> elementList = item.getElements();
246                 if (elementList.size() == 0) {
addAttributes(attributeList, common_attribute_validity, mp)247                     addAttributes(attributeList, common_attribute_validity, mp);
248                 } else {
249                     for (String element : elementList) {
250                         // check if unnecessary
251                         DtdData.Element elementInfo = data.getElementFromName().get(element);
252                         if (elementInfo == null) {
253                             throw new ICUException(
254                                     "Illegal <attributeValues>, element not valid: "
255                                             + dtdType
256                                             + ", element: "
257                                             + element);
258                         } else {
259                             for (String attribute : attributeList) {
260                                 DtdData.Attribute attributeInfo =
261                                         elementInfo.getAttributeNamed(attribute);
262                                 if (attributeInfo == null) {
263                                     throw new ICUException(
264                                             "Illegal <attributeValues>, attribute not valid: "
265                                                     + dtdType
266                                                     + ", element: "
267                                                     + element
268                                                     + ", attribute: "
269                                                     + attribute);
270                                 } else if (!attributeInfo.values.isEmpty()) {
271                                     //                                    if (false) {
272                                     //
273                                     // System.out.println("Unnecessary <attributeValues …>, the DTD
274                                     // has specific list: element: " + element + ", attribute: " +
275                                     // attribute + ", " + attributeInfo.values);
276                                     //                                    }
277                                 }
278                             }
279                         }
280                         // System.out.println("\t" + element);
281                         Map<String, MatcherPattern> attribute_validity =
282                                 element_attribute_validity.get(element);
283                         if (attribute_validity == null) {
element_attribute_validity.put( element, attribute_validity = new TreeMap<>())284                             element_attribute_validity.put(
285                                     element, attribute_validity = new TreeMap<>());
286                         }
addAttributes(attributeList, attribute_validity, mp)287                         addAttributes(attributeList, attribute_validity, mp);
288                     }
289                 }
290             }
291         }
292         // show values
293         //        for (Entry<DtdType, Map<String, Map<String, MatcherPattern>>> entry1 :
294         // dtd_element_attribute_validity.entrySet()) {
295         //            final DtdType dtdType = entry1.getKey();
296         //            Map<String, Map<String, MatcherPattern>> element_attribute_validity =
297         // entry1.getValue();
298         //            DtdData dtdData2 = DtdData.getInstance(dtdType);
299         //            for (Element element : dtdData2.getElements()) {
300         //                Set<Attribute> attributes = element.getAttributes().keySet();
301         //
302         //            }
303         //            for (Entry<String, Map<String, MatcherPattern>> entry2 :
304         // entry1.getValue().entrySet()) {
305         //                for (Entry<String, MatcherPattern> entry3 : entry2.getValue().entrySet())
306         // {
307         //                    System.out.println(dtdType + "\t" + entry2.getKey() + "\t" +
308         // entry3.getKey() + "\t" + entry3.getValue());
309         //                }
310         //            }
311         //        }
312 
313         //        private LocaleIDParser localeIDParser = new LocaleIDParser();
314         //
315         //        @Override
316         //        public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
317         //            List<CheckStatus> possibleErrors) {
318         //            if (cldrFileToCheck == null) return this;
319         //            if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) {
320         //                setSkipTest(false); // ok
321         //            } else {
322         //                setSkipTest(true);
323         //                return this;
324         //            }
325         //
326         //            pluralInfo = supplementalData.getPlurals(PluralType.cardinal,
327         // cldrFileToCheck.getLocaleID());
328         //            super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
329         //            isEnglish =
330         // "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage());
331         //            synchronized (elementOrder) {
332         //                if (!initialized) {
333         //                    getMetadata();
334         //                    initialized = true;
335         //                    localeMatcher = LocaleMatcher.make();
336         //                }
337         //            }
338         //            if (!localeMatcher.matches(cldrFileToCheck.getLocaleID())) {
339         //                possibleErrors.add(new CheckStatus()
340         //
341         // .setCause(null).setMainType(CheckStatus.errorType).setSubtype(Subtype.invalidLocale)
342         //                .setMessage("Invalid Locale {0}",
343         //                    new Object[] { cldrFileToCheck.getLocaleID() }));
344         //
345         //            }
346         //            return this;
347         //        }
348     }
349 
addCollectionVariable(String name, Set<String> validItems)350     private static void addCollectionVariable(String name, Set<String> validItems) {
351         variables.put(name, new CollectionMatcher(validItems));
352     }
353 
getAllPossibleMissing(DtdType dtdType)354     public static Relation<String, String> getAllPossibleMissing(DtdType dtdType) {
355         Relation<String, String> missing =
356                 Relation.of(new TreeMap<String, Set<String>>(), LinkedHashSet.class);
357 
358         if (dtdType == DtdType.ldmlICU) {
359             return missing;
360         }
361 
362         DtdData dtdData2 = DtdData.getInstance(dtdType);
363         Map<String, Map<String, MatcherPattern>> element_attribute_validity =
364                 CldrUtility.ifNull(
365                         dtd_element_attribute_validity.get(dtdType),
366                         Collections.<String, Map<String, MatcherPattern>>emptyMap());
367 
368         for (DtdData.Element element : dtdData2.getElements()) {
369             if (element.isDeprecated()) {
370                 continue;
371             }
372             Map<String, MatcherPattern> attribute_validity =
373                     CldrUtility.ifNull(
374                             element_attribute_validity.get(element.name),
375                             Collections.<String, MatcherPattern>emptyMap());
376             for (DtdData.Attribute attribute : element.getAttributes().keySet()) {
377                 if (attribute.isDeprecated()) {
378                     continue;
379                 }
380                 if (!attribute.values.isEmpty()) {
381                     continue;
382                 }
383                 MatcherPattern validity = attribute_validity.get(attribute.name);
384                 if (validity != null) {
385                     continue;
386                 }
387                 //            <attributeValues attributes="alt" type="choice">$alt</attributeValues>
388                 //             <attributeValues dtds="supplementalData" elements="character"
389                 // attributes="value" type="regex">.</attributeValues>
390                 missing.put(
391                         attribute.name,
392                         new AttributeValueSpec(dtdType, element.name, attribute.name, "$xxx")
393                                 .toString());
394             }
395         }
396         return missing;
397     }
398 
399     public abstract static class MatcherPattern {
400 
matches(String value, Output<String> reason)401         public abstract boolean matches(String value, Output<String> reason);
402 
getPattern()403         public String getPattern() {
404             String temp = _getPattern();
405             return temp.length() <= MAX_STRING ? temp : temp.substring(0, MAX_STRING) + "…";
406         }
407 
_getPattern()408         public abstract String _getPattern();
409 
410         @Override
toString()411         public String toString() {
412             return getClass().getName() + "\t" + getPattern();
413         }
414     }
415 
416     //    private static MatcherPattern getBcp47MatcherPattern(String key) {
417     //        // <key type="calendar">Calendar</key>
418     //        // <type key="calendar" type="chinese">Chinese Calendar</type>
419     //
420     //        //<attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues>
421     //        //<attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues>
422     //        //<attributeValues elements="type" attributes="type"
423     // type="bcp47">use-key</attributeValues>
424     //
425     //        Set<String> values;
426     //        if (key.equals("key")) {
427     //            values = BCP47_KEY_VALUES.keySet();
428     //        } else {
429     //            values = BCP47_KEY_VALUES.get(key);
430     //        }
431     //        return new CollectionMatcher(values);
432     //    }
433 
434     enum MatcherTypes {
435         single,
436         choice,
437         list,
438         unicodeSet,
439         unicodeSetOrString,
440         regex,
441         locale,
442         bcp47,
443         subdivision,
444         localeSpecific,
445         TODO;
446     }
447 
getMatcherPattern2(String type, String value)448     private static MatcherPattern getMatcherPattern2(String type, String value) {
449         final MatcherTypes matcherType =
450                 type == null ? MatcherTypes.single : MatcherTypes.valueOf(type);
451 
452         if (matcherType != MatcherTypes.TODO && value.startsWith("$")) {
453             MatcherPattern result = getVariable(matcherType, value);
454             if (result != null) {
455                 return result;
456             }
457             throw new IllegalArgumentException("Unknown variable: " + value);
458         }
459 
460         MatcherPattern result;
461 
462         switch (matcherType) {
463             case single:
464                 result = new CollectionMatcher(Collections.singleton(value.trim()));
465                 break;
466             case choice:
467                 result = new CollectionMatcher(SPACE.splitToList(value));
468                 break;
469             case unicodeSet:
470                 result = new UnicodeSetMatcher(new UnicodeSet(value));
471                 break;
472             case unicodeSetOrString:
473                 result = new UnicodeSetOrStringMatcher(new UnicodeSet(value));
474                 break;
475                 //        case bcp47:
476                 //            return getBcp47MatcherPattern(value);
477             case regex:
478                 result =
479                         new RegexMatcher(
480                                 value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace
481                 break;
482             case locale:
483                 result = value.equals("all") ? LocaleMatcher.ALL_LANGUAGES : LocaleMatcher.REGULAR;
484                 break;
485             case localeSpecific:
486                 result = LocaleSpecificMatcher.getInstance(value);
487                 break;
488             case TODO:
489                 result = NOT_DONE_YET;
490                 break;
491             case list:
492                 result = new ListMatcher(new CollectionMatcher(SPACE.splitToList(value)));
493                 break;
494             default:
495                 return null;
496         }
497 
498         return result;
499     }
500 
getVariable(final MatcherTypes matcherType, String value)501     private static MatcherPattern getVariable(final MatcherTypes matcherType, String value) {
502         List<String> values = BAR.splitToList(value); // value.trim().split("|");
503         MatcherPattern[] reasons = new MatcherPattern[values.size()];
504         for (int i = 0; i < values.size(); ++i) {
505             reasons[i] = getNonNullVariable(values.get(i));
506         }
507         MatcherPattern result;
508 
509         if (reasons.length == 1) {
510             result = reasons[0];
511         } else {
512             result = new OrMatcher(reasons);
513         }
514         if (matcherType == MatcherTypes.list) {
515             result = new ListMatcher(result);
516         }
517         return result;
518     }
519 
addAttributes( Set<String> attributes, Map<String, MatcherPattern> attribute_validity, MatcherPattern mp)520     private static void addAttributes(
521             Set<String> attributes,
522             Map<String, MatcherPattern> attribute_validity,
523             MatcherPattern mp) {
524         for (String attribute : attributes) {
525             MatcherPattern old = attribute_validity.get(attribute);
526             if (old != null) {
527                 mp = new OrMatcher(old, mp);
528             }
529             attribute_validity.put(attribute, mp);
530         }
531     }
532 
533     public static class RegexMatcher extends MatcherPattern {
534 
535         private java.util.regex.Matcher matcher;
536 
RegexMatcher(String pattern, int flags)537         public RegexMatcher(String pattern, int flags) {
538             matcher = Pattern.compile(pattern, flags).matcher("");
539         }
540 
541         @Override
matches(String value, Output<String> reason)542         public boolean matches(String value, Output<String> reason) {
543             matcher.reset(value.toString());
544             boolean result = matcher.matches();
545             if (!result && reason != null) {
546                 reason.value = RegexUtilities.showMismatch(matcher, value.toString());
547             }
548             return result;
549         }
550 
551         @Override
_getPattern()552         public String _getPattern() {
553             return matcher.toString();
554         }
555     }
556 
557     private static EnumMap<LocaleSpecific, Set<String>> LOCALE_SPECIFIC = null;
558 
559     /** WARNING, not thread-safe. Needs cleanup * */
setLocaleSpecifics(EnumMap<LocaleSpecific, Set<String>> newValues)560     public static void setLocaleSpecifics(EnumMap<LocaleSpecific, Set<String>> newValues) {
561         LOCALE_SPECIFIC = newValues;
562     }
563 
564     public static class LocaleSpecificMatcher extends MatcherPattern {
565         final LocaleSpecific ls;
566 
LocaleSpecificMatcher(LocaleSpecific ls)567         public LocaleSpecificMatcher(LocaleSpecific ls) {
568             this.ls = ls;
569         }
570 
getInstance(String value)571         public static LocaleSpecificMatcher getInstance(String value) {
572             return new LocaleSpecificMatcher(LocaleSpecific.valueOf(value));
573         }
574 
matches(String value)575         public boolean matches(String value) {
576             return LOCALE_SPECIFIC.get(ls).contains(value);
577         }
578 
579         static final int MAX_STRING = 64;
580 
581         @Override
matches(String value, Output<String> reason)582         public boolean matches(String value, Output<String> reason) {
583             boolean result = LOCALE_SPECIFIC.get(ls).contains(value);
584             if (!result && reason != null) {
585                 reason.value = "∉ " + getPattern();
586             }
587             return result;
588         }
589 
590         @Override
_getPattern()591         public String _getPattern() {
592             return LOCALE_SPECIFIC.get(ls).toString();
593         }
594     }
595 
596     static final int MAX_STRING = 64;
597 
598     public static class CollectionMatcher extends MatcherPattern {
599         private final Collection<String> collection;
600 
CollectionMatcher(Collection<String> collection)601         public CollectionMatcher(Collection<String> collection) {
602             this.collection = Collections.unmodifiableCollection(new LinkedHashSet<>(collection));
603         }
604 
605         @Override
matches(String value, Output<String> reason)606         public boolean matches(String value, Output<String> reason) {
607             boolean result = collection.contains(value);
608             if (!result && reason != null) {
609                 reason.value = "∉ " + getPattern();
610             }
611             return result;
612         }
613 
614         @Override
_getPattern()615         public String _getPattern() {
616             return collection.toString();
617         }
618     }
619 
620     public static class UnicodeSetMatcher extends MatcherPattern {
621         private final UnicodeSet collection;
622 
UnicodeSetMatcher(UnicodeSet collection)623         public UnicodeSetMatcher(UnicodeSet collection) {
624             this.collection = collection.freeze();
625         }
626 
627         @Override
matches(String value, Output<String> reason)628         public boolean matches(String value, Output<String> reason) {
629             boolean result = false;
630             try {
631                 UnicodeSet valueSet = new UnicodeSet(value);
632                 result = collection.containsAll(valueSet);
633                 if (!result && reason != null) {
634                     reason.value = "∉ " + getPattern();
635                 }
636             } catch (Exception e) {
637                 reason.value = " illegal pattern " + getPattern() + ": " + value;
638             }
639             return result;
640         }
641 
642         @Override
_getPattern()643         public String _getPattern() {
644             return collection.toPattern(false);
645         }
646     }
647 
648     public static class UnicodeSetOrStringMatcher extends MatcherPattern {
649         private final UnicodeSet collection;
650 
UnicodeSetOrStringMatcher(UnicodeSet collection)651         public UnicodeSetOrStringMatcher(UnicodeSet collection) {
652             this.collection = collection.freeze();
653         }
654 
655         @Override
matches(String value, Output<String> reason)656         public boolean matches(String value, Output<String> reason) {
657             boolean result = false;
658             if (UnicodeSet.resemblesPattern(value, 0)) {
659                 try {
660                     UnicodeSet valueSet = new UnicodeSet(value);
661                     result = collection.containsAll(valueSet);
662                     if (!result && reason != null) {
663                         reason.value = "∉ " + getPattern();
664                     }
665                 } catch (Exception e) {
666                     reason.value = " illegal pattern " + getPattern() + ": " + value;
667                 }
668             } else {
669                 result = collection.contains(value);
670                 if (!result && reason != null) {
671                     reason.value = "∉ " + getPattern();
672                 }
673             }
674             return result;
675         }
676 
677         @Override
_getPattern()678         public String _getPattern() {
679             return collection.toPattern(false);
680         }
681     }
682 
683     public static class OrMatcher extends MatcherPattern {
684         private final MatcherPattern[] operands;
685 
OrMatcher(MatcherPattern... operands)686         public OrMatcher(MatcherPattern... operands) {
687             for (MatcherPattern operand : operands) {
688                 if (operand == null) {
689                     throw new NullPointerException();
690                 }
691             }
692             this.operands = operands;
693         }
694 
695         @Override
matches(String value, Output<String> reason)696         public boolean matches(String value, Output<String> reason) {
697             StringBuilder fullReason = reason == null ? null : new StringBuilder();
698             for (MatcherPattern operand : operands) {
699                 if (operand.matches(value, reason)) {
700                     return true;
701                 }
702                 if (fullReason != null) {
703                     if (fullReason.length() != 0) {
704                         fullReason.append("&");
705                     }
706                     fullReason.append(reason.value);
707                 }
708             }
709             if (fullReason != null) {
710                 reason.value = fullReason.toString();
711             }
712             return false;
713         }
714 
715         @Override
_getPattern()716         public String _getPattern() {
717             StringBuffer result = new StringBuffer();
718             for (MatcherPattern operand : operands) {
719                 if (result.length() != 0) {
720                     result.append('|');
721                 }
722                 result.append(operand._getPattern());
723             }
724             return result.toString();
725         }
726     }
727 
728     public static class ListMatcher extends MatcherPattern {
729         private MatcherPattern other;
730 
ListMatcher(MatcherPattern other)731         public ListMatcher(MatcherPattern other) {
732             this.other = other;
733         }
734 
735         @Override
matches(String value, Output<String> reason)736         public boolean matches(String value, Output<String> reason) {
737             List<String> values = SPACE.splitToList(value);
738             if (values.isEmpty()) return true;
739             for (String valueItem : values) {
740                 if (!other.matches(valueItem, reason)) {
741                     if (reason != null) {
742                         reason.value = "«" + valueItem + "» ∉ " + other.getPattern();
743                     }
744                     return false;
745                 }
746             }
747             return true;
748         }
749 
750         @Override
_getPattern()751         public String _getPattern() {
752             return "List of " + other._getPattern();
753         }
754     }
755 
756     public static class LocaleMatcher extends MatcherPattern {
757         final MatcherPattern language;
758         final MatcherPattern script = getNonNullVariable("$_script");
759         final MatcherPattern territory = getNonNullVariable("$_region");
760         final MatcherPattern variant = getNonNullVariable("$_variant");
761         final LocaleIDParser lip = new LocaleIDParser();
762 
763         public static LocaleMatcher REGULAR = new LocaleMatcher("$_language_plus");
764         public static LocaleMatcher ALL_LANGUAGES = new LocaleMatcher("$_language");
765 
LocaleMatcher(String variable)766         private LocaleMatcher(String variable) {
767             language = getNonNullVariable(variable);
768         }
769 
770         @Override
matches(String value, Output<String> reason)771         public boolean matches(String value, Output<String> reason) {
772             lip.set(value);
773             String field = lip.getLanguage();
774             if (!language.matches(field, reason)) {
775                 if (reason != null) {
776                     reason.value = "invalid base language";
777                 }
778                 return false;
779             }
780             field = lip.getScript();
781             if (field.length() != 0 && !script.matches(field, reason)) {
782                 if (reason != null) {
783                     reason.value = "invalid script";
784                 }
785                 return false;
786             }
787             field = lip.getRegion();
788             if (field.length() != 0 && !territory.matches(field, reason)) {
789                 if (reason != null) {
790                     reason.value = "invalid region";
791                 }
792                 return false;
793             }
794             String[] fields = lip.getVariants();
795             for (int i = 0; i < fields.length; ++i) {
796                 if (!variant.matches(fields[i], reason)) {
797                     if (reason != null) {
798                         reason.value = "invalid variant";
799                     }
800                     return false;
801                 }
802             }
803             return true;
804         }
805 
806         @Override
_getPattern()807         public String _getPattern() {
808             return "Unicode_Language_Subtag";
809         }
810     }
811 
812     public static final class AttributeValueSpec implements Comparable<AttributeValueSpec> {
AttributeValueSpec( DtdType type, String element, String attribute, String attributeValue)813         public AttributeValueSpec(
814                 DtdType type, String element, String attribute, String attributeValue) {
815             this.type = type;
816             this.element = element;
817             this.attribute = attribute;
818             this.attributeValue = attributeValue;
819         }
820 
821         public final DtdType type;
822         public final String element;
823         public final String attribute;
824         public final String attributeValue;
825 
826         @Override
hashCode()827         public int hashCode() {
828             return Objects.hash(type, element, attribute, attributeValue);
829         }
830 
831         @Override
equals(Object obj)832         public boolean equals(Object obj) {
833             AttributeValueSpec other = (AttributeValueSpec) obj;
834             return CldrUtility.deepEquals(
835                     type, other.type,
836                     element, other.element,
837                     attribute, other.attribute,
838                     attributeValue, other.attributeValue);
839         }
840 
841         @Override
compareTo(AttributeValueSpec other)842         public int compareTo(AttributeValueSpec other) {
843             return ComparisonChain.start()
844                     .compare(type, other.type)
845                     .compare(element, other.element)
846                     .compare(attribute, other.attribute)
847                     .compare(attributeValue, other.attributeValue)
848                     .result();
849         }
850 
851         @Override
toString()852         public String toString() {
853             return "<attributeValues"
854                     + " dtds='"
855                     + type
856                     + "\'"
857                     + " elements='"
858                     + element
859                     + "\'"
860                     + " attributes='"
861                     + attribute
862                     + "\'"
863                     + " type='TODO\'>"
864                     + attributeValue
865                     + "</attributeValues>";
866         }
867     }
868 
869     /**
870      * return Status
871      *
872      * @param attribute_validity
873      * @param attribute
874      * @param attributeValue
875      * @param result
876      * @return
877      */
check( Map<String, MatcherPattern> attribute_validity, String element, String attribute, String attributeValue, Output<String> reason)878     private static Status check(
879             Map<String, MatcherPattern> attribute_validity,
880             String element,
881             String attribute,
882             String attributeValue,
883             Output<String> reason) {
884 
885         if (attribute_validity == null) {
886             return Status.noTest; // no test
887         }
888         MatcherPattern matcherPattern = attribute_validity.get(attribute);
889         if (matcherPattern == null) {
890             return Status.noTest; // no test
891         }
892         if (matcherPattern.matches(attributeValue, reason)) {
893             return Status.ok;
894         }
895         return Status.illegal;
896     }
897 
check( DtdData dtdData, String element, String attribute, String attributeValue, Output<String> reason)898     public static Status check(
899             DtdData dtdData,
900             String element,
901             String attribute,
902             String attributeValue,
903             Output<String> reason) {
904         if (dtdData.isDeprecated(element, attribute, attributeValue)) {
905             return Status.deprecated;
906         }
907         Status haveTest =
908                 check(common_attribute_validity, element, attribute, attributeValue, reason);
909 
910         if (haveTest == Status.noTest) {
911             final Map<String, Map<String, MatcherPattern>> element_attribute_validity =
912                     dtd_element_attribute_validity.get(dtdData.dtdType);
913             if (element_attribute_validity == null) {
914                 return Status.noTest;
915             }
916 
917             Map<String, MatcherPattern> attribute_validity =
918                     element_attribute_validity.get(element);
919             if (attribute_validity == null) {
920                 return Status.noTest;
921             }
922 
923             haveTest = check(attribute_validity, element, attribute, attributeValue, reason);
924         }
925         return haveTest;
926     }
927 
getTodoTests()928     public static Set<R3<DtdType, String, String>> getTodoTests() {
929         Set<Row.R3<DtdType, String, String>> result = new LinkedHashSet<>();
930         for (Entry<DtdType, Map<String, Map<String, MatcherPattern>>> entry1 :
931                 dtd_element_attribute_validity.entrySet()) {
932             for (Entry<String, Map<String, MatcherPattern>> entry2 : entry1.getValue().entrySet()) {
933                 for (Entry<String, MatcherPattern> entry3 : entry2.getValue().entrySet()) {
934                     if (entry3.getValue() == NOT_DONE_YET) {
935                         result.add(Row.of(entry1.getKey(), entry2.getKey(), entry3.getKey()));
936                     }
937                 }
938             }
939         }
940         return result;
941     }
942 
getReadFailures()943     public static Map<AttributeValidityInfo, String> getReadFailures() {
944         return Collections.unmodifiableMap(failures);
945     }
946 
getMatcherPattern(String variable)947     public static MatcherPattern getMatcherPattern(String variable) {
948         return variables.get(variable);
949     }
950 
getNonNullVariable(String variable)951     private static MatcherPattern getNonNullVariable(String variable) {
952         MatcherPattern result = variables.get(variable);
953         if (result == null) {
954             throw new NullPointerException();
955         }
956         return result;
957     }
958 
getMatcherPatternIds()959     public static Set<String> getMatcherPatternIds() {
960         return Collections.unmodifiableSet(variables.keySet());
961     }
962 
main(String[] args)963     public static void main(String[] args) {
964         for (DtdType type : DtdType.values()) {
965             Relation<String, String> missing = getAllPossibleMissing(type);
966             for (Entry<String, String> x : missing.keyValueSet()) {
967                 System.out.println(type + "\t" + CldrUtility.toString(x));
968             }
969         }
970     }
971 }
972