xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestAttributeValues.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.ImmutableList;
6 import com.google.common.collect.ImmutableMap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.ImmutableSortedSet;
9 import com.google.common.collect.Multimap;
10 import com.ibm.icu.dev.test.TestFmwk;
11 import com.ibm.icu.impl.Row.R3;
12 import com.ibm.icu.util.Output;
13 import java.io.File;
14 import java.io.FileInputStream;
15 import java.io.InputStream;
16 import java.util.Collections;
17 import java.util.HashSet;
18 import java.util.LinkedHashSet;
19 import java.util.List;
20 import java.util.Locale;
21 import java.util.Map;
22 import java.util.Map.Entry;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.concurrent.ConcurrentHashMap;
27 import java.util.concurrent.atomic.AtomicInteger;
28 import java.util.regex.Matcher;
29 import java.util.stream.Collectors;
30 import java.util.stream.Stream;
31 import javax.xml.stream.XMLInputFactory;
32 import javax.xml.stream.XMLStreamConstants;
33 import javax.xml.stream.XMLStreamException;
34 import javax.xml.stream.XMLStreamReader;
35 import org.unicode.cldr.tool.VerifyAttributeValues;
36 import org.unicode.cldr.tool.VerifyAttributeValues.Errors;
37 import org.unicode.cldr.util.AttributeValueValidity;
38 import org.unicode.cldr.util.AttributeValueValidity.AttributeValueSpec;
39 import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern;
40 import org.unicode.cldr.util.AttributeValueValidity.Status;
41 import org.unicode.cldr.util.CLDRConfig;
42 import org.unicode.cldr.util.CLDRFile;
43 import org.unicode.cldr.util.CLDRPaths;
44 import org.unicode.cldr.util.ChainedMap;
45 import org.unicode.cldr.util.ChainedMap.M4;
46 import org.unicode.cldr.util.DtdData;
47 import org.unicode.cldr.util.DtdData.ValueStatus;
48 import org.unicode.cldr.util.DtdType;
49 import org.unicode.cldr.util.LanguageInfo;
50 import org.unicode.cldr.util.Organization;
51 import org.unicode.cldr.util.StandardCodes;
52 import org.unicode.cldr.util.StandardCodes.LstrField;
53 import org.unicode.cldr.util.StandardCodes.LstrType;
54 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
55 import org.unicode.cldr.util.Validity;
56 import org.unicode.cldr.util.XPathParts;
57 import org.xml.sax.Attributes;
58 
59 public class TestAttributeValues extends TestFmwk {
    // When true, TestValid walks the collected files with a sequential stream;
    // otherwise it uses a parallel stream.
60     private static final boolean SERIAL = false;
61 
    // Validity instance used by PathChecker.show to look up the known codes for an LstrType.
62     private static final Validity VALIDITY = Validity.getInstance();
    // Root directory handed to VerifyAttributeValues.findAttributeValues in oldTestCoreValidity.
63     private static final File BASE_DIR = new File(CLDRPaths.BASE_DIRECTORY);
    // Joins items with a single space.
64     public static final Joiner SPACE_JOINER = Joiner.on(' ');
    // Splits on spaces, trimming each piece and dropping empty pieces.
65     public static final Splitter SPACE_SPLITTER = Splitter.on(' ').trimResults().omitEmptyStrings();
    // Splits on ';', trimming each piece and dropping empty pieces; used by quickTest.
66     static final Splitter SEMI_SPACE = Splitter.on(';').trimResults().omitEmptyStrings();
    // CLDR configuration; used in this file to fetch the English CLDRFile for language names.
67     private static final CLDRConfig config = CLDRConfig.getInstance();
68 
    // The top-level data directories that TestValid scans for each DTD type.
69     static final List<String> COMMON_AND_SEED =
70             ImmutableList.of(CLDRPaths.COMMON_DIRECTORY, CLDRPaths.SEED_DIRECTORY);
71 
main(String[] args)72     public static void main(String[] args) {
73         new TestAttributeValues().run(args);
74     }
75 
TestValid()76     public void TestValid() {
77         String dtdTypeArg = params.props == null ? null : (String) params.props.get("dtdtype");
78 
79         // short- circuits for testing. null means do all
80         Set<DtdType> checkTypes =
81                 dtdTypeArg == null
82                         ? DtdType.STANDARD_SET
83                         : Collections.singleton(DtdType.valueOf(dtdTypeArg));
84         ImmutableSet<ValueStatus> showStatuses =
85                 null; // ImmutableSet.of(ValueStatus.invalid, ValueStatus.unknown);
86 
87         for (DtdType dtdType : checkTypes) {
88             PathChecker pathChecker = new PathChecker(this, DtdData.getInstance(dtdType));
89             for (String mainDirs : COMMON_AND_SEED) {
90                 Set<String> files = new TreeSet<>();
91                 for (String stringDir : dtdType.directories) {
92                     addXMLFiles(dtdType, mainDirs + stringDir, files);
93                     if (isVerbose())
94                         synchronized (pathChecker.testLog) {
95                             warnln(mainDirs + stringDir);
96                         }
97                 }
98                 Stream<String> stream = SERIAL ? files.stream() : files.parallelStream();
99                 stream.forEach(file -> checkFile(pathChecker, file));
100 
101                 //                for (String file : files) {
102                 //                    checkFile(pathChecker, file);
103                 //                }
104             }
105             pathChecker.show(isVerbose(), showStatuses);
106         }
107         //        List<String> localesToTest = Arrays.asList("en", "root"); // , "zh", "hi", "ja",
108         // "ru", "cy"
109         //        Set<String> localesToTest =
110         // config.getCommonAndSeedAndMainAndAnnotationsFactory().getAvailable();
111         //        // TODO, add all other files
112 
113         //        for (String locale : localesToTest) {
114         //            CLDRFile file = config.getCLDRFile(locale, false);
115         //            for (String dpath : file) {
116         //                String path = file.getFullXPath(dpath);
117         //                pathChecker.checkPath(path);
118         //            }
119         //        }
120     }
121 
    // Sorted set of "<locale>.xml" names built from the locales in the coverage list of
    // Organization.cldr. Within this file it is only referenced from disabled
    // (commented-out) filtering code in addXMLFiles.
122     static final Set<String> CLDR_LOCALES =
123             ImmutableSortedSet.copyOf(
124                     StandardCodes.make().getLocaleCoverageLocales(Organization.cldr).stream()
125                             .map(x -> x + ".xml")
126                             .collect(Collectors.toSet()));
127 
addXMLFiles(DtdType dtdType, String path, Set<String> files)128     private void addXMLFiles(DtdType dtdType, String path, Set<String> files) {
129         File dirFile = new File(path);
130         if (!dirFile.exists()) {
131             return;
132         }
133         if (!dirFile.isDirectory()) {
134             //            if (getInclusion() <= 5
135             //                && dtdType == DtdType.ldml) {
136             //                if (path.contains("/annotationsDerived/")) {
137             //                    return;
138             //                }
139             //                String ending = path.substring(path.lastIndexOf('/')+1);
140             //                if (!CLDR_LOCALES.contains(ending)) {
141             //                    return;
142             //                }
143             //            }
144             files.add(path);
145         } else {
146             for (String file : dirFile.list()) {
147                 String localeID = file.replace(".xml", "");
148                 if (StandardCodes.isLocaleAtLeastBasic(localeID)) {
149                     addXMLFiles(dtdType, path + "/" + file, files);
150                 }
151             }
152         }
153     }
154 
checkFile(PathChecker pathChecker, String fullFile)155     private void checkFile(PathChecker pathChecker, String fullFile) {
156         if (!fullFile.endsWith(".xml")) {
157             return;
158         }
159         pathChecker.fileCount.incrementAndGet();
160         //        if (isVerbose()) synchronized (this) {
161         //            logln(fullFile);
162         //        }
163         XMLInputFactory f = XMLInputFactory.newInstance();
164         //        XMLInputFactory f =
165         // XMLInputFactory.newFactory("org.apache.xerces.jaxp.SAXParserFactoryImpl",
166         //            ClassLoader.getSystemClassLoader());
167 
168         int _elementCount = 0;
169         int _attributeCount = 0;
170         String lastElement = null;
171 
172         try {
173             XMLStreamReader r = null;
174             try (InputStream fis = new FileInputStream(fullFile)) {
175                 r = f.createXMLStreamReader(fullFile, fis);
176                 String element = null;
177                 try {
178                     while (r.hasNext()) {
179                         switch (r.next()) {
180                             case XMLStreamConstants.START_ELEMENT:
181                                 element = r.getLocalName();
182                                 lastElement = element;
183                                 ++_elementCount;
184                                 int attributeSize = r.getAttributeCount();
185                                 for (int i = 0; i < attributeSize; ++i) {
186                                     ++_attributeCount;
187                                     String attribute = r.getAttributeLocalName(i);
188                                     String attributeValue = r.getAttributeValue(i);
189                                     pathChecker.checkAttribute(element, attribute, attributeValue);
190                                 }
191                                 break;
192                         }
193                     }
194                 } catch (XMLStreamException e) {
195                     synchronized (pathChecker.testLog) {
196                         pathChecker.testLog.errln(fullFile + "error");
197                     }
198                     e.printStackTrace(pathChecker.testLog.getLogPrintWriter());
199                 }
200                 // XMLFileReader.read("noId", inputStreamReader, -1, true, myHandler);
201             } catch (XMLStreamException e) {
202                 throw (IllegalArgumentException)
203                         new IllegalArgumentException("Can't read " + fullFile).initCause(e);
204             } catch (Throwable e) {
205                 if (r == null) throw e;
206                 throw new IllegalArgumentException(" at " + r.getLocation(), e);
207             }
208         } catch (Exception e) {
209             e.printStackTrace(this.getErrorLogPrintWriter());
210             errln("Exception occured in " + fullFile + " after parsing " + lastElement + " - " + e);
211         }
212         pathChecker.elementCount.addAndGet(_elementCount);
213         pathChecker.attributeCount.addAndGet(_attributeCount);
214     }
215 
216     static class PathChecker {
        // Results keyed as status -> element -> attribute -> attribute value -> TRUE.
        // Written by checkAttribute inside synchronized (valueStatusInfo); read by show.
        // NOTE(review): the TreeMap arguments are raw types; giving them type arguments
        // depends on the exact signature of ChainedMap.of, which is not visible here.
217         private final ChainedMap.M5<ValueStatus, String, String, String, Boolean> valueStatusInfo =
218                 ChainedMap.of(
219                         new TreeMap(), new TreeMap(), new TreeMap(), new TreeMap(), Boolean.class);
        // Paths already handled by checkPath. A plain HashSet, so it offers no thread safety.
220         private final Set<String> seen = new HashSet<>();
        // element -> attribute -> attribute value already seen by checkAttribute; concurrent
        // maps at every level because checkAttribute is reached from a parallel stream.
221         private final Map<String, Map<String, Map<String, Boolean>>> seenEAV =
222                 new ConcurrentHashMap<>();
        // Test framework instance used for logging; also the monitor that log calls lock on.
223         private final TestFmwk testLog;
        // DTD data that supplies the status of each element/attribute/value triple.
224         private final DtdData dtdData;
        // (element, attribute) pairs returned by dtdData.getNonEnumerated; only these are checked.
225         private final Multimap<String, String> needsTesting;
        // Immutable copy of the map filled in by dtdData.getNonEnumerated; show looks it up
        // with the key element + "\t" + attribute.
226         private final Map<String, String> matchValues;
227 
        // Counters updated by TestAttributeValues.checkFile and printed by show.
228         private final AtomicInteger fileCount = new AtomicInteger();
229         private final AtomicInteger elementCount = new AtomicInteger();
230         private final AtomicInteger attributeCount = new AtomicInteger();
231 
PathChecker(TestFmwk testLog, DtdData dtdData)232         public PathChecker(TestFmwk testLog, DtdData dtdData) {
233             this.testLog = testLog;
234             this.dtdData = dtdData;
235             Map<String, String> _matchValues = new TreeMap<>();
236             needsTesting = dtdData.getNonEnumerated(_matchValues);
237             matchValues = ImmutableMap.copyOf(_matchValues);
238         }
239 
checkPath(String path)240         private void checkPath(String path) {
241             if (seen.contains(path)) {
242                 return;
243             }
244             seen.add(path);
245             if (path.contains("length-point")) {
246                 int debug = 0;
247             }
248             XPathParts parts = XPathParts.getFrozenInstance(path);
249             for (int elementIndex = 0; elementIndex < parts.size(); ++elementIndex) {
250                 String element = parts.getElement(elementIndex);
251                 for (Entry<String, String> entry : parts.getAttributes(elementIndex).entrySet()) {
252                     String attribute = entry.getKey();
253                     String attrValue = entry.getValue();
254                     checkAttribute(element, attribute, attrValue);
255                 }
256             }
257         }
258 
checkElement(String element, Attributes atts)259         public void checkElement(String element, Attributes atts) {
260             int length = atts.getLength();
261             for (int i = 0; i < length; ++i) {
262                 checkAttribute(element, atts.getQName(i), atts.getValue(i));
263             }
264         }
265 
checkAttribute(String element, String attribute, String attrValue)266         private void checkAttribute(String element, String attribute, String attrValue) {
267             // skip cases we know we don't need to test
268             if (!needsTesting.containsEntry(element, attribute)) {
269                 return;
270             }
271             // check if we've seen the EAV yet
272             // we don't need to synchronize because a miss isn't serious
273             Map<String, Map<String, Boolean>> sub = seenEAV.get(element);
274             if (sub == null) {
275                 Map<String, Map<String, Boolean>> subAlready =
276                         seenEAV.putIfAbsent(element, sub = new ConcurrentHashMap<>());
277                 if (subAlready != null) {
278                     sub = subAlready; // discards empty map
279                 }
280             }
281             Map<String, Boolean> set = sub.get(attribute);
282             if (set == null) {
283                 Map<String, Boolean> setAlready =
284                         sub.putIfAbsent(attribute, set = new ConcurrentHashMap<>());
285                 if (setAlready != null) {
286                     set = setAlready; // discards empty map
287                 }
288             }
289             if (set.putIfAbsent(attrValue, Boolean.TRUE) != null) {
290                 return;
291             }
292 
293             // get the status & store
294             ValueStatus valueStatus = dtdData.getValueStatus(element, attribute, attrValue);
295             if (valueStatus != ValueStatus.valid) {
296                 // Set breakpoint here for debugging (referenced from
297                 // http://cldr.unicode.org/development/testattributevalues)
298                 dtdData.getValueStatus(element, attribute, attrValue);
299             }
300             synchronized (valueStatusInfo) {
301                 valueStatusInfo.put(valueStatus, element, attribute, attrValue, Boolean.TRUE);
302             }
303         }
304 
show(boolean verbose, ImmutableSet<ValueStatus> retain)305         void show(boolean verbose, ImmutableSet<ValueStatus> retain) {
306             if (dtdData.dtdType == DtdType.keyboard3
307                     && testLog.logKnownIssue("CLDR-14974", "skipping for keyboard")) {
308                 testLog.warnln("Skipping for keyboard3");
309             }
310             boolean haveProblems = false;
311             for (ValueStatus valueStatus : ValueStatus.values()) {
312                 if (valueStatus == ValueStatus.valid) {
313                     continue;
314                 }
315                 M4<String, String, String, Boolean> info = valueStatusInfo.get(valueStatus);
316                 if (info != null) {
317                     haveProblems = true;
318                 }
319             }
320 
321             if (!verbose && !haveProblems) {
322                 return;
323             }
324             StringBuilder out = new StringBuilder();
325             out.append(
326                     "\nIf the test fails, look at https://cldr.unicode.org/development/cldr-development-site/testattributevalues\n");
327 
328             out.append("file\tCount:\t" + dtdData.dtdType + "\t" + fileCount + "\n");
329             out.append("element\tCount:\t" + dtdData.dtdType + "\t" + elementCount + "\n");
330             out.append("attribute\tCount:\t" + dtdData.dtdType + "\t" + attributeCount + "\n");
331 
332             out.append(
333                     "\nStatus\tDtdType\tElement\tAttribute\tMatch expression\t#Failures\tFailing values\n");
334 
335             for (Entry<ValueStatus, Map<String, Map<String, Map<String, Boolean>>>> entry :
336                     valueStatusInfo) {
337                 ValueStatus valueStatus = entry.getKey();
338                 if (retain != null && !retain.contains(valueStatus)) {
339                     continue;
340                 }
341                 if (!verbose && haveProblems && valueStatus == ValueStatus.valid) {
342                     continue;
343                 }
344                 for (Entry<String, Map<String, Map<String, Boolean>>> entry2 :
345                         entry.getValue().entrySet()) {
346                     String elementName = entry2.getKey();
347                     for (Entry<String, Map<String, Boolean>> entry3 :
348                             entry2.getValue().entrySet()) {
349                         String attributeName = entry3.getKey();
350                         Set<String> validFound = entry3.getValue().keySet();
351                         String matchValue = matchValues.get(elementName + "\t" + attributeName);
352                         out.append(
353                                 valueStatus
354                                         + "\t"
355                                         + dtdData.dtdType
356                                         + "\t"
357                                         + elementName
358                                         + "\t"
359                                         + attributeName
360                                         + "\t"
361                                         + (matchValue == null ? "" : matchValue)
362                                         + "\t"
363                                         + validFound.size()
364                                         + "\t"
365                                         + Joiner.on(", ").join(validFound)
366                                         + "\n");
367                         if (valueStatus == ValueStatus.valid)
368                             try {
369                                 LstrType lstr = LstrType.fromString(elementName);
370                                 Map<String, Validity.Status> codeToStatus =
371                                         VALIDITY.getCodeToStatus(lstr);
372                                 Set<String> missing = new TreeSet<>(codeToStatus.keySet());
373                                 if (lstr == LstrType.variant) {
374                                     for (String item : validFound) {
375                                         missing.remove(item.toLowerCase(Locale.ROOT));
376                                     }
377                                 } else {
378                                     missing.removeAll(validFound);
379                                 }
380                                 Set<String> deprecated =
381                                         VALIDITY.getStatusToCodes(lstr).get(LstrField.Deprecated);
382                                 if (deprecated != null) {
383                                     missing.removeAll(deprecated);
384                                 }
385                                 if (!missing.isEmpty()) {
386                                     out.append(
387                                             "unused"
388                                                     + "\t"
389                                                     + dtdData.dtdType
390                                                     + "\t"
391                                                     + elementName
392                                                     + "\t"
393                                                     + attributeName
394                                                     + "\t"
395                                                     + ""
396                                                     + "\t"
397                                                     + ""
398                                                     + "\t"
399                                                     + Joiner.on(", ").join(missing)
400                                                     + "\n");
401                                 }
402                             } catch (Exception e) {
403                             }
404                     }
405                 }
406             }
407             synchronized (testLog) {
408                 testLog.errln(out.toString());
409             }
410         }
411     }
412 
xTestA()413     public void xTestA() {
414         MatcherPattern mp = AttributeValueValidity.getMatcherPattern("$language");
415         for (String language : LanguageInfo.getAvailable()) {
416             if (mp.matches(language, null)) {
417                 LanguageInfo languageInfo = LanguageInfo.get(language);
418                 show(language, languageInfo);
419             }
420         }
421     }
422 
show(String language, LanguageInfo languageInfo)423     private void show(String language, LanguageInfo languageInfo) {
424         logln(
425                 language
426                         + "\t"
427                         + config.getEnglish().getName(CLDRFile.LANGUAGE_NAME, language)
428                         + "\t"
429                         + languageInfo);
430     }
431 
432     //    public void TestAttributeValueValidity() {
433     //        for (String test : Arrays.asList(
434     //            "supplementalData;     territoryAlias;     replacement;    AA")) {
435     //            quickTest(test);
436     //        }
437     //    }
438 
quickTest(String test)439     private Status quickTest(String test) {
440         List<String> parts = SEMI_SPACE.splitToList(test);
441         Output<String> reason = new Output<>();
442         Status value =
443                 AttributeValueValidity.check(
444                         DtdData.getInstance(DtdType.valueOf(parts.get(0))),
445                         parts.get(1),
446                         parts.get(2),
447                         parts.get(3),
448                         reason);
449         if (value != Status.ok) {
450             errln(test + "\t" + value + "\t" + reason);
451         }
452         return value;
453     }
454 
oldTestSingleFile()455     public void oldTestSingleFile() {
456         Errors errors = new Errors();
457         Set<AttributeValueSpec> missing = new TreeSet<>();
458         VerifyAttributeValues.check(CLDRPaths.MAIN_DIRECTORY + "en.xml", errors, missing);
459         for (AttributeValueSpec entry1 : missing) {
460             errln("Missing Tests: " + entry1);
461         }
462         for (R3<String, AttributeValueSpec, String> item : errors.getRows()) {
463             errln(item.get0() + "; \t" + item.get2() + "; \t" + item.get1());
464         }
465     }
466 
oldTestCoreValidity()467     public void oldTestCoreValidity() {
468         int maxPerDirectory =
469                 // getInclusion() <= 5 ? 20 :
470                 Integer.MAX_VALUE;
471         Matcher fileMatcher = null;
472         Set<AttributeValueSpec> missing = new LinkedHashSet<>();
473         Errors errors = new Errors();
474         VerifyAttributeValues.findAttributeValues(
475                 BASE_DIR,
476                 maxPerDirectory,
477                 fileMatcher,
478                 errors,
479                 missing,
480                 isVerbose() ? getErrorLogPrintWriter() : null);
481 
482         int count = 0;
483         for (Entry<AttributeValidityInfo, String> entry :
484                 AttributeValueValidity.getReadFailures().entrySet()) {
485             errln("Read error: " + ++count + "\t" + entry.getKey() + " => " + entry.getValue());
486         }
487 
488         count = 0;
489         for (R3<DtdType, String, String> entry1 : AttributeValueValidity.getTodoTests()) {
490             warnln(
491                     "Unfinished Test: "
492                             + ++count
493                             + "\t"
494                             + new AttributeValueSpec(
495                                             entry1.get0(), entry1.get1(), entry1.get2(), "")
496                                     .toString());
497         }
498 
499         count = 0;
500         for (AttributeValueSpec entry1 : missing) {
501             errln("Missing Test: " + entry1);
502         }
503 
504         count = 0;
505         for (R3<String, AttributeValueSpec, String> item : errors.getRows()) {
506             if ("deprecated".equals(item.get2()))
507                 errln(
508                         "Deprecated: "
509                                 + ++count
510                                 + "; \t"
511                                 + item.get0()
512                                 + "; \t"
513                                 + item.get1().type
514                                 + "; \t"
515                                 + item.get1().element
516                                 + "; \t"
517                                 + item.get1().attribute
518                                 + "; \t"
519                                 + item.get1().attributeValue
520                                 + "; \t"
521                                 + item.get2());
522         }
523 
524         count = 0;
525         for (R3<String, AttributeValueSpec, String> item : errors.getRows()) {
526             if (!"deprecated".equals(item.get2()))
527                 errln(
528                         "Invalid: "
529                                 + ++count
530                                 + "; \t"
531                                 + item.get0()
532                                 + "; \t"
533                                 + item.get1().type
534                                 + "; \t"
535                                 + item.get1().element
536                                 + "; \t"
537                                 + item.get1().attribute
538                                 + "; \t"
539                                 + item.get1().attributeValue
540                                 + "; \t"
541                                 + item.get2());
542         }
543     }
544 }
545