1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.ImmutableList; 6 import com.google.common.collect.ImmutableMap; 7 import com.google.common.collect.ImmutableSet; 8 import com.google.common.collect.ImmutableSortedSet; 9 import com.google.common.collect.Multimap; 10 import com.ibm.icu.dev.test.TestFmwk; 11 import com.ibm.icu.impl.Row.R3; 12 import com.ibm.icu.util.Output; 13 import java.io.File; 14 import java.io.FileInputStream; 15 import java.io.InputStream; 16 import java.util.Collections; 17 import java.util.HashSet; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Locale; 21 import java.util.Map; 22 import java.util.Map.Entry; 23 import java.util.Set; 24 import java.util.TreeMap; 25 import java.util.TreeSet; 26 import java.util.concurrent.ConcurrentHashMap; 27 import java.util.concurrent.atomic.AtomicInteger; 28 import java.util.regex.Matcher; 29 import java.util.stream.Collectors; 30 import java.util.stream.Stream; 31 import javax.xml.stream.XMLInputFactory; 32 import javax.xml.stream.XMLStreamConstants; 33 import javax.xml.stream.XMLStreamException; 34 import javax.xml.stream.XMLStreamReader; 35 import org.unicode.cldr.tool.VerifyAttributeValues; 36 import org.unicode.cldr.tool.VerifyAttributeValues.Errors; 37 import org.unicode.cldr.util.AttributeValueValidity; 38 import org.unicode.cldr.util.AttributeValueValidity.AttributeValueSpec; 39 import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern; 40 import org.unicode.cldr.util.AttributeValueValidity.Status; 41 import org.unicode.cldr.util.CLDRConfig; 42 import org.unicode.cldr.util.CLDRFile; 43 import org.unicode.cldr.util.CLDRPaths; 44 import org.unicode.cldr.util.ChainedMap; 45 import org.unicode.cldr.util.ChainedMap.M4; 46 import org.unicode.cldr.util.DtdData; 47 import org.unicode.cldr.util.DtdData.ValueStatus; 48 import org.unicode.cldr.util.DtdType; 49 import org.unicode.cldr.util.LanguageInfo; 50 import org.unicode.cldr.util.Organization; 51 import org.unicode.cldr.util.StandardCodes; 52 import org.unicode.cldr.util.StandardCodes.LstrField; 53 import org.unicode.cldr.util.StandardCodes.LstrType; 54 import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo; 55 import org.unicode.cldr.util.Validity; 56 import org.unicode.cldr.util.XPathParts; 57 import org.xml.sax.Attributes; 58 59 public class TestAttributeValues extends TestFmwk { 60 private static final boolean SERIAL = false; 61 62 private static final Validity VALIDITY = Validity.getInstance(); 63 private static final File BASE_DIR = new File(CLDRPaths.BASE_DIRECTORY); 64 public static final Joiner SPACE_JOINER = Joiner.on(' '); 65 public static final Splitter SPACE_SPLITTER = Splitter.on(' ').trimResults().omitEmptyStrings(); 66 static final Splitter SEMI_SPACE = Splitter.on(';').trimResults().omitEmptyStrings(); 67 private static final CLDRConfig config = CLDRConfig.getInstance(); 68 69 static final List<String> COMMON_AND_SEED = 70 ImmutableList.of(CLDRPaths.COMMON_DIRECTORY, CLDRPaths.SEED_DIRECTORY); 71 main(String[] args)72 public static void main(String[] args) { 73 new TestAttributeValues().run(args); 74 } 75 TestValid()76 public void TestValid() { 77 String dtdTypeArg = params.props == null ? null : (String) params.props.get("dtdtype"); 78 79 // short- circuits for testing. null means do all 80 Set<DtdType> checkTypes = 81 dtdTypeArg == null 82 ? DtdType.STANDARD_SET 83 : Collections.singleton(DtdType.valueOf(dtdTypeArg)); 84 ImmutableSet<ValueStatus> showStatuses = 85 null; // ImmutableSet.of(ValueStatus.invalid, ValueStatus.unknown); 86 87 for (DtdType dtdType : checkTypes) { 88 PathChecker pathChecker = new PathChecker(this, DtdData.getInstance(dtdType)); 89 for (String mainDirs : COMMON_AND_SEED) { 90 Set<String> files = new TreeSet<>(); 91 for (String stringDir : dtdType.directories) { 92 addXMLFiles(dtdType, mainDirs + stringDir, files); 93 if (isVerbose()) 94 synchronized (pathChecker.testLog) { 95 warnln(mainDirs + stringDir); 96 } 97 } 98 Stream<String> stream = SERIAL ? files.stream() : files.parallelStream(); 99 stream.forEach(file -> checkFile(pathChecker, file)); 100 101 // for (String file : files) { 102 // checkFile(pathChecker, file); 103 // } 104 } 105 pathChecker.show(isVerbose(), showStatuses); 106 } 107 // List<String> localesToTest = Arrays.asList("en", "root"); // , "zh", "hi", "ja", 108 // "ru", "cy" 109 // Set<String> localesToTest = 110 // config.getCommonAndSeedAndMainAndAnnotationsFactory().getAvailable(); 111 // // TODO, add all other files 112 113 // for (String locale : localesToTest) { 114 // CLDRFile file = config.getCLDRFile(locale, false); 115 // for (String dpath : file) { 116 // String path = file.getFullXPath(dpath); 117 // pathChecker.checkPath(path); 118 // } 119 // } 120 } 121 122 static final Set<String> CLDR_LOCALES = 123 ImmutableSortedSet.copyOf( 124 StandardCodes.make().getLocaleCoverageLocales(Organization.cldr).stream() 125 .map(x -> x + ".xml") 126 .collect(Collectors.toSet())); 127 addXMLFiles(DtdType dtdType, String path, Set<String> files)128 private void addXMLFiles(DtdType dtdType, String path, Set<String> files) { 129 File dirFile = new File(path); 130 if (!dirFile.exists()) { 131 return; 132 } 133 if (!dirFile.isDirectory()) { 134 // if (getInclusion() <= 5 135 // && dtdType == DtdType.ldml) { 136 // if (path.contains("/annotationsDerived/")) { 137 // return; 138 // } 139 // String ending = path.substring(path.lastIndexOf('/')+1); 140 // if (!CLDR_LOCALES.contains(ending)) { 141 // return; 142 // } 143 // } 144 files.add(path); 145 } else { 146 for (String file : dirFile.list()) { 147 String localeID = file.replace(".xml", ""); 148 if (StandardCodes.isLocaleAtLeastBasic(localeID)) { 149 addXMLFiles(dtdType, path + "/" + file, files); 150 } 151 } 152 } 153 } 154 checkFile(PathChecker pathChecker, String fullFile)155 private void checkFile(PathChecker pathChecker, String fullFile) { 156 if (!fullFile.endsWith(".xml")) { 157 return; 158 } 159 pathChecker.fileCount.incrementAndGet(); 160 // if (isVerbose()) synchronized (this) { 161 // logln(fullFile); 162 // } 163 XMLInputFactory f = XMLInputFactory.newInstance(); 164 // XMLInputFactory f = 165 // XMLInputFactory.newFactory("org.apache.xerces.jaxp.SAXParserFactoryImpl", 166 // ClassLoader.getSystemClassLoader()); 167 168 int _elementCount = 0; 169 int _attributeCount = 0; 170 String lastElement = null; 171 172 try { 173 XMLStreamReader r = null; 174 try (InputStream fis = new FileInputStream(fullFile)) { 175 r = f.createXMLStreamReader(fullFile, fis); 176 String element = null; 177 try { 178 while (r.hasNext()) { 179 switch (r.next()) { 180 case XMLStreamConstants.START_ELEMENT: 181 element = r.getLocalName(); 182 lastElement = element; 183 ++_elementCount; 184 int attributeSize = r.getAttributeCount(); 185 for (int i = 0; i < attributeSize; ++i) { 186 ++_attributeCount; 187 String attribute = r.getAttributeLocalName(i); 188 String attributeValue = r.getAttributeValue(i); 189 pathChecker.checkAttribute(element, attribute, attributeValue); 190 } 191 break; 192 } 193 } 194 } catch (XMLStreamException e) { 195 synchronized (pathChecker.testLog) { 196 pathChecker.testLog.errln(fullFile + "error"); 197 } 198 e.printStackTrace(pathChecker.testLog.getLogPrintWriter()); 199 } 200 // XMLFileReader.read("noId", inputStreamReader, -1, true, myHandler); 201 } catch (XMLStreamException e) { 202 throw (IllegalArgumentException) 203 new IllegalArgumentException("Can't read " + fullFile).initCause(e); 204 } catch (Throwable e) { 205 if (r == null) throw e; 206 throw new IllegalArgumentException(" at " + r.getLocation(), e); 207 } 208 } catch (Exception e) { 209 e.printStackTrace(this.getErrorLogPrintWriter()); 210 errln("Exception occured in " + fullFile + " after parsing " + lastElement + " - " + e); 211 } 212 pathChecker.elementCount.addAndGet(_elementCount); 213 pathChecker.attributeCount.addAndGet(_attributeCount); 214 } 215 216 static class PathChecker { 217 private final ChainedMap.M5<ValueStatus, String, String, String, Boolean> valueStatusInfo = 218 ChainedMap.of( 219 new TreeMap(), new TreeMap(), new TreeMap(), new TreeMap(), Boolean.class); 220 private final Set<String> seen = new HashSet<>(); 221 private final Map<String, Map<String, Map<String, Boolean>>> seenEAV = 222 new ConcurrentHashMap<>(); 223 private final TestFmwk testLog; 224 private final DtdData dtdData; 225 private final Multimap<String, String> needsTesting; 226 private final Map<String, String> matchValues; 227 228 private final AtomicInteger fileCount = new AtomicInteger(); 229 private final AtomicInteger elementCount = new AtomicInteger(); 230 private final AtomicInteger attributeCount = new AtomicInteger(); 231 PathChecker(TestFmwk testLog, DtdData dtdData)232 public PathChecker(TestFmwk testLog, DtdData dtdData) { 233 this.testLog = testLog; 234 this.dtdData = dtdData; 235 Map<String, String> _matchValues = new TreeMap<>(); 236 needsTesting = dtdData.getNonEnumerated(_matchValues); 237 matchValues = ImmutableMap.copyOf(_matchValues); 238 } 239 checkPath(String path)240 private void checkPath(String path) { 241 if (seen.contains(path)) { 242 return; 243 } 244 seen.add(path); 245 if (path.contains("length-point")) { 246 int debug = 0; 247 } 248 XPathParts parts = XPathParts.getFrozenInstance(path); 249 for (int elementIndex = 0; elementIndex < parts.size(); ++elementIndex) { 250 String element = parts.getElement(elementIndex); 251 for (Entry<String, String> entry : parts.getAttributes(elementIndex).entrySet()) { 252 String attribute = entry.getKey(); 253 String attrValue = entry.getValue(); 254 checkAttribute(element, attribute, attrValue); 255 } 256 } 257 } 258 checkElement(String element, Attributes atts)259 public void checkElement(String element, Attributes atts) { 260 int length = atts.getLength(); 261 for (int i = 0; i < length; ++i) { 262 checkAttribute(element, atts.getQName(i), atts.getValue(i)); 263 } 264 } 265 checkAttribute(String element, String attribute, String attrValue)266 private void checkAttribute(String element, String attribute, String attrValue) { 267 // skip cases we know we don't need to test 268 if (!needsTesting.containsEntry(element, attribute)) { 269 return; 270 } 271 // check if we've seen the EAV yet 272 // we don't need to synchronize because a miss isn't serious 273 Map<String, Map<String, Boolean>> sub = seenEAV.get(element); 274 if (sub == null) { 275 Map<String, Map<String, Boolean>> subAlready = 276 seenEAV.putIfAbsent(element, sub = new ConcurrentHashMap<>()); 277 if (subAlready != null) { 278 sub = subAlready; // discards empty map 279 } 280 } 281 Map<String, Boolean> set = sub.get(attribute); 282 if (set == null) { 283 Map<String, Boolean> setAlready = 284 sub.putIfAbsent(attribute, set = new ConcurrentHashMap<>()); 285 if (setAlready != null) { 286 set = setAlready; // discards empty map 287 } 288 } 289 if (set.putIfAbsent(attrValue, Boolean.TRUE) != null) { 290 return; 291 } 292 293 // get the status & store 294 ValueStatus valueStatus = dtdData.getValueStatus(element, attribute, attrValue); 295 if (valueStatus != ValueStatus.valid) { 296 // Set breakpoint here for debugging (referenced from 297 // http://cldr.unicode.org/development/testattributevalues) 298 dtdData.getValueStatus(element, attribute, attrValue); 299 } 300 synchronized (valueStatusInfo) { 301 valueStatusInfo.put(valueStatus, element, attribute, attrValue, Boolean.TRUE); 302 } 303 } 304 show(boolean verbose, ImmutableSet<ValueStatus> retain)305 void show(boolean verbose, ImmutableSet<ValueStatus> retain) { 306 if (dtdData.dtdType == DtdType.keyboard3 307 && testLog.logKnownIssue("CLDR-14974", "skipping for keyboard")) { 308 testLog.warnln("Skipping for keyboard3"); 309 } 310 boolean haveProblems = false; 311 for (ValueStatus valueStatus : ValueStatus.values()) { 312 if (valueStatus == ValueStatus.valid) { 313 continue; 314 } 315 M4<String, String, String, Boolean> info = valueStatusInfo.get(valueStatus); 316 if (info != null) { 317 haveProblems = true; 318 } 319 } 320 321 if (!verbose && !haveProblems) { 322 return; 323 } 324 StringBuilder out = new StringBuilder(); 325 out.append( 326 "\nIf the test fails, look at https://cldr.unicode.org/development/cldr-development-site/testattributevalues\n"); 327 328 out.append("file\tCount:\t" + dtdData.dtdType + "\t" + fileCount + "\n"); 329 out.append("element\tCount:\t" + dtdData.dtdType + "\t" + elementCount + "\n"); 330 out.append("attribute\tCount:\t" + dtdData.dtdType + "\t" + attributeCount + "\n"); 331 332 out.append( 333 "\nStatus\tDtdType\tElement\tAttribute\tMatch expression\t#Failures\tFailing values\n"); 334 335 for (Entry<ValueStatus, Map<String, Map<String, Map<String, Boolean>>>> entry : 336 valueStatusInfo) { 337 ValueStatus valueStatus = entry.getKey(); 338 if (retain != null && !retain.contains(valueStatus)) { 339 continue; 340 } 341 if (!verbose && haveProblems && valueStatus == ValueStatus.valid) { 342 continue; 343 } 344 for (Entry<String, Map<String, Map<String, Boolean>>> entry2 : 345 entry.getValue().entrySet()) { 346 String elementName = entry2.getKey(); 347 for (Entry<String, Map<String, Boolean>> entry3 : 348 entry2.getValue().entrySet()) { 349 String attributeName = entry3.getKey(); 350 Set<String> validFound = entry3.getValue().keySet(); 351 String matchValue = matchValues.get(elementName + "\t" + attributeName); 352 out.append( 353 valueStatus 354 + "\t" 355 + dtdData.dtdType 356 + "\t" 357 + elementName 358 + "\t" 359 + attributeName 360 + "\t" 361 + (matchValue == null ? "" : matchValue) 362 + "\t" 363 + validFound.size() 364 + "\t" 365 + Joiner.on(", ").join(validFound) 366 + "\n"); 367 if (valueStatus == ValueStatus.valid) 368 try { 369 LstrType lstr = LstrType.fromString(elementName); 370 Map<String, Validity.Status> codeToStatus = 371 VALIDITY.getCodeToStatus(lstr); 372 Set<String> missing = new TreeSet<>(codeToStatus.keySet()); 373 if (lstr == LstrType.variant) { 374 for (String item : validFound) { 375 missing.remove(item.toLowerCase(Locale.ROOT)); 376 } 377 } else { 378 missing.removeAll(validFound); 379 } 380 Set<String> deprecated = 381 VALIDITY.getStatusToCodes(lstr).get(LstrField.Deprecated); 382 if (deprecated != null) { 383 missing.removeAll(deprecated); 384 } 385 if (!missing.isEmpty()) { 386 out.append( 387 "unused" 388 + "\t" 389 + dtdData.dtdType 390 + "\t" 391 + elementName 392 + "\t" 393 + attributeName 394 + "\t" 395 + "" 396 + "\t" 397 + "" 398 + "\t" 399 + Joiner.on(", ").join(missing) 400 + "\n"); 401 } 402 } catch (Exception e) { 403 } 404 } 405 } 406 } 407 synchronized (testLog) { 408 testLog.errln(out.toString()); 409 } 410 } 411 } 412 xTestA()413 public void xTestA() { 414 MatcherPattern mp = AttributeValueValidity.getMatcherPattern("$language"); 415 for (String language : LanguageInfo.getAvailable()) { 416 if (mp.matches(language, null)) { 417 LanguageInfo languageInfo = LanguageInfo.get(language); 418 show(language, languageInfo); 419 } 420 } 421 } 422 show(String language, LanguageInfo languageInfo)423 private void show(String language, LanguageInfo languageInfo) { 424 logln( 425 language 426 + "\t" 427 + config.getEnglish().getName(CLDRFile.LANGUAGE_NAME, language) 428 + "\t" 429 + languageInfo); 430 } 431 432 // public void TestAttributeValueValidity() { 433 // for (String test : Arrays.asList( 434 // "supplementalData; territoryAlias; replacement; AA")) { 435 // quickTest(test); 436 // } 437 // } 438 quickTest(String test)439 private Status quickTest(String test) { 440 List<String> parts = SEMI_SPACE.splitToList(test); 441 Output<String> reason = new Output<>(); 442 Status value = 443 AttributeValueValidity.check( 444 DtdData.getInstance(DtdType.valueOf(parts.get(0))), 445 parts.get(1), 446 parts.get(2), 447 parts.get(3), 448 reason); 449 if (value != Status.ok) { 450 errln(test + "\t" + value + "\t" + reason); 451 } 452 return value; 453 } 454 oldTestSingleFile()455 public void oldTestSingleFile() { 456 Errors errors = new Errors(); 457 Set<AttributeValueSpec> missing = new TreeSet<>(); 458 VerifyAttributeValues.check(CLDRPaths.MAIN_DIRECTORY + "en.xml", errors, missing); 459 for (AttributeValueSpec entry1 : missing) { 460 errln("Missing Tests: " + entry1); 461 } 462 for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { 463 errln(item.get0() + "; \t" + item.get2() + "; \t" + item.get1()); 464 } 465 } 466 oldTestCoreValidity()467 public void oldTestCoreValidity() { 468 int maxPerDirectory = 469 // getInclusion() <= 5 ? 20 : 470 Integer.MAX_VALUE; 471 Matcher fileMatcher = null; 472 Set<AttributeValueSpec> missing = new LinkedHashSet<>(); 473 Errors errors = new Errors(); 474 VerifyAttributeValues.findAttributeValues( 475 BASE_DIR, 476 maxPerDirectory, 477 fileMatcher, 478 errors, 479 missing, 480 isVerbose() ? getErrorLogPrintWriter() : null); 481 482 int count = 0; 483 for (Entry<AttributeValidityInfo, String> entry : 484 AttributeValueValidity.getReadFailures().entrySet()) { 485 errln("Read error: " + ++count + "\t" + entry.getKey() + " => " + entry.getValue()); 486 } 487 488 count = 0; 489 for (R3<DtdType, String, String> entry1 : AttributeValueValidity.getTodoTests()) { 490 warnln( 491 "Unfinished Test: " 492 + ++count 493 + "\t" 494 + new AttributeValueSpec( 495 entry1.get0(), entry1.get1(), entry1.get2(), "") 496 .toString()); 497 } 498 499 count = 0; 500 for (AttributeValueSpec entry1 : missing) { 501 errln("Missing Test: " + entry1); 502 } 503 504 count = 0; 505 for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { 506 if ("deprecated".equals(item.get2())) 507 errln( 508 "Deprecated: " 509 + ++count 510 + "; \t" 511 + item.get0() 512 + "; \t" 513 + item.get1().type 514 + "; \t" 515 + item.get1().element 516 + "; \t" 517 + item.get1().attribute 518 + "; \t" 519 + item.get1().attributeValue 520 + "; \t" 521 + item.get2()); 522 } 523 524 count = 0; 525 for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { 526 if (!"deprecated".equals(item.get2())) 527 errln( 528 "Invalid: " 529 + ++count 530 + "; \t" 531 + item.get0() 532 + "; \t" 533 + item.get1().type 534 + "; \t" 535 + item.get1().element 536 + "; \t" 537 + item.get1().attribute 538 + "; \t" 539 + item.get1().attributeValue 540 + "; \t" 541 + item.get2()); 542 } 543 } 544 } 545