/*
 ******************************************************************************
 * Copyright (C) 2004, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
package org.unicode.cldr.test;

import static org.unicode.cldr.util.PathUtilities.getNormalizedPath;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
import org.unicode.cldr.util.*;
import org.xml.sax.SAXException;

/**
 * Initial version of CLDR tests. Each test is named TestXXX. To run all the tests, use the options
 *
 * <blockquote>
 *
 * -nothrow
 *
 * </blockquote>
 *
 * To run a particular set of tests, include their names, like
 *
 * <blockquote>
 *
 * -nothrow TestForIllegalAttributeValues TestMinimalLocalization
 *
 * </blockquote>
 *
 * To show more information (logln), add -verbose
 *
 * <p>There are some system properties that can be used with the test. <br>
 * {@code -DSHOW_FILES=<anything>} shows all create/open of files. <br>
 * {@code -DXML_MATCH=<regular expression>} skips all locales that don't match the regular
 * expression <br>
 * {@code -DXML_MAIN_DIR=<filesystem directory>} resets to a different main directory (eg not
 * cldr/common/main). For example, some of the tools generate into a locale directory like {@code
 * -DXML_MAIN_DIR=C:\Unicode-CVS2\cldr\common\gen\main\} so this can be used to check that
 * directory. <br>
 * {@code -DXML_SKIP_DRAFT=<anything>} skips draft items; the property only has to be set, its
 * value is not inspected.
 *
 * <p>The properties above are read in {@link #main(String[])} only.
 */
public class CLDRTest extends TestFmwk {
    /** Regular expression selecting the locales to test; set in {@link #main(String[])}. */
    private static String MATCH;

    /** Directory holding the locale XML files; set in {@link #main(String[])}. */
    private static String MAIN_DIR;

    /** True if the XML_SKIP_DRAFT system property is set; set in {@link #main(String[])}. */
    private static boolean SKIP_DRAFT;

    private Set<String> locales;
    private Set<String> languageLocales;
    private Factory cldrFactory;
    private CLDRFile resolvedRoot;
    private CLDRFile resolvedEnglish;
    private final UnicodeSet commonAndInherited =
            new UnicodeSet("[[:script=common:][:script=inherited:][:alphabetic=false:]]");
    private static final String[] WIDTHS = {"narrow", "wide", "abbreviated", "short"};
    private static final String[] MONTHORDAYS = {"day", "month"};
    private Map<String, String> localeNameCache = new HashMap<>();
    private CLDRFile english = null;

    /** Lines collected by the tests and written out by {@link #TestZZZZHack()}. */
    private Set<String> surveyInfo = new TreeSet<>();

    /**
     * TestFmwk boilerplate: reads the XML_MATCH, XML_MAIN_DIR and XML_SKIP_DRAFT system
     * properties, runs the tests and prints the elapsed time.
     */
    public static void main(String[] args) throws Exception {
        MATCH = System.getProperty("XML_MATCH");
        if (MATCH == null) {
            MATCH = ".*";
        } else {
            System.out.println("Resetting MATCH:" + MATCH);
        }
        MAIN_DIR = System.getProperty("XML_MAIN_DIR");
        if (MAIN_DIR == null) {
            MAIN_DIR = CLDRPaths.MAIN_DIRECTORY;
        } else {
            System.out.println("Resetting MAIN_DIR:" + MAIN_DIR);
        }
        SKIP_DRAFT = System.getProperty("XML_SKIP_DRAFT") != null;
        if (SKIP_DRAFT) {
            System.out.println("Skipping Draft locales");
        }

        double deltaTime = System.currentTimeMillis();
        new CLDRTest().run(args);
        deltaTime = System.currentTimeMillis() - deltaTime;
        System.out.println("Seconds: " + deltaTime / 1000);
    }

    /**
     * Writes the collected survey information to surveyInfo.txt in the generation directory.
     *
     * <p>Hack: the name is chosen to get the file written at the end of the run.
     */
    public void TestZZZZHack() throws IOException {
        // try-with-resources so the writer is closed even if a write fails
        try (PrintWriter surveyFile =
                FileUtilities.openUTF8Writer(CLDRPaths.GEN_DIRECTORY, "surveyInfo.txt")) {
            for (String s : surveyInfo) {
                surveyFile.println(s);
            }
        }
    }

    /** TestFmwk boilerplate: builds the factory and the resolved root and English files. */
    public CLDRTest() throws SAXException, IOException {
        // TODO parameterize the directory and filter
        cldrFactory = Factory.make(MAIN_DIR, MATCH);
        // CLDRKey.main(new String[]{"-mde.*"});
        locales = cldrFactory.getAvailable();
        languageLocales = cldrFactory.getAvailableLanguages();
        resolvedRoot = cldrFactory.make(LocaleNames.ROOT, true);
        /*
         * PrintWriter out = FileUtilities.openUTF8Writer(Utility.GEN_DIRECTORY + "resolved/", "root.xml");
         * CLDRFile temp = (CLDRFile) resolvedRoot.clone();
         * temp.write(out);
         * out.close();
         */
        resolvedEnglish = cldrFactory.make("en", true);
    }

    /**
     * Check to make sure that the currency formats are kosher: every numeric pattern in every
     * locale must already be in the canonical form produced by {@link DisplayAndInputProcessor}.
     */
    public void TestCurrencyFormats() {
        // String decimal =
        // "//ldml/numbers/decimalFormats/decimalFormatLength/decimalFormat[@type=\"standard\"]/";
        // String currency =
        // "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/";
        for (String locale : locales) {
            boolean isPOSIX = locale.indexOf("POSIX") >= 0;
            logln("Testing: " + locale);
            CLDRFile item = cldrFactory.make(locale, false);
            for (String xpath : item) {
                NumericType type = NumericType.getNumericType(xpath);
                if (type == NumericType.NOT_NUMERIC) continue;
                String value = item.getStringValue(xpath);
                // at this point, we only have paths that NumericType classifies as numeric
                String pattern = DisplayAndInputProcessor.getCanonicalPattern(value, type, isPOSIX);
                if (!pattern.equals(value)) {
                    String draft = "";
                    if (item.getFullXPath(xpath).indexOf("[@draft=\"unconfirmed\"]") >= 0) {
                        draft = " [draft]";
                    }
                    assertEquals(
                            getLocaleAndName(locale) + draft + " " + type + " pattern incorrect",
                            pattern,
                            value);
                }
            }
        }
    }

    /** Internal class: a value, the full xpath it was first seen at, and how often it was seen. */
    private static class ValueCount {
        int count = 1;
        String value;
        String fullxpath;
    }

    /**
     * Verify that all the children of a language locale do not have the same value for the same
     * key.
     */
    public void TestCommonChildren() {
        if (disableUntilLater("TestCommonChildren")) return;

        Map<String, ValueCount> currentValues = new TreeMap<>();
        Set<String> okValues = new TreeSet<>();

        for (String parent : languageLocales) {
            logln("Testing: " + parent);
            currentValues.clear();
            okValues.clear();
            Set<String> availableWithParent = cldrFactory.getAvailableWithParent(parent, true);
            for (String locale : availableWithParent) {
                logln("\tTesting: " + locale);
                CLDRFile item = cldrFactory.make(locale, false);
                // Walk through all the xpaths, adding to currentValues.
                // Whenever two values for the same xpath are different, we remove from
                // currentValues, and add to okValues.
                for (String xpath : item) {
                    if (okValues.contains(xpath)) continue;
                    if (xpath.startsWith("//ldml/identity/")) continue; // skip identity elements
                    String v = item.getStringValue(xpath);
                    ValueCount last = currentValues.get(xpath);
                    if (last == null) {
                        ValueCount vc = new ValueCount();
                        vc.value = v;
                        vc.fullxpath = item.getFullXPath(xpath);
                        currentValues.put(xpath, vc);
                    } else if (v.equals(last.value)) {
                        last.count++;
                    } else {
                        okValues.add(xpath);
                        currentValues.remove(xpath);
                    }
                }
            }
            // At the end, only the keys left in currentValues are (possibly) faulty.
            // They are actually bad IFF either
            // (a) the count is equal to the total (thus all children are the same), or
            // (b) their value is the same as the parent's resolved value (thus all children are
            // the same or the same as the inherited parent value).
            if (currentValues.isEmpty()) continue;
            int size = availableWithParent.size();
            CLDRFile parentCLDR = cldrFactory.make(parent, true);
            for (Map.Entry<String, ValueCount> entry : currentValues.entrySet()) {
                String xpath = entry.getKey();
                ValueCount vc = entry.getValue();
                // Case (b) compares value with value and full xpath with full xpath; the full
                // xpath was previously compared against the parent's string value by mistake.
                if (vc.count == size
                        || (vc.value.equals(parentCLDR.getStringValue(xpath))
                                && vc.fullxpath.equals(parentCLDR.getFullXPath(xpath)))) {
                    String draft = "";
                    if (vc.fullxpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) draft = " [draft]";
                    String count = (vc.count == size ? "" : vc.count + "/") + size;
                    warnln(
                            getLocaleAndName(parent)
                                    + draft
                                    + "\tall children ("
                                    + count
                                    + ") have same value for:\t"
                                    + xpath
                                    + ";\t"
                                    + vc.value);
                }
            }
        }
    }

    /** Path fragments whose values are not checked against the exemplar characters. */
    static String[] EXEMPLAR_SKIPS = {
        "/hourFormat", "/exemplarCharacters", "/pattern", "/localizedPatternChars"
    };

    /** Returns true if the xpath contains one of the {@link #EXEMPLAR_SKIPS} fragments. */
    private static boolean isExemplarSkip(String xpath) {
        for (String skip : EXEMPLAR_SKIPS) {
            if (xpath.indexOf(skip) > 0) return true;
        }
        return false;
    }

    /** Check that the exemplars include all characters in the data. */
    public void TestThatExemplarsContainAll() {
        UnicodeSet allExemplars = new UnicodeSet();
        if (disableUntilLater("TestThatExemplarsContainAll")) return;
        Set<String> counts = new TreeSet<>();
        int totalCount = 0;
        UnicodeSet localeMissing = new UnicodeSet();
        NumberFormat nf = new DecimalFormat("000");
        for (String locale : locales) {
            if (locale.equals(LocaleNames.ROOT)) continue;
            // FIX LATER: the exemplars are taken from the unresolved file, too
            CLDRFile plain = cldrFactory.make(locale, false);
            UnicodeSet exemplars = getFixedExemplarSet(locale, plain);
            int count = 0;
            localeMissing.clear();
            for (String xpath : plain) {
                if (isExemplarSkip(xpath)) continue; // skip some items.
                if (SKIP_DRAFT) {
                    String fullxpath = plain.getFullXPath(xpath);
                    if (fullxpath.indexOf("[@draft=\"unconfirmed\"") > 0) continue;
                }
                if (xpath.startsWith("//ldml/posix/messages")) continue;
                String value = plain.getStringValue(xpath);
                allExemplars.addAll(value);
                if (!exemplars.containsAll(value)) {
                    count++;
                    UnicodeSet missing = new UnicodeSet().addAll(value).removeAll(exemplars);
                    localeMissing.addAll(missing);
                    logln(
                            getLocaleAndName(locale)
                                    + "\t"
                                    + xpath
                                    + "\t<"
                                    + value
                                    + "> contains "
                                    + missing
                                    + ", not in exemplars");
                    surveyInfo.add(
                            locale
                                    + "\t"
                                    + xpath
                                    + "\t'"
                                    + value
                                    + "' contains characters "
                                    + missing.toPattern(false)
                                    + ", which are not in exemplars");
                }
            }
            if (count != 0) {
                totalCount += count;
                counts.add(
                        nf.format(count) + "\t" + getLocaleAndName(locale) + "\t" + localeMissing);
            }
            if (localeMissing.size() != 0) {
                errln(getLocaleAndName(locale) + "\t uses " + localeMissing + ", not in exemplars");
            }
        }
        for (String c : counts) {
            logln(c);
        }
        logln("Total Count: " + totalCount);
        System.out.println("All exemplars: " + allExemplars.toPattern(true));
    }

    /**
     * Get Date-Time in milliseconds. Only year, month (0-based) and day are set; the time-of-day
     * fields keep the values of the moment the calendar was created.
     */
    private static long getDateTimeinMillis(int year, int month, int date) {
        Calendar cal = Calendar.getInstance();
        cal.set(year, month, date);
        return cal.getTimeInMillis();
    }

    /** Tests guarded by {@link #disableUntilLater(String)} are skipped before this date. */
    static final long disableDate = getDateTimeinMillis(2005, 6 - 1, 3);

    /**
     * Returns true (after issuing a warning) if the current time is before {@link #disableDate},
     * in which case the named test should be skipped.
     */
    private boolean disableUntilLater(String string) {
        if (new Date().getTime() >= disableDate) return false;
        warnln("Disabling " + string + " until " + new Date(disableDate));
        return true;
    }

    /**
     * Internal: returns the union of the main, "standard" and "auxiliary" exemplar sets plus the
     * common/inherited/non-alphabetic characters. Reports an error if the main set is empty or if
     * the auxiliary set overlaps the main one.
     */
    private UnicodeSet getFixedExemplarSet(String locale, CLDRFile cldrfile) {
        UnicodeSet exemplars = getExemplarSet(cldrfile, "");
        if (exemplars.size() == 0) {
            errln(getLocaleAndName(locale) + " has empty exemplar set");
        }
        exemplars.addAll(getExemplarSet(cldrfile, "standard"));
        UnicodeSet auxiliary = getExemplarSet(cldrfile, "auxiliary");
        if (exemplars.containsSome(auxiliary)) {
            // work on copies so that building the message does not modify the sets
            errln(
                    getLocaleAndName(locale)
                            + "\tAuxiliary & main exemplars should be disjoint, but overlap with "
                            + new UnicodeSet(exemplars).retainAll(auxiliary)
                            + ": change auxiliary to "
                            + new UnicodeSet(auxiliary).removeAll(exemplars));
        }
        exemplars.addAll(auxiliary);
        exemplars.addAll(commonAndInherited);
        return exemplars;
    }

    /**
     * Gets an exemplar set. Also verifies that the set contains no properties.
     *
     * @param cldrfile the file to read the exemplar characters from
     * @param type the exemplar type, or the empty string for the main exemplars
     * @return the exemplar set (case-closed, without U+0020), or an empty set if there is no value
     *     or the value cannot be parsed
     */
    public UnicodeSet getExemplarSet(CLDRFile cldrfile, String type) {
        if (type.length() != 0) type = "[@type=\"" + type + "\"]";
        String v = cldrfile.getStringValue("//ldml/characters/exemplarCharacters" + type);
        if (v == null) return new UnicodeSet();
        // a property may also be the very first thing in the pattern, so test for >= 0
        if (v.indexOf("[:") >= 0 || v.indexOf("\\p{") >= 0) {
            errln(
                    getLocaleAndName(cldrfile.getLocaleID())
                            + " exemplar pattern contains property: "
                            + v);
        }
        try {
            UnicodeSet result = new UnicodeSet(v, UnicodeSet.CASE);
            result.remove(0x20);
            return result;
        } catch (RuntimeException e) {
            e.printStackTrace();
            errln(
                    getLocaleAndName(cldrfile.getLocaleID())
                            + " has illegal exemplar set: <"
                            + v
                            + ">");
            return new UnicodeSet();
        }
    }

    /**
     * @return the locale ID followed by its English name in parentheses
     */
    public String getLocaleAndName(String locale) {
        return locale + " (" + getLocaleName(locale) + ")";
    }

    /**
     * @return the ID plus its localization (for language, script, and territory IDs only)
     */
    public String getIDAndLocalization(String id) {
        return id + " " + getLocalization(id);
    }

    /**
     * @return the localization (for language, script, and territory IDs only)
     */
    public String getLocalization(String id) {
        if (english == null) english = cldrFactory.make("en", true);
        if (id.length() == 0) return "?";
        // pick on basis of case
        char ch = id.charAt(0);
        if ('a' <= ch && ch <= 'z') return getName(english, "languages/language", id);
        if (id.length() == 4 && 'A' <= ch && ch <= 'Z') {
            return getName(english, "scripts/script", id);
        }
        return getName(english, "territories/territory", id);
    }

    /** Internal: the IDs with their localizations, separated by "; ". */
    private String getIDAndLocalization(Set<String> missing) {
        StringBuilder buffer = new StringBuilder();
        for (String next : missing) {
            if (buffer.length() != 0) buffer.append("; ");
            buffer.append(getIDAndLocalization(next));
        }
        return buffer.toString();
    }

    /**
     * @return the English name of the locale; results are cached per locale ID
     */
    public String getLocaleName(String locale) {
        String name = localeNameCache.get(locale);
        if (name != null) return name;
        if (english == null) english = cldrFactory.make("en", true);
        String result = english.getName(locale);
        /*
         * Collection c = Utility.splitList(locale, '_', false, null);
         * String[] pieces = new String[c.size()];
         * c.toArray(pieces);
         * int i = 0;
         * String result = getName(english, "languages/language", pieces[i++]);
         * if (pieces[i].length() == 0) return result;
         * if (pieces[i].length() == 4) {
         * result += " " + getName(english, "scripts/script", pieces[i++]);
         * }
         * if (pieces[i].length() == 0) return result;
         * result += " " + getName(english, "territories/territory", pieces[i++]);
         * if (pieces[i].length() == 0) return result;
         * result += " " + getName(english, "variant/variants", pieces[i++]);
         */
        localeNameCache.put(locale, result);
        return result;
    }

    /** Internal: the display name for the given kind and type, or {@code <type>} if missing. */
    private String getName(CLDRFile english, String kind, String type) {
        String v =
                english.getStringValue(
                        "//ldml/localeDisplayNames/" + kind + "[@type=\"" + type + "\"]");
        if (v == null) return "<" + type + ">";
        return v;
    }

    /**
     * Make sure we are only using attribute values that are in RFC3066bis, the Olson database (with
     * aliases removed) or ISO 4217
     */
    public void TestForIllegalAttributeValues() {
        // check for illegal attribute values that are not in the DTD
        Map<String, Set<String>> result = new TreeMap<>();
        Map<String, Set<String>> totalResult = new TreeMap<>();
        for (String locale : locales) {
            logln("Testing: " + locale);
            CLDRFile item = cldrFactory.make(locale, false);
            result.clear();
            Set<String> xpathFailures = null; // don't collect
            checkAttributeValidity(item, result, xpathFailures);

            // now show
            for (Map.Entry<String, Set<String>> entry : result.entrySet()) {
                String code = entry.getKey();
                Set<String> avalues = entry.getValue();
                errln(
                        getLocaleAndName(locale)
                                + "\tillegal attribute value for "
                                + code
                                + ", value:\t"
                                + show(avalues));
                Set<String> totalvalues = totalResult.get(code);
                if (totalvalues == null) {
                    totalvalues = new TreeSet<>();
                    totalResult.put(code, totalvalues);
                }
                totalvalues.addAll(avalues);
            }
        }
        for (Map.Entry<String, Set<String>> entry : totalResult.entrySet()) {
            errln(
                    "All illegal attribute values for "
                            + entry.getKey()
                            + ", value:\t"
                            + show(entry.getValue()));
        }
    }

    /**
     * Tests whether the display names have any collisions, e.g. if in the fully resolved locale $
     * is used for both USD and AUD.
     */
    public void TestDisplayNameCollisions() {
        if (disableUntilLater("TestDisplayNameCollisions")) return;

        // generic array creation is not possible; only Map<String, String> is ever stored
        @SuppressWarnings("unchecked")
        Map<String, String>[] maps = new HashMap[CLDRFile.LIMIT_TYPES];
        for (int i = 0; i < maps.length; ++i) {
            maps[i] = new HashMap<>();
        }
        Set<String> collisions = new TreeSet<>();
        for (String locale : locales) {
            CLDRFile item = cldrFactory.make(locale, true);
            for (Map<String, String> map : maps) {
                map.clear();
            }
            collisions.clear();

            for (String xpath : item) {
                int nameType = CLDRFile.getNameType(xpath);
                if (nameType < 0) continue;
                String value = item.getStringValue(xpath);
                String xpath2 = maps[nameType].get(value);
                if (xpath2 == null) {
                    // first time this value is seen for this name type
                    maps[nameType].put(value, xpath);
                    continue;
                }
                collisions.add(
                        CLDRFile.getNameTypeName(nameType)
                                + "\t"
                                + value
                                + "\t"
                                + xpath
                                + "\t"
                                + xpath2);
                surveyInfo.add(
                        locale
                                + "\t"
                                + xpath
                                + "\t'"
                                + value
                                + "' is a duplicate of what is in "
                                + xpath2);
            }
            String name = getLocaleAndName(locale) + "\t";
            for (String collision : collisions) {
                errln(name + collision);
            }
        }
    }

    /**
     * Checks the validity of attributes, based on StandardCodes. The invalid codes are added to
     * badCodes, and the failing xpaths are added to xpathFailures.
     *
     * @param item the file whose paths are checked
     * @param badCodes receives the invalid values, keyed by code type; may be null
     * @param xpathFailures receives the failing xpaths; may be null
     */
    public static void checkAttributeValidity(
            CLDRFile item, Map<String, Set<String>> badCodes, Set<String> xpathFailures) {
        for (String xpath : item) {
            XPathParts parts = XPathParts.getFrozenInstance(item.getFullXPath(xpath));
            for (int i = 0; i < parts.size(); ++i) {
                if (parts.getAttributeCount(i) == 0) {
                    continue;
                }
                String element = parts.getElement(i);
                for (Map.Entry<String, String> attr : parts.getAttributes(i).entrySet()) {
                    checkValidity(
                            xpath, element, attr.getKey(), attr.getValue(), badCodes, xpathFailures);
                }
            }
        }
    }

    /** Internal: formats the values as {@code {a, b, c}}. */
    private String show(Collection<String> avalues) {
        StringBuilder result = new StringBuilder("{");
        for (String avalue : avalues) {
            if (result.length() > 1) result.append(", ");
            result.append(avalue.toString());
        }
        return result.append("}").toString();
    }

    /**
     * Internal function: checks the value of a {@code type} attribute on a currency, script,
     * territory, language or zone element; all other attributes and elements are ignored.
     */
    private static void checkValidity(
            String xpath,
            String element,
            String attribute,
            String avalue,
            Map<String, Set<String>> results,
            Set<String> xpathsFailing) {
        if (!attribute.equals("type")) return;
        final String codeType;
        switch (element) {
            case "currency":
            case "script":
            case "territory":
            case "language":
                codeType = element;
                break;
            case "zone":
                codeType = "tzid";
                break;
            default:
                return;
        }
        // replacements are not checked for paths under /identity
        boolean checkReplacements = xpath.indexOf("/identity") < 0;
        checkCodes(
                xpath,
                codeType,
                avalue,
                StandardCodes.make(),
                results,
                xpathsFailing,
                checkReplacements);
    }

    /**
     * Internal function: records avalue as a failure unless it is a known code of the given type
     * and (if checkReplacements is true) has no replacement.
     *
     * @param checkReplacements if true, a code that has a replacement counts as a failure
     */
    private static void checkCodes(
            String xpath,
            String code,
            String avalue,
            StandardCodes codes,
            Map<String, Set<String>> results,
            Set<String> xpathFailures,
            boolean checkReplacements) {
        // ok if code is found AND it has no replacement
        if (codes.getData(code, avalue) != null
                && (!checkReplacements || codes.getReplacement(code, avalue) == null)) return;

        if (xpathFailures != null) xpathFailures.add(xpath);
        if (results == null) return;
        Set<String> s = results.get(code);
        if (s == null) {
            s = new TreeSet<>();
            results.put(code, s);
        }
        s.add(avalue);
    }

    /** Verify that a small set of locales (currently just English) has everything translated. */
    public void TestCompleteLocales() {
        // just test English for now
        if (english == null) english = cldrFactory.make("en", true);
        checkTranslatedCodes(english);
    }

    /**
     * Tests that the file contains codes for all main display name ids: language, script,
     * territory, variant, currency. (Time zone ids are currently not checked.)
     */
    private void checkTranslatedCodes(CLDRFile cldrfile) {
        StandardCodes codes = StandardCodes.make();
        checkTranslatedCode(
                cldrfile, codes, "currency", "//ldml/numbers/currencies/currency", "/displayName");
        // can't check timezones for English.
        // checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone", "");
        checkTranslatedCode(
                cldrfile, codes, "language", "//ldml/localeDisplayNames/languages/language", "");
        checkTranslatedCode(
                cldrfile, codes, "script", "//ldml/localeDisplayNames/scripts/script", "");
        checkTranslatedCode(
                cldrfile,
                codes,
                "territory",
                "//ldml/localeDisplayNames/territories/territory",
                "");
        checkTranslatedCode(
                cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant", "");
    }

    /**
     * Reports an error for every good available code of the given type that has no value in the
     * file, or whose value equals the code itself (unless listed as a completion exception).
     */
    private void checkTranslatedCode(
            CLDRFile cldrfile, StandardCodes codes, String type, String prefix, String postfix) {
        Map<String, Set<String>> completionExceptions = getCompletionExceptions();
        Set<String> codeItems = codes.getGoodAvailableCodes(type);
        int count = 0;
        Set<String> exceptions = completionExceptions.get(type);
        for (String code : codeItems) {
            String rfcname = codes.getData(type, code);
            // if (rfcname.equals("ZZ")) continue;
            ++count;
            // constant first, so that a code without data does not cause an NPE
            if ("PRIVATE USE".equals(rfcname)) continue;
            String fullFragment = prefix + "[@type=\"" + code + "\"]" + postfix;
            String translation = cldrfile.getStringValue(fullFragment);
            String description =
                    "<" + type + " type=\"" + code + "\">" + rfcname + "</" + type + ">";
            if (translation == null) {
                errln("Missing translation for:\t" + description);
            } else if (translation.equals(code)) {
                if (exceptions != null && exceptions.contains(code)) continue;
                errln("Translation = code for:\t" + description);
            }
        }
        logln("Total " + type + ":\t" + count);
    }

    private Map<String, Set<String>> theCompletionExceptions = null;

    /** Lazily builds the codes, per type, whose translation may equal the code itself. */
    private Map<String, Set<String>> getCompletionExceptions() {
        if (theCompletionExceptions == null) {
            theCompletionExceptions = new HashMap<>();
            final Set<String> scriptExceptions = new HashSet<>();
            scriptExceptions.add("Cham");
            scriptExceptions.add("Modi");
            scriptExceptions.add("Thai");
            scriptExceptions.add("Toto");
            theCompletionExceptions.put("script", scriptExceptions);
        }
        return theCompletionExceptions;
    }

    /**
     * Fills the given maps from the supplemental data, for example
     *
     * <pre>
     * &lt;territoryContainment&gt;&lt;group type="001" contains="002 009 019 142 150"/&gt;
     * &lt;languageData&gt;&lt;language type="af" scripts="Latn" territories="ZA"/&gt;
     * </pre>
     *
     * @param language_scripts receives language to scripts; must not be null
     * @param language_territories receives language to territories; must not be null
     * @param group_territory receives group to contained territories; may be null
     * @param territory_currencies receives region to currencies; may be null
     * @param aliases receives alias kind to (type to replacement); may be null
     * @throws IllegalArgumentException if processing a path fails; the cause is attached
     */
    void getSupplementalData(
            Map<String, Set<String>> language_scripts,
            Map<String, Set<String>> language_territories,
            Map<String, Set<String>> group_territory,
            Map<String, Set<String>> territory_currencies,
            Map<String, Map<String, String>> aliases) {

        boolean SHOW = false;
        // deliberately a separate factory on the default main directory, not the field
        Factory supplementalFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
        CLDRFile supp = supplementalFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false);
        for (String path : supp) {
            try {
                XPathParts parts = XPathParts.getFrozenInstance(supp.getFullXPath(path));
                Map<String, String> m;
                String type = "";
                if (aliases != null && parts.findElement("alias") >= 0) {
                    m = parts.findAttributes(type = "languageAlias");
                    if (m == null) m = parts.findAttributes(type = "territoryAlias");
                    if (m != null) {
                        Map<String, String> top = aliases.get(type);
                        if (top == null) {
                            top = new TreeMap<>();
                            aliases.put(type, top);
                        }
                        top.put(m.get("type"), m.get("replacement"));
                    }
                }
                if (territory_currencies != null) {
                    m = parts.findAttributes("region");
                    if (m != null) {
                        String region = m.get("iso3166");
                        Set<String> s = territory_currencies.get(region);
                        if (s == null) {
                            s = new LinkedHashSet<>();
                            territory_currencies.put(region, s);
                        }
                        m = parts.findAttributes("currency");
                        if (m == null) {
                            warnln("missing currency for region: " + path);
                            continue;
                        }
                        String currency = m.get("iso4217");
                        s.add(currency);
                        m = parts.findAttributes("alternate");
                        String alternate = m == null ? null : m.get("iso4217");
                        if (alternate != null) {
                            s.add(alternate);
                        }
                        continue;
                    }
                }
                m = parts.findAttributes("group");
                if (m != null) {
                    if (group_territory == null) continue;
                    type = m.get("type");
                    String contains = m.get("contains");
                    group_territory.put(
                            type, new TreeSet<>(CldrUtility.splitList(contains, ' ', true)));
                    continue;
                }
                m = parts.findAttributes("language");
                if (m == null) continue;
                String language = m.get("type");
                String scripts = m.get("scripts");
                if (scripts == null) {
                    language_scripts.put(language, new TreeSet<String>());
                } else {
                    language_scripts.put(
                            language, new TreeSet<>(CldrUtility.splitList(scripts, ' ', true)));
                    if (SHOW) {
                        System.out.println(
                                getIDAndLocalization(language)
                                        + "\t\t"
                                        + getIDAndLocalization(language_scripts.get(language)));
                    }
                }
                String territories = m.get("territories");
                if (territories == null) {
                    language_territories.put(language, new TreeSet<String>());
                } else {
                    language_territories.put(
                            language, new TreeSet<>(CldrUtility.splitList(territories, ' ', true)));
                    if (SHOW) {
                        System.out.println(
                                getIDAndLocalization(language)
                                        + "\t\t"
                                        + getIDAndLocalization(language_territories.get(language)));
                    }
                }
            } catch (RuntimeException e) {
                throw new IllegalArgumentException("Failure with: " + path, e);
            }
        }
    }

    /**
     * Verify that the minimal localizations are present. For every language locale with missing
     * items, a file missing_&lt;locale&gt;.xml is written to the "missing" subdirectory of the
     * generation directory; otherwise any such file is deleted.
     */
    public void TestMinimalLocalization() throws IOException {
        if (disableUntilLater("TestMinimalLocalization")) return;

        boolean testDraft = false;
        Map<String, Set<String>> language_scripts = new HashMap<>();
        Map<String, Set<String>> language_territories = new HashMap<>();
        getSupplementalData(language_scripts, language_territories, null, null, null);
        LanguageTagParser localIDParser = new LanguageTagParser();
        // see
        // http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/design/minimal_requirements.htm
        int[] failureCount = new int[1];
        int[] warningCount = new int[1]; // currently never incremented
        for (String locale : languageLocales) {
            if (locale.equals(LocaleNames.ROOT)) continue;
            // if (!locale.equals("zh_Hant")) continue;

            CLDRFile item = cldrFactory.make(locale, true);
            if (!testDraft && item.isDraft()) {
                logln(getLocaleAndName(locale) + "\tskipping draft");
                continue;
            }
            UnicodeSet exemplars = getFixedExemplarSet(locale, item);
            CLDRFile missing = SimpleFactory.makeFile(locale);
            failureCount[0] = 0;
            warningCount[0] = 0;
            localIDParser.set(locale);
            String language = localIDParser.getLanguage();
            logln("Testing: " + locale);
            // languages
            Set<String> languages = new TreeSet<>(CldrUtility.MINIMUM_LANGUAGES);
            languages.add(language);
            // LANGUAGE_NAME = 0, SCRIPT_NAME = 1, TERRITORY_NAME = 2, VARIANT_NAME = 3,
            // CURRENCY_NAME = 4, CURRENCY_SYMBOL = 5, TZID = 6

            checkForItems(item, languages, CLDRFile.LANGUAGE_NAME, missing, failureCount, null);

            /*
             * checkTranslatedCode(cldrfile, codes, "currency", "//ldml/numbers/currencies/currency");
             * checkTranslatedCode(cldrfile, codes, "tzid", "//ldml/dates/timeZoneNames/zone");
             * checkTranslatedCode(cldrfile, codes, "variant", "//ldml/localeDisplayNames/variants/variant");
             */

            Set<String> scripts = new TreeSet<>();
            scripts.add("Latn");
            Set<String> others = language_scripts.get(language);
            if (others != null) scripts.addAll(others);
            checkForItems(item, scripts, CLDRFile.SCRIPT_NAME, missing, failureCount, null);

            Set<String> countries = new TreeSet<>(CldrUtility.MINIMUM_TERRITORIES);
            others = language_territories.get(language);
            if (others != null) countries.addAll(others);
            checkForItems(item, countries, CLDRFile.TERRITORY_NAME, missing, failureCount, null);

            Set<String> currencies = new TreeSet<>();
            StandardCodes sc = StandardCodes.make();
            for (String country : countries) {
                Set<String> countryCurrencies = sc.getMainCurrencies(country);
                if (countryCurrencies == null) {
                    errln("Internal Error: no currencies for " + country + ", locale: " + locale);
                } else {
                    currencies.addAll(countryCurrencies);
                }
            }
            checkForItems(item, currencies, CLDRFile.CURRENCY_NAME, missing, failureCount, null);
            checkForItems(
                    item, currencies, CLDRFile.CURRENCY_SYMBOL, missing, failureCount, exemplars);

            // context=format and width=wide; context=stand-alone & width=abbreviated
            Set<String> months = new TreeSet<>();
            for (int i = 1; i <= 12; ++i) months.add(i + "");
            Set<String> days =
                    new TreeSet<>(Arrays.asList("sun", "mon", "tue", "wed", "thu", "fri", "sat"));
            // negative types select date keys: -7..-5 are months, -4..-1 are days
            for (int i = -7; i < 0; ++i) {
                checkForItems(item, (i < -4 ? months : days), i, missing, failureCount, null);
            }

            String filename = "missing_" + locale + ".xml";
            if (failureCount[0] > 0 || warningCount[0] > 0) {
                try (PrintWriter out =
                        FileUtilities.openUTF8Writer(
                                CLDRPaths.GEN_DIRECTORY + "missing/", filename)) {
                    missing.write(out);
                }
                // String s = getIDAndLocalization(missing);
                String message =
                        "missing localizations, creating file "
                                + getNormalizedPath(CLDRPaths.GEN_DIRECTORY, "missing", filename);
                if (failureCount[0] > 0) {
                    warnln(getLocaleAndName(locale) + "\t" + message);
                } else {
                    logln(getLocaleAndName(locale) + "\tpossibly " + message);
                }
            } else {
                new File(CLDRPaths.GEN_DIRECTORY + "missing/", filename).delete();
            }
        }
    }

    /**
     * Internal: builds the xpath of a Gregorian month or day name in the "format" context.
     *
     * @param monthOrDay "month" or "day"
     * @param width the width type, e.g. "wide"
     * @param code the month number or day code
     */
    private String getDateKey(String monthOrDay, String width, String code) {
        // String context = width.equals("narrow") ? "format" : "stand-alone";
        return "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/"
                + monthOrDay
                + "s/"
                + monthOrDay
                + "Context[@type=\"format\"]/"
                + monthOrDay
                + "Width[@type=\""
                + width
                + "\"]/"
                + monthOrDay
                + "[@type=\""
                + code
                + "\"]";
    }

    /**
     * Internal: builds the date key for a numeric type.
     *
     * @param type 6..4 for months abbrev..narrow, 3..0 for days short..narrow
     * @param code the month number or day code
     */
    private String getDateKey(int type, String code) {
        int monthOrDayType = 0;
        int widthType = type;
        if (type >= 4) {
            monthOrDayType = 1;
            widthType -= 4;
        }
        return getDateKey(MONTHORDAYS[monthOrDayType], WIDTHS[widthType], code);
    }

    /**
     * Checks that the item has a value for each code. A value counts as missing if it is null, or
     * if it equals the root value and the root value is not fully covered by exemplarTest. For
     * every missing value a "TODO" entry is added to the missing file and the failure count is
     * incremented.
     *
     * @param item the (resolved) file to check
     * @param codes the codes to look up
     * @param type a CLDRFile name type if non-negative, else a negated date key type (see {@link
     *     #getDateKey(int, String)}; -1 corresponds to date key type 0)
     * @param missing receives the missing entries
     * @param failureCount one-element array whose element is incremented per missing value
     * @param exemplarTest if non-null, a value equal to the root value is accepted when all of its
     *     characters are in this set
     */
    private void checkForItems(
            CLDRFile item,
            Set<String> codes,
            int type,
            CLDRFile missing,
            int failureCount[],
            UnicodeSet exemplarTest) {
        // check codes
        for (String code : codes) {
            String key = type >= 0 ? CLDRFile.getKey(type, code) : getDateKey(-type - 1, code);
            String v = item.getStringValue(key);
            String rootValue = resolvedRoot.getStringValue(key);
            if (v == null
                    || (v.equals(rootValue)
                            && (exemplarTest == null || !exemplarTest.containsAll(rootValue)))) {
                String englishValue = resolvedEnglish.getStringValue(key);
                String transValue = englishValue != null ? englishValue : code;
                missing.add(key, "TODO " + transValue);
                failureCount[0]++;
            } else {
                logln("\t" + code + "\t" + v);
            }
        }
    }

    /*
     * void showTestStr() {
     * LocaleIDParser lparser = new LocaleIDParser();
     * Collection s = split(teststr,',', true, new ArrayList());
     * for (Iterator it = s.iterator(); it.hasNext();) {
     * String item = (String)it.next();
     * lparser.set(item.replace('?', '_'));
     * String region = lparser.getRegion();
     * System.out.print(item.replace('?', '-') + " (" + getLocalization(region) + "), ");
     * //System.out.print(getLocalization(region) + ", ");
     * }
     * }
     * static String teststr =
     * "en?AG, en?AI, en?AS, en?AU, en?IN, en?BB, en?BE, en?BM, en?BN, en?BS, en?BW, en?BZ, en?CA, en?CK, en?CM, en?DM, en?ER, en?ET, en?FJ, en?FK, en?FM, en?GB, en?GD, en?GH, en?GI, en?GM, en?GU, en?GY, en?HK, en?IE, en?IL, en?IO, en?JM, en?KE, en?KI, en?KN, en?KY, en?LC, en?LR, en?LS, en?MH, en?MP, en?MS, en?MT, en?MU, en?MW, en?NA, en?NF, en?NG, en?NR, en?NU, en?NZ, en?PG, en?PH, en?PK, en?PN, en?PR, en?PW, en?RW, en?SB, en?SC, en?SG, en?SH, en?SL, en?SO, en?SZ, en?TC, en?TK, en?TO, en?TT, en?UG, en?UM, en?US, en?VC, en?VG, en?VI, en?VU, en?WS, en?ZA, en?ZM, en?ZW"
     * ;
     */
}
1036 CldrUtility.CollectionTransform EnglishName = 1037 new CldrUtility.CollectionTransform() { 1038 @Override 1039 public Object transform(Object source) { 1040 // TODO Auto-generated method stub 1041 return getLocalization(source.toString()) + " (" + source + ")"; 1042 } 1043 }; 1044 1045 CldrUtility.CollectionTransform EnglishCurrencyName = 1046 new CldrUtility.CollectionTransform() { 1047 @Override 1048 public Object transform(Object source) { 1049 if (english == null) english = cldrFactory.make("en", true); 1050 return english.getName("currency", source.toString()) + " (" + source + ")"; 1051 } 1052 }; 1053 1054 /** Tests that the supplemental data is well-formed. */ TestSupplementalData()1055 public void TestSupplementalData() { 1056 Map<String, Set<String>> language_scripts = new TreeMap<>(); 1057 Map<String, Set<String>> language_territories = new TreeMap<>(); 1058 Map<String, Set<String>> groups = new TreeMap<>(); 1059 Map<String, Set<String>> territory_currencies = new TreeMap<>(); 1060 Map<String, Map<String, String>> aliases = new TreeMap<>(); 1061 getSupplementalData( 1062 language_scripts, language_territories, groups, territory_currencies, aliases); 1063 Set<String> sTerritories = new TreeSet<>(); 1064 for (Iterator<Set<String>> it = language_territories.values().iterator(); it.hasNext(); ) { 1065 sTerritories.addAll(it.next()); 1066 } 1067 StandardCodes sc = StandardCodes.make(); 1068 Set<String> fullTerritories = sc.getAvailableCodes("territory"); 1069 Set<String> fullLanguages = sc.getAvailableCodes("language"); 1070 1071 Set<String> allLanguages = new TreeSet<>(language_scripts.keySet()); 1072 allLanguages.addAll(language_territories.keySet()); 1073 for (Iterator<String> it = allLanguages.iterator(); it.hasNext(); ) { 1074 Object language = it.next(); 1075 Set<String> scripts = language_scripts.get(language); 1076 Set<String> territories = language_territories.get(language); 1077 logln( 1078 EnglishName.transform(language) 1079 + " scripts: " 
1080 + EnglishName.transform(scripts) 1081 + " territories: " 1082 + EnglishName.transform(territories)); 1083 } 1084 1085 Map<String, String> changedLanguage = new TreeMap<>(); 1086 for (Iterator<String> it = fullLanguages.iterator(); it.hasNext(); ) { 1087 String code = it.next(); 1088 List<String> data = sc.getFullData("language", code); 1089 if (data.size() < 3) { 1090 System.out.println("data problem: " + data); 1091 continue; 1092 } 1093 String replacement = data.get(2); 1094 if (!replacement.equals("")) { 1095 if (!replacement.equals("--")) changedLanguage.put(code, replacement); 1096 continue; 1097 } 1098 } 1099 1100 // remove private use, deprecated, groups 1101 Set<String> standardTerritories = new TreeSet<>(); 1102 Map<String, String> changedTerritory = new TreeMap<>(); 1103 for (Iterator<String> it = fullTerritories.iterator(); it.hasNext(); ) { 1104 String code = it.next(); 1105 if (code.equals("200")) continue; // || code.equals("YU") || code.equals("PZ") 1106 List<String> data = sc.getFullData("territory", code); 1107 if (data.get(0).equals("PRIVATE USE")) continue; 1108 if (!data.get(2).equals("")) { 1109 if (!data.get(2).equals("--")) changedTerritory.put(code, data.get(2)); 1110 continue; 1111 } 1112 standardTerritories.add(code); 1113 } 1114 standardTerritories.removeAll(groups.keySet()); 1115 1116 if (!standardTerritories.containsAll(sTerritories)) { 1117 TreeSet<String> extras = new TreeSet<>(sTerritories); 1118 extras.removeAll(standardTerritories); 1119 errln( 1120 "Supplemental Language Territories contain illegal values: " 1121 + EnglishName.transform(extras)); 1122 } 1123 if (!sTerritories.containsAll(standardTerritories)) { 1124 TreeSet<String> extras = new TreeSet<>(standardTerritories); 1125 extras.removeAll(sTerritories); 1126 warnln("Missing Language Territories: " + EnglishName.transform(extras)); 1127 } 1128 1129 // now test currencies 1130 logln("Check that no illegal territories are used"); 1131 if 
(!standardTerritories.containsAll(territory_currencies.keySet())) { 1132 TreeSet<String> extras = new TreeSet<>(territory_currencies.keySet()); 1133 extras.removeAll(fullTerritories); 1134 if (extras.size() != 0) 1135 errln("Currency info -- Illegal Territories: " + EnglishName.transform(extras)); 1136 extras = new TreeSet<>(territory_currencies.keySet()); 1137 extras.retainAll(fullTerritories); 1138 extras.removeAll(standardTerritories); 1139 if (extras.size() != 0) 1140 warnln("Currency info -- Archaic Territories: " + EnglishName.transform(extras)); 1141 } 1142 logln("Check that no territories are missing"); 1143 if (!territory_currencies.keySet().containsAll(standardTerritories)) { 1144 TreeSet<String> extras = new TreeSet<>(standardTerritories); 1145 extras.removeAll(territory_currencies.keySet()); 1146 errln("Currency info -- Missing Territories: " + EnglishName.transform(extras)); 1147 } 1148 Set<String> currencies = new TreeSet<>(); 1149 for (Iterator<Set<String>> it = territory_currencies.values().iterator(); it.hasNext(); ) { 1150 currencies.addAll(it.next()); 1151 } 1152 logln("Check that no illegal currencies are used"); 1153 Set<String> legalCurrencies = new TreeSet<>(sc.getAvailableCodes("currency")); 1154 // first remove non-ISO 1155 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext(); ) { 1156 String code = it.next(); 1157 List<String> data = sc.getFullData("currency", code); 1158 if ("X".equals(data.get(3))) it.remove(); 1159 } 1160 if (!legalCurrencies.containsAll(currencies)) { 1161 TreeSet<String> extras = new TreeSet<>(currencies); 1162 extras.removeAll(legalCurrencies); 1163 errln("Currency info -- Illegal Currencies: " + EnglishCurrencyName.transform(extras)); 1164 } 1165 logln("Check that there are no missing currencies"); 1166 if (!currencies.containsAll(legalCurrencies)) { 1167 TreeSet<String> extras = new TreeSet<>(legalCurrencies); 1168 extras.removeAll(currencies); 1169 Map<String, Set<String>> failures = new 
TreeMap<>(); 1170 for (Iterator<String> it = extras.iterator(); it.hasNext(); ) { 1171 String code = it.next(); 1172 List<String> data = sc.getFullData("currency", code); 1173 if (data.get(1).equals("ZZ")) continue; 1174 String type = data.get(3) + "/" + data.get(1); 1175 Set<String> s = failures.get(type); 1176 if (s == null) failures.put(type, s = new TreeSet<>()); 1177 s.add(code); 1178 } 1179 for (Iterator<String> it = failures.keySet().iterator(); it.hasNext(); ) { 1180 String type = it.next(); 1181 Set<String> s = failures.get(type); 1182 warnln( 1183 "Currency info -- Missing Currencies: " 1184 + type 1185 + "\t \u2192 " 1186 + EnglishCurrencyName.transform(s)); 1187 } 1188 } 1189 logln("Missing English currency names"); 1190 for (Iterator<String> it = legalCurrencies.iterator(); it.hasNext(); ) { 1191 String currency = it.next(); 1192 String name = english.getName("currency", currency); 1193 if (name == null) { 1194 String standardName = sc.getFullData("currency", currency).get(0); 1195 logln("\t\t\t<currency type=\"" + currency + "\">"); 1196 logln("\t\t\t\t<displayName>" + standardName + "</displayName>"); 1197 logln("\t\t\t</currency>"); 1198 } 1199 } 1200 logln("Check Aliases"); 1201 for (Iterator<String> it = aliases.keySet().iterator(); it.hasNext(); ) { 1202 // the first part of the mapping had better not be in the standardTerritories 1203 String key = it.next(); 1204 Map<String, String> submap = aliases.get(key); 1205 if (key.equals("territoryAlias")) { 1206 checkEqual(key, submap, changedTerritory); 1207 } else if (key.equals("languageAlias")) { 1208 for (Iterator<String> it2 = submap.keySet().iterator(); it2.hasNext(); ) { 1209 String k = it2.next(); 1210 String value = submap.get(k); 1211 if (value.indexOf("_") >= 0) it2.remove(); 1212 } 1213 checkEqual(key, submap, changedLanguage); 1214 } 1215 } 1216 } 1217 1218 /** */ checkEqual(String title, Map map1, Map map2)1219 private void checkEqual(String title, Map map1, Map map2) { 1220 Set foo = new 
TreeSet(map1.keySet()); 1221 foo.removeAll(map2.keySet()); 1222 if (!foo.isEmpty()) errln("Extraneous Aliases: " + title + "\t" + foo); 1223 foo = new TreeSet(map2.keySet()); 1224 foo.removeAll(map1.keySet()); 1225 if (!foo.isEmpty()) errln("Missing Aliases: " + title + "\t" + foo); 1226 foo = map2.keySet(); 1227 foo.retainAll(map1.keySet()); 1228 for (Iterator it = foo.iterator(); it.hasNext(); ) { 1229 Object key = it.next(); 1230 Object result1 = map1.get(key); 1231 Object result2 = map2.get(key); 1232 if (!result1.equals(result2)) 1233 errln("Missing Aliases: " + title + "\t" + key + "\t" + result1 + " != " + result2); 1234 } 1235 } 1236 1237 /** Test that the zone ids are well-formed. */ TestZones()1238 public void TestZones() { 1239 StandardCodes sc = StandardCodes.make(); 1240 1241 Map<String, String> defaultNames = new TreeMap(); 1242 Map<String, String> old_new = sc.getZoneLinkold_new(); 1243 Set<String> core = sc.getZoneData().keySet(); 1244 logln("Checking for collisions with last field"); 1245 for (Iterator<String> it = core.iterator(); it.hasNext(); ) { 1246 String currentItem = it.next(); 1247 String defaultName = TimezoneFormatter.getFallbackName(currentItem); 1248 String fullName = defaultNames.get(defaultName); 1249 if (fullName == null) defaultNames.put(defaultName, currentItem); 1250 else { 1251 errln("Collision between: " + currentItem + " AND " + fullName); 1252 } 1253 } 1254 1255 logln("Checking that all links are TO canonical zones"); 1256 Set<String> s = new TreeSet<>(old_new.values()); 1257 s.removeAll(core); 1258 if (s.size() != 0) { 1259 errln("Links go TO zones that are not canonical! " + s); 1260 } 1261 1262 logln("Checking that no links are FROM canonical zones"); 1263 s = new TreeSet<>(core); 1264 s.retainAll(old_new.keySet()); 1265 if (s.size() != 0) { 1266 errln("Links go FROM zones that are canonical! 
" + s); 1267 } 1268 1269 logln("Checking that the zones with rule data are all canonical"); 1270 Set<String> zonesWithRules = sc.getZone_rules().keySet(); 1271 s.clear(); 1272 s.addAll(zonesWithRules); 1273 s.removeAll(core); 1274 if (s.size() != 0) logln("Zones with rules that are not canonical: " + s); 1275 1276 logln("Checking that the rule data are all canonical"); 1277 s.clear(); 1278 s.addAll(core); 1279 s.removeAll(zonesWithRules); 1280 s.removeAll(old_new.keySet()); 1281 if (s.size() != 0) logln("Canonical zones that don't have rules or links: " + s); 1282 1283 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext(); ) { 1284 String oldItem = it.next(); 1285 logln("old: " + oldItem + "\tnew: " + old_new.get(oldItem)); 1286 } 1287 Map<String, Set<String>> new_old = new TreeMap<>(); 1288 for (Iterator<String> it = core.iterator(); it.hasNext(); ) { 1289 new_old.put(it.next(), new TreeSet<String>()); 1290 } 1291 for (Iterator<String> it = old_new.keySet().iterator(); it.hasNext(); ) { 1292 String oldItem = it.next(); 1293 String newItem = old_new.get(oldItem); 1294 Set<String> oldItems = new_old.get(newItem); 1295 if (oldItems == null) { // try recursing 1296 logln("!!!!Skipping " + oldItem + " \u2192 " + newItem); 1297 continue; 1298 // new_old.put(oldOne, oldItems = new TreeSet()); 1299 } 1300 oldItems.add(oldItem); 1301 } 1302 for (Iterator<String> it = new_old.keySet().iterator(); it.hasNext(); ) { 1303 String newOne = it.next(); 1304 Set<String> oldItems = new_old.get(newOne); 1305 logln(newOne + "\t" + oldItems); 1306 } 1307 } 1308 TestNarrowForms()1309 public void TestNarrowForms() { 1310 if (disableUntilLater("TestMinimalLocalization")) return; 1311 1312 for (Iterator<String> it = locales.iterator(); it.hasNext(); ) { 1313 String locale = it.next(); 1314 logln("Testing: " + getLocaleAndName(locale)); 1315 BreakIterator bi = BreakIterator.getCharacterInstance(new ULocale(locale)); 1316 CLDRFile item = cldrFactory.make(locale, false); 1317 
// Walk through all the xpaths, adding to currentValues 1318 // Whenever two values for the same xpath are different, we remove from currentValues, 1319 // and add to okValues 1320 for (Iterator<String> it2 = item.iterator(); it2.hasNext(); ) { 1321 String xpath = it2.next(); 1322 if (xpath.indexOf("[@type=\"narrow\"]") >= 0) { 1323 String value = item.getStringValue(xpath); 1324 // logln("\tTesting: " + value + "\t path: " + xpath); 1325 int end = getXGraphemeClusterBoundary(bi, value, 0); 1326 if (end == value.length()) continue; 1327 errln( 1328 getLocaleAndName(locale) 1329 + "\tillegal narrow value " 1330 + value 1331 + "\t path: " 1332 + xpath); 1333 surveyInfo.add( 1334 locale 1335 + "\t" 1336 + xpath 1337 + "\t'" 1338 + value 1339 + "' is too wide for a \"narrow\" value."); 1340 } 1341 } 1342 } 1343 } 1344 1345 static final UnicodeSet XGRAPHEME = new UnicodeSet("[[:mark:][:grapheme_extend:]]"); 1346 static final UnicodeSet DIGIT = new UnicodeSet("[:decimal_number:]"); 1347 getXGraphemeClusterBoundary(BreakIterator bi, String value, int start)1348 private int getXGraphemeClusterBoundary(BreakIterator bi, String value, int start) { 1349 if (value.length() <= 1) return 1; 1350 1351 bi.setText(value); 1352 if (start != 0) bi.preceding(start + 1); // backup one 1353 int current = bi.next(); 1354 // link any digits 1355 if (DIGIT.contains(UTF16.charAt(value, current - 1))) { 1356 current = DIGIT.findIn(value, current, true); 1357 } 1358 // continue collecting any additional characters that are M or grapheme extend 1359 return XGRAPHEME.findIn(value, current, true); 1360 } 1361 } 1362