1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2013, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import com.google.common.base.Joiner; 10 import com.google.common.base.Splitter; 11 import com.ibm.icu.dev.tool.shared.UOption; 12 import com.ibm.icu.impl.Utility; 13 import com.ibm.icu.text.Collator; 14 import com.ibm.icu.text.DateTimePatternGenerator; 15 import com.ibm.icu.text.DateTimePatternGenerator.VariableField; 16 import com.ibm.icu.text.Normalizer; 17 import com.ibm.icu.text.NumberFormat; 18 import com.ibm.icu.text.UnicodeSet; 19 import com.ibm.icu.util.ICUException; 20 import com.ibm.icu.util.Output; 21 import com.ibm.icu.util.ULocale; 22 import java.io.File; 23 import java.io.PrintWriter; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.EnumMap; 27 import java.util.HashMap; 28 import java.util.HashSet; 29 import java.util.Iterator; 30 import java.util.LinkedHashMap; 31 import java.util.LinkedHashSet; 32 import java.util.List; 33 import java.util.Map; 34 import java.util.Map.Entry; 35 import java.util.Objects; 36 import java.util.Set; 37 import java.util.TreeMap; 38 import java.util.TreeSet; 39 import java.util.regex.Matcher; 40 import java.util.regex.Pattern; 41 import org.unicode.cldr.draft.FileUtilities; 42 import org.unicode.cldr.test.CLDRTest; 43 import org.unicode.cldr.test.CoverageLevel2; 44 import org.unicode.cldr.test.DisplayAndInputProcessor; 45 import org.unicode.cldr.test.QuickCheck; 46 import org.unicode.cldr.test.SubmissionLocales; 47 import org.unicode.cldr.util.Annotations; 48 import org.unicode.cldr.util.CLDRConfig; 49 import org.unicode.cldr.util.CLDRFile; 50 import org.unicode.cldr.util.CLDRFile.DraftStatus; 51 import org.unicode.cldr.util.CLDRFile.ExemplarType; 52 import 
org.unicode.cldr.util.CLDRFile.NumberingSystem; 53 import org.unicode.cldr.util.CLDRFile.WinningChoice; 54 import org.unicode.cldr.util.CLDRLocale; 55 import org.unicode.cldr.util.CLDRPaths; 56 import org.unicode.cldr.util.CLDRTool; 57 import org.unicode.cldr.util.CldrUtility; 58 import org.unicode.cldr.util.DateTimeCanonicalizer; 59 import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType; 60 import org.unicode.cldr.util.DowngradePaths; 61 import org.unicode.cldr.util.DtdData; 62 import org.unicode.cldr.util.DtdType; 63 import org.unicode.cldr.util.Factory; 64 import org.unicode.cldr.util.FileProcessor; 65 import org.unicode.cldr.util.GlossonymConstructor; 66 import org.unicode.cldr.util.LanguageTagParser; 67 import org.unicode.cldr.util.Level; 68 import org.unicode.cldr.util.LocaleIDParser; 69 import org.unicode.cldr.util.LocaleNames; 70 import org.unicode.cldr.util.LogicalGrouping; 71 import org.unicode.cldr.util.PathChecker; 72 import org.unicode.cldr.util.PatternCache; 73 import org.unicode.cldr.util.RegexLookup; 74 import org.unicode.cldr.util.RegexUtilities; 75 import org.unicode.cldr.util.SimpleFactory; 76 import org.unicode.cldr.util.StandardCodes; 77 import org.unicode.cldr.util.StringId; 78 import org.unicode.cldr.util.SupplementalDataInfo; 79 // import org.unicode.cldr.util.Log; 80 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 81 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 82 import org.unicode.cldr.util.VoteResolver; 83 import org.unicode.cldr.util.XMLSource; 84 import org.unicode.cldr.util.XPathParts; 85 import org.unicode.cldr.util.XPathParts.Comments; 86 import org.unicode.cldr.util.XPathParts.Comments.CommentType; 87 88 /** 89 * Tool for applying modifications to the CLDR files. Use -h to see the options. 90 * 91 * <p>There are some environment variables that can be used with the program <br> 92 * -DSHOW_FILES=<anything> shows all create/open of files. 
93 */ 94 @CLDRTool( 95 alias = "modify", 96 description = 97 "Tool for applying modifications to the CLDR files. Use -h to see the options.") 98 public class CLDRModify { 99 static final String DEBUG_PATHS = null; // ".*currency.*"; 100 static final boolean COMMENT_REMOVALS = false; // append removals as comments 101 static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze(); 102 static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze(); 103 private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml); 104 105 // TODO make this into input option. 106 107 enum ConfigKeys { 108 action, 109 locale, 110 path, 111 value, 112 new_path, 113 new_value 114 } 115 116 enum ConfigAction { 117 /** Remove a path */ 118 delete, 119 /** Add a path/value */ 120 add, 121 /** Replace a path/value. Equals 'add' but tests selected paths */ 122 replace, 123 /** Add a a path/value. Equals 'add' but tests that path did NOT exist */ 124 addNew, 125 } 126 127 static final class ConfigMatch { 128 final String exactMatch; 129 final Matcher regexMatch; // doesn't have to be thread safe 130 final ConfigAction action; 131 final boolean hexPath; 132 ConfigMatch(ConfigKeys key, String match)133 public ConfigMatch(ConfigKeys key, String match) { 134 if (key == ConfigKeys.action) { 135 exactMatch = null; 136 regexMatch = null; 137 action = ConfigAction.valueOf(match); 138 hexPath = false; 139 } else if (match.startsWith("/") && match.endsWith("/")) { 140 if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) { 141 throw new IllegalArgumentException("Regex only allowed for old path/value."); 142 } 143 exactMatch = null; 144 regexMatch = 145 PatternCache.get( 146 match.substring(1, match.length() - 1) 147 .replace("[@", "\\[@")) 148 .matcher(""); 149 action = null; 150 hexPath = false; 151 } else { 152 exactMatch = match; 153 regexMatch = null; 154 action = null; 155 hexPath = 156 (key == ConfigKeys.new_path || key == 
ConfigKeys.path) 157 && HEX.containsAll(match); 158 } 159 } 160 matches(String other)161 public boolean matches(String other) { 162 if (exactMatch == null) { 163 return regexMatch.reset(other).find(); 164 } else if (hexPath) { 165 // convert path to id for comparison 166 return exactMatch.equals(StringId.getHexId(other)); 167 } else { 168 return exactMatch.equals(other); 169 } 170 } 171 172 @Override toString()173 public String toString() { 174 return action != null 175 ? action.toString() 176 : exactMatch == null 177 ? regexMatch.toString() 178 : hexPath ? "*" + exactMatch + "*" : exactMatch; 179 } 180 getPath(CLDRFile cldrFileToFilter)181 public String getPath(CLDRFile cldrFileToFilter) { 182 if (!hexPath) { 183 return exactMatch; 184 } 185 // ensure that we have all the possible paths cached 186 String path = StringId.getStringFromHexId(exactMatch); 187 if (path == null) { 188 for (String eachPath : cldrFileToFilter.fullIterable()) { 189 StringId.getHexId(eachPath); 190 } 191 path = StringId.getStringFromHexId(exactMatch); 192 if (path == null) { 193 throw new IllegalArgumentException("No path for hex id: " + exactMatch); 194 } 195 } 196 return path; 197 } 198 getModified( ConfigMatch valueMatch, String value, ConfigMatch newValue)199 public static String getModified( 200 ConfigMatch valueMatch, String value, ConfigMatch newValue) { 201 if (valueMatch == null) { // match anything 202 if (newValue != null && newValue.exactMatch != null) { 203 return newValue.exactMatch; 204 } 205 if (value != null) { 206 return value; 207 } 208 throw new IllegalArgumentException("Can't have both old and new be null."); 209 } else if (valueMatch.exactMatch == null) { // regex 210 if (newValue == null || newValue.exactMatch == null) { 211 throw new IllegalArgumentException("Can't have regex without replacement."); 212 } 213 StringBuffer buffer = new StringBuffer(); 214 valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch); 215 return buffer.toString(); 216 } else { 
217 return newValue.exactMatch != null ? newValue.exactMatch : value; 218 } 219 } 220 } 221 222 static FixList fixList = new FixList(); 223 224 private static final int HELP1 = 0, 225 HELP2 = 1, 226 SOURCEDIR = 2, 227 DESTDIR = 3, 228 MATCH = 4, 229 JOIN = 5, 230 MINIMIZE = 6, 231 FIX = 7, 232 JOIN_ARGS = 8, 233 VET_ADD = 9, 234 RESOLVE = 10, 235 PATH = 11, 236 USER = 12, 237 ALL_DIRS = 13, 238 CHECK = 14, 239 KONFIG = 15, 240 RETAIN = 16; 241 242 private static final UOption[] options = { 243 UOption.HELP_H(), 244 UOption.HELP_QUESTION_MARK(), 245 UOption.SOURCEDIR().setDefault(CLDRPaths.MAIN_DIRECTORY), 246 UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"), 247 UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), 248 UOption.create("join", 'j', UOption.OPTIONAL_ARG), 249 UOption.create("minimize", 'r', UOption.NO_ARG), 250 UOption.create("fix", 'f', UOption.OPTIONAL_ARG), 251 UOption.create("join-args", 'i', UOption.OPTIONAL_ARG), 252 UOption.create("vet", 'v', UOption.OPTIONAL_ARG), 253 UOption.create("resolve", 'z', UOption.OPTIONAL_ARG), 254 UOption.create("path", 'p', UOption.REQUIRES_ARG), 255 UOption.create("user", 'u', UOption.REQUIRES_ARG), 256 UOption.create("all", 'a', UOption.REQUIRES_ARG), 257 UOption.create("check", 'c', UOption.NO_ARG), 258 UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"), 259 UOption.create("Retain", 'R', UOption.NO_ARG), 260 }; 261 262 private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]"); 263 264 static final String HELP_TEXT1 = 265 "Use the following options" 266 + XPathParts.NEWLINE 267 + "-h or -?\t for this message" 268 + XPathParts.NEWLINE 269 + "-" 270 + options[SOURCEDIR].shortName 271 + "\t source directory. 
Default = -s" 272 + CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY) 273 + XPathParts.NEWLINE 274 + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" 275 + XPathParts.NEWLINE 276 + "-" 277 + options[DESTDIR].shortName 278 + "\t destination directory. Default = -d" 279 + CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/") 280 + XPathParts.NEWLINE 281 + "-m<regex>\t to restrict the locales to what matches <regex>" 282 + XPathParts.NEWLINE 283 + "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', " 284 + XPathParts.NEWLINE 285 + "\twhere * in X' is replaced by X)." 286 + XPathParts.NEWLINE 287 + "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*" 288 + XPathParts.NEWLINE 289 + "-i\t merge arguments:" 290 + XPathParts.NEWLINE 291 + "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")" 292 + XPathParts.NEWLINE 293 + "\tc\t ignore comments in <merge_dir> files" 294 + XPathParts.NEWLINE 295 + "-v\t incorporate vetting information, and generate diff files." 296 + XPathParts.NEWLINE 297 + "-z\t generate resolved files" 298 + XPathParts.NEWLINE 299 + "-p\t set path for -fx" 300 + XPathParts.NEWLINE 301 + "-u\t set user for -fb" 302 + XPathParts.NEWLINE 303 + "-a\t pattern: recurse over all subdirectories that match pattern" 304 + XPathParts.NEWLINE 305 + "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location." 306 + XPathParts.NEWLINE 307 + "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:" 308 + XPathParts.NEWLINE 309 + "\t\thttp://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config." 
310 + XPathParts.NEWLINE 311 + "-R\t retain unchanged files" 312 + XPathParts.NEWLINE 313 + "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)" 314 + XPathParts.NEWLINE; 315 316 static final String HELP_TEXT2 = 317 "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results." 318 + XPathParts.NEWLINE; 319 private static final boolean SHOW_DETAILS = false; 320 private static boolean SHOW_PROCESSING = false; 321 322 static String sourceInput; 323 324 /** Picks options and executes. Use -h to see options. */ main(String[] args)325 public static void main(String[] args) throws Exception { 326 long startTime = System.currentTimeMillis(); 327 UOption.parseArgs(args, options); 328 if (options[HELP1].doesOccur || options[HELP2].doesOccur) { 329 System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2); 330 return; 331 } 332 checkSuboptions(FIX, fixList.getOptions()); 333 checkSuboptions(JOIN_ARGS, allMergeOptions); 334 String recurseOnDirectories = options[ALL_DIRS].value; 335 boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/"; 336 337 sourceInput = options[SOURCEDIR].value; 338 String destInput = options[DESTDIR].value; 339 if (recurseOnDirectories != null) { 340 sourceInput = removeSuffix(sourceInput, "main/", "main"); 341 destInput = removeSuffix(destInput, "main/", "main"); 342 } 343 String sourceDirBase = 344 CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/"; 345 String targetDirBase = 346 CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/"; 347 System.out.format("Source:\t%s\n", sourceDirBase); 348 System.out.format("Target:\t%s\n", targetDirBase); 349 350 boolean retainUnchangedFiles = options[RETAIN].doesOccur; 351 352 Set<String> dirSet = new TreeSet<>(); 353 if (recurseOnDirectories == null) { 354 dirSet.add(""); 355 } else { 356 String[] subdirs = new 
File(sourceDirBase).list(); 357 Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher(""); 358 for (String subdir : subdirs) { 359 if (!subdirMatch.reset(subdir).find()) continue; 360 dirSet.add(subdir + "/"); 361 } 362 } 363 for (String dir : dirSet) { 364 String sourceDir = sourceDirBase + dir; 365 if (!new File(sourceDir).isDirectory()) continue; 366 String targetDir = targetDirBase + dir; 367 try { 368 Factory cldrFactoryForAvailable = Factory.make(sourceDir, ".*"); 369 Factory cldrFactory = cldrFactoryForAvailable; 370 // Need root.xml or else cannot load resolved locales. 371 /* 372 * TODO: when seed and common are merged per https://unicode-org.atlassian.net/browse/CLDR-6396 373 * this will become moot; in the meantime it became necessary to do this not only for "Q" 374 * but also for "p" per https://unicode-org.atlassian.net/browse/CLDR-15054 375 */ 376 if (sourceDir.endsWith("/seed/annotations/") && "Q".equals(options[FIX].value)) { 377 System.err.println( 378 "Correcting factory so that annotations can load, including " 379 + CLDRPaths.ANNOTATIONS_DIRECTORY); 380 final File[] paths = { 381 new File(sourceDir), 382 new File(CLDRPaths.ANNOTATIONS_DIRECTORY) // common/annotations - to load 383 // root.xml 384 }; 385 cldrFactory = SimpleFactory.make(paths, ".*"); 386 } else if (sourceDir.contains("/seed/") && "p".equals(options[FIX].value)) { 387 System.err.println("Correcting factory to enable getting root"); 388 final File[] paths = { 389 new File(sourceDir), 390 new File(CLDRPaths.ANNOTATIONS_DIRECTORY), // to load 391 // common/annotations/root.xml 392 new File(CLDRPaths.MAIN_DIRECTORY) // to load common/main/root.xml 393 }; 394 cldrFactory = SimpleFactory.make(paths, ".*"); 395 } else { 396 System.err.println("!!! 
" + sourceDir); 397 } 398 399 if (options[VET_ADD].doesOccur) { 400 VettingAdder va = new VettingAdder(options[VET_ADD].value); 401 va.showFiles(cldrFactory, targetDir); 402 return; 403 } 404 405 Factory mergeFactory = null; 406 407 String join_prefix = "", join_postfix = ""; 408 if (options[JOIN].doesOccur) { 409 String mergeDir = options[JOIN].value; 410 File temp = new File(mergeDir); 411 mergeDir = 412 CldrUtility.checkValidDirectory( 413 temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY 414 // + "main/"; 415 String filename = temp.getName(); 416 join_prefix = join_postfix = ""; 417 int pos = filename.indexOf("*"); 418 if (pos >= 0) { 419 join_prefix = filename.substring(0, pos); 420 join_postfix = filename.substring(pos + 1); 421 } 422 mergeFactory = Factory.make(mergeDir, ".*"); 423 } 424 Set<String> locales = new TreeSet<>(cldrFactoryForAvailable.getAvailable()); 425 if (mergeFactory != null) { 426 Set<String> temp = new TreeSet<>(mergeFactory.getAvailable()); 427 Set<String> locales3 = new TreeSet<>(); 428 for (String locale : temp) { 429 if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) 430 continue; 431 locales3.add( 432 locale.substring( 433 join_prefix.length(), 434 locale.length() - join_postfix.length())); 435 } 436 locales.retainAll(locales3); 437 System.out.println("Merging: " + locales3); 438 } 439 new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales); 440 441 fixList.handleSetup(); 442 443 long lastTime = System.currentTimeMillis(); 444 int spin = 0; 445 System.out.format(locales.size() + " Locales:\t%s\n", locales.toString()); 446 int totalRemoved = 0; 447 for (String test : locales) { 448 spin++; 449 if (SHOW_PROCESSING) { 450 long now = System.currentTimeMillis(); 451 if (now - lastTime > 5000) { 452 System.out.println( 453 " .. 
still processing " 454 + test 455 + " [" 456 + spin 457 + "/" 458 + locales.size() 459 + "]"); 460 lastTime = now; 461 } 462 } 463 464 // TODO parameterize the directory and filter 465 466 final CLDRFile originalCldrFile = cldrFactory.make(test, makeResolved); 467 CLDRFile k = originalCldrFile.cloneAsThawed(); 468 if (DEBUG_PATHS != null) { 469 System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 470 } 471 if (mergeFactory != null) { 472 int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE; 473 CLDRFile toMergeIn = 474 mergeFactory 475 .make(join_prefix + test + join_postfix, false) 476 .cloneAsThawed(); 477 if (toMergeIn != null) { 478 if (options[JOIN_ARGS].doesOccur) { 479 if (options[JOIN_ARGS].value.indexOf("r") >= 0) 480 mergeOption = CLDRFile.MERGE_REPLACE_MY_DRAFT; 481 if (options[JOIN_ARGS].value.indexOf("d") >= 0) 482 mergeOption = CLDRFile.MERGE_REPLACE_MINE; 483 if (options[JOIN_ARGS].value.indexOf("c") >= 0) 484 toMergeIn.clearComments(); 485 if (options[JOIN_ARGS].value.indexOf("x") >= 0) 486 removePosix(toMergeIn); 487 } 488 toMergeIn.makeDraft(DraftStatus.contributed); 489 k.putAll(toMergeIn, mergeOption); 490 } 491 // special fix 492 k.removeComment( 493 " The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. 
"); 494 } 495 if (DEBUG_PATHS != null) { 496 System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 497 } 498 if (options[FIX].doesOccur) { 499 fix(k, options[FIX].value, options[KONFIG].value, cldrFactory); 500 System.out.println("#TOTAL\tItems changed: " + fixList.totalChanged); 501 } 502 if (DEBUG_PATHS != null) { 503 System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 504 } 505 if (DEBUG_PATHS != null) { 506 System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS)); 507 } 508 509 PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml"); 510 String testPath = 511 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 512 if (false) { 513 System.out.println("Printing Raw File:"); 514 testPath = 515 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias"; 516 System.out.println(k.getStringValue(testPath)); 517 TreeSet s = new TreeSet(); 518 k.forEach(s::add); 519 520 System.out.println(k.getStringValue(testPath)); 521 Set orderedSet = new TreeSet(k.getComparator()); 522 k.forEach(orderedSet::add); 523 for (Iterator it3 = orderedSet.iterator(); it3.hasNext(); ) { 524 String path = (String) it3.next(); 525 if (path.equals(testPath)) { 526 System.out.println("huh?"); 527 } 528 String value = k.getStringValue(path); 529 String fullpath = k.getFullXPath(path); 530 System.out.println("\t=\t" + fullpath); 531 System.out.println("\t=\t" + value); 532 } 533 System.out.println("Done Printing Raw File:"); 534 } 535 536 k.write(pw); 537 pw.close(); 538 539 File oldFile = new File(sourceDir, test + ".xml"); 540 File newFile = new File(targetDir, test + ".xml"); 541 if (!retainUnchangedFiles 542 && !oldFile.equals( 543 newFile) // only skip if the source & target are different. 
544 && equalsSkippingCopyright(oldFile, newFile)) { 545 newFile.delete(); 546 continue; 547 } 548 549 if (options[CHECK].doesOccur) { 550 QuickCheck.check(new File(targetDir, test + ".xml")); 551 } 552 } 553 if (totalSkeletons.size() != 0) { 554 System.out.println("Total Skeletons" + totalSkeletons); 555 } 556 if (totalRemoved > 0) { 557 System.out.println("# Removed:\t" + totalRemoved); 558 } 559 } finally { 560 fixList.handleCleanup(); 561 System.out.println( 562 "Done -- Elapsed time: " 563 + ((System.currentTimeMillis() - startTime) / 60000.0) 564 + " minutes"); 565 } 566 } 567 } 568 equalsSkippingCopyright(File oldFile, File newFile)569 public static boolean equalsSkippingCopyright(File oldFile, File newFile) { 570 Iterator<String> oldIterator = FileUtilities.in(oldFile).iterator(); 571 Iterator<String> newIterator = FileUtilities.in(newFile).iterator(); 572 while (true) { 573 boolean oldHasNext = oldIterator.hasNext(); 574 boolean newHasNext = newIterator.hasNext(); 575 if (oldHasNext != newHasNext) { 576 return false; 577 } 578 if (!oldHasNext) { 579 return true; 580 } 581 String oldLine = oldIterator.next(); 582 String newLine = newIterator.next(); 583 if (!oldLine.equals(newLine)) { 584 if (oldLine.startsWith("<!-- Copyright ©") 585 && newLine.startsWith("<!-- Copyright ©")) { 586 continue; 587 } 588 return false; 589 } 590 } 591 } 592 removeSuffix(String value, String... suffices)593 private static String removeSuffix(String value, String... suffices) { 594 for (String suffix : suffices) { 595 if (value.endsWith(suffix)) { 596 return value.substring(0, value.length() - suffix.length()); 597 } 598 } 599 return value; 600 } 601 602 /* 603 * Use the coverage to determine what we should keep in the case of a locale just below root. 
604 */ 605 606 static class RetainWhenMinimizing implements CLDRFile.RetentionTest { 607 private CLDRFile file; 608 private CLDRLocale c; 609 private boolean isArabicSublocale; 610 setParentFile(CLDRFile file)611 public RetainWhenMinimizing setParentFile(CLDRFile file) { 612 this.file = file; 613 this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity()); 614 isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry()); 615 return this; 616 } 617 618 @Override getRetention(String path)619 public Retention getRetention(String path) { 620 if (path.startsWith("//ldml/identity/")) { 621 return Retention.RETAIN; 622 } 623 // special case for Arabic 624 if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) { 625 return Retention.RETAIN; 626 } 627 String localeId = file.getSourceLocaleID(path, null); 628 if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT"))) 629 && (XMLSource.ROOT_ID.equals(localeId) 630 || XMLSource.CODE_FALLBACK_ID.equals(localeId))) { 631 return Retention.RETAIN; 632 } 633 return Retention.RETAIN_IF_DIFFERENT; 634 } 635 } 636 637 static final Splitter COMMA_SEMI = 638 Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings(); 639 protected static final boolean NUMBER_SYSTEM_HACK = true; 640 checkSuboptions(int i, UnicodeSet allowedOptions)641 private static void checkSuboptions(int i, UnicodeSet allowedOptions) { 642 UOption givenOptions = options[i]; 643 if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) { 644 throw new IllegalArgumentException( 645 "Illegal sub-options for " 646 + givenOptions.shortName 647 + ": " 648 + new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions) 649 + CldrUtility.LINE_SEPARATOR 650 + "Use -? 
for help."); 651 } 652 if (i == FIX && givenOptions.value != null) { 653 final UnicodeSet allowedFilters = new UnicodeSet().add('P').add('Q').add('V'); 654 for (char c : givenOptions.value.toCharArray()) { 655 if (!allowedFilters.contains(c)) { 656 throw new IllegalArgumentException( 657 "The filter " + c + " is currently disabled, see CLDR-17144"); 658 } 659 } 660 } 661 } 662 removePosix(CLDRFile toMergeIn)663 private static void removePosix(CLDRFile toMergeIn) { 664 Set<String> toRemove = new HashSet<>(); 665 for (String xpath : toMergeIn) { 666 if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath); 667 } 668 toMergeIn.removeAll(toRemove, false); 669 } 670 671 static PathChecker pathChecker = new PathChecker(); 672 673 /** Implementation for a certain type of filter. Each filter has a letter associated with it. */ 674 abstract static class CLDRFilter { 675 protected CLDRFile cldrFileToFilter; 676 protected CLDRFile cldrFileToFilterResolved; 677 private String localeID; 678 protected Set<String> availableChildren; 679 private Set<String> toBeRemoved; 680 private CLDRFile toBeReplaced; 681 protected Factory factory; 682 protected int countChanges; 683 684 /** 685 * Called when a new locale is being processed 686 * 687 * @param k 688 * @param factory 689 * @param removal 690 * @param replacements 691 */ setFile( CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements)692 public final void setFile( 693 CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) { 694 this.cldrFileToFilter = k; 695 cldrFileToFilterResolved = null; 696 this.factory = factory; 697 localeID = k.getLocaleID(); 698 this.toBeRemoved = removal; 699 this.toBeReplaced = replacements; 700 countChanges = 0; 701 handleStart(); 702 } 703 704 /** Called by setFile() before all processing for a file */ handleStart()705 public void handleStart() {} 706 707 /** 708 * Called for each xpath 709 * 710 * @param xpath 711 */ handlePath(String xpath)712 public abstract void 
handlePath(String xpath); 713 714 /** Called after all xpaths in this file are handled */ handleEnd()715 public void handleEnd() {} 716 getResolved()717 public CLDRFile getResolved() { 718 if (cldrFileToFilterResolved == null) { 719 if (cldrFileToFilter.isResolved()) { 720 cldrFileToFilterResolved = cldrFileToFilter; 721 } else { 722 cldrFileToFilterResolved = factory.make(cldrFileToFilter.getLocaleID(), true); 723 } 724 } 725 return cldrFileToFilterResolved; 726 } 727 show(String reason, String detail)728 public void show(String reason, String detail) { 729 System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail); 730 } 731 retain(String path, String reason)732 public void retain(String path, String reason) { 733 System.out.println( 734 "%" 735 + localeID 736 + "\t" 737 + reason 738 + "\tRetaining: " 739 + cldrFileToFilter.getStringValue(path) 740 + "\t at: " 741 + path); 742 } 743 remove(String path)744 public void remove(String path) { 745 remove(path, "-"); 746 } 747 remove(String path, String reason)748 public void remove(String path, String reason) { 749 if (toBeRemoved.contains(path)) return; 750 toBeRemoved.add(path); 751 String oldValueOldPath = cldrFileToFilter.getStringValue(path); 752 showAction(reason, "Removing", oldValueOldPath, null, null, path, path); 753 } 754 replace(String oldFullPath, String newFullPath, String newValue)755 public void replace(String oldFullPath, String newFullPath, String newValue) { 756 replace(oldFullPath, newFullPath, newValue, "-"); 757 } 758 showAction( String reason, String action, String oldValueOldPath, String oldValueNewPath, String newValue, String oldFullPath, String newFullPath)759 public void showAction( 760 String reason, 761 String action, 762 String oldValueOldPath, 763 String oldValueNewPath, 764 String newValue, 765 String oldFullPath, 766 String newFullPath) { 767 System.out.println( 768 "%" 769 + localeID 770 + "\t" 771 + action 772 + "\t" 773 + reason 774 + "\t«" 775 + oldValueOldPath 
776 + "»" 777 + (newFullPath.equals(oldFullPath) || oldValueNewPath == null 778 ? "" 779 : oldValueNewPath.equals(oldValueOldPath) 780 ? "/=" 781 : "/«" + oldValueNewPath + "»") 782 + "\t→\t" 783 + (newValue == null 784 ? "∅" 785 : newValue.equals(oldValueOldPath) ? "≡" : "«" + newValue + "»") 786 + "\t" 787 + oldFullPath 788 + (newFullPath.equals(oldFullPath) ? "" : "\t→\t" + newFullPath)); 789 ++countChanges; 790 } 791 792 /** 793 * There are the following cases, where: 794 * 795 * <pre> 796 * pathSame, new value null: Removing v p 797 * pathSame, new value not null: Replacing v v' p 798 * pathChanges, nothing at new path: Moving v p p' 799 * pathChanges, same value at new path: Replacing v v' p p' 800 * pathChanges, value changes: Overriding v v' p p' 801 * 802 * <pre> 803 * @param oldFullPath 804 * @param newFullPath 805 * @param newValue 806 * @param reason 807 */ replace( String oldFullPath, String newFullPath, String newValue, String reason)808 public void replace( 809 String oldFullPath, String newFullPath, String newValue, String reason) { 810 String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath); 811 String temp = cldrFileToFilter.getFullXPath(oldFullPath); 812 if (temp != null) { 813 oldFullPath = temp; 814 } 815 boolean pathSame = oldFullPath.equals(newFullPath); 816 817 if (!pathChecker.checkPath(newFullPath)) { 818 throw new IllegalArgumentException("Bad path: " + newFullPath); 819 } 820 821 if (pathSame) { 822 if (newValue == null) { 823 remove(oldFullPath, reason); 824 } else if (oldValueOldPath == null) { 825 toBeReplaced.add(oldFullPath, newValue); 826 showAction( 827 reason, 828 "Adding", 829 oldValueOldPath, 830 null, 831 newValue, 832 oldFullPath, 833 newFullPath); 834 } else { 835 toBeReplaced.add(oldFullPath, newValue); 836 showAction( 837 reason, 838 "Replacing", 839 oldValueOldPath, 840 null, 841 newValue, 842 oldFullPath, 843 newFullPath); 844 } 845 return; 846 } 847 String oldValueNewPath = 
cldrFileToFilter.getStringValue(newFullPath); 848 toBeRemoved.add(oldFullPath); 849 toBeReplaced.add(newFullPath, newValue); 850 851 if (oldValueNewPath == null) { 852 showAction( 853 reason, 854 "Moving", 855 oldValueOldPath, 856 oldValueNewPath, 857 newValue, 858 oldFullPath, 859 newFullPath); 860 } else if (oldValueNewPath.equals(newValue)) { 861 showAction( 862 reason, 863 "Unchanged Value", 864 oldValueOldPath, 865 oldValueNewPath, 866 newValue, 867 oldFullPath, 868 newFullPath); 869 } else { 870 showAction( 871 reason, 872 "Overriding", 873 oldValueOldPath, 874 oldValueNewPath, 875 newValue, 876 oldFullPath, 877 newFullPath); 878 } 879 } 880 881 /** 882 * Adds a new path-value pair to the CLDRFile. 883 * 884 * @param path the new path 885 * @param value the value 886 * @param reason Reason for adding the path and value. 887 */ add(String path, String value, String reason)888 public void add(String path, String value, String reason) { 889 String oldValueOldPath = cldrFileToFilter.getStringValue(path); 890 if (oldValueOldPath == null) { 891 toBeRemoved.remove(path); 892 toBeReplaced.add(path, value); 893 showAction(reason, "Adding", oldValueOldPath, null, value, path, path); 894 } else { 895 replace(path, path, value); 896 } 897 } 898 getReplacementFile()899 public CLDRFile getReplacementFile() { 900 return toBeReplaced; 901 } 902 903 /** 904 * Called before all files are processed. Note: TODO: This is called unconditionally, 905 * whether the filter is enabled or not. It should only be called if the filter is enabled. 906 * Reference: https://unicode-org.atlassian.net/browse/CLDR-16343 907 */ handleSetup()908 public void handleSetup() {} 909 910 /** 911 * Called after all files are processed. Note: TODO: This is called unconditionally, whether 912 * the filter is enabled or not. It should only be called if the filter is enabled. 
913 * Reference: https://unicode-org.atlassian.net/browse/CLDR-16343 914 */ handleCleanup()915 public void handleCleanup() {} 916 getLocaleID()917 public String getLocaleID() { 918 return localeID; 919 } 920 } 921 922 static class FixList { 923 // simple class, so we use quick list 924 CLDRFilter[] filters = new CLDRFilter[128]; // only ascii 925 String[] helps = new String[128]; // only ascii 926 UnicodeSet options = new UnicodeSet(); 927 String inputOptions = null; 928 int totalChanged = 0; 929 add(char letter, String help)930 void add(char letter, String help) { 931 add(letter, help, null); 932 } 933 handleSetup()934 public void handleSetup() { 935 for (int i = 0; i < filters.length; ++i) { 936 if (filters[i] != null) { 937 filters[i].handleSetup(); 938 } 939 } 940 } 941 handleCleanup()942 public void handleCleanup() { 943 for (int i = 0; i < filters.length; ++i) { 944 if (filters[i] != null) { 945 filters[i].handleCleanup(); 946 } 947 } 948 } 949 getOptions()950 public UnicodeSet getOptions() { 951 return options; 952 } 953 add(char letter, String help, CLDRFilter filter)954 void add(char letter, String help, CLDRFilter filter) { 955 if (helps[letter] != null) 956 throw new IllegalArgumentException("Duplicate letter: " + letter); 957 filters[letter] = filter; 958 helps[letter] = help; 959 options.add(letter); 960 } 961 setFile( CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements)962 void setFile( 963 CLDRFile file, 964 String inputOptions, 965 Factory factory, 966 Set<String> removal, 967 CLDRFile replacements) { 968 this.inputOptions = inputOptions; 969 for (int i = 0; i < inputOptions.length(); ++i) { 970 char c = inputOptions.charAt(i); 971 if (filters[c] != null) { 972 try { 973 filters[c].setFile(file, factory, removal, replacements); 974 } catch (RuntimeException e) { 975 System.err.println("Failure in " + filters[c].localeID + "\t START"); 976 throw e; 977 } 978 } 979 } 980 } 981 handleStart()982 void 
handleStart() { 983 for (int i = 0; i < inputOptions.length(); ++i) { 984 char c = inputOptions.charAt(i); 985 if (filters[c] != null) { 986 try { 987 filters[c].handleStart(); 988 } catch (RuntimeException e) { 989 System.err.println("Failure in " + filters[c].localeID + "\t START"); 990 throw e; 991 } 992 } 993 } 994 } 995 handlePath(String xpath)996 void handlePath(String xpath) { 997 for (int i = 0; i < inputOptions.length(); ++i) { 998 char c = inputOptions.charAt(i); 999 if (filters[c] != null) { 1000 try { 1001 filters[c].handlePath(xpath); 1002 } catch (RuntimeException e) { 1003 System.err.println("Failure in " + filters[c].localeID + "\t " + xpath); 1004 throw e; 1005 } 1006 } 1007 } 1008 } 1009 handleEnd()1010 void handleEnd() { 1011 for (int i = 0; i < inputOptions.length(); ++i) { 1012 char c = inputOptions.charAt(i); 1013 if (filters[c] != null) { 1014 try { 1015 filters[c].handleEnd(); 1016 if (filters[c].countChanges != 0) { 1017 totalChanged += filters[c].countChanges; 1018 System.out.println( 1019 "#" 1020 + filters[c].localeID 1021 + "\tItems changed: " 1022 + filters[c].countChanges); 1023 } 1024 } catch (RuntimeException e) { 1025 System.err.println("Failure in " + filters[c].localeID + "\t START"); 1026 throw e; 1027 } 1028 } 1029 } 1030 } 1031 showHelp()1032 String showHelp() { 1033 String result = ""; 1034 for (int i = 0; i < filters.length; ++i) { 1035 if (helps[i] != null) { 1036 result += "\t" + (char) i + "\t " + helps[i] + XPathParts.NEWLINE; 1037 } 1038 } 1039 return result; 1040 } 1041 } 1042 1043 static Set<String> totalSkeletons = new HashSet<>(); 1044 1045 static Map<String, String> rootUnitMap = new HashMap<>(); 1046 1047 static { 1048 rootUnitMap.put("second", "s"); 1049 rootUnitMap.put("minute", "min"); 1050 rootUnitMap.put("hour", "h"); 1051 rootUnitMap.put("day", "d"); 1052 rootUnitMap.put("week", "w"); 1053 rootUnitMap.put("month", "m"); 1054 rootUnitMap.put("year", "y"); 1055 1056 fixList.add( 1057 'z', 1058 "Remove 
deprecated elements", 1059 new CLDRFilter() { 1060 1061 public boolean isDeprecated( 1062 DtdType type, String element, String attribute, String value) { 1063 return DtdData.getInstance(type).isDeprecated(element, attribute, value); 1064 } 1065 1066 public boolean isDeprecated(DtdType type, String path) { 1067 1068 XPathParts parts = XPathParts.getFrozenInstance(path); 1069 for (int i = 0; i < parts.size(); ++i) { 1070 String element = parts.getElement(i); 1071 if (isDeprecated(type, element, "*", "*")) { 1072 return true; 1073 } 1074 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1075 String attribute = entry.getKey(); 1076 String value = entry.getValue(); 1077 if (isDeprecated(type, element, attribute, value)) { 1078 return true; 1079 } 1080 } 1081 } 1082 return false; 1083 } 1084 1085 @Override 1086 public void handlePath(String xpath) { 1087 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1088 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 1089 for (int i = 0; i < parts.size(); ++i) { 1090 String element = parts.getElement(i); 1091 if (dtdData.isDeprecated(element, "*", "*")) { 1092 remove(fullPath, "Deprecated element"); 1093 return; 1094 } 1095 for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) { 1096 String attribute = entry.getKey(); 1097 String value = entry.getValue(); 1098 if (dtdData.isDeprecated(element, attribute, value)) { 1099 remove(fullPath, "Element with deprecated attribute(s)"); 1100 } 1101 } 1102 } 1103 } 1104 }); 1105 1106 fixList.add( 1107 'e', 1108 "fix Interindic", 1109 new CLDRFilter() { 1110 @Override 1111 public void handlePath(String xpath) { 1112 if (xpath.indexOf("=\"InterIndic\"") < 0) return; 1113 String v = cldrFileToFilter.getStringValue(xpath); 1114 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1115 XPathParts fullparts = XPathParts.getFrozenInstance(fullXPath); 1116 Map<String, String> attributes = fullparts.findAttributes("transform"); 1117 String 
oldValue = attributes.get("direction"); 1118 if ("both".equals(oldValue)) { 1119 attributes.put("direction", "forward"); 1120 replace(xpath, fullparts.toString(), v); 1121 } 1122 } 1123 }); 1124 1125 fixList.add( 1126 'B', 1127 "fix bogus values", 1128 new CLDRFilter() { 1129 RegexLookup<Integer> paths = 1130 RegexLookup.<Integer>of() 1131 .setPatternTransform(RegexLookup.RegexFinderTransformPath2) 1132 .add( 1133 "//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 1134 0) 1135 .add( 1136 "//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 1137 0) 1138 .add( 1139 "//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 1140 0) 1141 .add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0) 1142 .add( 1143 "//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 1144 0) 1145 .add( 1146 "//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 1147 0); 1148 Output<String[]> arguments = new Output<>(); 1149 CLDRFile english = CLDRConfig.getInstance().getEnglish(); 1150 boolean skip; 1151 1152 @Override 1153 public void handleStart() { 1154 CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1155 UnicodeSet exemplars = 1156 resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING); 1157 skip = exemplars.containsSome('a', 'z'); 1158 // TODO add simpler way to skip file entirely 1159 } 1160 1161 @Override 1162 public void handlePath(String xpath) { 1163 if (skip) { 1164 return; 1165 } 1166 Integer lookupValue = paths.get(xpath, null, arguments); 1167 if (lookupValue == null) { 1168 return; 1169 } 1170 String type = arguments.value[1]; 1171 String value = cldrFileToFilter.getStringValue(xpath); 1172 if (value.equals(type)) { 1173 remove(xpath, "Matches code"); 1174 return; 1175 } 1176 String evalue = english.getStringValue(xpath); 1177 if (value.equals(evalue)) { 1178 remove(xpath, "Matches English"); 1179 return; 1180 } 1181 } 1182 }); 1183 1184 fixList.add( 1185 's', 1186 "fix alt 
accounting", 1187 new CLDRFilter() { 1188 @Override 1189 public void handlePath(String xpath) { 1190 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1191 if (!parts.containsAttributeValue("alt", "accounting")) { 1192 return; 1193 } 1194 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1195 String value = cldrFileToFilter.getStringValue(xpath); 1196 XPathParts fullparts = 1197 XPathParts.getFrozenInstance(oldFullXPath) 1198 .cloneAsThawed(); // not frozen, for removeAttribute 1199 fullparts.removeAttribute("pattern", "alt"); 1200 fullparts.setAttribute("currencyFormat", "type", "accounting"); 1201 String newFullXPath = fullparts.toString(); 1202 replace( 1203 oldFullXPath, 1204 newFullXPath, 1205 value, 1206 "Move alt=accounting value to new path"); 1207 } 1208 }); 1209 1210 fixList.add( 1211 'n', 1212 "add unit displayName", 1213 new CLDRFilter() { 1214 @Override 1215 public void handlePath(String xpath) { 1216 if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 1217 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 1218 || xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) { 1219 return; 1220 } 1221 String value = cldrFileToFilter.getStringValue(xpath); 1222 String newValue = null; 1223 if (value.startsWith("{0}")) { 1224 newValue = value.substring(3).trim(); 1225 } else if (value.endsWith("{0}")) { 1226 newValue = value.substring(0, value.length() - 3).trim(); 1227 } else { 1228 System.out.println( 1229 "unitPattern-other does not start or end with \"{0}\": \"" 1230 + value 1231 + "\""); 1232 return; 1233 } 1234 1235 String oldFullXPath = cldrFileToFilter.getFullXPath(xpath); 1236 String newFullXPath = 1237 oldFullXPath 1238 .substring(0, oldFullXPath.indexOf("unitPattern")) 1239 .concat("displayName[@draft=\"provisional\"]"); 1240 add( 1241 newFullXPath, 1242 newValue, 1243 "create unit displayName-long from unitPattern-long-other"); 1244 String newFullXPathShort = 1245 newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]"); 
1246 add( 1247 newFullXPathShort, 1248 newValue, 1249 "create unit displayName-short from unitPattern-long-other"); 1250 } 1251 }); 1252 1253 fixList.add( 1254 'x', 1255 "retain paths", 1256 new CLDRFilter() { 1257 Matcher m = null; 1258 1259 @Override 1260 public void handlePath(String xpath) { 1261 if (m == null) { 1262 m = PatternCache.get(options[PATH].value).matcher(""); 1263 } 1264 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1265 if (!m.reset(fullXPath).matches()) { 1266 remove(xpath); 1267 } 1268 } 1269 }); 1270 1271 fixList.add( 1272 'l', 1273 "change language code", 1274 new CLDRFilter() { 1275 private CLDRFile resolved; 1276 1277 @Override 1278 public void handleStart() { 1279 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 1280 } 1281 1282 @Override 1283 public void handlePath(String xpath) { 1284 if (!xpath.contains("/language")) { 1285 return; 1286 } 1287 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1288 String languageCode = parts.findAttributeValue("language", "type"); 1289 String v = resolved.getStringValue(xpath); 1290 if (!languageCode.equals("swc")) { 1291 return; 1292 } 1293 parts = parts.cloneAsThawed(); 1294 parts.setAttribute("language", "type", "sw_CD"); 1295 replace(xpath, parts.toString(), v); 1296 } 1297 }); 1298 1299 fixList.add( 1300 'g', 1301 "Swap alt/non-alt values for Czechia", 1302 new CLDRFilter() { 1303 1304 @Override 1305 public void handleStart() {} 1306 1307 @Override 1308 public void handlePath(String xpath) { 1309 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1310 if (!parts.containsAttributeValue("alt", "variant") 1311 || !parts.containsAttributeValue("type", "CZ")) { 1312 return; 1313 } 1314 String variantValue = cldrFileToFilter.getStringValue(xpath); 1315 String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", ""); 1316 String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath); 1317 replace(xpath, xpath, nonVariantValue); 1318 
replace(nonVariantXpath, nonVariantXpath, variantValue); 1319 } 1320 }); 1321 1322 fixList.add( 1323 'u', 1324 "fix duration unit patterns", 1325 new CLDRFilter() { 1326 1327 @Override 1328 public void handlePath(String xpath) { 1329 if (!xpath.contains("/units")) { 1330 return; 1331 } 1332 if (!xpath.contains("/durationUnitPattern")) { 1333 return; 1334 } 1335 1336 String value = cldrFileToFilter.getStringValue(xpath); 1337 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1338 1339 XPathParts parts = XPathParts.getFrozenInstance(fullXPath); 1340 String unittype = parts.findAttributeValue("durationUnit", "type"); 1341 1342 String newFullXpath = 1343 "//ldml/units/durationUnit[@type=\"" 1344 + unittype 1345 + "\"]/durationUnitPattern"; 1346 replace( 1347 fullXPath, 1348 newFullXpath, 1349 value, 1350 "converting to new duration unit structure"); 1351 } 1352 }); 1353 1354 fixList.add( 1355 'a', 1356 "Fix 0/1", 1357 new CLDRFilter() { 1358 final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 1359 PluralInfo info; 1360 1361 @Override 1362 public void handleStart() { 1363 info = SupplementalDataInfo.getInstance().getPlurals(super.localeID); 1364 } 1365 1366 @Override 1367 public void handlePath(String xpath) { 1368 if (xpath.indexOf("count") < 0) { 1369 return; 1370 } 1371 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1372 XPathParts parts = 1373 XPathParts.getFrozenInstance(fullpath) 1374 .cloneAsThawed(); // not frozen, for setAttribute 1375 String countValue = parts.getAttributeValue(-1, "count"); 1376 if (!DIGITS.containsAll(countValue)) { 1377 return; 1378 } 1379 int intValue = Integer.parseInt(countValue); 1380 Count count = info.getCount(intValue); 1381 parts.setAttribute(-1, "count", count.toString()); 1382 String newPath = parts.toString(); 1383 String oldValue = cldrFileToFilter.getStringValue(newPath); 1384 String value = cldrFileToFilter.getStringValue(xpath); 1385 if (oldValue != null) { 1386 String fixed = oldValue.replace("{0}", 
countValue); 1387 if (value.equals(oldValue) || value.equals(fixed)) { 1388 remove( 1389 fullpath, 1390 "Superfluous given: " + count + "→«" + oldValue + "»"); 1391 } else { 1392 remove(fullpath, "Can’t replace: " + count + "→«" + oldValue + "»"); 1393 } 1394 return; 1395 } 1396 replace(fullpath, newPath, value, "Moving 0/1"); 1397 } 1398 }); 1399 1400 fixList.add( 1401 'b', 1402 "Prep for bulk import", 1403 new CLDRFilter() { 1404 1405 @Override 1406 public void handlePath(String xpath) { 1407 if (!options[USER].doesOccur) { 1408 return; 1409 } 1410 String userID = options[USER].value; 1411 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1412 String value = cldrFileToFilter.getStringValue(xpath); 1413 XPathParts parts = 1414 XPathParts.getFrozenInstance(fullpath) 1415 .cloneAsThawed(); // not frozen, for addAttribute 1416 parts.addAttribute("draft", "unconfirmed"); 1417 parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8"); 1418 String newPath = parts.toString(); 1419 replace(fullpath, newPath, value); 1420 } 1421 }); 1422 1423 fixList.add( 1424 'c', 1425 "Fix transiton from an old currency code to a new one", 1426 new CLDRFilter() { 1427 @Override 1428 public void handlePath(String xpath) { 1429 String oldCurrencyCode = "VEF"; 1430 String newCurrencyCode = "VES"; 1431 int fromDate = 2008; 1432 int toDate = 2018; 1433 String leadingParenString = " ("; 1434 String trailingParenString = ")"; 1435 String separator = "\u2013"; 1436 String languageTag = "root"; 1437 1438 if (xpath.indexOf( 1439 "/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") 1440 < 0) { 1441 return; 1442 } 1443 String value = cldrFileToFilter.getStringValue(xpath); 1444 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1445 String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode); 1446 cldrFileToFilter.add(newFullXPath, value); 1447 1448 // Exceptions for locales that use an alternate numbering system or a 1449 // different format for the dates at 
1450 // the end. 1451 // Add additional ones as necessary 1452 String localeID = cldrFileToFilter.getLocaleID(); 1453 if (localeID.equals("ne")) { 1454 languageTag = "root-u-nu-deva"; 1455 } else if (localeID.equals("bn")) { 1456 languageTag = "root-u-nu-beng"; 1457 } else if (localeID.equals("ar")) { 1458 leadingParenString = " - "; 1459 trailingParenString = ""; 1460 } else if (localeID.equals("fa")) { 1461 languageTag = "root-u-nu-arabext"; 1462 separator = Utility.unescape(" \\u062A\\u0627 "); 1463 } 1464 1465 NumberFormat nf = 1466 NumberFormat.getInstance(ULocale.forLanguageTag(languageTag)); 1467 nf.setGroupingUsed(false); 1468 1469 String tagString = 1470 leadingParenString 1471 + nf.format(fromDate) 1472 + separator 1473 + nf.format(toDate) 1474 + trailingParenString; 1475 1476 replace(fullXPath, fullXPath, value + tagString); 1477 } 1478 }); 1479 1480 fixList.add( 1481 'p', 1482 "input-processor", 1483 new CLDRFilter() { 1484 private DisplayAndInputProcessor inputProcessor; 1485 1486 @Override 1487 public void handleStart() { 1488 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true); 1489 inputProcessor.enableInheritanceReplacement(getResolved()); 1490 } 1491 1492 @Override 1493 public void handleEnd() { 1494 inputProcessor = null; // clean up, just in case 1495 } 1496 1497 @Override 1498 public void handlePath(String xpath) { 1499 String value = cldrFileToFilter.getStringValue(xpath); 1500 String newValue = inputProcessor.processInput(xpath, value, null); 1501 if (value.equals(newValue)) { 1502 return; 1503 } 1504 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1505 replace(fullXPath, fullXPath, newValue); 1506 } 1507 }); 1508 1509 // 'P' Process, like 'p' but without inheritance replacement 1510 fixList.add( 1511 'P', 1512 "input-Processor-no-inheritance-replacement", 1513 new CLDRFilter() { 1514 private DisplayAndInputProcessor inputProcessor; 1515 1516 @Override 1517 public void handleStart() { 1518 inputProcessor = new 
DisplayAndInputProcessor(cldrFileToFilter, true); 1519 } 1520 1521 @Override 1522 public void handleEnd() { 1523 inputProcessor = null; // clean up, just in case 1524 } 1525 1526 @Override 1527 public void handlePath(String xpath) { 1528 String value = cldrFileToFilter.getStringValue(xpath); 1529 String newValue = inputProcessor.processInput(xpath, value, null); 1530 if (value.equals(newValue)) { 1531 return; 1532 } 1533 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1534 replace(fullXPath, fullXPath, newValue); 1535 } 1536 }); 1537 1538 // use DAIP for one thing only: replaceBaileyWithInheritanceMarker 1539 fixList.add( 1540 'I', 1541 "Inheritance-substitution", 1542 new CLDRFilter() { 1543 private DisplayAndInputProcessor inputProcessor; 1544 private final int STEPS_FROM_ROOT = 1545 1; // only process if locale's level matches; root = 0, en = 1, ... 1546 1547 @Override 1548 public void handleStart() { 1549 int steps = stepsFromRoot(cldrFileToFilter.getLocaleID()); 1550 if (steps == STEPS_FROM_ROOT) { 1551 inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true); 1552 inputProcessor.enableInheritanceReplacement(getResolved()); 1553 } else { 1554 inputProcessor = null; 1555 } 1556 } 1557 1558 @Override 1559 public void handleEnd() { 1560 inputProcessor = null; // clean up, just in case 1561 } 1562 1563 @Override 1564 public void handlePath(String xpath) { 1565 if (inputProcessor == null) { 1566 return; 1567 } 1568 String value = cldrFileToFilter.getStringValue(xpath); 1569 String newValue = 1570 inputProcessor.replaceBaileyWithInheritanceMarker(xpath, value); 1571 if (value.equals(newValue)) { 1572 return; 1573 } 1574 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1575 replace(fullXPath, fullXPath, newValue); 1576 } 1577 }); 1578 1579 // Un-drop hard inheritance: revert INHERITANCE_MARKER to pre-drop-hard-inheritance values 1580 fixList.add( 1581 'U', 1582 "Un-drop inheritance", 1583 new CLDRFilter() { 1584 // baseDir needs to be 
the "pre-drop" path of an existing copy of old 1585 // common/main 1586 // For example, 2022_10_07_pre folder gets xml from pull request 2433, commit 1587 // 80029f1 1588 // Also ldml.dtd is required; for example: 1589 // mkdir ../2022_10_07_pre/common/dtd 1590 // cp common/dtd/ldml.dtd ../2022_10_07_pre/common/dtd 1591 private final String baseDir = "../2022_10_07_pre/"; 1592 private final File[] list = 1593 new File[] { 1594 new File(baseDir + "common/main/"), 1595 new File(baseDir + "common/annotations/") 1596 }; 1597 private Factory preFactory = null; 1598 private CLDRFile preFile = null; 1599 1600 @Override 1601 public void handleStart() { 1602 if (preFactory == null) { 1603 preFactory = SimpleFactory.make(list, ".*"); 1604 } 1605 String localeID = cldrFileToFilter.getLocaleID(); 1606 try { 1607 preFile = preFactory.make(localeID, false /* not resolved */); 1608 } catch (Exception e) { 1609 System.out.println("Skipping " + localeID + " due to " + e); 1610 preFile = null; 1611 } 1612 } 1613 1614 @Override 1615 public void handlePath(String xpath) { 1616 if (preFile == null) { 1617 return; 1618 } 1619 if (xpath.contains("personName")) { 1620 return; 1621 } 1622 String value = cldrFileToFilter.getStringValue(xpath); 1623 if (CldrUtility.INHERITANCE_MARKER.equals(value)) { 1624 String preValue = preFile.getStringValue(xpath); 1625 if (!CldrUtility.INHERITANCE_MARKER.equals(preValue)) { 1626 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1627 replace(fullXPath, fullXPath, preValue); 1628 } 1629 } 1630 } 1631 }); 1632 1633 fixList.add( 1634 't', 1635 "Fix missing count values groups", 1636 new CLDRFilter() { 1637 1638 @Override 1639 public void handlePath(String xpath) { 1640 if (xpath.indexOf("@count=\"other\"") < 0) { 1641 return; 1642 } 1643 1644 String value = cldrFileToFilter.getStringValue(xpath); 1645 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1646 String[] missingCounts = {"one"}; 1647 for (String count : missingCounts) { 1648 String 
newFullXPath = fullXPath.replace("other", count); 1649 if (cldrFileToFilter.getWinningValue(newFullXPath) == null) { 1650 add(newFullXPath, value, "Adding missing plural form"); 1651 } 1652 } 1653 } 1654 }); 1655 1656 fixList.add( 1657 'f', 1658 "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", 1659 new CLDRFilter() { 1660 @Override 1661 public void handlePath(String xpath) { 1662 if (xpath.indexOf("/segmentation") >= 0 1663 || xpath.indexOf("/transforms") >= 0 1664 || xpath.indexOf("/exemplarCharacters") >= 0 1665 || xpath.indexOf("/pc") >= 0 1666 || xpath.indexOf("/sc") >= 0 1667 || xpath.indexOf("/tc") >= 0 1668 || xpath.indexOf("/qc") >= 0 1669 || xpath.indexOf("/ic") >= 0) return; 1670 String value = cldrFileToFilter.getStringValue(xpath); 1671 String nfcValue = Normalizer.compose(value, false); 1672 if (value.equals(nfcValue)) return; 1673 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1674 replace(fullXPath, fullXPath, nfcValue); 1675 } 1676 }); 1677 1678 fixList.add( 1679 'v', 1680 "remove illegal codes", 1681 new CLDRFilter() { 1682 StandardCodes sc = StandardCodes.make(); 1683 String[] codeTypes = {"language", "script", "territory", "currency"}; 1684 1685 @Override 1686 public void handlePath(String xpath) { 1687 if (xpath.indexOf("/currency") < 0 1688 && xpath.indexOf("/timeZoneNames") < 0 1689 && xpath.indexOf("/localeDisplayNames") < 0) return; 1690 XPathParts parts = XPathParts.getFrozenInstance(xpath); 1691 String code; 1692 for (int i = 0; i < codeTypes.length; ++i) { 1693 code = parts.findAttributeValue(codeTypes[i], "type"); 1694 if (code != null) { 1695 if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) 1696 remove(xpath); 1697 return; 1698 } 1699 } 1700 code = parts.findAttributeValue("zone", "type"); 1701 if (code != null) { 1702 if (code.indexOf("/GMT") >= 0) remove(xpath); 1703 } 1704 } 1705 }); 1706 1707 fixList.add( 1708 'w', 1709 "fix alt='...proposed' when there is no alternative", 1710 new 
CLDRFilter() { 1711 private Set<String> newFullXPathSoFar = new HashSet<>(); 1712 1713 @Override 1714 public void handlePath(String xpath) { 1715 if (xpath.indexOf("proposed") < 0) return; 1716 String fullXPath = cldrFileToFilter.getFullXPath(xpath); 1717 XPathParts parts = 1718 XPathParts.getFrozenInstance(fullXPath) 1719 .cloneAsThawed(); // not frozen, for removeProposed 1720 String newFullXPath = parts.removeProposed().toString(); 1721 // now see if there is an uninherited value 1722 String value = cldrFileToFilter.getStringValue(xpath); 1723 String baseValue = cldrFileToFilter.getStringValue(newFullXPath); 1724 if (baseValue != null) { 1725 // if the value AND the fullxpath are the same as what we have, then 1726 // delete 1727 if (value.equals(baseValue)) { 1728 String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath); 1729 if (baseFullXPath.equals(newFullXPath)) { 1730 remove(xpath, "alt=base"); 1731 } 1732 } 1733 return; // there is, so skip 1734 } 1735 // there isn't, so modif if we haven't done so already 1736 if (!newFullXPathSoFar.contains(newFullXPath)) { 1737 replace(fullXPath, newFullXPath, value); 1738 newFullXPathSoFar.add(newFullXPath); 1739 } 1740 } 1741 }); 1742 1743 fixList.add( 1744 'S', 1745 "add datetimeSkeleton to dateFormat,timeFormat", 1746 new CLDRFilter() { 1747 DateTimePatternGenerator dateTimePatternGenerator = 1748 DateTimePatternGenerator.getEmptyInstance(); 1749 1750 @Override 1751 public void handlePath(String xpath) { 1752 // desired xpaths are like 1753 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"] 1754 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@draft="..."] 1755 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@numbers="..."] 
1756 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@numbers="..."][@draft="..."] 1757 // //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@alt="variant"] 1758 // //ldml/dates/calendars/calendar[@type="..."]/timeFormats/timeFormatLength[@type="..."]/timeFormat[@type="standard"]/pattern[@type="standard"] 1759 // //ldml/dates/calendars/calendar[@type="..."]/timeFormats/timeFormatLength[@type="..."]/timeFormat[@type="standard"]/pattern[@type="standard"][@draft="..."] 1760 if (xpath.indexOf("/dateFormat[@type=\"standard\"]/pattern") < 0 1761 && xpath.indexOf("/timeFormat[@type=\"standard\"]/pattern") < 0) { 1762 return; 1763 } 1764 String patternValue = cldrFileToFilter.getStringValue(xpath); 1765 String skeletonValue = patternValue; 1766 if (!patternValue.equals("↑↑↑")) { 1767 skeletonValue = dateTimePatternGenerator.getSkeleton(patternValue); 1768 if (skeletonValue == null || skeletonValue.length() < 1) { 1769 show( 1770 "empty skeleton for datetime pattern \"" 1771 + patternValue 1772 + "\"", 1773 "path " + xpath); 1774 return; 1775 } 1776 } 1777 1778 String patternFullXPath = cldrFileToFilter.getFullXPath(xpath); 1779 // Replace pattern[@type="standard"] with datetimeSkeleton, preserve other 1780 // attributes (including numbers per TC discussion). 1781 // Note that for the alt="variant" patterns there are corresponding 1782 // alt="variant" availableFormats that must be used. 
1783 String skeletonFullXPath = 1784 patternFullXPath.replace( 1785 "/pattern[@type=\"standard\"]", 1786 "/datetimeSkeleton"); // .replaceAll("\\[@numbers=\"[^\"]+\"\\]", "") 1787 add( 1788 skeletonFullXPath, 1789 skeletonValue, 1790 "create datetimeSkeleton from dateFormat/pattern or timeFormat/pattern"); 1791 } 1792 }); 1793 1794 /* 1795 * Fix id to be identical to skeleton 1796 * Eliminate any single-field ids 1797 * Add "L" (stand-alone month), "?" (other stand-alones) 1798 * Remove any fields with both a date and a time 1799 * Test that datetime format is valid format (will have to fix by hand) 1800 * Map k, K to H, h 1801 * 1802 * In Survey Tool: don't show id; compute when item added or changed 1803 * test validity 1804 */ 1805 fixList.add( 1806 'd', 1807 "fix dates", 1808 new CLDRFilter() { 1809 DateTimePatternGenerator dateTimePatternGenerator = 1810 DateTimePatternGenerator.getEmptyInstance(); 1811 DateTimePatternGenerator.FormatParser formatParser = 1812 new DateTimePatternGenerator.FormatParser(); 1813 Map<String, Set<String>> seenSoFar = new HashMap<>(); 1814 1815 @Override 1816 public void handleStart() { 1817 seenSoFar.clear(); 1818 } 1819 1820 @Override 1821 public void handlePath(String xpath) { 1822 if (xpath.contains("timeFormatLength") && xpath.contains("full")) { 1823 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1824 String value = cldrFileToFilter.getStringValue(xpath); 1825 boolean gotChange = false; 1826 List<Object> list = formatParser.set(value).getItems(); 1827 for (int i = 0; i < list.size(); ++i) { 1828 Object item = list.get(i); 1829 if (item instanceof DateTimePatternGenerator.VariableField) { 1830 String itemString = item.toString(); 1831 if (itemString.charAt(0) == 'z') { 1832 list.set( 1833 i, 1834 new VariableField( 1835 Utility.repeat("v", itemString.length()))); 1836 gotChange = true; 1837 } 1838 } 1839 } 1840 if (gotChange) { 1841 String newValue = toStringWorkaround(); 1842 if (value != newValue) { 1843 
replace(xpath, fullpath, newValue); 1844 } 1845 } 1846 } 1847 if (xpath.indexOf("/availableFormats") < 0) { 1848 return; 1849 } 1850 String value = cldrFileToFilter.getStringValue(xpath); 1851 if (value == null) { 1852 return; // not in current file 1853 } 1854 1855 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1856 XPathParts fullparts = XPathParts.getFrozenInstance(fullpath); 1857 Map<String, String> attributes = fullparts.findAttributes("dateFormatItem"); 1858 String id = attributes.get("id"); 1859 String oldID = id; 1860 try { 1861 id = dateTimePatternGenerator.getBaseSkeleton(id); 1862 if (id.equals(oldID)) { 1863 return; 1864 } 1865 System.out.println(oldID + " => " + id); 1866 } catch (RuntimeException e) { 1867 id = "[error]"; 1868 return; 1869 } 1870 1871 attributes.put("id", id); 1872 totalSkeletons.add(id); 1873 1874 replace(xpath, fullparts.toString(), value); 1875 } 1876 1877 private String toStringWorkaround() { 1878 StringBuffer result = new StringBuffer(); 1879 List<Object> items = formatParser.getItems(); 1880 for (int i = 0; i < items.size(); ++i) { 1881 Object item = items.get(i); 1882 if (item instanceof String) { 1883 result.append(formatParser.quoteLiteral((String) items.get(i))); 1884 } else { 1885 result.append(items.get(i).toString()); 1886 } 1887 } 1888 return result.toString(); 1889 } 1890 }); 1891 1892 fixList.add( 1893 'y', 1894 "fix years to be y (with exceptions)", 1895 new CLDRFilter() { 1896 DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true); 1897 Map<String, Set<String>> seenSoFar = new HashMap<>(); 1898 1899 @Override 1900 public void handleStart() { 1901 seenSoFar.clear(); 1902 } 1903 1904 @Override 1905 public void handlePath(String xpath) { 1906 DateTimePatternType datetimePatternType = 1907 DateTimePatternType.fromPath(xpath); 1908 1909 // check to see if we need to change the value 1910 if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains( 1911 datetimePatternType)) { 1912 return; 1913 } 1914 
String oldValue = cldrFileToFilter.getStringValue(xpath); 1915 String value = 1916 dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType); 1917 String fullPath = cldrFileToFilter.getFullXPath(xpath); 1918 if (value.equals(oldValue)) { 1919 return; 1920 } 1921 // made it through the gauntlet, so replace 1922 replace(xpath, fullPath, value); 1923 } 1924 }); 1925 1926 // This should only be applied to specific locales, and the results checked manually 1927 // afterward. 1928 // It will only create ranges using the same digits as in root, not script-specific digits. 1929 // Any pre-existing year ranges should use the range marker from the intervalFormats "y" 1930 // item. 1931 // This make several assumptions and is somewhat *FRAGILE*. 1932 fixList.add( 1933 'j', 1934 "add year ranges from root to Japanese calendar eras", 1935 new CLDRFilter() { 1936 private CLDRFile rootFile; 1937 1938 @Override 1939 public void handleStart() { 1940 rootFile = factory.make("root", false); 1941 } 1942 1943 @Override 1944 public void handlePath(String xpath) { 1945 // Skip paths we don't care about 1946 if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return; 1947 // Get root name for the era, check it 1948 String rootEraValue = rootFile.getStringValue(xpath); 1949 int rootEraIndex = rootEraValue.indexOf(" ("); 1950 if (rootEraIndex < 0) 1951 return; // this era does not have a year range in root, no need to add 1952 // one in this 1953 // locale 1954 // Get range marker from intervalFormat range for y 1955 String yearIntervalFormat = 1956 cldrFileToFilter.getStringValue( 1957 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"y\"]/greatestDifference[@id=\"y\"]"); 1958 if (yearIntervalFormat == null) 1959 return; // oops, no intervalFormat data for y 1960 String rangeMarker = 1961 yearIntervalFormat.replaceAll( 1962 "[.y\u5E74\uB144]", ""); // *FRAGILE* strip out 1963 // everything except the 1964 // 
range-indicating part 1965 // Get current locale name for this era, check it 1966 String eraValue = cldrFileToFilter.getStringValue(xpath); 1967 if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) 1968 return; // this eraValue already 1969 // has a year range that 1970 // uses the appropriate 1971 // rangeMarker 1972 // Now update the root year range it with the rangeMarker for this locale, 1973 // and append it to this 1974 // locale's name 1975 String rootYearRange = rootEraValue.substring(rootEraIndex); 1976 String appendYearRange = 1977 rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker); 1978 String newEraValue = eraValue.concat(appendYearRange); 1979 String fullpath = cldrFileToFilter.getFullXPath(xpath); 1980 replace(xpath, fullpath, newEraValue); 1981 } 1982 }); 1983 1984 fixList.add( 1985 'r', 1986 "fix references and standards", 1987 new CLDRFilter() { 1988 int currentRef = 500; 1989 Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<>(); 1990 TreeMap<String, String> oldref_newref; 1991 1992 @Override 1993 public void handleStart() { 1994 String locale = cldrFileToFilter.getLocaleID(); 1995 oldref_newref = locale_oldref_newref.get(locale); 1996 if (oldref_newref == null) { 1997 oldref_newref = new TreeMap<>(); 1998 locale_oldref_newref.put(locale, oldref_newref); 1999 } 2000 } 2001 2002 @Override 2003 public void handlePath(String xpath) { 2004 // must be minimized for this to work. 
2005 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2006 if (!fullpath.contains("reference")) return; 2007 String value = cldrFileToFilter.getStringValue(xpath); 2008 XPathParts fullparts = 2009 XPathParts.getFrozenInstance(fullpath) 2010 .cloneAsThawed(); // can't be frozen 2011 if ("reference".equals(fullparts.getElement(-1))) { 2012 fixType(value, "type", fullpath, fullparts); 2013 } else if (fullparts.getAttributeValue(-1, "references") != null) { 2014 fixType(value, "references", fullpath, fullparts); 2015 } else { 2016 System.out.println("CLDRModify: Skipping: " + xpath); 2017 } 2018 } 2019 2020 /** 2021 * @param value 2022 * @param type 2023 * @param oldFullPath 2024 * @param fullparts the XPathParts -- must not be frozen, for addAttribute 2025 */ 2026 private void fixType( 2027 String value, String type, String oldFullPath, XPathParts fullparts) { 2028 String ref = fullparts.getAttributeValue(-1, type); 2029 if (whitespace.containsSome(ref)) { 2030 throw new IllegalArgumentException("Whitespace in references"); 2031 } 2032 String newRef = getNewRef(ref); 2033 fullparts.addAttribute(type, newRef); 2034 replace(oldFullPath, fullparts.toString(), value); 2035 } 2036 2037 private String getNewRef(String ref) { 2038 String newRef = oldref_newref.get(ref); 2039 if (newRef == null) { 2040 newRef = String.valueOf(currentRef++); 2041 newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef; 2042 oldref_newref.put(ref, newRef); 2043 } 2044 return newRef; 2045 } 2046 }); 2047 2048 fixList.add( 2049 'q', 2050 "fix annotation punctuation", 2051 new CLDRFilter() { 2052 @Override 2053 public void handlePath(String xpath) { 2054 if (!xpath.contains("/annotation")) { 2055 return; 2056 } 2057 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2058 XPathParts parts = XPathParts.getFrozenInstance(fullpath); 2059 String cp = parts.getAttributeValue(2, "cp"); 2060 String tts = parts.getAttributeValue(2, "tts"); 2061 String type = 
parts.getAttributeValue(2, "type"); 2062 if ("tts".equals(type)) { 2063 return; // ok, skip 2064 } 2065 parts = parts.cloneAsThawed(); 2066 String hex = "1F600"; 2067 if (cp.startsWith("[")) { 2068 UnicodeSet us = new UnicodeSet(cp); 2069 if (us.size() == 1) { 2070 cp = us.iterator().next(); 2071 hex = Utility.hex(cp); 2072 } else { 2073 hex = us.toString(); 2074 } 2075 parts.putAttributeValue(2, "cp", cp); 2076 } 2077 parts.removeAttribute(2, "tts"); 2078 if (tts != null) { 2079 String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", "); 2080 XPathParts parts2 = parts.cloneAsThawed(); 2081 parts2.putAttributeValue(2, "type", "tts"); 2082 add(parts2.toString(), newTts, "separate tts"); 2083 } 2084 String value = cldrFileToFilter.getStringValue(xpath); 2085 String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | "); 2086 final String newFullPath = parts.toString(); 2087 Comments comments = cldrFileToFilter.getXpath_comments(); 2088 String comment = comments.removeComment(CommentType.PREBLOCK, xpath); 2089 comment = hex + (comment == null ? 
"" : " " + comment); 2090 comments.addComment(CommentType.PREBLOCK, newFullPath, comment); 2091 if (!fullpath.equals(newFullPath) || !value.equals(newValue)) { 2092 replace(fullpath, newFullPath, newValue); 2093 } 2094 } 2095 }); 2096 2097 fixList.add( 2098 'Q', 2099 "add annotation names to keywords", 2100 new CLDRFilter() { 2101 Set<String> available = Annotations.getAllAvailable(); 2102 TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT)); 2103 CLDRFile resolved; 2104 Set<String> handledCharacters = new HashSet<>(); 2105 boolean isTop; 2106 2107 @Override 2108 public void handleStart() { 2109 String localeID = cldrFileToFilter.getLocaleID(); 2110 if (!available.contains(localeID)) { 2111 throw new IllegalArgumentException( 2112 "no annotations available, probably wrong directory"); 2113 } 2114 resolved = factory.make(localeID, true); 2115 CLDRLocale parent = CLDRLocale.getInstance(localeID).getParent(); 2116 isTop = CLDRLocale.ROOT.equals(parent); 2117 } 2118 2119 @Override 2120 public void handlePath(String xpath) { 2121 if (!xpath.contains("/annotation")) { 2122 return; 2123 } 2124 // <annotation cp="">100 | honderd | persent | telling | 2125 // vol</annotation> 2126 // <annotation cp="" type="tts">honderd punte</annotation> 2127 // we will copy honderd punte into the list of keywords. 2128 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2129 XPathParts parts = XPathParts.getFrozenInstance(fullpath); 2130 String cp = parts.getAttributeValue(2, "cp"); 2131 String type = parts.getAttributeValue(2, "type"); 2132 if (!isTop) { 2133 // If we run into the keyword first (or only the keywords) 2134 // we construct the tts version for consistent processing 2135 // and mark it as handled. We only do this for non-top locales, 2136 // because if the top locales don't have a tts we're not going to add 2137 // anyway. 
2138 if (handledCharacters.contains(cp)) { 2139 return; // already handled 2140 } 2141 // repeat the above, but for the tts path 2142 xpath = parts.cloneAsThawed().setAttribute(2, "type", "tts").toString(); 2143 fullpath = cldrFileToFilter.getFullXPath(xpath); 2144 parts = XPathParts.getFrozenInstance(fullpath); 2145 type = parts.getAttributeValue(2, "type"); 2146 // mark the character as seen 2147 handledCharacters.add(cp); 2148 } else if (type == null) { 2149 return; // no TTS, and top level, so skip 2150 } 2151 String keywordPath = 2152 parts.cloneAsThawed() 2153 .removeAttribute(2, "type") 2154 .toString(); // construct the path without tts 2155 String distinguishingKeywordPath = 2156 CLDRFile.getDistinguishingXPath(keywordPath, null); 2157 String rawKeywordValue = cldrFileToFilter.getStringValue(keywordPath); 2158 2159 // skip if keywords AND name are inherited 2160 if (rawKeywordValue == null 2161 || rawKeywordValue.equals(CldrUtility.INHERITANCE_MARKER)) { 2162 String rawName = cldrFileToFilter.getStringValue(xpath); 2163 if (rawName == null || rawName.equals(CldrUtility.INHERITANCE_MARKER)) { 2164 return; 2165 } 2166 } 2167 2168 // skip if the name is not above root 2169 String nameSourceLocale = resolved.getSourceLocaleID(xpath, null); 2170 if (XMLSource.ROOT_ID.equals(nameSourceLocale) 2171 || XMLSource.CODE_FALLBACK_ID.equals(nameSourceLocale)) { 2172 return; 2173 } 2174 2175 String name = resolved.getStringValue(xpath); 2176 String keywordValue = resolved.getStringValue(keywordPath); 2177 String sourceLocaleId = 2178 resolved.getSourceLocaleID(distinguishingKeywordPath, null); 2179 sorted.clear(); 2180 sorted.add(name); 2181 2182 List<String> items; 2183 if (!sourceLocaleId.equals(XMLSource.ROOT_ID) 2184 && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) { 2185 items = Annotations.splitter.splitToList(keywordValue); 2186 sorted.addAll(items); 2187 } 2188 2189 DisplayAndInputProcessor.filterCoveredKeywords(sorted); 2190 
DisplayAndInputProcessor.filterKeywordsDifferingOnlyInCase(sorted); 2191 String newKeywordValue = Joiner.on(" | ").join(sorted); 2192 if (!newKeywordValue.equals(keywordValue)) { 2193 replace(keywordPath, keywordPath, newKeywordValue); 2194 } 2195 } 2196 }); 2197 2198 fixList.add( 2199 'N', 2200 "add number symbols to exemplars", 2201 new CLDRFilter() { 2202 CLDRFile resolved; 2203 UnicodeSet numberStuff = new UnicodeSet(); 2204 Set<String> seen = new HashSet<>(); 2205 Set<String> hackAllowOnly = new HashSet<>(); 2206 boolean skip = false; 2207 2208 @Override 2209 public void handleStart() { 2210 String localeID = cldrFileToFilter.getLocaleID(); 2211 resolved = factory.make(localeID, true); 2212 numberStuff.clear(); 2213 seen.clear(); 2214 skip = localeID.equals("root"); 2215 // TODO add return value to handleStart to skip calling handlePath 2216 2217 if (NUMBER_SYSTEM_HACK) { 2218 hackAllowOnly.clear(); 2219 for (NumberingSystem system : NumberingSystem.values()) { 2220 String numberingSystem = 2221 system.path == null 2222 ? "latn" 2223 : cldrFileToFilter.getStringValue(system.path); 2224 if (numberingSystem != null) { 2225 hackAllowOnly.add(numberingSystem); 2226 } 2227 } 2228 } 2229 } 2230 2231 @Override 2232 public void handlePath(String xpath) { 2233 // the following doesn't work without NUMBER_SYSTEM_HACK, because there are 2234 // spurious numbersystems in the data. 
2235 // http://unicode.org/cldr/trac/ticket/10648 2236 // so using a hack for now in handleEnd 2237 if (skip || !xpath.startsWith("//ldml/numbers/symbols")) { 2238 return; 2239 } 2240 2241 // //ldml/numbers/symbols[@numberSystem="latn"]/exponential 2242 XPathParts parts = XPathParts.getFrozenInstance(xpath); 2243 String system = parts.getAttributeValue(2, "numberSystem"); 2244 if (system == null) { 2245 System.err.println( 2246 "Bogus numberSystem:\t" 2247 + cldrFileToFilter.getLocaleID() 2248 + " \t" 2249 + xpath); 2250 return; 2251 } else if (seen.contains(system) || !hackAllowOnly.contains(system)) { 2252 return; 2253 } 2254 seen.add(system); 2255 UnicodeSet exemplars = resolved.getExemplarsNumeric(system); 2256 System.out.println("# " + system + " ==> " + exemplars.toPattern(false)); 2257 for (String s : exemplars) { 2258 numberStuff.addAll(s); // add individual characters 2259 } 2260 } 2261 2262 @Override 2263 public void handleEnd() { 2264 if (!numberStuff.isEmpty()) { 2265 UnicodeSet current = 2266 cldrFileToFilter.getExemplarSet( 2267 ExemplarType.numbers, WinningChoice.WINNING); 2268 if (!numberStuff.equals(current)) { 2269 DisplayAndInputProcessor daip = 2270 new DisplayAndInputProcessor(cldrFileToFilter); 2271 if (current != null && !current.isEmpty()) { 2272 numberStuff.addAll(current); 2273 } 2274 String path = CLDRFile.getExemplarPath(ExemplarType.numbers); 2275 String value = daip.getPrettyPrinter().format(numberStuff); 2276 replace(path, path, value); 2277 } 2278 } 2279 } 2280 }); 2281 2282 fixList.add( 2283 'k', 2284 "fix according to -k config file. 
Details on http://cldr.unicode.org/development/cldr-big-red-switch/cldrmodify-passes/cldrmodify-config", 2285 new CLDRFilter() { 2286 private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> 2287 locale2keyValues; 2288 private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = 2289 new LinkedHashSet<>(); 2290 2291 @Override 2292 public void handleStart() { 2293 super.handleStart(); 2294 if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) { 2295 return; 2296 } 2297 if (locale2keyValues == null) { 2298 fillCache(); 2299 } 2300 // set up for the specific locale we are dealing with. 2301 // a small optimization 2302 String localeId = getLocaleID(); 2303 keyValues.clear(); 2304 for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> 2305 localeMatcher : locale2keyValues.entrySet()) { 2306 if (localeMatcher.getKey().matches(localeId)) { 2307 keyValues.addAll(localeMatcher.getValue()); 2308 } 2309 } 2310 System.out.println("# Checking entries & changing:\t" + keyValues.size()); 2311 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 2312 ConfigMatch action = entry.get(ConfigKeys.action); 2313 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 2314 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 2315 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 2316 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 2317 switch (action.action) { 2318 // we add all the values up front 2319 case addNew: 2320 case add: 2321 if (pathMatch != null 2322 || valueMatch != null 2323 || newPath == null 2324 || newValue == null) { 2325 throw new IllegalArgumentException( 2326 "Bad arguments, must have non-null for one of:" 2327 + "path, value, new_path, new_value " 2328 + ":\n\t" 2329 + entry); 2330 } 2331 String newPathString = newPath.getPath(getResolved()); 2332 if (action.action == ConfigAction.add 2333 || cldrFileToFilter.getStringValue(newPathString) 2334 == null) { 2335 replace( 2336 newPathString, 2337 newPathString, 2338 
newValue.exactMatch, 2339 "config"); 2340 } 2341 break; 2342 // we just check 2343 case replace: 2344 if ((pathMatch == null && valueMatch == null) 2345 || (newPath == null && newValue == null)) { 2346 throw new IllegalArgumentException( 2347 "Bad arguments, must have " 2348 + "(path!=null OR value=null) AND (new_path!=null OR new_value!=null):\n\t" 2349 + entry); 2350 } 2351 break; 2352 // For delete, we just check; we'll remove later 2353 case delete: 2354 if (newPath != null || newValue != null) { 2355 throw new IllegalArgumentException( 2356 "Bad arguments, must have " 2357 + "newPath=null, newValue=null" 2358 + entry); 2359 } 2360 break; 2361 default: // fall through 2362 throw new IllegalArgumentException("Internal Error"); 2363 } 2364 } 2365 } 2366 2367 private void fillCache() { 2368 locale2keyValues = new LinkedHashMap<>(); 2369 String configFileName = options[KONFIG].value; 2370 FileProcessor myReader = 2371 new FileProcessor() { 2372 { 2373 doHash = false; 2374 } 2375 2376 @Override 2377 protected boolean handleLine(int lineCount, String line) { 2378 line = line.trim(); 2379 String[] lineParts = line.split("\\s*;\\s*"); 2380 Map<ConfigKeys, ConfigMatch> keyValue = 2381 new EnumMap<>(ConfigKeys.class); 2382 for (String linePart : lineParts) { 2383 int pos = linePart.indexOf('='); 2384 if (pos < 0) { 2385 throw new IllegalArgumentException( 2386 lineCount 2387 + ":\t No = in command: «" 2388 + linePart 2389 + "» in " 2390 + line); 2391 } 2392 ConfigKeys key = 2393 ConfigKeys.valueOf( 2394 linePart.substring(0, pos).trim()); 2395 if (keyValue.containsKey(key)) { 2396 throw new IllegalArgumentException( 2397 "Must not have multiple keys: " + key); 2398 } 2399 String match = linePart.substring(pos + 1).trim(); 2400 keyValue.put(key, new ConfigMatch(key, match)); 2401 } 2402 final ConfigMatch locale = keyValue.get(ConfigKeys.locale); 2403 if (locale == null 2404 || keyValue.get(ConfigKeys.action) == null) { 2405 throw new IllegalArgumentException(); 2406 } 
2407 2408 // validate new path 2409 LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = 2410 locale2keyValues.get(locale); 2411 if (keyValues == null) { 2412 locale2keyValues.put( 2413 locale, keyValues = new LinkedHashSet<>()); 2414 } 2415 keyValues.add(keyValue); 2416 return true; 2417 } 2418 }; 2419 myReader.process(CLDRModify.class, configFileName); 2420 } 2421 2422 static final String DEBUG_PATH = 2423 "//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern"; 2424 2425 @Override 2426 public void handlePath(String xpath) { 2427 // slow method; could optimize 2428 if (DEBUG_PATH != null && DEBUG_PATH.equals(xpath)) { 2429 System.out.println(xpath); 2430 } 2431 for (Map<ConfigKeys, ConfigMatch> entry : keyValues) { 2432 ConfigMatch pathMatch = entry.get(ConfigKeys.path); 2433 if (pathMatch != null && !pathMatch.matches(xpath)) { 2434 if (DEBUG_PATH != null 2435 && pathMatch != null 2436 && pathMatch.regexMatch != null) { 2437 System.out.println( 2438 RegexUtilities.showMismatch( 2439 pathMatch.regexMatch, xpath)); 2440 } 2441 continue; 2442 } 2443 ConfigMatch valueMatch = entry.get(ConfigKeys.value); 2444 final String value = cldrFileToFilter.getStringValue(xpath); 2445 if (valueMatch != null && !valueMatch.matches(value)) { 2446 continue; 2447 } 2448 ConfigMatch action = entry.get(ConfigKeys.action); 2449 switch (action.action) { 2450 case delete: 2451 remove(xpath, "config"); 2452 break; 2453 case replace: 2454 ConfigMatch newPath = entry.get(ConfigKeys.new_path); 2455 ConfigMatch newValue = entry.get(ConfigKeys.new_value); 2456 2457 String fullpath = cldrFileToFilter.getFullXPath(xpath); 2458 String draft = ""; 2459 int loc = fullpath.indexOf("[@draft="); 2460 if (loc >= 0) { 2461 int loc2 = fullpath.indexOf(']', loc + 7); 2462 draft = fullpath.substring(loc, loc2 + 1); 2463 } 2464 2465 String modPath = 2466 ConfigMatch.getModified(pathMatch, xpath, newPath) 2467 + draft; 2468 String 
modValue = 2469 ConfigMatch.getModified(valueMatch, value, newValue); 2470 replace(xpath, modPath, modValue, "config"); 2471 } 2472 } 2473 } 2474 }); 2475 fixList.add('i', "fix Identical Children"); 2476 fixList.add('o', "check attribute validity"); 2477 2478 /** 2479 * Goal is: if value in vxml is ^^^, then add ^^^ to trunk IFF (a) if there is no value in 2480 * trunk (b) the value in trunk = bailey. 2481 */ 2482 fixList.add( 2483 '^', 2484 "add inheritance-marked items from vxml to trunk", 2485 new CLDRFilter() { 2486 Factory VxmlFactory; 2487 final ArrayList<File> fileList = new ArrayList<>(); 2488 2489 @Override 2490 public void handleStart() { 2491 if (fileList.isEmpty()) { 2492 for (String top : Arrays.asList("common/", "seed/")) { 2493 // for (String leaf : Arrays.asList("main/", "annotations/")) { 2494 String leaf = 2495 sourceInput.contains("annotations") 2496 ? "annotations/" 2497 : "main/"; 2498 String key = top + leaf; 2499 fileList.add( 2500 new File( 2501 CLDRPaths.AUX_DIRECTORY 2502 + "voting/" 2503 + CLDRFile.GEN_VERSION 2504 + "/vxml/" 2505 + key)); 2506 } 2507 VxmlFactory = 2508 SimpleFactory.make( 2509 fileList.toArray(new File[fileList.size()]), ".*"); 2510 } 2511 2512 String localeID = cldrFileToFilter.getLocaleID(); 2513 2514 CLDRFile vxmlCommonMainFile; 2515 try { 2516 vxmlCommonMainFile = VxmlFactory.make(localeID, false); 2517 } catch (Exception e) { 2518 System.out.println( 2519 "#ERROR: VXML file not found for " 2520 + localeID 2521 + " in " 2522 + fileList); 2523 return; 2524 } 2525 CLDRFile resolved = cldrFileToFilter; 2526 2527 if (!cldrFileToFilter.isResolved()) { 2528 resolved = factory.make(cldrFileToFilter.getLocaleID(), true); 2529 } 2530 2531 for (String xpath : vxmlCommonMainFile) { 2532 String vxmlValue = vxmlCommonMainFile.getStringValue(xpath); 2533 if (vxmlValue == null) { 2534 continue; 2535 } 2536 if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) { 2537 continue; 2538 } 2539 2540 String trunkValue = 
resolved.getStringValue(xpath); 2541 if (trunkValue != null) { 2542 String baileyValue = resolved.getBaileyValue(xpath, null, null); 2543 if (!trunkValue.equals(baileyValue)) { 2544 continue; 2545 } 2546 } 2547 // at this point, the vxmlValue is ^^^ and the trunk value is either 2548 // null or == baileyValue 2549 String fullPath = 2550 resolved.getFullXPath(xpath); // get the draft status, etc. 2551 if (fullPath == null) { // debugging 2552 fullPath = vxmlCommonMainFile.getFullXPath(xpath); 2553 if (fullPath == null) { 2554 throw new ICUException( 2555 "getFullXPath not working for " 2556 + localeID 2557 + ", " 2558 + xpath); 2559 } 2560 } 2561 add( 2562 fullPath, 2563 vxmlValue, 2564 "Add or replace by " + CldrUtility.INHERITANCE_MARKER); 2565 } 2566 } 2567 2568 @Override 2569 public void handlePath(String xpath) { 2570 // Everything done in handleStart 2571 } 2572 }); 2573 2574 fixList.add( 2575 'L', 2576 "fix logical groups by adding all the bailey values", 2577 new CLDRFilter() { 2578 Set<String> seen = new HashSet<>(); 2579 CLDRFile resolved; 2580 boolean skip; 2581 CoverageLevel2 coverageLeveler; 2582 2583 @Override 2584 public void handleStart() { 2585 seen.clear(); 2586 resolved = getResolved(); 2587 skip = false; 2588 coverageLeveler = null; 2589 2590 String localeID = cldrFileToFilter.getLocaleID(); 2591 LanguageTagParser ltp = new LanguageTagParser().set(localeID); 2592 if (!ltp.getRegion().isEmpty() || !ltp.getVariants().isEmpty()) { 2593 skip = true; 2594 } else { 2595 coverageLeveler = CoverageLevel2.getInstance(localeID); 2596 } 2597 } 2598 2599 @Override 2600 public void handlePath(String xpath) { 2601 if (skip 2602 || seen.contains(xpath) 2603 || coverageLeveler.getLevel(xpath) == Level.COMPREHENSIVE) { 2604 return; 2605 } 2606 Set<String> paths = LogicalGrouping.getPaths(cldrFileToFilter, xpath); 2607 if (paths == null || paths.size() < 2) { 2608 return; 2609 } 2610 Set<String> needed = new LinkedHashSet<>(); 2611 for (String path2 : paths) { 
2612 if (path2.equals(xpath)) { 2613 continue; 2614 } 2615 if (cldrFileToFilter.isHere(path2)) { 2616 continue; 2617 } 2618 if (LogicalGrouping.isOptional(cldrFileToFilter, path2)) { 2619 continue; 2620 } 2621 // ok, we have a path missing a value 2622 needed.add(path2); 2623 } 2624 if (needed.isEmpty()) { 2625 return; 2626 } 2627 // we need at least one value 2628 2629 // flesh out by adding a bailey value 2630 // TODO resolve the draft status in a better way 2631 // For now, get the lowest draft status, and we'll reset everything to that. 2632 2633 DraftStatus worstStatus = 2634 DraftStatus.contributed; // don't ever add an approved. 2635 for (String path2 : paths) { 2636 XPathParts parts = XPathParts.getFrozenInstance(path2); 2637 String rawStatus = parts.getAttributeValue(-1, "draft"); 2638 if (rawStatus == null) { 2639 continue; 2640 } 2641 DraftStatus df = DraftStatus.forString(rawStatus); 2642 if (df.compareTo(worstStatus) < 0) { 2643 worstStatus = df; 2644 } 2645 } 2646 2647 for (String path2 : paths) { 2648 String fullPath = resolved.getFullXPath(path2); 2649 String value = resolved.getStringValue(path2); 2650 if (LogicalGrouping.isOptional(cldrFileToFilter, path2) 2651 && !cldrFileToFilter.isHere(path2)) { 2652 continue; 2653 } 2654 2655 XPathParts fullparts = 2656 XPathParts.getFrozenInstance(fullPath) 2657 .cloneAsThawed(); // not frozen, for setAttribute 2658 fullparts.setAttribute(-1, "draft", worstStatus.toString()); 2659 replace( 2660 fullPath, 2661 fullparts.toString(), 2662 value, 2663 "Fleshing out bailey to " + worstStatus); 2664 } 2665 seen.addAll(paths); 2666 } 2667 }); 2668 2669 // 'R' = Revert to baseline version under certain conditions 2670 fixList.add( 2671 'R', 2672 "Revert under certain conditions", 2673 new CLDRFilter() { 2674 // vxmlDir needs to be the "plain" (without post-processing) path of an existing 2675 // copy of common/main 2676 // For example, vetdata-2023-01-23-plain-dropfalse ... 
see 2677 // https://github.com/unicode-org/cldr/pull/2659 2678 // Also ldml.dtd is required -- and should already have been created by ST when 2679 // generating vxml 2680 private final String vxmlDir = "../vetdata-2023-01-23-plain-dropfalse/vxml/"; 2681 private Factory vxmlFactory = null; 2682 private CLDRFile vxmlFile = null; 2683 private CLDRFile baselineFileUnresolved = null; 2684 private CLDRFile baselineFileResolved = null; 2685 private File[] list = null; 2686 2687 @Override 2688 public void handleSetup() { 2689 final String vxmlSubPath = 2690 vxmlDir + "common/" + new File(options[SOURCEDIR].value).getName(); 2691 // System.out.println(vxmlSubPath); 2692 list = new File[] {new File(vxmlSubPath)}; 2693 } 2694 2695 @Override 2696 public void handleStart() { 2697 if (vxmlFactory == null) { 2698 vxmlFactory = SimpleFactory.make(list, ".*"); 2699 if (!pathHasError( 2700 "zh_Hant", 2701 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]")) { 2702 throw new RuntimeException("pathHasError wrong?"); 2703 } 2704 } 2705 String localeID = cldrFileToFilter.getLocaleID(); 2706 if (cldrFileToFilter 2707 .isResolved()) { // true only if "-z" added to command line 2708 baselineFileResolved = cldrFileToFilter; 2709 baselineFileUnresolved = cldrFileToFilter.getUnresolved(); 2710 } else { // true unless "-z" added to command line 2711 baselineFileResolved = getResolved(); 2712 baselineFileUnresolved = cldrFileToFilter; 2713 } 2714 try { 2715 vxmlFile = vxmlFactory.make(localeID, false /* not resolved */); 2716 } catch (Exception e) { 2717 System.out.println("Skipping " + localeID + " due to " + e); 2718 vxmlFile = null; 2719 } 2720 } 2721 2722 @Override 2723 public void handlePath(String xpath) { 2724 boolean debugging = false; // xpath.contains("Ciudad_Juarez"); 2725 if (debugging) { 2726 System.out.println("handlePath: got Ciudad_Juarez"); 2727 } 2728 if (vxmlFile == null) { 2729 if (debugging) { 2730 System.out.println("handlePath: 
vxmlFile is null"); 2731 } 2732 return; // use baseline 2733 } 2734 String vxmlValue = vxmlFile.getStringValue(xpath); 2735 if (vxmlValue == null) { 2736 throw new RuntimeException( 2737 this.getLocaleID() + ":" + xpath + ": vxmlValue == null"); 2738 } 2739 if (!wantRevertToBaseline(xpath, vxmlValue)) { 2740 if (debugging) { 2741 System.out.println("handlePath: wantRevertToBaseline false"); 2742 } 2743 String fullXPath = vxmlFile.getFullXPath(xpath); 2744 replace(fullXPath, fullXPath, vxmlValue); 2745 } else { 2746 if (debugging) { 2747 System.out.println("handlePath: wantRevertToBaseline true"); 2748 } 2749 } 2750 } 2751 2752 private boolean wantRevertToBaseline(String xpath, String vxmlValue) { 2753 String localeID = cldrFileToFilter.getLocaleID(); 2754 boolean debugging = false; // xpath.contains("Ciudad_Juarez"); 2755 // boolean deb = 2756 // "//ldml/dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity".equals(xpath); 2757 // boolean deb = ("ru".equals(localeID) && 2758 // "//ldml/dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity".equals(xpath)); 2759 if (debugging) { 2760 System.out.println("wantRevertToBaseline: got Ciudad_Juarez"); 2761 } 2762 String fullXPath = vxmlFile.getFullXPath(xpath); 2763 if (!changesWereAllowed(localeID, xpath, fullXPath)) { 2764 // criterion 2: if Survey Tool did NOT allow changes in the locale/path 2765 // in v43, MUST revert to baseline 2766 if (debugging) { 2767 System.out.println( 2768 "wantRevertToBaseline: return true since changes not allowed"); 2769 } 2770 return true; 2771 } 2772 if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) { 2773 // criterion zero: if vxml value is not ↑↑↑, don't revert to baseline 2774 if (debugging) { 2775 System.out.println("wantRevertToBaseline: return for 0"); 2776 } 2777 return false; 2778 } 2779 // String baselineValue = baselineFileResolved.getStringValue(xpath); 2780 String baselineValue = baselineFileUnresolved.getStringValue(xpath); 2781 if 
(baselineValue == null 2782 || CldrUtility.INHERITANCE_MARKER.equals(baselineValue)) { 2783 // criterion 1: if baseline value is not a hard value, don't revert to 2784 // baseline 2785 if (debugging) { 2786 System.out.println( 2787 "wantRevertToBaseline: return for 1; baselineValue = " 2788 + baselineValue); 2789 } 2790 return false; 2791 } 2792 Output<String> inheritancePathWhereFound = new Output<>(); 2793 Output<String> localeWhereFound = new Output<>(); 2794 baselineFileResolved.getBaileyValue( 2795 xpath, inheritancePathWhereFound, localeWhereFound); 2796 if (localeID.equals(localeWhereFound.value) 2797 || xpath.equals(inheritancePathWhereFound.value)) { 2798 // criterion 3: if bailey value is not from different path and locale, 2799 // don't revert to baseline 2800 if (debugging) { 2801 System.out.println( 2802 "wantRevertToBaseline: found at " 2803 + localeWhereFound.value 2804 + " " 2805 + inheritancePathWhereFound.value); 2806 System.out.println("wantRevertToBaseline: return for 3"); 2807 } 2808 return false; 2809 } 2810 if (debugging) { 2811 System.out.println("wantRevertToBaseline: return true"); 2812 } 2813 return true; 2814 } 2815 2816 private boolean changesWereAllowed( 2817 String localeID, String xpath, String fullXPath) { 2818 boolean isError = pathHasError(localeID, xpath); 2819 String oldValue = baselineFileUnresolved.getWinningValue(xpath); 2820 boolean isMissing = 2821 (oldValue == null 2822 || CLDRFile.DraftStatus.forXpath(fullXPath).ordinal() 2823 <= CLDRFile.DraftStatus.provisional.ordinal()); 2824 String locOrAncestor = localeID; 2825 while (!"root".equals(locOrAncestor)) { 2826 if (SubmissionLocales.allowEvenIfLimited( 2827 locOrAncestor, xpath, isError, isMissing)) { 2828 return true; 2829 } 2830 locOrAncestor = LocaleIDParser.getParent(locOrAncestor); 2831 } 2832 return false; 2833 } 2834 2835 /** 2836 * These were derived from all errors found running this command: java 2837 * -DCLDR_DIR=$(pwd) -jar tools/cldr-code/target/cldr-code.jar 
check -S 2838 * common,seed -e -z FINAL_TESTING >> org.unicode.cldr.test.ConsoleCheckCLDR 2839 * 2840 * <p>TODO: this is incomplete? Should include some "errors" that are not in 2841 * personNames?? 2842 */ 2843 private final String[] ERR_LOCALES_PATHS = 2844 new String[] { 2845 "ja", 2846 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2847 "nl_BE", 2848 "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname\"]", 2849 "yue", 2850 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", 2851 "yue", 2852 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2853 "yue", 2854 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", 2855 "zh", 2856 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", 2857 "zh", 2858 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2859 "zh", 2860 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", 2861 "zh_Hant", 2862 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", 2863 "zh_Hant", 2864 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", 2865 "zh_Hant", 2866 "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", 2867 }; 2868 2869 private boolean pathHasError(String localeID, String xpath) { 2870 for (int i = 0; i < ERR_LOCALES_PATHS.length; i += 2) { 2871 String errLoc = ERR_LOCALES_PATHS[i]; 2872 String errPath = ERR_LOCALES_PATHS[i + 1]; 2873 if (localeID.equals(errLoc) && xpath.equals(errPath)) { 2874 return true; 2875 } 2876 } 2877 return false; 2878 } 2879 2880 @Override 2881 public void handleEnd() { 2882 // look for paths in vxmlFile that aren't in baselineFileUnresolved 2883 final Set<String> vPaths = new HashSet<>(); 2884 final Set<String> bPaths = 
new HashSet<>();
                        vxmlFile.getPaths("", null, vPaths);
                        baselineFileUnresolved.getPaths("", null, bPaths);
                        // What remains in vPaths are the paths present in vxmlFile but not in the
                        // unresolved baseline file.
                        vPaths.removeAll(bPaths);
                        for (final String dPath : vPaths) {
                            // System.out.println(">!> " + dPath);
                            final String fPath = vxmlFile.getFullXPath(dPath);
                            add(
                                    fPath,
                                    vxmlFile.getWinningValue(fPath),
                                    "in vxmlFile, missing from baseline");
                        }
                    }
                });

        // Filter 'V': for paths whose value in the file being filtered is the inheritance
        // marker, replace the marker with the explicit Bailey value when that value was found at
        // a different path (lateral inheritance). handleEnd() then repeats the check for paths
        // that have no value at all in this file but are present in the L1 ancestor's file.
        fixList.add(
                'V',
                "Fix values that would inherit laterally",
                new CLDRFilter() {
                    // true when the current locale is root; every callback is then a no-op
                    boolean skip = false;
                    // true when the parent of the current locale is root
                    boolean isL1 = false;
                    String parentId = null;
                    // resolved parent file, created on first use in handlePath
                    CLDRFile parentFile = null;
                    // paths that passed the value check in handlePath during the main pass
                    Set<String> pathsHandled = new HashSet<>();
                    // handlePath only acts on paths whose value equals this (may be null)
                    String onlyValues = null;
                    // prefix of the message passed to replace()
                    String message = null;

                    /** Resets the per-locale state before the paths of a file are visited. */
                    @Override
                    public void handleStart() {
                        // skip if the locale is root.
                        skip = getLocaleID().equals(XMLSource.ROOT_ID);
                        if (!skip) {
                            parentId = LocaleIDParser.getParent(getLocaleID());
                            // This locale is "L1" (level one) if its parent is root.
                            isL1 = parentId.equals(XMLSource.ROOT_ID);
                            parentFile = null; // lazy evaluate
                        }
                        pathsHandled.clear();
                        // Main pass: only look at paths whose value is the inheritance marker.
                        onlyValues = CldrUtility.INHERITANCE_MARKER;
                        message = "fix ↑↑↑ lateral";
                    }

                    /**
                     * Hardens one path if its value equals {@code onlyValues} and its Bailey value
                     * comes from a different path.
                     *
                     * @param xpath the distinguishing path to examine
                     */
                    @Override
                    public void handlePath(String xpath) {
                        if (skip) {
                            return;
                        }
                        String value = cldrFileToFilter.getStringValue(xpath);
                        // Objects.equals is used because onlyValues is null during handleEnd.
                        if (!Objects.equals(onlyValues, value)) {
                            return;
                        }

                        // remember which paths we handle, so we can skip them in handleEnd
                        pathsHandled.add(xpath);

                        Output<String> pathWhereFound = new Output<>();
                        Output<String> localeWhereFound = new Output<>();
                        String baileyValue =
                                getResolved()
                                        .getBaileyValue(xpath, pathWhereFound, localeWhereFound);
                        // Only continue when a Bailey value exists and it was found at some other
                        // path that is not the glossonym pseudo-path.
                        if (baileyValue != null
                                && !xpath.equals(pathWhereFound.value)
                                && !GlossonymConstructor.PSEUDO_PATH.equals(pathWhereFound.value)) {

                            // we have lateral inheritance, so we decide whether to harden.

                            boolean harden = false;
                            String message2 = "";

                            // if we are L1, then we make a hard value, to protect higher values

                            if (isL1) {
                                harden = true;
                                message2 = "; L1";
                            } else {
                                // for all others, we check to see if the parent's lateral value is
                                // the same as ours
                                // If it is, we are ok, since one of that parent's parents will be
                                // hardened

                                if (parentFile == null) {
                                    parentFile = factory.make(parentId, true);
                                }
                                String parentValue = parentFile.getStringValueWithBailey(xpath);
                                if (!baileyValue.equals(parentValue)) {
                                    harden = true; // true if parentValue == null, see comment below
                                }
                                message2 = "; L2+";

                                // Problem case: the parent value is null (not inheritance marker)
                                // but the child value is ^^^.
                                // See if we need to fix that.
                                // Currently harden is true if parentValue is null, which, as of
                                // 2023-09-20, happens here for only two paths, both in locale
                                // en_AU:
                                // //ldml/dates/calendars/calendar[@type="islamic"]/dateTimeFormats/availableFormats/dateFormatItem[@id="yMEd"]
                                // //ldml/dates/calendars/calendar[@type="islamic"]/dateTimeFormats/availableFormats/dateFormatItem[@id="yMd"]
                            }
                            if (harden) {
                                // Same full path on both sides: only the value changes.
                                String fullPath = cldrFileToFilter.getFullXPath(xpath);
                                replace(fullPath, fullPath, baileyValue, message + message2);
                            }
                        }
                    }

                    /**
                     * Second pass for locales that are neither root nor L1: runs handlePath on
                     * every path of the unresolved L1 ancestor file that the main pass did not
                     * handle, this time matching paths whose value in this file is null.
                     */
                    @Override
                    public void handleEnd() {
                        if (skip || isL1) {
                            return;
                        }
                        // Handle all the null cases that are in the L1 value.
                        onlyValues = null;
                        message = "fix null lateral";

                        List<String> parentChain = LocaleIDParser.getParentChain(getLocaleID());
                        // NOTE(review): assumes the chain ends with root and has at least two
                        // entries here -- confirm against LocaleIDParser.getParentChain.
                        String localeL1 =
                                parentChain.get(parentChain.size() - 2); // get last before root
                        CLDRFile fileL1 = factory.make(localeL1, false); // only unresolved paths
                        for (String path : fileL1) {
                            if (!pathsHandled.contains(path)) {
                                handlePath(path);
                            }
                        }
                    }
                });

        // Filter 'D': sets draft="provisional" on the paths that DowngradePaths selects for the
        // current locale, unless the path is already provisional or unconfirmed.
        fixList.add(
                'D',
                "Downgrade paths",
                new CLDRFilter() {

                    // true when no path of the current locale is eligible for downgrading
                    boolean skipLocale = false;

                    /** Decides once per locale whether handlePath has anything to do. */
                    @Override
                    public void handleStart() {
                        // TODO Auto-generated method stub
                        super.handleSetup();
                        String locale = getLocaleID();
                        // "en" and "root" are never downgraded; other locales only when
                        // DowngradePaths.lookingAt accepts the locale.
                        skipLocale =
                                locale.equals("en")
                                        || locale.equals("root")
                                        || !DowngradePaths.lookingAt(locale);
                    }

                    /**
                     * Downgrades one path to provisional if DowngradePaths selects it.
                     *
                     * @param xpath the distinguishing path to examine
                     */
                    @Override
                    public void handlePath(String xpath) {
                        if (skipLocale) { // fast path
                            return;
                        }
                        String value = cldrFileToFilter.getStringValue(xpath);
                        if (!DowngradePaths.lookingAt(getLocaleID(), xpath, value)) {
                            return;
                        }
                        String fullPath = cldrFileToFilter.getFullXPath(xpath);
                        XPathParts fullParts = XPathParts.getFrozenInstance(fullPath);
                        // Index -1 addresses the last element of the path.
                        String oldDraft = fullParts.getAttributeValue(-1, "draft");
                        if (oldDraft != null) {
                            DraftStatus oldDraftEnum = DraftStatus.forString(oldDraft);
                            // Leave paths alone that are already provisional or unconfirmed.
                            if (oldDraftEnum == DraftStatus.provisional
                                    || oldDraftEnum == DraftStatus.unconfirmed) {
                                return;
                            }
                        }
                        // The instance obtained above is frozen, so modify a thawed copy.
                        fullParts = fullParts.cloneAsThawed();
                        fullParts.setAttribute(-1, "draft", "provisional");
                        replace(fullPath, fullParts.toString(), value, "Downgrade to provisional");
                    }
                });

        // Filter 'G': rewrites the draft status of paths that pass the TARGET_LEVEL coverage
        // check to TARGET_STATUS (contributed).
        fixList.add(
                'G',
                "upGrade basic paths to contributed",
                new CLDRFilter() {

                    // boolean skipLocale = false;
                    // coverage lookup for the current locale, created in handleStart
                    CoverageLevel2 coverageLeveler;
                    final CLDRFile.DraftStatus TARGET_STATUS = DraftStatus.contributed;
                    final Level TARGET_LEVEL = Level.BASIC;

                    /** Creates the coverage lookup for the current locale. */
                    @Override
                    public void handleStart() {
                        super.handleSetup();
                        String locale = getLocaleID();
                        // skipLocale = false;
                        final CLDRConfig config = CLDRConfig.getInstance();
                        coverageLeveler =
                                CoverageLevel2.getInstance(
                                        config.getSupplementalDataInfo(), locale);
                    }

                    /**
                     * Upgrades one path to TARGET_STATUS if it passes the coverage check.
                     *
                     * @param xpath the distinguishing path to examine
                     */
                    @Override
                    public void handlePath(String xpath) {
                        // if (skipLocale) { // fast path
                        // return;
                        // }
                        // presumably filters out paths whose coverage level is above BASIC --
                        // confirm against Level.isAtLeast
                        if (!TARGET_LEVEL.isAtLeast(coverageLeveler.getLevel(xpath))) {
                            return; // skip
                        }
                        String fullPath = cldrFileToFilter.getFullXPath(xpath);
                        final CLDRFile.DraftStatus oldDraft =
                                CLDRFile.DraftStatus.forXpath(fullPath);
                        // NOTE(review): "> 0" only returns for a status that compares strictly
                        // greater than TARGET_STATUS; a path already at TARGET_STATUS falls
                        // through to replace(). Confirm whether ">= 0" was intended.
                        if (oldDraft.compareTo(TARGET_STATUS) > 0) {
                            return; // already at contributed or better
                        }
                        // Now we need the value
                        final String value = cldrFileToFilter.getStringValue(xpath);
                        final String newPath = TARGET_STATUS.updateXPath(fullPath);
                        replace(fullPath, newPath, value, "Upgrade to " + TARGET_STATUS.name());
                    }
                });

        // Filter 'Z': for paths whose value is the inheritance marker, asks
        // VoteResolver.reviseInheritanceAsNeeded for a revised value and stores it if it differs.
        fixList.add(
                'Z',
                "Zero lateral: convert inheritance marker to specific value if inheritance would be lateral/problematic",
                new CLDRFilter() {
                    /**
                     * Replaces the inheritance marker at one path with the revised value, if any.
                     *
                     * @param xpath the distinguishing path to examine
                     */
                    @Override
                    public void handlePath(String xpath) {
                        String value = cldrFileToFilter.getStringValue(xpath);
                        if (!CldrUtility.INHERITANCE_MARKER.equals(value)) {
                            return;
                        }
                        String newValue =
                                VoteResolver.reviseInheritanceAsNeeded(xpath, value, getResolved());
                        // Unchanged value: nothing to write.
                        if (value.equals(newValue)) {
                            return;
                        }
                        String fullXPath = cldrFileToFilter.getFullXPath(xpath);
                        replace(fullXPath, fullXPath, newValue);
                    }
                });
    }

    /**
     * Returns the last two "/"-separated elements of the given directory's path, each followed by
     * "/", e.g. "a/b/" for a path ending in "a/b".
     *
     * <p>NOTE(review): the path string is split on a hard-coded "/" and indexed at length - 2, so
     * a path with fewer than two "/"-separated elements throws ArrayIndexOutOfBoundsException.
     * That presumably includes paths written with a different separator character -- confirm
     * whether that matters for the callers.
     *
     * @param sourceDir1 the directory whose path is examined
     * @return the last two path elements joined and terminated by "/"
     */
    public static String getLast2Dirs(File sourceDir1) {
        String[] pathElements = sourceDir1.toString().split("/");
        return pathElements[pathElements.length - 2]
                + "/"
                + pathElements[pathElements.length - 1]
                + "/";
    }

    // references="http://www.stat.fi/tk/tt/luokitukset/lk/kieli_02.html"

    /** Mutable holder pairing a string value with the full xpath it was read from. */
    private static class ValuePair {
        String value;
        String fullxpath;
    }

    /**
     * Find the set of xpaths that (a) have all the same values (if present) in the children (b) are
     * absent in the parent, (c) are different than what is in the fully resolved parent and add
     * them.
3135 */ fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements)3136 static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) { 3137 String key = k.getLocaleID(); 3138 if (key.equals("root")) return; 3139 Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true); 3140 if (availableChildren.size() == 0) return; 3141 Set<String> skipPaths = new HashSet<>(); 3142 Map<String, ValuePair> haveSameValues = new TreeMap<>(); 3143 CLDRFile resolvedFile = cldrFactory.make(key, true); 3144 // get only those paths that are not in "root" 3145 resolvedFile.forEach(skipPaths::add); 3146 3147 // first, collect all the paths 3148 for (String locale : availableChildren) { 3149 if (locale.indexOf("POSIX") >= 0) continue; 3150 CLDRFile item = cldrFactory.make(locale, false); 3151 for (String xpath : item) { 3152 if (skipPaths.contains(xpath)) continue; 3153 // skip certain elements 3154 if (xpath.indexOf("/identity") >= 0) continue; 3155 if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue; 3156 if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue; 3157 if (xpath.indexOf("[@alt") >= 0) continue; 3158 if (xpath.indexOf("/alias") >= 0) continue; 3159 3160 // must be string vale 3161 ValuePair v1 = new ValuePair(); 3162 v1.value = item.getStringValue(xpath); 3163 v1.fullxpath = item.getFullXPath(xpath); 3164 3165 ValuePair vAlready = haveSameValues.get(xpath); 3166 if (vAlready == null) { 3167 haveSameValues.put(xpath, v1); 3168 } else if (!v1.value.equals(vAlready.value) 3169 || !v1.fullxpath.equals(vAlready.fullxpath)) { 3170 skipPaths.add(xpath); 3171 haveSameValues.remove(xpath); 3172 } 3173 } 3174 } 3175 // at this point, haveSameValues is all kosher, so add items 3176 for (String xpath : haveSameValues.keySet()) { 3177 ValuePair v = haveSameValues.get(xpath); 3178 // if (v.value.equals(resolvedFile.getStringValue(xpath)) 3179 // && 
v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue; 3180 replacements.add(v.fullxpath, v.value); 3181 } 3182 } 3183 fixAltProposed()3184 static void fixAltProposed() { 3185 throw new IllegalArgumentException(); 3186 } 3187 3188 /** Perform various fixes TODO add options to pick which one. */ fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory)3189 private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) { 3190 3191 // TODO before modifying, make sure that it is fully resolved. 3192 // then minimize against the NEW parents 3193 3194 Set<String> removal = new TreeSet<>(k.getComparator()); 3195 CLDRFile replacements = SimpleFactory.makeFile("temp"); 3196 fixList.setFile(k, inputOptions, cldrFactory, removal, replacements); 3197 3198 for (String xpath : k) { 3199 fixList.handlePath(xpath); 3200 } 3201 fixList.handleEnd(); 3202 3203 // remove bad attributes 3204 3205 if (inputOptions.indexOf('v') >= 0) { 3206 CLDRTest.checkAttributeValidity(k, null, removal); 3207 } 3208 3209 // raise identical elements 3210 3211 if (inputOptions.indexOf('i') >= 0) { 3212 fixIdenticalChildren(cldrFactory, k, replacements); 3213 } 3214 3215 // now do the actions we collected 3216 3217 if (SHOW_DETAILS) { 3218 if (removal.size() != 0 || !replacements.isEmpty()) { 3219 if (!removal.isEmpty()) { 3220 System.out.println("Removals:"); 3221 for (String path : removal) { 3222 System.out.println(path + " =\t " + k.getStringValue(path)); 3223 } 3224 } 3225 if (!replacements.isEmpty()) { 3226 System.out.println("Additions/Replacements:"); 3227 System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>")); 3228 } 3229 } 3230 } 3231 if (removal.size() != 0) { 3232 k.removeAll(removal, COMMENT_REMOVALS); 3233 } 3234 k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE); 3235 } 3236 3237 /** 3238 * How many steps from root is the given locale? 
3239 * 3240 * @param origLoc 3241 * @return the number of steps; e.g., 0 for "root", -1 for "code-fallback", 1 for "fr", 2 for 3242 * "fr_CA", ... 3243 */ stepsFromRoot(String origLoc)3244 private static int stepsFromRoot(String origLoc) { 3245 int steps = 0; 3246 String loc = origLoc; 3247 while (!LocaleNames.ROOT.equals(loc)) { 3248 loc = LocaleIDParser.getParent(loc); 3249 if (loc == null) { 3250 throw new IllegalArgumentException("Missing root in inheritance chain"); 3251 } 3252 ++steps; 3253 } 3254 System.out.println("stepsFromRoot = " + steps + " for " + origLoc); 3255 return steps; 3256 } 3257 3258 /** Internal */ testJavaSemantics()3259 public static void testJavaSemantics() { 3260 Collator caseInsensitive = Collator.getInstance(ULocale.ROOT); 3261 caseInsensitive.setStrength(Collator.SECONDARY); 3262 Set<String> setWithCaseInsensitive = new TreeSet<>(caseInsensitive); 3263 setWithCaseInsensitive.addAll(Arrays.asList(new String[] {"a", "b", "c"})); 3264 Set<String> plainSet = new TreeSet<>(); 3265 plainSet.addAll(Arrays.asList(new String[] {"a", "b", "B"})); 3266 System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet)); 3267 System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive)); 3268 setWithCaseInsensitive.removeAll(plainSet); 3269 System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty()); 3270 } 3271 } 3272