package org.unicode.cldr.tool;

import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UTF16;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.EscapingUtilities;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.PathUtilities;
import org.unicode.cldr.util.PrettyPath;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.Timer;

/**
 * Command-line tool that compares two directories of CLDR locale files and writes, for each
 * language locale, an HTML page ({@code <locale>.html}) listing the paths whose values differ,
 * plus an {@code index.html} linking to all of those pages.
 *
 * <p>Configuration is read through {@link CldrUtility#getProperty(String, String)} using the keys
 * {@code oldDirectory}, {@code newDirectory}, {@code changesDirectory}, {@code localeFilter} and
 * {@code showAliased}.
 */
public class GenerateComparison {

    private static PrettyPath prettyPathMaker;

    private static final Collator collator = Collator.getInstance();

    /**
     * Number formatter (with grouping) shared by the page writers and the console summary.
     * Initialised at class load so that no helper depends on {@link #main} having run first.
     */
    private static final NumberFormat format = makeNumberFormat();

    /**
     * Orders (name, payload) rows by name and then by payload. Each field is compared first with
     * the default collator and then, to break collation ties, with a UTF-16 string comparator.
     */
    static class EnglishRowComparator implements Comparator<R2<String, String>> {
        private static Comparator<String> unicode = new UTF16.StringComparator(true, false, 0);

        @Override
        public int compare(R2<String, String> arg0, R2<String, String> arg1) {
            int result = collator.compare(arg0.get0(), arg1.get0());
            if (result != 0) {
                return result;
            }
            result = unicode.compare(arg0.get0(), arg1.get0());
            if (result != 0) {
                return result;
            }
            result = collator.compare(arg0.get1(), arg1.get1());
            if (result != 0) {
                return result;
            }
            return unicode.compare(arg0.get1(), arg1.get1());
        }
    }

    static final EnglishRowComparator ENG = new EnglishRowComparator();

    static final String warningMessage =
            "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>";

    /** Highlighted "How to Vet" link that appears at the top of every generated page. */
    private static final String HELP_LINK =
            "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>";

    /**
     * Runs the comparison and writes one HTML page per locale plus the index page.
     *
     * @param args unused; all settings come from {@link CldrUtility#getProperty(String, String)}
     * @throws IOException if an output file cannot be opened
     */
    public static void main(String[] args) throws IOException {

        // Setup
        Timer timer = new Timer();
        Timer totalTimer = new Timer();
        long totalPaths = 0;
        Counter<String> totalCounter = new Counter<>();

        // Get the args

        String oldDirectory =
                CldrUtility.getProperty(
                        "oldDirectory",
                        PathUtilities.getNormalizedPathString(
                                        new File(CLDRPaths.BASE_DIRECTORY, "common/main"))
                                + "/");
        String newDirectory =
                CldrUtility.getProperty(
                        "newDirectory",
                        PathUtilities.getNormalizedPathString(
                                        new File(
                                                CLDRPaths.BASE_DIRECTORY,
                                                "../cldr-release-1-7/common/main"))
                                + "/");
        String changesDirectory =
                CldrUtility.getProperty(
                        "changesDirectory",
                        PathUtilities.getNormalizedPathString(
                                        CLDRPaths.CHART_DIRECTORY + "/changes/")
                                + "/");

        String filter = CldrUtility.getProperty("localeFilter", ".*");
        // Any value starting with "t" (e.g. "true") turns on the display of inherited changes.
        boolean showAliased =
                CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t");

        // Create the factories

        Factory oldFactory = Factory.make(oldDirectory, filter);
        Factory newFactory = Factory.make(newDirectory, filter);
        CLDRFile english = newFactory.make("en", true);
        CLDRFile newRoot = newFactory.make("root", true);

        // Get the union of all the language locales, sorted by English name

        Set<String> oldList = oldFactory.getAvailableLanguages();
        Set<String> newList = newFactory.getAvailableLanguages();
        Set<String> unifiedList = new HashSet<>(oldList);
        unifiedList.addAll(newList);
        Set<R2<String, String>> pairs = new TreeSet<>();
        for (String code : unifiedList) {
            pairs.add(Row.of(english.getName(code), code));
        }

        prettyPathMaker = new PrettyPath();
        int totalDifferences = 0;

        Set<R2<String, String>> indexInfo = new TreeSet<>(ENG);

        // iterate through those
        for (R2<String, String> pair : pairs) {
            timer.start();
            final String locale = pair.get1();
            final String localeName = pair.get0();
            System.out.println(locale);
            int differences = 0;
            System.out.println();

            // Create CLDR files for both sides. A locale that is missing on one side is
            // represented by an empty file so that every path shows up as added or deleted.

            CLDRFile oldFile = null;
            if (oldList.contains(locale)) {
                try {
                    oldFile = oldFactory.make(locale, true, true);
                } catch (Exception e) {
                    // Record the failure in the index, but do not hide why it happened.
                    System.err.println("Can't open old file for locale: " + locale + ": " + e);
                    addToIndex(indexInfo, "ERROR1.6 ", locale, localeName);
                    continue;
                }
            } else {
                oldFile = SimpleFactory.makeFile(locale); // make empty file
            }
            CLDRFile newFile = null;
            if (newList.contains(locale)) {
                try {
                    newFile = newFactory.make(locale, true, true);
                } catch (Exception e) {
                    System.err.println("Can't open new file for locale: " + locale + ": " + e);
                    addToIndex(indexInfo, "ERROR1.7 ", locale, localeName);
                    continue;
                }
            } else {
                newFile = SimpleFactory.makeFile(locale); // make empty file
            }

            // Check for null cases.
            // NOTE(review): these only trigger if a factory call above can return null;
            // a locale that is merely absent gets an empty file instead. Confirm intent.

            if (oldFile == null) {
                addToIndex(indexInfo, "NEW ", locale, localeName);
                continue;
            } else if (newFile == null) {
                addToIndex(indexInfo, "DELETED ", locale, localeName);
                continue;
            }
            System.out.println("*** " + localeName + "\t" + locale);
            System.out.println();

            // exclude aliased locales
            if (newFile.isAliasedAtTopLevel()) {
                continue;
            }

            // Get the union of all the paths

            Set<String> paths;
            try {
                paths = new HashSet<>();
                oldFile.forEach(paths::add);
                if (oldList.contains(locale)) {
                    paths.addAll(oldFile.getExtraPaths());
                }
                newFile.forEach(paths::add);
                if (newList.contains(locale)) {
                    paths.addAll(newFile.getExtraPaths());
                }
            } catch (Exception e) {
                System.err.println("Locale: " + locale + ", " + localeName);
                e.printStackTrace();
                addToIndex(indexInfo, "ERROR ", locale, localeName);
                continue;
            }

            // We now have the full set of all the paths for old and new files
            // TODO Sort by the pretty form

            final String localeDisplayName = english.getName(locale);
            TablePrinter table = newChangeTable(localeDisplayName, locale);
            Counter<String> fileCounter = new Counter<>();

            for (String path : paths) {
                if (path.contains("/alias") || path.contains("/identity")) {
                    continue;
                }
                String cleanedPath = CLDRFile.getNondraftNonaltXPath(path);

                String oldValue = oldFile.getStringValue(cleanedPath);
                String newValue = newFile.getStringValue(path);
                String englishValue = english.getStringValue(cleanedPath);

                if (equals(newValue, oldValue)) {
                    continue;
                }

                // get the actual place the data is stored
                // AND adjust if the same as root!

                Status newStatus = new Status();
                String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus);

                // At this point, we have two unequal values
                // TODO check for non-distinguishing attribute value differences

                // Mark as aliased (inherited) if the new value was found at a different path
                // or in a different locale than the one being charted.
                boolean isAliased =
                        !newStatus.pathWhereFound.equals(path) || !newFoundLocale.equals(locale);

                if (isAliased && !showAliased) {
                    continue;
                }

                // We definitely have a difference worth recording, so do so

                String coreStatus = classify(newFile.getFullXPath(path), oldValue, newValue);
                String status = isAliased ? "I+" + coreStatus : coreStatus;
                fileCounter.increment(status);
                totalCounter.increment(status);

                String prettySort = prettyPathMaker.getPrettyPath(cleanedPath);
                String[] prettyPartsSort = prettySort.split("[|]");
                if (prettyPartsSort.length != 3) {
                    System.out.println(
                            "Bad pretty path: " + prettySort + ", original: " + cleanedPath);
                }

                String pretty = prettyPathMaker.getOutputForm(prettySort);
                String escapedPath =
                        "http://unicode.org/cldr/apps/survey?_="
                                + locale
                                + "&xpath="
                                + EscapingUtilities.urlEscape(cleanedPath);
                String[] prettyParts = pretty.split("[|]");
                if (prettyParts.length != 3) {
                    System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath);
                }

                // part(...) pads missing components so that a malformed pretty path is
                // reported above but does not abort the whole run.
                table.addRow()
                        .addCell(part(prettyPartsSort, 0))
                        .addCell(part(prettyPartsSort, 1))
                        .addCell(part(prettyPartsSort, 2))
                        .addCell(escapedPath)
                        .addCell(isAliased ? "I" : "")
                        .addCell(part(prettyParts, 0))
                        .addCell(part(prettyParts, 1))
                        .addCell(part(prettyParts, 2))
                        .addCell(englishValue == null ? "-" : englishValue)
                        .addCell(coreStatus)
                        .addCell(oldValue == null ? "-" : oldValue)
                        .addCell(newValue == null ? "-" : newValue)
                        .finishRow();

                totalDifferences++;
                differences++;
            }

            addToIndex(indexInfo, "", locale, localeName, fileCounter);
            String title = "Changes in " + localeDisplayName;

            // try-with-resources: the page is closed even if rendering a table throws.
            try (PrintWriter out =
                    FileUtilities.openUTF8Writer(changesDirectory, locale + ".html")) {
                out.println(
                        pageHeader(title)
                                + "<a href='index.html'>Index</a> | "
                                + HELP_LINK
                                + warningMessage);

                TablePrinter table2 =
                        new TablePrinter()
                                .setCaption("Totals")
                                .addColumn("Inh.")
                                .setSortPriority(0)
                                .addColumn("Status")
                                .setSortPriority(1)
                                .addColumn("Total");

                for (String key : fileCounter.getKeysetSortedByKey()) {
                    boolean inherited = key.startsWith("I+");
                    table2.addRow()
                            .addCell(inherited ? "I" : "")
                            .addCell(inherited ? key.substring(2) : key)
                            .addCell(format.format(fileCounter.getCount(key)))
                            .finishRow();
                }
                out.println(table2);
                out.println("<br>");
                out.println(table);

                // show status on console

                System.out.println(
                        locale
                                + "\tDifferences:\t"
                                + format.format(differences)
                                + "\tPaths:\t"
                                + format.format(paths.size())
                                + "\tTime:\t"
                                + timer);

                totalPaths += paths.size();
                out.println(ShowData.dateFooter());
                out.println(CldrUtility.ANALYTICS);
                out.println("</body></html>");
            }
        }

        writeIndex(changesDirectory, indexInfo);

        System.out.println();

        for (String key : totalCounter.getKeysetSortedByKey()) {
            System.out.println(key + "\t" + totalCounter.getCount(key));
        }

        System.out.println(
                "Total Differences:\t"
                        + format.format(totalDifferences)
                        + "\tPaths:\t"
                        + format.format(totalPaths)
                        + "\tTotal Time:\t"
                        + format.format(totalTimer.getDuration())
                        + "ms");
    }

    /** Creates the shared number formatter with digit grouping switched on. */
    private static NumberFormat makeNumberFormat() {
        NumberFormat result = NumberFormat.getNumberInstance();
        result.setGroupingUsed(true);
        return result;
    }

    /**
     * Returns {@code parts[index]}, or the empty string when the array is too short. Used for
     * pretty paths that did not split into the expected three components.
     */
    private static String part(String[] parts, int index) {
        return index < parts.length ? parts[index] : "";
    }

    /**
     * Classifies a difference between an old and a new value.
     *
     * @param newFullPath full path of the new value, may be null
     * @param oldValue old value, or null if absent
     * @param newValue new value, or null if absent
     * @return {@code "NOT-ACC"} when the new full path carries a {@code @draft} attribute other
     *     than {@code contributed}; otherwise {@code "deleted"}, {@code "added"} or {@code
     *     "changed"}
     */
    private static String classify(String newFullPath, String oldValue, String newValue) {
        final boolean reject =
                newFullPath != null
                        && newFullPath.contains("@draft")
                        && !newFullPath.contains("@draft=\"contributed\"");
        if (reject) {
            return "NOT-ACC";
        } else if (newValue == null) {
            return "deleted";
        } else if (oldValue == null) {
            return "added";
        }
        return "changed";
    }

    /**
     * Builds the empty per-locale change table. The first four columns are hidden: three sort
     * keys and the survey URL that the "Item" column's cell pattern refers to as <code>{4}</code>.
     */
    private static TablePrinter newChangeTable(String localeDisplayName, String locale) {
        return new TablePrinter()
                .setCaption("Changes in " + localeDisplayName + " (" + locale + ")")
                .addColumn("PRETTY_SORT1")
                .setSortPriority(1)
                .setHidden(true)
                .setRepeatHeader(true)
                .addColumn("PRETTY_SORT2")
                .setSortPriority(2)
                .setHidden(true)
                .addColumn("PRETTY_SORT3")
                .setSortPriority(3)
                .setHidden(true)
                .addColumn("ESCAPED_PATH")
                .setHidden(true)
                .addColumn("Inh.")
                .setCellAttributes("class=\"{0}\"")
                .setSortPriority(0)
                .setSpanRows(true)
                .setRepeatHeader(true)
                .addColumn("Section")
                .setSpanRows(true)
                .setCellAttributes("class='section'")
                .addColumn("Subsection")
                .setSpanRows(true)
                .setCellAttributes("class='subsection'")
                .addColumn("Item")
                .setSpanRows(true)
                .setCellPattern("<a href=\"{4}\">{0}</a>")
                .setCellAttributes("class='item'")
                .addColumn("English")
                .setCellAttributes("class='english'")
                .addColumn("Status")
                .setSortPriority(4)
                .setCellAttributes("class=\"{0}\"")
                .addColumn("Old" + localeDisplayName)
                .setCellAttributes("class='old'")
                .addColumn("New" + localeDisplayName)
                .setCellAttributes("class='new'");
    }

    /**
     * Returns the HTML that opens a generated page: head (charset, title, stylesheet, base
     * target), the body start tag and an {@code <h1>} repeating the title.
     */
    private static String pageHeader(String title) {
        return "<html>"
                + "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
                + CldrUtility.LINE_SEPARATOR
                + "<title>"
                + title
                + "</title>"
                + CldrUtility.LINE_SEPARATOR
                + "<link rel='stylesheet' href='index.css' type='text/css'>"
                + CldrUtility.LINE_SEPARATOR
                + "<base target='_blank'>"
                + CldrUtility.LINE_SEPARATOR
                + "</head><body>"
                + CldrUtility.LINE_SEPARATOR
                + "<h1>"
                + title
                + "</h1>"
                + CldrUtility.LINE_SEPARATOR;
    }

    /**
     * Writes {@code index.html}: one table row per distinct first code point of the entry names,
     * with the entries of a row separated by {@code " | "}.
     *
     * @param changesDirectory directory the index file is written to
     * @param indexInfo (name, HTML snippet) entries in display order
     * @throws IOException if the file cannot be opened
     */
    private static void writeIndex(String changesDirectory, Set<R2<String, String>> indexInfo)
            throws IOException {
        try (PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html")) {
            indexFile.println(
                    pageHeader("Change Summary") + HELP_LINK + warningMessage + "<table><tr>");

            String separator = "";
            int last = 0; // 0 doubles as the "no row started yet" marker
            for (R2<String, String> indexPair : indexInfo) {
                int firstChar = indexPair.get0().codePointAt(0);
                String prefix;
                if (firstChar == last) {
                    prefix = separator;
                } else {
                    // Character.toChars keeps supplementary code points intact; a plain
                    // (char) cast would truncate them to a wrong BMP character.
                    prefix =
                            (last == 0 ? "" : "</td></tr>\n<tr>")
                                    + "<th>"
                                    + new String(Character.toChars(firstChar))
                                    + "</th><td>";
                }
                indexFile.append(prefix).append(indexPair.get1());
                separator = " | ";
                last = firstChar;
            }
            indexFile.println("</tr></table>");
            indexFile.println(ShowData.dateFooter());
            indexFile.println(CldrUtility.ANALYTICS);
            indexFile.println("</body></html>");
        }
    }

    /** Adds an index entry without per-status counts. */
    private static void addToIndex(
            Set<R2<String, String>> indexInfo,
            String title,
            final String locale,
            final String localeName) {
        addToIndex(indexInfo, title, locale, localeName, null);
    }

    /**
     * Adds an entry for a locale to the index.
     *
     * <p>Titles starting with {@code "ERROR"} produce a plain-text entry with no link. All other
     * titles produce a link to {@code <locale>.html}, followed, when {@code fileCounter} has
     * entries, by a bracketed list of {@code <first char of status>:<count>} items.
     *
     * @param indexInfo set receiving the (localeName, HTML snippet) entry
     * @param title prefix shown before the locale name; may be empty
     * @param locale locale ID, used for the link target and shown in parentheses
     * @param localeName display name, used as the sort key and in the link text
     * @param fileCounter per-status counts for the locale, or null for none
     */
    private static void addToIndex(
            Set<R2<String, String>> indexInfo,
            String title,
            final String locale,
            final String localeName,
            Counter<String> fileCounter) {
        if (title.startsWith("ERROR")) {
            indexInfo.add(R2.of(localeName, title + " " + localeName + " (" + locale + ")"));
            return;
        }
        StringBuilder counts = new StringBuilder();
        if (fileCounter != null) {
            for (String s : fileCounter) {
                if (counts.length() != 0) {
                    counts.append("; ");
                }
                counts.append(s.charAt(0))
                        .append(":")
                        .append(format.format(fileCounter.getCount(s)));
            }
        }
        indexInfo.add(
                R2.of(
                        localeName,
                        "<a href='"
                                + locale
                                + ".html'>"
                                + title
                                + localeName
                                + " ("
                                + locale
                                + ")</a>"
                                + (counts.length() == 0 ? "" : " [" + counts + "]")));
    }

    /**
     * Determines which locale a value effectively comes from.
     *
     * @param oldFile file to query; {@code oldStatus} is filled in by its {@code
     *     getSourceLocaleID} call
     * @param oldRoot root file used to detect values identical to root
     * @param path path whose source is wanted
     * @param oldString the value found for {@code path}, may be null
     * @param oldStatus receives the path where the value was found
     * @return the source locale reported by {@code oldFile}, or {@code "root"} when the value
     *     equals root's value at the path where it was found
     */
    private static String getStatus(
            CLDRFile oldFile, CLDRFile oldRoot, String path, String oldString, Status oldStatus) {
        String oldLocale = oldFile.getSourceLocaleID(path, oldStatus);
        if (!oldLocale.equals("root")) {
            String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound);
            if (equals(oldString, oldRootValue)) {
                oldLocale = "root";
            }
        }
        return oldLocale;
    }

    /**
     * Prints a header line (locale, title, size) followed by the second field of every row;
     * prints nothing when {@code rejected} is empty.
     */
    private static void showSet(
            PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title) {
        if (rejected.size() != 0) {
            out.println();
            out.println(locale + "\t" + title + "\t" + rejected.size());
            for (R2<String, String> prettyAndline : rejected) {
                out.println(prettyAndline.get1());
            }
        }
    }

    /** Null-safe string equality: two nulls are equal, null never equals a non-null string. */
    private static boolean equals(String newString, String oldString) {
        if (newString == null) {
            return oldString == null;
        }
        return newString.equals(oldString);
    }
}