xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.ibm.icu.impl.Row;
4 import com.ibm.icu.impl.Row.R2;
5 import com.ibm.icu.text.Collator;
6 import com.ibm.icu.text.NumberFormat;
7 import com.ibm.icu.text.UTF16;
8 import java.io.File;
9 import java.io.IOException;
10 import java.io.PrintWriter;
11 import java.util.Comparator;
12 import java.util.HashSet;
13 import java.util.Set;
14 import java.util.TreeSet;
15 import org.unicode.cldr.draft.FileUtilities;
16 import org.unicode.cldr.util.CLDRFile;
17 import org.unicode.cldr.util.CLDRFile.Status;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.CldrUtility;
20 import org.unicode.cldr.util.Counter;
21 import org.unicode.cldr.util.EscapingUtilities;
22 import org.unicode.cldr.util.Factory;
23 import org.unicode.cldr.util.PathUtilities;
24 import org.unicode.cldr.util.PrettyPath;
25 import org.unicode.cldr.util.SimpleFactory;
26 import org.unicode.cldr.util.Timer;
27 
28 public class GenerateComparison {
29 
30     private static PrettyPath prettyPathMaker;
31 
32     private static Collator collator = Collator.getInstance();
33 
34     static class EnglishRowComparator implements Comparator<R2<String, String>> {
35         private static Comparator<String> unicode = new UTF16.StringComparator(true, false, 0);
36 
37         @Override
compare(R2<String, String> arg0, R2<String, String> arg1)38         public int compare(R2<String, String> arg0, R2<String, String> arg1) {
39             int result = collator.compare(arg0.get0(), arg1.get0());
40             if (result != 0) return result;
41             result = unicode.compare(arg0.get0(), arg1.get0());
42             if (result != 0) return result;
43             result = collator.compare(arg0.get1(), arg1.get1());
44             if (result != 0) return result;
45             result = unicode.compare(arg0.get1(), arg1.get1());
46             return result;
47         }
48     }
49 
50     static EnglishRowComparator ENG = new EnglishRowComparator();
51 
52     static final String warningMessage =
53             "<p><b>Warning: this chart is still under development. For how to use it, see <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\">Help: How to Vet</a>.</b></p>";
54 
main(String[] args)55     public static void main(String[] args) throws IOException {
56 
57         // Setup
58         Timer timer = new Timer();
59         Timer totalTimer = new Timer();
60         long totalPaths = 0;
61         format = NumberFormat.getNumberInstance();
62         format.setGroupingUsed(true);
63 
64         Counter<String> totalCounter = new Counter<>();
65 
66         // Get the args
67 
68         String oldDirectory =
69                 CldrUtility.getProperty(
70                         "oldDirectory",
71                         PathUtilities.getNormalizedPathString(
72                                         new File(CLDRPaths.BASE_DIRECTORY, "common/main"))
73                                 + "/");
74         String newDirectory =
75                 CldrUtility.getProperty(
76                         "newDirectory",
77                         PathUtilities.getNormalizedPathString(
78                                         new File(
79                                                 CLDRPaths.BASE_DIRECTORY,
80                                                 "../cldr-release-1-7/common/main"))
81                                 + "/");
82         String changesDirectory =
83                 CldrUtility.getProperty(
84                         "changesDirectory",
85                         PathUtilities.getNormalizedPathString(
86                                         CLDRPaths.CHART_DIRECTORY + "/changes/")
87                                 + "/");
88 
89         String filter = CldrUtility.getProperty("localeFilter", ".*");
90         boolean SHOW_ALIASED =
91                 CldrUtility.getProperty("showAliased", "false").toLowerCase().startsWith("t");
92 
93         // Create the factories
94 
95         Factory oldFactory = Factory.make(oldDirectory, filter);
96         Factory newFactory = Factory.make(newDirectory, filter);
97         CLDRFile english = newFactory.make("en", true);
98         CLDRFile newRoot = newFactory.make("root", true);
99 
100         // Get the union of all the language locales, sorted by English name
101 
102         Set<String> oldList = oldFactory.getAvailableLanguages();
103         Set<String> newList = newFactory.getAvailableLanguages();
104         Set<String> unifiedList = new HashSet<>(oldList);
105         unifiedList.addAll(newList);
106         Set<R2<String, String>> pairs = new TreeSet<>();
107         for (String code : unifiedList) {
108             pairs.add(Row.of(english.getName(code), code));
109         }
110 
111         prettyPathMaker = new PrettyPath();
112         int totalDifferences = 0;
113         int differences = 0;
114 
115         Set<R2<String, String>> indexInfo = new TreeSet<>(ENG);
116 
117         // iterate through those
118         for (R2<String, String> pair : pairs) {
119             timer.start();
120             final String locale = pair.get1();
121             final String localeName = pair.get0();
122             System.out.println(locale);
123             differences = 0;
124             System.out.println();
125 
126             // Create CLDR files for both; null if can't open
127 
128             CLDRFile oldFile = null;
129             if (oldList.contains(locale)) {
130                 try {
131                     oldFile = oldFactory.make(locale, true, true);
132                 } catch (Exception e) {
133                     addToIndex(indexInfo, "ERROR1.6 ", locale, localeName);
134                     continue;
135                 }
136             } else {
137                 oldFile = SimpleFactory.makeFile(locale); // make empty file
138             }
139             CLDRFile newFile = null;
140             if (newList.contains(locale)) {
141                 try {
142                     newFile = newFactory.make(locale, true, true);
143                 } catch (Exception e) {
144                     addToIndex(indexInfo, "ERROR1.7 ", locale, localeName);
145                     continue;
146                 }
147             } else {
148                 newFile = SimpleFactory.makeFile(locale); // make empty file
149             }
150 
151             // for(String str : newFile) {
152             // String xo = newFile.getFullXPath(str);
153             // String v = newFile.getStringValue(str);
154             //
155             // System.out.println(xo+"\t"+v+"\n");
156             //
157             // }
158             // Check for null cases
159 
160             if (oldFile == null) {
161                 addToIndex(indexInfo, "NEW ", locale, localeName);
162                 continue;
163             } else if (newFile == null) {
164                 addToIndex(indexInfo, "DELETED ", locale, localeName);
165                 continue;
166             }
167             System.out.println("*** " + localeName + "\t" + locale);
168             System.out.println();
169 
170             // exclude aliased locales
171             if (newFile.isAliasedAtTopLevel()) {
172                 continue;
173             }
174 
175             // Get the union of all the paths
176 
177             Set<String> paths;
178             try {
179                 paths = new HashSet<>();
180                 oldFile.forEach(paths::add);
181                 if (oldList.contains(locale)) {
182                     paths.addAll(oldFile.getExtraPaths());
183                 }
184                 newFile.forEach(paths::add);
185                 if (newList.contains(locale)) {
186                     paths.addAll(newFile.getExtraPaths());
187                 }
188             } catch (Exception e) {
189                 System.err.println("Locale: " + locale + ", " + localeName);
190                 e.printStackTrace();
191                 addToIndex(indexInfo, "ERROR ", locale, localeName);
192                 continue;
193             }
194 
195             // We now have the full set of all the paths for old and new files
196             // TODO Sort by the pretty form
197             // Set<R2<String,String>> pathPairs = new TreeSet();
198             // for (String code : unifiedList) {
199             // pairs.add(Row.make(code, english.getName(code)));
200             // }
201 
202             // Initialize sets
203             // .addColumn("Code", "class='source'", "<a name=\"{0}\"
204             // href='likely_subtags.html#und_{0}'>{0}</a>",
205             // "class='source'", true)
206 
207             final String localeDisplayName = english.getName(locale);
208             TablePrinter table =
209                     new TablePrinter()
210                             .setCaption("Changes in " + localeDisplayName + " (" + locale + ")")
211                             .addColumn("PRETTY_SORT1")
212                             .setSortPriority(1)
213                             .setHidden(true)
214                             .setRepeatHeader(true)
215                             .addColumn("PRETTY_SORT2")
216                             .setSortPriority(2)
217                             .setHidden(true)
218                             .addColumn("PRETTY_SORT3")
219                             .setSortPriority(3)
220                             .setHidden(true)
221                             .addColumn("ESCAPED_PATH")
222                             .setHidden(true)
223                             .addColumn("Inh.")
224                             .setCellAttributes("class=\"{0}\"")
225                             .setSortPriority(0)
226                             .setSpanRows(true)
227                             .setRepeatHeader(true)
228                             .addColumn("Section")
229                             .setSpanRows(true)
230                             .setCellAttributes("class='section'")
231                             .addColumn("Subsection")
232                             .setSpanRows(true)
233                             .setCellAttributes("class='subsection'")
234                             .addColumn("Item")
235                             .setSpanRows(true)
236                             .setCellPattern("<a href=\"{4}\">{0}</a>")
237                             .setCellAttributes("class='item'")
238                             .addColumn("English")
239                             .setCellAttributes("class='english'")
240                             .addColumn("Status")
241                             .setSortPriority(4)
242                             .setCellAttributes("class=\"{0}\"")
243                             .addColumn("Old" + localeDisplayName)
244                             .setCellAttributes("class='old'")
245                             .addColumn("New" + localeDisplayName)
246                             .setCellAttributes("class='new'");
247             Counter<String> fileCounter = new Counter<>();
248 
249             for (String path : paths) {
250                 if (path.contains("/alias") || path.contains("/identity")) {
251                     continue;
252                 }
253                 String cleanedPath = CLDRFile.getNondraftNonaltXPath(path);
254 
255                 String oldValue = oldFile.getStringValue(cleanedPath);
256                 String newValue = newFile.getStringValue(path);
257                 String englishValue = english.getStringValue(cleanedPath);
258 
259                 // for debugging
260                 if (oldValue != null && oldValue.contains("{1} {0}")) {
261                     System.out.print("");
262                 }
263 
264                 if (equals(newValue, oldValue)) {
265                     continue;
266                 }
267 
268                 // get the actual place the data is stored
269                 // AND adjust if the same as root!
270 
271                 Status newStatus = new Status();
272                 String newFoundLocale = getStatus(newFile, newRoot, path, newValue, newStatus);
273 
274                 // At this point, we have two unequal values
275                 // TODO check for non-distinguishing attribute value differences
276 
277                 boolean isAliased = false;
278 
279                 // Skip deletions of alt-proposed
280 
281                 // if (newValue == null) {
282                 // if (path.contains("@alt=\"proposed")) {
283                 // continue;
284                 // }
285                 // }
286 
287                 // Skip if both inherited from the same locale, since we should catch it
288                 // in that locale.
289 
290                 // Mark as aliased if new locale or path is different
291                 if (!newStatus.pathWhereFound.equals(path)) {
292                     isAliased = true;
293                     // continue;
294                 }
295 
296                 if (!newFoundLocale.equals(locale)) {
297                     isAliased = true;
298                     // continue;
299                 }
300 
301                 // // skip if old locale or path is aliased
302                 // if (!oldFoundLocale.equals(locale)) {
303                 // //isAliased=true;
304                 // continue;
305                 // }
306                 //
307                 // // Skip if either found path is are different
308                 // if (!oldStatus.pathWhereFound.equals(cleanedPath)) {
309                 // //isAliased=true;
310                 // continue;
311                 // }
312 
313                 // Now check other aliases
314 
315                 // final boolean newIsAlias = !newStatus.pathWhereFound.equals(path);
316                 // if (newIsAlias) { // new is alias
317                 // // filter out cases of a new string that is found via alias
318                 // if (oldValue == null) {
319                 // continue;
320                 // }
321                 //
322                 // }
323 
324                 if (isAliased && !SHOW_ALIASED) {
325                     continue;
326                 }
327 
328                 // We definitely have a difference worth recording, so do so
329 
330                 String newFullPath = newFile.getFullXPath(path);
331                 final boolean reject =
332                         newFullPath != null
333                                 && newFullPath.contains("@draft")
334                                 && !newFullPath.contains("@draft=\"contributed\"");
335                 String status;
336                 if (reject) {
337                     status = "NOT-ACC";
338                 } else if (newValue == null) {
339                     status = "deleted";
340                 } else if (oldValue == null) {
341                     status = "added";
342                 } else {
343                     status = "changed";
344                 }
345                 String coreStatus = status;
346                 if (isAliased) {
347                     status = "I+" + status;
348                 }
349                 fileCounter.increment(status);
350                 totalCounter.increment(status);
351 
352                 String pretty_sort = prettyPathMaker.getPrettyPath(cleanedPath);
353                 String[] prettyPartsSort = pretty_sort.split("[|]");
354                 if (prettyPartsSort.length != 3) {
355                     System.out.println(
356                             "Bad pretty path: " + pretty_sort + ", original: " + cleanedPath);
357                 }
358                 String prettySort1 = prettyPartsSort[0];
359                 String prettySort2 = prettyPartsSort[1];
360                 String prettySort3 = prettyPartsSort[2];
361 
362                 String pretty = prettyPathMaker.getOutputForm(pretty_sort);
363                 String escapedPath =
364                         "http://unicode.org/cldr/apps/survey?_="
365                                 + locale
366                                 + "&xpath="
367                                 + EscapingUtilities.urlEscape(cleanedPath);
368                 String[] prettyParts = pretty.split("[|]");
369                 if (prettyParts.length != 3) {
370                     System.out.println("Bad pretty path: " + pretty + ", original: " + cleanedPath);
371                 }
372                 String pretty1 = prettyParts[0];
373                 String pretty2 = prettyParts[1];
374                 String pretty3 = prettyParts[2];
375 
376                 // http://kwanyin.unicode.org/cldr-apps/survey?_=kw_GB&xpath=%2F%2Fldml%2FlocaleDisplayNames%2Flanguages%2Flanguage%5B%40type%3D%22mt%22%5D
377 
378                 table.addRow()
379                         .addCell(prettySort1)
380                         .addCell(prettySort2)
381                         .addCell(prettySort3)
382                         .addCell(escapedPath)
383                         .addCell(isAliased ? "I" : "")
384                         .addCell(pretty1)
385                         .addCell(pretty2)
386                         .addCell(pretty3)
387                         .addCell(englishValue == null ? "-" : englishValue)
388                         .addCell(coreStatus)
389                         .addCell(oldValue == null ? "-" : oldValue)
390                         .addCell(newValue == null ? "-" : newValue)
391                         .finishRow();
392 
393                 totalDifferences++;
394                 differences++;
395             }
396 
397             addToIndex(indexInfo, "", locale, localeName, fileCounter);
398             PrintWriter out = FileUtilities.openUTF8Writer(changesDirectory, locale + ".html");
399             String title = "Changes in " + localeDisplayName;
400             out.println(
401                     "<html>"
402                             + "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
403                             + CldrUtility.LINE_SEPARATOR
404                             + "<title>"
405                             + title
406                             + "</title>"
407                             + CldrUtility.LINE_SEPARATOR
408                             + "<link rel='stylesheet' href='index.css' type='text/css'>"
409                             + CldrUtility.LINE_SEPARATOR
410                             + "<base target='_blank'>"
411                             + CldrUtility.LINE_SEPARATOR
412                             + "</head><body>"
413                             + CldrUtility.LINE_SEPARATOR
414                             + "<h1>"
415                             + title
416                             + "</h1>"
417                             + CldrUtility.LINE_SEPARATOR
418                             + "<a href='index.html'>Index</a> | <a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
419                             + warningMessage);
420 
421             TablePrinter table2 =
422                     new TablePrinter()
423                             .setCaption("Totals")
424                             .addColumn("Inh.")
425                             .setSortPriority(0)
426                             .addColumn("Status")
427                             .setSortPriority(1)
428                             .addColumn("Total");
429 
430             for (String key : fileCounter.getKeysetSortedByKey()) {
431                 boolean inherited = key.startsWith("I+");
432                 table2.addRow()
433                         .addCell(inherited ? "I" : "")
434                         .addCell(inherited ? key.substring(2) : key)
435                         .addCell(format.format(fileCounter.getCount(key)))
436                         .finishRow();
437             }
438             out.println(table2);
439             out.println("<br>");
440             out.println(table);
441 
442             // show status on console
443 
444             System.out.println(
445                     locale
446                             + "\tDifferences:\t"
447                             + format.format(differences)
448                             + "\tPaths:\t"
449                             + format.format(paths.size())
450                             + "\tTime:\t"
451                             + timer);
452 
453             totalPaths += paths.size();
454             out.println(ShowData.dateFooter());
455             out.println(CldrUtility.ANALYTICS);
456             out.println("</body></html>");
457             out.close();
458         }
459         PrintWriter indexFile = FileUtilities.openUTF8Writer(changesDirectory, "index.html");
460         indexFile.println(
461                 "<html>"
462                         + "<head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>"
463                         + CldrUtility.LINE_SEPARATOR
464                         + "<title>"
465                         + "Change Summary"
466                         + "</title>"
467                         + CldrUtility.LINE_SEPARATOR
468                         + "<link rel='stylesheet' href='index.css' type='text/css'>"
469                         + CldrUtility.LINE_SEPARATOR
470                         + "<base target='_blank'>"
471                         + CldrUtility.LINE_SEPARATOR
472                         + "</head><body>"
473                         + CldrUtility.LINE_SEPARATOR
474                         + "<h1>"
475                         + "Change Summary"
476                         + "</h1>"
477                         + CldrUtility.LINE_SEPARATOR
478                         + "<a href=\"http://unicode.org/cldr/data/docs/survey/vetting.html\"><b style=\"background-color: yellow;\"><i>Help: How to Vet</i></b></a>"
479                         + warningMessage
480                         + "<table><tr>");
481 
482         String separator = "";
483         int last = 0;
484         for (R2<String, String> indexPair : indexInfo) {
485             int firstChar = indexPair.get0().codePointAt(0);
486             indexFile
487                     .append(
488                             firstChar == last
489                                     ? separator
490                                     : (last == 0 ? "" : "</td></tr>\n<tr>")
491                                             + "<th>"
492                                             + String.valueOf((char) firstChar)
493                                             + "</th><td>")
494                     .append(indexPair.get1());
495             separator = " | ";
496             last = indexPair.get0().codePointAt(0);
497         }
498         indexFile.println("</tr></table>");
499         indexFile.println(ShowData.dateFooter());
500         indexFile.println(CldrUtility.ANALYTICS);
501         indexFile.println("</body></html>");
502         indexFile.close();
503 
504         System.out.println();
505 
506         for (String key : totalCounter.getKeysetSortedByKey()) {
507             System.out.println(key + "\t" + totalCounter.getCount(key));
508         }
509 
510         System.out.println(
511                 "Total Differences:\t"
512                         + format.format(totalDifferences)
513                         + "\tPaths:\t"
514                         + format.format(totalPaths)
515                         + "\tTotal Time:\t"
516                         + format.format(totalTimer.getDuration())
517                         + "ms");
518     }
519 
520     // static Transliterator urlHex = Transliterator.createFromRules("foo",
521     // "([^!(-*,-\\:A-Z_a-z~]) > &hex($1) ;" +
522     // ":: null;" +
523     // "'\\u00' > '%' ;"
524     // , Transliterator.FORWARD);
525 
526     private static NumberFormat format;
527 
addToIndex( Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName)528     private static void addToIndex(
529             Set<R2<String, String>> indexInfo,
530             String title,
531             final String locale,
532             final String localeName) {
533         addToIndex(indexInfo, title, locale, localeName, null);
534     }
535 
addToIndex( Set<R2<String, String>> indexInfo, String title, final String locale, final String localeName, Counter<String> fileCounter)536     private static void addToIndex(
537             Set<R2<String, String>> indexInfo,
538             String title,
539             final String locale,
540             final String localeName,
541             Counter<String> fileCounter) {
542         if (title.startsWith("ERROR")) {
543             indexInfo.add(R2.of(localeName, title + " " + localeName + " (" + locale + ")"));
544             return;
545         }
546         String counterString = "";
547         if (fileCounter != null) {
548             for (String s : fileCounter) {
549                 if (counterString.length() != 0) {
550                     counterString += "; ";
551                 }
552                 counterString += s.charAt(0) + ":" + format.format(fileCounter.getCount(s));
553             }
554         }
555         indexInfo.add(
556                 R2.of(
557                         localeName,
558                         "<a href='"
559                                 + locale
560                                 + ".html'>"
561                                 + title
562                                 + localeName
563                                 + " ("
564                                 + locale
565                                 + ")</a>"
566                                 + (counterString.length() == 0 ? "" : " [" + counterString + "]")));
567     }
568 
569     // private static int accumulate(Set<R2<String,String>> rejected, int totalRejected,
570     // final String locale, String indicator, String oldValue, String newValue, String path) {
571     // String pretty = prettyPathMaker.getPrettyPath(path, false);
572     // String line = locale + "\t" + indicator +"\t\u200E[" + oldValue + "]\u200E\t\u200E[" +
573     // newValue + "]\u200E\t" +
574     // pretty;
575     // String pretty2 = prettyPathMaker.getOutputForm(pretty);
576     // rejected.add(Row.make(pretty2, line));
577     // totalRejected++;
578     // return totalRejected;
579     // }
580 
getStatus( CLDRFile oldFile, CLDRFile oldRoot, String path, String oldString, Status oldStatus)581     private static String getStatus(
582             CLDRFile oldFile, CLDRFile oldRoot, String path, String oldString, Status oldStatus) {
583         String oldLocale = oldFile.getSourceLocaleID(path, oldStatus);
584         if (!oldLocale.equals("root")) {
585             String oldRootValue = oldRoot.getStringValue(oldStatus.pathWhereFound);
586             if (equals(oldString, oldRootValue)) {
587                 oldLocale = "root";
588             }
589         }
590         return oldLocale;
591     }
592 
showSet( PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title)593     private static void showSet(
594             PrintWriter out, Set<R2<String, String>> rejected, final String locale, String title) {
595         if (rejected.size() != 0) {
596             out.println();
597             out.println(locale + "\t" + title + "\t" + rejected.size());
598             for (R2<String, String> prettyAndline : rejected) {
599                 out.println(prettyAndline.get1());
600             }
601         }
602     }
603 
equals(String newString, String oldString)604     private static boolean equals(String newString, String oldString) {
605         if (newString == null) {
606             return oldString == null;
607         }
608         return newString.equals(oldString);
609     }
610 }
611