xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDtdDelta.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.MoreObjects;
5 import com.google.common.base.Splitter;
6 import com.google.common.collect.ImmutableMultimap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.Multimap;
9 import com.ibm.icu.impl.Utility;
10 import com.ibm.icu.util.VersionInfo;
11 import java.io.FileNotFoundException;
12 import java.io.IOException;
13 import java.io.PrintWriter;
14 import java.util.ArrayList;
15 import java.util.Collections;
16 import java.util.EnumMap;
17 import java.util.EnumSet;
18 import java.util.HashSet;
19 import java.util.LinkedHashSet;
20 import java.util.List;
21 import java.util.Map;
22 import java.util.Set;
23 import java.util.regex.Matcher;
24 import org.unicode.cldr.draft.FileUtilities;
25 import org.unicode.cldr.tool.ToolConstants.ChartStatus;
26 import org.unicode.cldr.util.CLDRConfig;
27 import org.unicode.cldr.util.CLDRPaths;
28 import org.unicode.cldr.util.CldrUtility;
29 import org.unicode.cldr.util.DtdData;
30 import org.unicode.cldr.util.DtdData.Attribute;
31 import org.unicode.cldr.util.DtdData.AttributeStatus;
32 import org.unicode.cldr.util.DtdData.Element;
33 import org.unicode.cldr.util.DtdType;
34 import org.unicode.cldr.util.SupplementalDataInfo;
35 
36 /**
37  * Changed ShowDtdDiffs into a chart.
38  *
39  * @author markdavis
40  */
41 public class ChartDtdDelta extends Chart {
42 
43     private static final Splitter SPLITTER_SPACE = Splitter.on(' ');
44 
45     private static final String NEW_PREFIX = "+";
46 
47     private static final String DEPRECATED_PREFIX = "⊖";
48     private static final String UNDEPRECATED_PREFIX = "⊙"; // no occurances yet
49 
50     private static final String ORDERED_SIGN = "⇣";
51     private static final String UNORDERED_SIGN = "⇟";
52 
53     private static final String TECHPREVIEW_SIGN = "��";
54     private static final String UNTECHPREVIEW_SIGN = "ⓟ";
55 
56     private static final Set<String> OMITTED_ATTRIBUTES = Collections.singleton("⊕");
57 
main(String[] args)58     public static void main(String[] args) {
59         new ChartDtdDelta().writeChart(null);
60     }
61 
    /** Returns the output directory for this chart: {@code FormattedFileWriter.CHART_TARGET_DIR}. */
    @Override
    public String getDirectory() {
        return FormattedFileWriter.CHART_TARGET_DIR;
    }
66 
    /** Returns the fixed title of this chart. */
    @Override
    public String getTitle() {
        return "DTD Deltas";
    }
71 
72     @Override
getExplanation()73     public String getExplanation() {
74         return "<p>Changes to the LDML DTDs over time.</p>\n"
75                 + "<ul>\n"
76                 + "<li>New elements or attributes are indicated with a + sign, and newly deprecated ones with a ⊖ sign.</li>\n"
77                 + "<li>Element attributes are abbreviated as ⊕ where is no change to them, "
78                 + "but the element is newly the child of another.</li>\n"
79                 + "<li>LDML DTDs have augmented data:\n"
80                 + "<ul><li>Attribute status is marked by: "
81                 + AttributeStatus.distinguished.shortName
82                 + "="
83                 + AttributeStatus.distinguished
84                 + ", "
85                 + AttributeStatus.value.shortName
86                 + "="
87                 + AttributeStatus.value
88                 + ", or "
89                 + AttributeStatus.metadata.shortName
90                 + "="
91                 + AttributeStatus.metadata
92                 + ".</li>\n"
93                 + "<li>Attribute value constraints are marked with ⟨…⟩ (for DTD constraints) and ⟪…⟫ (for augmented constraints, added in v35.0).</li>\n"
94                 + "<li>Changes in status or constraints are shown with ➠, with identical sections shown with ….</li>\n"
95                 + "<li>Newly ordered elements are indicated with "
96                 + ORDERED_SIGN
97                 + "; newly unordered with "
98                 + UNORDERED_SIGN
99                 + ".</li>\n"
100                 + "<li>Newly tech-preview items are marked with "
101                 + TECHPREVIEW_SIGN
102                 + "; newly graduated from tech preview with "
103                 + UNTECHPREVIEW_SIGN
104                 + ".</li>\n"
105                 + "<li>The following elements are skipped: "
106                 + SKIP_ELEMENTS
107                 + " and "
108                 + SKIP_TYPE_ELEMENTS
109                 + "</li>\n"
110                 + "<li>The following attributes are skipped: "
111                 + SKIP_ATTRIBUTES
112                 + " and "
113                 + SKIP_ATTRIBUTE_MATCHES
114                 + "</li>\n"
115                 + "</ul></li></ul>\n"
116                 + "<p>For more information, see the LDML spec.</p>";
117     }
118 
119     @Override
writeContents(FormattedFileWriter pw)120     public void writeContents(FormattedFileWriter pw) throws IOException {
121         TablePrinter tablePrinter =
122                 new TablePrinter()
123                         .addColumn(
124                                 "Version",
125                                 "class='source'",
126                                 CldrUtility.getDoubleLinkMsg(),
127                                 "class='source'",
128                                 true)
129                         .setSortPriority(0)
130                         .setSortAscending(false)
131                         .setBreakSpans(true)
132                         .addColumn("Dtd Type", "class='source'", null, "class='source'", true)
133                         .setSortPriority(1)
134                         .addColumn(
135                                 "Intermediate Path", "class='source'", null, "class='target'", true)
136                         .setSortPriority(2)
137                         .addColumn("Element", "class='target'", null, "class='target'", true)
138                         .setSpanRows(false)
139                         .addColumn("Attributes", "class='target'", null, "class='target'", true)
140                         .setSpanRows(false);
141 
142         String last = null;
143 
144         for (String current :
145                 ToolConstants.CHART_STATUS != ChartStatus.release
146                         ? ToolConstants.CLDR_RELEASE_AND_DEV_VERSION_SET
147                         : ToolConstants.CLDR_RELEASE_VERSION_SET) {
148             System.out.println("DTD delta: " + current);
149             final boolean finalVersion = current.equals(ToolConstants.DEV_VERSION);
150             String currentName = finalVersion ? ToolConstants.CHART_DISPLAY_VERSION : current;
151             for (DtdType type : TYPES) {
152                 String firstVersion = type.firstVersion; // FIRST_VERSION.get(type);
153                 if (firstVersion != null
154                         && current != null
155                         && VersionInfo.getInstance(current)
156                                         .compareTo(VersionInfo.getInstance(firstVersion))
157                                 < 0) {
158                     // skip if current is too old to have “type”
159                     continue;
160                 }
161                 DtdData dtdCurrent = null;
162                 try {
163                     dtdCurrent =
164                             DtdData.getInstance(
165                                     type,
166                                     finalVersion
167                                             // && ToolConstants.CHART_STATUS !=
168                                             // ToolConstants.ChartStatus.release
169                                             ? null
170                                             : current);
171                 } catch (Exception e) {
172                     if (!(e.getCause() instanceof FileNotFoundException)) {
173                         throw e;
174                     }
175                     System.out.println(e.getMessage() + ", " + e.getCause().getMessage());
176                     continue;
177                 }
178                 DtdData dtdLast = null;
179                 if (last != null
180                         && (firstVersion == null
181                                 || VersionInfo.getInstance(last)
182                                                 .compareTo(VersionInfo.getInstance(firstVersion))
183                                         >= 0)) {
184                     // only read if last isn’t too old to have “type”
185                     dtdLast = DtdData.getInstance(type, last);
186                 }
187                 diff(currentName, dtdLast, dtdCurrent);
188             }
189             last = current;
190             if (current.contentEquals(ToolConstants.CHART_VERSION)) {
191                 break;
192             }
193         }
194 
195         for (DiffElement datum : data) {
196             tablePrinter
197                     .addRow()
198                     .addCell(datum.getVersionString())
199                     .addCell(datum.dtdType)
200                     .addCell(datum.newPath)
201                     .addCell(datum.newElement)
202                     .addCell(datum.attributeNames)
203                     .finishRow();
204         }
205         pw.write(tablePrinter.toTable());
206         pw.write(Utility.repeat("<br>", 50));
207         try (PrintWriter tsvFile =
208                 FileUtilities.openUTF8Writer(
209                         CLDRPaths.CHART_DIRECTORY + "/tsv/", "dtd_deltas.tsv")) {
210             tablePrinter.toTsv(tsvFile);
211         }
212     }
213 
214     static final String NONE = " ";
215 
216     static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo();
217 
218     static Set<DtdType> TYPES = EnumSet.allOf(DtdType.class);
219 
220     static {
221         TYPES.remove(DtdType.ldmlICU);
222     }
223 
224     static final Map<DtdType, String> FIRST_VERSION = new EnumMap<>(DtdType.class);
225 
226     static {
FIRST_VERSION.put(DtdType.ldmlBCP47, "1.7.2")227         FIRST_VERSION.put(DtdType.ldmlBCP47, "1.7.2");
FIRST_VERSION.put(DtdType.keyboard3, "22.1")228         FIRST_VERSION.put(DtdType.keyboard3, "22.1");
229     }
230 
diff(String prefix, DtdData dtdLast, DtdData dtdCurrent)231     private void diff(String prefix, DtdData dtdLast, DtdData dtdCurrent) {
232         Map<String, Element> oldNameToElement =
233                 dtdLast == null ? Collections.emptyMap() : dtdLast.getElementFromName();
234         checkNames(
235                 prefix,
236                 dtdCurrent,
237                 dtdLast,
238                 oldNameToElement,
239                 "/",
240                 dtdCurrent.ROOT,
241                 new HashSet<Element>(),
242                 false);
243     }
244 
    // Debugging aids read by checkNames: DEBUG_DTD is compared with the current DTD type and
    // DEBUG_ELEMENT is looked for inside the element name (a hook for a breakpoint);
    // SHOW enables printing of each visited element for the chart display version.
    static final DtdType DEBUG_DTD = null; // set to enable
    static final String DEBUG_ELEMENT = "lias";
    static final boolean SHOW = false;
248 
249     @SuppressWarnings("unused")
checkNames( String version, DtdData dtdCurrent, DtdData dtdLast, Map<String, Element> oldNameToElement, String path, Element element, HashSet<Element> seen, boolean showAnyway)250     private void checkNames(
251             String version,
252             DtdData dtdCurrent,
253             DtdData dtdLast,
254             Map<String, Element> oldNameToElement,
255             String path,
256             Element element,
257             HashSet<Element> seen,
258             boolean showAnyway) {
259         String name = element.getName();
260 
261         if (SKIP_ELEMENTS.contains(name)) {
262             return;
263         }
264         if (SKIP_TYPE_ELEMENTS.containsEntry(dtdCurrent.dtdType, name)) {
265             return;
266         }
267 
268         String newPath = path + "/" + element.name;
269 
270         // if an element is newly a child of another but has already been seen, you'll have special
271         // indication
272         if (seen.contains(element)) {
273             if (showAnyway) {
274                 addData(dtdCurrent, NEW_PREFIX + name, version, newPath, OMITTED_ATTRIBUTES);
275             }
276             return;
277         }
278 
279         seen.add(element);
280         if (SHOW && ToolConstants.CHART_DISPLAY_VERSION.equals(version)) {
281             System.out.println(dtdCurrent.dtdType + "\t" + name);
282         }
283         if (DEBUG_DTD == dtdCurrent.dtdType && name.contains(DEBUG_ELEMENT)) {
284             int debug = 0;
285         }
286 
287         Element oldElement = null;
288         boolean ordered = element.isOrdered();
289         boolean currentTechPreview = element.isTechPreview();
290 
291         if (!oldNameToElement.containsKey(name)) {
292             Set<String> attributeNames =
293                     getAttributeNames(
294                             dtdCurrent,
295                             dtdLast,
296                             name,
297                             Collections.emptyMap(),
298                             element.getAttributes());
299             final String prefix = NEW_PREFIX + (currentTechPreview ? TECHPREVIEW_SIGN : "");
300             addData(
301                     dtdCurrent,
302                     prefix + name + (ordered ? ORDERED_SIGN : ""),
303                     version,
304                     newPath,
305                     attributeNames);
306         } else {
307             oldElement = oldNameToElement.get(name);
308             boolean oldOrdered = oldElement.isOrdered();
309             Set<String> attributeNames =
310                     getAttributeNames(
311                             dtdCurrent,
312                             dtdLast,
313                             name,
314                             oldElement.getAttributes(),
315                             element.getAttributes());
316             boolean currentDeprecated = element.isDeprecated();
317             boolean lastDeprecated =
318                     dtdLast == null
319                             ? false
320                             : oldElement.isDeprecated(); //  + (currentDeprecated ? "ⓓ" : "")
321             boolean lastTechPreview =
322                     dtdLast == null
323                             ? false
324                             : oldElement.isTechPreview(); //  + (currentDeprecated ? "ⓓ" : "")
325 
326             String deprecatedStatus =
327                     currentDeprecated == lastDeprecated
328                             ? ""
329                             : currentDeprecated ? DEPRECATED_PREFIX : UNDEPRECATED_PREFIX;
330             String orderingStatus =
331                     (ordered == oldOrdered || currentDeprecated)
332                             ? ""
333                             : ordered ? ORDERED_SIGN : UNORDERED_SIGN;
334             String previewStatus =
335                     (currentTechPreview == lastTechPreview || currentDeprecated)
336                             ? ""
337                             : currentTechPreview ? TECHPREVIEW_SIGN : UNTECHPREVIEW_SIGN;
338 
339             if (!orderingStatus.isEmpty()
340                     || !previewStatus.isEmpty()
341                     || !deprecatedStatus.isEmpty()
342                     || !attributeNames.isEmpty()) {
343                 addData(
344                         dtdCurrent,
345                         deprecatedStatus + previewStatus + name + orderingStatus,
346                         version,
347                         newPath,
348                         attributeNames);
349             }
350         }
351         if (element.getName().equals("coordinateUnit")) {
352             System.out.println(version + "\toordinateUnit\t" + element.getChildren().keySet());
353         }
354         Set<Element> oldChildren =
355                 oldElement == null ? Collections.emptySet() : oldElement.getChildren().keySet();
356         for (Element child : element.getChildren().keySet()) {
357             showAnyway = true;
358             for (Element oldChild : oldChildren) {
359                 if (oldChild.getName().equals(child.getName())) {
360                     showAnyway = false;
361                     break;
362                 }
363             }
364             checkNames(
365                     version,
366                     dtdCurrent,
367                     dtdLast,
368                     oldNameToElement,
369                     newPath,
370                     child,
371                     seen,
372                     showAnyway);
373         }
374     }
375 
    /** Kinds of DTD difference. Not referenced elsewhere in this file. */
    enum DiffType {
        Element,
        Attribute,
        AttributeValue
    }
381 
382     private static class DiffElement {
383 
384         private static final String START_ATTR = "<div>";
385         private static final String END_ATTR = "</div>";
386         final VersionInfo version;
387         final DtdType dtdType;
388         final boolean isBeta;
389         final String newPath;
390         final String newElement;
391         final String attributeNames;
392 
DiffElement( DtdData dtdCurrent, String version, String newPath, String newElement, Set<String> attributeNames2)393         public DiffElement(
394                 DtdData dtdCurrent,
395                 String version,
396                 String newPath,
397                 String newElement,
398                 Set<String> attributeNames2) {
399             isBeta = version.endsWith("β");
400             try {
401                 this.version =
402                         isBeta
403                                 ? VersionInfo.getInstance(
404                                         version.substring(0, version.length() - 1))
405                                 : VersionInfo.getInstance(version);
406             } catch (Exception e) {
407                 e.printStackTrace();
408                 throw e;
409             }
410             dtdType = dtdCurrent.dtdType;
411             this.newPath = fix(newPath);
412             this.attributeNames =
413                     attributeNames2.isEmpty()
414                             ? NONE
415                             : START_ATTR
416                                     + Joiner.on(END_ATTR + START_ATTR).join(attributeNames2)
417                                     + END_ATTR;
418             this.newElement = newElement;
419         }
420 
fix(String substring)421         private String fix(String substring) {
422             int base = substring.indexOf('/', 2);
423             if (base < 0) return "";
424             int last = substring.lastIndexOf('/');
425             if (last <= base) return "/";
426             substring = substring.substring(base, last);
427             return substring.replace("/", "\u200B/") + "/";
428         }
429 
430         @Override
toString()431         public String toString() {
432             return MoreObjects.toStringHelper(this)
433                     .add("version", getVersionString())
434                     .add("dtdType", dtdType)
435                     .add("newPath", newPath)
436                     .add("newElement", newElement)
437                     .add("attributeNames", attributeNames)
438                     .toString();
439         }
440 
getVersionString()441         private String getVersionString() {
442             return version.getVersionString(2, 4) + (isBeta ? "β" : "");
443         }
444     }
445 
446     List<DiffElement> data = new ArrayList<>();
447 
addData( DtdData dtdCurrent, String element, String prefix, String newPath, Set<String> attributeNames)448     private void addData(
449             DtdData dtdCurrent,
450             String element,
451             String prefix,
452             String newPath,
453             Set<String> attributeNames) {
454         DiffElement item = new DiffElement(dtdCurrent, prefix, newPath, element, attributeNames);
455         data.add(item);
456     }
457 
    /** Element names that checkNames ignores (together with their subtrees) in every DTD type. */
    static final Set<String> SKIP_ELEMENTS =
            ImmutableSet.of("generation", "identity", "special"); // , "telephoneCodeData"

    /** Element names that checkNames ignores only for the given DTD type. */
    static final Multimap<DtdType, String> SKIP_TYPE_ELEMENTS =
            ImmutableMultimap.of(DtdType.ldml, "alias");

    /** Attribute names that getAttributeNames never reports. */
    static final Set<String> SKIP_ATTRIBUTES = ImmutableSet.of("references", "standard", "draft");

    /**
     * (attribute name, match string) pairs that getAttributeNames does not report when the
     * attribute is new, or when only the match string changed and the old match string was empty.
     */
    static final Multimap<String, String> SKIP_ATTRIBUTE_MATCHES =
            ImmutableMultimap.of("alt", "", "alt", "⟪literal/variant⟫");
468 
getAttributeNames( DtdData dtdCurrent, DtdData dtdLast, String elementName, Map<Attribute, Integer> attributesOld, Map<Attribute, Integer> attributes)469     private static Set<String> getAttributeNames(
470             DtdData dtdCurrent,
471             DtdData dtdLast,
472             String elementName,
473             Map<Attribute, Integer> attributesOld,
474             Map<Attribute, Integer> attributes) {
475         Set<String> names = new LinkedHashSet<>();
476         if (elementName.equals("coordinateUnit")) {
477             int debug = 0;
478         }
479 
480         main:
481         // we want to add a name that is new or that becomes deprecated
482         for (Attribute attribute : attributes.keySet()) {
483             String name = attribute.getName();
484             if (SKIP_ATTRIBUTES.contains(name)) {
485                 continue;
486             }
487             String match = attribute.getMatchString();
488             AttributeStatus status = attribute.attributeStatus;
489             String display = NEW_PREFIX + name;
490             //            if (isDeprecated(dtdCurrent, elementName, name)) { //
491             // SDI.isDeprecated(dtdCurrent, elementName, name, "*")) {
492             //                continue;
493             //            }
494             String oldMatch = "?";
495             String pre, post;
496             Attribute attributeOld = attribute.getMatchingName(attributesOld);
497             if (attributeOld == null) {
498                 if (SKIP_ATTRIBUTE_MATCHES.containsEntry(name, match)) {
499                     continue main;
500                 }
501                 display =
502                         NEW_PREFIX
503                                 + name
504                                 + " "
505                                 + AttributeStatus.getShortName(status)
506                                 + " "
507                                 + match;
508             } else if (attribute.isDeprecated() && !attributeOld.isDeprecated()) {
509                 display = DEPRECATED_PREFIX + name;
510             } else {
511                 oldMatch = attributeOld.getMatchString();
512                 AttributeStatus oldStatus = attributeOld.attributeStatus;
513 
514                 boolean matchEquals = match.equals(oldMatch);
515                 if (status != oldStatus) {
516                     pre = AttributeStatus.getShortName(oldStatus);
517                     post = AttributeStatus.getShortName(status);
518                     if (!matchEquals) {
519                         pre += " " + oldMatch;
520                         post += " " + match;
521                     }
522                 } else if (!matchEquals) {
523                     if (oldMatch.isEmpty() && SKIP_ATTRIBUTE_MATCHES.containsEntry(name, match)) {
524                         continue main;
525                     }
526                     pre = oldMatch;
527                     post = match;
528                 } else {
529                     continue main; // skip attribute entirely;
530                 }
531                 display = name + " " + diff(pre, post);
532             }
533             names.add(display);
534         }
535         return names;
536     }
537 
diff(String pre, String post)538     public static String diff(String pre, String post) {
539         Matcher matcherPre = Attribute.LEAD_TRAIL.matcher(pre);
540         Matcher matcherPost = Attribute.LEAD_TRAIL.matcher(post);
541         if (matcherPre.matches() && matcherPost.matches()) {
542             List<String> preParts = SPLITTER_SPACE.splitToList(matcherPre.group(2));
543             List<String> postParts = SPLITTER_SPACE.splitToList(matcherPost.group(2));
544             pre = matcherPre.group(1) + remove(preParts, postParts) + matcherPre.group(3);
545             post = matcherPost.group(1) + remove(postParts, preParts) + matcherPost.group(3);
546         }
547         return pre + "➠" + post;
548     }
549 
remove(List<String> main, List<String> toRemove)550     private static String remove(List<String> main, List<String> toRemove) {
551         List<String> result = new ArrayList<>();
552         boolean removed = false;
553         for (String s : main) {
554             if (toRemove.contains(s)) {
555                 removed = true;
556             } else {
557                 if (removed) {
558                     result.add("…");
559                     removed = false;
560                 }
561                 result.add(s);
562             }
563         }
564         if (removed) {
565             result.add("…");
566         }
567         return Joiner.on(" ").join(result);
568     }
569 
570     //    private static boolean isDeprecated(DtdData dtdCurrent, String elementName, String
571     // attributeName) {
572     //        try {
573     //            return dtdCurrent.isDeprecated(elementName, attributeName, "*");
574     //        } catch (DtdData.IllegalByDtdException e) {
575     //            return true;
576     //        }
577     //    }
578 }
579