xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/DiffLanguageGroups.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.collect.ImmutableSet;
4 import com.google.common.collect.ImmutableSetMultimap;
5 import com.google.common.collect.ImmutableSortedMap;
6 import com.google.common.collect.Multimap;
7 import com.google.common.collect.Sets;
8 import com.google.common.collect.Sets.SetView;
9 import com.google.common.collect.TreeMultimap;
10 import java.util.ArrayList;
11 import java.util.Collection;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.SortedMap;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 import java.util.stream.Collectors;
20 import org.unicode.cldr.util.StandardCodes.LstrType;
21 import org.unicode.cldr.util.Validity.Status;
22 
23 public class DiffLanguageGroups {
24     static final String OLD = "OLD";
25     static final String NEW = "NEW";
26     private static final String IN = " ➡︎ ";
27     static final CLDRConfig CONFIG = CLDRConfig.getInstance();
28     static final SupplementalDataInfo SDI = CONFIG.getSupplementalDataInfo();
29     static final CLDRFile ENGLISH = CONFIG.getEnglish();
30     static final Set<String> CLDR_ORG_LANGUAGES =
31             StandardCodes.make().getLocaleCoverageLocales(Organization.cldr).stream()
32                     .filter(x -> !x.contains("_"))
33                     .collect(Collectors.toUnmodifiableSet());
34     static final Set<String> OTHER_CLDR_LANGUAGES =
35             Sets.difference(
36                     CONFIG.getCldrFactory().getAvailableLanguages().stream()
37                             .filter(x -> !x.contains("_") && !x.equals("root"))
38                             .collect(Collectors.toUnmodifiableSet()),
39                     CLDR_ORG_LANGUAGES);
40 
main(String[] args)41     public static void main(String[] args) {
42         String newPath = CLDRPaths.COMMON_DIRECTORY + "supplemental/languageGroup.xml";
43         String oldPath = CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/languageGroup43.xml";
44         if (args.length > 0) {
45             newPath = args[0];
46             if (args.length > 1) {
47                 oldPath = args[1];
48             }
49         }
50         final Set<String> validRegular =
51                 Sets.union(
52                         Validity.getInstance()
53                                 .getStatusToCodes(LstrType.language)
54                                 .get(Status.regular),
55                         Set.of("mul"));
56 
57         // Get OLD information
58 
59         System.out.println("* " + OLD + " = " + "v43\t\t");
60         Multimap<String, String> oldErrors = TreeMultimap.create();
61 
62         SortedMap<String, String> oldChildToParent =
63                 invertToMap(loadLanguageGroups(oldPath), oldErrors);
64         if (!oldErrors.isEmpty()) {
65             showErrors(OLD, oldErrors);
66         }
67         Set<String> oldSet = getAllKeysAndValues(oldChildToParent);
68         checkAgainstReference(OLD + " Missing", "∉ CLDR_ORG", CLDR_ORG_LANGUAGES, oldSet);
69         checkAgainstReference(OLD + " Missing", "∉ CLDR_Other", OTHER_CLDR_LANGUAGES, oldSet);
70         checkAgainstReference(OLD + " Invalid", "", oldSet, validRegular);
71 
72         // get NEW information
73 
74         System.out.println("* " + NEW + " = " + "PR\t\t");
75         Multimap<String, String> newErrors = TreeMultimap.create();
76         SortedMap<String, String> newChildToParent =
77                 invertToMap(loadLanguageGroups(newPath), newErrors);
78         if (!newErrors.isEmpty()) {
79             showErrors(NEW, newErrors);
80         }
81 
82         Set<String> newSet = getAllKeysAndValues(newChildToParent);
83         checkAgainstReference(NEW + " Missing", "∉ CLDR_ORG", CLDR_ORG_LANGUAGES, newSet);
84         checkAgainstReference(NEW + " Missing", "∉ CLDR_Other", OTHER_CLDR_LANGUAGES, newSet);
85         checkAgainstReference(NEW + " Invalid", "", newSet, validRegular);
86 
87         // Show differences
88 
89         showDiff("Δ Removing (" + OLD + "-" + NEW + ")", Sets.difference(oldSet, newSet));
90 
91         showDiff("Δ Adding (" + NEW + "-" + OLD + ")", Sets.difference(newSet, oldSet));
92         for (String joint : Sets.difference(newSet, oldSet)) {
93             List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
94             System.out.println(
95                     NEW
96                             + " Added"
97                             + "\t"
98                             + show(joint)
99                             + "\t"
100                             + IN
101                             + newChain.stream().map(x -> show(x)).collect(Collectors.joining(IN)));
102         }
103 
104         Set<String> changed = new TreeSet<>();
105         for (String joint : Sets.intersection(oldSet, newSet)) {
106             List<String> oldChain = getChain(joint, oldChildToParent, new ArrayList<>());
107             List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
108             if (!oldChain.equals(newChain)) {
109                 changed.add(joint);
110             }
111         }
112         showDiff("Δ Moving (" + OLD + " to " + NEW + ")", changed);
113 
114         for (String joint : changed) {
115             List<String> oldChain = getChain(joint, oldChildToParent, new ArrayList<>());
116             List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
117             System.out.println(
118                     OLD
119                             + " Removed "
120                             + "\t"
121                             + show(joint)
122                             + "\t"
123                             + IN
124                             + oldChain.stream().map(x -> show(x)).collect(Collectors.joining(IN)));
125             System.out.println(
126                     NEW
127                             + " Moved to "
128                             + "\t"
129                             + show(joint)
130                             + "\t"
131                             + IN
132                             + newChain.stream().map(x -> show(x)).collect(Collectors.joining(IN)));
133         }
134     }
135 
checkAgainstReference( String col1, String col2, Set<String> cldrLanguages, Set<String> oldSet)136     private static void checkAgainstReference(
137             String col1, String col2, Set<String> cldrLanguages, Set<String> oldSet) {
138         SetView<String> missing = Sets.difference(cldrLanguages, oldSet);
139         if (!missing.isEmpty()) {
140             System.out.println(
141                     col1
142                             + "\t"
143                             + col2
144                             + "\t"
145                             + missing.stream().map(x -> show(x)).collect(Collectors.joining(", ")));
146         }
147     }
148 
showDiff(String title, Set<String> oldMinusOther)149     public static void showDiff(String title, Set<String> oldMinusOther) {
150         if (!oldMinusOther.isEmpty()) {
151             System.out.println(
152                     title
153                             + "\t"
154                             + oldMinusOther.size()
155                             + ":\t"
156                             + oldMinusOther.stream()
157                                     .map(x -> show(x))
158                                     .collect(Collectors.joining(", ")));
159         }
160     }
161 
show(String languageCode)162     static String show(String languageCode) {
163         return languageCode.equals("mul")
164                 ? "Ω"
165                 : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, languageCode) + " ⁅" + languageCode + "⁆";
166     }
167 
showErrors(String title, Multimap<String, String> oldErrors)168     public static void showErrors(String title, Multimap<String, String> oldErrors) {
169         for (Entry<String, Collection<String>> entry : oldErrors.asMap().entrySet()) {
170             System.out.println(
171                     title
172                             + " Multiple parents"
173                             + "\t"
174                             + show(entry.getKey())
175                             + "\t"
176                             + entry.getValue().stream()
177                                     .map(x -> show(x))
178                                     .collect(Collectors.joining(" ���� ")));
179         }
180     }
181 
getChain( String joint, Map<String, String> childToParent, List<String> result)182     private static List<String> getChain(
183             String joint, Map<String, String> childToParent, List<String> result) {
184         String parent = childToParent.get(joint);
185         if (parent == null) {
186             return result;
187         }
188         result.add(parent);
189         return getChain(parent, childToParent, result);
190     }
191 
loadLanguageGroups(String filename)192     public static Multimap<String, String> loadLanguageGroups(String filename) {
193         Multimap<String, String> newParentToChildren = TreeMultimap.create();
194 
195         for (Pair<String, String> item :
196                 XMLFileReader.loadPathValues(
197                         filename, new ArrayList<Pair<String, String>>(), false)) {
198             handleLanguageGroups(
199                     item.getSecond(),
200                     XPathParts.getFrozenInstance(item.getFirst()),
201                     newParentToChildren);
202         }
203         newParentToChildren = ImmutableSetMultimap.copyOf(newParentToChildren);
204         return newParentToChildren;
205     }
206 
invertToMap( Multimap<String, String> oldParentToChildren, Multimap<String, String> childToParents)207     public static SortedMap<String, String> invertToMap(
208             Multimap<String, String> oldParentToChildren, Multimap<String, String> childToParents) {
209         TreeMap<String, String> childToParent = new TreeMap<>();
210         for (Entry<String, String> parentToChildren : oldParentToChildren.entries()) {
211             final String parent = parentToChildren.getKey();
212             final String child = parentToChildren.getValue();
213             String old = childToParent.put(child, parent);
214             if (old != null) {
215                 childToParents.put(child, old);
216                 childToParents.put(child, parent);
217             }
218         }
219         return ImmutableSortedMap.copyOf(childToParent);
220     }
221 
getAllKeysAndValues(Map<String, String> newItems)222     public static Set<String> getAllKeysAndValues(Map<String, String> newItems) {
223         Set<String> newSet = new TreeSet<>(newItems.values());
224         newSet.addAll(newItems.keySet());
225         return ImmutableSet.copyOf(newSet);
226     }
227 
handleLanguageGroups( String value, XPathParts parts, Multimap<String, String> languageGroups)228     private static boolean handleLanguageGroups(
229             String value, XPathParts parts, Multimap<String, String> languageGroups) {
230         String parent = parts.getAttributeValue(-1, "parent");
231         List<String> children = SupplementalDataInfo.WHITESPACE_SPLTTER.splitToList(value);
232         languageGroups.putAll(parent, children);
233         return true;
234     }
235 }
236