package org.unicode.cldr.util;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.google.common.collect.TreeMultimap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity.Status;

/**
 * Command-line tool that compares two {@code languageGroup.xml} files (an "old" one and a "new"
 * one) and prints a tab-separated report to standard output: children listed under multiple
 * parents, CLDR languages missing from each file, codes that are not regular language codes, and
 * the codes that were removed, added, or moved to a different ancestor chain.
 */
public class DiffLanguageGroups {
    /** Label used in the report for the baseline file. */
    static final String OLD = "OLD";

    /** Label used in the report for the file under review. */
    static final String NEW = "NEW";

    /** Separator printed between a code and each of its successive ancestors. */
    private static final String IN = " ➡︎ ";

    static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    static final SupplementalDataInfo SDI = CONFIG.getSupplementalDataInfo();
    static final CLDRFile ENGLISH = CONFIG.getEnglish();

    /** Coverage locales of the {@code cldr} organization, restricted to IDs without "_". */
    static final Set<String> CLDR_ORG_LANGUAGES =
            StandardCodes.make().getLocaleCoverageLocales(Organization.cldr).stream()
                    .filter(x -> !x.contains("_"))
                    .collect(Collectors.toUnmodifiableSet());

    /**
     * Languages available from the CLDR factory (no "_" in the ID, excluding "root") that are not
     * already in {@link #CLDR_ORG_LANGUAGES}.
     */
    static final Set<String> OTHER_CLDR_LANGUAGES =
            Sets.difference(
                    CONFIG.getCldrFactory().getAvailableLanguages().stream()
                            .filter(x -> !x.contains("_") && !x.equals("root"))
                            .collect(Collectors.toUnmodifiableSet()),
                    CLDR_ORG_LANGUAGES);

    /**
     * Runs the comparison and prints the report.
     *
     * @param args optional; {@code args[0]} overrides the path of the NEW file and {@code args[1]}
     *     overrides the path of the OLD file. Without arguments the paths default to files under
     *     {@code CLDRPaths.COMMON_DIRECTORY}.
     */
    public static void main(String[] args) {
        String newPath = CLDRPaths.COMMON_DIRECTORY + "supplemental/languageGroup.xml";
        String oldPath = CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/languageGroup43.xml";
        if (args.length > 0) {
            newPath = args[0];
            if (args.length > 1) {
                oldPath = args[1];
            }
        }
        // "mul" is accepted in addition to the codes with regular validity status.
        final Set<String> validRegular =
                Sets.union(
                        Validity.getInstance()
                                .getStatusToCodes(LstrType.language)
                                .get(Status.regular),
                        Set.of("mul"));

        // Get OLD information

        SortedMap<String, String> oldChildToParent = loadChildToParent(OLD, "v43", oldPath);
        Set<String> oldSet = getAllKeysAndValues(oldChildToParent);
        reportCoverage(OLD, oldSet, validRegular);

        // Get NEW information

        SortedMap<String, String> newChildToParent = loadChildToParent(NEW, "PR", newPath);
        Set<String> newSet = getAllKeysAndValues(newChildToParent);
        reportCoverage(NEW, newSet, validRegular);

        // Show differences

        showDiff("Δ Removing (" + OLD + "-" + NEW + ")", Sets.difference(oldSet, newSet));

        // The difference is a live view over two immutable sets, so it can be reused safely.
        Set<String> added = Sets.difference(newSet, oldSet);
        showDiff("Δ Adding (" + NEW + "-" + OLD + ")", added);
        for (String joint : added) {
            printChain(
                    NEW + " Added", joint, getChain(joint, newChildToParent, new ArrayList<>()));
        }

        // A code present in both files has "moved" when its ancestor chain differs.
        Set<String> changed = new TreeSet<>();
        for (String joint : Sets.intersection(oldSet, newSet)) {
            List<String> oldChain = getChain(joint, oldChildToParent, new ArrayList<>());
            List<String> newChain = getChain(joint, newChildToParent, new ArrayList<>());
            if (!oldChain.equals(newChain)) {
                changed.add(joint);
            }
        }
        showDiff("Δ Moving (" + OLD + " to " + NEW + ")", changed);

        for (String joint : changed) {
            printChain(
                    OLD + " Removed ",
                    joint,
                    getChain(joint, oldChildToParent, new ArrayList<>()));
            printChain(
                    NEW + " Moved to ",
                    joint,
                    getChain(joint, newChildToParent, new ArrayList<>()));
        }
    }

    /**
     * Prints the section header for one input file, loads it, reports any child that is listed
     * under more than one parent, and returns the resulting child-to-parent map.
     */
    private static SortedMap<String, String> loadChildToParent(
            String label, String description, String path) {
        System.out.println("* " + label + " = " + description + "\t\t");
        Multimap<String, String> errors = TreeMultimap.create();
        SortedMap<String, String> childToParent = invertToMap(loadLanguageGroups(path), errors);
        if (!errors.isEmpty()) {
            showErrors(label, errors);
        }
        return childToParent;
    }

    /**
     * Reports CLDR languages that are absent from {@code codes}, and members of {@code codes} that
     * are not in {@code validRegular}.
     */
    private static void reportCoverage(String label, Set<String> codes, Set<String> validRegular) {
        checkAgainstReference(label + " Missing", "∉ CLDR_ORG", CLDR_ORG_LANGUAGES, codes);
        checkAgainstReference(label + " Missing", "∉ CLDR_Other", OTHER_CLDR_LANGUAGES, codes);
        checkAgainstReference(label + " Invalid", "", codes, validRegular);
    }

    /** Prints one report row: the label, the code, and the code's ancestor chain. */
    private static void printChain(String label, String joint, List<String> chain) {
        System.out.println(
                label
                        + "\t"
                        + show(joint)
                        + "\t"
                        + IN
                        + chain.stream()
                                .map(DiffLanguageGroups::show)
                                .collect(Collectors.joining(IN)));
    }

    /**
     * Prints the members of {@code cldrLanguages} that are not contained in {@code oldSet}, as one
     * tab-separated row prefixed by {@code col1} and {@code col2}. Prints nothing when there are no
     * such members.
     */
    private static void checkAgainstReference(
            String col1, String col2, Set<String> cldrLanguages, Set<String> oldSet) {
        SetView<String> missing = Sets.difference(cldrLanguages, oldSet);
        if (!missing.isEmpty()) {
            System.out.println(
                    col1
                            + "\t"
                            + col2
                            + "\t"
                            + missing.stream()
                                    .map(DiffLanguageGroups::show)
                                    .collect(Collectors.joining(", ")));
        }
    }

    /**
     * Prints {@code title}, the size of the set, and its members as a comma-separated list.
     * Prints nothing when the set is empty.
     *
     * @param title row heading
     * @param oldMinusOther language codes to list
     */
    public static void showDiff(String title, Set<String> oldMinusOther) {
        if (!oldMinusOther.isEmpty()) {
            System.out.println(
                    title
                            + "\t"
                            + oldMinusOther.size()
                            + ":\t"
                            + oldMinusOther.stream()
                                    .map(DiffLanguageGroups::show)
                                    .collect(Collectors.joining(", ")));
        }
    }

    /**
     * Formats a language code for display: the English name followed by the code in brackets.
     * The code {@code "mul"} is shown as "Ω".
     */
    static String show(String languageCode) {
        return languageCode.equals("mul")
                ? "Ω"
                : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, languageCode) + " ⁅" + languageCode + "⁆";
    }

    /**
     * Prints one "Multiple parents" row per key of {@code oldErrors}, listing the key and all of
     * its recorded parents.
     *
     * @param title label of the file the errors belong to
     * @param oldErrors child code to the parents it was listed under
     */
    public static void showErrors(String title, Multimap<String, String> oldErrors) {
        for (Entry<String, Collection<String>> entry : oldErrors.asMap().entrySet()) {
            System.out.println(
                    title
                            + " Multiple parents"
                            + "\t"
                            + show(entry.getKey())
                            + "\t"
                            + entry.getValue().stream()
                                    .map(DiffLanguageGroups::show)
                                    .collect(Collectors.joining(" ")));
        }
    }

    /**
     * Appends the successive ancestors of {@code joint} (parent, grandparent, ...) to {@code
     * result} and returns it.
     *
     * <p>The walk stops when a code has no parent. It also stops at the first code that was already
     * visited, so that cyclic input data (for example a code listed as its own ancestor) yields a
     * truncated chain instead of unbounded recursion and a {@code StackOverflowError}.
     *
     * @param joint the code whose ancestors are wanted
     * @param childToParent map from each code to its parent
     * @param result list to append to; returned for convenience
     */
    private static List<String> getChain(
            String joint, Map<String, String> childToParent, List<String> result) {
        Set<String> visited = new TreeSet<>();
        visited.add(joint);
        String parent = childToParent.get(joint);
        // Set.add returns false for a code seen before, which signals a cycle.
        while (parent != null && visited.add(parent)) {
            result.add(parent);
            parent = childToParent.get(parent);
        }
        return result;
    }

    /**
     * Reads the given XML file and returns an immutable parent-to-children multimap built from the
     * {@code parent} attribute and whitespace-separated value of each path/value pair.
     *
     * @param filename path of the language-group XML file
     */
    public static Multimap<String, String> loadLanguageGroups(String filename) {
        Multimap<String, String> parentToChildren = TreeMultimap.create();

        for (Pair<String, String> item :
                XMLFileReader.loadPathValues(
                        filename, new ArrayList<Pair<String, String>>(), false)) {
            handleLanguageGroups(
                    item.getSecond(),
                    XPathParts.getFrozenInstance(item.getFirst()),
                    parentToChildren);
        }
        return ImmutableSetMultimap.copyOf(parentToChildren);
    }

    /**
     * Inverts a parent-to-children multimap into an immutable child-to-parent map.
     *
     * <p>When a child appears under more than one parent, the parent encountered last wins in the
     * returned map, and both the previous and the current parent are recorded under that child in
     * {@code childToParents}.
     *
     * @param oldParentToChildren parent code to child codes
     * @param childToParents output parameter collecting children that have several parents
     */
    public static SortedMap<String, String> invertToMap(
            Multimap<String, String> oldParentToChildren, Multimap<String, String> childToParents) {
        TreeMap<String, String> childToParent = new TreeMap<>();
        for (Entry<String, String> parentAndChild : oldParentToChildren.entries()) {
            final String parent = parentAndChild.getKey();
            final String child = parentAndChild.getValue();
            String previousParent = childToParent.put(child, parent);
            if (previousParent != null) {
                childToParents.put(child, previousParent);
                childToParents.put(child, parent);
            }
        }
        return ImmutableSortedMap.copyOf(childToParent);
    }

    /**
     * Returns an immutable set holding every key and every value of the map, iterating in natural
     * (sorted) order.
     */
    public static Set<String> getAllKeysAndValues(Map<String, String> newItems) {
        Set<String> all = new TreeSet<>(newItems.values());
        all.addAll(newItems.keySet());
        return ImmutableSet.copyOf(all);
    }

    /**
     * Adds one path/value pair to {@code languageGroups}: the key is the {@code parent} attribute
     * of the last path element, the values are the whitespace-separated tokens of {@code value}.
     *
     * @return always {@code true}
     */
    private static boolean handleLanguageGroups(
            String value, XPathParts parts, Multimap<String, String> languageGroups) {
        // NOTE(review): no check that the path actually carries a "parent" attribute; presumably
        // other paths have no tokens in their value, so nothing is inserted. Confirm.
        String parent = parts.getAttributeValue(-1, "parent");
        List<String> children = SupplementalDataInfo.WHITESPACE_SPLTTER.splitToList(value);
        languageGroups.putAll(parent, children);
        return true;
    }
}