1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Objects; 4 import com.ibm.icu.impl.Relation; 5 import com.ibm.icu.text.UnicodeSet; 6 import java.io.IOException; 7 import java.io.PrintWriter; 8 import java.util.ArrayList; 9 import java.util.Arrays; 10 import java.util.HashSet; 11 import java.util.LinkedHashMap; 12 import java.util.LinkedHashSet; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 import org.unicode.cldr.draft.FileUtilities; 18 import org.unicode.cldr.tool.Option.Options; 19 import org.unicode.cldr.util.CLDRConfig; 20 import org.unicode.cldr.util.CLDRFile; 21 import org.unicode.cldr.util.CLDRFile.DraftStatus; 22 import org.unicode.cldr.util.CLDRLocale; 23 import org.unicode.cldr.util.CLDRPaths; 24 import org.unicode.cldr.util.ChainedMap; 25 import org.unicode.cldr.util.ChainedMap.M3; 26 import org.unicode.cldr.util.ChainedMap.M4; 27 import org.unicode.cldr.util.Counter; 28 import org.unicode.cldr.util.Factory; 29 import org.unicode.cldr.util.LanguageTagParser; 30 import org.unicode.cldr.util.PathHeader; 31 import org.unicode.cldr.util.PathHeader.SectionId; 32 import org.unicode.cldr.util.StandardCodes; 33 import org.unicode.cldr.util.SupplementalDataInfo; 34 35 public class ShowRegionalVariants { 36 private static String MY_DIR; 37 38 private static final boolean SKIP_SUPPRESSED_PATHS = true; 39 40 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 41 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = 42 CONFIG.getSupplementalDataInfo(); 43 private static final Factory FACTORY = CONFIG.getCldrFactory(); 44 private static final CLDRFile ENGLISH = CONFIG.getEnglish(); 45 private static final CLDRLocale ROOT = CLDRLocale.getInstance("root"); 46 // private static final CLDRLocale en_US_POSIX = CLDRLocale.getInstance("en_US_POSIX"); 47 private static final CLDRLocale SWISS_HIGH_GERMAN = CLDRLocale.getInstance("de_CH"); 48 49 static final Options myOptions = new Options(); 50 51 enum MyOptions { 52 targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/regional/", "target output file."), 53 ; 54 55 // boilderplate 56 final Option option; 57 MyOptions(String argumentPattern, String defaultArgument, String helpText)58 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 59 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 60 } 61 } 62 main(String[] args)63 public static void main(String[] args) throws IOException { 64 myOptions.parse(MyOptions.targetDir, args, true); 65 66 MY_DIR = MyOptions.targetDir.option.getValue(); 67 68 Set<String> coverageLocales = StandardCodes.make().getLocaleCoverageLocales("cldr"); 69 Set<String> dc = new HashSet<>(SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales()); 70 Set<String> skipLocales = new HashSet<>(Arrays.asList("root", "en_US_POSIX", "sr_Latn")); 71 72 Relation<CLDRLocale, CLDRLocale> parentToChildren = 73 Relation.of(new TreeMap<CLDRLocale, Set<CLDRLocale>>(), TreeSet.class); 74 // first, collect all locales for lookup by parents. 75 76 for (String locale : FACTORY.getAvailable()) { 77 if (skipLocales.contains(locale.toString()) || dc.contains(locale.toString())) { 78 continue; 79 } 80 CLDRLocale loc = CLDRLocale.getInstance(locale); 81 82 if (!coverageLocales.contains(loc.getLanguage())) { 83 continue; 84 } 85 CLDRLocale parent = null; 86 for (CLDRLocale current = loc; ; current = parent) { 87 parent = current.getParent(); 88 if (!dc.contains(parent.toString())) { // skip over default content 89 break; 90 } 91 } 92 if (ROOT.equals(parent)) { 93 continue; 94 } else if ("root".equals(parent.toString())) { 95 throw new IllegalArgumentException("CLDRLocale failure"); 96 } 97 parentToChildren.put(parent, loc); 98 } 99 100 // show inheritance 101 System.out.println("Locale Name\tCode\tRegion\tInherits from\tCode"); 102 showInheritance(parentToChildren); 103 104 // next find out the unique items in children 105 Relation<String, String> valueToAncestors = 106 Relation.of(new LinkedHashMap<String, Set<String>>(), LinkedHashSet.class); 107 108 int count = 0; 109 110 try (PrintWriter grandSummary = FileUtilities.openUTF8Writer(MY_DIR, "GrandSummary.txt"); 111 PrintWriter summary = FileUtilities.openUTF8Writer(MY_DIR, "Summary.txt"); 112 PrintWriter detailFile = FileUtilities.openUTF8Writer(MY_DIR, "details.txt"); ) { 113 grandSummary.println("Parent\tName\tTotal Diff Count\tChildren"); 114 summary.println("Parent\tName\tDiff Count\tChild\tChild Name"); 115 detailFile.println( 116 "№\tBase\tParent Locales I\tParent Locales II\tChild Locales\tEnglish value\tParent value I\tParent value II\tChild value\tCorrected Child value\tComments\tFix Parent value?\tSection\tPage\tHeader\tCode"); 117 PathHeader.Factory phf = PathHeader.getFactory(ENGLISH); 118 String lastBase = ""; 119 for (Entry<CLDRLocale, Set<CLDRLocale>> item : parentToChildren.keyValuesSet()) { 120 CLDRLocale parent = item.getKey(); 121 String base = parent.getLanguage(); 122 123 CLDRFile parentFile = 124 FACTORY.make(parent.toString(), true, DraftStatus.contributed); 125 M4<PathHeader, String, CLDRLocale, Boolean> pathToValuesToLocales = 126 ChainedMap.of( 127 new TreeMap<PathHeader, Object>(), 128 new TreeMap<String, Object>(), 129 new TreeMap<CLDRLocale, Object>(), 130 Boolean.class); 131 132 Counter<CLDRLocale> childDiffs = new Counter<>(); 133 134 for (CLDRLocale child : item.getValue()) { 135 // childDiffs.add(child, 0); // make sure it shows up 136 String childString = child.toString(); 137 CLDRFile childFile = FACTORY.make(childString, false, DraftStatus.contributed); 138 for (String path : childFile) { 139 if (SKIP_SUPPRESSED_PATHS) { 140 if (path.contains("/currency") && path.contains("/symbol")) { 141 continue; 142 } 143 } 144 String childValue = childFile.getStringValue(path); 145 if (childValue == null) { 146 continue; 147 } 148 String parentValue = parentFile.getStringValue(path); 149 if (parentValue == null) { 150 parentValue = "∅∅∅"; 151 } 152 if (!Objects.equal(childValue, parentValue)) { 153 if (SKIP_SUPPRESSED_PATHS) { 154 if ("∅∅∅".equals(childValue) || "∅∅∅".equals(parentValue)) { 155 continue; // skip suppressed paths 156 } 157 } 158 if (parentValue != null) { 159 if (child.equals(SWISS_HIGH_GERMAN)) { 160 String norm = parentValue.replace("ß", "ss"); 161 if (childValue.equals(norm)) { 162 continue; 163 } 164 } else if (base.equals("en")) { 165 if (sameExceptEnd(childValue, "re", parentValue, "er") 166 || sameExceptEnd( 167 childValue, "res", parentValue, "ers")) { 168 continue; 169 } 170 } 171 } 172 PathHeader pheader = phf.fromPath(path); 173 if (SectionId.Special == pheader.getSectionId()) { 174 continue; 175 } 176 pathToValuesToLocales.put(pheader, childValue, child, Boolean.TRUE); 177 childDiffs.add(child, 1); 178 } 179 } 180 } 181 182 long totalChildDiffs = childDiffs.getTotal(); 183 if (totalChildDiffs == 0) { 184 continue; 185 } 186 187 if (!base.equals(lastBase)) { 188 detailFile.println(); 189 // if (detailFile != null) { 190 // detailFile.close(); 191 // } 192 // detailFile = FileUtilities.openUTF8Writer(MY_DIR, 193 // "detail-" + base + ".txt"); 194 // 195 // detailFile.println("Section\tPage\tHeader\tCode\tLocales\tvalue\tParent 196 // Locales\tvalue\tParent Locales\tvalue"); 197 // lastBase = base; 198 } 199 200 grandSummary.println( 201 parent 202 + "\t" 203 + ENGLISH.getName(parent.toString()) 204 + "\t" 205 + totalChildDiffs 206 + "\t" 207 + item.getValue()); 208 for (CLDRLocale s : childDiffs.getKeysetSortedByKey()) { 209 long childDiffValue = childDiffs.get(s); 210 if (childDiffValue == 0) { 211 continue; 212 } 213 summary.println( 214 parent 215 + "\t" 216 + ENGLISH.getName(parent.toString()) 217 + "\t" 218 + childDiffValue 219 + "\t" 220 + s 221 + "\t" 222 + ENGLISH.getName(s.toString())); 223 } 224 225 ArrayList<CLDRFile> parentChain = new ArrayList<>(); 226 for (CLDRLocale current = parent; ; ) { 227 parentChain.add(FACTORY.make(current.toString(), true)); 228 CLDRLocale grand = current.getParent(); 229 if (ROOT.equals(grand)) { 230 break; 231 } 232 current = grand; 233 } 234 235 for (PathHeader ph : pathToValuesToLocales.keySet()) { 236 M3<String, CLDRLocale, Boolean> values = pathToValuesToLocales.get(ph); 237 valueToAncestors.clear(); 238 for (String value : values.keySet()) { 239 Set<CLDRLocale> childLocales = values.get(value).keySet(); 240 String englishValue = ENGLISH.getStringValue(ph.getOriginalPath()); 241 String originalPath = ph.getOriginalPath(); 242 for (CLDRFile grand : parentChain) { 243 valueToAncestors.put( 244 quote(grand.getStringValue(originalPath)), grand.getLocaleID()); 245 } 246 Set<Entry<String, Set<String>>> keyValuesSet = 247 valueToAncestors.keyValuesSet(); 248 final int countParents = keyValuesSet.size(); 249 if (countParents < 1 || countParents > 2) { 250 throw new IllegalArgumentException("Too few/many parents"); 251 } 252 253 // // № Base Parent Locales I Parent Locales II Child Locales 254 // English value Parent value I Parent value II Child value 255 // Corrected Child value Comments Fix Parent value? Section Page 256 // Header Code 257 258 detailFile.print(++count + "\t" + base); 259 260 for (Entry<String, Set<String>> entry : keyValuesSet) { 261 detailFile.print("\t" + entry.getValue()); 262 } 263 if (countParents == 1) { 264 detailFile.print("\t"); 265 } 266 detailFile.print("" + "\t" + childLocales + "\t" + quote(englishValue)); 267 for (Entry<String, Set<String>> entry : keyValuesSet) { 268 detailFile.print("\t" + entry.getKey()); 269 } 270 if (countParents == 1) { 271 detailFile.print("\t"); 272 } 273 detailFile.print( 274 "" 275 + "\t" 276 + quote(value) 277 + "\t" 278 + "" 279 + "\t" 280 + "" 281 + "\t" 282 + "" 283 + "\t" 284 + ph); 285 detailFile.println(); 286 } 287 } 288 } 289 } 290 System.out.println("DONE"); 291 // if (detailFile != null) { 292 // detailFile.close(); 293 // } 294 } 295 showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren)296 private static void showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren) { 297 Set<CLDRLocale> values = parentToChildren.values(); 298 Set<CLDRLocale> topParents = new TreeSet<>(parentToChildren.keySet()); 299 topParents.removeAll(values); 300 showInheritance(topParents, "", parentToChildren); 301 } 302 showInheritance( Set<CLDRLocale> topParents, String prefix, Relation<CLDRLocale, CLDRLocale> parentToChildren)303 private static void showInheritance( 304 Set<CLDRLocale> topParents, 305 String prefix, 306 Relation<CLDRLocale, CLDRLocale> parentToChildren) { 307 for (CLDRLocale locale : topParents) { 308 String current = nameForLocale(locale) + "\t" + prefix; 309 System.out.println(current); 310 Set<CLDRLocale> newChildren = parentToChildren.get(locale); 311 if (newChildren == null) { 312 continue; 313 } 314 showInheritance(newChildren, current, parentToChildren); 315 } 316 } 317 318 static final LikelySubtags LS = new LikelySubtags(); 319 nameForLocale(CLDRLocale key)320 private static String nameForLocale(CLDRLocale key) { 321 String country = key.getCountry(); 322 if (country.isEmpty()) { 323 String max = LS.maximize(key.toString()); 324 LanguageTagParser ltp = new LanguageTagParser().set(max); 325 country = "(" + ltp.getRegion() + ")"; 326 } 327 return ENGLISH.getName(key.toString(), false, CLDRFile.SHORT_ALTS) 328 + "\t" 329 + key 330 + "\t" 331 + country; 332 } 333 sameExceptEnd( String childValue, String childEnding, String parentValue, String parentEnding)334 private static boolean sameExceptEnd( 335 String childValue, String childEnding, String parentValue, String parentEnding) { 336 if (childValue.endsWith(childEnding) 337 && parentValue.endsWith(parentEnding) 338 && childValue 339 .substring(0, childValue.length() - childEnding.length()) 340 .equals( 341 parentValue.substring( 342 0, parentValue.length() - parentEnding.length()))) { 343 return true; 344 } 345 return false; 346 } 347 348 static final UnicodeSet SPREAD_SHEET_SENSITIVE = 349 new UnicodeSet().add('=').add('+').add('0', '9'); 350 quote(String value)351 private static String quote(String value) { 352 if (value == null || value.isEmpty()) { 353 return "∅∅∅"; 354 } 355 int first = value.codePointAt(0); 356 return SPREAD_SHEET_SENSITIVE.contains(first) ? "'" + value : value; 357 } 358 } 359