1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.Comparators; 6 import com.google.common.collect.ImmutableList; 7 import com.google.common.collect.ImmutableMultimap; 8 import com.google.common.collect.ImmutableSet; 9 import com.google.common.collect.Lists; 10 import com.google.common.collect.Multimap; 11 import com.google.common.collect.TreeMultimap; 12 import com.ibm.icu.impl.Relation; 13 import com.ibm.icu.text.NumberFormat; 14 import com.ibm.icu.util.ULocale; 15 import java.io.File; 16 import java.io.IOException; 17 import java.io.PrintWriter; 18 import java.util.ArrayList; 19 import java.util.Arrays; 20 import java.util.Collection; 21 import java.util.Collections; 22 import java.util.Comparator; 23 import java.util.EnumMap; 24 import java.util.EnumSet; 25 import java.util.Iterator; 26 import java.util.List; 27 import java.util.Locale; 28 import java.util.Map; 29 import java.util.Map.Entry; 30 import java.util.Set; 31 import java.util.TreeMap; 32 import java.util.TreeSet; 33 import java.util.regex.Matcher; 34 import java.util.stream.Collectors; 35 import org.unicode.cldr.draft.FileUtilities; 36 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 37 import org.unicode.cldr.tool.Option.Options; 38 import org.unicode.cldr.util.CLDRConfig; 39 import org.unicode.cldr.util.CLDRFile; 40 import org.unicode.cldr.util.CLDRFile.DraftStatus; 41 import org.unicode.cldr.util.CLDRLocale; 42 import org.unicode.cldr.util.CLDRPaths; 43 import org.unicode.cldr.util.CldrUtility; 44 import org.unicode.cldr.util.CoreCoverageInfo; 45 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems; 46 import org.unicode.cldr.util.Counter; 47 import org.unicode.cldr.util.CoverageInfo; 48 import org.unicode.cldr.util.DtdType; 49 import org.unicode.cldr.util.LanguageTagCanonicalizer; 50 import org.unicode.cldr.util.LanguageTagParser; 51 import org.unicode.cldr.util.Level; 52 import org.unicode.cldr.util.LocaleNames; 53 import org.unicode.cldr.util.Organization; 54 import org.unicode.cldr.util.PathHeader; 55 import org.unicode.cldr.util.PathHeader.Factory; 56 import org.unicode.cldr.util.PathHeader.SurveyToolStatus; 57 import org.unicode.cldr.util.PathStarrer; 58 import org.unicode.cldr.util.PatternCache; 59 import org.unicode.cldr.util.RegexLookup; 60 import org.unicode.cldr.util.SimpleFactory; 61 import org.unicode.cldr.util.StandardCodes; 62 import org.unicode.cldr.util.SupplementalDataInfo; 63 import org.unicode.cldr.util.TempPrintWriter; 64 import org.unicode.cldr.util.VettingViewer; 65 import org.unicode.cldr.util.VettingViewer.MissingStatus; 66 67 public class ShowLocaleCoverage { 68 69 private static final String TSV_BASE = 70 "https://github.com/unicode-org/cldr-staging/blob/main/docs/charts/" 71 + ToolConstants.CHART_VI.getVersionString(1, 2) 72 + "/tsv/"; 73 public static final Splitter LF_SPLITTER = Splitter.on('\n'); 74 75 // thresholds for measuring Level attainment 76 private static final double BASIC_THRESHOLD = 1; 77 private static final double MODERATE_THRESHOLD = 0.995; 78 private static final double MODERN_THRESHOLD = 0.995; 79 80 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 81 private static final String TSV_MISSING_SUMMARY_HEADER = 82 "#Path Level" 83 + "\t#Locales" 84 + "\tLocales" 85 + "\tSection" 86 + "\tPage" 87 + "\tHeader" 88 + "\tCode"; 89 90 private static final String TSV_MISSING_HEADER = 91 "#LCode" 92 + "\tEnglish Name" 93 + "\tScript" 94 + "\tLocale Level" 95 + "\tPath Level" 96 + "\tSTStatus" 97 + "\tBailey" 98 + "\tSection" 99 + "\tPage" 100 + "\tHeader" 101 + "\tCode" 102 + "\tST Link"; 103 104 private static final String PROPERTIES_HEADER = 105 "# coverageLevels.txt\n" 106 + "# Copyright © 2023 Unicode, Inc.\n" 107 + "# CLDR data files are interpreted according to the\n" 108 + "# LDML specification: http://unicode.org/reports/tr35/\n" 109 + "# For terms of use, see http://www.unicode.org/copyright.html\n" 110 + "#\n" 111 + "# For format and usage information, see:\n" 112 + "# https://cldr.unicode.org/index/cldr-spec/coverage-levels.\n" 113 + "\n"; 114 private static final String TSV_MISSING_BASIC_HEADER = 115 "#Locale\tProv.\tUnconf.\tMissing\tPath*\tAttributes"; 116 private static final String TSV_MISSING_COUNTS_HEADER = 117 "#Locale\tTargetLevel\t№ Found\t№ Unconfirmed\t№ Missing"; 118 119 private static final boolean DEBUG = true; 120 private static final char DEBUG_FILTER = 121 0; // use letter to only load locales starting with that letter 122 123 private static final String LATEST = ToolConstants.CHART_VERSION; 124 private static CLDRConfig testInfo = ToolConfig.getToolInstance(); 125 private static final StandardCodes SC = StandardCodes.make(); 126 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = 127 testInfo.getSupplementalDataInfo(); 128 private static final StandardCodes STANDARD_CODES = SC; 129 130 private static org.unicode.cldr.util.Factory factory = 131 testInfo.getCommonAndSeedAndMainAndAnnotationsFactory(); 132 private static final CLDRFile ENGLISH = factory.make("en", true); 133 134 static final Options myOptions = new Options(); 135 136 enum MyOptions { 137 filter(".+", ".*", "Filter the information based on id, using a regex argument."), 138 // draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft 139 // status."), 140 chart(null, null, "chart only"), 141 organization(".+", null, "Only locales for organization"), 142 version(".+", LATEST, "To get different versions"), 143 rawData(null, null, "Output the raw data from all coverage levels"), 144 targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."), 145 directories( 146 "(.*:)?[a-z]+(,[a-z]+)*", 147 "common", 148 "Space-delimited list of main source directories: common,seed,exemplar.\n" 149 + "Optional, <baseDir>:common,seed"), 150 ; 151 152 // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target 153 // directory."), 154 // layouts(null, null, "Only create html files for keyboard layouts"), 155 // repertoire(null, null, "Only create html files for repertoire"), ; 156 // boilerplate 157 final Option option; 158 MyOptions(String argumentPattern, String defaultArgument, String helpText)159 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 160 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 161 } 162 } 163 164 private static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY = 165 new RegexLookup<Boolean>() 166 .add("\\[@alt=\"accounting\"]", true) 167 .add("\\[@alt=\"variant\"]", true) 168 .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true) 169 .add("^//ldml/localeDisplayNames/languages/language.*_", true) 170 .add("^//ldml/numbers/currencies/currency.*/symbol", true) 171 .add("^//ldml/characters/exemplarCharacters", true); 172 173 private static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed; 174 private static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH); 175 176 private static Set<String> COMMON_LOCALES; 177 178 public static class StatusData { 179 int missing; 180 int provisional; 181 int unconfirmed; 182 Set<List<String>> values = 183 new TreeSet<>(Comparators.lexicographical(Comparator.<String>naturalOrder())); 184 } 185 186 public static class StatusCounter { 187 private static final Set<String> ATTRS_TO_REMOVE = Set.of("standard"); 188 PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 189 Map<String, StatusData> starredPathToData = new TreeMap<>(); 190 int missingTotal; 191 int provisionalTotal; 192 int unconfirmedTotal; 193 gatherStarred(String path, DraftStatus draftStatus)194 public void gatherStarred(String path, DraftStatus draftStatus) { 195 String starredPath = pathStarrer.set(path); 196 StatusData statusData = starredPathToData.get(starredPath); 197 if (statusData == null) { 198 starredPathToData.put(starredPath, statusData = new StatusData()); 199 } 200 if (draftStatus == null) { 201 ++statusData.missing; 202 ++missingTotal; 203 } else { 204 switch (draftStatus) { 205 case unconfirmed: 206 ++statusData.unconfirmed; 207 ++unconfirmedTotal; 208 break; 209 case provisional: 210 ++statusData.provisional; 211 ++provisionalTotal; 212 break; 213 default: 214 break; 215 } 216 } 217 final List<String> attributes = 218 CldrUtility.removeAll( 219 new ArrayList<>(pathStarrer.getAttributes()), ATTRS_TO_REMOVE); 220 if (!attributes.isEmpty()) { 221 statusData.values.add(attributes); 222 } 223 } 224 } 225 main(String[] args)226 public static void main(String[] args) throws IOException { 227 myOptions.parse(MyOptions.filter, args, true); 228 229 Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher(""); 230 231 if (MyOptions.chart.option.doesOccur()) { 232 showCoverage(null, matcher); 233 return; 234 } 235 236 Set<String> locales = null; 237 String organization = MyOptions.organization.option.getValue(); 238 boolean useOrgLevel = MyOptions.organization.option.doesOccur(); 239 if (useOrgLevel) { 240 locales = STANDARD_CODES.getLocaleCoverageLocales(organization); 241 } 242 243 if (MyOptions.version.option.doesOccur()) { 244 String number = MyOptions.version.option.getValue().trim(); 245 if (!number.contains(".")) { 246 number += ".0"; 247 } 248 factory = 249 org.unicode.cldr.util.Factory.make( 250 CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*"); 251 } else { 252 if (MyOptions.directories.option.doesOccur()) { 253 String directories = MyOptions.directories.option.getValue().trim(); 254 CLDRConfig cldrConfig = CONFIG; 255 String base = null; 256 int colonPos = directories.indexOf(':'); 257 if (colonPos >= 0) { 258 base = directories.substring(0, colonPos).trim(); 259 directories = directories.substring(colonPos + 1).trim(); 260 } else { 261 base = cldrConfig.getCldrBaseDirectory().toString(); 262 } 263 String[] items = directories.split(",\\s*"); 264 File[] fullDirectories = new File[items.length]; 265 int i = 0; 266 for (String item : items) { 267 fullDirectories[i++] = new File(base + "/" + item + "/main"); 268 } 269 factory = SimpleFactory.make(fullDirectories, ".*"); 270 COMMON_LOCALES = 271 SimpleFactory.make(base + "/" + "common" + "/main", ".*") 272 .getAvailableLanguages(); 273 } 274 } 275 fixCommonLocales(); 276 277 showCoverage(null, matcher, locales, useOrgLevel); 278 } 279 fixCommonLocales()280 private static void fixCommonLocales() { 281 if (COMMON_LOCALES == null) { 282 COMMON_LOCALES = factory.getAvailableLanguages(); 283 } 284 } 285 286 public static class FoundAndTotal { 287 final int found; 288 final int total; 289 290 @SafeVarargs FoundAndTotal(Counter<Level>.... counters)291 public FoundAndTotal(Counter<Level>... counters) { 292 final int[] count = {0, 0, 0}; 293 for (Level level : Level.values()) { 294 if (level == Level.COMPREHENSIVE) { 295 continue; 296 } 297 int i = 0; 298 for (Counter<Level> counter : counters) { 299 count[i++] += counter.get(level); 300 } 301 } 302 found = count[0]; 303 total = found + count[1] + count[2]; 304 } 305 306 @Override toString()307 public String toString() { 308 return found + "/" + total; 309 } 310 } 311 showCoverage(Anchors anchors, Matcher matcher)312 static void showCoverage(Anchors anchors, Matcher matcher) throws IOException { 313 showCoverage(anchors, matcher, null, false); 314 } 315 showCoverage( Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)316 private static void showCoverage( 317 Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel) 318 throws IOException { 319 final String title = "Locale Coverage"; 320 try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors)); 321 PrintWriter tsv_summary = 322 FileUtilities.openUTF8Writer( 323 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv"); 324 PrintWriter tsv_missing = 325 FileUtilities.openUTF8Writer( 326 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv"); 327 PrintWriter tsv_missing_summary = 328 FileUtilities.openUTF8Writer( 329 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv"); 330 PrintWriter tsv_missing_basic = 331 FileUtilities.openUTF8Writer( 332 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv"); 333 PrintWriter tsv_missing_counts = 334 FileUtilities.openUTF8Writer( 335 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-counts.tsv"); 336 TempPrintWriter propertiesCoverage = 337 TempPrintWriter.openUTF8Writer( 338 CLDRPaths.COMMON_DIRECTORY + "properties/", 339 "coverageLevels.txt"); ) { 340 tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER); 341 tsv_missing.println(TSV_MISSING_HEADER); 342 tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER); 343 tsv_missing_counts.println(TSV_MISSING_COUNTS_HEADER); 344 345 final int propertiesCoverageTabCount = 2; 346 propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, PROPERTIES_HEADER); 347 348 Set<String> checkModernLocales = 349 STANDARD_CODES.getLocaleCoverageLocales( 350 Organization.cldr, EnumSet.of(Level.MODERN)); 351 Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages()); 352 availableLanguages.addAll(checkModernLocales); 353 354 Multimap<String, String> languageToRegion = TreeMultimap.create(); 355 LanguageTagParser ltp = new LanguageTagParser(); 356 LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true); 357 for (String locale : factory.getAvailable()) { 358 String country = ltp.set(locale).getRegion(); 359 if (!country.isEmpty()) { 360 languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country); 361 } 362 } 363 languageToRegion = ImmutableMultimap.copyOf(languageToRegion); 364 365 fixCommonLocales(); 366 367 System.out.println(Joiner.on("\n").join(languageToRegion.asMap().entrySet())); 368 369 System.out.println("# Checking: " + availableLanguages); 370 371 NumberFormat percentFormat = NumberFormat.getPercentInstance(Locale.ENGLISH); 372 percentFormat.setMaximumFractionDigits(1); 373 374 pw.println( 375 "<p style='text-align: left'>This chart shows the coverage levels in this release. " 376 + "Totals are listed after the main chart.</p>\n" 377 + "<blockquote><ul>\n" 378 + "<li><a href='#main_table'>Main Table</a></li>\n" 379 + "<li><a href='#level_counts'>Level Counts</a></li>\n" 380 + "</ul></blockquote>\n" 381 + "<h3>Column Key</h3>\n" 382 + "<table class='subtle' style='margin-left:3em; margin-right:3em'>\n" 383 + "<tr><th>Default Region</th><td>The default region for locale code, based on likely subtags</td></tr>\n" 384 + "<tr><th>№ Locales</th><td>Note that the coverage of regional locales inherits from their parents.</td></tr>\n" 385 + "<tr><th>Target Level</th><td>The default target Coverage Level in CLDR. " 386 + "Particular organizations may have different target levels. " 387 + "Languages with high levels of coverage are marked with ‡, even though they are not tracked by the technical committee.</td></tr>\n" 388 + "<tr><th>≟</th><td>Indicates whether the CLDR Target is less than, equal to, or greater than the Computed Level.</td></tr>\n" 389 + "<tr><th>Computed Level</th><td>Computed from the percentage values, " 390 + "taking the first level that meets a threshold (currently " 391 + percentFormat.format(MODERN_THRESHOLD) 392 + ", ⓜ " 393 + percentFormat.format(MODERATE_THRESHOLD) 394 + ", ⓑ " 395 + percentFormat.format(BASIC_THRESHOLD) 396 + ").</td></tr>\n" 397 + "<tr><th>ICU</th><td>Indicates whether included in the current version of ICU</td></tr>\n" 398 + "<tr><th>Confirmed</th><td>Confirmed items as a percentage of all supplied items. " 399 + "If low, the coverage can be improved by getting multiple organizations to confirm.</td></tr>\n" 400 + "<tr><th>%, ⓜ%, ⓑ%, ⓒ%</th><td>Coverage at Levels: = Modern, ⓜ = Moderate, ⓑ = Basic, ⓒ = Core. " 401 + "The percentage of items at that level and below is computed from <i>confirmed_items/total_items</i>. " 402 + "A high-level summary of the meaning of the coverage values is at " 403 + "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. " 404 + "The Core values are described on <a target='_blank' href='https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales'>Core Data</a>. " 405 + "</td></tr>\n" 406 + "<tr><th>Missing Features</th><td>These are not single items, but rather specific features, such as plural rules or unit grammar info. " 407 + "They are listed if missing at the computed level. For more information, see <a href='https://cldr.unicode.org/index/locale-coverage'>Missing Features</a><br>" 408 + "Example: <i>ⓜ collation</i> means this feature should be supported at a Moderate level.<br>" 409 + "<ul><li>" 410 + "<i>Except for Core, these are not accounted for in the percent values.</i>" 411 + "</li><li>" 412 + "The information needs to be provided in tickets, not through the Survey Tool." 413 + "</li></ul>" 414 + "</td></tr>\n" 415 + "<tr><th>" 416 + linkTsv("", "TSVFiles") 417 + ":</th><td>\n" 418 + "<ul><li>" 419 + linkTsv("locale-coverage.tsv") 420 + " — A version of this file, suitable for loading into a spreadsheet.</li>\n" 421 + "<li>" 422 + linkTsv("locale-missing.tsv") 423 + " — Missing items for the CLDR target locales.</li>\n" 424 + "<li>" 425 + linkTsv("locale-missing-summary.tsv") 426 + " — Summary of missing items for the CLDR target locales, by Section/Page/Header.</li>\n" 427 + "<li>" 428 + linkTsv("locale-missing-basic.tsv") 429 + " — Missing items that keep locales from reaching the Basic level.</li>\n" 430 + "<li>" 431 + linkTsv("locale-missing-counts.tsv") 432 + " — Counts of items per locale that are found, unconfirmed, or missing, at the target level. " 433 + "(Or at *basic, if there is no target level.)</li>\n" 434 + "</td></tr>\n" 435 + "</table>\n"); 436 437 Relation<MissingStatus, String> missingPaths = 438 Relation.of( 439 new EnumMap<MissingStatus, Set<String>>(MissingStatus.class), 440 TreeSet.class, 441 CLDRFile.getComparator(DtdType.ldml)); 442 Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml)); 443 444 Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 445 446 Counter<Level> foundCounter = new Counter<>(); 447 Counter<Level> unconfirmedCounter = new Counter<>(); 448 Counter<Level> missingCounter = new Counter<>(); 449 450 List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class)); 451 levelsToShow.remove(Level.COMPREHENSIVE); 452 levelsToShow.remove(Level.UNDETERMINED); 453 levelsToShow = ImmutableList.copyOf(levelsToShow); 454 List<Level> reversedLevels = new ArrayList<>(levelsToShow); 455 Collections.reverse(reversedLevels); 456 reversedLevels = ImmutableList.copyOf(reversedLevels); 457 458 int localeCount = 0; 459 460 final TablePrinter tablePrinter = 461 new TablePrinter() 462 .addColumn( 463 "Language", 464 "class='source'", 465 CldrUtility.getDoubleLinkMsg(), 466 "class='source'", 467 true) 468 .setBreakSpans(true) 469 .addColumn( 470 "English Name", "class='source'", null, "class='source'", true) 471 .setBreakSpans(true) 472 .addColumn( 473 "Native Name", "class='source'", null, "class='source'", true) 474 .setBreakSpans(true) 475 .addColumn("Script", "class='source'", null, "class='source'", true) 476 .setBreakSpans(true) 477 .addColumn( 478 "Default Region", 479 "class='source'", 480 null, 481 "class='source'", 482 true) 483 .setBreakSpans(true) 484 .addColumn( 485 "№ Locales", 486 "class='source'", 487 null, 488 "class='targetRight'", 489 true) 490 .setBreakSpans(true) 491 .setCellPattern("{0,number}") 492 .addColumn( 493 "Target Level", "class='source'", null, "class='source'", true) 494 .setBreakSpans(true) 495 .addColumn("≟", "class='target'", null, "class='target'", true) 496 .setBreakSpans(true) 497 .setSortPriority(1) 498 .setSortAscending(false) 499 .addColumn( 500 "Computed Level", 501 "class='target'", 502 null, 503 "class='target'", 504 true) 505 .setBreakSpans(true) 506 .setSortPriority(0) 507 .setSortAscending(false) 508 .addColumn("ICU", "class='target'", null, "class='target'", true) 509 .setBreakSpans(true) 510 .addColumn( 511 "Confirmed", 512 "class='target'", 513 null, 514 "class='targetRight' style='color:gray'", 515 true) 516 .setBreakSpans(true) 517 .setCellPattern("{0,number,0.0%}"); 518 519 NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH); 520 tsvPercent.setMaximumFractionDigits(2); 521 522 for (Level level : reversedLevels) { 523 String titleLevel = level.getAbbreviation() + "%"; 524 tablePrinter 525 .addColumn(titleLevel, "class='target'", null, "class='targetRight'", true) 526 .setCellPattern("{0,number,0.0%}") 527 .setBreakSpans(true); 528 529 switch (level) { 530 default: 531 tablePrinter.setSortPriority(2).setSortAscending(false); 532 break; 533 case BASIC: 534 tablePrinter.setSortPriority(3).setSortAscending(false); 535 break; 536 case MODERATE: 537 tablePrinter.setSortPriority(4).setSortAscending(false); 538 break; 539 case MODERN: 540 tablePrinter.setSortPriority(5).setSortAscending(false); 541 break; 542 } 543 } 544 tablePrinter 545 .addColumn("Missing Features", "class='target'", null, "class='target'", true) 546 .setBreakSpans(true); 547 548 long start = System.currentTimeMillis(); 549 LikelySubtags likelySubtags = new LikelySubtags(); 550 551 EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class); 552 targetLevel.put(Level.CORE, 2 / 100d); 553 targetLevel.put(Level.BASIC, 16 / 100d); 554 targetLevel.put(Level.MODERATE, 33 / 100d); 555 targetLevel.put(Level.MODERN, 100 / 100d); 556 557 Multimap<String, String> pathToLocale = TreeMultimap.create(); 558 559 Counter<Level> computedLevels = new Counter<>(); 560 Counter<Level> computedSublocaleLevels = new Counter<>(); 561 562 for (String locale : availableLanguages) { 563 try { 564 if (locale.contains("supplemental") // for old versionsl 565 // || locale.startsWith("sr_Latn") 566 ) { 567 continue; 568 } 569 if (locales != null && !locales.contains(locale)) { 570 String base = CLDRLocale.getInstance(locale).getLanguage(); 571 if (!locales.contains(base)) { 572 continue; 573 } 574 } 575 if (matcher != null && !matcher.reset(locale).matches()) { 576 continue; 577 } 578 if (defaultContents.contains(locale) 579 || LocaleNames.ROOT.equals(locale) 580 || LocaleNames.UND.equals(locale)) { 581 continue; 582 } 583 584 tsv_missing_summary.flush(); 585 tsv_missing.flush(); 586 tsv_missing_basic.flush(); 587 tsv_missing_counts.flush(); 588 589 boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists(); 590 591 String region = ltp.set(locale).getRegion(); 592 if (!region.isEmpty()) continue; // skip regions 593 594 final Level cldrLocaleLevelGoal = 595 SC.getLocaleCoverageLevel(Organization.cldr, locale); 596 final String specialFlag = getSpecialFlag(locale); 597 598 final boolean cldrLevelGoalBasicToModern = 599 Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal); 600 601 String max = likelySubtags.maximize(locale); 602 final String script = ltp.set(max).getScript(); 603 final String defRegion = ltp.getRegion(); 604 605 final String language = likelySubtags.minimize(locale); 606 607 missingPaths.clear(); 608 unconfirmed.clear(); 609 610 final CLDRFile file = factory.make(locale, true, minimumDraftStatus); 611 612 if (locale.equals("af")) { 613 int debug = 0; 614 } 615 616 Iterable<String> pathSource = new IterableFilter(file.fullIterable()); 617 618 VettingViewer.getStatus( 619 pathSource, 620 file, 621 pathHeaderFactory, 622 foundCounter, 623 unconfirmedCounter, 624 missingCounter, 625 missingPaths, 626 unconfirmed); 627 628 { 629 long found = 0; 630 long unconfirmedc = 0; 631 long missing = 0; 632 Level adjustedGoal = 633 cldrLocaleLevelGoal.compareTo(Level.BASIC) < 0 634 ? Level.BASIC 635 : cldrLocaleLevelGoal; 636 for (Level level : Level.values()) { 637 if (level.compareTo(adjustedGoal) <= 0) { 638 found += foundCounter.get(level); 639 unconfirmedc += unconfirmedCounter.get(level); 640 missing += missingCounter.get(level); 641 } 642 } 643 String goalFlag = cldrLocaleLevelGoal == adjustedGoal ? "" : "*"; 644 tsv_missing_counts.println( 645 specialFlag 646 + locale 647 + "\t" 648 + goalFlag 649 + adjustedGoal 650 + "\t" 651 + found 652 + "\t" 653 + unconfirmedc 654 + "\t" 655 + missing); 656 } 657 658 Collection<String> sublocales = languageToRegion.asMap().get(language); 659 if (sublocales == null) { 660 sublocales = Collections.emptySet(); 661 } 662 sublocales = ImmutableSet.copyOf(sublocales); 663 664 // get the totals 665 666 EnumMap<Level, Integer> totals = new EnumMap<>(Level.class); 667 EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class); 668 Set<CoreItems> specialMissingPaths = EnumSet.noneOf(CoreItems.class); 669 670 StatusCounter starredCounter = new StatusCounter(); 671 672 { 673 Multimap<CoreItems, String> detailedErrors = TreeMultimap.create(); 674 Set<CoreItems> coverage = 675 CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors); 676 for (CoreItems item : coverage) { 677 foundCounter.add(item.desiredLevel, 1); 678 } 679 for (Entry<CoreItems, String> entry : detailedErrors.entries()) { 680 CoreItems coreItem = entry.getKey(); 681 String path = entry.getValue(); 682 specialMissingPaths.add(coreItem); 683 // if goal (eg modern) >= itemLevel, indicate it is missing 684 if (coreItem.desiredLevel == Level.BASIC) { 685 starredCounter.gatherStarred(path, null); 686 } 687 missingCounter.add(coreItem.desiredLevel, 1); 688 } 689 } 690 691 if (cldrLevelGoalBasicToModern) { 692 Level goalLevel = cldrLocaleLevelGoal; 693 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 694 String path = entry.getValue(); 695 String status = entry.getKey().toString(); 696 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 697 if (goalLevel.compareTo(foundLevel) >= 0) { 698 String line = 699 spreadsheetLine( 700 locale, 701 language, 702 script, 703 specialFlag, 704 file.getStringValue(path), 705 goalLevel, 706 foundLevel, 707 status, 708 path, 709 file, 710 pathToLocale); 711 String lineToPrint1 = line; 712 tsv_missing.println(lineToPrint1); 713 } 714 } 715 for (String path : unconfirmed) { 716 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 717 if (goalLevel.compareTo(foundLevel) >= 0) { 718 String line = 719 spreadsheetLine( 720 locale, 721 language, 722 script, 723 specialFlag, 724 file.getStringValue(path), 725 goalLevel, 726 foundLevel, 727 "n/a", 728 path, 729 file, 730 pathToLocale); 731 tsv_missing.println(line); 732 } 733 } 734 } else { 735 Level goalLevel = Level.BASIC; 736 for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) { 737 String path = entry.getValue(); 738 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 739 if (goalLevel.compareTo(foundLevel) >= 0) { 740 starredCounter.gatherStarred(path, null); 741 } 742 } 743 for (String path : unconfirmed) { 744 String fullPath = file.getFullXPath(path); 745 DraftStatus draftStatus = 746 fullPath.contains("unconfirmed") 747 ? DraftStatus.unconfirmed 748 : DraftStatus.provisional; 749 750 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 751 if (goalLevel.compareTo(foundLevel) >= 0) { 752 starredCounter.gatherStarred(path, draftStatus); 753 } 754 } 755 } 756 757 if (!starredCounter.starredPathToData.isEmpty()) { 758 for (Entry<String, StatusData> starred : 759 starredCounter.starredPathToData.entrySet()) { 760 String starredPath = starred.getKey(); 761 StatusData statusData = starred.getValue(); 762 String valueString = 763 statusData.values.stream() 764 .map(x -> Joiner.on(", ").join(x)) 765 .collect(Collectors.joining("; ")); 766 767 tsv_missing_basic.println( 768 specialFlag 769 + locale // 770 + "\t" 771 + statusData.missing // 772 + "\t" 773 + statusData.provisional // 774 + "\t" 775 + statusData.unconfirmed // 776 + "\t" 777 + starredPath.replace("\"*\"", "'*'") 778 + "\t" 779 + valueString 780 // 781 ); 782 } 783 tsv_missing_basic.println( 784 specialFlag 785 + locale // 786 + "\t" 787 + starredCounter.missingTotal // 788 + "\t" 789 + starredCounter.provisionalTotal // 790 + "\t" 791 + starredCounter.unconfirmedTotal // 792 + "\tTotals\t"); 793 tsv_missing_basic.println("\t\t\t\t\t"); // for a proper table in github 794 } 795 796 int sumFound = 0; 797 int sumMissing = 0; 798 int sumUnconfirmed = 0; 799 800 for (Level level : levelsToShow) { 801 long foundCount = foundCounter.get(level); 802 long unconfirmedCount = unconfirmedCounter.get(level); 803 long missingCount = missingCounter.get(level); 804 805 sumFound += foundCount; 806 sumUnconfirmed += unconfirmedCount; 807 sumMissing += missingCount; 808 809 confirmed.put(level, sumFound); 810 totals.put(level, sumFound + sumUnconfirmed + sumMissing); 811 } 812 813 // double modernTotal = totals.get(Level.MODERN); 814 815 // first get the accumulated values 816 EnumMap<Level, Integer> accumTotals = new EnumMap<>(Level.class); 817 EnumMap<Level, Integer> accumConfirmed = new EnumMap<>(Level.class); 818 int currTotals = 0; 819 int currConfirmed = 0; 820 for (Level level : levelsToShow) { 821 currTotals += totals.get(level); 822 currConfirmed += confirmed.get(level); 823 accumConfirmed.put(level, currConfirmed); 824 accumTotals.put(level, currTotals); 825 } 826 827 // print the totals 828 829 Level computed = Level.UNDETERMINED; 830 Map<Level, Double> levelToProportion = new EnumMap<>(Level.class); 831 832 for (Level level : reversedLevels) { 833 int confirmedCoverage = accumConfirmed.get(level); 834 double total = accumTotals.get(level); 835 836 final double proportion = confirmedCoverage / total; 837 levelToProportion.put(level, proportion); 838 839 if (computed == Level.UNDETERMINED) { 840 switch (level) { 841 case MODERN: 842 if (proportion >= MODERN_THRESHOLD) { 843 computed = level; 844 } 845 break; 846 case MODERATE: 847 if (proportion >= MODERATE_THRESHOLD) { 848 computed = level; 849 } 850 break; 851 case BASIC: 852 if (proportion >= BASIC_THRESHOLD) { 853 computed = level; 854 } 855 break; 856 default: 857 break; 858 } 859 } 860 } 861 862 Set<CoreItems> shownMissingPaths = EnumSet.noneOf(CoreItems.class); 863 Level computedWithCore = 864 computed == Level.UNDETERMINED ? Level.BASIC : computed; 865 for (CoreItems item : specialMissingPaths) { 866 if (item.desiredLevel.compareTo(computedWithCore) <= 0) { 867 shownMissingPaths.add(item); 868 } else { 869 int debug = 0; 870 } 871 } 872 computedLevels.add(computed, 1); 873 computedSublocaleLevels.add(computed, sublocales.size()); 874 875 final String coreMissingString = Joiner.on(", ").join(shownMissingPaths); 876 final String visibleLevelComputed = 877 computed == Level.UNDETERMINED ? "" : computed.toString(); 878 final String visibleLevelGoal = 879 cldrLocaleLevelGoal == Level.UNDETERMINED 880 ? "" 881 : specialFlag + cldrLocaleLevelGoal.toString(); 882 final String goalComparedToComputed = 883 computed == cldrLocaleLevelGoal 884 ? " ≡" 885 : cldrLocaleLevelGoal.compareTo(computed) < 0 ? " <" : " >"; 886 887 tablePrinter 888 .addRow() 889 .addCell(language) 890 .addCell(ENGLISH.getName(language)) 891 .addCell(file.getName(language)) 892 .addCell(script) 893 .addCell(defRegion) 894 .addCell(sublocales.size()) 895 .addCell(visibleLevelGoal) 896 .addCell(goalComparedToComputed) 897 .addCell(visibleLevelComputed) 898 .addCell(getIcuValue(language)) 899 .addCell(sumFound / (double) (sumFound + sumUnconfirmed)); 900 901 // print the totals 902 for (Level level : reversedLevels) { 903 tablePrinter.addCell(levelToProportion.get(level)); 904 } 905 906 tablePrinter.addCell(coreMissingString).finishRow(); 907 908 // now write properties file line 909 910 if (computed != Level.UNDETERMINED) { 911 propertiesCoverage.printlnWithTabs( 912 propertiesCoverageTabCount, 913 locale 914 + " ;\t" 915 + visibleLevelComputed 916 + " ;\t" 917 + ENGLISH.getName(locale)); 918 // TODO decide whether to restore this 919 // Level higher = Level.UNDETERMINED; 920 // switch (computed) { 921 // default: 922 // higher = Level.UNDETERMINED; 923 // break; 924 // case MODERATE: 925 // higher = Level.MODERN; 926 // break; 927 // case BASIC: 928 // higher = Level.MODERATE; 929 // break; 930 // } 931 // double higherProportion = higher == 932 // Level.UNDETERMINED ? 0d : levelToProportion.get(higher); 933 // 934 // if (higherProportion >= THRESHOLD_HIGHER) { 935 // propertiesCoverage.println( 936 // " ;\t" + 937 // tsvPercent.format(higherProportion) + 938 // " ;\t" + higher 939 // ); 940 // } else { 941 // propertiesCoverage.println(" ;\t" + "" + " 942 // ;\t" + ""); 943 // } 944 } 945 localeCount++; 946 } catch (Exception e) { 947 throw new IllegalArgumentException(e); 948 } 949 } 950 String lineToPrint = "\n#EOF"; 951 propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, lineToPrint); 952 953 pw.println("<h3><a name='main_table' href='#main_table'>Main Table</a></h3>"); 954 pw.println(tablePrinter.toTable()); 955 956 pw.println( 957 "<h3><a name='level_counts' href='#level_counts'>Level Counts</a></h3>\n" 958 + "<table class='subtle'><tr>\n" 959 + "<th style='text-align:left'>" 960 + "Level" 961 + "</th>" 962 + "<th style='text-align:left'>" 963 + "Languages" 964 + "</th>" 965 + "<th style='text-align:left'>" 966 + "Locales" 967 + "</th>" 968 + "</tr>"); 969 long totalCount = 0; 970 long totalLocaleCount = 0; 971 for (Level level : Lists.reverse(Arrays.asList(Level.values()))) { 972 final long count = computedLevels.get(level); 973 final long localesCount = computedSublocaleLevels.get(level); 974 if (count == 0 || level == Level.UNDETERMINED) { 975 continue; 976 } 977 totalCount += count; 978 totalLocaleCount += localesCount; 979 String visibleImputed = 980 level == Level.UNDETERMINED 981 ? "<" + Level.BASIC.toString() 982 : level.toString(); 983 pw.println( 984 "<tr>" 985 + "<th style='text-align:left'>" 986 + visibleImputed 987 + "</th>" 988 + "<td style='text-align:right'>" 989 + count 990 + "</td>" 991 + "<td style='text-align:right'>" 992 + localesCount 993 + "</td>" 994 + "</tr>"); 995 } 996 pw.println( 997 "<tr>" 998 + "<th style='text-align:left'>" 999 + "Total" 1000 + "</th>" 1001 + "<td style='text-align:right'>" 1002 + totalCount 1003 + "</td>" 1004 + "<td style='text-align:right'>" 1005 + totalLocaleCount 1006 + "</td>" 1007 + "</tr>\n"); 1008 1009 pw.println( 1010 "<tr>" 1011 + "<th style='text-align:left'>" 1012 + "in dev." 1013 + "</th>" 1014 + "<td style='text-align:right'>" 1015 + computedLevels.get(Level.UNDETERMINED) 1016 + "</td>" 1017 + "<td style='text-align:right'>" 1018 + computedSublocaleLevels.get(Level.UNDETERMINED) 1019 + "</td>" 1020 + "</tr>\n" 1021 + "</table>"); 1022 1023 Multimap<Level, String> levelToLocales = TreeMultimap.create(); 1024 1025 for (Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) { 1026 String path = entry.getKey(); 1027 Collection<String> localeSet = entry.getValue(); 1028 levelToLocales.clear(); 1029 for (String locale : localeSet) { 1030 Level foundLevel = coverageInfo.getCoverageLevel(path, locale); 1031 levelToLocales.put(foundLevel, locale); 1032 } 1033 String phString = "n/a\tn/a\tn/a\tn/a"; 1034 try { 1035 PathHeader ph = pathHeaderFactory.fromPath(path); 1036 phString = ph.toString(); 1037 } catch (Exception e) { 1038 } 1039 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) { 1040 Level level = entry2.getKey(); 1041 localeSet = entry2.getValue(); 1042 tsv_missing_summary.println( 1043 level 1044 + "\t" 1045 + localeSet.size() 1046 + "\t" 1047 + Joiner.on(" ") 1048 .join( 1049 localeSet.stream() 1050 .map(x -> x + getSpecialFlag(x)) 1051 .collect(Collectors.toSet())) 1052 + "\t" 1053 + phString); 1054 } 1055 } 1056 tablePrinter.toTsv(tsv_summary); 1057 long end = System.currentTimeMillis(); 1058 System.out.println( 1059 (end - start) 1060 + " millis = " 1061 + ((end - start) / localeCount) 1062 + " millis/locale"); 1063 ShowPlurals.appendBlanksForScrolling(pw); 1064 } 1065 } 1066 linkTsv(String tsvFileName)1067 private static String linkTsv(String tsvFileName) { 1068 return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + tsvFileName + "</a>"; 1069 } 1070 linkTsv(String tsvFileName, String anchorText)1071 private static String linkTsv(String tsvFileName, String anchorText) { 1072 return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + anchorText + "</a>"; 1073 } 1074 getSpecialFlag(String locale)1075 private static String getSpecialFlag(String locale) { 1076 return SC.getLocaleCoverageLevel(Organization.special, locale) == Level.UNDETERMINED 1077 ? "" 1078 : "‡"; 1079 } 1080 1081 private static class IterableFilter implements Iterable<String> { 1082 private Iterable<String> source; 1083 IterableFilter(Iterable<String> source)1084 IterableFilter(Iterable<String> source) { 1085 this.source = source; 1086 } 1087 1088 /** 1089 * When some paths are defined after submission, we need to change them to COMPREHENSIVE in 1090 * computing the vetting status. 1091 */ 1092 private static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of(); 1093 1094 @Override iterator()1095 public Iterator<String> iterator() { 1096 return new IteratorFilter(source.iterator()); 1097 } 1098 1099 private static class IteratorFilter implements Iterator<String> { 1100 Iterator<String> source; 1101 String peek; 1102 IteratorFilter(Iterator<String> source)1103 public IteratorFilter(Iterator<String> source) { 1104 this.source = source; 1105 fillPeek(); 1106 } 1107 1108 @Override hasNext()1109 public boolean hasNext() { 1110 return peek != null; 1111 } 1112 1113 @Override next()1114 public String next() { 1115 String result = peek; 1116 fillPeek(); 1117 return result; 1118 } 1119 fillPeek()1120 private void fillPeek() { 1121 peek = null; 1122 while (source.hasNext()) { 1123 peek = source.next(); 1124 // if it is ok to assess, then break 1125 if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek) 1126 && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) { 1127 break; 1128 } 1129 peek = null; 1130 } 1131 } 1132 } 1133 } 1134 1135 private static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO); 1136 spreadsheetLine( String locale, String language, String script, String specialFlag, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, Multimap<String, String> pathToLocale)1137 private static String spreadsheetLine( 1138 String locale, 1139 String language, 1140 String script, 1141 String specialFlag, 1142 String nativeValue, 1143 Level cldrLocaleLevelGoal, 1144 Level itemLevel, 1145 String status, 1146 String path, 1147 CLDRFile resolvedFile, 1148 Multimap<String, String> pathToLocale) { 1149 if (pathToLocale != null) { 1150 pathToLocale.put(path, locale); 1151 } 1152 // String stLink = "n/a"; 1153 // String englishValue = "n/a"; 1154 // StatusAction action = null; 1155 // String icuValue = getIcuValue(locale); 1156 1157 SurveyToolStatus surveyToolStatus = null; 1158 String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path); 1159 1160 String phString = "na\tn/a\tn/a\t" + path; 1161 try { 1162 PathHeader ph = pathHeaderFactory.fromPath(path); 1163 phString = ph.toString(); 1164 // stLink = URLS.forXpath(locale, path); 1165 // englishValue = ENGLISH.getStringValue(path); 1166 // action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo, 1167 // InputMethod.DIRECT, ph, dummyUserInfo); 1168 } catch (Exception e) { 1169 1170 } 1171 1172 String line = 1173 specialFlag 1174 + language 1175 + "\t" 1176 + ENGLISH.getName(language) 1177 + "\t" 1178 + ENGLISH.getName("script", script) 1179 + "\t" 1180 + cldrLocaleLevelGoal 1181 + "\t" 1182 + itemLevel 1183 + "\t" 1184 + (surveyToolStatus == null ? "n/a" : surveyToolStatus.toString()) 1185 + "\t" 1186 + bailey 1187 + "\t" 1188 + phString 1189 + "\t" 1190 + PathHeader.getUrlForLocalePath(locale, path); 1191 return line; 1192 } 1193 getIcuValue(String locale)1194 private static String getIcuValue(String locale) { 1195 return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : ""; 1196 } 1197 1198 private static final Set<ULocale> ICU_Locales = 1199 ImmutableSet.copyOf(ULocale.getAvailableLocales()); 1200 } 1201