xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowLocaleCoverage.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.Comparators;
6 import com.google.common.collect.ImmutableList;
7 import com.google.common.collect.ImmutableMultimap;
8 import com.google.common.collect.ImmutableSet;
9 import com.google.common.collect.Lists;
10 import com.google.common.collect.Multimap;
11 import com.google.common.collect.TreeMultimap;
12 import com.ibm.icu.impl.Relation;
13 import com.ibm.icu.text.NumberFormat;
14 import com.ibm.icu.util.ULocale;
15 import java.io.File;
16 import java.io.IOException;
17 import java.io.PrintWriter;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.Collections;
22 import java.util.Comparator;
23 import java.util.EnumMap;
24 import java.util.EnumSet;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.Map;
29 import java.util.Map.Entry;
30 import java.util.Set;
31 import java.util.TreeMap;
32 import java.util.TreeSet;
33 import java.util.regex.Matcher;
34 import java.util.stream.Collectors;
35 import org.unicode.cldr.draft.FileUtilities;
36 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
37 import org.unicode.cldr.tool.Option.Options;
38 import org.unicode.cldr.util.CLDRConfig;
39 import org.unicode.cldr.util.CLDRFile;
40 import org.unicode.cldr.util.CLDRFile.DraftStatus;
41 import org.unicode.cldr.util.CLDRLocale;
42 import org.unicode.cldr.util.CLDRPaths;
43 import org.unicode.cldr.util.CldrUtility;
44 import org.unicode.cldr.util.CoreCoverageInfo;
45 import org.unicode.cldr.util.CoreCoverageInfo.CoreItems;
46 import org.unicode.cldr.util.Counter;
47 import org.unicode.cldr.util.CoverageInfo;
48 import org.unicode.cldr.util.DtdType;
49 import org.unicode.cldr.util.LanguageTagCanonicalizer;
50 import org.unicode.cldr.util.LanguageTagParser;
51 import org.unicode.cldr.util.Level;
52 import org.unicode.cldr.util.LocaleNames;
53 import org.unicode.cldr.util.Organization;
54 import org.unicode.cldr.util.PathHeader;
55 import org.unicode.cldr.util.PathHeader.Factory;
56 import org.unicode.cldr.util.PathHeader.SurveyToolStatus;
57 import org.unicode.cldr.util.PathStarrer;
58 import org.unicode.cldr.util.PatternCache;
59 import org.unicode.cldr.util.RegexLookup;
60 import org.unicode.cldr.util.SimpleFactory;
61 import org.unicode.cldr.util.StandardCodes;
62 import org.unicode.cldr.util.SupplementalDataInfo;
63 import org.unicode.cldr.util.TempPrintWriter;
64 import org.unicode.cldr.util.VettingViewer;
65 import org.unicode.cldr.util.VettingViewer.MissingStatus;
66 
67 public class ShowLocaleCoverage {
68 
69     private static final String TSV_BASE =
70             "https://github.com/unicode-org/cldr-staging/blob/main/docs/charts/"
71                     + ToolConstants.CHART_VI.getVersionString(1, 2)
72                     + "/tsv/";
73     public static final Splitter LF_SPLITTER = Splitter.on('\n');
74 
75     // thresholds for measuring Level attainment
76     private static final double BASIC_THRESHOLD = 1;
77     private static final double MODERATE_THRESHOLD = 0.995;
78     private static final double MODERN_THRESHOLD = 0.995;
79 
80     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
81     private static final String TSV_MISSING_SUMMARY_HEADER =
82             "#Path Level"
83                     + "\t#Locales"
84                     + "\tLocales"
85                     + "\tSection"
86                     + "\tPage"
87                     + "\tHeader"
88                     + "\tCode";
89 
90     private static final String TSV_MISSING_HEADER =
91             "#LCode"
92                     + "\tEnglish Name"
93                     + "\tScript"
94                     + "\tLocale Level"
95                     + "\tPath Level"
96                     + "\tSTStatus"
97                     + "\tBailey"
98                     + "\tSection"
99                     + "\tPage"
100                     + "\tHeader"
101                     + "\tCode"
102                     + "\tST Link";
103 
104     private static final String PROPERTIES_HEADER =
105             "# coverageLevels.txt\n"
106                     + "# Copyright © 2023 Unicode, Inc.\n"
107                     + "# CLDR data files are interpreted according to the\n"
108                     + "# LDML specification: http://unicode.org/reports/tr35/\n"
109                     + "# For terms of use, see http://www.unicode.org/copyright.html\n"
110                     + "#\n"
111                     + "# For format and usage information, see:\n"
112                     + "# https://cldr.unicode.org/index/cldr-spec/coverage-levels.\n"
113                     + "\n";
114     private static final String TSV_MISSING_BASIC_HEADER =
115             "#Locale\tProv.\tUnconf.\tMissing\tPath*\tAttributes";
116     private static final String TSV_MISSING_COUNTS_HEADER =
117             "#Locale\tTargetLevel\t№ Found\t№ Unconfirmed\t№ Missing";
118 
119     private static final boolean DEBUG = true;
120     private static final char DEBUG_FILTER =
121             0; // use letter to only load locales starting with that letter
122 
123     private static final String LATEST = ToolConstants.CHART_VERSION;
124     private static CLDRConfig testInfo = ToolConfig.getToolInstance();
125     private static final StandardCodes SC = StandardCodes.make();
126     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
127             testInfo.getSupplementalDataInfo();
128     private static final StandardCodes STANDARD_CODES = SC;
129 
130     private static org.unicode.cldr.util.Factory factory =
131             testInfo.getCommonAndSeedAndMainAndAnnotationsFactory();
132     private static final CLDRFile ENGLISH = factory.make("en", true);
133 
134     static final Options myOptions = new Options();
135 
136     enum MyOptions {
137         filter(".+", ".*", "Filter the information based on id, using a regex argument."),
138         //        draftStatus(".+", "unconfirmed", "Filter the information to a minimum draft
139         // status."),
140         chart(null, null, "chart only"),
141         organization(".+", null, "Only locales for organization"),
142         version(".+", LATEST, "To get different versions"),
143         rawData(null, null, "Output the raw data from all coverage levels"),
144         targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/statistics/", "target output file."),
145         directories(
146                 "(.*:)?[a-z]+(,[a-z]+)*",
147                 "common",
148                 "Space-delimited list of main source directories: common,seed,exemplar.\n"
149                         + "Optional, <baseDir>:common,seed"),
150         ;
151 
152         // targetDirectory(".+", CldrUtility.CHART_DIRECTORY + "keyboards/", "The target
153         // directory."),
154         // layouts(null, null, "Only create html files for keyboard layouts"),
155         // repertoire(null, null, "Only create html files for repertoire"), ;
156         // boilerplate
157         final Option option;
158 
MyOptions(String argumentPattern, String defaultArgument, String helpText)159         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
160             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
161         }
162     }
163 
164     private static final RegexLookup<Boolean> SUPPRESS_PATHS_CAN_BE_EMPTY =
165             new RegexLookup<Boolean>()
166                     .add("\\[@alt=\"accounting\"]", true)
167                     .add("\\[@alt=\"variant\"]", true)
168                     .add("^//ldml/localeDisplayNames/territories/territory.*@alt=\"short", true)
169                     .add("^//ldml/localeDisplayNames/languages/language.*_", true)
170                     .add("^//ldml/numbers/currencies/currency.*/symbol", true)
171                     .add("^//ldml/characters/exemplarCharacters", true);
172 
173     private static DraftStatus minimumDraftStatus = DraftStatus.unconfirmed;
174     private static final Factory pathHeaderFactory = PathHeader.getFactory(ENGLISH);
175 
176     private static Set<String> COMMON_LOCALES;
177 
178     public static class StatusData {
179         int missing;
180         int provisional;
181         int unconfirmed;
182         Set<List<String>> values =
183                 new TreeSet<>(Comparators.lexicographical(Comparator.<String>naturalOrder()));
184     }
185 
186     public static class StatusCounter {
187         private static final Set<String> ATTRS_TO_REMOVE = Set.of("standard");
188         PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
189         Map<String, StatusData> starredPathToData = new TreeMap<>();
190         int missingTotal;
191         int provisionalTotal;
192         int unconfirmedTotal;
193 
gatherStarred(String path, DraftStatus draftStatus)194         public void gatherStarred(String path, DraftStatus draftStatus) {
195             String starredPath = pathStarrer.set(path);
196             StatusData statusData = starredPathToData.get(starredPath);
197             if (statusData == null) {
198                 starredPathToData.put(starredPath, statusData = new StatusData());
199             }
200             if (draftStatus == null) {
201                 ++statusData.missing;
202                 ++missingTotal;
203             } else {
204                 switch (draftStatus) {
205                     case unconfirmed:
206                         ++statusData.unconfirmed;
207                         ++unconfirmedTotal;
208                         break;
209                     case provisional:
210                         ++statusData.provisional;
211                         ++provisionalTotal;
212                         break;
213                     default:
214                         break;
215                 }
216             }
217             final List<String> attributes =
218                     CldrUtility.removeAll(
219                             new ArrayList<>(pathStarrer.getAttributes()), ATTRS_TO_REMOVE);
220             if (!attributes.isEmpty()) {
221                 statusData.values.add(attributes);
222             }
223         }
224     }
225 
main(String[] args)226     public static void main(String[] args) throws IOException {
227         myOptions.parse(MyOptions.filter, args, true);
228 
229         Matcher matcher = PatternCache.get(MyOptions.filter.option.getValue()).matcher("");
230 
231         if (MyOptions.chart.option.doesOccur()) {
232             showCoverage(null, matcher);
233             return;
234         }
235 
236         Set<String> locales = null;
237         String organization = MyOptions.organization.option.getValue();
238         boolean useOrgLevel = MyOptions.organization.option.doesOccur();
239         if (useOrgLevel) {
240             locales = STANDARD_CODES.getLocaleCoverageLocales(organization);
241         }
242 
243         if (MyOptions.version.option.doesOccur()) {
244             String number = MyOptions.version.option.getValue().trim();
245             if (!number.contains(".")) {
246                 number += ".0";
247             }
248             factory =
249                     org.unicode.cldr.util.Factory.make(
250                             CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + number + "/common/main/", ".*");
251         } else {
252             if (MyOptions.directories.option.doesOccur()) {
253                 String directories = MyOptions.directories.option.getValue().trim();
254                 CLDRConfig cldrConfig = CONFIG;
255                 String base = null;
256                 int colonPos = directories.indexOf(':');
257                 if (colonPos >= 0) {
258                     base = directories.substring(0, colonPos).trim();
259                     directories = directories.substring(colonPos + 1).trim();
260                 } else {
261                     base = cldrConfig.getCldrBaseDirectory().toString();
262                 }
263                 String[] items = directories.split(",\\s*");
264                 File[] fullDirectories = new File[items.length];
265                 int i = 0;
266                 for (String item : items) {
267                     fullDirectories[i++] = new File(base + "/" + item + "/main");
268                 }
269                 factory = SimpleFactory.make(fullDirectories, ".*");
270                 COMMON_LOCALES =
271                         SimpleFactory.make(base + "/" + "common" + "/main", ".*")
272                                 .getAvailableLanguages();
273             }
274         }
275         fixCommonLocales();
276 
277         showCoverage(null, matcher, locales, useOrgLevel);
278     }
279 
fixCommonLocales()280     private static void fixCommonLocales() {
281         if (COMMON_LOCALES == null) {
282             COMMON_LOCALES = factory.getAvailableLanguages();
283         }
284     }
285 
286     public static class FoundAndTotal {
287         final int found;
288         final int total;
289 
290         @SafeVarargs
FoundAndTotal(Counter<Level>.... counters)291         public FoundAndTotal(Counter<Level>... counters) {
292             final int[] count = {0, 0, 0};
293             for (Level level : Level.values()) {
294                 if (level == Level.COMPREHENSIVE) {
295                     continue;
296                 }
297                 int i = 0;
298                 for (Counter<Level> counter : counters) {
299                     count[i++] += counter.get(level);
300                 }
301             }
302             found = count[0];
303             total = found + count[1] + count[2];
304         }
305 
306         @Override
toString()307         public String toString() {
308             return found + "/" + total;
309         }
310     }
311 
showCoverage(Anchors anchors, Matcher matcher)312     static void showCoverage(Anchors anchors, Matcher matcher) throws IOException {
313         showCoverage(anchors, matcher, null, false);
314     }
315 
showCoverage( Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)316     private static void showCoverage(
317             Anchors anchors, Matcher matcher, Set<String> locales, boolean useOrgLevel)
318             throws IOException {
319         final String title = "Locale Coverage";
320         try (PrintWriter pw = new PrintWriter(new FormattedFileWriter(null, title, null, anchors));
321                 PrintWriter tsv_summary =
322                         FileUtilities.openUTF8Writer(
323                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-coverage.tsv");
324                 PrintWriter tsv_missing =
325                         FileUtilities.openUTF8Writer(
326                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing.tsv");
327                 PrintWriter tsv_missing_summary =
328                         FileUtilities.openUTF8Writer(
329                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-summary.tsv");
330                 PrintWriter tsv_missing_basic =
331                         FileUtilities.openUTF8Writer(
332                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-basic.tsv");
333                 PrintWriter tsv_missing_counts =
334                         FileUtilities.openUTF8Writer(
335                                 CLDRPaths.CHART_DIRECTORY + "tsv/", "locale-missing-counts.tsv");
336                 TempPrintWriter propertiesCoverage =
337                         TempPrintWriter.openUTF8Writer(
338                                 CLDRPaths.COMMON_DIRECTORY + "properties/",
339                                 "coverageLevels.txt"); ) {
340             tsv_missing_summary.println(TSV_MISSING_SUMMARY_HEADER);
341             tsv_missing.println(TSV_MISSING_HEADER);
342             tsv_missing_basic.println(TSV_MISSING_BASIC_HEADER);
343             tsv_missing_counts.println(TSV_MISSING_COUNTS_HEADER);
344 
345             final int propertiesCoverageTabCount = 2;
346             propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, PROPERTIES_HEADER);
347 
348             Set<String> checkModernLocales =
349                     STANDARD_CODES.getLocaleCoverageLocales(
350                             Organization.cldr, EnumSet.of(Level.MODERN));
351             Set<String> availableLanguages = new TreeSet<>(factory.getAvailableLanguages());
352             availableLanguages.addAll(checkModernLocales);
353 
354             Multimap<String, String> languageToRegion = TreeMultimap.create();
355             LanguageTagParser ltp = new LanguageTagParser();
356             LanguageTagCanonicalizer ltc = new LanguageTagCanonicalizer(true);
357             for (String locale : factory.getAvailable()) {
358                 String country = ltp.set(locale).getRegion();
359                 if (!country.isEmpty()) {
360                     languageToRegion.put(ltc.transform(ltp.getLanguageScript()), country);
361                 }
362             }
363             languageToRegion = ImmutableMultimap.copyOf(languageToRegion);
364 
365             fixCommonLocales();
366 
367             System.out.println(Joiner.on("\n").join(languageToRegion.asMap().entrySet()));
368 
369             System.out.println("# Checking: " + availableLanguages);
370 
371             NumberFormat percentFormat = NumberFormat.getPercentInstance(Locale.ENGLISH);
372             percentFormat.setMaximumFractionDigits(1);
373 
374             pw.println(
375                     "<p style='text-align: left'>This chart shows the coverage levels in this release. "
376                             + "Totals are listed after the main chart.</p>\n"
377                             + "<blockquote><ul>\n"
378                             + "<li><a href='#main_table'>Main Table</a></li>\n"
379                             + "<li><a href='#level_counts'>Level Counts</a></li>\n"
380                             + "</ul></blockquote>\n"
381                             + "<h3>Column Key</h3>\n"
382                             + "<table class='subtle' style='margin-left:3em; margin-right:3em'>\n"
383                             + "<tr><th>Default Region</th><td>The default region for locale code, based on likely subtags</td></tr>\n"
384                             + "<tr><th>№ Locales</th><td>Note that the coverage of regional locales inherits from their parents.</td></tr>\n"
385                             + "<tr><th>Target Level</th><td>The default target Coverage Level in CLDR. "
386                             + "Particular organizations may have different target levels. "
387                             + "Languages with high levels of coverage are marked with ‡, even though they are not tracked by the technical committee.</td></tr>\n"
388                             + "<tr><th>≟</th><td>Indicates whether the CLDR Target is less than, equal to, or greater than the Computed Level.</td></tr>\n"
389                             + "<tr><th>Computed Level</th><td>Computed from the percentage values, "
390                             + "taking the first level that meets a threshold (currently �� "
391                             + percentFormat.format(MODERN_THRESHOLD)
392                             + ", ⓜ "
393                             + percentFormat.format(MODERATE_THRESHOLD)
394                             + ", ⓑ "
395                             + percentFormat.format(BASIC_THRESHOLD)
396                             + ").</td></tr>\n"
397                             + "<tr><th>ICU</th><td>Indicates whether included in the current version of ICU</td></tr>\n"
398                             + "<tr><th>Confirmed</th><td>Confirmed items as a percentage of all supplied items. "
399                             + "If low, the coverage can be improved by getting multiple organizations to confirm.</td></tr>\n"
400                             + "<tr><th>��%, ⓜ%, ⓑ%, ⓒ%</th><td>Coverage at Levels: �� = Modern, ⓜ = Moderate, ⓑ = Basic, ⓒ = Core. "
401                             + "The percentage of items at that level and below is computed from <i>confirmed_items/total_items</i>. "
402                             + "A high-level summary of the meaning of the coverage values is at "
403                             + "<a target='_blank' href='http://www.unicode.org/reports/tr35/tr35-info.html#Coverage_Levels'>Coverage Levels</a>. "
404                             + "The Core values are described on <a target='_blank' href='https://cldr.unicode.org/index/cldr-spec/core-data-for-new-locales'>Core Data</a>. "
405                             + "</td></tr>\n"
406                             + "<tr><th>Missing Features</th><td>These are not single items, but rather specific features, such as plural rules or unit grammar info. "
407                             + "They are listed if missing at the computed level. For more information, see <a href='https://cldr.unicode.org/index/locale-coverage'>Missing Features</a><br>"
408                             + "Example: <i>ⓜ collation</i> means this feature should be supported at a Moderate level.<br>"
409                             + "<ul><li>"
410                             + "<i>Except for Core, these are not accounted for in the percent values.</i>"
411                             + "</li><li>"
412                             + "The information needs to be provided in tickets, not through the Survey Tool."
413                             + "</li></ul>"
414                             + "</td></tr>\n"
415                             + "<tr><th>"
416                             + linkTsv("", "TSVFiles")
417                             + ":</th><td>\n"
418                             + "<ul><li>"
419                             + linkTsv("locale-coverage.tsv")
420                             + " — A version of this file, suitable for loading into a spreadsheet.</li>\n"
421                             + "<li>"
422                             + linkTsv("locale-missing.tsv")
423                             + " — Missing items for the CLDR target locales.</li>\n"
424                             + "<li>"
425                             + linkTsv("locale-missing-summary.tsv")
426                             + " — Summary of missing items for the CLDR target locales, by Section/Page/Header.</li>\n"
427                             + "<li>"
428                             + linkTsv("locale-missing-basic.tsv")
429                             + " — Missing items that keep locales from reaching the Basic level.</li>\n"
430                             + "<li>"
431                             + linkTsv("locale-missing-counts.tsv")
432                             + " — Counts of items per locale that are found, unconfirmed, or missing, at the target level. "
433                             + "(Or at *basic, if there is no target level.)</li>\n"
434                             + "</td></tr>\n"
435                             + "</table>\n");
436 
437             Relation<MissingStatus, String> missingPaths =
438                     Relation.of(
439                             new EnumMap<MissingStatus, Set<String>>(MissingStatus.class),
440                             TreeSet.class,
441                             CLDRFile.getComparator(DtdType.ldml));
442             Set<String> unconfirmed = new TreeSet<>(CLDRFile.getComparator(DtdType.ldml));
443 
444             Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
445 
446             Counter<Level> foundCounter = new Counter<>();
447             Counter<Level> unconfirmedCounter = new Counter<>();
448             Counter<Level> missingCounter = new Counter<>();
449 
450             List<Level> levelsToShow = new ArrayList<>(EnumSet.allOf(Level.class));
451             levelsToShow.remove(Level.COMPREHENSIVE);
452             levelsToShow.remove(Level.UNDETERMINED);
453             levelsToShow = ImmutableList.copyOf(levelsToShow);
454             List<Level> reversedLevels = new ArrayList<>(levelsToShow);
455             Collections.reverse(reversedLevels);
456             reversedLevels = ImmutableList.copyOf(reversedLevels);
457 
458             int localeCount = 0;
459 
460             final TablePrinter tablePrinter =
461                     new TablePrinter()
462                             .addColumn(
463                                     "Language",
464                                     "class='source'",
465                                     CldrUtility.getDoubleLinkMsg(),
466                                     "class='source'",
467                                     true)
468                             .setBreakSpans(true)
469                             .addColumn(
470                                     "English Name", "class='source'", null, "class='source'", true)
471                             .setBreakSpans(true)
472                             .addColumn(
473                                     "Native Name", "class='source'", null, "class='source'", true)
474                             .setBreakSpans(true)
475                             .addColumn("Script", "class='source'", null, "class='source'", true)
476                             .setBreakSpans(true)
477                             .addColumn(
478                                     "Default Region",
479                                     "class='source'",
480                                     null,
481                                     "class='source'",
482                                     true)
483                             .setBreakSpans(true)
484                             .addColumn(
485                                     "№ Locales",
486                                     "class='source'",
487                                     null,
488                                     "class='targetRight'",
489                                     true)
490                             .setBreakSpans(true)
491                             .setCellPattern("{0,number}")
492                             .addColumn(
493                                     "Target Level", "class='source'", null, "class='source'", true)
494                             .setBreakSpans(true)
495                             .addColumn("≟", "class='target'", null, "class='target'", true)
496                             .setBreakSpans(true)
497                             .setSortPriority(1)
498                             .setSortAscending(false)
499                             .addColumn(
500                                     "Computed Level",
501                                     "class='target'",
502                                     null,
503                                     "class='target'",
504                                     true)
505                             .setBreakSpans(true)
506                             .setSortPriority(0)
507                             .setSortAscending(false)
508                             .addColumn("ICU", "class='target'", null, "class='target'", true)
509                             .setBreakSpans(true)
510                             .addColumn(
511                                     "Confirmed",
512                                     "class='target'",
513                                     null,
514                                     "class='targetRight' style='color:gray'",
515                                     true)
516                             .setBreakSpans(true)
517                             .setCellPattern("{0,number,0.0%}");
518 
519             NumberFormat tsvPercent = NumberFormat.getPercentInstance(Locale.ENGLISH);
520             tsvPercent.setMaximumFractionDigits(2);
521 
522             for (Level level : reversedLevels) {
523                 String titleLevel = level.getAbbreviation() + "%";
524                 tablePrinter
525                         .addColumn(titleLevel, "class='target'", null, "class='targetRight'", true)
526                         .setCellPattern("{0,number,0.0%}")
527                         .setBreakSpans(true);
528 
529                 switch (level) {
530                     default:
531                         tablePrinter.setSortPriority(2).setSortAscending(false);
532                         break;
533                     case BASIC:
534                         tablePrinter.setSortPriority(3).setSortAscending(false);
535                         break;
536                     case MODERATE:
537                         tablePrinter.setSortPriority(4).setSortAscending(false);
538                         break;
539                     case MODERN:
540                         tablePrinter.setSortPriority(5).setSortAscending(false);
541                         break;
542                 }
543             }
544             tablePrinter
545                     .addColumn("Missing Features", "class='target'", null, "class='target'", true)
546                     .setBreakSpans(true);
547 
548             long start = System.currentTimeMillis();
549             LikelySubtags likelySubtags = new LikelySubtags();
550 
551             EnumMap<Level, Double> targetLevel = new EnumMap<>(Level.class);
552             targetLevel.put(Level.CORE, 2 / 100d);
553             targetLevel.put(Level.BASIC, 16 / 100d);
554             targetLevel.put(Level.MODERATE, 33 / 100d);
555             targetLevel.put(Level.MODERN, 100 / 100d);
556 
557             Multimap<String, String> pathToLocale = TreeMultimap.create();
558 
559             Counter<Level> computedLevels = new Counter<>();
560             Counter<Level> computedSublocaleLevels = new Counter<>();
561 
562             for (String locale : availableLanguages) {
563                 try {
564                     if (locale.contains("supplemental") // for old versionsl
565                     //                        || locale.startsWith("sr_Latn")
566                     ) {
567                         continue;
568                     }
569                     if (locales != null && !locales.contains(locale)) {
570                         String base = CLDRLocale.getInstance(locale).getLanguage();
571                         if (!locales.contains(base)) {
572                             continue;
573                         }
574                     }
575                     if (matcher != null && !matcher.reset(locale).matches()) {
576                         continue;
577                     }
578                     if (defaultContents.contains(locale)
579                             || LocaleNames.ROOT.equals(locale)
580                             || LocaleNames.UND.equals(locale)) {
581                         continue;
582                     }
583 
584                     tsv_missing_summary.flush();
585                     tsv_missing.flush();
586                     tsv_missing_basic.flush();
587                     tsv_missing_counts.flush();
588 
589                     boolean isSeed = new File(CLDRPaths.SEED_DIRECTORY, locale + ".xml").exists();
590 
591                     String region = ltp.set(locale).getRegion();
592                     if (!region.isEmpty()) continue; // skip regions
593 
594                     final Level cldrLocaleLevelGoal =
595                             SC.getLocaleCoverageLevel(Organization.cldr, locale);
596                     final String specialFlag = getSpecialFlag(locale);
597 
598                     final boolean cldrLevelGoalBasicToModern =
599                             Level.CORE_TO_MODERN.contains(cldrLocaleLevelGoal);
600 
601                     String max = likelySubtags.maximize(locale);
602                     final String script = ltp.set(max).getScript();
603                     final String defRegion = ltp.getRegion();
604 
605                     final String language = likelySubtags.minimize(locale);
606 
607                     missingPaths.clear();
608                     unconfirmed.clear();
609 
610                     final CLDRFile file = factory.make(locale, true, minimumDraftStatus);
611 
612                     if (locale.equals("af")) {
613                         int debug = 0;
614                     }
615 
616                     Iterable<String> pathSource = new IterableFilter(file.fullIterable());
617 
618                     VettingViewer.getStatus(
619                             pathSource,
620                             file,
621                             pathHeaderFactory,
622                             foundCounter,
623                             unconfirmedCounter,
624                             missingCounter,
625                             missingPaths,
626                             unconfirmed);
627 
628                     {
629                         long found = 0;
630                         long unconfirmedc = 0;
631                         long missing = 0;
632                         Level adjustedGoal =
633                                 cldrLocaleLevelGoal.compareTo(Level.BASIC) < 0
634                                         ? Level.BASIC
635                                         : cldrLocaleLevelGoal;
636                         for (Level level : Level.values()) {
637                             if (level.compareTo(adjustedGoal) <= 0) {
638                                 found += foundCounter.get(level);
639                                 unconfirmedc += unconfirmedCounter.get(level);
640                                 missing += missingCounter.get(level);
641                             }
642                         }
643                         String goalFlag = cldrLocaleLevelGoal == adjustedGoal ? "" : "*";
644                         tsv_missing_counts.println(
645                                 specialFlag
646                                         + locale
647                                         + "\t"
648                                         + goalFlag
649                                         + adjustedGoal
650                                         + "\t"
651                                         + found
652                                         + "\t"
653                                         + unconfirmedc
654                                         + "\t"
655                                         + missing);
656                     }
657 
658                     Collection<String> sublocales = languageToRegion.asMap().get(language);
659                     if (sublocales == null) {
660                         sublocales = Collections.emptySet();
661                     }
662                     sublocales = ImmutableSet.copyOf(sublocales);
663 
664                     // get the totals
665 
666                     EnumMap<Level, Integer> totals = new EnumMap<>(Level.class);
667                     EnumMap<Level, Integer> confirmed = new EnumMap<>(Level.class);
668                     Set<CoreItems> specialMissingPaths = EnumSet.noneOf(CoreItems.class);
669 
670                     StatusCounter starredCounter = new StatusCounter();
671 
672                     {
673                         Multimap<CoreItems, String> detailedErrors = TreeMultimap.create();
674                         Set<CoreItems> coverage =
675                                 CoreCoverageInfo.getCoreCoverageInfo(file, detailedErrors);
676                         for (CoreItems item : coverage) {
677                             foundCounter.add(item.desiredLevel, 1);
678                         }
679                         for (Entry<CoreItems, String> entry : detailedErrors.entries()) {
680                             CoreItems coreItem = entry.getKey();
681                             String path = entry.getValue();
682                             specialMissingPaths.add(coreItem);
683                             // if goal (eg modern) >= itemLevel, indicate it is missing
684                             if (coreItem.desiredLevel == Level.BASIC) {
685                                 starredCounter.gatherStarred(path, null);
686                             }
687                             missingCounter.add(coreItem.desiredLevel, 1);
688                         }
689                     }
690 
691                     if (cldrLevelGoalBasicToModern) {
692                         Level goalLevel = cldrLocaleLevelGoal;
693                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
694                             String path = entry.getValue();
695                             String status = entry.getKey().toString();
696                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
697                             if (goalLevel.compareTo(foundLevel) >= 0) {
698                                 String line =
699                                         spreadsheetLine(
700                                                 locale,
701                                                 language,
702                                                 script,
703                                                 specialFlag,
704                                                 file.getStringValue(path),
705                                                 goalLevel,
706                                                 foundLevel,
707                                                 status,
708                                                 path,
709                                                 file,
710                                                 pathToLocale);
711                                 String lineToPrint1 = line;
712                                 tsv_missing.println(lineToPrint1);
713                             }
714                         }
715                         for (String path : unconfirmed) {
716                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
717                             if (goalLevel.compareTo(foundLevel) >= 0) {
718                                 String line =
719                                         spreadsheetLine(
720                                                 locale,
721                                                 language,
722                                                 script,
723                                                 specialFlag,
724                                                 file.getStringValue(path),
725                                                 goalLevel,
726                                                 foundLevel,
727                                                 "n/a",
728                                                 path,
729                                                 file,
730                                                 pathToLocale);
731                                 tsv_missing.println(line);
732                             }
733                         }
734                     } else {
735                         Level goalLevel = Level.BASIC;
736                         for (Entry<MissingStatus, String> entry : missingPaths.entrySet()) {
737                             String path = entry.getValue();
738                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
739                             if (goalLevel.compareTo(foundLevel) >= 0) {
740                                 starredCounter.gatherStarred(path, null);
741                             }
742                         }
743                         for (String path : unconfirmed) {
744                             String fullPath = file.getFullXPath(path);
745                             DraftStatus draftStatus =
746                                     fullPath.contains("unconfirmed")
747                                             ? DraftStatus.unconfirmed
748                                             : DraftStatus.provisional;
749 
750                             Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
751                             if (goalLevel.compareTo(foundLevel) >= 0) {
752                                 starredCounter.gatherStarred(path, draftStatus);
753                             }
754                         }
755                     }
756 
757                     if (!starredCounter.starredPathToData.isEmpty()) {
758                         for (Entry<String, StatusData> starred :
759                                 starredCounter.starredPathToData.entrySet()) {
760                             String starredPath = starred.getKey();
761                             StatusData statusData = starred.getValue();
762                             String valueString =
763                                     statusData.values.stream()
764                                             .map(x -> Joiner.on(", ").join(x))
765                                             .collect(Collectors.joining("; "));
766 
767                             tsv_missing_basic.println(
768                                     specialFlag
769                                             + locale //
770                                             + "\t"
771                                             + statusData.missing //
772                                             + "\t"
773                                             + statusData.provisional //
774                                             + "\t"
775                                             + statusData.unconfirmed //
776                                             + "\t"
777                                             + starredPath.replace("\"*\"", "'*'")
778                                             + "\t"
779                                             + valueString
780                                     //
781                                     );
782                         }
783                         tsv_missing_basic.println(
784                                 specialFlag
785                                         + locale //
786                                         + "\t"
787                                         + starredCounter.missingTotal //
788                                         + "\t"
789                                         + starredCounter.provisionalTotal //
790                                         + "\t"
791                                         + starredCounter.unconfirmedTotal //
792                                         + "\tTotals\t");
793                         tsv_missing_basic.println("\t\t\t\t\t"); // for a proper table in github
794                     }
795 
796                     int sumFound = 0;
797                     int sumMissing = 0;
798                     int sumUnconfirmed = 0;
799 
800                     for (Level level : levelsToShow) {
801                         long foundCount = foundCounter.get(level);
802                         long unconfirmedCount = unconfirmedCounter.get(level);
803                         long missingCount = missingCounter.get(level);
804 
805                         sumFound += foundCount;
806                         sumUnconfirmed += unconfirmedCount;
807                         sumMissing += missingCount;
808 
809                         confirmed.put(level, sumFound);
810                         totals.put(level, sumFound + sumUnconfirmed + sumMissing);
811                     }
812 
813                     // double modernTotal = totals.get(Level.MODERN);
814 
815                     // first get the accumulated values
816                     EnumMap<Level, Integer> accumTotals = new EnumMap<>(Level.class);
817                     EnumMap<Level, Integer> accumConfirmed = new EnumMap<>(Level.class);
818                     int currTotals = 0;
819                     int currConfirmed = 0;
820                     for (Level level : levelsToShow) {
821                         currTotals += totals.get(level);
822                         currConfirmed += confirmed.get(level);
823                         accumConfirmed.put(level, currConfirmed);
824                         accumTotals.put(level, currTotals);
825                     }
826 
827                     // print the totals
828 
829                     Level computed = Level.UNDETERMINED;
830                     Map<Level, Double> levelToProportion = new EnumMap<>(Level.class);
831 
832                     for (Level level : reversedLevels) {
833                         int confirmedCoverage = accumConfirmed.get(level);
834                         double total = accumTotals.get(level);
835 
836                         final double proportion = confirmedCoverage / total;
837                         levelToProportion.put(level, proportion);
838 
839                         if (computed == Level.UNDETERMINED) {
840                             switch (level) {
841                                 case MODERN:
842                                     if (proportion >= MODERN_THRESHOLD) {
843                                         computed = level;
844                                     }
845                                     break;
846                                 case MODERATE:
847                                     if (proportion >= MODERATE_THRESHOLD) {
848                                         computed = level;
849                                     }
850                                     break;
851                                 case BASIC:
852                                     if (proportion >= BASIC_THRESHOLD) {
853                                         computed = level;
854                                     }
855                                     break;
856                                 default:
857                                     break;
858                             }
859                         }
860                     }
861 
862                     Set<CoreItems> shownMissingPaths = EnumSet.noneOf(CoreItems.class);
863                     Level computedWithCore =
864                             computed == Level.UNDETERMINED ? Level.BASIC : computed;
865                     for (CoreItems item : specialMissingPaths) {
866                         if (item.desiredLevel.compareTo(computedWithCore) <= 0) {
867                             shownMissingPaths.add(item);
868                         } else {
869                             int debug = 0;
870                         }
871                     }
872                     computedLevels.add(computed, 1);
873                     computedSublocaleLevels.add(computed, sublocales.size());
874 
875                     final String coreMissingString = Joiner.on(", ").join(shownMissingPaths);
876                     final String visibleLevelComputed =
877                             computed == Level.UNDETERMINED ? "" : computed.toString();
878                     final String visibleLevelGoal =
879                             cldrLocaleLevelGoal == Level.UNDETERMINED
880                                     ? ""
881                                     : specialFlag + cldrLocaleLevelGoal.toString();
882                     final String goalComparedToComputed =
883                             computed == cldrLocaleLevelGoal
884                                     ? " ≡"
885                                     : cldrLocaleLevelGoal.compareTo(computed) < 0 ? " <" : " >";
886 
887                     tablePrinter
888                             .addRow()
889                             .addCell(language)
890                             .addCell(ENGLISH.getName(language))
891                             .addCell(file.getName(language))
892                             .addCell(script)
893                             .addCell(defRegion)
894                             .addCell(sublocales.size())
895                             .addCell(visibleLevelGoal)
896                             .addCell(goalComparedToComputed)
897                             .addCell(visibleLevelComputed)
898                             .addCell(getIcuValue(language))
899                             .addCell(sumFound / (double) (sumFound + sumUnconfirmed));
900 
901                     // print the totals
902                     for (Level level : reversedLevels) {
903                         tablePrinter.addCell(levelToProportion.get(level));
904                     }
905 
906                     tablePrinter.addCell(coreMissingString).finishRow();
907 
908                     // now write properties file line
909 
910                     if (computed != Level.UNDETERMINED) {
911                         propertiesCoverage.printlnWithTabs(
912                                 propertiesCoverageTabCount,
913                                 locale
914                                         + " ;\t"
915                                         + visibleLevelComputed
916                                         + " ;\t"
917                                         + ENGLISH.getName(locale));
918                         // TODO decide whether to restore this
919                         //                        Level higher = Level.UNDETERMINED;
920                         //                        switch (computed) {
921                         //                        default:
922                         //                            higher = Level.UNDETERMINED;
923                         //                            break;
924                         //                        case MODERATE:
925                         //                            higher = Level.MODERN;
926                         //                            break;
927                         //                        case BASIC:
928                         //                            higher = Level.MODERATE;
929                         //                            break;
930                         //                        }
931                         //                        double higherProportion = higher ==
932                         // Level.UNDETERMINED ? 0d : levelToProportion.get(higher);
933                         //
934                         //                        if (higherProportion >= THRESHOLD_HIGHER) {
935                         //                            propertiesCoverage.println(
936                         //                                " ;\t" +
937                         // tsvPercent.format(higherProportion) +
938                         //                                " ;\t" + higher
939                         //                                );
940                         //                        } else {
941                         //                            propertiesCoverage.println(" ;\t" + "" + "
942                         // ;\t" + "");
943                         //                        }
944                     }
945                     localeCount++;
946                 } catch (Exception e) {
947                     throw new IllegalArgumentException(e);
948                 }
949             }
950             String lineToPrint = "\n#EOF";
951             propertiesCoverage.printlnWithTabs(propertiesCoverageTabCount, lineToPrint);
952 
953             pw.println("<h3><a name='main_table' href='#main_table'>Main Table</a></h3>");
954             pw.println(tablePrinter.toTable());
955 
956             pw.println(
957                     "<h3><a name='level_counts' href='#level_counts'>Level Counts</a></h3>\n"
958                             + "<table class='subtle'><tr>\n"
959                             + "<th style='text-align:left'>"
960                             + "Level"
961                             + "</th>"
962                             + "<th style='text-align:left'>"
963                             + "Languages"
964                             + "</th>"
965                             + "<th style='text-align:left'>"
966                             + "Locales"
967                             + "</th>"
968                             + "</tr>");
969             long totalCount = 0;
970             long totalLocaleCount = 0;
971             for (Level level : Lists.reverse(Arrays.asList(Level.values()))) {
972                 final long count = computedLevels.get(level);
973                 final long localesCount = computedSublocaleLevels.get(level);
974                 if (count == 0 || level == Level.UNDETERMINED) {
975                     continue;
976                 }
977                 totalCount += count;
978                 totalLocaleCount += localesCount;
979                 String visibleImputed =
980                         level == Level.UNDETERMINED
981                                 ? "<" + Level.BASIC.toString()
982                                 : level.toString();
983                 pw.println(
984                         "<tr>"
985                                 + "<th style='text-align:left'>"
986                                 + visibleImputed
987                                 + "</th>"
988                                 + "<td style='text-align:right'>"
989                                 + count
990                                 + "</td>"
991                                 + "<td style='text-align:right'>"
992                                 + localesCount
993                                 + "</td>"
994                                 + "</tr>");
995             }
996             pw.println(
997                     "<tr>"
998                             + "<th style='text-align:left'>"
999                             + "Total"
1000                             + "</th>"
1001                             + "<td style='text-align:right'>"
1002                             + totalCount
1003                             + "</td>"
1004                             + "<td style='text-align:right'>"
1005                             + totalLocaleCount
1006                             + "</td>"
1007                             + "</tr>\n");
1008 
1009             pw.println(
1010                     "<tr>"
1011                             + "<th style='text-align:left'>"
1012                             + "in dev."
1013                             + "</th>"
1014                             + "<td style='text-align:right'>"
1015                             + computedLevels.get(Level.UNDETERMINED)
1016                             + "</td>"
1017                             + "<td style='text-align:right'>"
1018                             + computedSublocaleLevels.get(Level.UNDETERMINED)
1019                             + "</td>"
1020                             + "</tr>\n"
1021                             + "</table>");
1022 
1023             Multimap<Level, String> levelToLocales = TreeMultimap.create();
1024 
1025             for (Entry<String, Collection<String>> entry : pathToLocale.asMap().entrySet()) {
1026                 String path = entry.getKey();
1027                 Collection<String> localeSet = entry.getValue();
1028                 levelToLocales.clear();
1029                 for (String locale : localeSet) {
1030                     Level foundLevel = coverageInfo.getCoverageLevel(path, locale);
1031                     levelToLocales.put(foundLevel, locale);
1032                 }
1033                 String phString = "n/a\tn/a\tn/a\tn/a";
1034                 try {
1035                     PathHeader ph = pathHeaderFactory.fromPath(path);
1036                     phString = ph.toString();
1037                 } catch (Exception e) {
1038                 }
1039                 for (Entry<Level, Collection<String>> entry2 : levelToLocales.asMap().entrySet()) {
1040                     Level level = entry2.getKey();
1041                     localeSet = entry2.getValue();
1042                     tsv_missing_summary.println(
1043                             level
1044                                     + "\t"
1045                                     + localeSet.size()
1046                                     + "\t"
1047                                     + Joiner.on(" ")
1048                                             .join(
1049                                                     localeSet.stream()
1050                                                             .map(x -> x + getSpecialFlag(x))
1051                                                             .collect(Collectors.toSet()))
1052                                     + "\t"
1053                                     + phString);
1054                 }
1055             }
1056             tablePrinter.toTsv(tsv_summary);
1057             long end = System.currentTimeMillis();
1058             System.out.println(
1059                     (end - start)
1060                             + " millis = "
1061                             + ((end - start) / localeCount)
1062                             + " millis/locale");
1063             ShowPlurals.appendBlanksForScrolling(pw);
1064         }
1065     }
1066 
linkTsv(String tsvFileName)1067     private static String linkTsv(String tsvFileName) {
1068         return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + tsvFileName + "</a>";
1069     }
1070 
linkTsv(String tsvFileName, String anchorText)1071     private static String linkTsv(String tsvFileName, String anchorText) {
1072         return "<a href='" + TSV_BASE + tsvFileName + "' target='cldr-tsv'>" + anchorText + "</a>";
1073     }
1074 
getSpecialFlag(String locale)1075     private static String getSpecialFlag(String locale) {
1076         return SC.getLocaleCoverageLevel(Organization.special, locale) == Level.UNDETERMINED
1077                 ? ""
1078                 : "‡";
1079     }
1080 
1081     private static class IterableFilter implements Iterable<String> {
1082         private Iterable<String> source;
1083 
IterableFilter(Iterable<String> source)1084         IterableFilter(Iterable<String> source) {
1085             this.source = source;
1086         }
1087 
1088         /**
1089          * When some paths are defined after submission, we need to change them to COMPREHENSIVE in
1090          * computing the vetting status.
1091          */
1092         private static final Set<String> SUPPRESS_PATHS_AFTER_SUBMISSION = ImmutableSet.of();
1093 
1094         @Override
iterator()1095         public Iterator<String> iterator() {
1096             return new IteratorFilter(source.iterator());
1097         }
1098 
1099         private static class IteratorFilter implements Iterator<String> {
1100             Iterator<String> source;
1101             String peek;
1102 
IteratorFilter(Iterator<String> source)1103             public IteratorFilter(Iterator<String> source) {
1104                 this.source = source;
1105                 fillPeek();
1106             }
1107 
1108             @Override
hasNext()1109             public boolean hasNext() {
1110                 return peek != null;
1111             }
1112 
1113             @Override
next()1114             public String next() {
1115                 String result = peek;
1116                 fillPeek();
1117                 return result;
1118             }
1119 
fillPeek()1120             private void fillPeek() {
1121                 peek = null;
1122                 while (source.hasNext()) {
1123                     peek = source.next();
1124                     // if it is ok to assess, then break
1125                     if (!SUPPRESS_PATHS_AFTER_SUBMISSION.contains(peek)
1126                             && SUPPRESS_PATHS_CAN_BE_EMPTY.get(peek) != Boolean.TRUE) {
1127                         break;
1128                     }
1129                     peek = null;
1130                 }
1131             }
1132         }
1133     }
1134 
1135     private static final CoverageInfo coverageInfo = new CoverageInfo(SUPPLEMENTAL_DATA_INFO);
1136 
spreadsheetLine( String locale, String language, String script, String specialFlag, String nativeValue, Level cldrLocaleLevelGoal, Level itemLevel, String status, String path, CLDRFile resolvedFile, Multimap<String, String> pathToLocale)1137     private static String spreadsheetLine(
1138             String locale,
1139             String language,
1140             String script,
1141             String specialFlag,
1142             String nativeValue,
1143             Level cldrLocaleLevelGoal,
1144             Level itemLevel,
1145             String status,
1146             String path,
1147             CLDRFile resolvedFile,
1148             Multimap<String, String> pathToLocale) {
1149         if (pathToLocale != null) {
1150             pathToLocale.put(path, locale);
1151         }
1152         //        String stLink = "n/a";
1153         //        String englishValue = "n/a";
1154         //        StatusAction action = null;
1155         //        String icuValue = getIcuValue(locale);
1156 
1157         SurveyToolStatus surveyToolStatus = null;
1158         String bailey = resolvedFile == null ? "" : resolvedFile.getStringValue(path);
1159 
1160         String phString = "na\tn/a\tn/a\t" + path;
1161         try {
1162             PathHeader ph = pathHeaderFactory.fromPath(path);
1163             phString = ph.toString();
1164             //            stLink = URLS.forXpath(locale, path);
1165             //            englishValue = ENGLISH.getStringValue(path);
1166             //            action = Phase.SUBMISSION.getShowRowAction(dummyPathValueInfo,
1167             // InputMethod.DIRECT, ph, dummyUserInfo);
1168         } catch (Exception e) {
1169 
1170         }
1171 
1172         String line =
1173                 specialFlag
1174                         + language
1175                         + "\t"
1176                         + ENGLISH.getName(language)
1177                         + "\t"
1178                         + ENGLISH.getName("script", script)
1179                         + "\t"
1180                         + cldrLocaleLevelGoal
1181                         + "\t"
1182                         + itemLevel
1183                         + "\t"
1184                         + (surveyToolStatus == null ? "n/a" : surveyToolStatus.toString())
1185                         + "\t"
1186                         + bailey
1187                         + "\t"
1188                         + phString
1189                         + "\t"
1190                         + PathHeader.getUrlForLocalePath(locale, path);
1191         return line;
1192     }
1193 
getIcuValue(String locale)1194     private static String getIcuValue(String locale) {
1195         return ICU_Locales.contains(new ULocale(locale)) ? "ICU" : "";
1196     }
1197 
1198     private static final Set<ULocale> ICU_Locales =
1199             ImmutableSet.copyOf(ULocale.getAvailableLocales());
1200 }
1201