xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyCompactNumbers.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.ibm.icu.text.CompactDecimalFormat;
4 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
5 import com.ibm.icu.text.NumberFormat;
6 import com.ibm.icu.util.Currency;
7 import com.ibm.icu.util.ICUUncheckedIOException;
8 import com.ibm.icu.util.ULocale;
9 import java.io.File;
10 import java.io.IOException;
11 import java.io.PrintWriter;
12 import java.util.Arrays;
13 import java.util.HashSet;
14 import java.util.LinkedHashSet;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.TreeMap;
18 import java.util.TreeSet;
19 import java.util.regex.Pattern;
20 import org.unicode.cldr.draft.FileUtilities;
21 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat;
22 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat.CurrencyStyle;
23 import org.unicode.cldr.tool.ChartDelta;
24 import org.unicode.cldr.tool.FormattedFileWriter;
25 import org.unicode.cldr.tool.Option;
26 import org.unicode.cldr.tool.Option.Options;
27 import org.unicode.cldr.tool.ShowData;
28 import org.unicode.cldr.tool.ShowPlurals;
29 import org.unicode.cldr.tool.TablePrinter;
30 import org.unicode.cldr.util.CLDRFile.DraftStatus;
31 import org.unicode.cldr.util.PathHeader.PageId;
32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
33 
34 public class VerifyCompactNumbers {
35 
36     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
37     private static final String DIR = CLDRPaths.VERIFY_DIR + "numbers/";
38 
39     static final Options myOptions = new Options();
40 
41     enum MyOptions {
42         organization(".*", "CLDR", "organization"),
43         filter(".*", ".*", "locale filter (regex)"),
44         currency(".*", "EUR", "show currency"),
45         ;
46         // boilerplate
47         final Option option;
48 
MyOptions(String argumentPattern, String defaultArgument, String helpText)49         MyOptions(String argumentPattern, String defaultArgument, String helpText) {
50             option = myOptions.add(this, argumentPattern, defaultArgument, helpText);
51         }
52     }
53 
54     // later, look at DateTimeFormats to set up as an HTML table
55 
56     public static final Set<String> USES_GROUPS_OF_4 =
57             new HashSet<>(Arrays.asList("ko", "ja", "zh", "zh_Hant"));
58 
59     /**
60      * Produce a set of static tables from the vxml data. Only a stopgap until the above is
61      * integrated into ST.
62      *
63      * @param args
64      * @throws IOException
65      */
main(String[] args)66     public static void main(String[] args) throws IOException {
67         myOptions.parse(MyOptions.organization, args, true);
68         new File(DIR).mkdirs();
69         FileCopier.copy(ShowData.class, "verify-index.html", CLDRPaths.VERIFY_DIR, "index.html");
70         FileCopier.copy(ChartDelta.class, "index.css", CLDRPaths.VERIFY_DIR, "index.css");
71         FormattedFileWriter.copyIncludeHtmls(CLDRPaths.VERIFY_DIR);
72 
73         String organization = MyOptions.organization.option.getValue();
74         String filter = MyOptions.filter.option.getValue();
75         boolean showCurrency = true; // MyOptions.currency.option.doesOccur();
76         String currencyCode = MyOptions.currency.option.getValue();
77 
78         Factory factory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
79         CLDRFile englishCldrFile = factory2.make("en", true);
80 
81         SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo();
82         Set<String> defaultContentLocales = sdi.getDefaultContentLocales();
83         NumberFormat enf = NumberFormat.getIntegerInstance(ULocale.ENGLISH);
84         enf.setGroupingUsed(false);
85 
86         Set<String> availableLanguages = new TreeSet<>(factory2.getAvailableLanguages());
87         if (Pattern.matches(filter, "pt_PT")) {
88             availableLanguages.add("pt_PT");
89         }
90 
91         PrintWriter plainText = FileUtilities.openUTF8Writer(DIR, "compactTestFile.txt");
92         DateTimeFormats.writeCss(DIR);
93         final CLDRFile english = CLDR_CONFIG.getEnglish();
94 
95         Map<String, String> indexMap = new TreeMap<>(CLDR_CONFIG.getCollator());
96 
97         for (String locale : availableLanguages) {
98             if (defaultContentLocales.contains(locale)) {
99                 continue;
100             }
101             Level level = StandardCodes.make().getLocaleCoverageLevel(organization, locale);
102             if (Level.MODERN.compareTo(level) > 0) {
103                 continue;
104             }
105             // TODO: fix to ignore locales with no data.
106             if (locale.equals("ne") || locale.equals("cy")) {
107                 continue;
108             }
109 
110             PrintWriter out = FileUtilities.openUTF8Writer(DIR, locale + ".html");
111             String title = "Verify Number Formats: " + englishCldrFile.getName(locale);
112             out.println(
113                     "<!doctype HTML PUBLIC '-//W3C//DTD HTML 4.0 Transitional//EN'><html><head>\n"
114                             + "<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>\n"
115                             + "<title>"
116                             + title
117                             + "</title>\n"
118                             + "<link rel='stylesheet' type='text/css' href='index.css'>\n"
119                             + "</head><body><h1>"
120                             + title
121                             + "</h1>\n"
122                             + "<p><a href='index.html'>Index</a></p>\n");
123 
124             CLDRFile cldrFile = factory2.make(locale, true, DraftStatus.contributed);
125 
126             showNumbers(cldrFile, showCurrency, currencyCode, out, factory2);
127 
128             out.println("</body></html>");
129             out.close();
130             indexMap.put(english.getName(locale), locale + ".html");
131         }
132         try (PrintWriter index = DateTimeFormats.openIndex(DIR, "Numbers")) {
133             DateTimeFormats.writeIndexMap(indexMap, index);
134         }
135 
136         plainText.close();
137     }
138 
showNumbers( CLDRFile cldrFile, boolean showCurrency, String currencyCode, Appendable out, Factory factory)139     public static void showNumbers(
140             CLDRFile cldrFile,
141             boolean showCurrency,
142             String currencyCode,
143             Appendable out,
144             Factory factory) {
145         try {
146             Set<String> debugCreationErrors = new LinkedHashSet<>();
147             Set<String> errors = new LinkedHashSet<>();
148             String locale = cldrFile.getLocaleID();
149 
150             TablePrinter tablePrinter1 =
151                     new TablePrinter()
152                             // .setCaption("Timezone Formats")
153                             .setTableAttributes("class='dtf-table'")
154                             .addColumn("Numeric Format")
155                             .setHeaderCell(true)
156                             .setHeaderAttributes("class='dtf-th'")
157                             .setCellAttributes("class='dtf-s'")
158                             .addColumn("Compact-Short")
159                             .setHeaderAttributes("class='dtf-th'")
160                             .setCellAttributes("class='dtf-s'")
161                             .addColumn("Compact-Long")
162                             .setHeaderAttributes("class='dtf-th'")
163                             .setCellAttributes("class='dtf-s'");
164             if (showCurrency) {
165                 tablePrinter1
166                         .addColumn("Compact-Short<br>+Currency")
167                         .setHeaderAttributes("class='dtf-th'")
168                         .setCellAttributes("class='dtf-s'")
169                 //                    .addColumn("Compact-Short<br>+Unit")
170                 //                    .setHeaderAttributes("class='dtf-th'")
171                 //                    .setCellAttributes("class='dtf-s'")
172                 // .addColumn("Compact-Long<br>+Currency")
173                 // .addColumn("Compact-Long<br>+Currency-Long")
174                 //                    .addColumn("Numeric
175                 // Format").setHeaderCell(true).setHeaderAttributes("class='dtf-th'")
176                 //                      .setCellAttributes("class='dtf-s'")
177                 ;
178             }
179             //
180             // tablePrinter1.addColumn("View").setHeaderCell(true).setHeaderAttributes("class='dtf-th'").setCellAttributes("class='dtf-s'");
181 
182             ULocale locale2 = new ULocale(locale);
183             ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile);
184             NumberFormat nf = builder.getNumberFormat(1);
185 
186             // nf.setMaximumFractionDigits(0);
187             SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo();
188             String[] debugOriginals = null;
189             CompactDecimalFormat cdf =
190                     BuildIcuCompactDecimalFormat.build(
191                             cldrFile,
192                             debugCreationErrors,
193                             debugOriginals,
194                             CompactStyle.SHORT,
195                             locale2,
196                             CurrencyStyle.PLAIN,
197                             currencyCode);
198             captureErrors(debugCreationErrors, errors, locale, "short");
199             CompactDecimalFormat cdfs =
200                     BuildIcuCompactDecimalFormat.build(
201                             cldrFile,
202                             debugCreationErrors,
203                             debugOriginals,
204                             CompactStyle.LONG,
205                             locale2,
206                             CurrencyStyle.PLAIN,
207                             currencyCode);
208             captureErrors(debugCreationErrors, errors, locale, "long");
209 
210             CompactDecimalFormat cdfCurr =
211                     BuildIcuCompactDecimalFormat.build(
212                             cldrFile,
213                             debugCreationErrors,
214                             debugOriginals,
215                             CompactStyle.SHORT,
216                             locale2,
217                             CurrencyStyle.CURRENCY,
218                             currencyCode);
219             captureErrors(debugCreationErrors, errors, locale, "short-curr");
220             //            CompactDecimalFormat cdfU = BuildIcuCompactDecimalFormat.build(cldrFile,
221             // debugCreationErrors,
222             //                debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.UNIT,
223             // "EUR");
224             //            captureErrors(debugCreationErrors, errors, locale, "short-kg");
225             //             CompactDecimalFormat cdfsCurr =
226             // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors,
227             //             debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.CURRENCY,
228             // currencyCode);
229             //             CompactDecimalFormat cdfsCurrISO =
230             // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors,
231             //             debugOriginals, CompactStyle.LONG, locale2, CurrencyStyle.ISO_CURRENCY,
232             // "EUR");
233 
234             Set<Double> allSamples =
235                     collectSamplesAndSetFormats(currencyCode, locale, sdi, cdf, cdfs, cdfCurr);
236 
237             try {
238                 for (double source : allSamples) {
239                     if (false && source == 22000000 && locale.equals("cs")) {
240                         System.out.println("**");
241                     }
242 
243                     String formattedNumber = nf.format(source);
244                     String compactFormattedNumber = cdf == null ? "n/a" : cdf.format(source);
245                     String compactLongFormattedNumber = cdfs == null ? "n/a" : cdfs.format(source);
246                     String compactCurrFormattedNumber =
247                             !showCurrency || cdfs == null ? "n/a" : cdfCurr.format(source);
248                     // plainText.println(locale
249                     // + "\t__" + source
250                     // + "\t__" + compactFormattedNumber
251                     // + "\t__" + compactLongFormattedNumber
252                     // );
253                     tablePrinter1
254                             .addRow()
255                             .addCell(formattedNumber)
256                             .addCell(compactFormattedNumber)
257                             .addCell(compactLongFormattedNumber);
258                     if (showCurrency) {
259                         tablePrinter1.addCell(compactCurrFormattedNumber)
260                         //                            .addCell(cdfU.format(source))
261                         //                             .addCell(cdfsCurr.format(source))
262                         // .addCell(cdfsCurrLong.format(source))
263                         // .addCell(cdfsCurrLong.format(source))
264                         // .addCell(formattedNumber)
265                         ;
266                     }
267                     //                    String view = PathHeader.getLinkedView(surveyUrl,
268                     // cldrFile, METAZONE_PREFIX + metazone + METAZONE_SUFFIX);
269                     //                    tablePrinter1.addCell(view == null
270                     //                            ? ""
271                     //                                    : view);
272                     tablePrinter1.finishRow();
273                 }
274             } catch (Exception e) {
275                 e.printStackTrace();
276             }
277             out.append(
278                     "<p>To correct problems in compact numbers below, please go to "
279                             + PathHeader.SECTION_LINK
280                             + CLDR_CONFIG
281                                     .urls()
282                                     .forPage(
283                                             cldrFile.getLocaleID(),
284                                             PageId.Compact_Decimal_Formatting)
285                             + "'><em>"
286                             + PageId.Compact_Decimal_Formatting
287                             + "</em></a>.</p>");
288             out.append(tablePrinter1.toString() + "\n");
289             out.append("<h3>Plural Rules</h3>");
290             out.append(
291                     "<p>Look over the Minimal Pairs to make sure they are ok. "
292                             + "Then review the examples in the cell to the left. "
293                             + "All of those you should be able to substitute for the numbers in the Minimal Pairs, "
294                             + "with an acceptable result. "
295                             + "If any would be incorrect, please "
296                             + "<a target='ticket' href='"
297                             + CLDRURLS.CLDR_NEWTICKET_URL
298                             + "'>file a ticket</a>.</p>"
299                             + "<p>For more details, see "
300                             + "<a target='CLDR-ST-DOCS' href='http://cldr.unicode.org/index/cldr-spec/plural-rules'>Plural Rules</a>.</p>");
301             ShowPlurals showPlurals = new ShowPlurals(CLDR_CONFIG.getSupplementalDataInfo());
302             showPlurals.printPluralTable(cldrFile, locale, out, factory);
303             ShowPlurals.appendBlanksForScrolling(out);
304             showErrors(errors, out);
305             showErrors(debugCreationErrors, out);
306         } catch (IOException e) {
307             throw new ICUUncheckedIOException(e);
308         }
309     }
310 
collectSamplesAndSetFormats( String currencyCode, String locale, SupplementalDataInfo sdi, CompactDecimalFormat cdf, CompactDecimalFormat cdfs, CompactDecimalFormat cdfCurr)311     public static Set<Double> collectSamplesAndSetFormats(
312             String currencyCode,
313             String locale,
314             SupplementalDataInfo sdi,
315             CompactDecimalFormat cdf,
316             CompactDecimalFormat cdfs,
317             CompactDecimalFormat cdfCurr) {
318         // Collect samples for display
319         // one path for group-3, one for group-4
320         // TODO, fix for indic.
321         int factor = USES_GROUPS_OF_4.contains(locale) ? 10000 : 1000;
322 
323         // we want to collect a sample of at least one sample for each plural category for each
324         // power of ten
325         PluralInfo pluralInfo = sdi.getPlurals(locale);
326         Set<Double> samples = new TreeSet<>();
327         samples.add(1.1d);
328         samples.add(1.5d);
329         samples.add(1100d);
330         collectItems(pluralInfo, 1, 10, samples);
331         collectItems(pluralInfo, 10, 100, samples);
332         collectItems(pluralInfo, 100, 1000, samples);
333         int sigDigits = 3;
334         if (factor > 1000) {
335             collectItems(pluralInfo, 1000, 10000, samples);
336             sigDigits = 4;
337         }
338         if (cdf != null) {
339             cdf.setMaximumSignificantDigits(sigDigits);
340         }
341         if (cdfs != null) {
342             cdfs.setMaximumSignificantDigits(sigDigits);
343         }
344         if (cdfCurr != null) {
345             cdfCurr.setCurrency(Currency.getInstance(currencyCode));
346             cdfCurr.setMaximumSignificantDigits(sigDigits);
347         }
348         //            cdfU.setMaximumSignificantDigits(sigDigits);
349 
350         // for (Entry<Count, List<Double>> entry : pluralInfo.getCountToExamplesMap().entrySet()) {
351         // samples.add(entry.getValue().get(0));
352         // }
353         //
354         // Set<Double> samples2 = new TreeSet<Double>();
355         // for (int i = 10; i < factor; i *= 10) {
356         // for (Double sample : samples) {
357         // samples2.add(sample*i);
358         // }
359         // }
360         // samples.addAll(samples2);
361 
362         Set<Double> allSamples = new TreeSet<>();
363         for (long i = 1; i <= 100000000000000L; i *= factor) {
364             for (Double sample : samples) {
365                 double source = i * sample;
366                 allSamples.add(source);
367             }
368         }
369         return allSamples;
370     }
371 
372     private static String surveyUrl =
373             CLDR_CONFIG.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey");
374 
showErrors(Set<String> errors, Appendable out)375     private static void showErrors(Set<String> errors, Appendable out) throws IOException {
376         if (errors.size() != 0) {
377             out.append("<h2>" + "Errors" + "</h2>\n");
378             for (String s : errors) {
379                 out.append("<p>" + s + "</p>\n");
380             }
381             errors.clear();
382         }
383     }
384 
collectItems( PluralInfo pluralInfo, double start, double limit, Set<Double> samples)385     private static Set<Double> collectItems(
386             PluralInfo pluralInfo, double start, double limit, Set<Double> samples) {
387         // TODO optimize once we have all the keywords
388         Map<String, Double> ones = new TreeMap<>();
389         for (double i = start; i < limit; ++i) {
390             String cat = pluralInfo.getPluralRules().select(i);
391             if (ones.containsKey(cat)) {
392                 continue;
393             }
394             ones.put(cat, i);
395         }
396         samples.addAll(ones.values());
397         return samples;
398     }
399 
captureErrors( Set<String> debugCreationErrors, Set<String> errors, String locale, String length)400     private static void captureErrors(
401             Set<String> debugCreationErrors, Set<String> errors, String locale, String length) {
402         if (debugCreationErrors.size() != 0) {
403             for (String s : debugCreationErrors) {
404                 errors.add(locale + "\t" + length + "\t" + s);
405             }
406             debugCreationErrors.clear();
407         }
408     }
409 }
410