1 package org.unicode.cldr.util; 2 3 import com.ibm.icu.text.CompactDecimalFormat; 4 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; 5 import com.ibm.icu.text.NumberFormat; 6 import com.ibm.icu.util.Currency; 7 import com.ibm.icu.util.ICUUncheckedIOException; 8 import com.ibm.icu.util.ULocale; 9 import java.io.File; 10 import java.io.IOException; 11 import java.io.PrintWriter; 12 import java.util.Arrays; 13 import java.util.HashSet; 14 import java.util.LinkedHashSet; 15 import java.util.Map; 16 import java.util.Set; 17 import java.util.TreeMap; 18 import java.util.TreeSet; 19 import java.util.regex.Pattern; 20 import org.unicode.cldr.draft.FileUtilities; 21 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat; 22 import org.unicode.cldr.test.BuildIcuCompactDecimalFormat.CurrencyStyle; 23 import org.unicode.cldr.tool.ChartDelta; 24 import org.unicode.cldr.tool.FormattedFileWriter; 25 import org.unicode.cldr.tool.Option; 26 import org.unicode.cldr.tool.Option.Options; 27 import org.unicode.cldr.tool.ShowData; 28 import org.unicode.cldr.tool.ShowPlurals; 29 import org.unicode.cldr.tool.TablePrinter; 30 import org.unicode.cldr.util.CLDRFile.DraftStatus; 31 import org.unicode.cldr.util.PathHeader.PageId; 32 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 33 34 public class VerifyCompactNumbers { 35 36 private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); 37 private static final String DIR = CLDRPaths.VERIFY_DIR + "numbers/"; 38 39 static final Options myOptions = new Options(); 40 41 enum MyOptions { 42 organization(".*", "CLDR", "organization"), 43 filter(".*", ".*", "locale filter (regex)"), 44 currency(".*", "EUR", "show currency"), 45 ; 46 // boilerplate 47 final Option option; 48 MyOptions(String argumentPattern, String defaultArgument, String helpText)49 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 50 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 51 } 52 } 53 54 // later, look at DateTimeFormats to set up as an HTML table 55 56 public static final Set<String> USES_GROUPS_OF_4 = 57 new HashSet<>(Arrays.asList("ko", "ja", "zh", "zh_Hant")); 58 59 /** 60 * Produce a set of static tables from the vxml data. Only a stopgap until the above is 61 * integrated into ST. 62 * 63 * @param args 64 * @throws IOException 65 */ main(String[] args)66 public static void main(String[] args) throws IOException { 67 myOptions.parse(MyOptions.organization, args, true); 68 new File(DIR).mkdirs(); 69 FileCopier.copy(ShowData.class, "verify-index.html", CLDRPaths.VERIFY_DIR, "index.html"); 70 FileCopier.copy(ChartDelta.class, "index.css", CLDRPaths.VERIFY_DIR, "index.css"); 71 FormattedFileWriter.copyIncludeHtmls(CLDRPaths.VERIFY_DIR); 72 73 String organization = MyOptions.organization.option.getValue(); 74 String filter = MyOptions.filter.option.getValue(); 75 boolean showCurrency = true; // MyOptions.currency.option.doesOccur(); 76 String currencyCode = MyOptions.currency.option.getValue(); 77 78 Factory factory2 = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter); 79 CLDRFile englishCldrFile = factory2.make("en", true); 80 81 SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo(); 82 Set<String> defaultContentLocales = sdi.getDefaultContentLocales(); 83 NumberFormat enf = NumberFormat.getIntegerInstance(ULocale.ENGLISH); 84 enf.setGroupingUsed(false); 85 86 Set<String> availableLanguages = new TreeSet<>(factory2.getAvailableLanguages()); 87 if (Pattern.matches(filter, "pt_PT")) { 88 availableLanguages.add("pt_PT"); 89 } 90 91 PrintWriter plainText = FileUtilities.openUTF8Writer(DIR, "compactTestFile.txt"); 92 DateTimeFormats.writeCss(DIR); 93 final CLDRFile english = CLDR_CONFIG.getEnglish(); 94 95 Map<String, String> indexMap = new TreeMap<>(CLDR_CONFIG.getCollator()); 96 97 for (String locale : availableLanguages) { 98 if (defaultContentLocales.contains(locale)) { 99 continue; 100 } 101 Level level = StandardCodes.make().getLocaleCoverageLevel(organization, locale); 102 if (Level.MODERN.compareTo(level) > 0) { 103 continue; 104 } 105 // TODO: fix to ignore locales with no data. 106 if (locale.equals("ne") || locale.equals("cy")) { 107 continue; 108 } 109 110 PrintWriter out = FileUtilities.openUTF8Writer(DIR, locale + ".html"); 111 String title = "Verify Number Formats: " + englishCldrFile.getName(locale); 112 out.println( 113 "<!doctype HTML PUBLIC '-//W3C//DTD HTML 4.0 Transitional//EN'><html><head>\n" 114 + "<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>\n" 115 + "<title>" 116 + title 117 + "</title>\n" 118 + "<link rel='stylesheet' type='text/css' href='index.css'>\n" 119 + "</head><body><h1>" 120 + title 121 + "</h1>\n" 122 + "<p><a href='index.html'>Index</a></p>\n"); 123 124 CLDRFile cldrFile = factory2.make(locale, true, DraftStatus.contributed); 125 126 showNumbers(cldrFile, showCurrency, currencyCode, out, factory2); 127 128 out.println("</body></html>"); 129 out.close(); 130 indexMap.put(english.getName(locale), locale + ".html"); 131 } 132 try (PrintWriter index = DateTimeFormats.openIndex(DIR, "Numbers")) { 133 DateTimeFormats.writeIndexMap(indexMap, index); 134 } 135 136 plainText.close(); 137 } 138 showNumbers( CLDRFile cldrFile, boolean showCurrency, String currencyCode, Appendable out, Factory factory)139 public static void showNumbers( 140 CLDRFile cldrFile, 141 boolean showCurrency, 142 String currencyCode, 143 Appendable out, 144 Factory factory) { 145 try { 146 Set<String> debugCreationErrors = new LinkedHashSet<>(); 147 Set<String> errors = new LinkedHashSet<>(); 148 String locale = cldrFile.getLocaleID(); 149 150 TablePrinter tablePrinter1 = 151 new TablePrinter() 152 // .setCaption("Timezone Formats") 153 .setTableAttributes("class='dtf-table'") 154 .addColumn("Numeric Format") 155 .setHeaderCell(true) 156 .setHeaderAttributes("class='dtf-th'") 157 .setCellAttributes("class='dtf-s'") 158 .addColumn("Compact-Short") 159 .setHeaderAttributes("class='dtf-th'") 160 .setCellAttributes("class='dtf-s'") 161 .addColumn("Compact-Long") 162 .setHeaderAttributes("class='dtf-th'") 163 .setCellAttributes("class='dtf-s'"); 164 if (showCurrency) { 165 tablePrinter1 166 .addColumn("Compact-Short<br>+Currency") 167 .setHeaderAttributes("class='dtf-th'") 168 .setCellAttributes("class='dtf-s'") 169 // .addColumn("Compact-Short<br>+Unit") 170 // .setHeaderAttributes("class='dtf-th'") 171 // .setCellAttributes("class='dtf-s'") 172 // .addColumn("Compact-Long<br>+Currency") 173 // .addColumn("Compact-Long<br>+Currency-Long") 174 // .addColumn("Numeric 175 // Format").setHeaderCell(true).setHeaderAttributes("class='dtf-th'") 176 // .setCellAttributes("class='dtf-s'") 177 ; 178 } 179 // 180 // tablePrinter1.addColumn("View").setHeaderCell(true).setHeaderAttributes("class='dtf-th'").setCellAttributes("class='dtf-s'"); 181 182 ULocale locale2 = new ULocale(locale); 183 ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile); 184 NumberFormat nf = builder.getNumberFormat(1); 185 186 // nf.setMaximumFractionDigits(0); 187 SupplementalDataInfo sdi = CLDR_CONFIG.getSupplementalDataInfo(); 188 String[] debugOriginals = null; 189 CompactDecimalFormat cdf = 190 BuildIcuCompactDecimalFormat.build( 191 cldrFile, 192 debugCreationErrors, 193 debugOriginals, 194 CompactStyle.SHORT, 195 locale2, 196 CurrencyStyle.PLAIN, 197 currencyCode); 198 captureErrors(debugCreationErrors, errors, locale, "short"); 199 CompactDecimalFormat cdfs = 200 BuildIcuCompactDecimalFormat.build( 201 cldrFile, 202 debugCreationErrors, 203 debugOriginals, 204 CompactStyle.LONG, 205 locale2, 206 CurrencyStyle.PLAIN, 207 currencyCode); 208 captureErrors(debugCreationErrors, errors, locale, "long"); 209 210 CompactDecimalFormat cdfCurr = 211 BuildIcuCompactDecimalFormat.build( 212 cldrFile, 213 debugCreationErrors, 214 debugOriginals, 215 CompactStyle.SHORT, 216 locale2, 217 CurrencyStyle.CURRENCY, 218 currencyCode); 219 captureErrors(debugCreationErrors, errors, locale, "short-curr"); 220 // CompactDecimalFormat cdfU = BuildIcuCompactDecimalFormat.build(cldrFile, 221 // debugCreationErrors, 222 // debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.UNIT, 223 // "EUR"); 224 // captureErrors(debugCreationErrors, errors, locale, "short-kg"); 225 // CompactDecimalFormat cdfsCurr = 226 // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors, 227 // debugOriginals, CompactStyle.SHORT, locale2, CurrencyStyle.CURRENCY, 228 // currencyCode); 229 // CompactDecimalFormat cdfsCurrISO = 230 // BuildIcuCompactDecimalFormat.build(cldrFile, debugCreationErrors, 231 // debugOriginals, CompactStyle.LONG, locale2, CurrencyStyle.ISO_CURRENCY, 232 // "EUR"); 233 234 Set<Double> allSamples = 235 collectSamplesAndSetFormats(currencyCode, locale, sdi, cdf, cdfs, cdfCurr); 236 237 try { 238 for (double source : allSamples) { 239 if (false && source == 22000000 && locale.equals("cs")) { 240 System.out.println("**"); 241 } 242 243 String formattedNumber = nf.format(source); 244 String compactFormattedNumber = cdf == null ? "n/a" : cdf.format(source); 245 String compactLongFormattedNumber = cdfs == null ? "n/a" : cdfs.format(source); 246 String compactCurrFormattedNumber = 247 !showCurrency || cdfs == null ? "n/a" : cdfCurr.format(source); 248 // plainText.println(locale 249 // + "\t__" + source 250 // + "\t__" + compactFormattedNumber 251 // + "\t__" + compactLongFormattedNumber 252 // ); 253 tablePrinter1 254 .addRow() 255 .addCell(formattedNumber) 256 .addCell(compactFormattedNumber) 257 .addCell(compactLongFormattedNumber); 258 if (showCurrency) { 259 tablePrinter1.addCell(compactCurrFormattedNumber) 260 // .addCell(cdfU.format(source)) 261 // .addCell(cdfsCurr.format(source)) 262 // .addCell(cdfsCurrLong.format(source)) 263 // .addCell(cdfsCurrLong.format(source)) 264 // .addCell(formattedNumber) 265 ; 266 } 267 // String view = PathHeader.getLinkedView(surveyUrl, 268 // cldrFile, METAZONE_PREFIX + metazone + METAZONE_SUFFIX); 269 // tablePrinter1.addCell(view == null 270 // ? "" 271 // : view); 272 tablePrinter1.finishRow(); 273 } 274 } catch (Exception e) { 275 e.printStackTrace(); 276 } 277 out.append( 278 "<p>To correct problems in compact numbers below, please go to " 279 + PathHeader.SECTION_LINK 280 + CLDR_CONFIG 281 .urls() 282 .forPage( 283 cldrFile.getLocaleID(), 284 PageId.Compact_Decimal_Formatting) 285 + "'><em>" 286 + PageId.Compact_Decimal_Formatting 287 + "</em></a>.</p>"); 288 out.append(tablePrinter1.toString() + "\n"); 289 out.append("<h3>Plural Rules</h3>"); 290 out.append( 291 "<p>Look over the Minimal Pairs to make sure they are ok. " 292 + "Then review the examples in the cell to the left. " 293 + "All of those you should be able to substitute for the numbers in the Minimal Pairs, " 294 + "with an acceptable result. " 295 + "If any would be incorrect, please " 296 + "<a target='ticket' href='" 297 + CLDRURLS.CLDR_NEWTICKET_URL 298 + "'>file a ticket</a>.</p>" 299 + "<p>For more details, see " 300 + "<a target='CLDR-ST-DOCS' href='http://cldr.unicode.org/index/cldr-spec/plural-rules'>Plural Rules</a>.</p>"); 301 ShowPlurals showPlurals = new ShowPlurals(CLDR_CONFIG.getSupplementalDataInfo()); 302 showPlurals.printPluralTable(cldrFile, locale, out, factory); 303 ShowPlurals.appendBlanksForScrolling(out); 304 showErrors(errors, out); 305 showErrors(debugCreationErrors, out); 306 } catch (IOException e) { 307 throw new ICUUncheckedIOException(e); 308 } 309 } 310 collectSamplesAndSetFormats( String currencyCode, String locale, SupplementalDataInfo sdi, CompactDecimalFormat cdf, CompactDecimalFormat cdfs, CompactDecimalFormat cdfCurr)311 public static Set<Double> collectSamplesAndSetFormats( 312 String currencyCode, 313 String locale, 314 SupplementalDataInfo sdi, 315 CompactDecimalFormat cdf, 316 CompactDecimalFormat cdfs, 317 CompactDecimalFormat cdfCurr) { 318 // Collect samples for display 319 // one path for group-3, one for group-4 320 // TODO, fix for indic. 321 int factor = USES_GROUPS_OF_4.contains(locale) ? 10000 : 1000; 322 323 // we want to collect a sample of at least one sample for each plural category for each 324 // power of ten 325 PluralInfo pluralInfo = sdi.getPlurals(locale); 326 Set<Double> samples = new TreeSet<>(); 327 samples.add(1.1d); 328 samples.add(1.5d); 329 samples.add(1100d); 330 collectItems(pluralInfo, 1, 10, samples); 331 collectItems(pluralInfo, 10, 100, samples); 332 collectItems(pluralInfo, 100, 1000, samples); 333 int sigDigits = 3; 334 if (factor > 1000) { 335 collectItems(pluralInfo, 1000, 10000, samples); 336 sigDigits = 4; 337 } 338 if (cdf != null) { 339 cdf.setMaximumSignificantDigits(sigDigits); 340 } 341 if (cdfs != null) { 342 cdfs.setMaximumSignificantDigits(sigDigits); 343 } 344 if (cdfCurr != null) { 345 cdfCurr.setCurrency(Currency.getInstance(currencyCode)); 346 cdfCurr.setMaximumSignificantDigits(sigDigits); 347 } 348 // cdfU.setMaximumSignificantDigits(sigDigits); 349 350 // for (Entry<Count, List<Double>> entry : pluralInfo.getCountToExamplesMap().entrySet()) { 351 // samples.add(entry.getValue().get(0)); 352 // } 353 // 354 // Set<Double> samples2 = new TreeSet<Double>(); 355 // for (int i = 10; i < factor; i *= 10) { 356 // for (Double sample : samples) { 357 // samples2.add(sample*i); 358 // } 359 // } 360 // samples.addAll(samples2); 361 362 Set<Double> allSamples = new TreeSet<>(); 363 for (long i = 1; i <= 100000000000000L; i *= factor) { 364 for (Double sample : samples) { 365 double source = i * sample; 366 allSamples.add(source); 367 } 368 } 369 return allSamples; 370 } 371 372 private static String surveyUrl = 373 CLDR_CONFIG.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey"); 374 showErrors(Set<String> errors, Appendable out)375 private static void showErrors(Set<String> errors, Appendable out) throws IOException { 376 if (errors.size() != 0) { 377 out.append("<h2>" + "Errors" + "</h2>\n"); 378 for (String s : errors) { 379 out.append("<p>" + s + "</p>\n"); 380 } 381 errors.clear(); 382 } 383 } 384 collectItems( PluralInfo pluralInfo, double start, double limit, Set<Double> samples)385 private static Set<Double> collectItems( 386 PluralInfo pluralInfo, double start, double limit, Set<Double> samples) { 387 // TODO optimize once we have all the keywords 388 Map<String, Double> ones = new TreeMap<>(); 389 for (double i = start; i < limit; ++i) { 390 String cat = pluralInfo.getPluralRules().select(i); 391 if (ones.containsKey(cat)) { 392 continue; 393 } 394 ones.put(cat, i); 395 } 396 samples.addAll(ones.values()); 397 return samples; 398 } 399 captureErrors( Set<String> debugCreationErrors, Set<String> errors, String locale, String length)400 private static void captureErrors( 401 Set<String> debugCreationErrors, Set<String> errors, String locale, String length) { 402 if (debugCreationErrors.size() != 0) { 403 for (String s : debugCreationErrors) { 404 errors.add(locale + "\t" + length + "\t" + s); 405 } 406 debugCreationErrors.clear(); 407 } 408 } 409 } 410