1 /* 2 ****************************************************************************** 3 * Copyright (C) 2004-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 package org.unicode.cldr.tool; 8 9 import com.google.common.base.Joiner; 10 import com.google.common.collect.ImmutableMap; 11 import com.google.common.collect.ImmutableSet; 12 import com.google.common.collect.Multimap; 13 import com.google.common.collect.Multimaps; 14 import com.google.common.collect.TreeMultimap; 15 import com.ibm.icu.impl.Relation; 16 import com.ibm.icu.impl.Row.R2; 17 import com.ibm.icu.impl.Row.R4; 18 import com.ibm.icu.lang.UCharacter; 19 import com.ibm.icu.text.Collator; 20 import com.ibm.icu.text.Normalizer; 21 import com.ibm.icu.text.Normalizer2; 22 import com.ibm.icu.text.NumberFormat; 23 import com.ibm.icu.text.UTF16; 24 import com.ibm.icu.text.UnicodeSet; 25 import com.ibm.icu.util.ICUUncheckedIOException; 26 import com.ibm.icu.util.ULocale; 27 import java.io.IOException; 28 import java.io.PrintWriter; 29 import java.io.StringWriter; 30 import java.io.UnsupportedEncodingException; 31 import java.util.ArrayList; 32 import java.util.Arrays; 33 import java.util.Collection; 34 import java.util.Collections; 35 import java.util.Comparator; 36 import java.util.Date; 37 import java.util.EnumSet; 38 import java.util.HashMap; 39 import java.util.HashSet; 40 import java.util.Iterator; 41 import java.util.LinkedHashSet; 42 import java.util.List; 43 import java.util.Locale; 44 import java.util.Map; 45 import java.util.Map.Entry; 46 import java.util.Set; 47 import java.util.SortedMap; 48 import java.util.TreeMap; 49 import java.util.TreeSet; 50 import org.unicode.cldr.draft.FileUtilities; 51 import org.unicode.cldr.draft.ScriptMetadata; 52 import org.unicode.cldr.draft.ScriptMetadata.Info; 53 import org.unicode.cldr.util.ArrayComparator; 54 import 
org.unicode.cldr.util.CLDRConfig; 55 import org.unicode.cldr.util.CLDRFile; 56 import org.unicode.cldr.util.CLDRFile.WinningChoice; 57 import org.unicode.cldr.util.CLDRLocale; 58 import org.unicode.cldr.util.CLDRPaths; 59 import org.unicode.cldr.util.CLDRTool; 60 import org.unicode.cldr.util.CLDRURLS; 61 import org.unicode.cldr.util.CldrUtility; 62 import org.unicode.cldr.util.Factory; 63 import org.unicode.cldr.util.FileCopier; 64 import org.unicode.cldr.util.Iso639Data; 65 import org.unicode.cldr.util.Iso639Data.Scope; 66 import org.unicode.cldr.util.Iso639Data.Type; 67 import org.unicode.cldr.util.LanguageTagParser; 68 import org.unicode.cldr.util.Level; 69 import org.unicode.cldr.util.Log; 70 import org.unicode.cldr.util.Organization; 71 import org.unicode.cldr.util.StandardCodes; 72 import org.unicode.cldr.util.StandardCodes.CodeType; 73 import org.unicode.cldr.util.SupplementalDataInfo; 74 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; 75 import org.unicode.cldr.util.SupplementalDataInfo.ContainmentStyle; 76 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 77 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyNumberInfo; 78 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 79 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 80 import org.unicode.cldr.util.TransliteratorUtilities; 81 import org.unicode.cldr.util.XPathParts; 82 83 @CLDRTool(alias = "showlanguages", description = "Generate Language info charts") 84 public class ShowLanguages { 85 private static final boolean SHOW_NATIVE = true; 86 87 static Comparator col = 88 new org.unicode.cldr.util.MultiComparator( 89 Collator.getInstance(new ULocale("en")), 90 new UTF16.StringComparator(true, false, 0)); 91 92 static StandardCodes sc = StandardCodes.make(); 93 94 static Factory cldrFactory = 95 CLDRConfig.getInstance().getCldrFactory(); // .make(CLDRPaths.MAIN_DIRECTORY, ".*"); 96 static CLDRFile english = 
CLDRConfig.getInstance().getEnglish(); 97 main(String[] args)98 public static void main(String[] args) throws IOException { 99 System.out.println("Writing into " + FormattedFileWriter.CHART_TARGET_DIR); 100 FileCopier.ensureDirectoryExists(FormattedFileWriter.CHART_TARGET_DIR); 101 FileCopier.copy(ShowLanguages.class, "index.css", FormattedFileWriter.CHART_TARGET_DIR); 102 FormattedFileWriter.copyIncludeHtmls(FormattedFileWriter.CHART_TARGET_DIR); 103 104 StringWriter sw = printLanguageData(cldrFactory, "index.html"); 105 writeSupplementalIndex("index.html", sw); 106 107 // cldrFactory = Factory.make(Utility.COMMON_DIRECTORY + "../dropbox/extra2/", ".*"); 108 // printLanguageData(cldrFactory, "language_info2.txt"); 109 System.out.println("Done - wrote into " + FormattedFileWriter.CHART_TARGET_DIR); 110 } 111 112 /** */ 113 public static FormattedFileWriter.Anchors SUPPLEMENTAL_INDEX_ANCHORS = 114 new FormattedFileWriter.Anchors(); 115 116 static SupplementalDataInfo supplementalDataInfo = 117 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 118 printLanguageData(Factory cldrFactory, String filename)119 private static StringWriter printLanguageData(Factory cldrFactory, String filename) 120 throws IOException { 121 StringWriter sw = new StringWriter(); 122 PrintWriter pw = new PrintWriter(sw); 123 124 LanguageInfo linfo = new LanguageInfo(cldrFactory); 125 linfo.showCoverageGoals(pw); 126 127 new ChartDtdDelta().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 128 ShowLocaleCoverage.showCoverage(SUPPLEMENTAL_INDEX_ANCHORS, null); 129 130 new ChartDayPeriods().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 131 new ChartLanguageMatching().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 132 new ChartLanguageGroups().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 133 new ChartSubdivisions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 134 if (ToolConstants.CHART_VERSION.compareTo("37") >= 0) { 135 new ChartUnitConversions().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 136 new 
ChartUnitPreferences().writeChart(SUPPLEMENTAL_INDEX_ANCHORS); 137 } 138 // since we don't want these listed on the supplemental page, use null 139 140 new ShowPlurals().printPlurals(english, null, pw, cldrFactory); 141 142 linfo.printLikelySubtags(pw); 143 144 linfo.showCountryLanguageInfo(pw); 145 146 linfo.showLanguageCountryInfo(pw); 147 148 // linfo.showTerritoryInfo(); 149 // linfo.printCountryData(pw); 150 151 // linfo.printDeprecatedItems(pw); 152 153 // PrintWriter pw1 = new PrintWriter(new FormattedFileWriter(pw, "Languages and 154 // Territories", null)); 155 // pw1.println("<tr><th>Language \u2192 Territories"); 156 // pw1.println("</th><th>Territory \u2192 Language"); 157 // pw1.println("</th><th>Territories Not Represented"); 158 // pw1.println("</th><th>Languages Not Represented"); 159 // pw1.println("</th></tr>"); 160 // 161 // pw1.println("<tr><td>"); 162 // linfo.print(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 163 // pw1.println("</td><td>"); 164 // linfo.print(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.LANGUAGE_NAME); 165 // pw1.println("</td><td>"); 166 // linfo.printMissing(pw1, CLDRFile.TERRITORY_NAME, CLDRFile.TERRITORY_NAME); 167 // pw1.println("</td><td>"); 168 // linfo.printMissing(pw1, CLDRFile.LANGUAGE_NAME, CLDRFile.TERRITORY_NAME); 169 // pw1.println("</td></tr>"); 170 // 171 // pw1.close(); 172 173 printLanguageScript(linfo, pw); 174 printScriptLanguageTerritory(linfo, pw); 175 176 linfo.showCorrespondances(); 177 178 // linfo.showCalendarData(pw); 179 180 linfo.showCountryInfo(pw); 181 linfo.printCurrency(pw); 182 linfo.printContains(pw); 183 184 linfo.printWindows_Tzid(pw); 185 linfo.printAliases(pw); 186 187 linfo.printCharacters(pw); 188 189 pw.close(); 190 191 return sw; 192 } 193 writeSupplementalIndex(String filename, StringWriter sw)194 private static void writeSupplementalIndex(String filename, StringWriter sw) 195 throws IOException { 196 String[] replacements = { 197 "%date%", CldrUtility.isoFormatDateOnly(new 
Date()), 198 "%contents%", SUPPLEMENTAL_INDEX_ANCHORS.toString(), 199 "%data%", sw.toString(), 200 "%index%", "../index.html" 201 }; 202 PrintWriter pw2 = 203 org.unicode.cldr.draft.FileUtilities.openUTF8Writer( 204 FormattedFileWriter.CHART_TARGET_DIR, filename); 205 FileUtilities.appendFile(ShowLanguages.class, "supplemental.html", replacements, pw2); 206 pw2.close(); 207 } 208 printLanguageScript(LanguageInfo linfo, PrintWriter pw)209 private static void printLanguageScript(LanguageInfo linfo, PrintWriter pw) throws IOException { 210 PrintWriter pw1; 211 TablePrinter tablePrinter = 212 new TablePrinter() 213 .addColumn("Language", "class='source'", null, "class='source'", true) 214 .setSpanRows(true) 215 .setSortPriority(0) 216 .setBreakSpans(true) 217 .addColumn( 218 "Code", 219 "class='source'", 220 CldrUtility.getDoubleLinkMsg(), 221 "class='source'", 222 true) 223 .setSpanRows(true) 224 .addColumn( 225 "ML", 226 "class='target' title='modern language'", 227 null, 228 "class='target'", 229 true) 230 .setSpanRows(true) 231 .setSortPriority(1) 232 .addColumn( 233 "P", "class='target' title='primary'", null, "class='target'", true) 234 .setSortPriority(3) 235 .addColumn("Script", "class='target'", null, "class='target'", true) 236 .setSortPriority(3) 237 .addColumn("Code", "class='target'", null, "class='target'", true) 238 .addColumn( 239 "MS", 240 "class='target' title='modern script'", 241 null, 242 "class='target'", 243 true) 244 .setSortPriority(2); 245 246 TablePrinter tablePrinter2 = 247 new TablePrinter() 248 .addColumn("Script", "class='source'", null, "class='source'", true) 249 .setSpanRows(true) 250 .setSortPriority(0) 251 .setBreakSpans(true) 252 .addColumn( 253 "Code", 254 "class='source'", 255 CldrUtility.getDoubleLinkMsg(), 256 "class='source'", 257 true) 258 .setSpanRows(true) 259 .addColumn( 260 "MS", 261 "class='target' title='modern script'", 262 null, 263 "class='target'", 264 true) 265 .setSpanRows(true) 266 .setSortPriority(1) 267 
.addColumn("Language", "class='target'", null, "class='target'", true) 268 .setSortPriority(3) 269 .addColumn("Code", "class='target'", null, "class='target'", true) 270 .addColumn( 271 "ML", 272 "class='target' title='modern language'", 273 null, 274 "class='target'", 275 true) 276 .setSortPriority(2) 277 .addColumn( 278 "P", "class='target' title='primary'", null, "class='target'", true) 279 .setSortPriority(3); 280 281 // get the codes so we can show the remainder 282 Set<String> remainingScripts = 283 new TreeSet<>(getScriptsToShow()); // StandardCodes.MODERN_SCRIPTS); 284 UnicodeSet temp = new UnicodeSet(); 285 for (String script : getScriptsToShow()) { 286 temp.clear(); 287 try { 288 temp.applyPropertyAlias("script", script); 289 } catch (RuntimeException e) { 290 } // fall through 291 if (temp.size() == 0) { 292 remainingScripts.remove(script); 293 System.out.println("Removing: " + script); 294 } else { 295 System.out.println("Keeping: " + script); 296 } 297 } 298 remainingScripts.remove("Brai"); 299 remainingScripts.remove("Hira"); 300 remainingScripts.remove("Qaai"); 301 remainingScripts.remove("Hrkt"); 302 remainingScripts.remove("Zzzz"); 303 remainingScripts.remove("Zyyy"); 304 305 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 306 for (String language : getLanguagesToShow()) { 307 Scope s = Iso639Data.getScope(language); 308 Type t = Iso639Data.getType(language); 309 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 310 remainingLanguages.remove(language); 311 } 312 } 313 314 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 315 for (String language : languages) { 316 Set<BasicLanguageData> basicLanguageData = 317 supplementalDataInfo.getBasicLanguageData(language); 318 for (BasicLanguageData basicData : basicLanguageData) { 319 String secondary = 320 isOfficial(language) // basicData.getType() == 321 // BasicLanguageData.Type.primary 322 ? 
"\u00A0" 323 : "N"; 324 for (String script : basicData.getScripts()) { 325 addLanguageScriptCells( 326 tablePrinter, tablePrinter2, language, script, secondary); 327 remainingScripts.remove(script); 328 remainingLanguages.remove(language); 329 } 330 } 331 } 332 for (String language : remainingLanguages) { 333 addLanguageScriptCells(tablePrinter, tablePrinter2, language, "Zzzz", "?"); 334 } 335 for (String script : remainingScripts) { 336 addLanguageScriptCells(tablePrinter, tablePrinter2, "und", script, "?"); 337 } 338 339 pw1 = 340 new PrintWriter( 341 new FormattedFileWriter( 342 null, "Languages and Scripts", null, SUPPLEMENTAL_INDEX_ANCHORS)); 343 pw1.println(tablePrinter.toTable()); 344 pw1.close(); 345 346 pw1 = 347 new PrintWriter( 348 new FormattedFileWriter( 349 null, "Scripts and Languages", null, SUPPLEMENTAL_INDEX_ANCHORS)); 350 pw1.println(tablePrinter2.toTable()); 351 pw1.close(); 352 } 353 354 static final Map<String, OfficialStatus> languageToBestStatus = new HashMap<>(); 355 356 static { 357 for (String language : supplementalDataInfo.getLanguagesForTerritoriesPopulationData()) { 358 Set<String> territories = 359 supplementalDataInfo.getTerritoriesForPopulationData(language); 360 if (territories == null) { 361 continue; 362 } 363 int underbar = language.indexOf('_'); 364 String base = underbar < 0 ? 
null : language.substring(0, underbar); 365 366 for (String territory : territories) { 367 PopulationData data = 368 supplementalDataInfo.getLanguageAndTerritoryPopulationData( 369 language, territory); 370 OfficialStatus status = data.getOfficialStatus(); 371 OfficialStatus old; 372 old = languageToBestStatus.get(language); 373 if (old == null || status.compareTo(old) > 0) { 374 languageToBestStatus.put(language, status); 375 } 376 if (base != null) { 377 old = languageToBestStatus.get(base); 378 if (old == null || status.compareTo(old) > 0) { 379 languageToBestStatus.put(base, status); 380 } 381 } 382 } 383 } 384 } 385 386 private static boolean isOfficial(String language) { 387 OfficialStatus status = languageToBestStatus.get(language); 388 if (status != null && status.isMajor()) { 389 return true; 390 } 391 int underbar = language.indexOf('_'); 392 if (underbar < 0) { 393 return false; 394 } 395 return isOfficial(language.substring(0, underbar)); 396 } 397 398 private static Set<String> getLanguagesToShow() { 399 return getEnglishTypes("language", CLDRFile.LANGUAGE_NAME); 400 } 401 402 private static Set<String> getEnglishTypes(String type, int code) { 403 Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type)); 404 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext(); ) { 405 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 406 String newType = parts.getAttributeValue(-1, "type"); 407 if (!result.contains(newType)) { 408 result.add(newType); 409 } 410 } 411 return result; 412 } 413 414 private static Set<String> getScriptsToShow() { 415 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME); 416 } 417 418 private static void printScriptLanguageTerritory(LanguageInfo linfo, PrintWriter pw) 419 throws IOException { 420 PrintWriter pw1; 421 TablePrinter tablePrinter2 = 422 new TablePrinter() 423 .addColumn( 424 "Sample Char", 425 "class='source'", 426 null, 427 "class='source sample'", 428 true) 429 
.setSpanRows(true) 430 .addColumn("Script", "class='source'", null, "class='source'", true) 431 .setSpanRows(true) 432 .setSortPriority(0) 433 .setBreakSpans(true) 434 .addColumn( 435 "Code", 436 "class='source'", 437 CldrUtility.getDoubleLinkMsg(), 438 "class='source'", 439 true) 440 .setSpanRows(true) 441 .addColumn("T", "class='target'", null, "class='target'", true) 442 .setSortPriority(1) 443 .addColumn("Language", "class='target'", null, "class='target'", true) 444 .setSortPriority(2) 445 .addColumn("Native", "class='target'", null, "class='target'", true) 446 .addColumn("Code", "class='target'", null, "class='target'", true) 447 .addColumn("T", "class='target'", null, "class='target'", true) 448 .setSortPriority(3) 449 .addColumn("Territory", "class='target'", null, "class='target'", true) 450 .setSortPriority(4) 451 .addColumn("Native", "class='target'", null, "class='target'", true) 452 .addColumn("Code", "class='target'", null, "class='target'", true); 453 454 // get the codes so we can show the remainder 455 Set<String> remainingScripts = new TreeSet<>(getScriptsToShow()); 456 Set<String> remainingTerritories = new TreeSet<>(sc.getGoodAvailableCodes("territory")); 457 UnicodeSet temp = new UnicodeSet(); 458 for (String script : getScriptsToShow()) { 459 temp.clear(); 460 try { 461 temp.applyPropertyAlias("script", script); 462 } catch (RuntimeException e) { 463 } // fall through 464 if (temp.size() == 0) { 465 remainingScripts.remove(script); 466 System.out.println("Removing: " + script); 467 } else { 468 System.out.println("Keeping: " + script); 469 } 470 } 471 remainingScripts.remove("Brai"); 472 remainingScripts.remove("Hira"); 473 remainingScripts.remove("Qaai"); 474 remainingScripts.remove("Hrkt"); 475 remainingScripts.remove("Zzzz"); 476 remainingScripts.remove("Zyyy"); 477 478 Set<String> remainingLanguages = new TreeSet<>(getLanguagesToShow()); 479 for (String language : getLanguagesToShow()) { 480 Scope s = Iso639Data.getScope(language); 481 
Type t = Iso639Data.getType(language); 482 if (s != Scope.Individual && s != Scope.Macrolanguage || t != Type.Living) { 483 remainingLanguages.remove(language); 484 } 485 } 486 487 Set<String> languages = supplementalDataInfo.getBasicLanguageDataLanguages(); 488 for (String language : languages) { 489 Set<BasicLanguageData> basicLanguageData = 490 supplementalDataInfo.getBasicLanguageData(language); 491 for (BasicLanguageData basicData : basicLanguageData) { 492 if (basicData.getType() != BasicLanguageData.Type.primary) { 493 continue; 494 } 495 Set<String> mainTerritories = getTerritories(language); 496 if (mainTerritories.size() == 0) { 497 continue; 498 // mainTerritories.add("ZZ"); 499 } 500 501 TreeSet<String> mainScripts = new TreeSet<>(basicData.getScripts()); 502 if (mainScripts.size() == 0) { 503 continue; 504 } 505 for (String script : mainScripts) { 506 for (String territory : mainTerritories) { 507 addLanguageScriptCells2(tablePrinter2, language, script, territory); 508 remainingTerritories.remove(territory); 509 } 510 remainingScripts.remove(script); 511 } 512 } 513 remainingLanguages.remove(language); 514 } 515 // for (String language : remainingLanguages) { 516 // addLanguageScriptCells2( tablePrinter2, language, "Zzzz", "ZZ"); 517 // } 518 // for (String script : remainingScripts) { 519 // addLanguageScriptCells2( tablePrinter2, "und", script, "ZZ"); 520 // } 521 // for (String territory : remainingTerritories) { 522 // addLanguageScriptCells2( tablePrinter2, "und", "Zzzz", territory); 523 // } 524 525 pw1 = 526 new PrintWriter( 527 new FormattedFileWriter( 528 null, 529 "Scripts, Languages, and Territories", 530 null, 531 SUPPLEMENTAL_INDEX_ANCHORS)); 532 pw1.println(tablePrinter2.toTable()); 533 pw1.close(); 534 } 535 536 private static Relation<String, String> territoryFix; 537 getTerritories(String language)538 private static Set<String> getTerritories(String language) { 539 if (territoryFix == null) { // set up the data 540 initTerritoryFix(); 
541 } 542 Set<String> territories = territoryFix.getAll(language); 543 if (territories == null) { 544 territories = new TreeSet<>(); 545 } 546 return territories; 547 } 548 initTerritoryFix()549 private static void initTerritoryFix() { 550 territoryFix = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 551 Set<String> languages = supplementalDataInfo.getLanguages(); 552 LanguageTagParser ltp = new LanguageTagParser(); 553 for (String language2 : languages) { 554 if (language2.contains("_")) { 555 ltp.set(language2).getLanguage(); 556 addOfficialTerritory(ltp, language2, ltp.getLanguage()); 557 } else { 558 addOfficialTerritory(ltp, language2, language2); 559 } 560 } 561 } 562 addOfficialTerritory( LanguageTagParser ltp, String language, String baseLanguage)563 private static void addOfficialTerritory( 564 LanguageTagParser ltp, String language, String baseLanguage) { 565 // territoryFix.putAll(baseLanguage, 566 // supplementalDataInfo.getTerritoriesForPopulationData(language)); 567 Set<String> territories = supplementalDataInfo.getTerritoriesForPopulationData(language); 568 if (territories == null) { 569 return; 570 } 571 for (String territory : territories) { 572 PopulationData data = 573 supplementalDataInfo.getLanguageAndTerritoryPopulationData(language, territory); 574 OfficialStatus status = data.getOfficialStatus(); 575 if (status.isMajor()) { 576 territoryFix.put(baseLanguage, territory); 577 System.out.println( 578 "\tAdding\t" + baseLanguage + "\t" + territory + "\t" + language); 579 } 580 } 581 } 582 addLanguageScriptCells2( TablePrinter tablePrinter2, String language, String script, String territory)583 private static void addLanguageScriptCells2( 584 TablePrinter tablePrinter2, String language, String script, String territory) { 585 CLDRFile nativeLanguage = null; 586 if (SHOW_NATIVE) { 587 try { 588 nativeLanguage = cldrFactory.make(language + "_" + script + "_" + territory, true); 589 } catch (RuntimeException e) { 590 try { 591 
nativeLanguage = cldrFactory.make(language + "_" + script, true); 592 } catch (RuntimeException e2) { 593 try { 594 nativeLanguage = cldrFactory.make(language, true); 595 } catch (RuntimeException e3) { 596 } 597 } 598 } 599 // check for overlap 600 if (nativeLanguage != null 601 && !script.equals("Jpan") 602 && !script.equals("Hans") 603 && !script.equals("Hant")) { 604 UnicodeSet scriptSet; 605 try { 606 String tempScript = script.equals("Kore") ? "Hang" : script; 607 scriptSet = new UnicodeSet("[:script=" + tempScript + ":]"); 608 } catch (RuntimeException e) { 609 scriptSet = new UnicodeSet(); 610 } 611 UnicodeSet exemplars = nativeLanguage.getExemplarSet("", WinningChoice.WINNING); 612 if (scriptSet.containsNone(exemplars)) { 613 System.out.println( 614 "Skipping CLDR file -- exemplars differ: " 615 + language 616 + "\t" 617 + nativeLanguage.getLocaleID() 618 + "\t" 619 + scriptSet 620 + "\t" 621 + exemplars); 622 nativeLanguage = null; 623 } 624 } 625 } 626 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 627 if (languageName == null) languageName = "???"; 628 String isLanguageTranslated = ""; 629 String nativeLanguageName = 630 nativeLanguage == null 631 ? null 632 : nativeLanguage.getName(CLDRFile.LANGUAGE_NAME, language); 633 if (nativeLanguageName == null || nativeLanguageName.equals(language)) { 634 nativeLanguageName = "<i>n/a</i>"; 635 isLanguageTranslated = "n"; 636 } 637 638 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 639 // String nativeScriptName = nativeLanguage == null ? null : 640 // nativeLanguage.getName(CLDRFile.SCRIPT_NAME,script); 641 // if (nativeScriptName != null && !nativeScriptName.equals(script)) { 642 // scriptName = nativeScriptName + "[" + scriptName + "]"; 643 // } 644 645 String isTerritoryTranslated = ""; 646 String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territory); 647 String nativeTerritoryName = 648 nativeLanguage == null 649 ? 
null 650 : nativeLanguage.getName(CLDRFile.TERRITORY_NAME, territory); 651 if (nativeTerritoryName == null || nativeTerritoryName.equals(territory)) { 652 nativeTerritoryName = "<i>n/a</i>"; 653 isTerritoryTranslated = "n"; 654 } 655 656 // Type t = Iso639Data.getType(language); 657 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t == 658 // Type.Living) { 659 // // ok 660 // } else if (!language.equals("und")){ 661 // scriptModern = "N"; 662 // } 663 // String languageModern = oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : 664 // ""; 665 666 Info scriptMetatdata = ScriptMetadata.getInfo(script); 667 tablePrinter2 668 .addRow() 669 .addCell(scriptMetatdata.sampleChar) 670 .addCell(scriptName) 671 .addCell(script) 672 .addCell(isLanguageTranslated) 673 .addCell(languageName) 674 .addCell(nativeLanguageName) 675 .addCell(language) 676 .addCell(isTerritoryTranslated) 677 .addCell(territoryName) 678 .addCell(nativeTerritoryName) 679 .addCell(territory) 680 .finishRow(); 681 } 682 683 static ImmutableMap<String, String> fixScriptGif = 684 ImmutableMap.<String, String>builder() 685 .put("hangul", "hangulsyllables") 686 .put("japanese", "hiragana") 687 .put("unknown or invalid script", "unknown") 688 .put("Hant", "Hant") 689 .put("Hans", "Hans") 690 .build(); 691 getGifName(String script)692 private static String getGifName(String script) { 693 String temp = fixScriptGif.get(script); 694 if (temp != null) { 695 return temp; 696 } 697 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 698 scriptName = scriptName.toLowerCase(Locale.ENGLISH); 699 temp = fixScriptGif.get(scriptName); 700 if (temp != null) { 701 return temp; 702 } 703 return scriptName; 704 } 705 706 private static Set<Type> oldLanguage = 707 Collections.unmodifiableSet( 708 EnumSet.of(Type.Ancient, Type.Extinct, Type.Historical, Type.Constructed)); 709 addLanguageScriptCells( TablePrinter tablePrinter, TablePrinter tablePrinter2, String 
language, String script, String secondary)710 private static void addLanguageScriptCells( 711 TablePrinter tablePrinter, 712 TablePrinter tablePrinter2, 713 String language, 714 String script, 715 String secondary) { 716 try { 717 String languageName = english.getName(CLDRFile.LANGUAGE_NAME, language); 718 if (languageName == null) { 719 languageName = "¿" + language + "?"; 720 System.err.println("No English Language Name for:" + language); 721 } 722 String scriptName = english.getName(CLDRFile.SCRIPT_NAME, script); 723 if (scriptName == null) { 724 scriptName = "¿" + script + "?"; 725 System.err.println("No English Language Name for:" + script); 726 } 727 String scriptModern = 728 StandardCodes.isScriptModern(script) ? "" : script.equals("Zzzz") ? "n/a" : "N"; 729 // Scope s = Iso639Data.getScope(language); 730 Type t = Iso639Data.getType(language); 731 // if ((s == Scope.Individual || s == Scope.Macrolanguage || s == Scope.Collection) && t 732 // == Type.Living) { 733 // // ok 734 // } else if (!language.equals("und")){ 735 // scriptModern = "N"; 736 // } 737 String languageModern = 738 oldLanguage.contains(t) ? "O" : language.equals("und") ? "?" : ""; 739 740 tablePrinter 741 .addRow() 742 .addCell(languageName) 743 .addCell(language) 744 .addCell(languageModern) 745 .addCell(secondary) 746 .addCell(scriptName) 747 .addCell(script) 748 .addCell(scriptModern) 749 .finishRow(); 750 751 tablePrinter2 752 .addRow() 753 .addCell(scriptName) 754 .addCell(script) 755 .addCell(scriptModern) 756 .addCell(languageName) 757 .addCell(language) 758 .addCell(languageModern) 759 .addCell(secondary) 760 .finishRow(); 761 } catch (RuntimeException e) { 762 throw e; 763 } 764 } 765 766 // TODO This is old code that read supplemental data. Should be replaced by using 767 // SupplementalDataInfo. 
768 // https://unicode-org.atlassian.net/browse/CLDR-15673 769 770 static class LanguageInfo { 771 private static final Map<String, Map<String, String>> localeAliasInfo = new TreeMap<>(); 772 773 Multimap<String, String> language_scripts = TreeMultimap.create(); 774 775 Multimap<String, String> language_territories = TreeMultimap.create(); 776 777 List<Map<String, String>> deprecatedItems = new ArrayList<>(); 778 779 Multimap<String, String> territory_languages; 780 781 Multimap<String, String> script_languages; 782 783 // Map group_contains = new TreeMap(); 784 785 Set<String[]> aliases = 786 new TreeSet<String[]>( 787 new ArrayComparator(new Comparator[] {new UTF16.StringComparator(), col})); 788 789 Comparator col3 = new ArrayComparator(new Comparator[] {col, col, col}); 790 791 Map<String, String> currency_fractions = new TreeMap<String, String>(col); 792 793 Map<String, Set> currency_territory = new TreeMap<String, Set>(col); 794 795 Map<String, Set> territory_currency = new TreeMap<String, Set>(col); 796 797 Set<String> territoriesWithCurrencies = new TreeSet<>(); 798 799 Set<String> currenciesWithTerritories = new TreeSet<>(); 800 801 Map<String, Map<String, Set<String>>> territoryData = new TreeMap<>(); 802 803 Set<String> territoryTypes = new TreeSet<>(); 804 805 Map<String, LinkedHashSet<String>> charSubstitutions = 806 new TreeMap<String, LinkedHashSet<String>>(col); 807 808 String defaultDigits = null; 809 810 Map<String, Map<String, Object>> territoryLanguageData = new TreeMap<>(); 811 812 private Relation<String, String> territoriesToModernCurrencies = 813 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class, null); 814 LanguageInfo(Factory cldrFactory)815 public LanguageInfo(Factory cldrFactory) throws IOException { 816 CLDRFile supp = cldrFactory.make(CLDRFile.SUPPLEMENTAL_NAME, false); 817 for (Iterator<String> it = supp.iterator(); it.hasNext(); ) { 818 String path = it.next(); 819 String fullPath = supp.getFullXPath(path); 820 if 
(fullPath == null) { 821 supp.getFullXPath(path); 822 } 823 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 824 825 // <zoneItem type="America/Adak" territory="US" aliases="America/Atka US/Aleutian"/> 826 if (path.indexOf("/zoneItem") >= 0) { 827 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 828 String type = attributes.get("type"); 829 // String territory = attributes.get("territory"); 830 String aliasAttributes = attributes.get("aliases"); 831 if (aliasAttributes != null) { 832 String[] aliasesList = aliasAttributes.split("\\s+"); 833 834 for (int i = 0; i < aliasesList.length; ++i) { 835 String alias = aliasesList[i]; 836 aliases.add(new String[] {"timezone", alias, type}); 837 } 838 } 839 // TODO territory, multizone 840 continue; 841 } 842 843 if (path.indexOf("/currencyData") >= 0) { 844 if (path.indexOf("/fractions") >= 0) { 845 // <info iso4217="ADP" digits="0" rounding="0"/> 846 String element = parts.getElement(parts.size() - 1); 847 if (!element.equals("info")) 848 throw new IllegalArgumentException( 849 "Unexpected fractions element: " + element); 850 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 851 String iso4217 = attributes.get("iso4217"); 852 String digits = attributes.get("digits"); 853 String rounding = attributes.get("rounding"); 854 digits = digits + (rounding.equals("0") ? 
"" : " (" + rounding + ")"); 855 if (iso4217.equals("DEFAULT")) defaultDigits = digits; 856 else 857 currency_fractions.put( 858 getName(CLDRFile.CURRENCY_NAME, iso4217, false), digits); 859 continue; 860 } 861 // <region iso3166="AR"> 862 // <currency iso4217="ARS" from="1992-01-01"/> 863 if (path.indexOf("/region") >= 0) { 864 Map<String, String> attributes = parts.getAttributes(parts.size() - 2); 865 String iso3166 = attributes.get("iso3166"); 866 attributes = parts.getAttributes(parts.size() - 1); 867 String iso4217 = attributes.get("iso4217"); 868 String to = attributes.get("to"); 869 if (to == null) to = "\u221E"; 870 String from = attributes.get("from"); 871 if (from == null) from = "-\u221E"; 872 String countryName = getName(CLDRFile.TERRITORY_NAME, iso3166, false); 873 String currencyName = getName(CLDRFile.CURRENCY_NAME, iso4217, false); 874 Set info = territory_currency.get(countryName); 875 if (info == null) 876 territory_currency.put(countryName, info = new TreeSet(col3)); 877 info.add(new String[] {from, to, currencyName}); 878 info = currency_territory.get(currencyName); 879 if (info == null) 880 currency_territory.put(currencyName, info = new TreeSet(col)); 881 territoriesWithCurrencies.add(iso3166); 882 currenciesWithTerritories.add(iso4217); 883 if (to.equals("\u221E") || to.compareTo("2006") > 0) { 884 territoriesToModernCurrencies.put(iso3166, iso4217); 885 info.add("<b>" + countryName + "</b>"); 886 887 } else { 888 info.add("<i>" + countryName + "</i>"); 889 } 890 continue; 891 } 892 } 893 894 if (path.indexOf("/languageData") >= 0) { 895 Map<String, String> attributes = parts.findAttributes("language"); 896 String language = attributes.get("type"); 897 String alt = attributes.get("alt"); 898 addTokens(language, attributes.get("scripts"), " ", language_scripts); 899 // mark the territories 900 if (alt == null) 901 ; // nothing 902 else if ("secondary".equals(alt)) language += "*"; 903 else language += "*" + alt; 904 // <language type="af" 
scripts="Latn" territories="ZA"/> 905 addTokens(language, attributes.get("territories"), " ", language_territories); 906 continue; 907 } 908 909 if (path.indexOf("/deprecatedItems") >= 0) { 910 deprecatedItems.add(parts.findAttributes("deprecatedItems")); 911 continue; 912 } 913 if (path.indexOf("/calendarData") >= 0) { 914 Map<String, String> attributes = parts.findAttributes("calendar"); 915 if (attributes == null) { 916 System.err.println( 917 "Err: on path " 918 + fullPath 919 + " , no attributes on 'calendar'. Probably, this tool is out of date."); 920 } else { 921 String type = attributes.get("type"); 922 String territories = attributes.get("territories"); 923 if (territories == null) { 924 System.err.println( 925 "Err: on path " 926 + fullPath 927 + ", missing territories. Probably, this tool is out of date."); 928 } else if (type == null) { 929 System.err.println( 930 "Err: on path " 931 + fullPath 932 + ", missing type. Probably, this tool is out of date."); 933 } else { 934 addTerritoryInfo(territories, "calendar", type); 935 } 936 } 937 } 938 if (path.indexOf("/weekData") >= 0 || path.indexOf("measurementData") >= 0) { 939 String element = parts.getElement(parts.size() - 1); 940 Map<String, String> attributes = parts.getAttributes(parts.size() - 1); 941 // later, make this a table 942 String key = "count"; 943 String display = "Days in week (min)"; 944 boolean useTerritory = true; 945 switch (element) { 946 case "firstDay": 947 key = "day"; 948 display = "First day of week"; 949 break; 950 case "weekendStart": 951 key = "day"; 952 display = "First day of weekend"; 953 break; 954 case "weekendEnd": 955 key = "day"; 956 display = "Last day of weekend"; 957 break; 958 case "measurementSystem": 959 // <measurementSystem type="metric" territories="001"/> 960 key = "type"; 961 display = "Meas. 
system"; 962 break; 963 case "paperSize": 964 key = "type"; 965 display = "Paper Size"; 966 break; 967 case "weekOfPreference": 968 useTerritory = false; 969 break; 970 } 971 if (useTerritory) { 972 String type = attributes.get(key); 973 String territories = attributes.get("territories"); 974 addTerritoryInfo(territories, display, type); 975 } 976 } 977 if (path.indexOf("/generation") >= 0 || path.indexOf("/version") >= 0) continue; 978 System.out.println("Skipped Element: " + path); 979 } 980 981 for (String territory : supplementalDataInfo.getTerritoriesWithPopulationData()) { 982 for (String language : 983 supplementalDataInfo.getLanguagesForTerritoryWithPopulationData( 984 territory)) { 985 language_territories.put(language, territory); 986 } 987 } 988 territory_languages = Multimaps.invertFrom(language_territories, TreeMultimap.create()); 989 script_languages = Multimaps.invertFrom(language_scripts, TreeMultimap.create()); 990 991 // now get some metadata 992 localeAliasInfo.put("language", new TreeMap<String, String>()); 993 localeAliasInfo.put("script", new TreeMap<String, String>()); 994 localeAliasInfo.put("territory", new TreeMap<String, String>()); 995 localeAliasInfo.put("variant", new TreeMap<String, String>()); 996 localeAliasInfo.put("zone", new TreeMap<String, String>()); 997 localeAliasInfo.put("subdivision", new TreeMap<String, String>()); 998 localeAliasInfo.put("unit", new TreeMap<String, String>()); 999 localeAliasInfo.put("usage", new TreeMap<String, String>()); 1000 1001 // localeAliasInfo.get("language").put("nb", "no"); 1002 localeAliasInfo.get("language").put("zh_CN", "zh_Hans_CN"); 1003 localeAliasInfo.get("language").put("zh_SG", "zh_Hans_SG"); 1004 localeAliasInfo.get("language").put("zh_TW", "zh_Hant_TW"); 1005 localeAliasInfo.get("language").put("zh_MO", "zh_Hant_MO"); 1006 localeAliasInfo.get("language").put("zh_HK", "zh_Hant_HK"); 1007 1008 // CLDRFile supp2 = cldrFactory.make(CLDRFile.SUPPLEMENTAL_METADATA, false); 1009 Map<String, 
Map<String, R2<List<String>, String>>> localeAliasInfo2 = 1010 supplementalDataInfo.getLocaleAliasInfo(); 1011 for (Entry<String, Map<String, R2<List<String>, String>>> entry1 : 1012 localeAliasInfo2.entrySet()) { 1013 String element = entry1.getKey(); 1014 for (Entry<String, R2<List<String>, String>> entry2 : 1015 entry1.getValue().entrySet()) { 1016 String type = entry2.getKey(); 1017 R2<List<String>, String> replacementReason = entry2.getValue(); 1018 List<String> replacementList = replacementReason.get0(); 1019 String replacement = 1020 replacementList == null ? null : Joiner.on(" ").join(replacementList); 1021 String reason = replacementReason.get1(); 1022 if (element.equals("timezone")) { 1023 element = "zone"; 1024 } 1025 try { 1026 localeAliasInfo 1027 .get(element) 1028 .put(type, replacement == null ? "?" : replacement); 1029 } catch (Exception e) { 1030 // TODO Auto-generated catch block 1031 throw new IllegalArgumentException( 1032 "Can't find alias data for '" + element + "'", e); 1033 } 1034 1035 String name = ""; 1036 if (replacement == null) { 1037 name = "(none)"; 1038 } else if (element.equals("language")) { 1039 name = getName(replacement, false); 1040 } else if (element.equals("zone")) { 1041 element = "timezone"; 1042 name = replacement + "*"; 1043 } else { 1044 int typeCode = CLDRFile.typeNameToCode(element); 1045 if (typeCode >= 0) { 1046 name = getName(typeCode, replacement, false); 1047 } else { 1048 name = "*" + replacement; 1049 } 1050 } 1051 if (element.equals("territory")) { 1052 territoryAliases.put(type, name); 1053 aliases.add( 1054 new String[] { 1055 element, 1056 getName(CLDRFile.TERRITORY_NAME, type, false), 1057 name, 1058 reason 1059 }); 1060 } else { 1061 aliases.add(new String[] {element, type, name, reason}); 1062 } 1063 continue; 1064 } 1065 } 1066 Log.setLog(CLDRPaths.CHART_DIRECTORY + "supplemental/", "characterLog.txt"); 1067 Log.close(); 1068 } 1069 printLikelySubtags(PrintWriter index)1070 public void 
printLikelySubtags(PrintWriter index) throws IOException { 1071 1072 PrintWriter pw = 1073 new PrintWriter( 1074 new FormattedFileWriter( 1075 null, "Likely Subtags", null, SUPPLEMENTAL_INDEX_ANCHORS)); 1076 1077 TablePrinter tablePrinter = 1078 new TablePrinter() 1079 .addColumn( 1080 "Source Lang", "class='source'", null, "class='source'", true) 1081 .setSortPriority(1) 1082 .setSpanRows(false) 1083 .addColumn( 1084 "Source Script", "class='source'", null, "class='source'", true) 1085 .setSortPriority(0) 1086 .setSpanRows(false) 1087 .setBreakSpans(true) 1088 .addColumn( 1089 "Source Region", "class='source'", null, "class='source'", true) 1090 .setSortPriority(2) 1091 .setSpanRows(false) 1092 .addColumn( 1093 "Target Lang", "class='target'", null, "class='target'", true) 1094 .setSortPriority(3) 1095 .setBreakSpans(true) 1096 .addColumn( 1097 "Target Script", "class='target'", null, "class='target'", true) 1098 .setSortPriority(4) 1099 .addColumn( 1100 "Target Region", "class='target'", null, "class='target'", true) 1101 .setSortPriority(5) 1102 .addColumn( 1103 "Source ID", 1104 "class='source'", 1105 CldrUtility.getDoubleLinkMsg(), 1106 "class='source'", 1107 true) 1108 .addColumn("Target ID", "class='target'", null, "class='target'", true); 1109 Map<String, String> subtags = supplementalDataInfo.getLikelySubtags(); 1110 LanguageTagParser sourceParsed = new LanguageTagParser(); 1111 LanguageTagParser targetParsed = new LanguageTagParser(); 1112 for (String source : subtags.keySet()) { 1113 String target = subtags.get(source); 1114 sourceParsed.set(source); 1115 targetParsed.set(target); 1116 tablePrinter 1117 .addRow() 1118 .addCell(getName(CLDRFile.LANGUAGE_NAME, sourceParsed.getLanguage())) 1119 .addCell(getName(CLDRFile.SCRIPT_NAME, sourceParsed.getScript())) 1120 .addCell(getName(CLDRFile.TERRITORY_NAME, sourceParsed.getRegion())) 1121 .addCell(getName(CLDRFile.LANGUAGE_NAME, targetParsed.getLanguage())) 1122 .addCell(getName(CLDRFile.SCRIPT_NAME, 
targetParsed.getScript())) 1123 .addCell(getName(CLDRFile.TERRITORY_NAME, targetParsed.getRegion())) 1124 .addCell(source) 1125 .addCell(target) 1126 .finishRow(); 1127 } 1128 pw.println(tablePrinter.toTable()); 1129 pw.close(); 1130 } 1131 1132 static class LanguageData extends R4<Double, Double, Double, String> { LanguageData(Double a, Double b, Double c, String d)1133 public LanguageData(Double a, Double b, Double c, String d) { 1134 super(a, b, c, d); 1135 } 1136 } 1137 getName(final int type, final String value)1138 private String getName(final int type, final String value) { 1139 if (value == null || value.equals("") || value.equals("und")) { 1140 return "\u00A0"; 1141 } 1142 String result = english.getName(type, value); 1143 if (result == null) { 1144 result = value; 1145 } 1146 return result; 1147 } 1148 1149 static final Comparator INVERSE_COMPARABLE = 1150 new Comparator() { 1151 @Override 1152 public int compare(Object o1, Object o2) { 1153 return ((Comparable) o2).compareTo(o1); 1154 } 1155 }; 1156 1157 // http://www.faqs.org/rfcs/rfc2396.html 1158 // delims = "<" | ">" | "#" | "%" | <"> 1159 // "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`" 1160 // Within a query component, the characters ";", "/", "?", ":", "@", 1161 // "&", "=", "+", ",", and "$" are reserved. 
// Code points that urlEncode percent-escapes: controls, space, DEL, the delimiter and
// reserved characters listed above, and everything from U+0080 up.
static final UnicodeSet ESCAPED_URI_QUERY =
        new UnicodeSet(
                        "[\\u0000-\\u0020\\u007F <>#%\"\\{}|\\\\\\^\\[\\]`;/?:@\\&=+,$\\u0080-\\U0001FFFF]")
                .freeze();

private static final int MINIMAL_BIG_VENDOR = 8;

static {
    // Prints the complement of the escaped set to stdout when the class is initialized.
    System.out.println(new UnicodeSet(ESCAPED_URI_QUERY).complement());
}

/**
 * Percent-encodes a string for use in a URI query component.
 *
 * <p>The input is converted to UTF-8; every byte whose value is in {@link #ESCAPED_URI_QUERY}
 * is written as {@code %} followed by two lowercase hex digits, all other bytes are written as
 * the corresponding ASCII character.
 *
 * @param input the text to encode; must not be null
 * @return the encoded text
 */
private String urlEncode(String input) {
    // A Charset constant cannot fail the way a charset *name* can, so no checked
    // UnsupportedEncodingException has to be wrapped here.
    byte[] utf8 = input.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    StringBuilder output = new StringBuilder(utf8.length);
    for (byte raw : utf8) {
        int b = raw & 0xFF; // treat the byte as unsigned
        if (ESCAPED_URI_QUERY.contains(b)) {
            output.append('%');
            if (b < 0x10) {
                output.append('0');
            }
            output.append(Integer.toString(b, 16));
        } else {
            output.append((char) b);
        }
    }
    return output.toString();
}

/**
 * Builds an HTML link to the CLDR new-ticket page.
 *
 * @param bugNumber not used
 * @param text the link text (inserted as-is, so it may contain markup)
 * @param from not used
 * @param subject not used
 * @param body not used
 * @return an {@code <a>} element that opens {@code CLDRURLS.CLDR_NEWTICKET_URL} in a new window
 */
private String addBug(int bugNumber, String text, String from, String subject, String body) {
    return "<a target='_blank' href='" + CLDRURLS.CLDR_NEWTICKET_URL + "'>" + text + "</a>";
}

/**
 * Writes the "Language-Territory Information" chart, and next to it a tab-separated
 * {@code .txt} file with the same language/territory pairs in plain form.
 *
 * <p>The chart has one row per (language, territory) pair that has population data, plus one
 * extra "add new" row per language that links to the new-ticket page.
 *
 * @param pw not used by this method
 * @throws IOException if a chart file cannot be written
 */
private void showLanguageCountryInfo(PrintWriter pw) throws IOException {
    FormattedFileWriter ffw =
            new FormattedFileWriter(
                    null, "Language-Territory Information", null, SUPPLEMENTAL_INDEX_ANCHORS);
    PrintWriter pw2 = new PrintWriter(ffw);
    TablePrinter tablePrinter =
            new TablePrinter()
                    .addColumn("L", "class='source'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .setRepeatHeader(true)
                    .setHidden(true)
                    .addColumn("Language", "class='source'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .addColumn(
                            "Code",
                            "class='source'",
                            CldrUtility.getDoubleLinkMsg(),
                            "class='source'",
                            true)
                    .addColumn("Territory", "class='target'", null, "class='target'", true)
                    .addColumn(
                            "Code",
                            "class='target'",
                            "<a href=\"territory_language_information.html#{0}\">{0}</a>",
                            "class='target'",
                            true)
                    .addColumn(
                            "Language Population",
                            "class='target'",
                            "{0,number,#,#@@}",
                            "class='targetRight'",
                            true)
                    .setSortPriority(1)
                    .setSortAscending(false);
    TreeSet<String> languages = new TreeSet<>();
    Collection<Comparable[]> data = new ArrayList<>();
    Collection<Comparable[]> plainData = new ArrayList<>();

    for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
        String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
        for (String languageCode :
                supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(
                        territoryCode)) {
            PopulationData languageData =
                    supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                            languageCode, territoryCode);
            languages.add(languageCode);
            String languageName = getLanguageName(languageCode);
            // Cell order must match the column order declared above.
            Comparable[] items =
                    new Comparable[] {
                        getFirstPrimaryWeight(languageName),
                        languageName,
                        languageCode,
                        territoryName + getOfficialStatus(territoryCode, languageCode),
                        territoryCode,
                        languageData.getPopulation(),
                    };
            Comparable[] plainItems =
                    new Comparable[] {
                        languageName,
                        languageCode,
                        territoryName,
                        territoryCode,
                        getRawOfficialStatus(territoryCode, languageCode),
                        languageData.getPopulation(),
                        languageData.getLiteratePopulation()
                    };

            data.add(items);
            plainData.add(plainItems);
        }
    }
    // One trailing "add new" row per language; population 0 sorts it after the real rows.
    for (String languageCode : languages) {
        String languageName = getLanguageName(languageCode);
        Comparable[] items =
                new Comparable[] {
                    getFirstPrimaryWeight(languageName),
                    languageName,
                    languageCode,
                    addBug(
                            1217,
                            "<i>add new</i>",
                            "<email>",
                            "Add territory to " + languageName + " (" + languageCode + ")",
                            "<territory, speaker population in territory, and references>"),
                    "",
                    0.0d,
                };
        data.add(items);
    }
    Comparable[][] flattened = data.toArray(new Comparable[data.size()][]);
    String value = tablePrinter.addRows(flattened).toTable();
    pw2.println(value);
    pw2.close();
    try (PrintWriter plainWriter =
            FileUtilities.openUTF8Writer(ffw.getDir(), ffw.getBaseFileName() + ".txt")) {
        for (Comparable[] row : plainData) {
            plainWriter.println(Joiner.on("\t").join(row));
        }
    }
}

/**
 * Appends links to the plural-rules and likely-subtags charts to a message.
 *
 * @param msg the message to extend
 * @param languageCode a locale code; only its base language is used for the link anchors
 * @return {@code msg} followed by the two links
 */
private String getLanguagePluralMessage(String msg, String languageCode) {
    String mainLanguageCode = new LanguageTagParser().set(languageCode).getLanguage();
    return msg
            + ", on <a href='language_plural_rules.html#"
            + mainLanguageCode
            + "'>plurals</a>"
            + ", and on <a href='likely_subtags.html#"
            + mainLanguageCode
            + "'>likely-subtags</a>";
}

/**
 * Returns a display name for a language code.
 *
 * @param languageCode the code to name
 * @return the English name when it differs from the code; otherwise the first ISO 639 name
 *     if there is one; otherwise the code itself
 */
private String getLanguageName(String languageCode) {
    String result = english.getName(languageCode);
    // Guard against a missing English name instead of failing with a NullPointerException.
    if (result != null && !result.equals(languageCode)) {
        return result;
    }
    Set<String> names = Iso639Data.getNames(languageCode);
    if (names != null && !names.isEmpty()) {
        return names.iterator().next();
    }
    return languageCode;
}

/** Organizations that get their own column in the "Coverage Goals" table. */
static final Set<Organization> TC_Vendors =
        ImmutableSet.of(
                Organization.apple,
                Organization.google,
                Organization.microsoft,
                Organization.cldr);

/**
 * Writes the "Coverage Goals" chart: a table of per-locale coverage levels for the
 * organizations in {@link #TC_Vendors}, followed by a bullet list of the levels requested by
 * all other organizations.
 *
 * @param pw not used by this method
 * @throws IOException if the chart cannot be written
 * @throws IllegalArgumentException if a locale in the data is not in the form that
 *     {@code getAlias} returns for it
 */
private void showCoverageGoals(PrintWriter pw) throws IOException {
    PrintWriter pw2 =
            new PrintWriter(new FormattedFileWriter(null, "Coverage Goals", null, null));

    TablePrinter tablePrinter =
            new TablePrinter()
                    .addColumn("Language", "class='source'", null, "class='source'", false)
                    .setSortPriority(0)
                    .setBreakSpans(false)
                    .addColumn(
                            "Code",
                            "class='source'",
                            "<a href=\"http://www.unicode.org/cldr/data/common/main/{0}.xml\">{0}</a>",
                            "class='source'",
                            false)
                    .addColumn("D. Votes", "class='target'", null, "class='target'", false);

    Map<Organization, Map<String, Level>> vendordata = sc.getLocaleTypes();
    Set<String> locales = new TreeSet<>();
    Set<Organization> vendors = new LinkedHashSet<>();
    Set<Organization> smallVendors = new LinkedHashSet<>();

    for (Organization organization : TC_Vendors) {
        vendors.add(organization);
        tablePrinter
                .addColumn(
                        organization.getDisplayName(),
                        "class='target'",
                        null,
                        "class='target'",
                        false)
                .setSpanRows(false);
        // An organization without any locale data still gets its (empty) column.
        Map<String, Level> levels = vendordata.get(organization);
        if (levels != null) {
            locales.addAll(levels.keySet());
        }
    }

    for (Organization vendor : vendordata.keySet()) {
        if (!TC_Vendors.contains(vendor)) {
            smallVendors.add(vendor);
        }
    }

    Collection<Comparable[]> data = new ArrayList<>();
    List<String> list = new ArrayList<>();
    LanguageTagParser ltp = new LanguageTagParser();

    pw2.append("<h2>TC Orgs</h2>");

    for (String locale : locales) {
        list.clear();
        String localeCode = locale.equals("*") ? "und" : locale;
        String alias = getAlias(localeCode);
        if (!alias.equals(localeCode)) {
            throw new IllegalArgumentException(
                    "Should use canonical form: " + locale + " => " + alias);
        }
        String baseLang = ltp.set(localeCode).getLanguage();
        String baseLangName = getLanguageName(baseLang);
        list.add("und".equals(localeCode) ? "other" : baseLangName);
        list.add(locale);
        int defaultVotes =
                supplementalDataInfo.getRequiredVotes(CLDRLocale.getInstance(locale), null);
        list.add(String.valueOf(defaultVotes));
        for (Organization vendor : vendors) {
            list.add(getVendorStatus(locale, vendor, vendordata));
        }
        data.add(list.toArray(new String[list.size()]));
    }
    Comparable[][] flattened = data.toArray(new Comparable[data.size()][]);
    String value = tablePrinter.addRows(flattened).toTable();
    pw2.println(value);

    pw2.append("<h2>Others</h2><div align='left'><ul>");

    for (Organization vendor2 : smallVendors) {
        pw2.append("<li><b>");
        pw2.append(TransliteratorUtilities.toHTML.transform(vendor2.getDisplayName()))
                .append(": </b>");
        // first1: nothing has been written for this organization yet ("; " separates levels).
        boolean first1 = true;
        for (Level level : Level.values()) {
            // first2: nothing has been written for this level yet (", " separates locales).
            boolean first2 = true;
            Level other = null;
            for (Entry<String, Level> data2 : vendordata.get(vendor2).entrySet()) {
                String key = data2.getKey();
                Level level2 = data2.getValue();
                if (level != level2) {
                    continue;
                }
                if (key.equals("*")) {
                    // The wildcard is listed last, as "other".
                    other = level2;
                    continue;
                }
                if (first2) {
                    if (first1) {
                        first1 = false;
                    } else {
                        pw2.append("; ");
                    }
                    pw2.append(level2.toString()).append(": ");
                    first2 = false;
                } else {
                    pw2.append(", ");
                }
                pw2.append(TransliteratorUtilities.toHTML.transform(key));
            }
            if (other != null) {
                if (first2) {
                    if (first1) {
                        first1 = false;
                    } else {
                        pw2.append("; ");
                    }
                    pw2.append(level.toString()).append(": ");
                    first2 = false;
                } else {
                    pw2.append(", ");
                }
                pw2.append("<i>other</i>");
            }
        }
        pw2.append("</li>");
    }
    pw2.append("</ul></div>");
    pw2.close();
}

// Parser instance reused (and overwritten) by every getAlias call.
LanguageTagParser lpt2 = new LanguageTagParser();

// TODO replace this with standard call.

/**
 * Applies the language, script and territory entries of {@code localeAliasInfo} to a locale.
 *
 * @param locale the locale identifier to resolve
 * @return the identifier after replacing an aliased language prefix, script and region; when
 *     a replacement lists several values, only the first is used
 */
private String getAlias(String locale) {
    lpt2.set(locale);
    locale = lpt2.toString(); // normalize
    String script = lpt2.getScript();
    String region = lpt2.getRegion();
    String temp;
    for (String old : localeAliasInfo.get("language").keySet()) {
        if (locale.startsWith(old)) {
            // the above is a rough check, and will fail with old=moh and locale=mo
            if (!locale.equals(old) && !locale.startsWith(old + "_")) {
                continue;
            }
            temp = localeAliasInfo.get("language").get(old);
            lpt2.setLanguage(temp.split("\\s+")[0] + locale.substring(old.length()));
            break;
        }
    }
    temp = localeAliasInfo.get("script").get(script);
    if (temp != null) {
        lpt2.setScript(temp.split("\\s+")[0]);
    }
    temp = localeAliasInfo.get("territory").get(region);
    if (temp != null) {
        lpt2.setRegion(temp.split("\\s+")[0]);
    }
    return lpt2.toString();
}

/**
 * Returns the coverage level an organization requests for a locale.
 *
 * @param locale the locale key to look up
 * @param vendor the organization
 * @param vendordata coverage levels by organization, then by locale
 * @return the level's string form, or the empty string when the organization or the locale
 *     has no entry
 */
private String getVendorStatus(
        String locale,
        Organization vendor,
        Map<Organization, Map<String, Level>> vendordata) {
    Map<String, Level> levels = vendordata.get(vendor);
    Level statusLevel = levels == null ? null : levels.get(locale);
    return statusLevel == null ? "" : statusLevel.toString();
}

/**
 * Writes the "Territory-Language Information" chart: for every territory with population
 * data, one row per language spoken there, plus a trailing "add new" row per territory.
 *
 * @param pw not used by this method
 * @throws IOException if the chart cannot be written
 */
private void showCountryLanguageInfo(PrintWriter pw) throws IOException {
    PrintWriter pw2 =
            new PrintWriter(
                    new FormattedFileWriter(
                            null,
                            "Territory-Language Information",
                            null,
                            SUPPLEMENTAL_INDEX_ANCHORS));
    TablePrinter tablePrinter =
            new TablePrinter()
                    .addColumn("T", "class='source'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .setRepeatHeader(true)
                    .setHidden(true)
                    .addColumn("Territory", "class='source'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .addColumn(
                            "Code",
                            "class='source'",
                            CldrUtility.getDoubleLinkMsg(),
                            "class='source'",
                            true)
                    .addColumn(
                            "Terr. Literacy",
                            "class='target'",
                            "{0,number,@@}%",
                            "class='targetRight'",
                            true);

    tablePrinter
            .addColumn("Language", "class='target'", null, "class='target'", false)
            .addColumn(
                    "Code",
                    "class='target'",
                    "<a href=\"language_territory_information.html#{0}\">{0}</a>",
                    "class='target'",
                    false)
            .addColumn(
                    "Lang. Pop.",
                    "class='target'",
                    "{0,number,#,#@@}",
                    "class='targetRight'",
                    true)
            .addColumn(
                    "Pop.%", "class='target'", "{0,number,@@}%", "class='targetRight'", true)
            .setSortAscending(false)
            .setSortPriority(1)
            .addColumn(
                    "Literacy%",
                    "class='target'",
                    "{0,number,@@}%",
                    "class='targetRight'",
                    true)
            .addColumn(
                    "Written%",
                    "class='target'",
                    "{0,number,@@}%",
                    "class='targetRight'",
                    true)
            .addColumn("Report Bug", "class='target'", null, "class='target'", false);

    for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
        String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
        PopulationData territoryData2 =
                supplementalDataInfo.getPopulationDataForTerritory(territoryCode);
        double territoryLiteracy = territoryData2.getLiteratePopulationPercent();
        String territoryWeight = getFirstPrimaryWeight(territoryName);

        for (String languageCode :
                supplementalDataInfo.getLanguagesForTerritoryWithPopulationData(
                        territoryCode)) {
            PopulationData languageData =
                    supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                            languageCode, territoryCode);
            double languagePopulationPercent =
                    100 * languageData.getPopulation() / territoryData2.getPopulation();
            double languageliteracy = languageData.getLiteratePopulationPercent();
            double writingFrequency = languageData.getWritingPercent();
            String languageName = getLanguageName(languageCode);

            tablePrinter
                    .addRow()
                    .addCell(territoryWeight)
                    .addCell(territoryName)
                    .addCell(territoryCode)
                    .addCell(territoryLiteracy)
                    .addCell(languageName + getOfficialStatus(territoryCode, languageCode))
                    .addCell(languageCode)
                    .addCell(languageData.getPopulation())
                    .addCell(languagePopulationPercent)
                    .addCell(languageliteracy)
                    .addCell(writingFrequency)
                    .addCell(
                            addBug(
                                    1217,
                                    "<i>bug</i>",
                                    "<email>",
                                    "Fix info for "
                                            + languageName
                                            + " ("
                                            + languageCode
                                            + ")"
                                            + " in "
                                            + territoryName
                                            + " ("
                                            + territoryCode
                                            + ")",
                                    "<fixed data for territory, plus references>"))
                    .finishRow();
        }

        // Trailing row with zeroed figures that carries the "add new" link.
        tablePrinter
                .addRow()
                .addCell(territoryWeight)
                .addCell(territoryName)
                .addCell(territoryCode)
                .addCell(territoryLiteracy)
                .addCell(
                        addBug(
                                1217,
                                "<i>add new</i>",
                                "<email>",
                                "Add language to " + territoryName + "(" + territoryCode + ")",
                                "<language, speaker pop. and literacy in territory, plus references>"))
                .addCell("")
                .addCell(0.0d)
                .addCell(0.0d)
                .addCell(0.0d)
                .addCell(0.0d)
                .addCell("")
                .finishRow();
    }
    String value = tablePrinter.toTable();
    pw2.println(value);
    pw2.close();
}

/**
 * Writes the "Territory Information" chart: population (in millions), GDP (in millions),
 * modern currencies, and one column per entry of {@code territoryTypes} for every territory
 * with population data.
 *
 * @param pw not used by this method
 * @throws IOException if the chart cannot be written
 */
private void showCountryInfo(PrintWriter pw) throws IOException {
    PrintWriter pw2 =
            new PrintWriter(
                    new FormattedFileWriter(
                            null, "Territory Information", null, SUPPLEMENTAL_INDEX_ANCHORS));
    TablePrinter tablePrinter =
            new TablePrinter()
                    .addColumn("T", "class='source'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .setRepeatHeader(true)
                    .setHidden(true)
                    .addColumn("Territory", "class='source'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .addColumn(
                            "Code",
                            "class='source'",
                            CldrUtility.getDoubleLinkMsg(),
                            "class='source'",
                            true)
                    .addColumn(
                            "Terr. Pop (M)",
                            "class='target'",
                            "{0,number,#,#@@}",
                            "class='targetRight'",
                            true)
                    .addColumn(
                            "Terr. GDP ($M PPP)",
                            "class='target'",
                            "{0,number,#,#@@}",
                            "class='targetRight'",
                            true)
                    .addColumn(
                            "Currencies (2006...)",
                            "class='target'",
                            null,
                            "class='target'",
                            true);
    for (Iterator<String> it = territoryTypes.iterator(); it.hasNext(); ) {
        String header = it.next();
        if (header.equals("calendar")) {
            header = "calendar (+gregorian)";
        }
        tablePrinter
                .addColumn(header)
                .setHeaderAttributes("class='target'")
                .setCellAttributes("class='target'")
                .setSpanRows(true);
    }

    tablePrinter.addColumn("Report Bug", "class='target'", null, "class='target'", false);

    // World-level values are the fallback for every territory, so look them up once.
    Map<String, Set<String>> worldData =
            territoryData.get(getName(CLDRFile.TERRITORY_NAME, "001", false));

    for (String territoryCode : supplementalDataInfo.getTerritoriesWithPopulationData()) {
        String territoryName = english.getName(CLDRFile.TERRITORY_NAME, territoryCode);
        PopulationData territoryData2 =
                supplementalDataInfo.getPopulationDataForTerritory(territoryCode);
        double population = territoryData2.getPopulation() / 1000000;
        double gdp = territoryData2.getGdp() / 1000000;

        Map<String, Set<String>> countryData =
                territoryData.get(getName(CLDRFile.TERRITORY_NAME, territoryCode, false));

        tablePrinter
                .addRow()
                .addCell(getFirstPrimaryWeight(territoryName))
                .addCell(territoryName)
                .addCell(territoryCode)
                .addCell(population)
                .addCell(gdp)
                .addCell(getCurrencyNames(territoryCode));

        addOtherCountryData(tablePrinter, worldData, countryData);

        tablePrinter
                .addCell(
                        addBug(
                                1217,
                                "<i>bug</i>",
                                "<email>",
                                "Fix info for " + territoryName + " (" + territoryCode + ")",
                                "<fixed data for territory, plus references>"))
                .finishRow();
    }
    String value = tablePrinter.toTable();
    pw2.println(value);
    pw2.close();
}

static Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);

// Do just an approximation for now

/**
 * Returns an approximate grouping key for a name: the first code point of the decomposition
 * of the name's first code point (so an accented initial groups with its base letter).
 *
 * @param territoryName the name to key; despite the parameter name it is also called with
 *     language names
 * @return a one-code-point string, or the empty string when the name is null or empty
 */
private String getFirstPrimaryWeight(String territoryName) {
    if (territoryName == null || territoryName.isEmpty()) {
        return "";
    }
    // Read a whole code point: charAt(0) would yield a lone surrogate for a
    // supplementary-plane initial.
    int first = territoryName.codePointAt(0);
    String result = nfd.getDecomposition(first);
    if (result == null) {
        return UTF16.valueOf(first);
    }
    return UTF16.valueOf(result.codePointAt(0));
}

/**
 * Returns an HTML marker for the official status of a language in a territory.
 *
 * @param territoryCode the territory
 * @param languageCode the language
 * @return a {@code <span>} whose title is the status with underscores replaced by spaces and
 *     whose text is the short status in braces; the empty string when there is no data or the
 *     status is unknown
 */
private String getOfficialStatus(String territoryCode, String languageCode) {
    PopulationData x =
            supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                    languageCode, territoryCode);
    if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) {
        return "";
    }
    return " <span title='"
            + x.getOfficialStatus().toString().replace('_', ' ')
            + "'>{"
            + x.getOfficialStatus().toShortString()
            + "}</span>";
}

/**
 * Returns the official status of a language in a territory as plain text.
 *
 * @param territoryCode the territory
 * @param languageCode the language
 * @return the status's string form, or the empty string when there is no data or the status
 *     is unknown
 */
private String getRawOfficialStatus(String territoryCode, String languageCode) {
    PopulationData x =
            supplementalDataInfo.getLanguageAndTerritoryPopulationData(
                    languageCode, territoryCode);
    if (x == null || x.getOfficialStatus() == OfficialStatus.unknown) {
        return "";
    }
    return x.getOfficialStatus().toString();
}

/**
 * Adds one cell per entry of {@code territoryTypes} to the current table row.
 *
 * <p>Each cell lists the territory's own values for that type, falling back to the world
 * values when the territory has none; the cell is empty when neither exists.
 *
 * @param tablePrinter the table whose current row receives the cells
 * @param worldData world-level values by type; may be null
 * @param countryData territory-level values by type; may be null
 */
private void addOtherCountryData(
        TablePrinter tablePrinter,
        Map<String, Set<String>> worldData,
        Map<String, Set<String>> countryData) {
    for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext(); ) {
        String type = it2.next();
        Set<String> territoryResults = countryData == null ? null : countryData.get(type);
        if (territoryResults == null && worldData != null) {
            territoryResults = worldData.get(type);
        }
        // Same text as Set.toString() without the surrounding brackets.
        String out =
                territoryResults == null
                        ? ""
                        : Joiner.on(", ").useForNull("null").join(territoryResults);
        tablePrinter.addCell(out);
    }
}

/**
 * Returns the names of a territory's modern currencies.
 *
 * @param territoryCode the territory
 * @return the currency names separated by {@code ",<br>"}, or the empty string when the
 *     territory has none
 */
private String getCurrencyNames(String territoryCode) {
    Set<String> currencies = territoriesToModernCurrencies.getAll(territoryCode);
    if (currencies == null || currencies.isEmpty()) {
        return "";
    }
    StringBuilder buffer = new StringBuilder();
    for (String code : currencies) {
        if (buffer.length() != 0) {
            buffer.append(",<br>");
        }
        buffer.append(getName(CLDRFile.CURRENCY_NAME, code, false));
    }
    return buffer.toString();
}

/**
 * Records {@code substitute} as a substitution for {@code value} and logs the pair in hex.
 * Does nothing when the two strings are equal.
 *
 * @param value the original string
 * @param substitute the string that may replace it; must not be null
 */
private void addCharSubstitution(String value, String substitute) {
    if (substitute.equals(value)) {
        return;
    }
    LinkedHashSet<String> already = charSubstitutions.get(value);
    if (already == null) {
        already = new LinkedHashSet<>(0);
        charSubstitutions.put(value, already);
    }
    already.add(substitute);
    Log.logln(hex(value, " ") + "; " + hex(substitute, " "));
}

/** */
// public void showTerritoryInfo() {
// Map territory_parent = new TreeMap();
// gather("001", territory_parent);
// for (Iterator it = territory_parent.keySet().iterator(); it.hasNext();) {
// String territory = (String) it.next();
// String parent = (String) territory_parent.get(territory);
// System.out.println(territory + "\t" +
// english.getName(english.TERRITORY_NAME, territory) + "\t"
// + parent + "\t" +
english.getName(english.TERRITORY_NAME, parent)); 1932 // } 1933 // } 1934 1935 // private void gather(String item, Map territory_parent) { 1936 // Collection containedByItem = (Collection) group_contains.get(item); 1937 // if (containedByItem == null) 1938 // return; 1939 // for (Iterator it = containedByItem.iterator(); it.hasNext();) { 1940 // String contained = (String) it.next(); 1941 // territory_parent.put(contained, item); 1942 // gather(contained, territory_parent); 1943 // } 1944 // } 1945 addTerritoryInfo(String territoriesList, String type, String info)1946 private void addTerritoryInfo(String territoriesList, String type, String info) { 1947 String[] territories = territoriesList.split("\\s+"); 1948 territoryTypes.add(type); 1949 for (int i = 0; i < territories.length; ++i) { 1950 String territory = getName(CLDRFile.TERRITORY_NAME, territories[i], false); 1951 Map<String, Set<String>> s = territoryData.get(territory); 1952 if (s == null) { 1953 territoryData.put(territory, s = new TreeMap<>()); 1954 } 1955 Set<String> ss = s.get(type); 1956 if (ss == null) { 1957 s.put(type, ss = new TreeSet<>()); 1958 } 1959 ss.add(info); 1960 } 1961 } 1962 showCalendarData(PrintWriter pw0)1963 public void showCalendarData(PrintWriter pw0) throws IOException { 1964 PrintWriter pw = 1965 new PrintWriter( 1966 new FormattedFileWriter( 1967 null, 1968 "Other Territory Data", 1969 null, 1970 SUPPLEMENTAL_INDEX_ANCHORS)); 1971 pw.println("<table>"); 1972 pw.println("<tr><th class='source'>Territory</th>"); 1973 for (Iterator<String> it = territoryTypes.iterator(); it.hasNext(); ) { 1974 String header = it.next(); 1975 if (header.equals("calendar")) header = "calendar (+gregorian)"; 1976 pw.println("<th class='target'>" + header + "</th>"); 1977 } 1978 pw.println("</tr>"); 1979 1980 String worldName = getName(CLDRFile.TERRITORY_NAME, "001", false); 1981 Map<String, Set<String>> worldData = territoryData.get(worldName); 1982 for (Iterator<String> it = 
territoryData.keySet().iterator(); it.hasNext(); ) { 1983 String country = it.next(); 1984 if (country.equals(worldName)) continue; 1985 showCountry(pw, country, country, worldData); 1986 } 1987 showCountry(pw, worldName, "Other", worldData); 1988 pw.println("</table>"); 1989 pw.close(); 1990 } 1991 showCountry( PrintWriter pw, String country, String countryTitle, Map<String, Set<String>> worldData)1992 private void showCountry( 1993 PrintWriter pw, 1994 String country, 1995 String countryTitle, 1996 Map<String, Set<String>> worldData) { 1997 pw.println("<tr><td class='source'>" + countryTitle + "</td>"); 1998 Map<String, Set<String>> data = territoryData.get(country); 1999 for (Iterator<String> it2 = territoryTypes.iterator(); it2.hasNext(); ) { 2000 String type = it2.next(); 2001 String target = "target"; 2002 Set<String> results = data.get(type); 2003 Set<String> worldResults = worldData.get(type); 2004 if (results == null) { 2005 results = worldResults; 2006 target = "target2"; 2007 } else if (results.equals(worldResults)) { 2008 target = "target2"; 2009 } 2010 String out = ""; 2011 if (results != null) { 2012 out = results + ""; 2013 out = out.substring(1, out.length() - 1); // remove [ and ] 2014 } 2015 pw.println("<td class='" + target + "'>" + out + "</td>"); 2016 } 2017 pw.println("</tr>"); 2018 } 2019 showCorrespondances()2020 public void showCorrespondances() { 2021 // show correspondances between language and script 2022 Map<String, String> name_script = new TreeMap<>(); 2023 for (Iterator<String> it = sc.getAvailableCodes("script").iterator(); it.hasNext(); ) { 2024 String script = it.next(); 2025 String name = english.getName(CLDRFile.SCRIPT_NAME, script); 2026 if (name == null) name = script; 2027 name_script.put(name, script); 2028 /* 2029 * source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME ? territory_languages 2030 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME ? 
language_territories 2031 * : source == CLDRFile.SCRIPT_NAME && target == CLDRFile.LANGUAGE_NAME ? script_languages 2032 * : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.SCRIPT_NAME ? language_scripts 2033 */ } 2034 String delimiter = "\\P{L}+"; 2035 Map<String, String> name_language = new TreeMap<>(); 2036 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); 2037 it.hasNext(); ) { 2038 String language = it.next(); 2039 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 2040 if (names == null) names = language; 2041 name_language.put(names, language); 2042 } 2043 for (Iterator<String> it = sc.getAvailableCodes("language").iterator(); 2044 it.hasNext(); ) { 2045 String language = it.next(); 2046 String names = english.getName(CLDRFile.LANGUAGE_NAME, language); 2047 if (names == null) names = language; 2048 String[] words = names.split(delimiter); 2049 if (words.length > 1) { 2050 // System.out.println(names); 2051 } 2052 for (int i = 0; i < words.length; ++i) { 2053 String name = words[i]; 2054 String script = name_script.get(name); 2055 if (script != null) { 2056 Set<String> langSet = (Set<String>) script_languages.asMap().get(script); 2057 if (langSet != null && langSet.contains(language)) System.out.print("*"); 2058 System.out.println( 2059 "\t" + name + " [" + language + "]\t=> " + name + " [" + script 2060 + "]"); 2061 } else { 2062 String language2 = name_language.get(name); 2063 if (language2 != null && !language.equals(language2)) { 2064 Set<String> langSet = (Set<String>) language_scripts.get(language); 2065 if (langSet != null) System.out.print("*"); 2066 System.out.print( 2067 "?\tSame script?\t + " 2068 + getName(CLDRFile.LANGUAGE_NAME, language, false) 2069 + "\t & " 2070 + getName(CLDRFile.LANGUAGE_NAME, language2, false)); 2071 langSet = (Set<String>) language_scripts.get(language2); 2072 if (langSet != null) System.out.print("*"); 2073 System.out.println(); 2074 } 2075 } 2076 } 2077 } 2078 } 2079 2080 /** 
2081 * @throws IOException 2082 */ printCurrency(PrintWriter index)2083 public void printCurrency(PrintWriter index) throws IOException { 2084 PrintWriter pw = 2085 new PrintWriter( 2086 new FormattedFileWriter( 2087 null, 2088 "Detailed Territory-Currency Information", 2089 null 2090 // "<p>The following table shows when currencies were in use in 2091 // different countries. " + 2092 // "See also <a href='#format_info'>Decimal Digits and 2093 // Rounding</a>. " + 2094 // "To correct any information here, please file a " + 2095 // addBug(1274, "bug", "<email>", "Currency Bug", 2096 // "<currency, country, and references supporting change>") + 2097 // ".</p>" 2098 , 2099 SUPPLEMENTAL_INDEX_ANCHORS)); 2100 String section1 = "Territory to Currency"; 2101 String section2 = "Decimal Digits and Rounding"; 2102 showContents(pw, "territory_currency", section1, "format_info", section2); 2103 2104 pw.println( 2105 "<h2>" 2106 + CldrUtility.getDoubleLinkedText( 2107 "territory_currency", "1. " + section1) 2108 + "</h2>"); 2109 2110 // doTitle(pw, "Territory \u2192 Currency"); 2111 pw.println("<table>"); 2112 pw.println( 2113 "<tr><th class='source'>Territory</th>" 2114 + "<th class='source'>Code</th>" 2115 + "<th class='target'>From</th>" 2116 + "<th class='target'>To</th>" 2117 + "<th class='target'>Currency</th>" 2118 + "<th class='target'>Name</th>" 2119 + "</tr>"); 2120 2121 Relation<String, String> currencyToTerritory = 2122 Relation.of(new HashMap<String, Set<String>>(), HashSet.class); 2123 Relation<String, String> modernCurrencyToTerritory = 2124 Relation.of(new HashMap<String, Set<String>>(), HashSet.class); 2125 2126 for (Entry<String, String> nameCode : NAME_TO_REGION.entrySet()) { 2127 String name = nameCode.getKey(); 2128 String regionCode = nameCode.getValue(); 2129 if (!StandardCodes.isCountry(regionCode)) { 2130 continue; 2131 } 2132 if (sc.isLstregPrivateUse("region", regionCode)) { 2133 continue; 2134 } 2135 Set<CurrencyDateInfo> info = 
supplementalDataInfo.getCurrencyDateInfo(regionCode); 2136 2137 int infoSize = 1; 2138 if (info != null) { 2139 infoSize = info.size(); 2140 } 2141 pw.println( 2142 "<tr>" 2143 + "<td class='source' rowSpan='" 2144 + infoSize 2145 + "'>" 2146 + name 2147 + "</td>" 2148 + "<td class='source' rowSpan='" 2149 + infoSize 2150 + "'>" 2151 + CldrUtility.getDoubleLinkedText(regionCode) 2152 + "</td>"); 2153 if (info == null) { 2154 pw.println( 2155 "<td class='target'>" 2156 + "<i>na</i>" 2157 + "</td>" 2158 + "<td class='target'>" 2159 + "<i>na</i>" 2160 + "</td>" 2161 + "<td class='target'>" 2162 + "<i>na</i>" 2163 + "</td>" 2164 + "<td class='target'>" 2165 + "<i>na</i>" 2166 + "</td>" 2167 + "</tr>"); 2168 continue; 2169 } 2170 boolean first = true; 2171 for (CurrencyDateInfo infoItem : info) { 2172 Date endData = infoItem.getEnd(); 2173 if (endData.equals(CurrencyDateInfo.END_OF_TIME)) { 2174 modernCurrencyToTerritory.put( 2175 infoItem.getCurrency(), getTerritoryName(regionCode)); 2176 } else { 2177 currencyToTerritory.put( 2178 infoItem.getCurrency(), getTerritoryName(regionCode)); 2179 } 2180 if (first) first = false; 2181 else pw.println("<tr>"); 2182 pw.println( 2183 "<td class='target'>" 2184 + CurrencyDateInfo.formatDate(infoItem.getStart()) 2185 + "</td>" 2186 + "<td class='target'>" 2187 + CurrencyDateInfo.formatDate(endData) 2188 + "</td>" 2189 + "<td class='target'>" 2190 + infoItem.getCurrency() 2191 + "</td>" 2192 + "<td class='target'>" 2193 + english.getName("currency", infoItem.getCurrency()) 2194 + "</td>" 2195 + "</tr>"); 2196 } 2197 } 2198 // doFooter(pw); 2199 // pw.close(); 2200 // pw = new PrintWriter(new FormattedFileWriter(index, "Currency Format Info", null)); 2201 pw.write("</table>"); 2202 2203 pw.println( 2204 "<h2>" 2205 + CldrUtility.getDoubleLinkedText("format_info", "2. 
" + section2) 2206 + "</h2>"); 2207 2208 pw.write( 2209 "<p>This table shows the number of digits used for each currency, " 2210 + " and the countries where it is or was in use. " 2211 + "Countries where the currency is in current use are bolded. " 2212 + "If the currency uses ‘nickel rounding’ in transactions, the digits are followed by ‘(5)’. " 2213 + "Where the values are different in a cash context, that is shown in a second column." 2214 + "</p>"); 2215 pw.write("<div align='center'><table>"); 2216 2217 // doTitle(pw, "Currency Format Info"); 2218 // <info iso4217="CZK" digits="2" rounding="0" cashDigits="0" 2219 // cashRounding="0"/> 2220 2221 pw.println( 2222 "<tr>" 2223 + "<th class='source nowrap'>Name</th>" 2224 + "<th class='source'>Currency</th>" 2225 + "<th class='target'>Digits</th>" 2226 + "<th class='target'>Cash Digits</th>" 2227 + "<th class='target'>Countries</th>" 2228 + "</tr>"); 2229 Set<String> currencyList = new TreeSet<String>(col); 2230 currencyList.addAll(currency_fractions.keySet()); 2231 currencyList.addAll(currency_territory.keySet()); 2232 2233 for (Entry<String, String> nameCode : NAME_TO_CURRENCY.entrySet()) { 2234 // String name = nameCode.getKey(); 2235 String currency = nameCode.getValue(); 2236 CurrencyNumberInfo info = supplementalDataInfo.getCurrencyNumberInfo(currency); 2237 Set<String> territories = currencyToTerritory.get(currency); 2238 Set<String> modernTerritories = modernCurrencyToTerritory.get(currency); 2239 2240 // String fractions = (String) currency_fractions.get(currency); 2241 // if (fractions == null) 2242 // fractions = defaultDigits; 2243 // Set territories = (Set) currency_territory.get(currency); 2244 pw.print( 2245 "<tr>" 2246 + "<td class='source nowrap'>" 2247 + TransliteratorUtilities.toHTML.transform( 2248 english.getName("currency", currency)) 2249 + "</td>" 2250 + "<td class='source'>" 2251 + CldrUtility.getDoubleLinkedText(currency) 2252 + "</td>" 2253 + "<td class='target'>" 2254 + info.getDigits() 
2255 + (info.getRounding() == 0 ? "" : " (" + info.getRounding() + ")") 2256 + "</td>" 2257 + "<td class='target'>" 2258 + (info.cashDigits == info.getDigits() 2259 && info.cashRounding == info.getRounding() 2260 ? "" 2261 : (info.cashDigits 2262 + (info.cashRounding == 0 2263 ? "" 2264 : " (" + info.cashRounding + ")"))) 2265 + "</td>" 2266 + "<td class='target'>"); 2267 boolean first = true; 2268 boolean needBreak = false; 2269 if (modernTerritories != null) { 2270 needBreak = true; 2271 for (String territory : modernTerritories) { 2272 if (first) first = false; 2273 else pw.print(", "); 2274 pw.print("<b>" + territory + "</b>"); 2275 } 2276 } 2277 // boolean haveBreak = true; 2278 if (territories != null) { 2279 for (String territory : territories) { 2280 if (first) first = false; 2281 else if (!needBreak) pw.print(", "); 2282 else { 2283 pw.print(",<br>"); 2284 needBreak = false; 2285 } 2286 pw.print(territory); 2287 } 2288 } 2289 pw.println("</td></tr>"); 2290 } 2291 pw.println("</table>"); 2292 pw.close(); 2293 // doFooter(pw); 2294 2295 // if (false) { 2296 // doTitle(pw, "Territories Versus Currencies"); 2297 // pw.println("<tr><th>Territories Without Currencies</th><th>Currencies Without 2298 // Territories</th></tr>"); 2299 // pw.println("<tr><td class='target'>"); 2300 // Set territoriesWithoutCurrencies = new TreeSet(); 2301 // territoriesWithoutCurrencies.addAll(sc.getGoodAvailableCodes("territory")); 2302 // territoriesWithoutCurrencies.removeAll(territoriesWithCurrencies); 2303 // territoriesWithoutCurrencies.removeAll(group_contains.keySet()); 2304 // boolean first = true; 2305 // for (Iterator it = territoriesWithoutCurrencies.iterator(); it.hasNext();) { 2306 // if (first) first = false; 2307 // else pw.print(", "); 2308 // pw.print(english.getName(CLDRFile.TERRITORY_NAME, it.next().toString(), false)); 2309 // } 2310 // pw.println("</td><td class='target'>"); 2311 // Set currenciesWithoutTerritories = new TreeSet(); 2312 // 
currenciesWithoutTerritories.addAll(sc.getGoodAvailableCodes("currency")); 2313 // currenciesWithoutTerritories.removeAll(currenciesWithTerritories); 2314 // first = true; 2315 // for (Iterator it = currenciesWithoutTerritories.iterator(); it.hasNext();) { 2316 // if (first) first = false; 2317 // else pw.print(", "); 2318 // pw.print(english.getName(CLDRFile.CURRENCY_NAME, it.next().toString(), false)); 2319 // } 2320 // pw.println("</td></tr>"); 2321 // doFooter(pw); 2322 // } 2323 } 2324 getTerritoryName(String territory)2325 private String getTerritoryName(String territory) { 2326 String name; 2327 name = english.getName("territory", territory); 2328 if (name == null) { 2329 name = sc.getData("territory", territory); 2330 } 2331 if (name != null) { 2332 return TransliteratorUtilities.toHTML.transform(name) + " (" + territory + ")"; 2333 } else { 2334 return territory; 2335 } 2336 } 2337 2338 /** 2339 * @throws IOException 2340 */ printAliases(PrintWriter index)2341 public void printAliases(PrintWriter index) throws IOException { 2342 PrintWriter pw = 2343 new PrintWriter( 2344 new FormattedFileWriter( 2345 null, "Aliases", null, SUPPLEMENTAL_INDEX_ANCHORS)); 2346 2347 // doTitle(pw, "Aliases"); 2348 pw.println("<table>"); 2349 pw.println( 2350 "<tr><th class='source'>" 2351 + "Type" 2352 + "</th>" 2353 + "<th class='source'>" 2354 + "Code" 2355 + "</th>" 2356 + "<th class='target'>" 2357 + "Reason" 2358 + "</th>" 2359 + "<th class='target'>" 2360 + "Substitute (if available)" 2361 + "</th></tr>"); 2362 for (Iterator<String[]> it = aliases.iterator(); it.hasNext(); ) { 2363 String[] items = it.next(); 2364 pw.println( 2365 "<tr><td class='source'>" 2366 + items[0] 2367 + "</td>" 2368 + "<td class='source'>" 2369 + CldrUtility.getDoubleLinkedText(items[1]) 2370 + "</td>" 2371 + "<td class='target'>" 2372 + items[3] 2373 + "</td>" 2374 + "<td class='target'>" 2375 + items[2] 2376 + "</td></tr>"); 2377 } 2378 // doFooter(pw); 2379 pw.println("</table>"); 2380 
pw.close(); 2381 } 2382 2383 // deprecatedItems 2384 // public void printDeprecatedItems(PrintWriter pw) { 2385 // doTitle(pw, "Deprecated Items"); 2386 // pw.print("<tr><td class='z0'><b>Type</b></td><td class='z1'><b>Elements</b></td><td 2387 // class='z2'><b>Attributes</b></td><td class='z4'><b>Values</b></td>"); 2388 // for (Iterator it = deprecatedItems.iterator(); it.hasNext();) { 2389 // Map source = (Map)it.next(); 2390 // Object item; 2391 // pw.print("<tr>"); 2392 // pw.print("<td class='z0'>" + ((item = source.get("type")) != null ? item : "<i>any</i>") 2393 // + "</td>"); 2394 // pw.print("<td class='z1'>" + ((item = source.get("elements")) != null ? item : 2395 // "<i>any</i>") + "</td>"); 2396 // pw.print("<td class='z2'>" + ((item = source.get("attributes")) != null ? item : 2397 // "<i>any</i>") + "</td>"); 2398 // pw.print("<td class='z4'>" + ((item = source.get("values")) != null ? item : 2399 // "<i>any</i>") + "</td>"); 2400 // pw.print("</tr>"); 2401 // } 2402 // doFooter(pw); 2403 // } 2404 printWindows_Tzid(PrintWriter index)2405 public void printWindows_Tzid(PrintWriter index) throws IOException { 2406 Map<String, Map<String, Map<String, String>>> zoneMapping = 2407 supplementalDataInfo.getTypeToZoneToRegionToZone(); 2408 PrintWriter pw = 2409 new PrintWriter( 2410 new FormattedFileWriter( 2411 null, "Zone \u2192 Tzid", null, SUPPLEMENTAL_INDEX_ANCHORS)); 2412 for (Entry<String, Map<String, Map<String, String>>> typeAndZoneToRegionToZone : 2413 zoneMapping.entrySet()) { 2414 String type = typeAndZoneToRegionToZone.getKey(); 2415 Map<String, Map<String, String>> zoneToRegionToZone = 2416 typeAndZoneToRegionToZone.getValue(); 2417 pw.println("<br><h1>Mapping for: " + type + "</h1><br>"); 2418 // doTitle(pw, "Windows \u2192 Tzid"); 2419 pw.println("<table>"); 2420 pw.println( 2421 "<tr><th class='source'>" 2422 + type 2423 + "</th><th class='source'>" 2424 + "Region" 2425 + "</th><th class='target'>" 2426 + "TZID" 2427 + "</th></tr>"); 2428 
2429 for (Entry<String, Map<String, String>> zoneAndregionToZone : 2430 zoneToRegionToZone.entrySet()) { 2431 String source = zoneAndregionToZone.getKey(); 2432 Map<String, String> regionToZone = zoneAndregionToZone.getValue(); 2433 for (Entry<String, String> regionAndZone : regionToZone.entrySet()) { 2434 String region = regionAndZone.getKey(); 2435 String target = regionAndZone.getValue(); 2436 if (region == null) region = "<i>any</a>"; 2437 pw.println( 2438 "<tr><td class='source'>" 2439 + source 2440 + "</td><td class='source'>" 2441 + region 2442 + "</td><td class='target'>" 2443 + target 2444 + "</td></tr>"); 2445 } 2446 } 2447 // doFooter(pw); 2448 pw.println("</table>"); 2449 } 2450 pw.close(); 2451 } 2452 2453 // <info iso4217="ADP" digits="0" rounding="0"/> 2454 printCharacters(PrintWriter index)2455 public void printCharacters(PrintWriter index) throws IOException { 2456 String title = "Character Fallback Substitutions"; 2457 2458 PrintWriter pw = 2459 new PrintWriter( 2460 new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2461 // doTitle(pw, title); 2462 pw.println("<table>"); 2463 2464 pw.println( 2465 "<tr><th colSpan='3'>Substitute for character (if not in repertoire)</th><th colSpan='4'>The following (in priority order, first string that <i>is</i> in repertoire)</th></tr>"); 2466 UnicodeSet chars = new UnicodeSet("[:NFKC_QuickCheck=N:]"); 2467 for (com.ibm.icu.text.UnicodeSetIterator it = 2468 new com.ibm.icu.text.UnicodeSetIterator(chars); 2469 it.next(); ) { 2470 String value = it.getString(); 2471 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFC)); 2472 addCharSubstitution(value, Normalizer.normalize(value, Normalizer.NFKC)); 2473 } 2474 int[] counts = new int[4]; 2475 for (Iterator<String> it = charSubstitutions.keySet().iterator(); it.hasNext(); ) { 2476 String value = it.next(); 2477 LinkedHashSet<String> substitutes = charSubstitutions.get(value); 2478 String nfc = Normalizer.normalize(value, 
Normalizer.NFC); 2479 String nfkc = Normalizer.normalize(value, Normalizer.NFKC); 2480 2481 String sourceTag = "<td class='source'>"; 2482 if (substitutes.size() > 1) { 2483 sourceTag = "<td class='source' rowSpan='" + substitutes.size() + "'>"; 2484 } 2485 boolean first = true; 2486 for (Iterator<String> it2 = substitutes.iterator(); it2.hasNext(); ) { 2487 String substitute = it2.next(); 2488 String type = "Explicit"; 2489 String targetTag = "<td class='target3'>"; 2490 if (substitute.equals(nfc)) { 2491 type = "NFC"; 2492 targetTag = "<td class='target'>"; 2493 counts[2]++; 2494 } else if (substitute.equals(nfkc)) { 2495 type = "NFKC"; 2496 targetTag = "<td class='target4'>"; 2497 counts[3]++; 2498 } else { 2499 counts[0]++; 2500 } 2501 pw.println( 2502 "<tr>" 2503 + (!first 2504 ? "" 2505 : sourceTag 2506 + hex(value, ", ") 2507 + "</td>" 2508 + sourceTag 2509 + TransliteratorUtilities.toHTML.transliterate( 2510 value) 2511 + "</td>" 2512 + sourceTag 2513 + UCharacter.getName(value, ", ") 2514 + "</td>") 2515 + targetTag 2516 + type 2517 + "</td>" 2518 + targetTag 2519 + hex(substitute, ", ") 2520 + "</td>" 2521 + targetTag 2522 + TransliteratorUtilities.toHTML.transliterate(substitute) 2523 + "</td>" 2524 + targetTag 2525 + UCharacter.getName(substitute, ", ") 2526 + "</td></tr>"); 2527 first = false; 2528 } 2529 } 2530 // doFooter(pw); 2531 pw.println("</table>"); 2532 2533 pw.close(); 2534 for (int i = 0; i < counts.length; ++i) { 2535 System.out.println("Count\t" + i + "\t" + counts[i]); 2536 } 2537 } 2538 hex(String s, String separator)2539 public static String hex(String s, String separator) { 2540 StringBuffer result = new StringBuffer(); 2541 int cp; 2542 for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { 2543 cp = UTF16.charAt(s, i); 2544 if (i != 0) result.append(separator); 2545 result.append(com.ibm.icu.impl.Utility.hex(cp)); 2546 } 2547 return result.toString(); 2548 } 2549 2550 /** */ 2551 // private PrintWriter doTitle(PrintWriter pw, 
String title) { 2552 // //String anchor = FileUtilities.anchorize(title); 2553 // pw.println("<div align='center'><table>"); 2554 // //anchors.put(title, anchor); 2555 // //PrintWriter result = null; 2556 // //return result; 2557 // } 2558 2559 // private void doFooter(PrintWriter pw) { 2560 // pw.println("</table></div>"); 2561 // } printContains2( PrintWriter pw, String lead, String start, int depth, boolean isFirst)2562 public void printContains2( 2563 PrintWriter pw, String lead, String start, int depth, boolean isFirst) { 2564 String name = depth == 4 ? start : getName(CLDRFile.TERRITORY_NAME, start, false); 2565 if (!isFirst) pw.print(lead); 2566 int count = getTotalContainedItems(start, depth); 2567 pw.print( 2568 "<td class='z" 2569 + depth 2570 + "' rowSpan='" 2571 + count 2572 + "'>" 2573 + name 2574 + "</td>"); // colSpan='" + (5 - 2575 // depth) + "' 2576 if (depth == 4) pw.println("</tr>"); 2577 Collection<String> contains = getContainedCollection(start, depth); 2578 if (contains != null) { 2579 Collection<String> contains2 = new TreeSet<String>(territoryNameComparator); 2580 contains2.addAll(contains); 2581 boolean first = true; 2582 for (Iterator<String> it = contains2.iterator(); it.hasNext(); ) { 2583 String item = it.next(); 2584 printContains2(pw, lead, item, depth + 1, first); // + "<td> </td>" 2585 first = false; 2586 } 2587 } 2588 } 2589 getTotalContainedItems(String start, int depth)2590 private int getTotalContainedItems(String start, int depth) { 2591 Collection<String> c = getContainedCollection(start, depth); 2592 if (c == null) return 1; 2593 int sum = 0; 2594 for (Iterator<String> it = c.iterator(); it.hasNext(); ) { 2595 sum += getTotalContainedItems(it.next(), depth + 1); 2596 } 2597 return sum; 2598 } 2599 2600 /** */ getContainedCollection(String start, int depth)2601 private Collection<String> getContainedCollection(String start, int depth) { 2602 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 
2603 if (contains == null) { 2604 contains = sc.getCountryToZoneSet().get(start); 2605 if (contains == null && depth == 3) { 2606 contains = new TreeSet<>(); 2607 if (start.compareTo("A") >= 0) { 2608 contains.add("<font color='red'>MISSING TZID</font>"); 2609 } else { 2610 contains.add("<font color='red'>Not yet ISO code</font>"); 2611 } 2612 } 2613 } 2614 return contains; 2615 } 2616 2617 /** 2618 * @param table TODO 2619 */ printMissing(PrintWriter pw, int source, int table)2620 public void printMissing(PrintWriter pw, int source, int table) { 2621 Set<String> missingItems = new HashSet<>(); 2622 String type = null; 2623 if (source == CLDRFile.TERRITORY_NAME) { 2624 type = "territory"; 2625 missingItems.addAll(sc.getAvailableCodes(type)); 2626 missingItems.removeAll(territory_languages.keySet()); 2627 missingItems.removeAll(supplementalDataInfo.getContainmentCore().keySet()); 2628 missingItems.remove("200"); // czechoslovakia 2629 } else if (source == CLDRFile.SCRIPT_NAME) { 2630 type = "script"; 2631 missingItems.addAll(sc.getAvailableCodes(type)); 2632 missingItems.removeAll(script_languages.keySet()); 2633 } else if (source == CLDRFile.LANGUAGE_NAME) { 2634 type = "language"; 2635 missingItems.addAll(sc.getAvailableCodes(type)); 2636 if (table == CLDRFile.SCRIPT_NAME) 2637 missingItems.removeAll(language_scripts.keySet()); 2638 if (table == CLDRFile.TERRITORY_NAME) 2639 missingItems.removeAll(language_territories.keySet()); 2640 } else { 2641 throw new IllegalArgumentException("Illegal code"); 2642 } 2643 Set<String> missingItemsNamed = new TreeSet<String>(col); 2644 for (Iterator<String> it = missingItems.iterator(); it.hasNext(); ) { 2645 String item = it.next(); 2646 List<String> data = sc.getFullData(type, item); 2647 if (data.get(0).equals("PRIVATE USE")) continue; 2648 if (data.size() < 3) continue; 2649 if (!"".equals(data.get(2))) continue; 2650 2651 String itemName = getName(source, item, true); 2652 missingItemsNamed.add(itemName); 2653 } 2654 
pw.println("<div align='center'><table>"); 2655 for (Iterator<String> it = missingItemsNamed.iterator(); it.hasNext(); ) { 2656 pw.println("<tr><td class='target'>" + it.next() + "</td></tr>"); 2657 } 2658 pw.println("</table></div>"); 2659 } 2660 2661 // source, eg english.TERRITORY_NAME 2662 // target, eg english.LANGUAGE_NAME print(PrintWriter pw, int source, int target)2663 public void print(PrintWriter pw, int source, int target) { 2664 Multimap<String, String> data = 2665 source == CLDRFile.TERRITORY_NAME && target == CLDRFile.LANGUAGE_NAME 2666 ? territory_languages 2667 : source == CLDRFile.LANGUAGE_NAME && target == CLDRFile.TERRITORY_NAME 2668 ? language_territories 2669 : source == CLDRFile.SCRIPT_NAME 2670 && target == CLDRFile.LANGUAGE_NAME 2671 ? script_languages 2672 : source == CLDRFile.LANGUAGE_NAME 2673 && target == CLDRFile.SCRIPT_NAME 2674 ? language_scripts 2675 : null; 2676 // transform into names, and sort 2677 Map<String, Set<String>> territory_languageNames = 2678 new TreeMap<String, Set<String>>(col); 2679 for (Iterator<String> it = data.keySet().iterator(); it.hasNext(); ) { 2680 String territory = it.next(); 2681 String territoryName = getName(source, territory, true); 2682 Set<String> s = territory_languageNames.get(territoryName); 2683 if (s == null) 2684 territory_languageNames.put(territoryName, s = new TreeSet<String>(col)); 2685 for (Iterator<String> it2 = data.get(territory).iterator(); it2.hasNext(); ) { 2686 String language = it2.next(); 2687 String languageName = getName(target, language, true); 2688 s.add(languageName); 2689 } 2690 } 2691 2692 pw.println("<div align='center'><table>"); 2693 2694 for (Iterator<String> it = territory_languageNames.keySet().iterator(); 2695 it.hasNext(); ) { 2696 String territoryName = it.next(); 2697 pw.println("<tr><td class='source' colspan='2'>" + territoryName + "</td></tr>"); 2698 Set<String> s = territory_languageNames.get(territoryName); 2699 for (Iterator<String> it2 = s.iterator(); 
it2.hasNext(); ) { 2700 String languageName = it2.next(); 2701 pw.println( 2702 "<tr><td> </td><td class='target'>" + languageName + "</td></tr>"); 2703 } 2704 } 2705 pw.println("</table></div>"); 2706 } 2707 2708 /** 2709 * @param codeFirst TODO 2710 */ getName(int type, String oldcode, boolean codeFirst)2711 private String getName(int type, String oldcode, boolean codeFirst) { 2712 if (oldcode.contains(" ")) { 2713 String[] result = oldcode.split("\\s+"); 2714 for (int i = 0; i < result.length; ++i) { 2715 result[i] = getName(type, result[i], codeFirst); 2716 } 2717 return CldrUtility.join(Arrays.asList(result), ", "); 2718 } else { 2719 int pos = oldcode.indexOf('*'); 2720 String code = pos < 0 ? oldcode : oldcode.substring(0, pos); 2721 String ename = english.getName(type, code); 2722 String nameString = ename == null ? code : ename; 2723 return nameString.equals(oldcode) 2724 ? nameString 2725 : codeFirst 2726 ? "[" + oldcode + "]" + "\t" + nameString 2727 : nameString + "\t" + "[" + oldcode + "]"; 2728 } 2729 } 2730 2731 private String getName(String locale, boolean codeFirst) { 2732 String ename = getLanguageName(locale); 2733 return codeFirst 2734 ? "[" + locale + "]\t" + (ename == null ? locale : ename) 2735 : (ename == null ? 
locale : ename) + "\t[" + locale + "]"; 2736 } 2737 2738 Comparator territoryNameComparator = 2739 new Comparator() { 2740 @Override 2741 public int compare(Object o1, Object o2) { 2742 return col.compare( 2743 getName(CLDRFile.TERRITORY_NAME, (String) o1, false), 2744 getName(CLDRFile.TERRITORY_NAME, (String) o2, false)); 2745 } 2746 }; 2747 2748 static String[] stringArrayPattern = new String[0]; 2749 static String[][] string2ArrayPattern = new String[0][]; 2750 2751 public static Map<String, String> territoryAliases = new HashMap<>(); 2752 2753 public void printContains(PrintWriter index) throws IOException { 2754 String title = "Territory Containment (UN M.49)"; 2755 2756 PrintWriter pw = 2757 new PrintWriter( 2758 new FormattedFileWriter(null, title, null, SUPPLEMENTAL_INDEX_ANCHORS)); 2759 // doTitle(pw, title); 2760 List<String[]> rows = new ArrayList<>(); 2761 printContains3("001", rows, new ArrayList<String>()); 2762 TablePrinter tablePrinter = 2763 new TablePrinter() 2764 .addColumn("World", "class='source'", null, "class='z0'", true) 2765 .setSortPriority(0) 2766 .addColumn("Continent", "class='source'", null, "class='z1'", true) 2767 .setSortPriority(1) 2768 .addColumn("Subcontinent", "class='source'", null, "class='z2'", true) 2769 .setSortPriority(2) 2770 .addColumn( 2771 "Country (Territory)", 2772 "class='source'", 2773 null, 2774 "class='z3'", 2775 true) 2776 .setSortPriority(3) 2777 .addColumn("Time Zone", "class='source'", null, "class='z4'", true) 2778 .setSortPriority(4); 2779 String[][] flatData = rows.toArray(string2ArrayPattern); 2780 pw.println(tablePrinter.addRows(flatData).toTable()); 2781 2782 showSubtable( 2783 pw, ContainmentStyle.grouping, "Groupings", "Grouping", "Contained Regions"); 2784 showSubtable( 2785 pw, 2786 ContainmentStyle.deprecated, 2787 "Deprecated", 2788 "Container", 2789 "Deprecated Region"); 2790 2791 // Relation<String, String> deprecated = supplementalDataInfo 2792 // 
.getTerritoryToContained(ContainmentStyle.deprecated); 2793 // 2794 // for (String region : deprecated.keySet()) { 2795 // nameToContainers.add(region); 2796 // } 2797 // pw.println("<h2>Groupings and Deprecated Regions</h2>"); 2798 // for (String region : nameToContainers) { 2799 // String name = getName(CLDRFile.TERRITORY_NAME, region, false); 2800 // Set<String> dep = deprecated.get(region); 2801 // Set<String> gro = grouping.get(region); 2802 // Iterator<String> depIt = (dep == null ? Collections.EMPTY_SET : 2803 // dep).iterator(); 2804 // Iterator<String> groIt = (gro == null ? Collections.EMPTY_SET : 2805 // gro).iterator(); 2806 // while (depIt.hasNext() || groIt.hasNext()) { 2807 // String dep1 = depIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, 2808 // depIt.next(), false) : ""; 2809 // String gro1 = groIt.hasNext() ? getName(CLDRFile.TERRITORY_NAME, 2810 // groIt.next(), false) : ""; 2811 // tablePrinter2.addRow() 2812 // .addCell(name) 2813 // .addCell(gro1) 2814 // .addCell(dep1) 2815 // .finishRow(); 2816 // } 2817 // } 2818 // pw.println(tablePrinter2.toTable()); 2819 // pw.println("<h2>Other Groupings</h2>"); 2820 // for (Entry<String, Set<String>> regionContained : grouping.keyValuesSet()) 2821 // { 2822 // showContainers(pw, regionContained); 2823 // } 2824 // 2825 // pw.println("<h2>Deprecated Codes</h2>"); 2826 // for (Entry<String, Set<String>> regionContained : 2827 // deprecated.keyValuesSet()) { 2828 // showContainers(pw, regionContained); 2829 // } 2830 pw.close(); 2831 } 2832 2833 public void showSubtable( 2834 PrintWriter pw, 2835 ContainmentStyle containmentStyle, 2836 String title, 2837 String containerTitle, 2838 String containeeTitle) { 2839 pw.println("<h2>" + title + "</h2>"); 2840 TablePrinter tablePrinter2 = 2841 new TablePrinter() 2842 .addColumn(containerTitle, "class='source'", null, "class='z0'", true) 2843 .setSortPriority(0) 2844 .addColumn(containeeTitle, "class='source'", null, "class='z4'", true) 2845 
.setSortPriority(1); 2846 2847 Relation<String, String> grouping = 2848 supplementalDataInfo.getTerritoryToContained(containmentStyle); 2849 2850 for (Entry<String, String> containerRegion : grouping.keyValueSet()) { 2851 String container = 2852 getName(CLDRFile.TERRITORY_NAME, containerRegion.getKey(), false); 2853 String containee = 2854 getName(CLDRFile.TERRITORY_NAME, containerRegion.getValue(), false); 2855 tablePrinter2.addRow().addCell(container).addCell(containee).finishRow(); 2856 } 2857 pw.println(tablePrinter2.toTable()); 2858 } 2859 2860 public void showContainers(PrintWriter pw, Entry<String, Set<String>> regionContained) { 2861 String region = regionContained.getKey(); 2862 Set<String> contained = regionContained.getValue(); 2863 pw.println("<ul><li>" + getName(CLDRFile.TERRITORY_NAME, region, false) + "<ul>"); 2864 for (String sub : contained) { 2865 pw.println("<li>" + getName(CLDRFile.TERRITORY_NAME, sub, false) + "</li>"); 2866 } 2867 pw.println("</ul></li></ul>"); 2868 } 2869 2870 private void printContains3( 2871 String start, List<String[]> rows, ArrayList<String> currentRow) { 2872 int len = currentRow.size(); 2873 if (len > 3) { 2874 return; // skip long items 2875 } 2876 currentRow.add(getName(CLDRFile.TERRITORY_NAME, start, false)); 2877 // Collection<String> contains = (Collection<String>) group_contains.get(start); 2878 Collection<String> contains = supplementalDataInfo.getContainmentCore().get(start); 2879 if (contains == null) { 2880 contains = sc.getCountryToZoneSet().get(start); 2881 currentRow.add(""); 2882 if (contains == null) { 2883 currentRow.set(len + 1, "???"); 2884 rows.add(currentRow.toArray(stringArrayPattern)); 2885 } else { 2886 for (String item : contains) { 2887 currentRow.set(len + 1, item); 2888 rows.add(currentRow.toArray(stringArrayPattern)); 2889 } 2890 } 2891 currentRow.remove(len + 1); 2892 } else { 2893 for (String item : contains) { 2894 if (territoryAliases.keySet().contains(item)) { 2895 continue; 2896 } 2897 
printContains3(item, rows, currentRow); 2898 } 2899 } 2900 currentRow.remove(len); 2901 } 2902 } 2903 2904 /** */ getInverse( Map<String, Set<String>> language_territories)2905 private static Map<String, Set<String>> getInverse( 2906 Map<String, Set<String>> language_territories) { 2907 // get inverse relation 2908 Map<String, Set<String>> territory_languages = new TreeMap<>(); 2909 for (Iterator<String> it = language_territories.keySet().iterator(); it.hasNext(); ) { 2910 String language = it.next(); 2911 Set<String> territories = language_territories.get(language); 2912 for (Iterator<String> it2 = territories.iterator(); it2.hasNext(); ) { 2913 String territory = it2.next(); 2914 Set<String> languages = territory_languages.get(territory); 2915 if (languages == null) 2916 territory_languages.put(territory, languages = new TreeSet<String>(col)); 2917 languages.add(language); 2918 } 2919 } 2920 return territory_languages; 2921 } 2922 2923 static final Map<String, String> NAME_TO_REGION = getNameToCode(CodeType.territory, "region"); 2924 static final Map<String, String> NAME_TO_CURRENCY = 2925 getNameToCode(CodeType.currency, "currency"); 2926 getNameToCode(CodeType codeType, String cldrCodeType)2927 private static SortedMap<String, String> getNameToCode(CodeType codeType, String cldrCodeType) { 2928 SortedMap<String, String> temp = new TreeMap<String, String>(col); 2929 for (String territory : StandardCodes.make().getAvailableCodes(codeType)) { 2930 String name = english.getName(cldrCodeType, territory); 2931 temp.put(name == null ? 
territory : name, territory); 2932 } 2933 temp = Collections.unmodifiableSortedMap(temp); 2934 return temp; 2935 } 2936 2937 /** 2938 * @param value_delimiter TODO 2939 */ addTokens( String key, String values, String value_delimiter, Map<String, Set<String>> key_value)2940 private static void addTokens( 2941 String key, String values, String value_delimiter, Map<String, Set<String>> key_value) { 2942 if (values != null) { 2943 Set<String> s = key_value.get(key); 2944 if (s == null) key_value.put(key, s = new TreeSet<String>(col)); 2945 s.addAll(Arrays.asList(values.split(value_delimiter))); 2946 } 2947 } 2948 addTokens( String key, String values, String value_delimiter, Multimap<String, String> key_value)2949 private static void addTokens( 2950 String key, String values, String value_delimiter, Multimap<String, String> key_value) { 2951 if (values != null) { 2952 key_value.putAll(key, Arrays.asList(values.split(value_delimiter))); 2953 } 2954 } 2955 showContents(Appendable pw, String... items)2956 public static void showContents(Appendable pw, String... items) { 2957 try { 2958 pw.append("</div>" + System.lineSeparator()); 2959 pw.append("<h3>Contents</h3>" + System.lineSeparator()); 2960 pw.append("<ol>" + System.lineSeparator()); 2961 for (int i = 0; i < items.length; i += 2) { 2962 pw.append( 2963 "<li><a href='#" 2964 + items[i] 2965 + "'>" 2966 + items[i + 1] 2967 + "</a></li>" 2968 + System.lineSeparator()); 2969 } 2970 pw.append("</ol><hr>" + System.lineSeparator()); 2971 2972 pw.append("<div align='center'>" + System.lineSeparator()); 2973 } catch (IOException e) { 2974 throw new ICUUncheckedIOException(e); 2975 } 2976 } 2977 } 2978