1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Splitter; 4 import com.ibm.icu.dev.util.UnicodeMap; 5 import com.ibm.icu.impl.Relation; 6 import com.ibm.icu.impl.Row; 7 import com.ibm.icu.lang.UCharacter; 8 import com.ibm.icu.text.Collator; 9 import com.ibm.icu.text.Transform; 10 import com.ibm.icu.text.UnicodeSet; 11 import com.ibm.icu.util.ICUException; 12 import com.ibm.icu.util.Output; 13 import com.ibm.icu.util.ULocale; 14 import java.util.Arrays; 15 import java.util.Collections; 16 import java.util.EnumMap; 17 import java.util.HashMap; 18 import java.util.HashSet; 19 import java.util.Iterator; 20 import java.util.LinkedHashMap; 21 import java.util.LinkedHashSet; 22 import java.util.List; 23 import java.util.Locale; 24 import java.util.Map; 25 import java.util.Map.Entry; 26 import java.util.Set; 27 import java.util.TreeMap; 28 import java.util.TreeSet; 29 import java.util.logging.Logger; 30 import java.util.regex.Matcher; 31 import java.util.regex.Pattern; 32 import org.unicode.cldr.draft.ScriptMetadata; 33 import org.unicode.cldr.draft.ScriptMetadata.Info; 34 import org.unicode.cldr.tool.LikelySubtags; 35 import org.unicode.cldr.util.RegexLookup.Finder; 36 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 37 import org.unicode.cldr.util.With.SimpleIterator; 38 import org.unicode.cldr.util.personname.PersonNameFormatter; 39 40 /** 41 * Provides a mechanism for dividing up LDML paths into understandable categories, eg for the Survey 42 * tool. 43 */ 44 public class PathHeader implements Comparable<PathHeader> { 45 /** Link to a section. Commenting out the page switch for now. 
*/ 46 public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/ "href='"; 47 48 static boolean UNIFORM_CONTINENTS = true; 49 static Factory factorySingleton = null; 50 51 static final boolean SKIP_ORIGINAL_PATH = true; 52 53 private static final Logger logger = Logger.getLogger(PathHeader.class.getName()); 54 55 static final Splitter HYPHEN_SPLITTER = Splitter.on('-'); 56 57 public enum Width { 58 FULL, 59 LONG, 60 WIDE, 61 SHORT, 62 NARROW; 63 getValue(String input)64 public static Width getValue(String input) { 65 try { 66 return Width.valueOf(input.toUpperCase(Locale.ENGLISH)); 67 } catch (RuntimeException e) { 68 e.printStackTrace(); 69 throw e; 70 } 71 } 72 73 @Override toString()74 public String toString() { 75 return name().toLowerCase(Locale.ENGLISH); 76 } 77 } 78 79 /** What status the survey tool should use. Can be overridden in Phase.getAction() */ 80 public enum SurveyToolStatus { 81 /** Never show. */ 82 DEPRECATED, 83 /** Hide. Can be overridden in Phase.getAction() */ 84 HIDE, 85 /** 86 * Don't allow Change box (except TC), instead show ticket. But allow votes. Can be 87 * overridden in Phase.getAction() 88 */ 89 READ_ONLY, 90 /** Allow change box and votes. Can be overridden in Phase.getAction() */ 91 READ_WRITE, 92 /** 93 * Changes are allowed as READ_WRITE, but field is always displayed as LTR, even in RTL 94 * locales (used for patterns). 95 */ 96 LTR_ALWAYS 97 } 98 99 private static final EnumNames<SectionId> SectionIdNames = new EnumNames<>(); 100 101 /** 102 * The Section for a path. Don't change these without committee buy-in. 
The 'name' may be 103 * 'Core_Data' and the toString is 'Core Data' toString gives the human name 104 */ 105 public enum SectionId { 106 Core_Data("Core Data"), 107 Locale_Display_Names("Locale Display Names"), 108 DateTime("Date & Time"), 109 Timezones, 110 Numbers, 111 Currencies, 112 Units, 113 Characters, 114 Misc("Miscellaneous"), 115 BCP47, 116 Supplemental, 117 Special; 118 SectionId(String... alternateNames)119 SectionId(String... alternateNames) { 120 SectionIdNames.add(this, alternateNames); 121 } 122 forString(String name)123 public static SectionId forString(String name) { 124 return SectionIdNames.forString(name); 125 } 126 127 @Override toString()128 public String toString() { 129 return SectionIdNames.toString(this); 130 } 131 } 132 133 private static final EnumNames<PageId> PageIdNames = new EnumNames<>(); 134 private static final Relation<SectionId, PageId> SectionIdToPageIds = 135 Relation.of(new TreeMap<>(), TreeSet.class); 136 137 private static class SubstringOrder implements Comparable<SubstringOrder> { 138 final String mainOrder; 139 final int order; 140 SubstringOrder(String source)141 public SubstringOrder(String source) { 142 int pos = source.lastIndexOf('-') + 1; 143 int ordering = COUNTS.indexOf(source.substring(pos)); 144 // account for digits, and "some" future proofing. 145 order = ordering < 0 ? source.charAt(pos) : 0x10000 + ordering; 146 mainOrder = source.substring(0, pos); 147 } 148 149 @Override 150 public String toString() { 151 return "{" + mainOrder + ", " + order + "}"; 152 } 153 154 @Override 155 public int compareTo(SubstringOrder other) { 156 int diff = alphabeticCompare(mainOrder, other.mainOrder); 157 if (diff != 0) { 158 return diff; 159 } 160 return order - other.order; 161 } 162 } 163 164 /** 165 * The Page for a path (within a Section). Don't change these without committee buy-in. 
The name is, for example, WAsia, whereas toString gives Western Asia.
     */
    public enum PageId {
        Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"),
        Numbering_Systems(SectionId.Core_Data, "Numbering Systems"),
        LinguisticElements(SectionId.Core_Data, "Linguistic Elements"),

        Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"),
        Languages_A_D(SectionId.Locale_Display_Names, "Languages (A-D)"),
        Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"),
        Languages_K_N(SectionId.Locale_Display_Names, "Languages (K-N)"),
        Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"),
        Languages_T_Z(SectionId.Locale_Display_Names, "Languages (T-Z)"),
        Scripts(SectionId.Locale_Display_Names),
        Territories(SectionId.Locale_Display_Names, "Geographic Regions"),
        T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"),
        T_SAmerica(SectionId.Locale_Display_Names, "Territories (South America)"),
        T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"),
        T_Europe(SectionId.Locale_Display_Names, "Territories (Europe)"),
        T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"),
        T_Oceania(SectionId.Locale_Display_Names, "Territories (Oceania)"),
        Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"),
        Keys(SectionId.Locale_Display_Names),

        Fields(SectionId.DateTime),
        Gregorian(SectionId.DateTime),
        Generic(SectionId.DateTime),
        Buddhist(SectionId.DateTime),
        Chinese(SectionId.DateTime),
        Coptic(SectionId.DateTime),
        Dangi(SectionId.DateTime),
        Ethiopic(SectionId.DateTime),
        Ethiopic_Amete_Alem(SectionId.DateTime, "Ethiopic-Amete-Alem"),
        Hebrew(SectionId.DateTime),
        Indian(SectionId.DateTime),
        Islamic(SectionId.DateTime),
        Japanese(SectionId.DateTime),
        Persian(SectionId.DateTime),
        Minguo(SectionId.DateTime),

        Timezone_Display_Patterns(SectionId.Timezones, "Timezone Display Patterns"),
        NAmerica(SectionId.Timezones, "North America"),
        SAmerica(SectionId.Timezones, "South America"),
        Africa(SectionId.Timezones),
        Europe(SectionId.Timezones),
        Russia(SectionId.Timezones),
        WAsia(SectionId.Timezones, "Western Asia"),
        CAsia(SectionId.Timezones, "Central Asia"),
        EAsia(SectionId.Timezones, "Eastern Asia"),
        SAsia(SectionId.Timezones, "Southern Asia"),
        SEAsia(SectionId.Timezones, "Southeast Asia"),
        Australasia(SectionId.Timezones),
        Antarctica(SectionId.Timezones),
        Oceania(SectionId.Timezones),
        UnknownT(SectionId.Timezones, "Unknown Region"),
        Overrides(SectionId.Timezones),

        Symbols(SectionId.Numbers),
        Number_Formatting_Patterns(SectionId.Numbers, "Number Formatting Patterns"),
        Compact_Decimal_Formatting(SectionId.Numbers, "Compact Decimal Formatting"),
        Compact_Decimal_Formatting_Other(
                SectionId.Numbers, "Compact Decimal Formatting (Other Numbering Systems)"),

        Measurement_Systems(SectionId.Units, "Measurement Systems"),
        Duration(SectionId.Units),
        Graphics(SectionId.Units),
        Length(SectionId.Units),
        Area(SectionId.Units),
        Volume_Metric(SectionId.Units, "Volume Metric"),
        Volume_Other(SectionId.Units, "Volume Other"),
        SpeedAcceleration(SectionId.Units, "Speed and Acceleration"),
        MassWeight(SectionId.Units, "Mass and Weight"),
        EnergyPower(SectionId.Units, "Energy and Power"),
        ElectricalFrequency(SectionId.Units, "Electrical and Frequency"),
        Weather(SectionId.Units),
        Digital(SectionId.Units),
        Coordinates(SectionId.Units),
        OtherUnits(SectionId.Units, "Other Units"),
        CompoundUnits(SectionId.Units, "Compound Units"),

        Displaying_Lists(SectionId.Misc, "Displaying Lists"),
        MinimalPairs(SectionId.Misc, "Minimal Pairs"),
        PersonNameFormats(SectionId.Misc, "Person Name Formats"),
        Transforms(SectionId.Misc),

        Identity(SectionId.Special),
        Version(SectionId.Special),
        Suppress(SectionId.Special),
        Deprecated(SectionId.Special),
        Unknown(SectionId.Special),

        C_NAmerica(SectionId.Currencies, "North America (C)"),
        // need to add (C) to differentiate from Timezone territories
        C_SAmerica(SectionId.Currencies, "South America (C)"),
        C_NWEurope(SectionId.Currencies, "Northern/Western Europe"),
        C_SEEurope(SectionId.Currencies, "Southern/Eastern Europe"),
        C_NAfrica(SectionId.Currencies, "Northern Africa"),
        C_WAfrica(SectionId.Currencies, "Western Africa"),
        C_MAfrica(SectionId.Currencies, "Middle Africa"),
        C_EAfrica(SectionId.Currencies, "Eastern Africa"),
        C_SAfrica(SectionId.Currencies, "Southern Africa"),
        C_WAsia(SectionId.Currencies, "Western Asia (C)"),
        C_CAsia(SectionId.Currencies, "Central Asia (C)"),
        C_EAsia(SectionId.Currencies, "Eastern Asia (C)"),
        C_SAsia(SectionId.Currencies, "Southern Asia (C)"),
        C_SEAsia(SectionId.Currencies, "Southeast Asia (C)"),
        C_Oceania(SectionId.Currencies, "Oceania (C)"),
        C_Unknown(SectionId.Currencies, "Unknown Region (C)"),

        // BCP47
        u_Extension(SectionId.BCP47),
        t_Extension(SectionId.BCP47),

        // Supplemental
        Alias(SectionId.Supplemental),
        IdValidity(SectionId.Supplemental),
        Locale(SectionId.Supplemental),
        RegionMapping(SectionId.Supplemental),
        WZoneMapping(SectionId.Supplemental),
        Transform(SectionId.Supplemental),
        Units(SectionId.Supplemental),
        Likely(SectionId.Supplemental),
        LanguageMatch(SectionId.Supplemental),
        TerritoryInfo(SectionId.Supplemental),
        LanguageInfo(SectionId.Supplemental),
        LanguageGroup(SectionId.Supplemental),
        Fallback(SectionId.Supplemental),
        Gender(SectionId.Supplemental),
        Grammar(SectionId.Supplemental),
        Metazone(SectionId.Supplemental),
        NumberSystem(SectionId.Supplemental),
        Plural(SectionId.Supplemental),
        PluralRange(SectionId.Supplemental),
        Containment(SectionId.Supplemental),
        Currency(SectionId.Supplemental),
        Calendar(SectionId.Supplemental),
        WeekData(SectionId.Supplemental),
        Measurement(SectionId.Supplemental),
        Language(SectionId.Supplemental),
        RBNF(SectionId.Supplemental),
        Segmentation(SectionId.Supplemental),
        DayPeriod(SectionId.Supplemental),

        Category(SectionId.Characters),

        // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects,
        // Symbols, Flags]
        Smileys(SectionId.Characters, "Smileys & Emotion"),
        People(SectionId.Characters, "People & Body"),
        People2(SectionId.Characters, "People & Body 2"),
        Animals_Nature(SectionId.Characters, "Animals & Nature"),
        Food_Drink(SectionId.Characters, "Food & Drink"),
        Travel_Places(SectionId.Characters, "Travel & Places"),
        Travel_Places2(SectionId.Characters, "Travel & Places 2"),
        Activities(SectionId.Characters),
        Objects(SectionId.Characters),
        Objects2(SectionId.Characters),
        EmojiSymbols(SectionId.Characters, "Emoji Symbols"),
        Punctuation(SectionId.Characters),
        MathSymbols(SectionId.Characters, "Math Symbols"),
        OtherSymbols(SectionId.Characters, "Other Symbols"),
        Flags(SectionId.Characters),
        Component(SectionId.Characters),
        Typography(SectionId.Characters),
        ;

        private final SectionId sectionId;

        /**
         * Records the owning section and registers this page both in the section-to-pages relation
         * and in the page name table.
         *
         * @param sectionId the section this page belongs to
         * @param alternateNames optional names passed on to the page name table
         */
        PageId(SectionId sectionId, String... alternateNames) {
            this.sectionId = sectionId;
            SectionIdToPageIds.put(sectionId, this);
            PageIdNames.add(this, alternateNames);
        }

        /**
         * Construct a pageId given a string
         *
         * @param name the string to resolve through the page name table
         * @return the PageId the name table returns for that string
         * @throws ICUException wrapping any exception raised by the name-table lookup
         */
        public static PageId forString(String name) {
            try {
                return PageIdNames.forString(name);
            } catch (Exception e) {
                throw new ICUException("No PageId for " + name, e);
            }
        }

        /**
         * Returns the string the page name table holds for this page
         *
         * @return a page name, such as 'Languages (A-D)'
         */
        @Override
        public String toString() {
            return PageIdNames.toString(this);
        }

        /**
         * Get the containing section id, such as SectionId.Core_Data
         *
         * @return the containing section ID
         */
        public SectionId getSectionId() {
            return sectionId;
        }
    }

    private final SectionId sectionId;
    private final PageId pageId;
    private final String header;
    private final String code;
    private final String originalPath;
    private final SurveyToolStatus status;

    // Used for ordering
    private final int headerOrder;
    private final long codeOrder;
    private final SubstringOrder codeSuborder;

    // Splits on a semicolon with optional surrounding whitespace (used by RawData)
    static final Pattern SEMI = PatternCache.get("\\s*;\\s*");
    // One shared, reusable Matcher for the [@alt="..."] attribute; Factory.fromPath resets it
    // while holding the lock on Factory.lookup
    static final Matcher ALT_MATCHER = PatternCache.get("\\[@alt=\"([^\"]*+)\"]").matcher("");

    static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
    static final Map<String, String> metazoneToContinent =
            supplementalDataInfo.getMetazoneToContinentMap();
    // Filled once by the static initializer below: metazone -> territory/region code used to
    // pick a page
    static final Map<String, String> metazoneToPageTerritory = new HashMap<>();

    static {
        Map<String, Map<String, String>> metazoneToRegionToZone =
                supplementalDataInfo.getMetazoneToRegionToZone();
        for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) {
            String metazone = metazoneEntry.getKey();
            // the zone listed for region "001" decides which territory the metazone is filed under
            String worldZone = metazoneEntry.getValue().get("001");
            String territory = Containment.getRegionFromZone(worldZone);
            if (territory == null) {
                // no region found for the zone: fall back to the code "ZZ"
                territory = "ZZ";
            }
            // Russia, Antarctica => territory
            // in Australasia, Asia, S. America => subcontinent
            // in N. America => N. America (grouping of 3 subcontinents)
            // in everything else => continent
            if (territory.equals("RU") || territory.equals("AQ")) {
                metazoneToPageTerritory.put(metazone, territory);
            } else {
                String continent = Containment.getContinent(territory);
                String subcontinent = Containment.getSubcontinent(territory);
                if (continent.equals("142")) { // Asia
                    metazoneToPageTerritory.put(metazone, subcontinent);
                } else if (continent.equals("019")) { // Americas
                    // subcontinent "005" is kept as is; every other subcontinent of the
                    // Americas is folded into "003" (the N. America grouping noted above)
                    metazoneToPageTerritory.put(
                            metazone, subcontinent.equals("005") ? subcontinent : "003");
                } else if (subcontinent.equals("053")) { // Australasia
                    metazoneToPageTerritory.put(metazone, subcontinent);
                } else {
                    metazoneToPageTerritory.put(metazone, continent);
                }
            }
        }
    }

    /**
     * Stores the given values as they are; instances are created by Factory.makePathHeader.
     *
     * @param headerOrder integer rank compared before the header text in compareTo
     * @param codeOrder integer rank compared before the code text in compareTo
     * @param suborder optional secondary key for the code; may be null
     * @param originalPath the path this header was created for
     */
    private PathHeader(
            SectionId sectionId,
            PageId pageId,
            String header,
            int headerOrder,
            String code,
            long codeOrder,
            SubstringOrder suborder,
            SurveyToolStatus status,
            String originalPath) {
        this.sectionId = sectionId;
        this.pageId = pageId;
        this.header = header;
        this.headerOrder = headerOrder;
        this.code = code;
        this.codeOrder = codeOrder;
        this.codeSuborder = suborder;
        this.originalPath = originalPath;
        this.status = status;
    }

    /**
     * Return a factory for use in creating the headers. This is cached after first use. The calls
     * are thread-safe. Null gets the default (CLDRConfig) english file.
451 * 452 * @param englishFile 453 */ 454 public static Factory getFactory(CLDRFile englishFile) { 455 if (factorySingleton == null) { 456 if (englishFile == null) { 457 englishFile = CLDRConfig.getInstance().getEnglish(); 458 } 459 if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) { 460 throw new IllegalArgumentException( 461 "PathHeader's CLDRFile must be '" 462 + ULocale.ENGLISH.getBaseName() 463 + "', but found '" 464 + englishFile.getLocaleID() 465 + "'"); 466 } 467 factorySingleton = new Factory(englishFile); 468 } 469 return factorySingleton; 470 } 471 472 /** Convenience method for common case. See {{@link #getFactory(CLDRFile)}} */ 473 public static Factory getFactory() { 474 return getFactory(null); 475 } 476 477 /** 478 * @deprecated 479 */ 480 @Deprecated 481 public String getSection() { 482 return sectionId.toString(); 483 } 484 485 public SectionId getSectionId() { 486 return sectionId; 487 } 488 489 /** 490 * @deprecated 491 */ 492 @Deprecated 493 public String getPage() { 494 return pageId.toString(); 495 } 496 497 public PageId getPageId() { 498 return pageId; 499 } 500 501 public String getHeader() { 502 return header == null ? "" : header; 503 } 504 505 public String getCode() { 506 return code; 507 } 508 509 public String getHeaderCode() { 510 return getHeader() + ": " + getCode(); 511 } 512 513 public String getOriginalPath() { 514 return originalPath; 515 } 516 517 public SurveyToolStatus getSurveyToolStatus() { 518 return status; 519 } 520 521 @Override 522 public String toString() { 523 return sectionId 524 + "\t" 525 + pageId 526 + "\t" 527 + header // + "\t" + headerOrder 528 + "\t" 529 + code // + "\t" + codeOrder 530 ; 531 } 532 533 /** 534 * Compare this PathHeader to another one 535 * 536 * @param other the object to be compared. 
537 * @return 0 if equal, -1 if less, 1 if more 538 * <p>Note: if we ever have to compare just the header or just the code, methods to do that 539 * were in release 44 (compareHeader and compareCode), but they were unused and therefore 540 * removed in CLDR-11155. 541 */ 542 @Override 543 public int compareTo(PathHeader other) { 544 // Within each section, order alphabetically if the integer orders are 545 // not different. 546 try { 547 int result; 548 if (0 != (result = sectionId.compareTo(other.sectionId))) { 549 return result; 550 } 551 if (0 != (result = pageId.compareTo(other.pageId))) { 552 return result; 553 } 554 if (0 != (result = headerOrder - other.headerOrder)) { 555 return result; 556 } 557 if (0 != (result = alphabeticCompare(header, other.header))) { 558 return result; 559 } 560 long longResult; 561 if (0 != (longResult = codeOrder - other.codeOrder)) { 562 return longResult < 0 ? -1 : 1; 563 } 564 if (codeSuborder != null) { // do all three cases, for transitivity 565 if (other.codeSuborder != null) { 566 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) { 567 return result; 568 } 569 } else { 570 return 1; // if codeSuborder != null (and other.codeSuborder 571 // == null), it is greater 572 } 573 } else if (other.codeSuborder != null) { 574 return -1; // if codeSuborder == null (and other.codeSuborder != 575 // null), it is greater 576 } 577 if (0 != (result = alphabeticCompare(code, other.code))) { 578 return result; 579 } 580 if (!SKIP_ORIGINAL_PATH) { 581 if (0 != (result = alphabeticCompare(originalPath, other.originalPath))) { 582 return result; 583 } 584 } 585 return 0; 586 } catch (RuntimeException e) { 587 throw new IllegalArgumentException( 588 "Internal problem comparing " + this + " and " + other, e); 589 } 590 } 591 592 @Override 593 public boolean equals(Object obj) { 594 PathHeader other; 595 try { 596 other = (PathHeader) obj; 597 } catch (Exception e) { 598 return false; 599 } 600 return sectionId == other.sectionId 
601 && pageId == other.pageId 602 && header.equals(other.header) 603 && code.equals(other.code); 604 } 605 606 @Override 607 public int hashCode() { 608 return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode(); 609 } 610 611 public static class Factory implements Transform<String, PathHeader> { 612 static final RegexLookup<RawData> lookup = 613 RegexLookup.of(new PathHeaderTransform()) 614 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 615 .loadFromFile(PathHeader.class, "data/PathHeader.txt"); 616 // synchronized with lookup 617 static final Output<String[]> args = new Output<>(); 618 // synchronized with lookup 619 static final Counter<RawData> counter = new Counter<>(); 620 // synchronized with lookup 621 static final Map<RawData, String> samples = new HashMap<>(); 622 // synchronized with lookup 623 static long order; 624 static SubstringOrder suborder; 625 626 static final Map<String, PathHeader> cache = new HashMap<>(); 627 // synchronized with cache 628 static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage = 629 new EnumMap<>(SectionId.class); 630 static final Relation<SectionPage, String> sectionPageToPaths = 631 Relation.of(new TreeMap<>(), HashSet.class); 632 private static CLDRFile englishFile; 633 private final Set<String> matchersFound = new HashSet<>(); 634 635 /** 636 * Create a factory for creating PathHeaders. 637 * 638 * @param englishFile - only sets the file (statically!) if not already set. 639 */ 640 private Factory(CLDRFile englishFile) { 641 setEnglishCLDRFileIfNotSet(englishFile); // temporary 642 } 643 644 /** 645 * Set englishFile if it is not already set. 
646 * 647 * @param englishFile2 the value to set for englishFile 648 */ 649 private static void setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) { 650 synchronized (Factory.class) { 651 if (englishFile == null) { 652 englishFile = englishFile2; 653 } 654 } 655 } 656 657 /** Use only when trying to find unmatched patterns */ 658 public void clearCache() { 659 synchronized (cache) { 660 cache.clear(); 661 } 662 } 663 664 /** Return the PathHeader for a given path. Thread-safe. */ 665 public PathHeader fromPath(String path) { 666 return fromPath(path, null); 667 } 668 669 /** Return the PathHeader for a given path. Thread-safe. */ 670 @Override 671 public PathHeader transform(String path) { 672 return fromPath(path, null); 673 } 674 675 /** 676 * Return the PathHeader for a given path. Thread-safe. 677 * 678 * @param failures a list of failures to add to. 679 */ 680 public PathHeader fromPath(final String path, List<String> failures) { 681 if (path == null) { 682 throw new NullPointerException("Path cannot be null"); 683 } 684 synchronized (cache) { 685 PathHeader old = cache.get(path); 686 if (old != null) { 687 return old; 688 } 689 } 690 synchronized (lookup) { 691 String cleanPath = path; 692 // special handling for alt 693 String alt = null; 694 int altPos = cleanPath.indexOf("[@alt="); 695 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) { 696 if (ALT_MATCHER.reset(cleanPath).find()) { 697 alt = ALT_MATCHER.group(1); 698 cleanPath = 699 cleanPath.substring(0, ALT_MATCHER.start()) 700 + cleanPath.substring(ALT_MATCHER.end()); 701 int pos = alt.indexOf("proposed"); 702 if (pos >= 0 && !path.startsWith("//ldml/collations")) { 703 alt = pos == 0 ? null : alt.substring(0, pos - 1); 704 // drop "proposed", 705 // change "xxx-proposed" to xxx. 
706 } 707 } else { 708 throw new IllegalArgumentException(); 709 } 710 } 711 Output<Finder> matcherFound = new Output<>(); 712 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures); 713 if (data == null) { 714 return null; 715 } 716 matchersFound.add(matcherFound.value.toString()); 717 counter.add(data, 1); 718 if (!samples.containsKey(data)) { 719 samples.put(data, cleanPath); 720 } 721 try { 722 PathHeader result = makePathHeader(data, path, alt); 723 synchronized (cache) { 724 PathHeader old = cache.get(path); 725 if (old == null) { 726 cache.put(path, result); 727 } else { 728 result = old; 729 } 730 Map<PageId, SectionPage> pageToPathHeaders = 731 sectionToPageToSectionPage.get(result.sectionId); 732 if (pageToPathHeaders == null) { 733 sectionToPageToSectionPage.put( 734 result.sectionId, 735 pageToPathHeaders = new EnumMap<>(PageId.class)); 736 } 737 SectionPage sectionPage = pageToPathHeaders.get(result.pageId); 738 if (sectionPage == null) { 739 sectionPage = new SectionPage(result.sectionId, result.pageId); 740 pageToPathHeaders.put(result.pageId, sectionPage); 741 } 742 sectionPageToPaths.put(sectionPage, path); 743 } 744 return result; 745 } catch (Exception e) { 746 throw new IllegalArgumentException( 747 "Probably mismatch in Page/Section enum, or too few capturing groups in regex for " 748 + path, 749 e); 750 } 751 } 752 } 753 makePathHeader(RawData data, String path, String alt)754 private PathHeader makePathHeader(RawData data, String path, String alt) { 755 // Caution: each call to PathHeader.Factory.fix changes the value of 756 // PathHeader.Factory.order 757 SectionId newSectionId = SectionId.forString(fix(data.section, 0)); 758 PageId newPageId = PageId.forString(fix(data.page, 0)); 759 String newHeader = fix(data.header, data.headerOrder); 760 int newHeaderOrder = (int) order; 761 String codeDashAlt = data.code + (alt == null ? 
"" : ("-" + alt)); 762 String newCode = fix(codeDashAlt, data.codeOrder); 763 long newCodeOrder = order; 764 return new PathHeader( 765 newSectionId, 766 newPageId, 767 newHeader, 768 newHeaderOrder, 769 newCode, 770 newCodeOrder, 771 suborder, 772 data.status, 773 path); 774 } 775 776 private static class SectionPage implements Comparable<SectionPage> { 777 private final SectionId sectionId; 778 private final PageId pageId; 779 SectionPage(SectionId sectionId, PageId pageId)780 public SectionPage(SectionId sectionId, PageId pageId) { 781 this.sectionId = sectionId; 782 this.pageId = pageId; 783 } 784 785 @Override compareTo(SectionPage other)786 public int compareTo(SectionPage other) { 787 // Within each section, order alphabetically if the integer 788 // orders are 789 // not different. 790 int result; 791 if (0 != (result = sectionId.compareTo(other.sectionId))) { 792 return result; 793 } 794 if (0 != (result = pageId.compareTo(other.pageId))) { 795 return result; 796 } 797 return 0; 798 } 799 800 @Override equals(Object obj)801 public boolean equals(Object obj) { 802 PathHeader other; 803 try { 804 other = (PathHeader) obj; 805 } catch (Exception e) { 806 return false; 807 } 808 return sectionId == other.sectionId && pageId == other.pageId; 809 } 810 811 @Override hashCode()812 public int hashCode() { 813 return sectionId.hashCode() ^ pageId.hashCode(); 814 } 815 816 @Override toString()817 public String toString() { 818 return sectionId + " > " + pageId; 819 } 820 } 821 822 /** 823 * Returns a set of paths currently associated with the given section and page. 824 * 825 * <p><b>Warning:</b> 826 * 827 * <ol> 828 * <li>The set may not be complete for a cldrFile unless all of paths in the file have had 829 * fromPath called. And this includes getExtraPaths(). 830 * <li>The set may include paths that have no value in the current cldrFile. 831 * <li>The set may be empty, if the section/page aren't valid. 832 * </ol> 833 * 834 * Thread-safe. 
835 */ getCachedPaths(SectionId sectionId, PageId page)836 public static Set<String> getCachedPaths(SectionId sectionId, PageId page) { 837 Set<String> target = new HashSet<>(); 838 synchronized (cache) { 839 Map<PageId, SectionPage> pageToSectionPage = 840 sectionToPageToSectionPage.get(sectionId); 841 if (pageToSectionPage == null) { 842 return target; 843 } 844 SectionPage sectionPage = pageToSectionPage.get(page); 845 if (sectionPage == null) { 846 return target; 847 } 848 Set<String> set = sectionPageToPaths.getAll(sectionPage); 849 target.addAll(set); 850 } 851 return target; 852 } 853 854 /** 855 * Return the Sections and Pages that are in defined, for display in menus. Both are 856 * ordered. 857 */ getSectionIdsToPageIds()858 public static Relation<SectionId, PageId> getSectionIdsToPageIds() { 859 SectionIdToPageIds.freeze(); // just in case 860 return SectionIdToPageIds; 861 } 862 863 /** 864 * Return the names for Sections and Pages that are defined, for display in menus. Both are 865 * ordered. 866 * 867 * @deprecated Use getSectionIdsToPageIds 868 */ 869 @Deprecated getSectionsToPages()870 public static LinkedHashMap<String, Set<String>> getSectionsToPages() { 871 LinkedHashMap<String, Set<String>> sectionsToPages = new LinkedHashMap<>(); 872 for (PageId pageId : PageId.values()) { 873 String sectionId2 = pageId.getSectionId().toString(); 874 Set<String> pages = 875 sectionsToPages.computeIfAbsent(sectionId2, k -> new LinkedHashSet<>()); 876 pages.add(pageId.toString()); 877 } 878 return sectionsToPages; 879 } 880 881 /** 882 * @deprecated, use the filterCldr with the section/page ids. 
883 */ filterCldr(String section, String page, CLDRFile file)884 public Iterable<String> filterCldr(String section, String page, CLDRFile file) { 885 return new FilteredIterable(section, page, file); 886 } 887 888 private class FilteredIterable implements Iterable<String>, SimpleIterator<String> { 889 private final SectionId sectionId; 890 private final PageId pageId; 891 private final Iterator<String> fileIterator; 892 FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file)893 FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) { 894 this.sectionId = sectionId; 895 this.pageId = pageId; 896 this.fileIterator = file.fullIterable().iterator(); 897 } 898 FilteredIterable(String section, String page, CLDRFile file)899 public FilteredIterable(String section, String page, CLDRFile file) { 900 this(SectionId.forString(section), PageId.forString(page), file); 901 } 902 903 @Override iterator()904 public Iterator<String> iterator() { 905 return With.toIterator(this); 906 } 907 908 @Override next()909 public String next() { 910 while (fileIterator.hasNext()) { 911 String path = fileIterator.next(); 912 PathHeader pathHeader = fromPath(path); 913 if (sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) { 914 return path; 915 } 916 } 917 return null; 918 } 919 } 920 921 private static class ChronologicalOrder { 922 private final Map<String, Integer> map = new HashMap<>(); 923 private String item; 924 private int order; 925 private final ChronologicalOrder toClear; 926 ChronologicalOrder(ChronologicalOrder toClear)927 ChronologicalOrder(ChronologicalOrder toClear) { 928 this.toClear = toClear; 929 } 930 getOrder()931 int getOrder() { 932 return order; 933 } 934 set(String itemToOrder)935 public String set(String itemToOrder) { 936 if (itemToOrder.startsWith("*")) { 937 item = itemToOrder.substring(1, itemToOrder.length()); 938 return item; // keep old order 939 } 940 item = itemToOrder; 941 Integer old = map.get(item); 942 if (old != 
null) { 943 order = old.intValue(); 944 } else { 945 order = map.size(); 946 map.put(item, order); 947 clearLower(); 948 } 949 return item; 950 } 951 clearLower()952 private void clearLower() { 953 if (toClear != null) { 954 toClear.map.clear(); 955 toClear.order = 0; 956 toClear.clearLower(); 957 } 958 } 959 } 960 961 static class RawData { 962 static ChronologicalOrder codeOrdering = new ChronologicalOrder(null); 963 static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering); 964 RawData(String source)965 public RawData(String source) { 966 String[] split = SEMI.split(source); 967 section = split[0]; 968 // HACK 969 if (section.equals("Timezones") && split[1].equals("Indian")) { 970 page = "Indian2"; 971 } else { 972 page = split[1]; 973 } 974 975 header = headerOrdering.set(split[2]); 976 headerOrder = headerOrdering.getOrder(); 977 978 code = codeOrdering.set(split[3]); 979 codeOrder = codeOrdering.getOrder(); 980 981 status = 982 split.length < 5 983 ? SurveyToolStatus.READ_WRITE 984 : SurveyToolStatus.valueOf(split[4]); 985 } 986 987 public final String section; 988 public final String page; 989 public final String header; 990 public final int headerOrder; 991 public final String code; 992 public final int codeOrder; 993 public final SurveyToolStatus status; 994 995 @Override 996 public String toString() { 997 return section 998 + "\t" 999 + page 1000 + "\t" 1001 + header 1002 + "\t" 1003 + headerOrder 1004 + "\t" 1005 + code 1006 + "\t" 1007 + codeOrder 1008 + "\t" 1009 + status; 1010 } 1011 } 1012 1013 static class PathHeaderTransform implements Transform<String, RawData> { 1014 @Override 1015 public RawData transform(String source) { 1016 return new RawData(source); 1017 } 1018 } 1019 1020 /** 1021 * Internal data, for testing and debugging. 
1022 * 1023 * @deprecated 1024 */ 1025 @Deprecated 1026 public class CounterData extends Row.R4<String, RawData, String, String> { 1027 public CounterData(String a, RawData b, String c) { 1028 super( 1029 a, 1030 b, 1031 c == null ? "no sample" : c, 1032 c == null ? "no sample" : fromPath(c).toString()); 1033 } 1034 } 1035 1036 /** 1037 * Get the internal data, for testing and debugging. 1038 * 1039 * @deprecated 1040 */ 1041 @Deprecated 1042 public Counter<CounterData> getInternalCounter() { 1043 synchronized (lookup) { 1044 Counter<CounterData> result = new Counter<>(); 1045 for (Map.Entry<Finder, RawData> foo : lookup) { 1046 Finder finder = foo.getKey(); 1047 RawData data = foo.getValue(); 1048 long count = counter.get(data); 1049 result.add(new CounterData(finder.toString(), data, samples.get(data)), count); 1050 } 1051 return result; 1052 } 1053 } 1054 1055 static Map<String, Transform<String, String>> functionMap = new HashMap<>(); 1056 static String[] months = { 1057 "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", 1058 "Und" 1059 }; 1060 static List<String> days = Arrays.asList("sun", "mon", "tue", "wed", "thu", "fri", "sat"); 1061 static List<String> unitOrder = DtdData.getUnitOrder().getOrder(); 1062 static final MapComparator<String> dayPeriods = 1063 new MapComparator<String>() 1064 .add( 1065 "am", 1066 "pm", 1067 "midnight", 1068 "noon", 1069 "morning1", 1070 "morning2", 1071 "afternoon1", 1072 "afternoon2", 1073 "evening1", 1074 "evening2", 1075 "night1", 1076 "night2") 1077 .freeze(); 1078 static LikelySubtags likelySubtags = new LikelySubtags(); 1079 static HyphenSplitter hyphenSplitter = new HyphenSplitter(); 1080 static Transform<String, String> catFromTerritory; 1081 static Transform<String, String> catFromTimezone; 1082 1083 static { 1084 // Put any new functions used in PathHeader.txt in here. 
1085 // To change the order of items within a section or heading, set 1086 // order/suborder to be the relative position of the current item. 1087 functionMap.put( 1088 "month", 1089 new Transform<>() { 1090 @Override 1091 public String transform(String source) { 1092 int m = Integer.parseInt(source); 1093 order = m; 1094 return months[m - 1]; 1095 } 1096 }); 1097 functionMap.put( 1098 "count", 1099 new Transform<>() { 1100 @Override 1101 public String transform(String source) { 1102 suborder = new SubstringOrder(source); 1103 return source; 1104 } 1105 }); 1106 functionMap.put( 1107 "count2", 1108 new Transform<>() { 1109 @Override 1110 public String transform(String source) { 1111 int pos = source.indexOf('-'); 1112 source = pos + source.substring(pos); 1113 suborder = new SubstringOrder(source); // make 10000-... 1114 // into 5- 1115 return source; 1116 } 1117 }); 1118 functionMap.put( 1119 "currencySymbol", 1120 new Transform<>() { 1121 @Override 1122 public String transform(String source) { 1123 order = 901; 1124 if (source.endsWith("narrow")) { 1125 order = 902; 1126 } 1127 if (source.endsWith("variant")) { 1128 order = 903; 1129 } 1130 return source; 1131 } 1132 }); 1133 // &unitCount($1-$3-$4), where $1 is length, $2 is count, $3 is case (optional) 1134 // but also 1135 // &unitCount($1-$3-$5-$4), where $5 is case, $4 is gender — notice order change 1136 functionMap.put( 1137 "unitCount", 1138 new Transform<>() { 1139 @Override 1140 public String transform(String source) { 1141 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 1142 if (parts.size() == 1) { 1143 return source; 1144 } 1145 int lengthNumber = Width.getValue(parts.get(0)).ordinal(); 1146 int type = 0; 1147 int rest = 0; 1148 switch (parts.get(1)) { 1149 case "gender": 1150 type = 0; 1151 break; 1152 case "displayName": 1153 type = 1; 1154 break; 1155 case "per": 1156 type = 2; 1157 break; 1158 default: 1159 type = 3; 1160 int countNumber = 1161 (parts.size() > 1 1162 ? 
Count.valueOf(parts.get(1)) 1163 : Count.other) 1164 .ordinal(); 1165 int caseNumber = 1166 (parts.size() > 2 1167 ? GrammarInfo.CaseValues.valueOf( 1168 parts.get(2)) 1169 : GrammarInfo.CaseValues.nominative) 1170 .ordinal(); 1171 int genderNumber = GrammarInfo.GenderValues.neuter.ordinal(); 1172 if (parts.size() > 3) { 1173 String genderPart = parts.get(3); 1174 if (!genderPart.equals("dgender")) { 1175 genderNumber = 1176 GrammarInfo.GenderValues.valueOf(genderPart) 1177 .ordinal(); 1178 } 1179 type = 4; 1180 } 1181 rest = (countNumber << 16) | (caseNumber << 8) | genderNumber; 1182 break; 1183 } 1184 order = (type << 28) | (lengthNumber << 24) | rest; 1185 return source; 1186 } 1187 }); 1188 1189 functionMap.put( 1190 "pluralNumber", 1191 new Transform<>() { 1192 @Override 1193 public String transform(String source) { 1194 order = GrammarInfo.PluralValues.valueOf(source).ordinal(); 1195 return source; 1196 } 1197 }); 1198 1199 functionMap.put( 1200 "caseNumber", 1201 new Transform<>() { 1202 @Override 1203 public String transform(String source) { 1204 order = GrammarInfo.CaseValues.valueOf(source).ordinal(); 1205 return source; 1206 } 1207 }); 1208 1209 functionMap.put( 1210 "genderNumber", 1211 new Transform<>() { 1212 @Override 1213 public String transform(String source) { 1214 order = GrammarInfo.GenderValues.valueOf(source).ordinal(); 1215 return source; 1216 } 1217 }); 1218 1219 functionMap.put( 1220 "day", 1221 new Transform<>() { 1222 @Override 1223 public String transform(String source) { 1224 int m = days.indexOf(source); 1225 order = m; 1226 return source; 1227 } 1228 }); 1229 functionMap.put( 1230 "dayPeriod", 1231 new Transform<>() { 1232 @Override 1233 public String transform(String source) { 1234 try { 1235 order = dayPeriods.getNumericOrder(source); 1236 } catch (Exception e) { 1237 // if an old item is tried, like "evening", this will fail. 1238 // so that old data still works, hack this. 
1239 order = Math.abs(source.hashCode() << 16); 1240 } 1241 return source; 1242 } 1243 }); 1244 functionMap.put( 1245 "calendar", 1246 new Transform<>() { 1247 final Map<String, String> fixNames = 1248 Builder.with(new HashMap<String, String>()) 1249 .put("islamicc", "Islamic Civil") 1250 .put("roc", "Minguo") 1251 .put("Ethioaa", "Ethiopic Amete Alem") 1252 .put("Gregory", "Gregorian") 1253 .put("iso8601", "ISO 8601") 1254 .freeze(); 1255 1256 @Override 1257 public String transform(String source) { 1258 String result = fixNames.get(source); 1259 return result != null ? result : UCharacter.toTitleCase(source, null); 1260 } 1261 }); 1262 1263 functionMap.put( 1264 "calField", 1265 new Transform<>() { 1266 @Override 1267 public String transform(String source) { 1268 String[] fields = source.split(":", 3); 1269 order = 0; 1270 final List<String> widthValues = 1271 Arrays.asList("wide", "abbreviated", "short", "narrow"); 1272 final List<String> calendarFieldValues = 1273 Arrays.asList( 1274 "Eras", 1275 "Quarters", 1276 "Months", 1277 "Days", 1278 "DayPeriods", 1279 "Formats"); 1280 final List<String> calendarFormatTypes = 1281 Arrays.asList("Standard", "Flexible", "Intervals"); 1282 final List<String> calendarContextTypes = 1283 Arrays.asList("none", "format", "stand-alone"); 1284 final List<String> calendarFormatSubtypes = 1285 Arrays.asList( 1286 "date", 1287 "time", 1288 "time12", 1289 "time24", 1290 "dateTime", 1291 "fallback"); 1292 1293 Map<String, String> fixNames = 1294 Builder.with(new HashMap<String, String>()) 1295 .put("DayPeriods", "Day Periods") 1296 .put("format", "Formatting") 1297 .put("stand-alone", "Standalone") 1298 .put("none", "") 1299 .put("date", "Date Formats") 1300 .put("time", "Time Formats") 1301 .put("time12", "12 Hour Time Formats") 1302 .put("time24", "24 Hour Time Formats") 1303 .put("dateTime", "Date & Time Combination Formats") 1304 .freeze(); 1305 1306 if (calendarFieldValues.contains(fields[0])) { 1307 order = 
calendarFieldValues.indexOf(fields[0]) * 100; 1308 } else { 1309 order = calendarFieldValues.size() * 100; 1310 } 1311 1312 if (fields[0].equals("Formats")) { 1313 if (calendarFormatTypes.contains(fields[1])) { 1314 order += calendarFormatTypes.indexOf(fields[1]) * 10; 1315 } else { 1316 order += calendarFormatTypes.size() * 10; 1317 } 1318 if (calendarFormatSubtypes.contains(fields[2])) { 1319 order += calendarFormatSubtypes.indexOf(fields[2]); 1320 } else { 1321 order += calendarFormatSubtypes.size(); 1322 } 1323 } else { 1324 if (widthValues.contains(fields[1])) { 1325 order += widthValues.indexOf(fields[1]) * 10; 1326 } else { 1327 order += widthValues.size() * 10; 1328 } 1329 if (calendarContextTypes.contains(fields[2])) { 1330 order += calendarContextTypes.indexOf(fields[2]); 1331 } else { 1332 order += calendarContextTypes.size(); 1333 } 1334 } 1335 1336 String[] fixedFields = new String[fields.length]; 1337 for (int i = 0; i < fields.length; i++) { 1338 String s = fixNames.get(fields[i]); 1339 fixedFields[i] = s != null ? s : fields[i]; 1340 } 1341 1342 return fixedFields[0] 1343 + " - " 1344 + fixedFields[1] 1345 + (fixedFields[2].length() > 0 ? 
" - " + fixedFields[2] : ""); 1346 } 1347 }); 1348 1349 functionMap.put( 1350 "titlecase", 1351 new Transform<>() { 1352 @Override 1353 public String transform(String source) { 1354 return UCharacter.toTitleCase(source, null); 1355 } 1356 }); 1357 functionMap.put( 1358 "categoryFromScript", 1359 new Transform<>() { 1360 @Override 1361 public String transform(String source) { 1362 String script = hyphenSplitter.split(source); 1363 Info info = ScriptMetadata.getInfo(script); 1364 if (info == null) { 1365 info = ScriptMetadata.getInfo("Zzzz"); 1366 } 1367 order = 100 - info.idUsage.ordinal(); 1368 return info.idUsage.name; 1369 } 1370 }); 1371 functionMap.put( 1372 "categoryFromKey", 1373 new Transform<>() { 1374 final Map<String, String> fixNames = 1375 Builder.with(new HashMap<String, String>()) 1376 .put("cf", "Currency Format") 1377 .put("em", "Emoji Presentation") 1378 .put("fw", "First Day of Week") 1379 .put("lb", "Line Break") 1380 .put("hc", "Hour Cycle") 1381 .put("ms", "Measurement System") 1382 .put("ss", "Sentence Break Suppressions") 1383 .freeze(); 1384 1385 @Override 1386 public String transform(String source) { 1387 String fixedName = fixNames.get(source); 1388 return fixedName != null ? 
fixedName : source; 1389 } 1390 }); 1391 functionMap.put( 1392 "languageSection", 1393 new Transform<>() { 1394 final char[] languageRangeStartPoints = {'A', 'E', 'K', 'O', 'T'}; 1395 final char[] languageRangeEndPoints = {'D', 'J', 'N', 'S', 'Z'}; 1396 1397 @Override 1398 public String transform(String source0) { 1399 char firstLetter = getEnglishFirstLetter(source0).charAt(0); 1400 for (int i = 0; i < languageRangeStartPoints.length; i++) { 1401 if (firstLetter >= languageRangeStartPoints[i] 1402 && firstLetter <= languageRangeEndPoints[i]) { 1403 return "Languages (" 1404 + Character.toUpperCase(languageRangeStartPoints[i]) 1405 + "-" 1406 + Character.toUpperCase(languageRangeEndPoints[i]) 1407 + ")"; 1408 } 1409 } 1410 return "Languages"; 1411 } 1412 }); 1413 functionMap.put( 1414 "firstLetter", 1415 new Transform<>() { 1416 @Override 1417 public String transform(String source0) { 1418 return getEnglishFirstLetter(source0); 1419 } 1420 }); 1421 functionMap.put( 1422 "languageSort", 1423 new Transform<>() { 1424 @Override 1425 public String transform(String source0) { 1426 String languageOnlyPart; 1427 int underscorePos = source0.indexOf("_"); 1428 if (underscorePos > 0) { 1429 languageOnlyPart = source0.substring(0, underscorePos); 1430 } else { 1431 languageOnlyPart = source0; 1432 } 1433 1434 return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart) 1435 + " \u25BA " 1436 + source0; 1437 } 1438 }); 1439 functionMap.put( 1440 "scriptFromLanguage", 1441 new Transform<>() { 1442 @Override 1443 public String transform(String source0) { 1444 String language = hyphenSplitter.split(source0); 1445 String script = likelySubtags.getLikelyScript(language); 1446 if (script == null) { 1447 script = likelySubtags.getLikelyScript(language); 1448 } 1449 String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script); 1450 return "Languages in " 1451 + (script.equals("Hans") || script.equals("Hant") 1452 ? 
"Han Script" 1453 : scriptName.endsWith(" Script") 1454 ? scriptName 1455 : scriptName + " Script"); 1456 } 1457 }); 1458 functionMap.put( 1459 "categoryFromTerritory", 1460 catFromTerritory = 1461 new Transform<>() { 1462 @Override 1463 public String transform(String source) { 1464 String territory = getSubdivisionsTerritory(source, null); 1465 String container = Containment.getContainer(territory); 1466 order = Containment.getOrder(territory); 1467 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1468 } 1469 }); 1470 functionMap.put( 1471 "territorySection", 1472 new Transform<>() { 1473 final Set<String> specialRegions = 1474 new HashSet<>(Arrays.asList("EZ", "EU", "QO", "UN", "ZZ")); 1475 1476 @Override 1477 public String transform(String source0) { 1478 // support subdivisions 1479 String theTerritory = getSubdivisionsTerritory(source0, null); 1480 try { 1481 if (specialRegions.contains(theTerritory) 1482 || theTerritory.charAt(0) < 'A' 1483 && Integer.parseInt(theTerritory) > 0) { 1484 return "Geographic Regions"; 1485 } 1486 } catch (NumberFormatException ex) { 1487 } 1488 String theContinent = Containment.getContinent(theTerritory); 1489 String theSubContinent; 1490 switch (theContinent) { // was Integer.valueOf 1491 case "019": // Americas - For the territorySection, we just group 1492 // North America & South America 1493 final String subcontinent = 1494 Containment.getSubcontinent(theTerritory); 1495 theSubContinent = 1496 subcontinent.equals("005") 1497 ? 
"005" 1498 : "003"; // was Integer.valueOf(subcontinent) == 1499 // 5 1500 return "Territories (" 1501 + englishFile.getName( 1502 CLDRFile.TERRITORY_NAME, theSubContinent) 1503 + ")"; 1504 case "001": 1505 case "ZZ": 1506 return "Geographic Regions"; // not in containment 1507 default: 1508 return "Territories (" 1509 + englishFile.getName( 1510 CLDRFile.TERRITORY_NAME, theContinent) 1511 + ")"; 1512 } 1513 } 1514 }); 1515 functionMap.put( 1516 "categoryFromTimezone", 1517 catFromTimezone = 1518 new Transform<>() { 1519 @Override 1520 public String transform(String source0) { 1521 String territory = Containment.getRegionFromZone(source0); 1522 if (territory == null) { 1523 territory = "ZZ"; 1524 } 1525 return catFromTerritory.transform(territory); 1526 } 1527 }); 1528 functionMap.put( 1529 "timeZonePage", 1530 new Transform<>() { 1531 Set<String> singlePageTerritories = 1532 new HashSet<>(Arrays.asList("AQ", "RU", "ZZ")); 1533 1534 @Override 1535 public String transform(String source0) { 1536 String theTerritory = Containment.getRegionFromZone(source0); 1537 if (theTerritory == null 1538 || "001".equals(theTerritory) 1539 || "ZZ".equals(theTerritory)) { 1540 if ("Etc/Unknown".equals(source0)) { 1541 theTerritory = "ZZ"; 1542 } else { 1543 throw new IllegalArgumentException( 1544 "ICU needs zone update? Source: " 1545 + source0 1546 + "; Territory: " 1547 + theTerritory); 1548 } 1549 } 1550 if (singlePageTerritories.contains(theTerritory)) { 1551 return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory); 1552 } 1553 String theContinent = Containment.getContinent(theTerritory); 1554 final String subcontinent = Containment.getSubcontinent(theTerritory); 1555 String theSubContinent; 1556 switch (Integer.parseInt(theContinent)) { 1557 case 9: // Oceania - For the timeZonePage, we group Australasia on 1558 // one page, and the rest of Oceania on the other. 1559 try { 1560 theSubContinent = 1561 subcontinent.equals("053") ? 
"053" : "009"; // was 1562 // Integer.valueOf(subcontinent) == 1563 // 53 1564 } catch (NumberFormatException ex) { 1565 theSubContinent = "009"; 1566 } 1567 return englishFile.getName( 1568 CLDRFile.TERRITORY_NAME, theSubContinent); 1569 case 19: // Americas - For the timeZonePage, we just group North 1570 // America & South America 1571 theSubContinent = 1572 Integer.parseInt(subcontinent) == 5 ? "005" : "003"; 1573 return englishFile.getName( 1574 CLDRFile.TERRITORY_NAME, theSubContinent); 1575 case 142: // Asia 1576 return englishFile.getName( 1577 CLDRFile.TERRITORY_NAME, subcontinent); 1578 default: 1579 return englishFile.getName( 1580 CLDRFile.TERRITORY_NAME, theContinent); 1581 } 1582 } 1583 }); 1584 1585 functionMap.put( 1586 "timezoneSorting", 1587 new Transform<>() { 1588 @Override 1589 public String transform(String source) { 1590 final List<String> codeValues = 1591 Arrays.asList( 1592 "generic-long", 1593 "generic-short", 1594 "standard-long", 1595 "standard-short", 1596 "daylight-long", 1597 "daylight-short"); 1598 if (codeValues.contains(source)) { 1599 order = codeValues.indexOf(source); 1600 } else { 1601 order = codeValues.size(); 1602 } 1603 return source; 1604 } 1605 }); 1606 1607 functionMap.put( 1608 "tzdpField", 1609 new Transform<>() { 1610 @Override 1611 public String transform(String source) { 1612 Map<String, String> fieldNames = 1613 Builder.with(new HashMap<String, String>()) 1614 .put("regionFormat", "Region Format - Generic") 1615 .put( 1616 "regionFormat-standard", 1617 "Region Format - Standard") 1618 .put( 1619 "regionFormat-daylight", 1620 "Region Format - Daylight") 1621 .put("gmtFormat", "GMT Format") 1622 .put("hourFormat", "GMT Hours/Minutes Format") 1623 .put("gmtZeroFormat", "GMT Zero Format") 1624 .put("fallbackFormat", "Location Fallback Format") 1625 .freeze(); 1626 final List<String> fieldOrder = 1627 Arrays.asList( 1628 "regionFormat", 1629 "regionFormat-standard", 1630 "regionFormat-daylight", 1631 "gmtFormat", 1632 
"hourFormat", 1633 "gmtZeroFormat", 1634 "fallbackFormat"); 1635 1636 if (fieldOrder.contains(source)) { 1637 order = fieldOrder.indexOf(source); 1638 } else { 1639 order = fieldOrder.size(); 1640 } 1641 1642 String result = fieldNames.get(source); 1643 return result == null ? source : result; 1644 } 1645 }); 1646 functionMap.put( 1647 "unit", 1648 new Transform<>() { 1649 @Override 1650 public String transform(String source) { 1651 int m = unitOrder.indexOf(source); 1652 order = m; 1653 return source.substring(source.indexOf('-') + 1); 1654 } 1655 }); 1656 1657 functionMap.put( 1658 "numericSort", 1659 new Transform<>() { 1660 // Probably only works well for small values, like -5 through +4. 1661 @Override 1662 public String transform(String source) { 1663 Integer pos = Integer.parseInt(source) + 5; 1664 suborder = new SubstringOrder(pos.toString()); 1665 return source; 1666 } 1667 }); 1668 1669 functionMap.put( 1670 "metazone", 1671 new Transform<>() { 1672 1673 @Override 1674 public String transform(String source) { 1675 if (PathHeader.UNIFORM_CONTINENTS) { 1676 String container = getMetazonePageTerritory(source); 1677 order = Containment.getOrder(container); 1678 return englishFile.getName(CLDRFile.TERRITORY_NAME, container); 1679 } else { 1680 String continent = metazoneToContinent.get(source); 1681 if (continent == null) { 1682 continent = "UnknownT"; 1683 } 1684 return continent; 1685 } 1686 } 1687 }); 1688 1689 Object[][] ctto = { 1690 {"BUK", "MM"}, 1691 {"CSD", "RS"}, 1692 {"CSK", "CZ"}, 1693 {"DDM", "DE"}, 1694 {"EUR", "ZZ"}, 1695 {"RHD", "ZW"}, 1696 {"SUR", "RU"}, 1697 {"TPE", "TL"}, 1698 {"XAG", "ZZ"}, 1699 {"XAU", "ZZ"}, 1700 {"XBA", "ZZ"}, 1701 {"XBB", "ZZ"}, 1702 {"XBC", "ZZ"}, 1703 {"XBD", "ZZ"}, 1704 {"XDR", "ZZ"}, 1705 {"XEU", "ZZ"}, 1706 {"XFO", "ZZ"}, 1707 {"XFU", "ZZ"}, 1708 {"XPD", "ZZ"}, 1709 {"XPT", "ZZ"}, 1710 {"XRE", "ZZ"}, 1711 {"XSU", "ZZ"}, 1712 {"XTS", "ZZ"}, 1713 {"XUA", "ZZ"}, 1714 {"XXX", "ZZ"}, 1715 {"YDD", "YE"}, 1716 {"YUD", 
"RS"}, 1717 {"YUM", "RS"}, 1718 {"YUN", "RS"}, 1719 {"YUR", "RS"}, 1720 {"ZRN", "CD"}, 1721 {"ZRZ", "CD"}, 1722 }; 1723 1724 Object[][] sctc = { 1725 {"Northern America", "North America (C)"}, 1726 {"Central America", "North America (C)"}, 1727 {"Caribbean", "North America (C)"}, 1728 {"South America", "South America (C)"}, 1729 {"Northern Africa", "Northern Africa"}, 1730 {"Western Africa", "Western Africa"}, 1731 {"Middle Africa", "Middle Africa"}, 1732 {"Eastern Africa", "Eastern Africa"}, 1733 {"Southern Africa", "Southern Africa"}, 1734 {"Europe", "Northern/Western Europe"}, 1735 {"Northern Europe", "Northern/Western Europe"}, 1736 {"Western Europe", "Northern/Western Europe"}, 1737 {"Eastern Europe", "Southern/Eastern Europe"}, 1738 {"Southern Europe", "Southern/Eastern Europe"}, 1739 {"Western Asia", "Western Asia (C)"}, 1740 {"Central Asia", "Central Asia (C)"}, 1741 {"Eastern Asia", "Eastern Asia (C)"}, 1742 {"Southern Asia", "Southern Asia (C)"}, 1743 {"Southeast Asia", "Southeast Asia (C)"}, 1744 {"Australasia", "Oceania (C)"}, 1745 {"Melanesia", "Oceania (C)"}, 1746 {"Micronesian Region", "Oceania (C)"}, // HACK 1747 {"Polynesia", "Oceania (C)"}, 1748 {"Unknown Region", "Unknown Region (C)"}, 1749 }; 1750 1751 final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto); 1752 final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc); 1753 final Set<String> fundCurrencies = 1754 new HashSet<>( 1755 Arrays.asList( 1756 "CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI", 1757 "XEU", "ZAL")); 1758 final Set<String> offshoreCurrencies = new HashSet<>(Arrays.asList("CNH")); 1759 // TODO: Put this into supplementalDataInfo ? 
1760 1761 functionMap.put( 1762 "categoryFromCurrency", 1763 new Transform<>() { 1764 @Override 1765 public String transform(String source0) { 1766 String tenderOrNot = ""; 1767 String territory = 1768 likelySubtags.getLikelyTerritoryFromCurrency(source0); 1769 if (territory == null) { 1770 String tag; 1771 if (fundCurrencies.contains(source0)) { 1772 tag = " (fund)"; 1773 } else if (offshoreCurrencies.contains(source0)) { 1774 tag = " (offshore)"; 1775 } else { 1776 tag = " (old)"; 1777 } 1778 tenderOrNot = ": " + source0 + tag; 1779 } 1780 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1781 territory = currencyToTerritoryOverrides.get(source0); 1782 } else if (territory == null) { 1783 territory = source0.substring(0, 2); 1784 } 1785 1786 if (territory.equals("ZZ")) { 1787 order = 999; 1788 return englishFile.getName(CLDRFile.TERRITORY_NAME, territory) 1789 + ": " 1790 + source0; 1791 } else { 1792 return catFromTerritory.transform(territory) 1793 + ": " 1794 + englishFile.getName(CLDRFile.TERRITORY_NAME, territory) 1795 + tenderOrNot; 1796 } 1797 } 1798 }); 1799 functionMap.put( 1800 "continentFromCurrency", 1801 new Transform<>() { 1802 @Override 1803 public String transform(String source0) { 1804 String subContinent; 1805 String territory = 1806 likelySubtags.getLikelyTerritoryFromCurrency(source0); 1807 if (currencyToTerritoryOverrides.keySet().contains(source0)) { 1808 territory = currencyToTerritoryOverrides.get(source0); 1809 } else if (territory == null) { 1810 territory = source0.substring(0, 2); 1811 } 1812 1813 if (territory.equals("ZZ")) { 1814 order = 999; 1815 subContinent = 1816 englishFile.getName(CLDRFile.TERRITORY_NAME, territory); 1817 } else { 1818 subContinent = catFromTerritory.transform(territory); 1819 } 1820 1821 String result = 1822 subContinentToContinent.get( 1823 subContinent); // the continent is the last word in the 1824 // territory representation 1825 return result; 1826 } 1827 }); 1828 functionMap.put( 1829 
"numberingSystem", 1830 new Transform<>() { 1831 @Override 1832 public String transform(String source0) { 1833 if ("latn".equals(source0)) { 1834 return ""; 1835 } 1836 String displayName = 1837 englishFile.getStringValue( 1838 "//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\"" 1839 + source0 1840 + "\"]"); 1841 return "using " 1842 + (displayName == null 1843 ? source0 1844 : displayName + " (" + source0 + ")"); 1845 } 1846 }); 1847 1848 functionMap.put( 1849 "datefield", 1850 new Transform<>() { 1851 private final String[] datefield = { 1852 "era", "era-short", "era-narrow", 1853 "century", "century-short", "century-narrow", 1854 "year", "year-short", "year-narrow", 1855 "quarter", "quarter-short", "quarter-narrow", 1856 "month", "month-short", "month-narrow", 1857 "week", "week-short", "week-narrow", 1858 "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow", 1859 "day", "day-short", "day-narrow", 1860 "dayOfYear", "dayOfYear-short", "dayOfYear-narrow", 1861 "weekday", "weekday-short", "weekday-narrow", 1862 "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow", 1863 "dayperiod", "dayperiod-short", "dayperiod-narrow", 1864 "zone", "zone-short", "zone-narrow", 1865 "hour", "hour-short", "hour-narrow", 1866 "minute", "minute-short", "minute-narrow", 1867 "second", "second-short", "second-narrow", 1868 "millisecond", "millisecond-short", "millisecond-narrow", 1869 "microsecond", "microsecond-short", "microsecond-narrow", 1870 "nanosecond", "nanosecond-short", "nanosecond-narrow", 1871 }; 1872 1873 @Override 1874 public String transform(String source) { 1875 order = getIndex(source, datefield); 1876 return source; 1877 } 1878 }); 1879 // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"] 1880 functionMap.put( 1881 "relativeDate", 1882 new Transform<>() { 1883 private final String[] relativeDateField = { 1884 "year", "year-short", "year-narrow", 1885 "quarter", "quarter-short", "quarter-narrow", 1886 "month", "month-short", 
"month-narrow", 1887 "week", "week-short", "week-narrow", 1888 "day", "day-short", "day-narrow", 1889 "hour", "hour-short", "hour-narrow", 1890 "minute", "minute-short", "minute-narrow", 1891 "second", "second-short", "second-narrow", 1892 "sun", "sun-short", "sun-narrow", 1893 "mon", "mon-short", "mon-narrow", 1894 "tue", "tue-short", "tue-narrow", 1895 "wed", "wed-short", "wed-narrow", 1896 "thu", "thu-short", "thu-narrow", 1897 "fri", "fri-short", "fri-narrow", 1898 "sat", "sat-short", "sat-narrow", 1899 }; 1900 private final String[] longNames = { 1901 "Year", "Year Short", "Year Narrow", 1902 "Quarter", "Quarter Short", "Quarter Narrow", 1903 "Month", "Month Short", "Month Narrow", 1904 "Week", "Week Short", "Week Narrow", 1905 "Day", "Day Short", "Day Narrow", 1906 "Hour", "Hour Short", "Hour Narrow", 1907 "Minute", "Minute Short", "Minute Narrow", 1908 "Second", "Second Short", "Second Narrow", 1909 "Sunday", "Sunday Short", "Sunday Narrow", 1910 "Monday", "Monday Short", "Monday Narrow", 1911 "Tuesday", "Tuesday Short", "Tuesday Narrow", 1912 "Wednesday", "Wednesday Short", "Wednesday Narrow", 1913 "Thursday", "Thursday Short", "Thursday Narrow", 1914 "Friday", "Friday Short", "Friday Narrow", 1915 "Saturday", "Saturday Short", "Saturday Narrow", 1916 }; 1917 1918 @Override 1919 public String transform(String source) { 1920 order = getIndex(source, relativeDateField) + 100; 1921 return "Relative " + longNames[getIndex(source, relativeDateField)]; 1922 } 1923 }); 1924 // Sorts numberSystem items (except for decimal formats). 
1925 functionMap.put( 1926 "number", 1927 new Transform<>() { 1928 private final String[] symbols = { 1929 "decimal", 1930 "group", 1931 "plusSign", 1932 "minusSign", 1933 "approximatelySign", 1934 "percentSign", 1935 "perMille", 1936 "exponential", 1937 "superscriptingExponent", 1938 "infinity", 1939 "nan", 1940 "list", 1941 "currencies" 1942 }; 1943 1944 @Override 1945 public String transform(String source) { 1946 String[] parts = source.split("-"); 1947 order = getIndex(parts[0], symbols); 1948 // e.g. "currencies-one" 1949 if (parts.length > 1) { 1950 suborder = new SubstringOrder(parts[1]); 1951 } 1952 return source; 1953 } 1954 }); 1955 functionMap.put( 1956 "numberFormat", 1957 new Transform<>() { 1958 @Override 1959 public String transform(String source) { 1960 final List<String> fieldOrder = 1961 Arrays.asList( 1962 "standard-decimal", 1963 "standard-currency", 1964 "standard-currency-accounting", 1965 "standard-percent", 1966 "standard-scientific"); 1967 1968 if (fieldOrder.contains(source)) { 1969 order = fieldOrder.indexOf(source); 1970 } else { 1971 order = fieldOrder.size(); 1972 } 1973 1974 return source; 1975 } 1976 }); 1977 1978 functionMap.put( 1979 "localePattern", 1980 new Transform<>() { 1981 @Override 1982 public String transform(String source) { 1983 // Put localeKeyTypePattern behind localePattern and 1984 // localeSeparator. 
1985 if (source.equals("localeKeyTypePattern")) { 1986 order = 10; 1987 } 1988 return source; 1989 } 1990 }); 1991 functionMap.put( 1992 "listOrder", 1993 new Transform<>() { 1994 private String[] listParts = {"2", "start", "middle", "end"}; 1995 1996 @Override 1997 public String transform(String source) { 1998 order = getIndex(source, listParts); 1999 return source; 2000 } 2001 }); 2002 2003 functionMap.put( 2004 "personNameSection", 2005 new Transform<>() { 2006 @Override 2007 public String transform(String source) { 2008 // sampleName item values in desired sort order 2009 final List<String> itemValues = 2010 PersonNameFormatter.SampleType.ALL_STRINGS; 2011 if (source.equals("NameOrder")) { 2012 order = 0; 2013 return "NameOrder for Locales"; 2014 } 2015 if (source.equals("Parameters")) { 2016 order = 4; 2017 return "Default Parameters"; 2018 } 2019 2020 if (source.equals("AuxiliaryItems")) { 2021 order = 10; 2022 return source; 2023 } 2024 String itemPrefix = "SampleName:"; 2025 if (source.startsWith(itemPrefix)) { 2026 String itemValue = source.substring(itemPrefix.length()); 2027 order = 20 + itemValues.indexOf(itemValue); 2028 return "SampleName Fields for Item: " + itemValue; 2029 } 2030 String pnPrefix = "PersonName:"; 2031 if (source.startsWith(pnPrefix)) { 2032 String attrValues = source.substring(pnPrefix.length()); 2033 List<String> parts = HYPHEN_SPLITTER.splitToList(attrValues); 2034 2035 String nameOrder = parts.get(0); 2036 if (nameOrder.contentEquals("sorting")) { 2037 order = 40; 2038 return "PersonName Sorting Patterns (Usage: referring)"; 2039 } 2040 order = 30; 2041 if (nameOrder.contentEquals("surnameFirst")) { 2042 order += 1; 2043 } 2044 String nameUsage = parts.get(1); 2045 if (nameUsage.contentEquals("monogram")) { 2046 order += 20; 2047 return "PersonName Monogram Patterns for Order: " + nameOrder; 2048 } 2049 return "PersonName Main Patterns for Order: " + nameOrder; 2050 } 2051 order = 60; 2052 return source; 2053 } 2054 }); 2055 2056 
functionMap.put( 2057 "personNameOrder", 2058 new Transform<>() { 2059 @Override 2060 public String transform(String source) { 2061 // personName attribute values: each group in desired 2062 // sort order, but groups from least important to most 2063 final List<String> attrValues = 2064 Arrays.asList( 2065 "referring", 2066 "addressing", // usage values to include 2067 "formal", 2068 "informal", // formality values 2069 "long", 2070 "medium", 2071 "short"); // length values 2072 // order & length values handled in &personNameSection 2073 2074 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 2075 order = 0; 2076 String attributes = ""; 2077 boolean skipReferring = false; 2078 for (String part : parts) { 2079 if (attrValues.contains(part)) { 2080 order += (1 << attrValues.indexOf(part)); 2081 // anything else like alt="variant" is at order 0 2082 if (!skipReferring || !part.contentEquals("referring")) { 2083 // Add this part to display attribute string 2084 if (attributes.length() == 0) { 2085 attributes = part; 2086 } else { 2087 attributes = attributes + "-" + part; 2088 } 2089 } 2090 } else if (part.contentEquals("sorting")) { 2091 skipReferring = true; // For order=sorting, don't display 2092 // usage=referring 2093 } 2094 } 2095 return attributes; 2096 } 2097 }); 2098 2099 functionMap.put( 2100 "sampleNameOrder", 2101 new Transform<>() { 2102 @Override 2103 public String transform(String source) { 2104 // The various nameField attribute values: each group in desired 2105 // sort order, but groups from least important to most 2106 final List<String> attrValues = 2107 Arrays.asList( 2108 "informal", 2109 "prefix", 2110 "core", // modifiers for nameField type 2111 "prefix", 2112 "given", 2113 "given2", 2114 "surname", 2115 "surname2", 2116 "suffix"); // values for nameField type 2117 2118 List<String> parts = HYPHEN_SPLITTER.splitToList(source); 2119 order = 0; 2120 for (String part : parts) { 2121 if (attrValues.contains(part)) { 2122 order += (1 << 
attrValues.indexOf(part)); 2123 } // anything else like alt="variant" is at order 0 2124 } 2125 return source; 2126 } 2127 }); 2128 2129 functionMap.put( 2130 "alphaOrder", 2131 new Transform<>() { 2132 @Override 2133 public String transform(String source) { 2134 order = 0; 2135 return source; 2136 } 2137 }); 2138 functionMap.put( 2139 "transform", 2140 new Transform<>() { 2141 Splitter commas = Splitter.on(',').trimResults(); 2142 2143 @Override 2144 public String transform(String source) { 2145 List<String> parts = commas.splitToList(source); 2146 return parts.get(1) 2147 + (parts.get(0).equals("both") ? "↔︎" : "→") 2148 + parts.get(2) 2149 + (parts.size() > 3 ? "/" + parts.get(3) : ""); 2150 } 2151 }); 2152 functionMap.put( 2153 "major", 2154 new Transform<>() { 2155 @Override 2156 public String transform(String source) { 2157 return getCharacterPageId(source).toString(); 2158 } 2159 }); 2160 functionMap.put( 2161 "minor", 2162 new Transform<>() { 2163 @Override 2164 public String transform(String source) { 2165 String minorCat = Emoji.getMinorCategory(source); 2166 order = Emoji.getEmojiMinorOrder(minorCat); 2167 return minorCat; 2168 } 2169 }); 2170 /** 2171 * Use the ordering of the emoji in getEmojiToOrder rather than alphabetic, since the 2172 * collator data won't be ready until the candidates are final. 2173 */ 2174 functionMap.put( 2175 "emoji", 2176 new Transform<>() { 2177 @Override 2178 public String transform(String source) { 2179 int dashPos = source.indexOf(' '); 2180 String emoji = source.substring(0, dashPos); 2181 order = 2182 (Emoji.getEmojiToOrder(emoji) << 1) 2183 + (source.endsWith("name") ? 
0 : 1); 2184 return source; 2185 } 2186 }); 2187 } 2188 2189 private static int getIndex(String item, String[] array) { 2190 for (int i = 0; i < array.length; i++) { 2191 if (item.equals(array[i])) { 2192 return i; 2193 } 2194 } 2195 return -1; 2196 } 2197 2198 private static String getEnglishFirstLetter(String s) { 2199 String languageOnlyPart; 2200 int underscorePos = s.indexOf("_"); 2201 if (underscorePos > 0) { 2202 languageOnlyPart = s.substring(0, underscorePos); 2203 } else { 2204 languageOnlyPart = s; 2205 } 2206 final String name = englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart); 2207 return name == null ? "?" : name.substring(0, 1).toUpperCase(); 2208 } 2209 2210 static class HyphenSplitter { 2211 String main; 2212 String extras; 2213 2214 String split(String source) { 2215 int hyphenPos = source.indexOf('-'); 2216 if (hyphenPos < 0) { 2217 main = source; 2218 extras = ""; 2219 } else { 2220 main = source.substring(0, hyphenPos); 2221 extras = source.substring(hyphenPos); 2222 } 2223 return main; 2224 } 2225 } 2226 2227 /** 2228 * This converts "functions", like &month, and sets the order. 
2229 * 2230 * @param input 2231 * @param orderIn 2232 * @return 2233 */ 2234 private static String fix(String input, int orderIn) { 2235 input = RegexLookup.replace(input, args.value); 2236 order = orderIn; 2237 suborder = null; 2238 int pos = 0; 2239 while (true) { 2240 int functionStart = input.indexOf('&', pos); 2241 if (functionStart < 0) { 2242 if ("Volume".equals(input)) { 2243 return getVolumePageId(args.value[0] /* path */).toString(); 2244 } 2245 return input; 2246 } 2247 int functionEnd = input.indexOf('(', functionStart); 2248 int argEnd = 2249 input.indexOf( 2250 ')', functionEnd + 2); // we must insert at least one character 2251 Transform<String, String> func = 2252 functionMap.get(input.substring(functionStart + 1, functionEnd)); 2253 final String arg = input.substring(functionEnd + 1, argEnd); 2254 String temp = func.transform(arg); 2255 if (temp == null) { 2256 func.transform(arg); 2257 throw new IllegalArgumentException( 2258 "Function returns invalid results for «" + arg + "»."); 2259 } 2260 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1); 2261 pos = functionStart + temp.length(); 2262 } 2263 } 2264 2265 private static Set<UnitConverter.UnitSystem> METRIC = 2266 Set.of(UnitConverter.UnitSystem.metric, UnitConverter.UnitSystem.metric_adjacent); 2267 2268 private static PageId getVolumePageId(String path) { 2269 // Extract the unit from the path. 
For example, if path is 2270 // //ldml/units/unitLength[@type="narrow"]/unit[@type="volume-cubic-kilometer"]/displayName 2271 // then extract "volume-cubic-kilometer" which is the long unit id 2272 final String longUnitId = 2273 XPathParts.getFrozenInstance(path).findAttributeValue("unit", "type"); 2274 if (longUnitId == null) { 2275 throw new InternalCldrException("Missing unit in path " + path); 2276 } 2277 final UnitConverter uc = supplementalDataInfo.getUnitConverter(); 2278 // Convert, for example, "volume-cubic-kilometer" to "cubic-kilometer" 2279 final String shortUnitId = uc.getShortId(longUnitId); 2280 if (!Collections.disjoint(METRIC, uc.getSystemsEnum(shortUnitId))) { 2281 return PageId.Volume_Metric; 2282 } else { 2283 return PageId.Volume_Other; 2284 } 2285 } 2286 2287 /** 2288 * Collect all the paths for a CLDRFile, and make sure that they have cached PathHeaders 2289 * 2290 * @param file 2291 * @return immutable set of paths in the file 2292 */ 2293 public Set<String> pathsForFile(CLDRFile file) { 2294 // make sure we cache all the path headers 2295 HashSet<String> filePaths = new HashSet<>(); 2296 file.fullIterable().forEach(filePaths::add); 2297 for (String path : filePaths) { 2298 try { 2299 fromPath(path); // call to make sure cached 2300 } catch (Throwable t) { 2301 // ... some other exception 2302 } 2303 } 2304 return Collections.unmodifiableSet(filePaths); 2305 } 2306 2307 /** 2308 * Returns those regexes that were never matched. 2309 * 2310 * @return 2311 */ 2312 public Set<String> getUnmatchedRegexes() { 2313 Map<String, RawData> outputUnmatched = new LinkedHashMap<>(); 2314 lookup.getUnmatchedPatterns(matchersFound, outputUnmatched); 2315 return outputUnmatched.keySet(); 2316 } 2317 } 2318 2319 /** 2320 * Return the territory used for the title of the Metazone page in the Survey Tool. 
2321 * 2322 * @param source 2323 * @return 2324 */ 2325 public static String getMetazonePageTerritory(String source) { 2326 String result = metazoneToPageTerritory.get(source); 2327 return result == null ? "ZZ" : result; 2328 } 2329 2330 private static final List<String> COUNTS = 2331 Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per"); 2332 2333 private static Collator alphabetic; 2334 2335 private static int alphabeticCompare(String aa, String bb) { 2336 if (alphabetic == null) { 2337 initializeAlphabetic(); 2338 } 2339 return alphabetic.compare(aa, bb); 2340 } 2341 2342 private static synchronized void initializeAlphabetic() { 2343 // Lazy initialization: don't call CLDRConfig.getInstance() too early or we'll get 2344 // "CLDRConfig.getInstance() was called prior to SurveyTool setup" when called from 2345 // com.ibm.ws.microprofile.openapi.impl.core.jackson.ModelResolver._addEnumProps 2346 if (alphabetic == null) { 2347 alphabetic = CLDRConfig.getInstance().getCollatorRoot(); 2348 } 2349 } 2350 2351 /** 2352 * @deprecated use CLDRConfig.getInstance().urls() instead 2353 */ 2354 @Deprecated 2355 public enum BaseUrl { 2356 // http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328 2357 // http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1 2358 SMOKE("https://st.unicode.org/smoketest/survey"), 2359 PRODUCTION("https://st.unicode.org/cldr-apps/survey"); 2360 final String base; 2361 2362 private BaseUrl(String url) { 2363 base = url; 2364 } 2365 } 2366 2367 /** 2368 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 2369 * @param baseUrl 2370 * @param locale 2371 * @return 2372 */ 2373 public String getUrl(BaseUrl baseUrl, String locale) { 2374 return getUrl(baseUrl.base, locale); 2375 } 2376 2377 /** 2378 * @deprecated, use CLDRConfig.urls().forPathHeader() instead. 
2379 * @param baseUrl 2380 * @param locale 2381 * @return 2382 */ 2383 public String getUrl(String baseUrl, String locale) { 2384 return getUrl(baseUrl, locale, getOriginalPath()); 2385 } 2386 2387 /** 2388 * Map http://st.unicode.org/smoketest/survey to http://st.unicode.org/smoketest etc 2389 * 2390 * @param str 2391 * @return 2392 */ 2393 public static String trimLast(String str) { 2394 int n = str.lastIndexOf('/'); 2395 if (n == -1) return ""; 2396 return str.substring(0, n + 1); 2397 } 2398 2399 public static String getUrlForLocalePath(String locale, String path) { 2400 return getUrl(SURVEY_URL, locale, path); 2401 } 2402 2403 public static String getUrl(String baseUrl, String locale, String path) { 2404 return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path); 2405 } 2406 2407 /** 2408 * @deprecated use the version with CLDRURLS instead 2409 * @param baseUrl 2410 * @param file 2411 * @param path 2412 * @return 2413 */ 2414 @Deprecated 2415 public static String getLinkedView(String baseUrl, CLDRFile file, String path) { 2416 return SECTION_LINK 2417 + PathHeader.getUrl(baseUrl, file.getLocaleID(), path) 2418 + "'><em>view</em></a>"; 2419 } 2420 2421 public static String getLinkedView(CLDRURLS urls, CLDRFile file, String path) { 2422 return SECTION_LINK + urls.forXpath(file.getLocaleID(), path) + "'><em>view</em></a>"; 2423 } 2424 2425 private static final String SURVEY_URL = CLDRConfig.getInstance().urls().base(); 2426 2427 /** 2428 * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix. 2429 * Otherwise return the input as is. 2430 * 2431 * @param input 2432 * @param suffix 2433 * @return 2434 */ 2435 private static String getSubdivisionsTerritory(String input, Output<String> suffix) { 2436 String theTerritory; 2437 if (StandardCodes.LstrType.subdivision.isWellFormed(input)) { 2438 int territoryEnd = input.charAt(0) < 'A' ? 
3 : 2; 2439 theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT); 2440 if (suffix != null) { 2441 suffix.value = input.substring(territoryEnd); 2442 } 2443 } else { 2444 theTerritory = input; 2445 if (suffix != null) { 2446 suffix.value = ""; 2447 } 2448 } 2449 return theTerritory; 2450 } 2451 2452 /** 2453 * Should this path header be hidden? 2454 * 2455 * @return true to hide, else false 2456 */ 2457 public boolean shouldHide() { 2458 switch (status) { 2459 case HIDE: 2460 case DEPRECATED: 2461 return true; 2462 case READ_ONLY: 2463 case READ_WRITE: 2464 case LTR_ALWAYS: 2465 return false; 2466 default: 2467 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status); 2468 return false; 2469 } 2470 } 2471 2472 /** 2473 * Are reading and writing allowed for this path header? 2474 * 2475 * @return true if reading and writing are allowed, else false 2476 */ 2477 public boolean canReadAndWrite() { 2478 switch (status) { 2479 case READ_WRITE: 2480 case LTR_ALWAYS: 2481 return true; 2482 case HIDE: 2483 case DEPRECATED: 2484 case READ_ONLY: 2485 return false; 2486 default: 2487 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status); 2488 return false; 2489 } 2490 } 2491 2492 private static UnicodeMap<PageId> nonEmojiMap = null; 2493 2494 /** 2495 * Return the PageId for the given character 2496 * 2497 * @param cp the character as a string 2498 * @return the PageId 2499 */ 2500 private static PageId getCharacterPageId(String cp) { 2501 if (Emoji.getAllRgiNoES().contains(cp)) { 2502 return Emoji.getPageId(cp); 2503 } 2504 if (nonEmojiMap == null) { 2505 nonEmojiMap = createNonEmojiMap(); 2506 } 2507 PageId pageId = nonEmojiMap.get(cp); 2508 if (pageId == null) { 2509 throw new InternalCldrException("Failure getting character page id"); 2510 } 2511 return pageId; 2512 } 2513 2514 /** 2515 * Create the map from non-emoji characters to pages. 
Call with lazy initialization to avoid 2516 * static initialization bugs, otherwise PageId.OtherSymbols could still be null. 2517 * 2518 * @return the map from character to PageId 2519 */ 2520 private static UnicodeMap<PageId> createNonEmojiMap() { 2521 return new UnicodeMap<PageId>() 2522 .putAll(new UnicodeSet("[:P:]"), PageId.Punctuation) 2523 .putAll(new UnicodeSet("[:Sm:]"), PageId.MathSymbols) 2524 .putAll(new UnicodeSet("[^[:Sm:][:P:]]"), PageId.OtherSymbols) 2525 .freeze(); 2526 } 2527 } 2528