xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathHeader.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Splitter;
4 import com.ibm.icu.dev.util.UnicodeMap;
5 import com.ibm.icu.impl.Relation;
6 import com.ibm.icu.impl.Row;
7 import com.ibm.icu.lang.UCharacter;
8 import com.ibm.icu.text.Collator;
9 import com.ibm.icu.text.Transform;
10 import com.ibm.icu.text.UnicodeSet;
11 import com.ibm.icu.util.ICUException;
12 import com.ibm.icu.util.Output;
13 import com.ibm.icu.util.ULocale;
14 import java.util.Arrays;
15 import java.util.Collections;
16 import java.util.EnumMap;
17 import java.util.HashMap;
18 import java.util.HashSet;
19 import java.util.Iterator;
20 import java.util.LinkedHashMap;
21 import java.util.LinkedHashSet;
22 import java.util.List;
23 import java.util.Locale;
24 import java.util.Map;
25 import java.util.Map.Entry;
26 import java.util.Set;
27 import java.util.TreeMap;
28 import java.util.TreeSet;
29 import java.util.logging.Logger;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.unicode.cldr.draft.ScriptMetadata;
33 import org.unicode.cldr.draft.ScriptMetadata.Info;
34 import org.unicode.cldr.tool.LikelySubtags;
35 import org.unicode.cldr.util.RegexLookup.Finder;
36 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
37 import org.unicode.cldr.util.With.SimpleIterator;
38 import org.unicode.cldr.util.personname.PersonNameFormatter;
39 
40 /**
41  * Provides a mechanism for dividing up LDML paths into understandable categories, eg for the Survey
42  * tool.
43  */
44 public class PathHeader implements Comparable<PathHeader> {
    /** Link to a section. Commenting out the page switch for now. */
    public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/ "href='";

    // NOTE(review): not referenced in the visible part of this file; purpose to be confirmed.
    static boolean UNIFORM_CONTINENTS = true;
    // Cached Factory handed out by getFactory(); stays null until the first factory is created.
    static Factory factorySingleton = null;

    // When true, compareTo() does not use originalPath as a final tie-breaker.
    static final boolean SKIP_ORIGINAL_PATH = true;

    private static final Logger logger = Logger.getLogger(PathHeader.class.getName());

    // Splits a string on the '-' character.
    static final Splitter HYPHEN_SPLITTER = Splitter.on('-');
56 
57     public enum Width {
58         FULL,
59         LONG,
60         WIDE,
61         SHORT,
62         NARROW;
63 
getValue(String input)64         public static Width getValue(String input) {
65             try {
66                 return Width.valueOf(input.toUpperCase(Locale.ENGLISH));
67             } catch (RuntimeException e) {
68                 e.printStackTrace();
69                 throw e;
70             }
71         }
72 
73         @Override
toString()74         public String toString() {
75             return name().toLowerCase(Locale.ENGLISH);
76         }
77     }
78 
79     /** What status the survey tool should use. Can be overridden in Phase.getAction() */
80     public enum SurveyToolStatus {
81         /** Never show. */
82         DEPRECATED,
83         /** Hide. Can be overridden in Phase.getAction() */
84         HIDE,
85         /**
86          * Don't allow Change box (except TC), instead show ticket. But allow votes. Can be
87          * overridden in Phase.getAction()
88          */
89         READ_ONLY,
90         /** Allow change box and votes. Can be overridden in Phase.getAction() */
91         READ_WRITE,
92         /**
93          * Changes are allowed as READ_WRITE, but field is always displayed as LTR, even in RTL
94          * locales (used for patterns).
95          */
96         LTR_ALWAYS
97     }
98 
    // Name registry for SectionId: filled by the SectionId constructor and consulted by
    // SectionId.forString() and SectionId.toString().
    private static final EnumNames<SectionId> SectionIdNames = new EnumNames<>();
100 
101     /**
102      * The Section for a path. Don't change these without committee buy-in. The 'name' may be
103      * 'Core_Data' and the toString is 'Core Data' toString gives the human name
104      */
105     public enum SectionId {
106         Core_Data("Core Data"),
107         Locale_Display_Names("Locale Display Names"),
108         DateTime("Date & Time"),
109         Timezones,
110         Numbers,
111         Currencies,
112         Units,
113         Characters,
114         Misc("Miscellaneous"),
115         BCP47,
116         Supplemental,
117         Special;
118 
SectionId(String... alternateNames)119         SectionId(String... alternateNames) {
120             SectionIdNames.add(this, alternateNames);
121         }
122 
forString(String name)123         public static SectionId forString(String name) {
124             return SectionIdNames.forString(name);
125         }
126 
127         @Override
toString()128         public String toString() {
129             return SectionIdNames.toString(this);
130         }
131     }
132 
    // Name registry for PageId: filled by the PageId constructor and consulted by
    // PageId.forString() and PageId.toString().
    private static final EnumNames<PageId> PageIdNames = new EnumNames<>();
    // Maps each section to its pages; the PageId constructor adds every page under its section.
    private static final Relation<SectionId, PageId> SectionIdToPageIds =
            Relation.of(new TreeMap<>(), TreeSet.class);
136 
137     private static class SubstringOrder implements Comparable<SubstringOrder> {
138         final String mainOrder;
139         final int order;
140 
SubstringOrder(String source)141         public SubstringOrder(String source) {
142             int pos = source.lastIndexOf('-') + 1;
143             int ordering = COUNTS.indexOf(source.substring(pos));
144             // account for digits, and "some" future proofing.
145             order = ordering < 0 ? source.charAt(pos) : 0x10000 + ordering;
146             mainOrder = source.substring(0, pos);
147         }
148 
149         @Override
150         public String toString() {
151             return "{" + mainOrder + ", " + order + "}";
152         }
153 
154         @Override
155         public int compareTo(SubstringOrder other) {
156             int diff = alphabeticCompare(mainOrder, other.mainOrder);
157             if (diff != 0) {
158                 return diff;
159             }
160             return order - other.order;
161         }
162     }
163 
164     /**
165      * The Page for a path (within a Section). Don't change these without committee buy-in. the name
166      * is for example WAsia where toString gives Western Asia
167      */
168     public enum PageId {
169         Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"),
170         Numbering_Systems(SectionId.Core_Data, "Numbering Systems"),
171         LinguisticElements(SectionId.Core_Data, "Linguistic Elements"),
172 
173         Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"),
174         Languages_A_D(SectionId.Locale_Display_Names, "Languages (A-D)"),
175         Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"),
176         Languages_K_N(SectionId.Locale_Display_Names, "Languages (K-N)"),
177         Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"),
178         Languages_T_Z(SectionId.Locale_Display_Names, "Languages (T-Z)"),
179         Scripts(SectionId.Locale_Display_Names),
180         Territories(SectionId.Locale_Display_Names, "Geographic Regions"),
181         T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"),
182         T_SAmerica(SectionId.Locale_Display_Names, "Territories (South America)"),
183         T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"),
184         T_Europe(SectionId.Locale_Display_Names, "Territories (Europe)"),
185         T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"),
186         T_Oceania(SectionId.Locale_Display_Names, "Territories (Oceania)"),
187         Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"),
188         Keys(SectionId.Locale_Display_Names),
189 
190         Fields(SectionId.DateTime),
191         Gregorian(SectionId.DateTime),
192         Generic(SectionId.DateTime),
193         Buddhist(SectionId.DateTime),
194         Chinese(SectionId.DateTime),
195         Coptic(SectionId.DateTime),
196         Dangi(SectionId.DateTime),
197         Ethiopic(SectionId.DateTime),
198         Ethiopic_Amete_Alem(SectionId.DateTime, "Ethiopic-Amete-Alem"),
199         Hebrew(SectionId.DateTime),
200         Indian(SectionId.DateTime),
201         Islamic(SectionId.DateTime),
202         Japanese(SectionId.DateTime),
203         Persian(SectionId.DateTime),
204         Minguo(SectionId.DateTime),
205 
206         Timezone_Display_Patterns(SectionId.Timezones, "Timezone Display Patterns"),
207         NAmerica(SectionId.Timezones, "North America"),
208         SAmerica(SectionId.Timezones, "South America"),
209         Africa(SectionId.Timezones),
210         Europe(SectionId.Timezones),
211         Russia(SectionId.Timezones),
212         WAsia(SectionId.Timezones, "Western Asia"),
213         CAsia(SectionId.Timezones, "Central Asia"),
214         EAsia(SectionId.Timezones, "Eastern Asia"),
215         SAsia(SectionId.Timezones, "Southern Asia"),
216         SEAsia(SectionId.Timezones, "Southeast Asia"),
217         Australasia(SectionId.Timezones),
218         Antarctica(SectionId.Timezones),
219         Oceania(SectionId.Timezones),
220         UnknownT(SectionId.Timezones, "Unknown Region"),
221         Overrides(SectionId.Timezones),
222 
223         Symbols(SectionId.Numbers),
224         Number_Formatting_Patterns(SectionId.Numbers, "Number Formatting Patterns"),
225         Compact_Decimal_Formatting(SectionId.Numbers, "Compact Decimal Formatting"),
226         Compact_Decimal_Formatting_Other(
227                 SectionId.Numbers, "Compact Decimal Formatting (Other Numbering Systems)"),
228 
229         Measurement_Systems(SectionId.Units, "Measurement Systems"),
230         Duration(SectionId.Units),
231         Graphics(SectionId.Units),
232         Length(SectionId.Units),
233         Area(SectionId.Units),
234         Volume_Metric(SectionId.Units, "Volume Metric"),
235         Volume_Other(SectionId.Units, "Volume Other"),
236         SpeedAcceleration(SectionId.Units, "Speed and Acceleration"),
237         MassWeight(SectionId.Units, "Mass and Weight"),
238         EnergyPower(SectionId.Units, "Energy and Power"),
239         ElectricalFrequency(SectionId.Units, "Electrical and Frequency"),
240         Weather(SectionId.Units),
241         Digital(SectionId.Units),
242         Coordinates(SectionId.Units),
243         OtherUnits(SectionId.Units, "Other Units"),
244         CompoundUnits(SectionId.Units, "Compound Units"),
245 
246         Displaying_Lists(SectionId.Misc, "Displaying Lists"),
247         MinimalPairs(SectionId.Misc, "Minimal Pairs"),
248         PersonNameFormats(SectionId.Misc, "Person Name Formats"),
249         Transforms(SectionId.Misc),
250 
251         Identity(SectionId.Special),
252         Version(SectionId.Special),
253         Suppress(SectionId.Special),
254         Deprecated(SectionId.Special),
255         Unknown(SectionId.Special),
256 
257         C_NAmerica(SectionId.Currencies, "North America (C)"),
258         // need to add (C) to differentiate from Timezone territories
259         C_SAmerica(SectionId.Currencies, "South America (C)"),
260         C_NWEurope(SectionId.Currencies, "Northern/Western Europe"),
261         C_SEEurope(SectionId.Currencies, "Southern/Eastern Europe"),
262         C_NAfrica(SectionId.Currencies, "Northern Africa"),
263         C_WAfrica(SectionId.Currencies, "Western Africa"),
264         C_MAfrica(SectionId.Currencies, "Middle Africa"),
265         C_EAfrica(SectionId.Currencies, "Eastern Africa"),
266         C_SAfrica(SectionId.Currencies, "Southern Africa"),
267         C_WAsia(SectionId.Currencies, "Western Asia (C)"),
268         C_CAsia(SectionId.Currencies, "Central Asia (C)"),
269         C_EAsia(SectionId.Currencies, "Eastern Asia (C)"),
270         C_SAsia(SectionId.Currencies, "Southern Asia (C)"),
271         C_SEAsia(SectionId.Currencies, "Southeast Asia (C)"),
272         C_Oceania(SectionId.Currencies, "Oceania (C)"),
273         C_Unknown(SectionId.Currencies, "Unknown Region (C)"),
274 
275         // BCP47
276         u_Extension(SectionId.BCP47),
277         t_Extension(SectionId.BCP47),
278 
279         // Supplemental
280         Alias(SectionId.Supplemental),
281         IdValidity(SectionId.Supplemental),
282         Locale(SectionId.Supplemental),
283         RegionMapping(SectionId.Supplemental),
284         WZoneMapping(SectionId.Supplemental),
285         Transform(SectionId.Supplemental),
286         Units(SectionId.Supplemental),
287         Likely(SectionId.Supplemental),
288         LanguageMatch(SectionId.Supplemental),
289         TerritoryInfo(SectionId.Supplemental),
290         LanguageInfo(SectionId.Supplemental),
291         LanguageGroup(SectionId.Supplemental),
292         Fallback(SectionId.Supplemental),
293         Gender(SectionId.Supplemental),
294         Grammar(SectionId.Supplemental),
295         Metazone(SectionId.Supplemental),
296         NumberSystem(SectionId.Supplemental),
297         Plural(SectionId.Supplemental),
298         PluralRange(SectionId.Supplemental),
299         Containment(SectionId.Supplemental),
300         Currency(SectionId.Supplemental),
301         Calendar(SectionId.Supplemental),
302         WeekData(SectionId.Supplemental),
303         Measurement(SectionId.Supplemental),
304         Language(SectionId.Supplemental),
305         RBNF(SectionId.Supplemental),
306         Segmentation(SectionId.Supplemental),
307         DayPeriod(SectionId.Supplemental),
308 
309         Category(SectionId.Characters),
310 
311         // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects,
312         // Symbols, Flags]
313         Smileys(SectionId.Characters, "Smileys & Emotion"),
314         People(SectionId.Characters, "People & Body"),
315         People2(SectionId.Characters, "People & Body 2"),
316         Animals_Nature(SectionId.Characters, "Animals & Nature"),
317         Food_Drink(SectionId.Characters, "Food & Drink"),
318         Travel_Places(SectionId.Characters, "Travel & Places"),
319         Travel_Places2(SectionId.Characters, "Travel & Places 2"),
320         Activities(SectionId.Characters),
321         Objects(SectionId.Characters),
322         Objects2(SectionId.Characters),
323         EmojiSymbols(SectionId.Characters, "Emoji Symbols"),
324         Punctuation(SectionId.Characters),
325         MathSymbols(SectionId.Characters, "Math Symbols"),
326         OtherSymbols(SectionId.Characters, "Other Symbols"),
327         Flags(SectionId.Characters),
328         Component(SectionId.Characters),
329         Typography(SectionId.Characters),
330         ;
331 
332         private final SectionId sectionId;
333 
334         PageId(SectionId sectionId, String... alternateNames) {
335             this.sectionId = sectionId;
336             SectionIdToPageIds.put(sectionId, this);
337             PageIdNames.add(this, alternateNames);
338         }
339 
340         /**
341          * Construct a pageId given a string
342          *
343          * @param name
344          * @return
345          */
346         public static PageId forString(String name) {
347             try {
348                 return PageIdNames.forString(name);
349             } catch (Exception e) {
350                 throw new ICUException("No PageId for " + name, e);
351             }
352         }
353 
354         /**
355          * Returns the page id
356          *
357          * @return a page ID, such as 'Languages'
358          */
359         @Override
360         public String toString() {
361             return PageIdNames.toString(this);
362         }
363 
364         /**
365          * Get the containing section id, such as 'Code Lists'
366          *
367          * @return the containing section ID
368          */
369         public SectionId getSectionId() {
370             return sectionId;
371         }
372     }
373 
    // Identity of this header: section, page, header text and code. These four are what
    // equals() and hashCode() look at.
    private final SectionId sectionId;
    private final PageId pageId;
    // May be null; getHeader() maps null to the empty string.
    private final String header;
    private final String code;
    // The path this header was created for.
    private final String originalPath;
    private final SurveyToolStatus status;

    // Used for ordering
    private final int headerOrder;
    private final long codeOrder;
    // May be null; compareTo() sorts a null suborder before a non-null one.
    private final SubstringOrder codeSuborder;

    // Semicolon with optional surrounding whitespace.
    static final Pattern SEMI = PatternCache.get("\\s*;\\s*");
    // Shared matcher for [@alt="..."]; group 1 is the alt value. A Matcher is stateful:
    // Factory.fromPath resets and uses it only while holding the lock on Factory.lookup.
    static final Matcher ALT_MATCHER = PatternCache.get("\\[@alt=\"([^\"]*+)\"]").matcher("");

    static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
    static final Map<String, String> metazoneToContinent =
            supplementalDataInfo.getMetazoneToContinentMap();
    // Filled by the static initializer that follows: metazone -> territory code used for paging.
    static final Map<String, String> metazoneToPageTerritory = new HashMap<>();
393 
394     static {
395         Map<String, Map<String, String>> metazoneToRegionToZone =
396                 supplementalDataInfo.getMetazoneToRegionToZone();
397         for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) {
398             String metazone = metazoneEntry.getKey();
399             String worldZone = metazoneEntry.getValue().get("001");
400             String territory = Containment.getRegionFromZone(worldZone);
401             if (territory == null) {
402                 territory = "ZZ";
403             }
404             // Russia, Antarctica => territory
405             // in Australasia, Asia, S. America => subcontinent
406             // in N. America => N. America (grouping of 3 subcontinents)
407             // in everything else => continent
408             if (territory.equals("RU") || territory.equals("AQ")) {
409                 metazoneToPageTerritory.put(metazone, territory);
410             } else {
411                 String continent = Containment.getContinent(territory);
412                 String subcontinent = Containment.getSubcontinent(territory);
413                 if (continent.equals("142")) { // Asia
414                     metazoneToPageTerritory.put(metazone, subcontinent);
415                 } else if (continent.equals("019")) { // Americas
416                     metazoneToPageTerritory.put(
417                             metazone, subcontinent.equals("005") ? subcontinent : "003");
418                 } else if (subcontinent.equals("053")) { // Australasia
419                     metazoneToPageTerritory.put(metazone, subcontinent);
420                 } else {
421                     metazoneToPageTerritory.put(metazone, continent);
422                 }
423             }
424         }
425     }
426 
427     private PathHeader(
428             SectionId sectionId,
429             PageId pageId,
430             String header,
431             int headerOrder,
432             String code,
433             long codeOrder,
434             SubstringOrder suborder,
435             SurveyToolStatus status,
436             String originalPath) {
437         this.sectionId = sectionId;
438         this.pageId = pageId;
439         this.header = header;
440         this.headerOrder = headerOrder;
441         this.code = code;
442         this.codeOrder = codeOrder;
443         this.codeSuborder = suborder;
444         this.originalPath = originalPath;
445         this.status = status;
446     }
447 
448     /**
449      * Return a factory for use in creating the headers. This is cached after first use. The calls
450      * are thread-safe. Null gets the default (CLDRConfig) english file.
451      *
452      * @param englishFile
453      */
454     public static Factory getFactory(CLDRFile englishFile) {
455         if (factorySingleton == null) {
456             if (englishFile == null) {
457                 englishFile = CLDRConfig.getInstance().getEnglish();
458             }
459             if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) {
460                 throw new IllegalArgumentException(
461                         "PathHeader's CLDRFile must be '"
462                                 + ULocale.ENGLISH.getBaseName()
463                                 + "', but found '"
464                                 + englishFile.getLocaleID()
465                                 + "'");
466             }
467             factorySingleton = new Factory(englishFile);
468         }
469         return factorySingleton;
470     }
471 
    /**
     * Convenience method for the common case: same as {@link #getFactory(CLDRFile)} with a null
     * argument.
     */
    public static Factory getFactory() {
        return getFactory(null);
    }
476 
    /**
     * Get the section as a string (the result of {@code toString()} on the section id).
     *
     * @deprecated use {@link #getSectionId()} instead
     */
    @Deprecated
    public String getSection() {
        return sectionId.toString();
    }
484 
    /** Get the section this header belongs to. */
    public SectionId getSectionId() {
        return sectionId;
    }
488 
    /**
     * Get the page as a string (the result of {@code toString()} on the page id).
     *
     * @deprecated use {@link #getPageId()} instead
     */
    @Deprecated
    public String getPage() {
        return pageId.toString();
    }
496 
    /** Get the page this header belongs to. */
    public PageId getPageId() {
        return pageId;
    }
500 
501     public String getHeader() {
502         return header == null ? "" : header;
503     }
504 
    /** Get the code text. */
    public String getCode() {
        return code;
    }
508 
509     public String getHeaderCode() {
510         return getHeader() + ": " + getCode();
511     }
512 
    /** Get the path that this header was created for. */
    public String getOriginalPath() {
        return originalPath;
    }
516 
    /** Get the survey tool status stored for this path. */
    public SurveyToolStatus getSurveyToolStatus() {
        return status;
    }
520 
521     @Override
522     public String toString() {
523         return sectionId
524                 + "\t"
525                 + pageId
526                 + "\t"
527                 + header // + "\t" + headerOrder
528                 + "\t"
529                 + code // + "\t" + codeOrder
530         ;
531     }
532 
533     /**
534      * Compare this PathHeader to another one
535      *
536      * @param other the object to be compared.
537      * @return 0 if equal, -1 if less, 1 if more
538      *     <p>Note: if we ever have to compare just the header or just the code, methods to do that
539      *     were in release 44 (compareHeader and compareCode), but they were unused and therefore
540      *     removed in CLDR-11155.
541      */
542     @Override
543     public int compareTo(PathHeader other) {
544         // Within each section, order alphabetically if the integer orders are
545         // not different.
546         try {
547             int result;
548             if (0 != (result = sectionId.compareTo(other.sectionId))) {
549                 return result;
550             }
551             if (0 != (result = pageId.compareTo(other.pageId))) {
552                 return result;
553             }
554             if (0 != (result = headerOrder - other.headerOrder)) {
555                 return result;
556             }
557             if (0 != (result = alphabeticCompare(header, other.header))) {
558                 return result;
559             }
560             long longResult;
561             if (0 != (longResult = codeOrder - other.codeOrder)) {
562                 return longResult < 0 ? -1 : 1;
563             }
564             if (codeSuborder != null) { // do all three cases, for transitivity
565                 if (other.codeSuborder != null) {
566                     if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) {
567                         return result;
568                     }
569                 } else {
570                     return 1; // if codeSuborder != null (and other.codeSuborder
571                     // == null), it is greater
572                 }
573             } else if (other.codeSuborder != null) {
574                 return -1; // if codeSuborder == null (and other.codeSuborder !=
575                 // null), it is greater
576             }
577             if (0 != (result = alphabeticCompare(code, other.code))) {
578                 return result;
579             }
580             if (!SKIP_ORIGINAL_PATH) {
581                 if (0 != (result = alphabeticCompare(originalPath, other.originalPath))) {
582                     return result;
583                 }
584             }
585             return 0;
586         } catch (RuntimeException e) {
587             throw new IllegalArgumentException(
588                     "Internal problem comparing " + this + " and " + other, e);
589         }
590     }
591 
592     @Override
593     public boolean equals(Object obj) {
594         PathHeader other;
595         try {
596             other = (PathHeader) obj;
597         } catch (Exception e) {
598             return false;
599         }
600         return sectionId == other.sectionId
601                 && pageId == other.pageId
602                 && header.equals(other.header)
603                 && code.equals(other.code);
604     }
605 
606     @Override
607     public int hashCode() {
608         return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode();
609     }
610 
611     public static class Factory implements Transform<String, PathHeader> {
        // Lookup from path to RawData, loaded from data/PathHeader.txt. fromPath() also uses this
        // object as the lock for the fields below marked "synchronized with lookup".
        static final RegexLookup<RawData> lookup =
                RegexLookup.of(new PathHeaderTransform())
                        .setPatternTransform(RegexLookup.RegexFinderTransformPath)
                        .loadFromFile(PathHeader.class, "data/PathHeader.txt");
        // synchronized with lookup; passed to lookup.get() by fromPath()
        static final Output<String[]> args = new Output<>();
        // synchronized with lookup; fromPath() adds 1 for the RawData of each path it resolves
        static final Counter<RawData> counter = new Counter<>();
        // synchronized with lookup; first cleaned path seen for each RawData
        static final Map<RawData, String> samples = new HashMap<>();
        // synchronized with lookup; changed by each call to fix() and read by makePathHeader()
        static long order;
        // read by makePathHeader() after its fix() calls
        // NOTE(review): presumably also set by fix(); confirm
        static SubstringOrder suborder;

        // path -> PathHeader results of fromPath(); access is synchronized on cache itself
        static final Map<String, PathHeader> cache = new HashMap<>();
        // synchronized with cache
        static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage =
                new EnumMap<>(SectionId.class);
        // updated by fromPath() while holding the cache lock: paths seen for each section/page
        static final Relation<SectionPage, String> sectionPageToPaths =
                Relation.of(new TreeMap<>(), HashSet.class);
        // set at most once, by setEnglishCLDRFileIfNotSet()
        private static CLDRFile englishFile;
        // string form of every matcher that fromPath() found through this factory instance
        private final Set<String> matchersFound = new HashSet<>();
634 
        /**
         * Create a factory for creating PathHeaders. Private: instances are obtained through
         * PathHeader.getFactory.
         *
         * @param englishFile - only sets the file (statically!) if not already set.
         */
        private Factory(CLDRFile englishFile) {
            setEnglishCLDRFileIfNotSet(englishFile); // temporary
        }
643 
644         /**
645          * Set englishFile if it is not already set.
646          *
647          * @param englishFile2 the value to set for englishFile
648          */
649         private static void setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) {
650             synchronized (Factory.class) {
651                 if (englishFile == null) {
652                     englishFile = englishFile2;
653                 }
654             }
655         }
656 
        /**
         * Empties the static path-to-PathHeader cache, while holding the cache lock. Use only when
         * trying to find unmatched patterns.
         */
        public void clearCache() {
            synchronized (cache) {
                cache.clear();
            }
        }
663 
        /**
         * Return the PathHeader for a given path. Thread-safe. Same as {@link #fromPath(String,
         * List)} with a null failures list.
         */
        public PathHeader fromPath(String path) {
            return fromPath(path, null);
        }
668 
        /**
         * Return the PathHeader for a given path. Thread-safe. This is the Transform entry point;
         * it is the same as {@link #fromPath(String, List)} with a null failures list.
         */
        @Override
        public PathHeader transform(String path) {
            return fromPath(path, null);
        }
674 
675         /**
676          * Return the PathHeader for a given path. Thread-safe.
677          *
678          * @param failures a list of failures to add to.
679          */
680         public PathHeader fromPath(final String path, List<String> failures) {
681             if (path == null) {
682                 throw new NullPointerException("Path cannot be null");
683             }
684             synchronized (cache) {
685                 PathHeader old = cache.get(path);
686                 if (old != null) {
687                     return old;
688                 }
689             }
690             synchronized (lookup) {
691                 String cleanPath = path;
692                 // special handling for alt
693                 String alt = null;
694                 int altPos = cleanPath.indexOf("[@alt=");
695                 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) {
696                     if (ALT_MATCHER.reset(cleanPath).find()) {
697                         alt = ALT_MATCHER.group(1);
698                         cleanPath =
699                                 cleanPath.substring(0, ALT_MATCHER.start())
700                                         + cleanPath.substring(ALT_MATCHER.end());
701                         int pos = alt.indexOf("proposed");
702                         if (pos >= 0 && !path.startsWith("//ldml/collations")) {
703                             alt = pos == 0 ? null : alt.substring(0, pos - 1);
704                             // drop "proposed",
705                             // change "xxx-proposed" to xxx.
706                         }
707                     } else {
708                         throw new IllegalArgumentException();
709                     }
710                 }
711                 Output<Finder> matcherFound = new Output<>();
712                 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures);
713                 if (data == null) {
714                     return null;
715                 }
716                 matchersFound.add(matcherFound.value.toString());
717                 counter.add(data, 1);
718                 if (!samples.containsKey(data)) {
719                     samples.put(data, cleanPath);
720                 }
721                 try {
722                     PathHeader result = makePathHeader(data, path, alt);
723                     synchronized (cache) {
724                         PathHeader old = cache.get(path);
725                         if (old == null) {
726                             cache.put(path, result);
727                         } else {
728                             result = old;
729                         }
730                         Map<PageId, SectionPage> pageToPathHeaders =
731                                 sectionToPageToSectionPage.get(result.sectionId);
732                         if (pageToPathHeaders == null) {
733                             sectionToPageToSectionPage.put(
734                                     result.sectionId,
735                                     pageToPathHeaders = new EnumMap<>(PageId.class));
736                         }
737                         SectionPage sectionPage = pageToPathHeaders.get(result.pageId);
738                         if (sectionPage == null) {
739                             sectionPage = new SectionPage(result.sectionId, result.pageId);
740                             pageToPathHeaders.put(result.pageId, sectionPage);
741                         }
742                         sectionPageToPaths.put(sectionPage, path);
743                     }
744                     return result;
745                 } catch (Exception e) {
746                     throw new IllegalArgumentException(
747                             "Probably mismatch in Page/Section enum, or too few capturing groups in regex for "
748                                     + path,
749                             e);
750                 }
751             }
752         }
753 
        /**
         * Build the PathHeader for a path from the raw data matched for it.
         *
         * <p>The statement order matters: the static field {@code order} is read immediately
         * after the fix() call that produced the header, and again after the one that produced
         * the code.
         *
         * @param data the raw section/page/header/code data matched for the path
         * @param path the original path, stored in the result
         * @param alt the alt value to append to the code after a hyphen, or null for none
         * @return the new PathHeader
         */
        private PathHeader makePathHeader(RawData data, String path, String alt) {
            // Caution: each call to PathHeader.Factory.fix changes the value of
            // PathHeader.Factory.order
            SectionId newSectionId = SectionId.forString(fix(data.section, 0));
            PageId newPageId = PageId.forString(fix(data.page, 0));
            String newHeader = fix(data.header, data.headerOrder);
            // order as left by the fix() call for the header, narrowed to int
            int newHeaderOrder = (int) order;
            String codeDashAlt = data.code + (alt == null ? "" : ("-" + alt));
            String newCode = fix(codeDashAlt, data.codeOrder);
            // order as left by the fix() call for the code
            long newCodeOrder = order;
            // NOTE(review): suborder is a static field, presumably left by the last fix() call
            // above; confirm against fix().
            return new PathHeader(
                    newSectionId,
                    newPageId,
                    newHeader,
                    newHeaderOrder,
                    newCode,
                    newCodeOrder,
                    suborder,
                    data.status,
                    path);
        }
775 
776         private static class SectionPage implements Comparable<SectionPage> {
777             private final SectionId sectionId;
778             private final PageId pageId;
779 
SectionPage(SectionId sectionId, PageId pageId)780             public SectionPage(SectionId sectionId, PageId pageId) {
781                 this.sectionId = sectionId;
782                 this.pageId = pageId;
783             }
784 
785             @Override
compareTo(SectionPage other)786             public int compareTo(SectionPage other) {
787                 // Within each section, order alphabetically if the integer
788                 // orders are
789                 // not different.
790                 int result;
791                 if (0 != (result = sectionId.compareTo(other.sectionId))) {
792                     return result;
793                 }
794                 if (0 != (result = pageId.compareTo(other.pageId))) {
795                     return result;
796                 }
797                 return 0;
798             }
799 
800             @Override
equals(Object obj)801             public boolean equals(Object obj) {
802                 PathHeader other;
803                 try {
804                     other = (PathHeader) obj;
805                 } catch (Exception e) {
806                     return false;
807                 }
808                 return sectionId == other.sectionId && pageId == other.pageId;
809             }
810 
811             @Override
hashCode()812             public int hashCode() {
813                 return sectionId.hashCode() ^ pageId.hashCode();
814             }
815 
816             @Override
toString()817             public String toString() {
818                 return sectionId + " > " + pageId;
819             }
820         }
821 
822         /**
823          * Returns a set of paths currently associated with the given section and page.
824          *
825          * <p><b>Warning:</b>
826          *
827          * <ol>
828          *   <li>The set may not be complete for a cldrFile unless all of paths in the file have had
829          *       fromPath called. And this includes getExtraPaths().
830          *   <li>The set may include paths that have no value in the current cldrFile.
831          *   <li>The set may be empty, if the section/page aren't valid.
832          * </ol>
833          *
834          * Thread-safe.
835          */
getCachedPaths(SectionId sectionId, PageId page)836         public static Set<String> getCachedPaths(SectionId sectionId, PageId page) {
837             Set<String> target = new HashSet<>();
838             synchronized (cache) {
839                 Map<PageId, SectionPage> pageToSectionPage =
840                         sectionToPageToSectionPage.get(sectionId);
841                 if (pageToSectionPage == null) {
842                     return target;
843                 }
844                 SectionPage sectionPage = pageToSectionPage.get(page);
845                 if (sectionPage == null) {
846                     return target;
847                 }
848                 Set<String> set = sectionPageToPaths.getAll(sectionPage);
849                 target.addAll(set);
850             }
851             return target;
852         }
853 
854         /**
855          * Return the Sections and Pages that are in defined, for display in menus. Both are
856          * ordered.
857          */
getSectionIdsToPageIds()858         public static Relation<SectionId, PageId> getSectionIdsToPageIds() {
859             SectionIdToPageIds.freeze(); // just in case
860             return SectionIdToPageIds;
861         }
862 
863         /**
864          * Return the names for Sections and Pages that are defined, for display in menus. Both are
865          * ordered.
866          *
867          * @deprecated Use getSectionIdsToPageIds
868          */
869         @Deprecated
getSectionsToPages()870         public static LinkedHashMap<String, Set<String>> getSectionsToPages() {
871             LinkedHashMap<String, Set<String>> sectionsToPages = new LinkedHashMap<>();
872             for (PageId pageId : PageId.values()) {
873                 String sectionId2 = pageId.getSectionId().toString();
874                 Set<String> pages =
875                         sectionsToPages.computeIfAbsent(sectionId2, k -> new LinkedHashSet<>());
876                 pages.add(pageId.toString());
877             }
878             return sectionsToPages;
879         }
880 
881         /**
882          * @deprecated, use the filterCldr with the section/page ids.
883          */
filterCldr(String section, String page, CLDRFile file)884         public Iterable<String> filterCldr(String section, String page, CLDRFile file) {
885             return new FilteredIterable(section, page, file);
886         }
887 
888         private class FilteredIterable implements Iterable<String>, SimpleIterator<String> {
889             private final SectionId sectionId;
890             private final PageId pageId;
891             private final Iterator<String> fileIterator;
892 
FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file)893             FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) {
894                 this.sectionId = sectionId;
895                 this.pageId = pageId;
896                 this.fileIterator = file.fullIterable().iterator();
897             }
898 
FilteredIterable(String section, String page, CLDRFile file)899             public FilteredIterable(String section, String page, CLDRFile file) {
900                 this(SectionId.forString(section), PageId.forString(page), file);
901             }
902 
903             @Override
iterator()904             public Iterator<String> iterator() {
905                 return With.toIterator(this);
906             }
907 
908             @Override
next()909             public String next() {
910                 while (fileIterator.hasNext()) {
911                     String path = fileIterator.next();
912                     PathHeader pathHeader = fromPath(path);
913                     if (sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) {
914                         return path;
915                     }
916                 }
917                 return null;
918             }
919         }
920 
921         private static class ChronologicalOrder {
922             private final Map<String, Integer> map = new HashMap<>();
923             private String item;
924             private int order;
925             private final ChronologicalOrder toClear;
926 
ChronologicalOrder(ChronologicalOrder toClear)927             ChronologicalOrder(ChronologicalOrder toClear) {
928                 this.toClear = toClear;
929             }
930 
getOrder()931             int getOrder() {
932                 return order;
933             }
934 
set(String itemToOrder)935             public String set(String itemToOrder) {
936                 if (itemToOrder.startsWith("*")) {
937                     item = itemToOrder.substring(1, itemToOrder.length());
938                     return item; // keep old order
939                 }
940                 item = itemToOrder;
941                 Integer old = map.get(item);
942                 if (old != null) {
943                     order = old.intValue();
944                 } else {
945                     order = map.size();
946                     map.put(item, order);
947                     clearLower();
948                 }
949                 return item;
950             }
951 
clearLower()952             private void clearLower() {
953                 if (toClear != null) {
954                     toClear.map.clear();
955                     toClear.order = 0;
956                     toClear.clearLower();
957                 }
958             }
959         }
960 
961         static class RawData {
962             static ChronologicalOrder codeOrdering = new ChronologicalOrder(null);
963             static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering);
964 
RawData(String source)965             public RawData(String source) {
966                 String[] split = SEMI.split(source);
967                 section = split[0];
968                 // HACK
969                 if (section.equals("Timezones") && split[1].equals("Indian")) {
970                     page = "Indian2";
971                 } else {
972                     page = split[1];
973                 }
974 
975                 header = headerOrdering.set(split[2]);
976                 headerOrder = headerOrdering.getOrder();
977 
978                 code = codeOrdering.set(split[3]);
979                 codeOrder = codeOrdering.getOrder();
980 
981                 status =
982                         split.length < 5
983                                 ? SurveyToolStatus.READ_WRITE
984                                 : SurveyToolStatus.valueOf(split[4]);
985             }
986 
987             public final String section;
988             public final String page;
989             public final String header;
990             public final int headerOrder;
991             public final String code;
992             public final int codeOrder;
993             public final SurveyToolStatus status;
994 
995             @Override
996             public String toString() {
997                 return section
998                         + "\t"
999                         + page
1000                         + "\t"
1001                         + header
1002                         + "\t"
1003                         + headerOrder
1004                         + "\t"
1005                         + code
1006                         + "\t"
1007                         + codeOrder
1008                         + "\t"
1009                         + status;
1010             }
1011         }
1012 
1013         static class PathHeaderTransform implements Transform<String, RawData> {
1014             @Override
1015             public RawData transform(String source) {
1016                 return new RawData(source);
1017             }
1018         }
1019 
1020         /**
1021          * Internal data, for testing and debugging.
1022          *
1023          * @deprecated
1024          */
1025         @Deprecated
1026         public class CounterData extends Row.R4<String, RawData, String, String> {
1027             public CounterData(String a, RawData b, String c) {
1028                 super(
1029                         a,
1030                         b,
1031                         c == null ? "no sample" : c,
1032                         c == null ? "no sample" : fromPath(c).toString());
1033             }
1034         }
1035 
1036         /**
1037          * Get the internal data, for testing and debugging.
1038          *
1039          * @deprecated
1040          */
1041         @Deprecated
1042         public Counter<CounterData> getInternalCounter() {
1043             synchronized (lookup) {
1044                 Counter<CounterData> result = new Counter<>();
1045                 for (Map.Entry<Finder, RawData> foo : lookup) {
1046                     Finder finder = foo.getKey();
1047                     RawData data = foo.getValue();
1048                     long count = counter.get(data);
1049                     result.add(new CounterData(finder.toString(), data, samples.get(data)), count);
1050                 }
1051                 return result;
1052             }
1053         }
1054 
1055         static Map<String, Transform<String, String>> functionMap = new HashMap<>();
1056         static String[] months = {
1057             "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
1058             "Und"
1059         };
1060         static List<String> days = Arrays.asList("sun", "mon", "tue", "wed", "thu", "fri", "sat");
1061         static List<String> unitOrder = DtdData.getUnitOrder().getOrder();
1062         static final MapComparator<String> dayPeriods =
1063                 new MapComparator<String>()
1064                         .add(
1065                                 "am",
1066                                 "pm",
1067                                 "midnight",
1068                                 "noon",
1069                                 "morning1",
1070                                 "morning2",
1071                                 "afternoon1",
1072                                 "afternoon2",
1073                                 "evening1",
1074                                 "evening2",
1075                                 "night1",
1076                                 "night2")
1077                         .freeze();
1078         static LikelySubtags likelySubtags = new LikelySubtags();
1079         static HyphenSplitter hyphenSplitter = new HyphenSplitter();
1080         static Transform<String, String> catFromTerritory;
1081         static Transform<String, String> catFromTimezone;
1082 
1083         static {
1084             // Put any new functions used in PathHeader.txt in here.
1085             // To change the order of items within a section or heading, set
1086             // order/suborder to be the relative position of the current item.
1087             functionMap.put(
1088                     "month",
1089                     new Transform<>() {
1090                         @Override
1091                         public String transform(String source) {
1092                             int m = Integer.parseInt(source);
1093                             order = m;
1094                             return months[m - 1];
1095                         }
1096                     });
1097             functionMap.put(
1098                     "count",
1099                     new Transform<>() {
1100                         @Override
1101                         public String transform(String source) {
1102                             suborder = new SubstringOrder(source);
1103                             return source;
1104                         }
1105                     });
1106             functionMap.put(
1107                     "count2",
1108                     new Transform<>() {
1109                         @Override
1110                         public String transform(String source) {
1111                             int pos = source.indexOf('-');
1112                             source = pos + source.substring(pos);
1113                             suborder = new SubstringOrder(source); // make 10000-...
1114                             // into 5-
1115                             return source;
1116                         }
1117                     });
1118             functionMap.put(
1119                     "currencySymbol",
1120                     new Transform<>() {
1121                         @Override
1122                         public String transform(String source) {
1123                             order = 901;
1124                             if (source.endsWith("narrow")) {
1125                                 order = 902;
1126                             }
1127                             if (source.endsWith("variant")) {
1128                                 order = 903;
1129                             }
1130                             return source;
1131                         }
1132                     });
1133             // &unitCount($1-$3-$4), where $1 is length, $2 is count, $3 is case (optional)
1134             // but also
1135             // &unitCount($1-$3-$5-$4), where $5 is case, $4 is gender — notice order change
1136             functionMap.put(
1137                     "unitCount",
1138                     new Transform<>() {
1139                         @Override
1140                         public String transform(String source) {
1141                             List<String> parts = HYPHEN_SPLITTER.splitToList(source);
1142                             if (parts.size() == 1) {
1143                                 return source;
1144                             }
1145                             int lengthNumber = Width.getValue(parts.get(0)).ordinal();
1146                             int type = 0;
1147                             int rest = 0;
1148                             switch (parts.get(1)) {
1149                                 case "gender":
1150                                     type = 0;
1151                                     break;
1152                                 case "displayName":
1153                                     type = 1;
1154                                     break;
1155                                 case "per":
1156                                     type = 2;
1157                                     break;
1158                                 default:
1159                                     type = 3;
1160                                     int countNumber =
1161                                             (parts.size() > 1
1162                                                             ? Count.valueOf(parts.get(1))
1163                                                             : Count.other)
1164                                                     .ordinal();
1165                                     int caseNumber =
1166                                             (parts.size() > 2
1167                                                             ? GrammarInfo.CaseValues.valueOf(
1168                                                                     parts.get(2))
1169                                                             : GrammarInfo.CaseValues.nominative)
1170                                                     .ordinal();
1171                                     int genderNumber = GrammarInfo.GenderValues.neuter.ordinal();
1172                                     if (parts.size() > 3) {
1173                                         String genderPart = parts.get(3);
1174                                         if (!genderPart.equals("dgender")) {
1175                                             genderNumber =
1176                                                     GrammarInfo.GenderValues.valueOf(genderPart)
1177                                                             .ordinal();
1178                                         }
1179                                         type = 4;
1180                                     }
1181                                     rest = (countNumber << 16) | (caseNumber << 8) | genderNumber;
1182                                     break;
1183                             }
1184                             order = (type << 28) | (lengthNumber << 24) | rest;
1185                             return source;
1186                         }
1187                     });
1188 
1189             functionMap.put(
1190                     "pluralNumber",
1191                     new Transform<>() {
1192                         @Override
1193                         public String transform(String source) {
1194                             order = GrammarInfo.PluralValues.valueOf(source).ordinal();
1195                             return source;
1196                         }
1197                     });
1198 
1199             functionMap.put(
1200                     "caseNumber",
1201                     new Transform<>() {
1202                         @Override
1203                         public String transform(String source) {
1204                             order = GrammarInfo.CaseValues.valueOf(source).ordinal();
1205                             return source;
1206                         }
1207                     });
1208 
1209             functionMap.put(
1210                     "genderNumber",
1211                     new Transform<>() {
1212                         @Override
1213                         public String transform(String source) {
1214                             order = GrammarInfo.GenderValues.valueOf(source).ordinal();
1215                             return source;
1216                         }
1217                     });
1218 
1219             functionMap.put(
1220                     "day",
1221                     new Transform<>() {
1222                         @Override
1223                         public String transform(String source) {
1224                             int m = days.indexOf(source);
1225                             order = m;
1226                             return source;
1227                         }
1228                     });
1229             functionMap.put(
1230                     "dayPeriod",
1231                     new Transform<>() {
1232                         @Override
1233                         public String transform(String source) {
1234                             try {
1235                                 order = dayPeriods.getNumericOrder(source);
1236                             } catch (Exception e) {
1237                                 // if an old item is tried, like "evening", this will fail.
1238                                 // so that old data still works, hack this.
1239                                 order = Math.abs(source.hashCode() << 16);
1240                             }
1241                             return source;
1242                         }
1243                     });
1244             functionMap.put(
1245                     "calendar",
1246                     new Transform<>() {
1247                         final Map<String, String> fixNames =
1248                                 Builder.with(new HashMap<String, String>())
1249                                         .put("islamicc", "Islamic Civil")
1250                                         .put("roc", "Minguo")
1251                                         .put("Ethioaa", "Ethiopic Amete Alem")
1252                                         .put("Gregory", "Gregorian")
1253                                         .put("iso8601", "ISO 8601")
1254                                         .freeze();
1255 
1256                         @Override
1257                         public String transform(String source) {
1258                             String result = fixNames.get(source);
1259                             return result != null ? result : UCharacter.toTitleCase(source, null);
1260                         }
1261                     });
1262 
1263             functionMap.put(
1264                     "calField",
1265                     new Transform<>() {
1266                         @Override
1267                         public String transform(String source) {
1268                             String[] fields = source.split(":", 3);
1269                             order = 0;
1270                             final List<String> widthValues =
1271                                     Arrays.asList("wide", "abbreviated", "short", "narrow");
1272                             final List<String> calendarFieldValues =
1273                                     Arrays.asList(
1274                                             "Eras",
1275                                             "Quarters",
1276                                             "Months",
1277                                             "Days",
1278                                             "DayPeriods",
1279                                             "Formats");
1280                             final List<String> calendarFormatTypes =
1281                                     Arrays.asList("Standard", "Flexible", "Intervals");
1282                             final List<String> calendarContextTypes =
1283                                     Arrays.asList("none", "format", "stand-alone");
1284                             final List<String> calendarFormatSubtypes =
1285                                     Arrays.asList(
1286                                             "date",
1287                                             "time",
1288                                             "time12",
1289                                             "time24",
1290                                             "dateTime",
1291                                             "fallback");
1292 
1293                             Map<String, String> fixNames =
1294                                     Builder.with(new HashMap<String, String>())
1295                                             .put("DayPeriods", "Day Periods")
1296                                             .put("format", "Formatting")
1297                                             .put("stand-alone", "Standalone")
1298                                             .put("none", "")
1299                                             .put("date", "Date Formats")
1300                                             .put("time", "Time Formats")
1301                                             .put("time12", "12 Hour Time Formats")
1302                                             .put("time24", "24 Hour Time Formats")
1303                                             .put("dateTime", "Date & Time Combination Formats")
1304                                             .freeze();
1305 
1306                             if (calendarFieldValues.contains(fields[0])) {
1307                                 order = calendarFieldValues.indexOf(fields[0]) * 100;
1308                             } else {
1309                                 order = calendarFieldValues.size() * 100;
1310                             }
1311 
1312                             if (fields[0].equals("Formats")) {
1313                                 if (calendarFormatTypes.contains(fields[1])) {
1314                                     order += calendarFormatTypes.indexOf(fields[1]) * 10;
1315                                 } else {
1316                                     order += calendarFormatTypes.size() * 10;
1317                                 }
1318                                 if (calendarFormatSubtypes.contains(fields[2])) {
1319                                     order += calendarFormatSubtypes.indexOf(fields[2]);
1320                                 } else {
1321                                     order += calendarFormatSubtypes.size();
1322                                 }
1323                             } else {
1324                                 if (widthValues.contains(fields[1])) {
1325                                     order += widthValues.indexOf(fields[1]) * 10;
1326                                 } else {
1327                                     order += widthValues.size() * 10;
1328                                 }
1329                                 if (calendarContextTypes.contains(fields[2])) {
1330                                     order += calendarContextTypes.indexOf(fields[2]);
1331                                 } else {
1332                                     order += calendarContextTypes.size();
1333                                 }
1334                             }
1335 
1336                             String[] fixedFields = new String[fields.length];
1337                             for (int i = 0; i < fields.length; i++) {
1338                                 String s = fixNames.get(fields[i]);
1339                                 fixedFields[i] = s != null ? s : fields[i];
1340                             }
1341 
1342                             return fixedFields[0]
1343                                     + " - "
1344                                     + fixedFields[1]
1345                                     + (fixedFields[2].length() > 0 ? " - " + fixedFields[2] : "");
1346                         }
1347                     });
1348 
1349             functionMap.put(
1350                     "titlecase",
1351                     new Transform<>() {
1352                         @Override
1353                         public String transform(String source) {
1354                             return UCharacter.toTitleCase(source, null);
1355                         }
1356                     });
1357             functionMap.put(
1358                     "categoryFromScript",
1359                     new Transform<>() {
1360                         @Override
1361                         public String transform(String source) {
1362                             String script = hyphenSplitter.split(source);
1363                             Info info = ScriptMetadata.getInfo(script);
1364                             if (info == null) {
1365                                 info = ScriptMetadata.getInfo("Zzzz");
1366                             }
1367                             order = 100 - info.idUsage.ordinal();
1368                             return info.idUsage.name;
1369                         }
1370                     });
1371             functionMap.put(
1372                     "categoryFromKey",
1373                     new Transform<>() {
1374                         final Map<String, String> fixNames =
1375                                 Builder.with(new HashMap<String, String>())
1376                                         .put("cf", "Currency Format")
1377                                         .put("em", "Emoji Presentation")
1378                                         .put("fw", "First Day of Week")
1379                                         .put("lb", "Line Break")
1380                                         .put("hc", "Hour Cycle")
1381                                         .put("ms", "Measurement System")
1382                                         .put("ss", "Sentence Break Suppressions")
1383                                         .freeze();
1384 
1385                         @Override
1386                         public String transform(String source) {
1387                             String fixedName = fixNames.get(source);
1388                             return fixedName != null ? fixedName : source;
1389                         }
1390                     });
1391             functionMap.put(
1392                     "languageSection",
1393                     new Transform<>() {
1394                         final char[] languageRangeStartPoints = {'A', 'E', 'K', 'O', 'T'};
1395                         final char[] languageRangeEndPoints = {'D', 'J', 'N', 'S', 'Z'};
1396 
1397                         @Override
1398                         public String transform(String source0) {
1399                             char firstLetter = getEnglishFirstLetter(source0).charAt(0);
1400                             for (int i = 0; i < languageRangeStartPoints.length; i++) {
1401                                 if (firstLetter >= languageRangeStartPoints[i]
1402                                         && firstLetter <= languageRangeEndPoints[i]) {
1403                                     return "Languages ("
1404                                             + Character.toUpperCase(languageRangeStartPoints[i])
1405                                             + "-"
1406                                             + Character.toUpperCase(languageRangeEndPoints[i])
1407                                             + ")";
1408                                 }
1409                             }
1410                             return "Languages";
1411                         }
1412                     });
1413             functionMap.put(
1414                     "firstLetter",
1415                     new Transform<>() {
1416                         @Override
1417                         public String transform(String source0) {
1418                             return getEnglishFirstLetter(source0);
1419                         }
1420                     });
1421             functionMap.put(
1422                     "languageSort",
1423                     new Transform<>() {
1424                         @Override
1425                         public String transform(String source0) {
1426                             String languageOnlyPart;
1427                             int underscorePos = source0.indexOf("_");
1428                             if (underscorePos > 0) {
1429                                 languageOnlyPart = source0.substring(0, underscorePos);
1430                             } else {
1431                                 languageOnlyPart = source0;
1432                             }
1433 
1434                             return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart)
1435                                     + " \u25BA "
1436                                     + source0;
1437                         }
1438                     });
1439             functionMap.put(
1440                     "scriptFromLanguage",
1441                     new Transform<>() {
1442                         @Override
1443                         public String transform(String source0) {
1444                             String language = hyphenSplitter.split(source0);
1445                             String script = likelySubtags.getLikelyScript(language);
1446                             if (script == null) {
1447                                 script = likelySubtags.getLikelyScript(language);
1448                             }
1449                             String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script);
1450                             return "Languages in "
1451                                     + (script.equals("Hans") || script.equals("Hant")
1452                                             ? "Han Script"
1453                                             : scriptName.endsWith(" Script")
1454                                                     ? scriptName
1455                                                     : scriptName + " Script");
1456                         }
1457                     });
1458             functionMap.put(
1459                     "categoryFromTerritory",
1460                     catFromTerritory =
1461                             new Transform<>() {
1462                                 @Override
1463                                 public String transform(String source) {
1464                                     String territory = getSubdivisionsTerritory(source, null);
1465                                     String container = Containment.getContainer(territory);
1466                                     order = Containment.getOrder(territory);
1467                                     return englishFile.getName(CLDRFile.TERRITORY_NAME, container);
1468                                 }
1469                             });
1470             functionMap.put(
1471                     "territorySection",
1472                     new Transform<>() {
1473                         final Set<String> specialRegions =
1474                                 new HashSet<>(Arrays.asList("EZ", "EU", "QO", "UN", "ZZ"));
1475 
1476                         @Override
1477                         public String transform(String source0) {
1478                             // support subdivisions
1479                             String theTerritory = getSubdivisionsTerritory(source0, null);
1480                             try {
1481                                 if (specialRegions.contains(theTerritory)
1482                                         || theTerritory.charAt(0) < 'A'
1483                                                 && Integer.parseInt(theTerritory) > 0) {
1484                                     return "Geographic Regions";
1485                                 }
1486                             } catch (NumberFormatException ex) {
1487                             }
1488                             String theContinent = Containment.getContinent(theTerritory);
1489                             String theSubContinent;
1490                             switch (theContinent) { // was Integer.valueOf
1491                                 case "019": // Americas - For the territorySection, we just group
1492                                     // North America & South America
1493                                     final String subcontinent =
1494                                             Containment.getSubcontinent(theTerritory);
1495                                     theSubContinent =
1496                                             subcontinent.equals("005")
1497                                                     ? "005"
1498                                                     : "003"; // was Integer.valueOf(subcontinent) ==
1499                                     // 5
1500                                     return "Territories ("
1501                                             + englishFile.getName(
1502                                                     CLDRFile.TERRITORY_NAME, theSubContinent)
1503                                             + ")";
1504                                 case "001":
1505                                 case "ZZ":
1506                                     return "Geographic Regions"; // not in containment
1507                                 default:
1508                                     return "Territories ("
1509                                             + englishFile.getName(
1510                                                     CLDRFile.TERRITORY_NAME, theContinent)
1511                                             + ")";
1512                             }
1513                         }
1514                     });
1515             functionMap.put(
1516                     "categoryFromTimezone",
1517                     catFromTimezone =
1518                             new Transform<>() {
1519                                 @Override
1520                                 public String transform(String source0) {
1521                                     String territory = Containment.getRegionFromZone(source0);
1522                                     if (territory == null) {
1523                                         territory = "ZZ";
1524                                     }
1525                                     return catFromTerritory.transform(territory);
1526                                 }
1527                             });
1528             functionMap.put(
1529                     "timeZonePage",
1530                     new Transform<>() {
1531                         Set<String> singlePageTerritories =
1532                                 new HashSet<>(Arrays.asList("AQ", "RU", "ZZ"));
1533 
1534                         @Override
1535                         public String transform(String source0) {
1536                             String theTerritory = Containment.getRegionFromZone(source0);
1537                             if (theTerritory == null
1538                                     || "001".equals(theTerritory)
1539                                     || "ZZ".equals(theTerritory)) {
1540                                 if ("Etc/Unknown".equals(source0)) {
1541                                     theTerritory = "ZZ";
1542                                 } else {
1543                                     throw new IllegalArgumentException(
1544                                             "ICU needs zone update? Source: "
1545                                                     + source0
1546                                                     + "; Territory: "
1547                                                     + theTerritory);
1548                                 }
1549                             }
1550                             if (singlePageTerritories.contains(theTerritory)) {
1551                                 return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory);
1552                             }
1553                             String theContinent = Containment.getContinent(theTerritory);
1554                             final String subcontinent = Containment.getSubcontinent(theTerritory);
1555                             String theSubContinent;
1556                             switch (Integer.parseInt(theContinent)) {
1557                                 case 9: // Oceania - For the timeZonePage, we group Australasia on
1558                                     // one page, and the rest of Oceania on the other.
1559                                     try {
1560                                         theSubContinent =
1561                                                 subcontinent.equals("053") ? "053" : "009"; // was
1562                                         // Integer.valueOf(subcontinent) ==
1563                                         // 53
1564                                     } catch (NumberFormatException ex) {
1565                                         theSubContinent = "009";
1566                                     }
1567                                     return englishFile.getName(
1568                                             CLDRFile.TERRITORY_NAME, theSubContinent);
1569                                 case 19: // Americas - For the timeZonePage, we just group North
1570                                     // America & South America
1571                                     theSubContinent =
1572                                             Integer.parseInt(subcontinent) == 5 ? "005" : "003";
1573                                     return englishFile.getName(
1574                                             CLDRFile.TERRITORY_NAME, theSubContinent);
1575                                 case 142: // Asia
1576                                     return englishFile.getName(
1577                                             CLDRFile.TERRITORY_NAME, subcontinent);
1578                                 default:
1579                                     return englishFile.getName(
1580                                             CLDRFile.TERRITORY_NAME, theContinent);
1581                             }
1582                         }
1583                     });
1584 
1585             functionMap.put(
1586                     "timezoneSorting",
1587                     new Transform<>() {
1588                         @Override
1589                         public String transform(String source) {
1590                             final List<String> codeValues =
1591                                     Arrays.asList(
1592                                             "generic-long",
1593                                             "generic-short",
1594                                             "standard-long",
1595                                             "standard-short",
1596                                             "daylight-long",
1597                                             "daylight-short");
1598                             if (codeValues.contains(source)) {
1599                                 order = codeValues.indexOf(source);
1600                             } else {
1601                                 order = codeValues.size();
1602                             }
1603                             return source;
1604                         }
1605                     });
1606 
1607             functionMap.put(
1608                     "tzdpField",
1609                     new Transform<>() {
1610                         @Override
1611                         public String transform(String source) {
1612                             Map<String, String> fieldNames =
1613                                     Builder.with(new HashMap<String, String>())
1614                                             .put("regionFormat", "Region Format - Generic")
1615                                             .put(
1616                                                     "regionFormat-standard",
1617                                                     "Region Format - Standard")
1618                                             .put(
1619                                                     "regionFormat-daylight",
1620                                                     "Region Format - Daylight")
1621                                             .put("gmtFormat", "GMT Format")
1622                                             .put("hourFormat", "GMT Hours/Minutes Format")
1623                                             .put("gmtZeroFormat", "GMT Zero Format")
1624                                             .put("fallbackFormat", "Location Fallback Format")
1625                                             .freeze();
1626                             final List<String> fieldOrder =
1627                                     Arrays.asList(
1628                                             "regionFormat",
1629                                             "regionFormat-standard",
1630                                             "regionFormat-daylight",
1631                                             "gmtFormat",
1632                                             "hourFormat",
1633                                             "gmtZeroFormat",
1634                                             "fallbackFormat");
1635 
1636                             if (fieldOrder.contains(source)) {
1637                                 order = fieldOrder.indexOf(source);
1638                             } else {
1639                                 order = fieldOrder.size();
1640                             }
1641 
1642                             String result = fieldNames.get(source);
1643                             return result == null ? source : result;
1644                         }
1645                     });
1646             functionMap.put(
1647                     "unit",
1648                     new Transform<>() {
1649                         @Override
1650                         public String transform(String source) {
1651                             int m = unitOrder.indexOf(source);
1652                             order = m;
1653                             return source.substring(source.indexOf('-') + 1);
1654                         }
1655                     });
1656 
1657             functionMap.put(
1658                     "numericSort",
1659                     new Transform<>() {
1660                         // Probably only works well for small values, like -5 through +4.
1661                         @Override
1662                         public String transform(String source) {
1663                             Integer pos = Integer.parseInt(source) + 5;
1664                             suborder = new SubstringOrder(pos.toString());
1665                             return source;
1666                         }
1667                     });
1668 
1669             functionMap.put(
1670                     "metazone",
1671                     new Transform<>() {
1672 
1673                         @Override
1674                         public String transform(String source) {
1675                             if (PathHeader.UNIFORM_CONTINENTS) {
1676                                 String container = getMetazonePageTerritory(source);
1677                                 order = Containment.getOrder(container);
1678                                 return englishFile.getName(CLDRFile.TERRITORY_NAME, container);
1679                             } else {
1680                                 String continent = metazoneToContinent.get(source);
1681                                 if (continent == null) {
1682                                     continent = "UnknownT";
1683                                 }
1684                                 return continent;
1685                             }
1686                         }
1687                     });
1688 
1689             Object[][] ctto = {
1690                 {"BUK", "MM"},
1691                 {"CSD", "RS"},
1692                 {"CSK", "CZ"},
1693                 {"DDM", "DE"},
1694                 {"EUR", "ZZ"},
1695                 {"RHD", "ZW"},
1696                 {"SUR", "RU"},
1697                 {"TPE", "TL"},
1698                 {"XAG", "ZZ"},
1699                 {"XAU", "ZZ"},
1700                 {"XBA", "ZZ"},
1701                 {"XBB", "ZZ"},
1702                 {"XBC", "ZZ"},
1703                 {"XBD", "ZZ"},
1704                 {"XDR", "ZZ"},
1705                 {"XEU", "ZZ"},
1706                 {"XFO", "ZZ"},
1707                 {"XFU", "ZZ"},
1708                 {"XPD", "ZZ"},
1709                 {"XPT", "ZZ"},
1710                 {"XRE", "ZZ"},
1711                 {"XSU", "ZZ"},
1712                 {"XTS", "ZZ"},
1713                 {"XUA", "ZZ"},
1714                 {"XXX", "ZZ"},
1715                 {"YDD", "YE"},
1716                 {"YUD", "RS"},
1717                 {"YUM", "RS"},
1718                 {"YUN", "RS"},
1719                 {"YUR", "RS"},
1720                 {"ZRN", "CD"},
1721                 {"ZRZ", "CD"},
1722             };
1723 
1724             Object[][] sctc = {
1725                 {"Northern America", "North America (C)"},
1726                 {"Central America", "North America (C)"},
1727                 {"Caribbean", "North America (C)"},
1728                 {"South America", "South America (C)"},
1729                 {"Northern Africa", "Northern Africa"},
1730                 {"Western Africa", "Western Africa"},
1731                 {"Middle Africa", "Middle Africa"},
1732                 {"Eastern Africa", "Eastern Africa"},
1733                 {"Southern Africa", "Southern Africa"},
1734                 {"Europe", "Northern/Western Europe"},
1735                 {"Northern Europe", "Northern/Western Europe"},
1736                 {"Western Europe", "Northern/Western Europe"},
1737                 {"Eastern Europe", "Southern/Eastern Europe"},
1738                 {"Southern Europe", "Southern/Eastern Europe"},
1739                 {"Western Asia", "Western Asia (C)"},
1740                 {"Central Asia", "Central Asia (C)"},
1741                 {"Eastern Asia", "Eastern Asia (C)"},
1742                 {"Southern Asia", "Southern Asia (C)"},
1743                 {"Southeast Asia", "Southeast Asia (C)"},
1744                 {"Australasia", "Oceania (C)"},
1745                 {"Melanesia", "Oceania (C)"},
1746                 {"Micronesian Region", "Oceania (C)"}, // HACK
1747                 {"Polynesia", "Oceania (C)"},
1748                 {"Unknown Region", "Unknown Region (C)"},
1749             };
1750 
1751             final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto);
1752             final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc);
1753             final Set<String> fundCurrencies =
1754                     new HashSet<>(
1755                             Arrays.asList(
1756                                     "CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI",
1757                                     "XEU", "ZAL"));
1758             final Set<String> offshoreCurrencies = new HashSet<>(Arrays.asList("CNH"));
1759             // TODO: Put this into supplementalDataInfo ?
1760 
1761             functionMap.put(
1762                     "categoryFromCurrency",
1763                     new Transform<>() {
1764                         @Override
1765                         public String transform(String source0) {
1766                             String tenderOrNot = "";
1767                             String territory =
1768                                     likelySubtags.getLikelyTerritoryFromCurrency(source0);
1769                             if (territory == null) {
1770                                 String tag;
1771                                 if (fundCurrencies.contains(source0)) {
1772                                     tag = " (fund)";
1773                                 } else if (offshoreCurrencies.contains(source0)) {
1774                                     tag = " (offshore)";
1775                                 } else {
1776                                     tag = " (old)";
1777                                 }
1778                                 tenderOrNot = ": " + source0 + tag;
1779                             }
1780                             if (currencyToTerritoryOverrides.keySet().contains(source0)) {
1781                                 territory = currencyToTerritoryOverrides.get(source0);
1782                             } else if (territory == null) {
1783                                 territory = source0.substring(0, 2);
1784                             }
1785 
1786                             if (territory.equals("ZZ")) {
1787                                 order = 999;
1788                                 return englishFile.getName(CLDRFile.TERRITORY_NAME, territory)
1789                                         + ": "
1790                                         + source0;
1791                             } else {
1792                                 return catFromTerritory.transform(territory)
1793                                         + ": "
1794                                         + englishFile.getName(CLDRFile.TERRITORY_NAME, territory)
1795                                         + tenderOrNot;
1796                             }
1797                         }
1798                     });
1799             functionMap.put(
1800                     "continentFromCurrency",
1801                     new Transform<>() {
1802                         @Override
1803                         public String transform(String source0) {
1804                             String subContinent;
1805                             String territory =
1806                                     likelySubtags.getLikelyTerritoryFromCurrency(source0);
1807                             if (currencyToTerritoryOverrides.keySet().contains(source0)) {
1808                                 territory = currencyToTerritoryOverrides.get(source0);
1809                             } else if (territory == null) {
1810                                 territory = source0.substring(0, 2);
1811                             }
1812 
1813                             if (territory.equals("ZZ")) {
1814                                 order = 999;
1815                                 subContinent =
1816                                         englishFile.getName(CLDRFile.TERRITORY_NAME, territory);
1817                             } else {
1818                                 subContinent = catFromTerritory.transform(territory);
1819                             }
1820 
1821                             String result =
1822                                     subContinentToContinent.get(
1823                                             subContinent); // the continent is the last word in the
1824                             // territory representation
1825                             return result;
1826                         }
1827                     });
1828             functionMap.put(
1829                     "numberingSystem",
1830                     new Transform<>() {
1831                         @Override
1832                         public String transform(String source0) {
1833                             if ("latn".equals(source0)) {
1834                                 return "";
1835                             }
1836                             String displayName =
1837                                     englishFile.getStringValue(
1838                                             "//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\""
1839                                                     + source0
1840                                                     + "\"]");
1841                             return "using "
1842                                     + (displayName == null
1843                                             ? source0
1844                                             : displayName + " (" + source0 + ")");
1845                         }
1846                     });
1847 
1848             functionMap.put(
1849                     "datefield",
1850                     new Transform<>() {
1851                         private final String[] datefield = {
1852                             "era", "era-short", "era-narrow",
1853                             "century", "century-short", "century-narrow",
1854                             "year", "year-short", "year-narrow",
1855                             "quarter", "quarter-short", "quarter-narrow",
1856                             "month", "month-short", "month-narrow",
1857                             "week", "week-short", "week-narrow",
1858                             "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
1859                             "day", "day-short", "day-narrow",
1860                             "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
1861                             "weekday", "weekday-short", "weekday-narrow",
1862                             "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
1863                             "dayperiod", "dayperiod-short", "dayperiod-narrow",
1864                             "zone", "zone-short", "zone-narrow",
1865                             "hour", "hour-short", "hour-narrow",
1866                             "minute", "minute-short", "minute-narrow",
1867                             "second", "second-short", "second-narrow",
1868                             "millisecond", "millisecond-short", "millisecond-narrow",
1869                             "microsecond", "microsecond-short", "microsecond-narrow",
1870                             "nanosecond", "nanosecond-short", "nanosecond-narrow",
1871                         };
1872 
1873                         @Override
1874                         public String transform(String source) {
1875                             order = getIndex(source, datefield);
1876                             return source;
1877                         }
1878                     });
1879             // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"]
1880             functionMap.put(
1881                     "relativeDate",
1882                     new Transform<>() {
1883                         private final String[] relativeDateField = {
1884                             "year", "year-short", "year-narrow",
1885                             "quarter", "quarter-short", "quarter-narrow",
1886                             "month", "month-short", "month-narrow",
1887                             "week", "week-short", "week-narrow",
1888                             "day", "day-short", "day-narrow",
1889                             "hour", "hour-short", "hour-narrow",
1890                             "minute", "minute-short", "minute-narrow",
1891                             "second", "second-short", "second-narrow",
1892                             "sun", "sun-short", "sun-narrow",
1893                             "mon", "mon-short", "mon-narrow",
1894                             "tue", "tue-short", "tue-narrow",
1895                             "wed", "wed-short", "wed-narrow",
1896                             "thu", "thu-short", "thu-narrow",
1897                             "fri", "fri-short", "fri-narrow",
1898                             "sat", "sat-short", "sat-narrow",
1899                         };
1900                         private final String[] longNames = {
1901                             "Year", "Year Short", "Year Narrow",
1902                             "Quarter", "Quarter Short", "Quarter Narrow",
1903                             "Month", "Month Short", "Month Narrow",
1904                             "Week", "Week Short", "Week Narrow",
1905                             "Day", "Day Short", "Day Narrow",
1906                             "Hour", "Hour Short", "Hour Narrow",
1907                             "Minute", "Minute Short", "Minute Narrow",
1908                             "Second", "Second Short", "Second Narrow",
1909                             "Sunday", "Sunday Short", "Sunday Narrow",
1910                             "Monday", "Monday Short", "Monday Narrow",
1911                             "Tuesday", "Tuesday Short", "Tuesday Narrow",
1912                             "Wednesday", "Wednesday Short", "Wednesday Narrow",
1913                             "Thursday", "Thursday Short", "Thursday Narrow",
1914                             "Friday", "Friday Short", "Friday Narrow",
1915                             "Saturday", "Saturday Short", "Saturday Narrow",
1916                         };
1917 
1918                         @Override
1919                         public String transform(String source) {
1920                             order = getIndex(source, relativeDateField) + 100;
1921                             return "Relative " + longNames[getIndex(source, relativeDateField)];
1922                         }
1923                     });
1924             // Sorts numberSystem items (except for decimal formats).
1925             functionMap.put(
1926                     "number",
1927                     new Transform<>() {
1928                         private final String[] symbols = {
1929                             "decimal",
1930                             "group",
1931                             "plusSign",
1932                             "minusSign",
1933                             "approximatelySign",
1934                             "percentSign",
1935                             "perMille",
1936                             "exponential",
1937                             "superscriptingExponent",
1938                             "infinity",
1939                             "nan",
1940                             "list",
1941                             "currencies"
1942                         };
1943 
1944                         @Override
1945                         public String transform(String source) {
1946                             String[] parts = source.split("-");
1947                             order = getIndex(parts[0], symbols);
1948                             // e.g. "currencies-one"
1949                             if (parts.length > 1) {
1950                                 suborder = new SubstringOrder(parts[1]);
1951                             }
1952                             return source;
1953                         }
1954                     });
1955             functionMap.put(
1956                     "numberFormat",
1957                     new Transform<>() {
1958                         @Override
1959                         public String transform(String source) {
1960                             final List<String> fieldOrder =
1961                                     Arrays.asList(
1962                                             "standard-decimal",
1963                                             "standard-currency",
1964                                             "standard-currency-accounting",
1965                                             "standard-percent",
1966                                             "standard-scientific");
1967 
1968                             if (fieldOrder.contains(source)) {
1969                                 order = fieldOrder.indexOf(source);
1970                             } else {
1971                                 order = fieldOrder.size();
1972                             }
1973 
1974                             return source;
1975                         }
1976                     });
1977 
1978             functionMap.put(
1979                     "localePattern",
1980                     new Transform<>() {
1981                         @Override
1982                         public String transform(String source) {
1983                             // Put localeKeyTypePattern behind localePattern and
1984                             // localeSeparator.
1985                             if (source.equals("localeKeyTypePattern")) {
1986                                 order = 10;
1987                             }
1988                             return source;
1989                         }
1990                     });
1991             functionMap.put(
1992                     "listOrder",
1993                     new Transform<>() {
1994                         private String[] listParts = {"2", "start", "middle", "end"};
1995 
1996                         @Override
1997                         public String transform(String source) {
1998                             order = getIndex(source, listParts);
1999                             return source;
2000                         }
2001                     });
2002 
2003             functionMap.put(
2004                     "personNameSection",
2005                     new Transform<>() {
2006                         @Override
2007                         public String transform(String source) {
2008                             // sampleName item values in desired sort order
2009                             final List<String> itemValues =
2010                                     PersonNameFormatter.SampleType.ALL_STRINGS;
2011                             if (source.equals("NameOrder")) {
2012                                 order = 0;
2013                                 return "NameOrder for Locales";
2014                             }
2015                             if (source.equals("Parameters")) {
2016                                 order = 4;
2017                                 return "Default Parameters";
2018                             }
2019 
2020                             if (source.equals("AuxiliaryItems")) {
2021                                 order = 10;
2022                                 return source;
2023                             }
2024                             String itemPrefix = "SampleName:";
2025                             if (source.startsWith(itemPrefix)) {
2026                                 String itemValue = source.substring(itemPrefix.length());
2027                                 order = 20 + itemValues.indexOf(itemValue);
2028                                 return "SampleName Fields for Item: " + itemValue;
2029                             }
2030                             String pnPrefix = "PersonName:";
2031                             if (source.startsWith(pnPrefix)) {
2032                                 String attrValues = source.substring(pnPrefix.length());
2033                                 List<String> parts = HYPHEN_SPLITTER.splitToList(attrValues);
2034 
2035                                 String nameOrder = parts.get(0);
2036                                 if (nameOrder.contentEquals("sorting")) {
2037                                     order = 40;
2038                                     return "PersonName Sorting Patterns (Usage: referring)";
2039                                 }
2040                                 order = 30;
2041                                 if (nameOrder.contentEquals("surnameFirst")) {
2042                                     order += 1;
2043                                 }
2044                                 String nameUsage = parts.get(1);
2045                                 if (nameUsage.contentEquals("monogram")) {
2046                                     order += 20;
2047                                     return "PersonName Monogram Patterns for Order: " + nameOrder;
2048                                 }
2049                                 return "PersonName Main Patterns for Order: " + nameOrder;
2050                             }
2051                             order = 60;
2052                             return source;
2053                         }
2054                     });
2055 
2056             functionMap.put(
2057                     "personNameOrder",
2058                     new Transform<>() {
2059                         @Override
2060                         public String transform(String source) {
2061                             // personName attribute values: each group in desired
2062                             // sort order, but groups from least important to most
2063                             final List<String> attrValues =
2064                                     Arrays.asList(
2065                                             "referring",
2066                                             "addressing", // usage values to include
2067                                             "formal",
2068                                             "informal", // formality values
2069                                             "long",
2070                                             "medium",
2071                                             "short"); // length values
2072                             // order & length values handled in &personNameSection
2073 
2074                             List<String> parts = HYPHEN_SPLITTER.splitToList(source);
2075                             order = 0;
2076                             String attributes = "";
2077                             boolean skipReferring = false;
2078                             for (String part : parts) {
2079                                 if (attrValues.contains(part)) {
2080                                     order += (1 << attrValues.indexOf(part));
2081                                     // anything else like alt="variant" is at order 0
2082                                     if (!skipReferring || !part.contentEquals("referring")) {
2083                                         // Add this part to display attribute string
2084                                         if (attributes.length() == 0) {
2085                                             attributes = part;
2086                                         } else {
2087                                             attributes = attributes + "-" + part;
2088                                         }
2089                                     }
2090                                 } else if (part.contentEquals("sorting")) {
2091                                     skipReferring = true; // For order=sorting, don't display
2092                                     // usage=referring
2093                                 }
2094                             }
2095                             return attributes;
2096                         }
2097                     });
2098 
2099             functionMap.put(
2100                     "sampleNameOrder",
2101                     new Transform<>() {
2102                         @Override
2103                         public String transform(String source) {
2104                             // The various nameField attribute values: each group in desired
2105                             // sort order, but groups from least important to most
2106                             final List<String> attrValues =
2107                                     Arrays.asList(
2108                                             "informal",
2109                                             "prefix",
2110                                             "core", // modifiers for nameField type
2111                                             "prefix",
2112                                             "given",
2113                                             "given2",
2114                                             "surname",
2115                                             "surname2",
2116                                             "suffix"); // values for nameField type
2117 
2118                             List<String> parts = HYPHEN_SPLITTER.splitToList(source);
2119                             order = 0;
2120                             for (String part : parts) {
2121                                 if (attrValues.contains(part)) {
2122                                     order += (1 << attrValues.indexOf(part));
2123                                 } // anything else like alt="variant" is at order 0
2124                             }
2125                             return source;
2126                         }
2127                     });
2128 
2129             functionMap.put(
2130                     "alphaOrder",
2131                     new Transform<>() {
2132                         @Override
2133                         public String transform(String source) {
2134                             order = 0;
2135                             return source;
2136                         }
2137                     });
2138             functionMap.put(
2139                     "transform",
2140                     new Transform<>() {
2141                         Splitter commas = Splitter.on(',').trimResults();
2142 
2143                         @Override
2144                         public String transform(String source) {
2145                             List<String> parts = commas.splitToList(source);
2146                             return parts.get(1)
2147                                     + (parts.get(0).equals("both") ? "↔︎" : "→")
2148                                     + parts.get(2)
2149                                     + (parts.size() > 3 ? "/" + parts.get(3) : "");
2150                         }
2151                     });
2152             functionMap.put(
2153                     "major",
2154                     new Transform<>() {
2155                         @Override
2156                         public String transform(String source) {
2157                             return getCharacterPageId(source).toString();
2158                         }
2159                     });
2160             functionMap.put(
2161                     "minor",
2162                     new Transform<>() {
2163                         @Override
2164                         public String transform(String source) {
2165                             String minorCat = Emoji.getMinorCategory(source);
2166                             order = Emoji.getEmojiMinorOrder(minorCat);
2167                             return minorCat;
2168                         }
2169                     });
2170             /**
2171              * Use the ordering of the emoji in getEmojiToOrder rather than alphabetic, since the
2172              * collator data won't be ready until the candidates are final.
2173              */
2174             functionMap.put(
2175                     "emoji",
2176                     new Transform<>() {
2177                         @Override
2178                         public String transform(String source) {
2179                             int dashPos = source.indexOf(' ');
2180                             String emoji = source.substring(0, dashPos);
2181                             order =
2182                                     (Emoji.getEmojiToOrder(emoji) << 1)
2183                                             + (source.endsWith("name") ? 0 : 1);
2184                             return source;
2185                         }
2186                     });
2187         }
2188 
2189         private static int getIndex(String item, String[] array) {
2190             for (int i = 0; i < array.length; i++) {
2191                 if (item.equals(array[i])) {
2192                     return i;
2193                 }
2194             }
2195             return -1;
2196         }
2197 
2198         private static String getEnglishFirstLetter(String s) {
2199             String languageOnlyPart;
2200             int underscorePos = s.indexOf("_");
2201             if (underscorePos > 0) {
2202                 languageOnlyPart = s.substring(0, underscorePos);
2203             } else {
2204                 languageOnlyPart = s;
2205             }
2206             final String name = englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart);
2207             return name == null ? "?" : name.substring(0, 1).toUpperCase();
2208         }
2209 
2210         static class HyphenSplitter {
2211             String main;
2212             String extras;
2213 
2214             String split(String source) {
2215                 int hyphenPos = source.indexOf('-');
2216                 if (hyphenPos < 0) {
2217                     main = source;
2218                     extras = "";
2219                 } else {
2220                     main = source.substring(0, hyphenPos);
2221                     extras = source.substring(hyphenPos);
2222                 }
2223                 return main;
2224             }
2225         }
2226 
2227         /**
2228          * This converts "functions", like &month, and sets the order.
2229          *
2230          * @param input
2231          * @param orderIn
2232          * @return
2233          */
2234         private static String fix(String input, int orderIn) {
2235             input = RegexLookup.replace(input, args.value);
2236             order = orderIn;
2237             suborder = null;
2238             int pos = 0;
2239             while (true) {
2240                 int functionStart = input.indexOf('&', pos);
2241                 if (functionStart < 0) {
2242                     if ("Volume".equals(input)) {
2243                         return getVolumePageId(args.value[0] /* path */).toString();
2244                     }
2245                     return input;
2246                 }
2247                 int functionEnd = input.indexOf('(', functionStart);
2248                 int argEnd =
2249                         input.indexOf(
2250                                 ')', functionEnd + 2); // we must insert at least one character
2251                 Transform<String, String> func =
2252                         functionMap.get(input.substring(functionStart + 1, functionEnd));
2253                 final String arg = input.substring(functionEnd + 1, argEnd);
2254                 String temp = func.transform(arg);
2255                 if (temp == null) {
2256                     func.transform(arg);
2257                     throw new IllegalArgumentException(
2258                             "Function returns invalid results for «" + arg + "».");
2259                 }
2260                 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1);
2261                 pos = functionStart + temp.length();
2262             }
2263         }
2264 
2265         private static Set<UnitConverter.UnitSystem> METRIC =
2266                 Set.of(UnitConverter.UnitSystem.metric, UnitConverter.UnitSystem.metric_adjacent);
2267 
2268         private static PageId getVolumePageId(String path) {
2269             // Extract the unit from the path. For example, if path is
2270             // //ldml/units/unitLength[@type="narrow"]/unit[@type="volume-cubic-kilometer"]/displayName
2271             // then extract "volume-cubic-kilometer" which is the long unit id
2272             final String longUnitId =
2273                     XPathParts.getFrozenInstance(path).findAttributeValue("unit", "type");
2274             if (longUnitId == null) {
2275                 throw new InternalCldrException("Missing unit in path " + path);
2276             }
2277             final UnitConverter uc = supplementalDataInfo.getUnitConverter();
2278             // Convert, for example, "volume-cubic-kilometer" to "cubic-kilometer"
2279             final String shortUnitId = uc.getShortId(longUnitId);
2280             if (!Collections.disjoint(METRIC, uc.getSystemsEnum(shortUnitId))) {
2281                 return PageId.Volume_Metric;
2282             } else {
2283                 return PageId.Volume_Other;
2284             }
2285         }
2286 
2287         /**
2288          * Collect all the paths for a CLDRFile, and make sure that they have cached PathHeaders
2289          *
2290          * @param file
2291          * @return immutable set of paths in the file
2292          */
2293         public Set<String> pathsForFile(CLDRFile file) {
2294             // make sure we cache all the path headers
2295             HashSet<String> filePaths = new HashSet<>();
2296             file.fullIterable().forEach(filePaths::add);
2297             for (String path : filePaths) {
2298                 try {
2299                     fromPath(path); // call to make sure cached
2300                 } catch (Throwable t) {
2301                     // ... some other exception
2302                 }
2303             }
2304             return Collections.unmodifiableSet(filePaths);
2305         }
2306 
2307         /**
2308          * Returns those regexes that were never matched.
2309          *
2310          * @return
2311          */
2312         public Set<String> getUnmatchedRegexes() {
2313             Map<String, RawData> outputUnmatched = new LinkedHashMap<>();
2314             lookup.getUnmatchedPatterns(matchersFound, outputUnmatched);
2315             return outputUnmatched.keySet();
2316         }
2317     }
2318 
2319     /**
2320      * Return the territory used for the title of the Metazone page in the Survey Tool.
2321      *
2322      * @param source
2323      * @return
2324      */
2325     public static String getMetazonePageTerritory(String source) {
2326         String result = metazoneToPageTerritory.get(source);
2327         return result == null ? "ZZ" : result;
2328     }
2329 
    // Fixed ordering of keys: "displayName" first, then (presumably) the plural categories, then
    // "per". NOTE(review): not referenced in this part of the file; confirm where it is used.
    private static final List<String> COUNTS =
            Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per");
2332 
2333     private static Collator alphabetic;
2334 
2335     private static int alphabeticCompare(String aa, String bb) {
2336         if (alphabetic == null) {
2337             initializeAlphabetic();
2338         }
2339         return alphabetic.compare(aa, bb);
2340     }
2341 
2342     private static synchronized void initializeAlphabetic() {
2343         // Lazy initialization: don't call CLDRConfig.getInstance() too early or we'll get
2344         // "CLDRConfig.getInstance() was called prior to SurveyTool setup" when called from
2345         // com.ibm.ws.microprofile.openapi.impl.core.jackson.ModelResolver._addEnumProps
2346         if (alphabetic == null) {
2347             alphabetic = CLDRConfig.getInstance().getCollatorRoot();
2348         }
2349     }
2350 
2351     /**
2352      * @deprecated use CLDRConfig.getInstance().urls() instead
2353      */
2354     @Deprecated
2355     public enum BaseUrl {
2356         // http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328
2357         // http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1
2358         SMOKE("https://st.unicode.org/smoketest/survey"),
2359         PRODUCTION("https://st.unicode.org/cldr-apps/survey");
2360         final String base;
2361 
2362         private BaseUrl(String url) {
2363             base = url;
2364         }
2365     }
2366 
2367     /**
2368      * @deprecated, use CLDRConfig.urls().forPathHeader() instead.
2369      * @param baseUrl
2370      * @param locale
2371      * @return
2372      */
2373     public String getUrl(BaseUrl baseUrl, String locale) {
2374         return getUrl(baseUrl.base, locale);
2375     }
2376 
2377     /**
2378      * @deprecated, use CLDRConfig.urls().forPathHeader() instead.
2379      * @param baseUrl
2380      * @param locale
2381      * @return
2382      */
2383     public String getUrl(String baseUrl, String locale) {
2384         return getUrl(baseUrl, locale, getOriginalPath());
2385     }
2386 
2387     /**
2388      * Map http://st.unicode.org/smoketest/survey to http://st.unicode.org/smoketest etc
2389      *
2390      * @param str
2391      * @return
2392      */
2393     public static String trimLast(String str) {
2394         int n = str.lastIndexOf('/');
2395         if (n == -1) return "";
2396         return str.substring(0, n + 1);
2397     }
2398 
2399     public static String getUrlForLocalePath(String locale, String path) {
2400         return getUrl(SURVEY_URL, locale, path);
2401     }
2402 
2403     public static String getUrl(String baseUrl, String locale, String path) {
2404         return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path);
2405     }
2406 
2407     /**
2408      * @deprecated use the version with CLDRURLS instead
2409      * @param baseUrl
2410      * @param file
2411      * @param path
2412      * @return
2413      */
2414     @Deprecated
2415     public static String getLinkedView(String baseUrl, CLDRFile file, String path) {
2416         return SECTION_LINK
2417                 + PathHeader.getUrl(baseUrl, file.getLocaleID(), path)
2418                 + "'><em>view</em></a>";
2419     }
2420 
2421     public static String getLinkedView(CLDRURLS urls, CLDRFile file, String path) {
2422         return SECTION_LINK + urls.forXpath(file.getLocaleID(), path) + "'><em>view</em></a>";
2423     }
2424 
    // Base URL obtained from CLDRConfig when this class is initialized.
    // NOTE(review): this calls CLDRConfig.getInstance() during static initialization, which the
    // comment in initializeAlphabetic() warns against doing too early; confirm this is safe.
    private static final String SURVEY_URL = CLDRConfig.getInstance().urls().base();
2426 
2427     /**
2428      * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix.
2429      * Otherwise return the input as is.
2430      *
2431      * @param input
2432      * @param suffix
2433      * @return
2434      */
2435     private static String getSubdivisionsTerritory(String input, Output<String> suffix) {
2436         String theTerritory;
2437         if (StandardCodes.LstrType.subdivision.isWellFormed(input)) {
2438             int territoryEnd = input.charAt(0) < 'A' ? 3 : 2;
2439             theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT);
2440             if (suffix != null) {
2441                 suffix.value = input.substring(territoryEnd);
2442             }
2443         } else {
2444             theTerritory = input;
2445             if (suffix != null) {
2446                 suffix.value = "";
2447             }
2448         }
2449         return theTerritory;
2450     }
2451 
2452     /**
2453      * Should this path header be hidden?
2454      *
2455      * @return true to hide, else false
2456      */
2457     public boolean shouldHide() {
2458         switch (status) {
2459             case HIDE:
2460             case DEPRECATED:
2461                 return true;
2462             case READ_ONLY:
2463             case READ_WRITE:
2464             case LTR_ALWAYS:
2465                 return false;
2466             default:
2467                 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status);
2468                 return false;
2469         }
2470     }
2471 
2472     /**
2473      * Are reading and writing allowed for this path header?
2474      *
2475      * @return true if reading and writing are allowed, else false
2476      */
2477     public boolean canReadAndWrite() {
2478         switch (status) {
2479             case READ_WRITE:
2480             case LTR_ALWAYS:
2481                 return true;
2482             case HIDE:
2483             case DEPRECATED:
2484             case READ_ONLY:
2485                 return false;
2486             default:
2487                 logger.log(java.util.logging.Level.SEVERE, "Missing case for " + status);
2488                 return false;
2489         }
2490     }
2491 
2492     private static UnicodeMap<PageId> nonEmojiMap = null;
2493 
2494     /**
2495      * Return the PageId for the given character
2496      *
2497      * @param cp the character as a string
2498      * @return the PageId
2499      */
2500     private static PageId getCharacterPageId(String cp) {
2501         if (Emoji.getAllRgiNoES().contains(cp)) {
2502             return Emoji.getPageId(cp);
2503         }
2504         if (nonEmojiMap == null) {
2505             nonEmojiMap = createNonEmojiMap();
2506         }
2507         PageId pageId = nonEmojiMap.get(cp);
2508         if (pageId == null) {
2509             throw new InternalCldrException("Failure getting character page id");
2510         }
2511         return pageId;
2512     }
2513 
2514     /**
2515      * Create the map from non-emoji characters to pages. Call with lazy initialization to avoid
2516      * static initialization bugs, otherwise PageId.OtherSymbols could still be null.
2517      *
2518      * @return the map from character to PageId
2519      */
2520     private static UnicodeMap<PageId> createNonEmojiMap() {
2521         return new UnicodeMap<PageId>()
2522                 .putAll(new UnicodeSet("[:P:]"), PageId.Punctuation)
2523                 .putAll(new UnicodeSet("[:Sm:]"), PageId.MathSymbols)
2524                 .putAll(new UnicodeSet("[^[:Sm:][:P:]]"), PageId.OtherSymbols)
2525                 .freeze();
2526     }
2527 }
2528