xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRTransforms.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /** */
2 package org.unicode.cldr.util;
3 
4 import com.google.common.collect.BiMap;
5 import com.google.common.collect.HashBiMap;
6 import com.google.common.collect.HashMultimap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.Multimap;
9 import com.google.common.collect.Multimaps;
10 import com.google.common.collect.TreeMultimap;
11 import com.ibm.icu.lang.UScript;
12 import com.ibm.icu.text.RuleBasedTransliterator;
13 import com.ibm.icu.text.Transliterator;
14 import com.ibm.icu.text.UnicodeFilter;
15 import com.ibm.icu.util.ICUUncheckedIOException;
16 import java.io.File;
17 import java.io.IOException;
18 import java.io.UncheckedIOException;
19 import java.io.Writer;
20 import java.util.Arrays;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashSet;
24 import java.util.LinkedHashSet;
25 import java.util.List;
26 import java.util.Locale;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Set;
30 import java.util.TreeMap;
31 import java.util.TreeSet;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34 import java.util.stream.Collectors;
35 import org.unicode.cldr.tool.LikelySubtags;
36 import org.unicode.cldr.util.DiscreteComparator.Builder;
37 
38 public class CLDRTransforms {
39 
    /** Directory of transform files: {@code CLDRPaths.COMMON_DIRECTORY + "transforms/"}. */
    public static final String TRANSFORM_DIR = (CLDRPaths.COMMON_DIRECTORY + "transforms/");

    /** The shared instance returned by {@link #getInstance()}. */
    static final CLDRTransforms SINGLETON = new CLDRTransforms();

    /**
     * When true, {@code internalRegisterNoReverseId} re-fetches each transliterator it has just
     * registered and throws if its displayed rules differ from those of the registered instance.
     */
    private static final boolean PARANOID = true;
45 
getInstance()46     public static CLDRTransforms getInstance() {
47         return SINGLETON;
48     }
49 
getShowProgress()50     public Appendable getShowProgress() {
51         return showProgress;
52     }
53 
setShowProgress(Appendable showProgress)54     public CLDRTransforms setShowProgress(Appendable showProgress) {
55         this.showProgress = showProgress;
56         return this;
57     }
58 
    /**
     * IDs added by {@code internalRegisterNoReverseId} once a transliterator has been built from
     * rules for them; consulted by {@link #getInstance(String)}.
     */
    final Set<String> overridden = new HashSet<>();
60     // final DependencyOrder dependencyOrder = new DependencyOrder();
61 
62     //    static public class RegexFindFilenameFilter implements FilenameFilter {
63     //        Matcher matcher;
64     //
65     //        public RegexFindFilenameFilter(Matcher filter) {
66     //            matcher = filter;
67     //        }
68     //
69     //        @Override
70     //        public boolean accept(File dir, String name) {
71     //            return matcher.reset(name).find();
72     //        }
73     //    }
74 
75     /**
76      * @param dir TODO
77      * @param namesMatchingRegex TODO
78      * @param showProgress null if no progress needed
79      * @param skipDashTIds TODO
80      * @return
81      */
registerCldrTransforms( String dir, String namesMatchingRegex, Appendable showProgress, boolean keepDashTIds)82     public static void registerCldrTransforms(
83             String dir, String namesMatchingRegex, Appendable showProgress, boolean keepDashTIds) {
84         CLDRTransforms r = getInstance();
85         if (dir == null) {
86             dir = TRANSFORM_DIR;
87         }
88         // reorder to preload some
89         r.showProgress = showProgress;
90         Set<String> ordered = getFileRegistrationOrder(dir);
91 
92         if (namesMatchingRegex != null) {
93             Matcher filter = PatternCache.get(namesMatchingRegex).matcher("");
94             ordered =
95                     ordered.stream()
96                             .filter(x -> filter.reset(x).matches())
97                             .collect(Collectors.toCollection(LinkedHashSet::new));
98             //            r.deregisterIcuTransliterators(filter);
99             //            files = Arrays.asList(new File(TRANSFORM_DIR).list(new
100             // RegexFindFilenameFilter(filter)));
101             //            ordered = r.dependencyOrder.getOrderedItems(files, filter, true);
102         }
103 
104         // System.out.println(ordered);
105         for (String cldrFileName : ordered) {
106             r.registerTransliteratorsFromXML(
107                     dir, cldrFileName, Collections.emptySet(), keepDashTIds);
108         }
109         Transliterator.registerAny(); // do this last!
110     }
111 
getAvailableIds()112     public static List<String> getAvailableIds() {
113         return Arrays.asList(new File(TRANSFORM_DIR).list());
114     }
115 
getOverriddenTransliterators()116     public Set<String> getOverriddenTransliterators() {
117         return Collections.unmodifiableSet(overridden);
118     }
119 
    /**
     * Transliterator obtained for the ID {@code "[:Mn:]any-hex/java"}.
     *
     * <p>NOTE(review): presumably used to show characters matching {@code [:Mn:]} as Java hex
     * escapes; its use is not visible in this part of the file - confirm.
     */
    static Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");
121 
getInstance(String id)122     public Transliterator getInstance(String id) {
123         if (!overridden.contains(id)) {
124             throw new IllegalArgumentException("No overriden transform for " + id);
125         }
126         return Transliterator.getInstance(id);
127     }
128 
129     public static Pattern TRANSFORM_ID_PATTERN = PatternCache.get("(.+)-([^/]+)(/(.*))?");
130 
getReverseInstance(String id)131     public Transliterator getReverseInstance(String id) {
132         Matcher matcher = TRANSFORM_ID_PATTERN.matcher(id);
133         if (!matcher.matches()) {
134             throw new IllegalArgumentException("**No transform for " + id);
135         }
136         return getInstance(
137                 matcher.group(2)
138                         + "-"
139                         + matcher.group(1)
140                         + (matcher.group(4) == null ? "" : "/" + matcher.group(4)));
141     }
142 
    /**
     * Maps a transform's display ID ({@code ParsedTransformID.getDisplayId()}) to its ID string
     * ({@code ParsedTransformID.toString()}); filled by {@code addDisplayNameToId}.
     */
    private BiMap<String, String> displayNameToId = HashBiMap.create();
144 
getDisplayNameToId()145     public BiMap<String, String> getDisplayNameToId() {
146         return displayNameToId;
147     }
148 
addDisplayNameToId(Map<String, String> ids2, ParsedTransformID directionInfo)149     private void addDisplayNameToId(Map<String, String> ids2, ParsedTransformID directionInfo) {
150         displayNameToId.put(directionInfo.getDisplayId(), directionInfo.toString());
151     }
152 
registerTransliteratorsFromXML( String dir, String cldrFileName, Set<String> cantSkip, boolean keepDashTIds)153     public String registerTransliteratorsFromXML(
154             String dir, String cldrFileName, Set<String> cantSkip, boolean keepDashTIds) {
155         ParsedTransformID directionInfo = new ParsedTransformID();
156         String ruleString = getIcuRulesFromXmlFile(dir, cldrFileName, directionInfo);
157 
158         String id = directionInfo.getId();
159         addDisplayNameToId(displayNameToId, directionInfo);
160 
161         if (directionInfo.getDirection() == Direction.both
162                 || directionInfo.getDirection() == Direction.forward) {
163             for (String alias : directionInfo.getAliases()) {
164                 if (!keepDashTIds && alias.contains("-t-")) {
165                     continue;
166                 }
167                 Transliterator.unregister(alias);
168                 Transliterator.registerAlias(alias, id);
169             }
170             internalRegister(id, ruleString, Transliterator.FORWARD);
171         }
172         if (directionInfo.getDirection() == Direction.both
173                 || directionInfo.getDirection() == Direction.backward) {
174             for (String alias : directionInfo.getBackwardAliases()) {
175                 if (!keepDashTIds && alias.contains("-t-")) {
176                     continue;
177                 }
178                 Transliterator.unregister(alias);
179                 Transliterator.registerAlias(alias, directionInfo.getBackwardId());
180             }
181             internalRegister(id, ruleString, Transliterator.REVERSE);
182         }
183         return id;
184     }
185 
186     /**
187      * Return Icu rules, and the direction info
188      *
189      * @param dir TODO
190      * @param cldrFileName
191      * @param directionInfo
192      * @return
193      */
getIcuRulesFromXmlFile( String dir, String cldrFileName, ParsedTransformID directionInfo)194     public static String getIcuRulesFromXmlFile(
195             String dir, String cldrFileName, ParsedTransformID directionInfo) {
196         final MyHandler myHandler = new MyHandler(cldrFileName, directionInfo);
197         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
198         xfr.read(
199                 dir + cldrFileName,
200                 XMLFileReader.CONTENT_HANDLER | XMLFileReader.ERROR_HANDLER,
201                 true);
202         return myHandler.getRules();
203     }
204 
internalRegister(String id, String ruleString, int direction)205     private void internalRegister(String id, String ruleString, int direction) {
206         if (direction == Transliterator.REVERSE) {
207             id = ParsedTransformID.reverse(id);
208         }
209         internalRegisterNoReverseId(id, ruleString, direction);
210     }
211 
internalRegisterNoReverseId(String id, String ruleString, int direction)212     private void internalRegisterNoReverseId(String id, String ruleString, int direction) {
213         try {
214             Transliterator t = Transliterator.createFromRules(id, ruleString, direction);
215             overridden.add(id);
216             Transliterator oldTranslit = null;
217             if (showProgress != null) {
218                 try {
219                     oldTranslit = Transliterator.getInstance(id);
220                 } catch (Exception e) {
221                 }
222             }
223             Transliterator.unregister(id);
224             Transliterator.registerInstance(t);
225 
226             if (PARANOID) { // for paranoid testing
227                 String r1 =
228                         CLDRTransforms.showTransliterator("", t, 9999, new StringBuilder())
229                                 .toString();
230                 Transliterator t2 = Transliterator.getInstance(id);
231                 String r2 =
232                         CLDRTransforms.showTransliterator("", t2, 9999, new StringBuilder())
233                                 .toString();
234                 if (!r1.equals(r2)) {
235                     throw new IllegalArgumentException(
236                             "Rules unequal\n" + ruleString + "$$$\n$$$" + r2);
237                 }
238             }
239             // verifyNullFilter("halfwidth-fullwidth");
240             if (showProgress != null) {
241                 append(
242                         "Registered new Transliterator: "
243                                 + id
244                                 + (oldTranslit == null ? "" : "\told:\t" + oldTranslit.getID())
245                                 + '\n');
246                 if (id.startsWith("el-")) {
247                     CLDRTransforms.showTransliterator("", t, 999);
248                     Transliterator t2 = Transliterator.getInstance(id);
249                     CLDRTransforms.showTransliterator("", t2, 999);
250                 }
251             }
252         } catch (RuntimeException e) {
253             if (showProgress != null) {
254                 e.printStackTrace();
255                 append(
256                         "Couldn't register new Transliterator: "
257                                 + id
258                                 + "\t"
259                                 + e.getMessage()
260                                 + '\n');
261             } else {
262                 throw (IllegalArgumentException)
263                         new IllegalArgumentException("Couldn't register new Transliterator: " + id)
264                                 .initCause(e);
265             }
266         }
267     }
268 
    /** Sink for progress messages written by {@code append}; null means no progress output. */
    Appendable showProgress;
270 
append(String string)271     private void append(String string) {
272         try {
273             if (showProgress == null) {
274                 return;
275             }
276             showProgress.append(string);
277             if (showProgress instanceof Writer) {
278                 ((Writer) showProgress).flush();
279             }
280         } catch (IOException e) {
281             throw new ICUUncheckedIOException(e);
282         }
283     }
284 
appendln(String s)285     private void appendln(String s) {
286         append(s + "\n");
287     }
288 
289     // ===================================
290 
291     //    @SuppressWarnings("deprecation")
292     //    public void registerFromIcuFormatFiles(String directory) throws IOException {
293     //
294     ////        deregisterIcuTransliterators((Matcher) null);
295     //
296     //        Matcher getId = PatternCache.get("\\s*(\\S*)\\s*\\{\\s*").matcher("");
297     //        Matcher getSource =
298     // PatternCache.get("\\s*(\\S*)\\s*\\{\\s*\\\"(.*)\\\".*").matcher("");
299     //        Matcher translitID = PatternCache.get("([^-]+)-([^/]+)+(?:[/](.+))?").matcher("");
300     //
301     //        Map<String, String> fixedIDs = new TreeMap<>();
302     //        Set<String> oddIDs = new TreeSet<>();
303     //
304     //        File dir = new File(directory);
305     //        // get the list of files to take, and their directions
306     //        BufferedReader input = FileUtilities.openUTF8Reader(directory, "root.txt");
307     //        String id = null;
308     //        String filename = null;
309     //        Map<String, String> aliasMap = new LinkedHashMap<>();
310     //
311     //        // deregisterIcuTransliterators();
312     //
313     //        // do first, since others depend on theseregisterFromIcuFile
314     //        /**
315     //         * Special aliases.
316     //         * Tone-Digit {
317     //         * alias {"Pinyin-NumericPinyin"}
318     //         * }
319     //         * Digit-Tone {
320     //         * alias {"NumericPinyin-Pinyin"}
321     //         * }
322     //         */
323     //        // registerFromIcuFile("Latin-ConjoiningJamo", directory, null);
324     //        // registerFromIcuFile("Pinyin-NumericPinyin", directory, null);
325     //        // Transliterator.registerAlias("Tone-Digit", "Pinyin-NumericPinyin");
326     //        // Transliterator.registerAlias("Digit-Tone", "NumericPinyin-Pinyin");
327     //        // registerFromIcuFile("Fullwidth-Halfwidth", directory, null);
328     //        // registerFromIcuFile("Hiragana-Katakana", directory, null);
329     //        // registerFromIcuFile("Latin-Katakana", directory, null);
330     //        // registerFromIcuFile("Hiragana-Latin", directory, null);
331     //
332     //        while (true) {
333     //            String line = input.readLine();
334     //            if (line == null) break;
335     //            line = line.trim();
336     //            if (line.startsWith("\uFEFF")) {
337     //                line = line.substring(1);
338     //            }
339     //            if (line.startsWith("TransliteratorNamePattern")) break; // done
340     //            // if (line.indexOf("Ethiopic") >= 0) {
341     //            // appendln("Skipping Ethiopic");
342     //            // continue;
343     //            // }
344     //            if (getId.reset(line).matches()) {
345     //                String temp = getId.group(1);
346     //                if (!temp.equals("file") && !temp.equals("internal")) id = temp;
347     //                continue;
348     //            }
349     //            if (getSource.reset(line).matches()) {
350     //                String operation = getSource.group(1);
351     //                String source = getSource.group(2);
352     //                if (operation.equals("alias")) {
353     //                    aliasMap.put(id, source);
354     //                    checkIdFix(id, fixedIDs, oddIDs, translitID);
355     //                    id = null;
356     //                } else if (operation.equals("resource:process(transliterator)")) {
357     //                    filename = source;
358     //                } else if (operation.equals("direction")) {
359     //                    try {
360     //                        if (id == null || filename == null) {
361     //                            // appendln("skipping: " + line);
362     //                            continue;
363     //                        }
364     //                        if (filename.indexOf("InterIndic") >= 0 && filename.indexOf("Latin")
365     // >= 0) {
366     //                            // append("**" + id);
367     //                        }
368     //                        checkIdFix(id, fixedIDs, oddIDs, translitID);
369     //
370     //                        final int direction = source.equals("FORWARD") ?
371     // Transliterator.FORWARD
372     //                            : Transliterator.REVERSE;
373     //                        registerFromIcuFile(id, directory, filename, direction);
374     //
375     //                        verifyNullFilter("halfwidth-fullwidth");
376     //
377     //                        id = null;
378     //                        filename = null;
379     //                    } catch (RuntimeException e) {
380     //                        throw (RuntimeException) new IllegalArgumentException("Failed with " +
381     // filename + ", " + source)
382     //                        .initCause(e);
383     //                    }
384     //                } else {
385     //                    append(dir + "root.txt unhandled line:" + line);
386     //                }
387     //                continue;
388     //            }
389     //            String trimmed = line.trim();
390     //            if (trimmed.equals("")) continue;
391     //            if (trimmed.equals("}")) continue;
392     //            if (trimmed.startsWith("//")) continue;
393     //            throw new IllegalArgumentException("Unhandled:" + line);
394     //        }
395     //
396     //        final Set<String> rawIds = idToRules.keySet();
397     //        Set<String> ordered = dependencyOrder.getOrderedItems(rawIds, null, false);
398     //        ordered.retainAll(rawIds); // since we are in ID space, kick out anything that isn't
399     //
400     //        for (String id2 : ordered) {
401     //            RuleDirection stuff = idToRules.get(id2);
402     //            internalRegisterNoReverseId(id2, stuff.ruleString, stuff.direction);
403     //            verifyNullFilter("halfwidth-fullwidth"); // TESTING
404     //        }
405     //
406     //        for (Iterator<String> it = aliasMap.keySet().iterator(); it.hasNext();) {
407     //            id = it.next();
408     //            String source = aliasMap.get(id);
409     //            Transliterator.unregister(id);
410     //            Transliterator t = Transliterator.createFromRules(id, "::" + source + ";",
411     // Transliterator.FORWARD);
412     //            Transliterator.registerInstance(t);
413     //            // verifyNullFilter("halfwidth-fullwidth");
414     //            appendln("Registered new Transliterator Alias: " + id);
415     //
416     //        }
417     //        appendln("Fixed IDs");
418     //        for (Iterator<String> it = fixedIDs.keySet().iterator(); it.hasNext();) {
419     //            String id2 = it.next();
420     //            appendln("\t" + id2 + "\t" + fixedIDs.get(id2));
421     //        }
422     //        appendln("Odd IDs");
423     //        for (Iterator<String> it = oddIDs.iterator(); it.hasNext();) {
424     //            String id2 = it.next();
425     //            appendln("\t" + id2);
426     //        }
427     //        Transliterator.registerAny(); // do this last!
428     //    }
429 
    /** Rules and direction per transliterator ID, as stored by {@code registerFromIcuFile}. */
    Map<String, RuleDirection> idToRules = new TreeMap<>();
431 
432     private class RuleDirection {
433         String ruleString;
434         int direction;
435 
RuleDirection(String ruleString, int direction)436         public RuleDirection(String ruleString, int direction) {
437             super();
438             this.ruleString = ruleString;
439             this.direction = direction;
440         }
441     }
442 
registerFromIcuFile(String id, String directory, String filename, int direction)443     private void registerFromIcuFile(String id, String directory, String filename, int direction) {
444         if (filename == null) {
445             filename = id.replace("-", "_").replace("/", "_") + ".txt";
446         }
447         String ruleString = CldrUtility.getText(directory, filename);
448         idToRules.put(id, new RuleDirection(ruleString, direction));
449     }
450 
451     // private void registerFromIcuFile(String id, String dir, String filename) {
452     // registerFromIcuFile(id, dir, filename, Transliterator.FORWARD);
453     // registerFromIcuFile(id, dir, filename, Transliterator.REVERSE);
454     // }
455 
checkIdFix( String id, Map<String, String> fixedIDs, Set<String> oddIDs, Matcher translitID)456     public void checkIdFix(
457             String id, Map<String, String> fixedIDs, Set<String> oddIDs, Matcher translitID) {
458         if (fixedIDs.containsKey(id)) return;
459         if (!translitID.reset(id).matches()) {
460             appendln("Can't fix: " + id);
461             fixedIDs.put(id, "?" + id);
462             return;
463         }
464         String source1 = translitID.group(1);
465         String target1 = translitID.group(2);
466         String variant = translitID.group(3);
467         String source = fixID(source1);
468         String target = fixID(target1);
469         if (!source1.equals(source)) {
470             fixedIDs.put(source1, source);
471         }
472         if (!target1.equals(target)) {
473             fixedIDs.put(target1, target);
474         }
475         if (variant != null) {
476             oddIDs.add("variant: " + variant);
477         }
478     }
479 
fixID(String source)480     static String fixID(String source) {
481         return source; // for now
482     }
483 
484     //    public void deregisterIcuTransliterators(Matcher filter) {
485     //        // Remove all of the current registrations
486     //        // first load into array, so we don't get sync problems.
487     //        List<String> rawAvailable = new ArrayList<>();
488     //        for (Enumeration<String> en = Transliterator.getAvailableIDs(); en.hasMoreElements();)
489     // {
490     //            final String id = en.nextElement();
491     //            if (filter != null && !filter.reset(id).matches()) {
492     //                continue;
493     //            }
494     //            rawAvailable.add(id);
495     //        }
496     //
497     //        // deregisterIcuTransliterators(rawAvailable);
498     //
499     //        Set<String> available = dependencyOrder.getOrderedItems(rawAvailable, filter, false);
500     //        List<String> reversed = new LinkedList<>();
501     //        for (String item : available) {
502     //            reversed.add(0, item);
503     //        }
504     //        // available.retainAll(rawAvailable); // remove the items we won't touch anyway
505     //        // rawAvailable.removeAll(available); // now the ones whose order doesn't matter
506     //        // deregisterIcuTransliterators(rawAvailable);
507     //        deregisterIcuTransliterators(reversed);
508     //
509     //        for (Enumeration<String> en = Transliterator.getAvailableIDs(); en.hasMoreElements();)
510     // {
511     //            String oldId = en.nextElement();
512     //            append("Retaining: " + oldId + "\n");
513     //        }
514     //    }
515     //
516     //    public void deregisterIcuTransliterators(Collection<String> available) {
517     //        for (String oldId : available) {
518     //            Transliterator t;
519     //            try {
520     //                t = Transliterator.getInstance(oldId);
521     //            } catch (IllegalArgumentException e) {
522     //                if (e.getMessage().startsWith("Illegal ID")) {
523     //                    continue;
524     //                }
525     //                append("Failure with: " + oldId);
526     //                t = Transliterator.getInstance(oldId);
527     //                throw e;
528     //            } catch (RuntimeException e) {
529     //                append("Failure with: " + oldId);
530     //                t = Transliterator.getInstance(oldId);
531     //                throw e;
532     //            }
533     //            String className = t.getClass().getName();
534     //            if (className.endsWith(".CompoundTransliterator")
535     //                || className.endsWith(".RuleBasedTransliterator")
536     //                || className.endsWith(".AnyTransliterator")) {
537     //                appendln("REMOVING: " + oldId);
538     //                Transliterator.unregister(oldId);
539     //            } else {
540     //                appendln("Retaining: " + oldId + "\t\t" + className);
541     //            }
542     //        }
543     //    }
544 
    /**
     * Direction(s) a transform is registered in. {@code registerTransliteratorsFromXML} registers
     * the forward transliterator for {@code forward} and {@code both}, and the reverse one for
     * {@code backward} and {@code both}.
     */
    public enum Direction {
        backward,
        both,
        forward
    }
550 
    /**
     * Visibility recorded for a transform; {@code ParsedTransformID.setVisibility} parses it from
     * a string with {@code valueOf}, so the constant names are the accepted strings.
     */
    public enum Visibility {
        external,
        internal
    }
555 
556     public static class ParsedTransformID {
557         public String source = "Any";
558         public String target = "Any";
559         public String variant;
560         protected String[] aliases = {};
561         protected String[] backwardAliases = {};
562         protected Direction direction = null;
563         protected Visibility visibility;
564 
getId()565         public String getId() {
566             return getSource()
567                     + "-"
568                     + getTarget()
569                     + (getVariant() == null ? "" : "/" + getVariant());
570         }
571 
getDisplayId()572         public String getDisplayId() {
573             return getDisplaySource()
574                     + "-"
575                     + getDisplayTarget()
576                     + (getVariant() == null ? "" : "/" + getDisplayVariant());
577         }
578 
getDisplayVariant()579         private String getDisplayVariant() {
580             return getVariant();
581         }
582 
getDisplayTarget()583         private String getDisplayTarget() {
584             return getDisplaySourceOrTarget(getTarget());
585         }
586 
getDisplaySource()587         private String getDisplaySource() {
588             return getDisplaySourceOrTarget(getSource());
589         }
590 
getDisplaySourceOrTarget(String sourceOrTarget)591         private String getDisplaySourceOrTarget(String sourceOrTarget) {
592             int uscript = UScript.getCodeFromName(sourceOrTarget);
593             if (uscript >= 0) {
594                 return UScript.getName(uscript);
595             }
596             if (sourceOrTarget.contains("FONIPA")) {
597                 return "IPA";
598             }
599             if (sourceOrTarget.equals("InterIndic")) {
600                 return "Indic";
601             }
602             try {
603                 String name = CLDRConfig.getInstance().getEnglish().getName(sourceOrTarget);
604                 return name;
605             } catch (Exception e) {
606                 return sourceOrTarget;
607             }
608         }
609 
610         static final LikelySubtags likely = new LikelySubtags();
611 
getScriptCode(String sourceOrTarget)612         public static String getScriptCode(String sourceOrTarget) {
613             int uscript = UScript.getCodeFromName(sourceOrTarget);
614             if (uscript >= 0) {
615                 return UScript.getShortName(uscript);
616             }
617             if (sourceOrTarget.contains("FONIPA")) {
618                 return "Ipa0";
619             }
620             if (sourceOrTarget.equals("InterIndic")) {
621                 return "Ind0";
622             }
623             try {
624                 String max = likely.maximize(sourceOrTarget);
625                 return max == null ? null : new LanguageTagParser().set(max).getScript();
626             } catch (Exception e) {
627                 return null;
628             }
629         }
630 
getBackwardId()631         public String getBackwardId() {
632             return getTarget()
633                     + "-"
634                     + getSource()
635                     + (getVariant() == null ? "" : "/" + getVariant());
636         }
637 
ParsedTransformID()638         public ParsedTransformID() {}
639 
set( String source, String target, String variant, Direction direction)640         public ParsedTransformID set(
641                 String source, String target, String variant, Direction direction) {
642             this.source = source;
643             this.target = target;
644             this.variant = variant;
645             this.direction = direction;
646             return this;
647         }
648 
set(String id)649         public ParsedTransformID set(String id) {
650             variant = null;
651             int pos = id.indexOf('-');
652             if (pos < 0) {
653                 source = "Any";
654                 target = id;
655                 return this;
656             }
657             source = id.substring(0, pos);
658             int pos2 = id.indexOf('/', pos);
659             if (pos2 < 0) {
660                 target = id.substring(pos + 1);
661                 return this;
662             }
663             target = id.substring(pos + 1, pos2);
664             variant = id.substring(pos2 + 1);
665             return this;
666         }
667 
reverse()668         public ParsedTransformID reverse() {
669             String temp = source;
670             source = target;
671             target = temp;
672             return this;
673         }
674 
getTargetVariant()675         public String getTargetVariant() {
676             return target + (variant == null ? "" : "/" + variant);
677         }
678 
getSourceVariant()679         public String getSourceVariant() {
680             return source + (variant == null ? "" : "/" + variant);
681         }
682 
setDirection(Direction direction)683         protected void setDirection(Direction direction) {
684             this.direction = direction;
685         }
686 
getDirection()687         public Direction getDirection() {
688             return direction;
689         }
690 
setVariant(String variant)691         public void setVariant(String variant) {
692             this.variant = variant;
693         }
694 
getVariant()695         protected String getVariant() {
696             return variant;
697         }
698 
setTarget(String target)699         public void setTarget(String target) {
700             this.target = target;
701         }
702 
getTarget()703         public String getTarget() {
704             return target;
705         }
706 
setSource(String source)707         public void setSource(String source) {
708             this.source = source;
709         }
710 
getSource()711         public String getSource() {
712             return source;
713         }
714 
715         @Override
toString()716         public String toString() {
717             return source + "-" + getTargetVariant();
718         }
719 
getId(String source, String target, String variant)720         public static String getId(String source, String target, String variant) {
721             String id = source + '-' + target;
722             if (variant != null) id += "/" + variant;
723             return id;
724         }
725 
reverse(String id)726         public static String reverse(String id) {
727             return new ParsedTransformID().set(id).getBackwardId();
728         }
729 
setAliases(String[] aliases)730         public void setAliases(String[] aliases) {
731             this.aliases = aliases;
732         }
733 
getAliases()734         public String[] getAliases() {
735             return aliases;
736         }
737 
setBackwardAliases(String[] backwardAliases)738         public void setBackwardAliases(String[] backwardAliases) {
739             this.backwardAliases = backwardAliases;
740         }
741 
getBackwardAliases()742         public String[] getBackwardAliases() {
743             return backwardAliases;
744         }
745 
setVisibility(String string)746         protected void setVisibility(String string) {
747             visibility = Visibility.valueOf(string);
748         }
749 
getVisibility()750         public Visibility getVisibility() {
751             return visibility;
752         }
753     }
754 
755     /**
756      * Verify that if the transliterator exists, it has a null filter
757      *
758      * @param id
759      */
verifyNullFilter(String id)760     public static void verifyNullFilter(String id) {
761         Transliterator widen;
762         try {
763             widen = Transliterator.getInstance(id);
764         } catch (Exception e) {
765             return;
766         }
767         UnicodeFilter filter = widen.getFilter();
768         if (filter != null) {
769             throw new IllegalArgumentException(id + " has non-empty filter: " + filter);
770         }
771     }
772 
773     public static class MyHandler extends XMLFileReader.SimpleHandler {
774         boolean first = true;
775         ParsedTransformID directionInfo;
776         String cldrFileName;
777         StringBuilder rules = new StringBuilder();
778 
getRules()779         public String getRules() {
780             return rules.toString();
781         }
782 
MyHandler(String cldrFileName, ParsedTransformID directionInfo)783         public MyHandler(String cldrFileName, ParsedTransformID directionInfo) {
784             super();
785             this.cldrFileName = cldrFileName;
786             this.directionInfo = directionInfo;
787         }
788 
789         @Override
handlePathValue(String path, String value)790         public void handlePathValue(String path, String value) {
791             if (first) {
792                 if (path.startsWith("//supplementalData/version")) {
793                     return;
794                 } else if (path.startsWith("//supplementalData/generation")) {
795                     return;
796                 }
797                 XPathParts parts = XPathParts.getFrozenInstance(path);
798                 Map<String, String> attributes = parts.findAttributes("transform");
799                 if (attributes == null) {
800                     throw new IllegalArgumentException(
801                             "Not an XML transform file: " + cldrFileName + "\t" + path);
802                 }
803                 directionInfo.setSource(attributes.get("source"));
804                 directionInfo.setTarget(attributes.get("target"));
805                 directionInfo.setVariant(attributes.get("variant"));
806                 directionInfo.setDirection(
807                         Direction.valueOf(attributes.get("direction").toLowerCase(Locale.ENGLISH)));
808 
809                 String alias = attributes.get("alias");
810                 if (alias != null) {
811                     directionInfo.setAliases(alias.trim().split("\\s+"));
812                 }
813 
814                 String backwardAlias = attributes.get("backwardAlias");
815                 if (backwardAlias != null) {
816                     directionInfo.setBackwardAliases(backwardAlias.trim().split("\\s+"));
817                 }
818 
819                 directionInfo.setVisibility(attributes.get("visibility"));
820                 first = false;
821             }
822             if (path.indexOf("/comment") >= 0) {
823                 // skip
824             } else if (path.indexOf("/tRule") >= 0) {
825                 value = fixup.transliterate(value);
826                 rules.append(value).append(CldrUtility.LINE_SEPARATOR);
827             } else {
828                 throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
829             }
830         }
831     }
832 
833     static boolean ALREADY_REGISTERED = false; // read and written by registerModified() while holding the CLDRTransforms.class lock
834     /**
835      * Register just those transliterators that are different than ICU. TODO: check against the file
836      * system to make sure the list is accurate.
         *
         * <p>Each registerTranslit call below names one transform ID followed by one or more
         * (sample source, expected result) pairs; registerTranslit registers the transform from
         * TRANSFORM_DIR and checks the samples. Once the whole list has been processed the
         * ALREADY_REGISTERED flag is set and later calls return immediately. If a call throws
         * before that point the flag stays false, so a later invocation runs the list again.
837      */
registerModified()838     public void registerModified() {
839         synchronized (CLDRTransforms.class) { // class-level lock: the flag is static, shared by all instances
840             if (ALREADY_REGISTERED) {
841                 return;
842             }
843             // NEW
                // (presumably transforms that ICU does not provide at all - confirm)
844             registerTranslit("Lao-Latin", "ບ", "b");
845             registerTranslit("Khmer-Latin", "ឥ", "ĕ");
846             registerTranslit("Sinhala-Latin", "ක", "ka");
847             registerTranslit("Japn-Latn", "譆", "aa");
848 
849             // MODIFIED
                // (presumably transforms whose CLDR rules differ from the ICU version - confirm)
850             registerTranslit("Han-SpacedHan", "《", "«");
851             registerTranslit("Greek-Latin", "΄", "´");
852             registerTranslit("Hebrew-Latin", "־", "-");
853             registerTranslit("Cyrillic-Latin", "ө", "ö");
854             registerTranslit("Myanmar-Latin", "ဿ", "s");
855             registerTranslit("Latin-Armenian", "’", "՚");
856 
                // Three sample pairs in one call: source, expected, source, expected, ...
857             registerTranslit("Interindic-Latin", "\uE070", ".", "\uE03C", "\u0323", "\uE04D", "");
858 
859             registerTranslit("Malayalam-Interindic", "ൺ", "");
860             registerTranslit("Interindic-Malayalam", "", "ണ്");
861             registerTranslit("Malayalam-Latin", "ൺ", "ṇ");
862 
863             registerTranslit("Devanagari-Interindic", "ॲ", "\uE084");
864             registerTranslit("Devanagari-Latin", "ॲ", "æ");
865 
866             registerTranslit("Arabic-Latin", "؉", "‰");
867             ALREADY_REGISTERED = true; // reached only when every registration above completed
868         }
869     }
870 
871     private static final ImmutableSet<String> noSkip = ImmutableSet.of();
872 
873     private static final boolean SHOW = false;
874     private static final boolean SHOW_FAILED_MATCHES = false;
875 
876     /** Register a transliterator and verify that a sample changed value is accurate */
registerTranslit(String ID, String... sourcePairs)877     public void registerTranslit(String ID, String... sourcePairs) {
878         String internalId = registerTransliteratorsFromXML(TRANSFORM_DIR, ID, noSkip, true);
879         Transliterator.registerAny(); // do this last!
880         Transliterator t = null;
881         try {
882             t = Transliterator.getInstance(internalId);
883         } catch (Exception e) {
884             System.out.println("For " + ID + " (" + internalId + ")");
885             e.printStackTrace();
886             return;
887         }
888         testSourceTarget(t, sourcePairs);
889     }
890 
showTransliterator(String prefix, Transliterator t, int limit)891     public static void showTransliterator(String prefix, Transliterator t, int limit) {
892         showTransliterator(prefix, t, limit, System.out);
893         System.out.flush();
894     }
895 
showTransliterator( String prefix, Transliterator t, int limit, T output)896     public static <T extends Appendable> T showTransliterator(
897             String prefix, Transliterator t, int limit, T output) {
898         if (!prefix.isEmpty()) {
899             prefix += " ";
900         }
901         try {
902             output.append(prefix + "ID:\t" + t.getID() + "\n");
903             output.append(prefix + "Class:\t" + t.getClass().getName() + "\n");
904             if (t.getFilter() != null) {
905                 output.append(prefix + "Filter:\t" + t.getFilter().toPattern(false) + "\n");
906             }
907             if (t instanceof RuleBasedTransliterator) {
908                 RuleBasedTransliterator rbt = (RuleBasedTransliterator) t;
909                 String[] rules = rbt.toRules(true).split("\n");
910                 int length = rules.length;
911                 if (limit >= 0 && limit < length) length = limit;
912                 output.append(prefix + "Rules:\n");
913                 prefix += "\t";
914                 for (int i = 0; i < length; ++i) {
915                     output.append(prefix + rules[i] + "\n");
916                 }
917             } else {
918                 Transliterator[] elements = t.getElements();
919                 if (elements[0] == t) {
920                     output.append(prefix + "\tNonRuleBased\n");
921                     return output;
922                 } else {
923                     prefix += "\t";
924                     for (int i = 0; i < elements.length; ++i) {
925                         showTransliterator(prefix, elements[i], limit, output);
926                     }
927                 }
928             }
929         } catch (IOException e) {
930             throw new UncheckedIOException(e);
931         }
932         return output;
933     }
934 
testSourceTarget(Transliterator t, String... sourcePairs)935     public static void testSourceTarget(Transliterator t, String... sourcePairs) {
936         for (int i = 0; i < sourcePairs.length; i += 2) {
937             String sourceTest = sourcePairs[i];
938             String targetTest = sourcePairs[i + 1];
939             String target = t.transform(sourceTest);
940             if (!target.equals(targetTest)) {
941                 throw new IllegalArgumentException(
942                         t.getID()
943                                 + " For "
944                                 + sourceTest
945                                 + ", expected "
946                                 + targetTest
947                                 + ", got "
948                                 + target);
949             }
950         }
951     }
952 
953     /**
954      * Gets a transform from a script to Latin. for testing For a locale, use
955      * ExemplarUtilities.getScript(locale) to get the script
956      */
getTestingLatinScriptTransform(final String script)957     public static Transliterator getTestingLatinScriptTransform(final String script) {
958         String id;
959 
960         switch (script) {
961             case "Latn":
962                 return null;
963             case "Khmr":
964                 id = "Khmr-Latn/UNGEGN";
965                 break;
966             case "Laoo":
967                 id = "Laoo-Latn/UNGEGN";
968                 break;
969             case "Sinh":
970                 id = "Sinh-Latn/UNGEGN";
971                 break;
972             case "Japn":
973                 id = "Jpan-Latn";
974                 break;
975             case "Kore":
976                 id = "Hangul-Latn";
977                 break;
978             case "Hant":
979             case "Hans":
980                 id = "Han-Latn";
981                 break;
982             case "Olck":
983                 id = "sat_Olck-sat_FONIPA"; // Latin IPA
984                 break;
985             case "Cher":
986                 id = "chr-chr_FONIPA";
987                 break;
988             default:
989                 id = script + "-Latn";
990         }
991         return Transliterator.getInstance(id);
992     }
993 
994     /**
995      * Returns the set of all files that can be registered, in an order that makes sure that all
996      * dependencies are handled. That is, if X uses Y in its rules, then Y has to come before X.
997      *
998      * <p>The problem is that when you build a transliterator from rules, and one of those rules is
999      * to call another transliterator X, it inserts the <b>currently</b> registered transliterator
1000      * into the transliterator being built. So whenever a transliterator X is changed, you have to
1001      * reregister every transliterator that calls X. Otherwise the old version of X sticks around in
1002      * those calling transliterators. So the order that you register transliterators is important!
1003      */
getFileRegistrationOrder(String dir)1004     public static Set<String> getFileRegistrationOrder(String dir) {
1005         if (dir == null) {
1006             dir = TRANSFORM_DIR;
1007         }
1008         List<String> files = getAvailableIds();
1009         Multimap<String, String> fileToAliases = HashMultimap.create();
1010         Multimap<String, String> fileToDependencies = TreeMultimap.create();
1011         for (String file : files) {
1012             // Very simple test that depends on standard format
1013             // eg
1014             //            ::[॑ ॒ ॔ ॓ ़ ँ-ः । ॥ ॰ ०-९ ॐ ॲ ऄ-ऋ ॠ ऌ ॡ ऍ-कक़ खख़ गग़ घ-जज़ झ-डड़ ढढ़ ण-फफ़ ब-यय़
1015             // र-ह ऽ ॽ ा-ॄ ॢ ॣ ॅ-्];
1016             //            ::NFD;
1017             //            ::Devanagari-InterIndic;
1018             //            ::InterIndic-Latin;
1019             //            ::NFC;
1020             ParsedTransformID directionInfo = new ParsedTransformID();
1021             String ruleString = getIcuRulesFromXmlFile(dir, file, directionInfo);
1022             Set<String> others = new LinkedHashSet<>();
1023             Set<String> order =
1024                     ruleString
1025                             .lines()
1026                             .map(x -> x.trim())
1027                             .filter(x -> x.contains("::") && !x.trim().startsWith("#"))
1028                             .map(x -> parseDoubleColon(x, others))
1029                             .collect(Collectors.toCollection(LinkedHashSet::new));
1030             order.addAll(others);
1031             if (SHOW) {
1032                 System.out.println(file + "=>" + order);
1033             }
1034             if (!order.isEmpty()) {
1035                 fileToDependencies.putAll(file, order);
1036             }
1037             if (directionInfo.direction != Direction.backward) { // that is, forward or both
1038                 fileToAliases.put(file, directionInfo.getId());
1039                 fileToAliases.putAll(file, Arrays.asList(directionInfo.getAliases()));
1040                 if (SHOW) {
1041                     System.out.println(
1042                             "\t"
1043                                     + directionInfo.getId()
1044                                     + "\t"
1045                                     + Arrays.asList(directionInfo.getAliases()));
1046                 }
1047             }
1048             if (directionInfo.direction != Direction.forward) { // that is, backward or both
1049                 fileToAliases.put(file, directionInfo.getBackwardId());
1050                 fileToAliases.putAll(file, Arrays.asList(directionInfo.getBackwardAliases()));
1051                 if (SHOW) {
1052                     System.out.println(
1053                             "\t"
1054                                     + directionInfo.getBackwardId()
1055                                     + "\t"
1056                                     + Arrays.asList(directionInfo.getBackwardAliases()));
1057                 }
1058             }
1059         }
1060         TreeMultimap<String, String> aliasesToFile =
1061                 Multimaps.invertFrom(fileToAliases, TreeMultimap.create());
1062         Multimap<String, String> fileToDependentFiles = TreeMultimap.create();
1063 
1064         for (Entry<String, Collection<String>> entry : fileToDependencies.asMap().entrySet()) {
1065             Set<String> v =
1066                     entry.getValue().stream()
1067                             .filter(x -> aliasesToFile.containsKey(x))
1068                             .map(y -> aliasesToFile.get(y).first())
1069                             .collect(Collectors.toSet());
1070             fileToDependentFiles.putAll(entry.getKey(), v);
1071         }
1072         Builder<String> comp = new DiscreteComparator.Builder<>(null);
1073         fileToDependentFiles.forEach(
1074                 (x, y) -> {
1075                     if (SHOW) {
1076                         System.out.println(x + "=" + y);
1077                     }
1078                     comp.add(y, x); // put dependent earlier
1079                 });
1080         // .add("c", "d", "b", "a").add("m", "n", "d").get();
1081 
1082         DiscreteComparator<String> comp2 = comp.get();
1083         Set<String> orderedDependents = new LinkedHashSet<>(comp2.getOrdering());
1084         orderedDependents.retainAll(
1085                 fileToDependentFiles.values()); // remove files that are not dependents
1086         Set<String> remainingFiles = new TreeSet<>(files);
1087         remainingFiles.removeAll(orderedDependents);
1088         orderedDependents.addAll(remainingFiles);
1089         if (SHOW_FAILED_MATCHES) {
1090             System.out.println(orderedDependents);
1091         }
1092         return ImmutableSet.copyOf(orderedDependents);
1093     }
1094     // fails match: :: [:Latin:] fullwidth-halfwidth ();
1095 
1096     static final Pattern TRANSLIT_FINDER =
1097             Pattern.compile(
1098                     "\\s*::\\s*"
1099                             + "(?:\\[[^\\]]+\\]\\s*)?"
1100                             + "([A-Za-z0-9////_//-]*)?"
1101                             + "(?:"
1102                             + "\\s*\\("
1103                             + "(?:\\[[^\\]]+\\]\\s*)?"
1104                             + "([A-Za-z0-9////_//-]*)?"
1105                             + "\\s*\\)"
1106                             + ")?"
1107                             + "\\s*;\\s*(#.*)?");
1108     //    static {
1109     //        Matcher matcher = TRANSLIT_FINDER.matcher("::[:Latin:] fullwidth-halfwidth();");
1110     //        System.out.println(matcher.matches());
1111     //    }
1112 
parseDoubleColon(String x, Set<String> others)1113     static String parseDoubleColon(String x, Set<String> others) {
1114         Matcher matcher = TRANSLIT_FINDER.matcher(x);
1115         if (matcher.matches()) {
1116             String first = matcher.group(1);
1117             String second = matcher.group(2);
1118             if (SHOW) {
1119                 System.out.println("1: " + first + "\t2:" + second);
1120             }
1121             if (second != null && !second.isBlank()) {
1122                 others.add(second);
1123             }
1124             return first == null || first.isBlank() ? "" : first;
1125         } else {
1126             if (SHOW_FAILED_MATCHES) {
1127                 System.out.println("fails match: " + x);
1128             }
1129         }
1130         return "";
1131     }
1132 }
1133