xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBcp47Transforms.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.ibm.icu.dev.test.TestFmwk;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.impl.Row;
6 import com.ibm.icu.impl.Row.R2;
7 import com.ibm.icu.lang.UCharacter;
8 import com.ibm.icu.lang.UProperty;
9 import com.ibm.icu.lang.UScript;
10 import com.ibm.icu.text.Transliterator;
11 import com.ibm.icu.util.ULocale;
12 import java.io.File;
13 import java.util.Arrays;
14 import java.util.Collections;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.Locale;
18 import java.util.Map;
19 import java.util.Map.Entry;
20 import java.util.Set;
21 import java.util.TreeMap;
22 import java.util.TreeSet;
23 import org.unicode.cldr.util.Builder;
24 import org.unicode.cldr.util.Builder.MBuilder;
25 import org.unicode.cldr.util.CLDRTransforms;
26 import org.unicode.cldr.util.CLDRTransforms.Direction;
27 import org.unicode.cldr.util.CLDRTransforms.MyHandler;
28 import org.unicode.cldr.util.CLDRTransforms.ParsedTransformID;
29 import org.unicode.cldr.util.CLDRTransforms.Visibility;
30 import org.unicode.cldr.util.StandardCodes;
31 import org.unicode.cldr.util.SupplementalDataInfo;
32 import org.unicode.cldr.util.XMLFileReader;
33 
34 public class TestBcp47Transforms extends TestFmwk {
35 
main(String[] args)36     public static void main(String[] args) {
37         new TestBcp47Transforms().run(args);
38     }
39 
TestNames()40     public void TestNames() {
41         SupplementalDataInfo suppData = SupplementalDataInfo.getInstance();
42         Relation<String, String> extensionToKeys = suppData.getBcp47Extension2Keys();
43         Set<String> keys = extensionToKeys.getAll("t");
44         // extension="t" name="m0"
45         Relation<String, String> keyToSubtypes = suppData.getBcp47Keys();
46         Map<R2<String, String>, String> descriptions = suppData.getBcp47Descriptions();
47         for (String key : keys) {
48             for (String subtype : keyToSubtypes.getAll(key)) {
49                 String description = descriptions.get(Row.of(key, subtype));
50                 System.out.println(key + ", " + subtype + ", " + description);
51             }
52         }
53         Map<String, String> old2newName = new TreeMap<String, String>();
54         for (String file : Arrays.asList(new File(CLDRTransforms.TRANSFORM_DIR).list())) {
55             if (!file.endsWith(".xml")) continue;
56             ParsedTransformID directionInfo = new ParsedTransformID();
57             getIcuRulesFromXmlFile(CLDRTransforms.TRANSFORM_DIR, file, directionInfo);
58             if (directionInfo.getVisibility() == Visibility.internal) continue;
59             String source = directionInfo.source;
60             String target = directionInfo.target;
61             String variant = directionInfo.variant;
62             String standard = getStandard0(source, target, variant);
63             // System.out.println(standard
64             // + "\t =>\t" + directionInfo
65             // + "\tdirection:\t" + directionInfo.getDirection()
66             // + "\tvisibility:\t" + directionInfo.getVisibility()
67             // );
68             if (!standard.contains("?")) {
69                 old2newName.put(directionInfo.toString(), standard);
70             }
71             if (directionInfo.getDirection() == Direction.both) {
72                 standard = getStandard0(source, target, variant);
73                 if (!standard.contains("?")) {
74                     old2newName.put(directionInfo.toString(), standard);
75                 }
76             }
77         }
78         for (String source : Collections.list(Transliterator.getAvailableSources())) {
79             for (String target : Collections.list(Transliterator.getAvailableTargets(source))) {
80                 for (String variant :
81                         Collections.list(Transliterator.getAvailableVariants(source, target))) {
82                     if (variant.isEmpty()) variant = null;
83                     String name = source + "-" + target + (variant == null ? "" : "/" + variant);
84                     if (!old2newName.containsKey(name)) {
85                         String standard = getStandard0(source, target, variant);
86                         if (!standard.contains("?")) {
87                             old2newName.put(name, standard);
88                         }
89                     }
90                 }
91             }
92         }
93         for (Entry<String, String> entry : old2newName.entrySet()) {
94             System.out.println(entry);
95         }
96         System.out.println("Missing");
97         for (Entry<String, Set<R2<Type, String>>> entry : MISSING.keyValuesSet()) {
98             System.out.println(entry);
99         }
100     }
101 
102     enum Type {
103         source,
104         target,
105         mechanism
106     }
107 
getStandard0(String source, String target, String variant)108     private String getStandard0(String source, String target, String variant) {
109         String id = source + "-" + target + "/" + variant;
110         String newSource = getStandard(Type.source, source, id);
111         String newTarget = getStandard(Type.target, target, id);
112         String newMechanism = getStandard(Type.mechanism, variant, id);
113         return newTarget + "-t-" + newSource + (newMechanism == null ? "" : "-m0-" + newMechanism);
114     }
115 
116     static ULocale.Builder ubuilder = new ULocale.Builder();
117     static Relation<String, Row.R2<Type, String>> MISSING =
118             Relation.<String, Row.R2<Type, String>>of(
119                     new TreeMap<String, Set<Row.R2<Type, String>>>(), TreeSet.class);
120     static StandardCodes sc = StandardCodes.make();
121 
122     static Map<String, String> SPECIAL_CASES;
123     static Set<String> languages = sc.getAvailableCodes("language");
124     static Set<String> scripts = new HashSet<String>();
125     static Set<String> regions = new HashSet<String>();
126 
127     static {
128         MBuilder<String, String, HashMap<String, String>> builder =
129                 Builder.with(new HashMap<String, String>());
130         // add language names
131         for (String s : languages) {
132             final String data = sc.getData("language", s);
add(builder, s, data)133             add(builder, s, data);
134         }
135         // add script names. They override (eg Latin => und-Latn)
136         for (String s : sc.getAvailableCodes("script")) {
s.toLowerCase(Locale.ENGLISH)137             scripts.add(s.toLowerCase(Locale.ENGLISH));
138             final String data = sc.getData("script", s);
add(builder, "und-" + s, data)139             add(builder, "und-" + s, data);
140             // System.out.println(data + "\t" + s);
141         }
142         for (String s : sc.getAvailableCodes("territory")) {
s.toLowerCase(Locale.ENGLISH)143             regions.add(s.toLowerCase(Locale.ENGLISH));
144         }
145         // real special cases
146         builder.put("any", "und")
147                 .put("simplified", "Hans")
148                 .put("traditional", "Hant")
149                 .put("ipa", "und-fonipa")
150                 .put("xsampa", "und-fonxsamp")
151                 .put("japanesekana", "und-Hrkt");
152         /*
153          * source fullwidth source jamo target accents target ascii target
154          * halfwidth target jamo target numericpinyin target publishing
155          */
156         SPECIAL_CASES = builder.freeze();
157     }
158 
add( MBuilder<String, String, HashMap<String, String>> builder, String code, String names)159     public static void add(
160             MBuilder<String, String, HashMap<String, String>> builder, String code, String names) {
161         names = names.toLowerCase(Locale.ENGLISH);
162         if (!names.contains("▪")) {
163             builder.put(names, code);
164             return;
165         }
166         for (String name : names.split("▪")) {
167             builder.put(name, code);
168         }
169     }
170 
getStandard(Type type, String source, String id)171     private String getStandard(Type type, String source, String id) {
172         source = source == null ? null : source.toLowerCase(Locale.ENGLISH);
173         if (type == Type.mechanism) {
174             if (source == null) return null;
175             if (source.equals("bgn") || source.equals("ungegn")) return source;
176             MISSING.put(source, Row.of(type, id));
177             return "?" + source;
178         }
179         String special = SPECIAL_CASES.get(source);
180         if (special != null) {
181             return special;
182         }
183         int code;
184         try {
185             code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, source);
186             return "und-" + UScript.getShortName(code);
187         } catch (Exception e1) {
188         }
189         try {
190             ULocale ulocale = new ULocale(source);
191             // hack for now
192             String language = ulocale.getLanguage();
193             if (languages.contains(language)) {
194                 String script = ulocale.getScript();
195                 if (script.isEmpty() || scripts.contains(script.toLowerCase(Locale.ENGLISH))) {
196                     String region = ulocale.getCountry();
197                     if (region.isEmpty() || regions.contains(region.toLowerCase(Locale.ENGLISH))) {
198                         return ulocale.toLanguageTag();
199                     }
200                 }
201             }
202         } catch (Exception e) {
203         }
204         // we failed
205         MISSING.put(source, Row.of(type, id));
206         return "?" + source;
207     }
208 
getIcuRulesFromXmlFile( String dir, String cldrFileName, ParsedTransformID directionInfo)209     public String getIcuRulesFromXmlFile(
210             String dir, String cldrFileName, ParsedTransformID directionInfo) {
211         final MyHandler myHandler = new CLDRTransforms.MyHandler(cldrFileName, directionInfo);
212         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
213         xfr.read(
214                 dir + cldrFileName,
215                 XMLFileReader.CONTENT_HANDLER | XMLFileReader.ERROR_HANDLER,
216                 true);
217         return myHandler.getRules();
218     }
219 }
220