1 package org.unicode.cldr.unittest; 2 3 import com.ibm.icu.dev.test.TestFmwk; 4 import com.ibm.icu.impl.Relation; 5 import com.ibm.icu.impl.Row; 6 import com.ibm.icu.impl.Row.R2; 7 import com.ibm.icu.lang.UCharacter; 8 import com.ibm.icu.lang.UProperty; 9 import com.ibm.icu.lang.UScript; 10 import com.ibm.icu.text.Transliterator; 11 import com.ibm.icu.util.ULocale; 12 import java.io.File; 13 import java.util.Arrays; 14 import java.util.Collections; 15 import java.util.HashMap; 16 import java.util.HashSet; 17 import java.util.Locale; 18 import java.util.Map; 19 import java.util.Map.Entry; 20 import java.util.Set; 21 import java.util.TreeMap; 22 import java.util.TreeSet; 23 import org.unicode.cldr.util.Builder; 24 import org.unicode.cldr.util.Builder.MBuilder; 25 import org.unicode.cldr.util.CLDRTransforms; 26 import org.unicode.cldr.util.CLDRTransforms.Direction; 27 import org.unicode.cldr.util.CLDRTransforms.MyHandler; 28 import org.unicode.cldr.util.CLDRTransforms.ParsedTransformID; 29 import org.unicode.cldr.util.CLDRTransforms.Visibility; 30 import org.unicode.cldr.util.StandardCodes; 31 import org.unicode.cldr.util.SupplementalDataInfo; 32 import org.unicode.cldr.util.XMLFileReader; 33 34 public class TestBcp47Transforms extends TestFmwk { 35 main(String[] args)36 public static void main(String[] args) { 37 new TestBcp47Transforms().run(args); 38 } 39 TestNames()40 public void TestNames() { 41 SupplementalDataInfo suppData = SupplementalDataInfo.getInstance(); 42 Relation<String, String> extensionToKeys = suppData.getBcp47Extension2Keys(); 43 Set<String> keys = extensionToKeys.getAll("t"); 44 // extension="t" name="m0" 45 Relation<String, String> keyToSubtypes = suppData.getBcp47Keys(); 46 Map<R2<String, String>, String> descriptions = suppData.getBcp47Descriptions(); 47 for (String key : keys) { 48 for (String subtype : keyToSubtypes.getAll(key)) { 49 String description = descriptions.get(Row.of(key, subtype)); 50 System.out.println(key + ", " + subtype + ", " + description); 51 } 52 } 53 Map<String, String> old2newName = new TreeMap<String, String>(); 54 for (String file : Arrays.asList(new File(CLDRTransforms.TRANSFORM_DIR).list())) { 55 if (!file.endsWith(".xml")) continue; 56 ParsedTransformID directionInfo = new ParsedTransformID(); 57 getIcuRulesFromXmlFile(CLDRTransforms.TRANSFORM_DIR, file, directionInfo); 58 if (directionInfo.getVisibility() == Visibility.internal) continue; 59 String source = directionInfo.source; 60 String target = directionInfo.target; 61 String variant = directionInfo.variant; 62 String standard = getStandard0(source, target, variant); 63 // System.out.println(standard 64 // + "\t =>\t" + directionInfo 65 // + "\tdirection:\t" + directionInfo.getDirection() 66 // + "\tvisibility:\t" + directionInfo.getVisibility() 67 // ); 68 if (!standard.contains("?")) { 69 old2newName.put(directionInfo.toString(), standard); 70 } 71 if (directionInfo.getDirection() == Direction.both) { 72 standard = getStandard0(source, target, variant); 73 if (!standard.contains("?")) { 74 old2newName.put(directionInfo.toString(), standard); 75 } 76 } 77 } 78 for (String source : Collections.list(Transliterator.getAvailableSources())) { 79 for (String target : Collections.list(Transliterator.getAvailableTargets(source))) { 80 for (String variant : 81 Collections.list(Transliterator.getAvailableVariants(source, target))) { 82 if (variant.isEmpty()) variant = null; 83 String name = source + "-" + target + (variant == null ? "" : "/" + variant); 84 if (!old2newName.containsKey(name)) { 85 String standard = getStandard0(source, target, variant); 86 if (!standard.contains("?")) { 87 old2newName.put(name, standard); 88 } 89 } 90 } 91 } 92 } 93 for (Entry<String, String> entry : old2newName.entrySet()) { 94 System.out.println(entry); 95 } 96 System.out.println("Missing"); 97 for (Entry<String, Set<R2<Type, String>>> entry : MISSING.keyValuesSet()) { 98 System.out.println(entry); 99 } 100 } 101 102 enum Type { 103 source, 104 target, 105 mechanism 106 } 107 getStandard0(String source, String target, String variant)108 private String getStandard0(String source, String target, String variant) { 109 String id = source + "-" + target + "/" + variant; 110 String newSource = getStandard(Type.source, source, id); 111 String newTarget = getStandard(Type.target, target, id); 112 String newMechanism = getStandard(Type.mechanism, variant, id); 113 return newTarget + "-t-" + newSource + (newMechanism == null ? "" : "-m0-" + newMechanism); 114 } 115 116 static ULocale.Builder ubuilder = new ULocale.Builder(); 117 static Relation<String, Row.R2<Type, String>> MISSING = 118 Relation.<String, Row.R2<Type, String>>of( 119 new TreeMap<String, Set<Row.R2<Type, String>>>(), TreeSet.class); 120 static StandardCodes sc = StandardCodes.make(); 121 122 static Map<String, String> SPECIAL_CASES; 123 static Set<String> languages = sc.getAvailableCodes("language"); 124 static Set<String> scripts = new HashSet<String>(); 125 static Set<String> regions = new HashSet<String>(); 126 127 static { 128 MBuilder<String, String, HashMap<String, String>> builder = 129 Builder.with(new HashMap<String, String>()); 130 // add language names 131 for (String s : languages) { 132 final String data = sc.getData("language", s); add(builder, s, data)133 add(builder, s, data); 134 } 135 // add script names. They override (eg Latin => und-Latn) 136 for (String s : sc.getAvailableCodes("script")) { s.toLowerCase(Locale.ENGLISH)137 scripts.add(s.toLowerCase(Locale.ENGLISH)); 138 final String data = sc.getData("script", s); add(builder, "und-" + s, data)139 add(builder, "und-" + s, data); 140 // System.out.println(data + "\t" + s); 141 } 142 for (String s : sc.getAvailableCodes("territory")) { s.toLowerCase(Locale.ENGLISH)143 regions.add(s.toLowerCase(Locale.ENGLISH)); 144 } 145 // real special cases 146 builder.put("any", "und") 147 .put("simplified", "Hans") 148 .put("traditional", "Hant") 149 .put("ipa", "und-fonipa") 150 .put("xsampa", "und-fonxsamp") 151 .put("japanesekana", "und-Hrkt"); 152 /* 153 * source fullwidth source jamo target accents target ascii target 154 * halfwidth target jamo target numericpinyin target publishing 155 */ 156 SPECIAL_CASES = builder.freeze(); 157 } 158 add( MBuilder<String, String, HashMap<String, String>> builder, String code, String names)159 public static void add( 160 MBuilder<String, String, HashMap<String, String>> builder, String code, String names) { 161 names = names.toLowerCase(Locale.ENGLISH); 162 if (!names.contains("▪")) { 163 builder.put(names, code); 164 return; 165 } 166 for (String name : names.split("▪")) { 167 builder.put(name, code); 168 } 169 } 170 getStandard(Type type, String source, String id)171 private String getStandard(Type type, String source, String id) { 172 source = source == null ? null : source.toLowerCase(Locale.ENGLISH); 173 if (type == Type.mechanism) { 174 if (source == null) return null; 175 if (source.equals("bgn") || source.equals("ungegn")) return source; 176 MISSING.put(source, Row.of(type, id)); 177 return "?" + source; 178 } 179 String special = SPECIAL_CASES.get(source); 180 if (special != null) { 181 return special; 182 } 183 int code; 184 try { 185 code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, source); 186 return "und-" + UScript.getShortName(code); 187 } catch (Exception e1) { 188 } 189 try { 190 ULocale ulocale = new ULocale(source); 191 // hack for now 192 String language = ulocale.getLanguage(); 193 if (languages.contains(language)) { 194 String script = ulocale.getScript(); 195 if (script.isEmpty() || scripts.contains(script.toLowerCase(Locale.ENGLISH))) { 196 String region = ulocale.getCountry(); 197 if (region.isEmpty() || regions.contains(region.toLowerCase(Locale.ENGLISH))) { 198 return ulocale.toLanguageTag(); 199 } 200 } 201 } 202 } catch (Exception e) { 203 } 204 // we failed 205 MISSING.put(source, Row.of(type, id)); 206 return "?" + source; 207 } 208 getIcuRulesFromXmlFile( String dir, String cldrFileName, ParsedTransformID directionInfo)209 public String getIcuRulesFromXmlFile( 210 String dir, String cldrFileName, ParsedTransformID directionInfo) { 211 final MyHandler myHandler = new CLDRTransforms.MyHandler(cldrFileName, directionInfo); 212 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 213 xfr.read( 214 dir + cldrFileName, 215 XMLFileReader.CONTENT_HANDLER | XMLFileReader.ERROR_HANDLER, 216 true); 217 return myHandler.getRules(); 218 } 219 } 220