1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.ImmutableMap; 4 import com.google.common.collect.ImmutableMultimap; 5 import com.google.common.collect.Multimap; 6 import com.google.common.collect.TreeMultimap; 7 import com.ibm.icu.lang.UScript; 8 import com.ibm.icu.text.UnicodeSet; 9 import java.io.File; 10 import java.util.Collection; 11 import java.util.HashSet; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import org.unicode.cldr.util.CLDRConfig; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.CLDRFile.WinningChoice; 19 import org.unicode.cldr.util.CLDRPaths; 20 import org.unicode.cldr.util.Factory; 21 import org.unicode.cldr.util.Iso639Data; 22 import org.unicode.cldr.util.LanguageTagCanonicalizer; 23 import org.unicode.cldr.util.LanguageTagParser; 24 import org.unicode.cldr.util.SimpleFactory; 25 import org.unicode.cldr.util.StandardCodes; 26 import org.unicode.cldr.util.StandardCodes.LstrField; 27 import org.unicode.cldr.util.StandardCodes.LstrType; 28 import org.unicode.cldr.util.SupplementalDataInfo; 29 30 public class DeriveScripts { 31 private static final boolean SHOW = false; 32 33 static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 34 static final SupplementalDataInfo SUP = CONFIG.getSupplementalDataInfo(); 35 static final Multimap<String, String> LANG_TO_SCRIPT; 36 static final Map<String, String> SUPPRESS; 37 38 static { 39 File[] paths = { 40 // new File(CLDRPaths.MAIN_DIRECTORY), 41 // new File(CLDRPaths.SEED_DIRECTORY), 42 new File(CLDRPaths.EXEMPLARS_DIRECTORY) 43 }; 44 final Factory fullCldrFactory = SimpleFactory.make(paths, ".*"); 45 LikelySubtags ls = new LikelySubtags(); 46 LanguageTagParser ltp = new LanguageTagParser(); 47 Set<String> seen = new HashSet<>(); 48 49 Multimap<String, String> langToScript = TreeMultimap.create(); 50 51 Map<String, String> suppress = new TreeMap<>(); 52 final Map<String, Map<LstrField, String>> langToInfo = 53 StandardCodes.getLstregEnumRaw().get(LstrType.language); 54 for (Entry<String, Map<LstrField, String>> entry : langToInfo.entrySet()) { 55 final String suppressValue = entry.getValue().get(LstrField.Suppress_Script); 56 if (suppressValue != null) { 57 final String langCode = entry.getKey(); 58 String likelyScript = ls.getLikelyScript(langCode); 59 if (!likelyScript.equals("Zzzz")) { 60 // if (!suppressValue.equals(likelyScript)) { 61 // System.out.println("#" + langCode + "\tWarning: 62 // likely=" + likelyScript + ", suppress=" + suppressValue); 63 // } else { 64 // System.out.println("#" + langCode + 65 // "\tSuppress=Likely: " + suppressValue); 66 // } 67 continue; 68 } suppress.put(langCode, suppressValue)69 suppress.put(langCode, suppressValue); 70 } 71 } 72 SUPPRESS = ImmutableMap.copyOf(suppress); 73 74 LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer(); 75 76 for (String file : fullCldrFactory.getAvailable()) { 77 String langScript = ltp.set(file).getLanguage(); 78 if (!file.equals(langScript)) { // skip other variants 79 continue; 80 } 81 // System.out.println(file); 82 // if (!seen.add(lang)) { // add if not present 83 // continue; 84 // } 85 String lang = canon.transform(ltp.getLanguage()); 86 if (lang.equals("root")) { 87 continue; 88 } 89 90 // String likelyScript = ls.getLikelyScript(lang); 91 // if (!likelyScript.equals("Zzzz")) { 92 // continue; 93 // } 94 95 String script = ""; 96 // script = ltp.getScript(); 97 // if (!script.isEmpty()) { 98 // add(langToScript, lang, script); 99 // continue; 100 // } 101 102 CLDRFile cldrFile; 103 try { 104 cldrFile = fullCldrFactory.make(lang, false); 105 } catch (final SimpleFactory.NoSourceDirectoryException nsde) { 106 throw new RuntimeException( 107 "Cannot load locale " 108 + lang 109 + " for " 110 + file 111 + " (canonicalized from " 112 + ltp.getLanguage() 113 + ")", 114 nsde); 115 } 116 UnicodeSet exemplars = cldrFile.getExemplarSet("", WinningChoice.WINNING); 117 for (String s : exemplars) { 118 int scriptNum = UScript.getScript(s.codePointAt(0)); 119 if (scriptNum != UScript.COMMON 120 && scriptNum != UScript.INHERITED 121 && scriptNum != UScript.UNKNOWN) { 122 script = UScript.getShortName(scriptNum); 123 break; 124 } 125 } 126 if (!script.isEmpty()) { add(langToScript, lang, script)127 add(langToScript, lang, script); 128 } 129 } 130 LANG_TO_SCRIPT = ImmutableMultimap.copyOf(langToScript); 131 } 132 add(Multimap<String, String> langToScript, String lang, String script)133 private static void add(Multimap<String, String> langToScript, String lang, String script) { 134 if (script != null) { 135 if (langToScript.put(lang, script)) { 136 if (SHOW) 137 System.out.println("# Adding from actual exemplars: " + lang + ", " + script); 138 } 139 } 140 } 141 getLanguageToScript()142 public static Multimap<String, String> getLanguageToScript() { 143 return LANG_TO_SCRIPT; 144 } 145 showLine(String language, String scriptField, String status)146 public static void showLine(String language, String scriptField, String status) { 147 CLDRFile english = CONFIG.getEnglish(); 148 System.out.println( 149 language 150 + ";\t" 151 + scriptField 152 + "\t# " 153 + english.getName(CLDRFile.LANGUAGE_NAME, language) 154 + ";\t" 155 + status 156 + ";\t" 157 + Iso639Data.getScope(language) 158 + ";\t" 159 + Iso639Data.getType(language)); 160 } 161 main(String[] args)162 public static void main(String[] args) { 163 LikelySubtags ls = new LikelySubtags(); 164 CLDRFile english = CONFIG.getEnglish(); 165 int count = 0; 166 167 int i = 0; 168 System.out.println("#From Suppress Script"); 169 for (Entry<String, String> entry : SUPPRESS.entrySet()) { 170 showLine(entry.getKey(), entry.getValue(), "Suppress"); 171 ++i; 172 } 173 System.out.println("#total:\t" + i); 174 i = 0; 175 boolean haveMore = true; 176 177 System.out.println("\n#From Exemplars"); 178 for (int scriptCount = 1; haveMore; ++scriptCount) { 179 haveMore = false; 180 if (scriptCount != 1) { 181 System.out.println("\n#NEEDS RESOLUTION:\t" + scriptCount + " scripts"); 182 } 183 for (Entry<String, Collection<String>> entry : 184 getLanguageToScript().asMap().entrySet()) { 185 Collection<String> scripts = entry.getValue(); 186 final int scriptsSize = scripts.size(); 187 if (scriptsSize != scriptCount) { 188 if (scriptsSize > scriptCount) { 189 haveMore = true; 190 } 191 continue; 192 } 193 194 String lang = entry.getKey(); 195 showLine( 196 lang, 197 scripts.size() == 1 ? scripts.iterator().next() : scripts.toString(), 198 "Exemplars" + (scripts.size() == 1 ? "" : "*")); 199 ++i; 200 String likelyScript = scriptsSize == 1 ? "" : ls.getLikelyScript(lang); 201 System.out.println( 202 ++count 203 + "\t" 204 + scriptsSize 205 + "\t" 206 + lang 207 + "\t" 208 + english.getName(lang) 209 + "\t" 210 + scripts 211 + "\t" 212 + likelyScript 213 // + "\t" + script + "\t" + 214 // english.getName(CLDRFile.SCRIPT_NAME, script) 215 ); 216 } 217 System.out.println("#total:\t" + i); 218 i = 0; 219 } 220 } 221 getSuppress()222 public static Map<String, String> getSuppress() { 223 return SUPPRESS; 224 } 225 } 226