xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/DeriveScripts.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.ImmutableMap;
4 import com.google.common.collect.ImmutableMultimap;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.TreeMultimap;
7 import com.ibm.icu.lang.UScript;
8 import com.ibm.icu.text.UnicodeSet;
9 import java.io.File;
10 import java.util.Collection;
11 import java.util.HashSet;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRFile.WinningChoice;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.Iso639Data;
22 import org.unicode.cldr.util.LanguageTagCanonicalizer;
23 import org.unicode.cldr.util.LanguageTagParser;
24 import org.unicode.cldr.util.SimpleFactory;
25 import org.unicode.cldr.util.StandardCodes;
26 import org.unicode.cldr.util.StandardCodes.LstrField;
27 import org.unicode.cldr.util.StandardCodes.LstrType;
28 import org.unicode.cldr.util.SupplementalDataInfo;
29 
30 public class DeriveScripts {
31     private static final boolean SHOW = false;
32 
33     static final CLDRConfig CONFIG = CLDRConfig.getInstance();
34     static final SupplementalDataInfo SUP = CONFIG.getSupplementalDataInfo();
35     static final Multimap<String, String> LANG_TO_SCRIPT;
36     static final Map<String, String> SUPPRESS;
37 
38     static {
39         File[] paths = {
40             //            new File(CLDRPaths.MAIN_DIRECTORY),
41             //            new File(CLDRPaths.SEED_DIRECTORY),
42             new File(CLDRPaths.EXEMPLARS_DIRECTORY)
43         };
44         final Factory fullCldrFactory = SimpleFactory.make(paths, ".*");
45         LikelySubtags ls = new LikelySubtags();
46         LanguageTagParser ltp = new LanguageTagParser();
47         Set<String> seen = new HashSet<>();
48 
49         Multimap<String, String> langToScript = TreeMultimap.create();
50 
51         Map<String, String> suppress = new TreeMap<>();
52         final Map<String, Map<LstrField, String>> langToInfo =
53                 StandardCodes.getLstregEnumRaw().get(LstrType.language);
54         for (Entry<String, Map<LstrField, String>> entry : langToInfo.entrySet()) {
55             final String suppressValue = entry.getValue().get(LstrField.Suppress_Script);
56             if (suppressValue != null) {
57                 final String langCode = entry.getKey();
58                 String likelyScript = ls.getLikelyScript(langCode);
59                 if (!likelyScript.equals("Zzzz")) {
60                     //                    if (!suppressValue.equals(likelyScript)) {
61                     //                        System.out.println("#" + langCode + "\tWarning:
62                     // likely=" + likelyScript + ", suppress=" + suppressValue);
63                     //                    } else {
64                     //                        System.out.println("#" + langCode +
65                     // "\tSuppress=Likely: " + suppressValue);
66                     //                    }
67                     continue;
68                 }
suppress.put(langCode, suppressValue)69                 suppress.put(langCode, suppressValue);
70             }
71         }
72         SUPPRESS = ImmutableMap.copyOf(suppress);
73 
74         LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();
75 
76         for (String file : fullCldrFactory.getAvailable()) {
77             String langScript = ltp.set(file).getLanguage();
78             if (!file.equals(langScript)) { // skip other variants
79                 continue;
80             }
81             //            System.out.println(file);
82             //            if (!seen.add(lang)) { // add if not present
83             //                continue;
84             //            }
85             String lang = canon.transform(ltp.getLanguage());
86             if (lang.equals("root")) {
87                 continue;
88             }
89 
90             //            String likelyScript = ls.getLikelyScript(lang);
91             //            if (!likelyScript.equals("Zzzz")) {
92             //                continue;
93             //            }
94 
95             String script = "";
96             //            script = ltp.getScript();
97             //            if (!script.isEmpty()) {
98             //                add(langToScript, lang, script);
99             //                continue;
100             //            }
101 
102             CLDRFile cldrFile;
103             try {
104                 cldrFile = fullCldrFactory.make(lang, false);
105             } catch (final SimpleFactory.NoSourceDirectoryException nsde) {
106                 throw new RuntimeException(
107                         "Cannot load locale "
108                                 + lang
109                                 + " for "
110                                 + file
111                                 + " (canonicalized from "
112                                 + ltp.getLanguage()
113                                 + ")",
114                         nsde);
115             }
116             UnicodeSet exemplars = cldrFile.getExemplarSet("", WinningChoice.WINNING);
117             for (String s : exemplars) {
118                 int scriptNum = UScript.getScript(s.codePointAt(0));
119                 if (scriptNum != UScript.COMMON
120                         && scriptNum != UScript.INHERITED
121                         && scriptNum != UScript.UNKNOWN) {
122                     script = UScript.getShortName(scriptNum);
123                     break;
124                 }
125             }
126             if (!script.isEmpty()) {
add(langToScript, lang, script)127                 add(langToScript, lang, script);
128             }
129         }
130         LANG_TO_SCRIPT = ImmutableMultimap.copyOf(langToScript);
131     }
132 
add(Multimap<String, String> langToScript, String lang, String script)133     private static void add(Multimap<String, String> langToScript, String lang, String script) {
134         if (script != null) {
135             if (langToScript.put(lang, script)) {
136                 if (SHOW)
137                     System.out.println("# Adding from actual exemplars: " + lang + ", " + script);
138             }
139         }
140     }
141 
getLanguageToScript()142     public static Multimap<String, String> getLanguageToScript() {
143         return LANG_TO_SCRIPT;
144     }
145 
showLine(String language, String scriptField, String status)146     public static void showLine(String language, String scriptField, String status) {
147         CLDRFile english = CONFIG.getEnglish();
148         System.out.println(
149                 language
150                         + ";\t"
151                         + scriptField
152                         + "\t# "
153                         + english.getName(CLDRFile.LANGUAGE_NAME, language)
154                         + ";\t"
155                         + status
156                         + ";\t"
157                         + Iso639Data.getScope(language)
158                         + ";\t"
159                         + Iso639Data.getType(language));
160     }
161 
main(String[] args)162     public static void main(String[] args) {
163         LikelySubtags ls = new LikelySubtags();
164         CLDRFile english = CONFIG.getEnglish();
165         int count = 0;
166 
167         int i = 0;
168         System.out.println("#From Suppress Script");
169         for (Entry<String, String> entry : SUPPRESS.entrySet()) {
170             showLine(entry.getKey(), entry.getValue(), "Suppress");
171             ++i;
172         }
173         System.out.println("#total:\t" + i);
174         i = 0;
175         boolean haveMore = true;
176 
177         System.out.println("\n#From Exemplars");
178         for (int scriptCount = 1; haveMore; ++scriptCount) {
179             haveMore = false;
180             if (scriptCount != 1) {
181                 System.out.println("\n#NEEDS RESOLUTION:\t" + scriptCount + " scripts");
182             }
183             for (Entry<String, Collection<String>> entry :
184                     getLanguageToScript().asMap().entrySet()) {
185                 Collection<String> scripts = entry.getValue();
186                 final int scriptsSize = scripts.size();
187                 if (scriptsSize != scriptCount) {
188                     if (scriptsSize > scriptCount) {
189                         haveMore = true;
190                     }
191                     continue;
192                 }
193 
194                 String lang = entry.getKey();
195                 showLine(
196                         lang,
197                         scripts.size() == 1 ? scripts.iterator().next() : scripts.toString(),
198                         "Exemplars" + (scripts.size() == 1 ? "" : "*"));
199                 ++i;
200                 String likelyScript = scriptsSize == 1 ? "" : ls.getLikelyScript(lang);
201                 System.out.println(
202                         ++count
203                                 + "\t"
204                                 + scriptsSize
205                                 + "\t"
206                                 + lang
207                                 + "\t"
208                                 + english.getName(lang)
209                                 + "\t"
210                                 + scripts
211                                 + "\t"
212                                 + likelyScript
213                         //                + "\t" + script + "\t" +
214                         // english.getName(CLDRFile.SCRIPT_NAME, script)
215                         );
216             }
217             System.out.println("#total:\t" + i);
218             i = 0;
219         }
220     }
221 
getSuppress()222     public static Map<String, String> getSuppress() {
223         return SUPPRESS;
224     }
225 }
226