xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.collect.ImmutableSet;
4 import com.google.common.collect.ImmutableSet.Builder;
5 import com.google.common.collect.Multimap;
6 import com.ibm.icu.text.Collator;
7 import com.ibm.icu.util.ULocale;
8 import java.io.IOException;
9 import java.util.Collection;
10 import java.util.Comparator;
11 import java.util.LinkedHashSet;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeSet;
16 import org.unicode.cldr.util.*;
17 import org.unicode.cldr.util.StandardCodes.LstrField;
18 import org.unicode.cldr.util.StandardCodes.LstrType;
19 
20 public class ChartLanguageGroups extends Chart {
21 
22     private static final String SHOULD_NOT_BE_LEAF_NODE = "��";
23     private static final String LEAF_NODES = "��";
24     private static final String TREE_NODES = "��";
25 
main(String[] args)26     public static void main(String[] args) {
27         new ChartLanguageGroups().writeChart(null);
28     }
29 
30     static final Set<String> COLLECTIONS;
31 
32     static {
33         Map<String, Map<LstrField, String>> languages =
34                 StandardCodes.getEnumLstreg().get(LstrType.language);
35         Builder<String> _collections = ImmutableSet.<String>builder();
36         for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) {
37             String scope = e.getValue().get(LstrField.Scope);
38             if (scope != null && "Collection".equalsIgnoreCase(scope)) {
e.getKey()39                 _collections.add(e.getKey());
40             }
41         }
42         COLLECTIONS = _collections.build();
43     }
44 
45     @Override
getDirectory()46     public String getDirectory() {
47         return FormattedFileWriter.CHART_TARGET_DIR;
48     }
49 
50     @Override
getTitle()51     public String getTitle() {
52         return "Language Groups";
53     }
54 
55     @Override
getExplanation()56     public String getExplanation() {
57         return "<p>This chart shows draft language groups based on data extracted from wikidata. "
58                 + "The <b>Status</b> cell indicates the nature of the items in the adjacent <b>Contained</b> cell:<p>"
59                 + "<ul>\n"
60                 + "<li>A "
61                 + TREE_NODES
62                 + " indicates that the contained languages are tree nodes (contain other languages or langauge groups), "
63                 + "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n"
64                 + "<li>A "
65                 + LEAF_NODES
66                 + " indicates that the contained languages are leaf nodes (contain nothing).</li>\n"
67                 + "<li>A "
68                 + SHOULD_NOT_BE_LEAF_NODE
69                 + " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is "
70                 + "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n"
71                 + "</ul>\n"
72                 + "<p><b>Caveats:</b> Only the wikidata containment for "
73                 + "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used."
74                 + "The containment data is not complete: "
75                 + "if a language doesn't appear in the chart it could be an isolate, or just be missing data."
76                 + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n";
77     }
78 
79     Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);
80 
81     @Override
writeContents(FormattedFileWriter pw)82     public void writeContents(FormattedFileWriter pw) throws IOException {
83 
84         Multimap<String, String> lg =
85                 CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups();
86 
87         TablePrinter tablePrinter =
88                 new TablePrinter()
89                         .addColumn(
90                                 "Language Group",
91                                 "class='source'",
92                                 CldrUtility.getDoubleLinkMsg(),
93                                 "class='source'",
94                                 true)
95                         .setBreakSpans(true)
96                         .addColumn("Name", "class='source'", null, "class='source'", true)
97                         .addColumn("St.", "class='source'", null, "class='source'", true)
98                         .addColumn("Contained", "class='source'", null, "class='target'", true)
99                         .setBreakSpans(true);
100 
101         show(lg, LocaleNames.MUL, tablePrinter);
102         pw.write(tablePrinter.toTable());
103     }
104 
show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter)105     private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) {
106         Collection<String> children = lg.get(parent);
107         if (children == null || children.isEmpty()) {
108             return;
109         }
110         TreeSet<Pair<String, String>> nameAndCode =
111                 new TreeSet<>(
112                         new Comparator<Pair<String, String>>() {
113                             @Override
114                             public int compare(Pair<String, String> o1, Pair<String, String> o2) {
115                                 int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
116                                 if (diff != 0) {
117                                     return diff;
118                                 }
119                                 return o1.getSecond().compareTo(o2.getSecond());
120                             }
121                         });
122         for (String lang : children) {
123             nameAndCode.add(Pair.of(getLangName(lang), lang));
124         }
125         StringBuilder treeList = new StringBuilder();
126         StringBuilder leafList = new StringBuilder();
127         LinkedHashSet<Pair<String, String>> nameAndCodeWithChildren = new LinkedHashSet<>();
128         for (Pair<String, String> pair : nameAndCode) {
129             String code = pair.getSecond();
130             if (lg.containsKey(code)) {
131                 addChildren(treeList, TREE_NODES, pair, false);
132                 nameAndCodeWithChildren.add(pair);
133             } else if (!code.equals("und")) {
134                 addChildren(leafList, LEAF_NODES, pair, true);
135             }
136         }
137         if (treeList.length() != 0) {
138             addRow(parent, tablePrinter, TREE_NODES, treeList);
139         }
140         if (leafList.length() != 0) {
141             addRow(parent, tablePrinter, LEAF_NODES, leafList);
142         }
143 
144         for (Pair<String, String> pair : nameAndCodeWithChildren) {
145             show(lg, pair.getSecond(), tablePrinter);
146         }
147     }
148 
addRow( String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList)149     private void addRow(
150             String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) {
151         tablePrinter
152                 .addRow()
153                 .addCell(parent)
154                 .addCell(getLangName(parent))
155                 .addCell(marker)
156                 .addCell(treeList.toString())
157                 .finishRow();
158     }
159 
addChildren( StringBuilder treeList, String marker, Pair<String, String> pair, boolean showCollections)160     private void addChildren(
161             StringBuilder treeList,
162             String marker,
163             Pair<String, String> pair,
164             boolean showCollections) {
165         if (treeList.length() != 0) {
166             treeList.append("; ");
167         }
168         treeList.append(getPairName(pair, showCollections));
169     }
170 
getPairName(Pair<String, String> pair, boolean showCollection)171     private String getPairName(Pair<String, String> pair, boolean showCollection) {
172         return (showCollection && COLLECTIONS.contains(pair.getSecond())
173                         ? SHOULD_NOT_BE_LEAF_NODE + " "
174                         : "")
175                 + pair.getSecond()
176                 + " “"
177                 + pair.getFirst()
178                 + "”";
179     }
180 
getLangName(String langCode)181     private String getLangName(String langCode) {
182         return langCode.equals(LocaleNames.MUL)
183                 ? "All"
184                 : langCode.equals("zh")
185                         ? "Mandarin Chinese"
186                         : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode)
187                                 .replace(" (Other)", "")
188                                 .replace(" languages", "");
189     }
190 }
191