1 package org.unicode.cldr.tool; 2 3 import com.google.common.collect.ImmutableSet; 4 import com.google.common.collect.ImmutableSet.Builder; 5 import com.google.common.collect.Multimap; 6 import com.ibm.icu.text.Collator; 7 import com.ibm.icu.util.ULocale; 8 import java.io.IOException; 9 import java.util.Collection; 10 import java.util.Comparator; 11 import java.util.LinkedHashSet; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeSet; 16 import org.unicode.cldr.util.*; 17 import org.unicode.cldr.util.StandardCodes.LstrField; 18 import org.unicode.cldr.util.StandardCodes.LstrType; 19 20 public class ChartLanguageGroups extends Chart { 21 22 private static final String SHOULD_NOT_BE_LEAF_NODE = ""; 23 private static final String LEAF_NODES = ""; 24 private static final String TREE_NODES = ""; 25 main(String[] args)26 public static void main(String[] args) { 27 new ChartLanguageGroups().writeChart(null); 28 } 29 30 static final Set<String> COLLECTIONS; 31 32 static { 33 Map<String, Map<LstrField, String>> languages = 34 StandardCodes.getEnumLstreg().get(LstrType.language); 35 Builder<String> _collections = ImmutableSet.<String>builder(); 36 for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) { 37 String scope = e.getValue().get(LstrField.Scope); 38 if (scope != null && "Collection".equalsIgnoreCase(scope)) { e.getKey()39 _collections.add(e.getKey()); 40 } 41 } 42 COLLECTIONS = _collections.build(); 43 } 44 45 @Override getDirectory()46 public String getDirectory() { 47 return FormattedFileWriter.CHART_TARGET_DIR; 48 } 49 50 @Override getTitle()51 public String getTitle() { 52 return "Language Groups"; 53 } 54 55 @Override getExplanation()56 public String getExplanation() { 57 return "<p>This chart shows draft language groups based on data extracted from wikidata. " 58 + "The <b>Status</b> cell indicates the nature of the items in the adjacent <b>Contained</b> cell:<p>" 59 + "<ul>\n" 60 + "<li>A " 61 + TREE_NODES 62 + " indicates that the contained languages are tree nodes (contain other languages or langauge groups), " 63 + "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n" 64 + "<li>A " 65 + LEAF_NODES 66 + " indicates that the contained languages are leaf nodes (contain nothing).</li>\n" 67 + "<li>A " 68 + SHOULD_NOT_BE_LEAF_NODE 69 + " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is " 70 + "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n" 71 + "</ul>\n" 72 + "<p><b>Caveats:</b> Only the wikidata containment for " 73 + "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used." 74 + "The containment data is not complete: " 75 + "if a language doesn't appear in the chart it could be an isolate, or just be missing data." 76 + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n"; 77 } 78 79 Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH); 80 81 @Override writeContents(FormattedFileWriter pw)82 public void writeContents(FormattedFileWriter pw) throws IOException { 83 84 Multimap<String, String> lg = 85 CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups(); 86 87 TablePrinter tablePrinter = 88 new TablePrinter() 89 .addColumn( 90 "Language Group", 91 "class='source'", 92 CldrUtility.getDoubleLinkMsg(), 93 "class='source'", 94 true) 95 .setBreakSpans(true) 96 .addColumn("Name", "class='source'", null, "class='source'", true) 97 .addColumn("St.", "class='source'", null, "class='source'", true) 98 .addColumn("Contained", "class='source'", null, "class='target'", true) 99 .setBreakSpans(true); 100 101 show(lg, LocaleNames.MUL, tablePrinter); 102 pw.write(tablePrinter.toTable()); 103 } 104 show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter)105 private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) { 106 Collection<String> children = lg.get(parent); 107 if (children == null || children.isEmpty()) { 108 return; 109 } 110 TreeSet<Pair<String, String>> nameAndCode = 111 new TreeSet<>( 112 new Comparator<Pair<String, String>>() { 113 @Override 114 public int compare(Pair<String, String> o1, Pair<String, String> o2) { 115 int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst()); 116 if (diff != 0) { 117 return diff; 118 } 119 return o1.getSecond().compareTo(o2.getSecond()); 120 } 121 }); 122 for (String lang : children) { 123 nameAndCode.add(Pair.of(getLangName(lang), lang)); 124 } 125 StringBuilder treeList = new StringBuilder(); 126 StringBuilder leafList = new StringBuilder(); 127 LinkedHashSet<Pair<String, String>> nameAndCodeWithChildren = new LinkedHashSet<>(); 128 for (Pair<String, String> pair : nameAndCode) { 129 String code = pair.getSecond(); 130 if (lg.containsKey(code)) { 131 addChildren(treeList, TREE_NODES, pair, false); 132 nameAndCodeWithChildren.add(pair); 133 } else if (!code.equals("und")) { 134 addChildren(leafList, LEAF_NODES, pair, true); 135 } 136 } 137 if (treeList.length() != 0) { 138 addRow(parent, tablePrinter, TREE_NODES, treeList); 139 } 140 if (leafList.length() != 0) { 141 addRow(parent, tablePrinter, LEAF_NODES, leafList); 142 } 143 144 for (Pair<String, String> pair : nameAndCodeWithChildren) { 145 show(lg, pair.getSecond(), tablePrinter); 146 } 147 } 148 addRow( String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList)149 private void addRow( 150 String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) { 151 tablePrinter 152 .addRow() 153 .addCell(parent) 154 .addCell(getLangName(parent)) 155 .addCell(marker) 156 .addCell(treeList.toString()) 157 .finishRow(); 158 } 159 addChildren( StringBuilder treeList, String marker, Pair<String, String> pair, boolean showCollections)160 private void addChildren( 161 StringBuilder treeList, 162 String marker, 163 Pair<String, String> pair, 164 boolean showCollections) { 165 if (treeList.length() != 0) { 166 treeList.append("; "); 167 } 168 treeList.append(getPairName(pair, showCollections)); 169 } 170 getPairName(Pair<String, String> pair, boolean showCollection)171 private String getPairName(Pair<String, String> pair, boolean showCollection) { 172 return (showCollection && COLLECTIONS.contains(pair.getSecond()) 173 ? SHOULD_NOT_BE_LEAF_NODE + " " 174 : "") 175 + pair.getSecond() 176 + " “" 177 + pair.getFirst() 178 + "”"; 179 } 180 getLangName(String langCode)181 private String getLangName(String langCode) { 182 return langCode.equals(LocaleNames.MUL) 183 ? "All" 184 : langCode.equals("zh") 185 ? "Mandarin Chinese" 186 : ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode) 187 .replace(" (Other)", "") 188 .replace(" languages", ""); 189 } 190 } 191