package org.unicode.cldr.tool;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.ibm.icu.text.Collator;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Emoji;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.XPathParts;

/**
 * Command-line tool that prints a tab-separated report comparing the emoji annotations of one
 * locale against two CSV files ({@code <locale>_removed.csv} and {@code <locale>_added.csv})
 * listing keywords to remove and to add per emoji.
 */
public class CompareEmoji {
    /** Splits a keyword annotation value on {@code |}, trimming and dropping empty pieces. */
    private static final Splitter BAR_SPLITTER = Splitter.on("|").trimResults().omitEmptyStrings();

    static final CLDRConfig CONFIG = CLDRConfig.getInstance();
    static final Factory FACTORY = CONFIG.getAnnotationsFactory();
    private static final File[] paths = {new File(CLDRPaths.ANNOTATIONS_DERIVED_DIRECTORY)};
    static final Factory FACTORY_DERIVED = SimpleFactory.make(paths, ".*");

    private static final Joiner BAR_JOINER = Joiner.on(" | ");
    private static final Collator collator = CLDRConfig.getInstance().getCollator();

    /** Directory that holds the {@code _removed.csv} / {@code _added.csv} input files. */
    private static final String base =
            "/Users/markdavis/github/private/DATA/cldr-private/emoji_diff/";

    /** Locale reported on when no locale is given on the command line. */
    private static final String DEFAULT_LOCALE = "zh_Hant";

    /** Everything in {@code Emoji.getAllRgi()}, in collator order; drives the report rows. */
    private static final Set<String> sorted =
            ImmutableSet.copyOf(Emoji.getAllRgi().addAllTo(new TreeSet<>(collator)));

    /** Where the annotation data for an emoji was found. */
    enum Status {
        /** Found in the file from the regular annotations factory. */
        regular,
        /** First found in the file from the derived-annotations directory. */
        constructed,
        /** No annotation record was found at all. */
        missing;

        /** Returns the upper-cased first letter of the constant name, used in the report. */
        char abbreviation() {
            return Character.toUpperCase(name().charAt(0));
        }
    }

    /** Annotation data collected for a single {@code cp} attribute value. */
    private static class EmojiData {
        /** Value of the annotation path that carries a {@code type} attribute; may be null. */
        String shortName;

        /**
         * Keywords from the annotation path without a {@code type} attribute. Never null: it
         * stays empty when only the name path exists, so callers can diff and join it safely.
         */
        Set<String> searchKeywords = ImmutableSet.of();

        Status status;

        @Override
        public String toString() {
            return shortName + "; " + searchKeywords + "; " + status;
        }
    }

    /**
     * Prints one row per emoji that has an entry in the removed or the added CSV file: running
     * number, emoji, status abbreviation, name, current keywords minus the removed ones, the
     * removed keywords, and the added keywords.
     *
     * @param args optional; {@code args[0]} is the locale to report on (default {@code zh_Hant})
     * @throws IOException if one of the CSV files cannot be read
     */
    public static void main(String[] args) throws IOException {
        final String locale = args != null && args.length > 0 ? args[0] : DEFAULT_LOCALE;

        Map<String, EmojiData> annotations = getDataFor(locale);

        Map<String, Set<String>> removed = loadItems(locale, "_removed.csv", new HashMap<>());
        Map<String, Set<String>> added = loadItems(locale, "_added.csv", new HashMap<>());

        int count = 0;
        System.out.println("No.\tEmoji\tType\tName\tCommon\tRemoved\tAdded");
        for (String key : sorted) {
            Set<String> removedSet = removed.get(key);
            Set<String> addedSet = added.get(key);
            if (removedSet == null && addedSet == null) {
                continue; // nothing to report for this emoji
            }

            // Annotations are looked up without the emoji variation selector.
            String minimal = key.replace(Emoji.EMOJI_VARIANT, "");
            EmojiData v = annotations.get(minimal);
            Set<String> commonSet;
            String shortName;
            Status status;
            if (v == null) {
                commonSet = Set.of();
                shortName = "<constructed>";
                status = Status.missing;
            } else {
                commonSet = v.searchKeywords; // never null, see EmojiData
                shortName = v.shortName;
                status = v.status;
            }

            if (removedSet != null) {
                commonSet = Sets.difference(commonSet, removedSet);
            }
            System.out.println(
                    ++count //
                            + "\t"
                            + key //
                            + "\t"
                            + status.abbreviation() //
                            + "\t"
                            + shortName //
                            + "\t"
                            + BAR_JOINER.join(commonSet) //
                            + "\t"
                            + joinOrEmpty(removedSet) //
                            + "\t"
                            + joinOrEmpty(addedSet) //
                    );
        }
    }

    /** Joins the items with {@code " | "}, or returns the empty string for a null set. */
    private static String joinOrEmpty(Set<String> items) {
        return items == null ? "" : BAR_JOINER.join(items);
    }

    /**
     * Collects annotation data for the locale, first from the regular annotations factory and then
     * from the derived-annotations factory. Records created by the first pass keep status {@code
     * regular}; records first created by the second pass get {@code constructed}.
     */
    private static Map<String, EmojiData> getDataFor(String locale) {
        Map<String, EmojiData> result = new HashMap<>();
        CLDRFile cldrfile = FACTORY.make(locale, true);
        getDataIn(cldrfile, result, Status.regular);
        CLDRFile cldrfileDerived = FACTORY_DERIVED.make(locale, true);
        getDataIn(cldrfileDerived, result, Status.constructed);
        return result;
    }

    /**
     * Adds the data of every path in {@code cldrfile} whose last element has a {@code cp}
     * attribute to {@code result}, keyed by that attribute value.
     *
     * @param cldrfile file whose paths are scanned
     * @param result map that is updated in place
     * @param status status given to records created by this call; existing records keep theirs
     */
    public static void getDataIn(CLDRFile cldrfile, Map<String, EmojiData> result, Status status) {
        for (String path : cldrfile) {
            XPathParts parts = XPathParts.getFrozenInstance(path);
            String cp = parts.getAttributeValue(-1, "cp");
            if (cp == null) {
                continue;
            }
            EmojiData record = result.get(cp);
            if (record == null) {
                record = new EmojiData();
                record.status = status;
                result.put(cp, record);
            }
            String value = cldrfile.getStringValue(path);
            // A "type" attribute on the last element marks the name path; without it the
            // value is the bar-separated keyword list.
            boolean istts = parts.getAttributeValue(-1, "type") != null;
            if (istts) {
                record.shortName = value;
            } else if (value != null) {
                // Defensive: a null value would make the splitter throw; keep the empty set.
                record.searchKeywords = ImmutableSet.copyOf(BAR_SPLITTER.splitToList(value));
            }
        }
    }

    /**
     * Reads the file named {@code locale + suffix} from the base directory and stores, for each
     * data line, the first field as key and the remaining fields (in collator order) as an
     * immutable set. Lines starting with {@code "Emoji,"} and lines with fewer than two fields are
     * skipped; a key that occurs again replaces the earlier entry.
     *
     * @param locale locale identifier used as the file-name prefix
     * @param suffix file-name suffix, for example {@code "_removed.csv"}
     * @param result map that is filled in place
     * @return {@code result}
     * @throws IOException if reading the file fails
     */
    public static Map<String, Set<String>> loadItems(
            String locale, String suffix, Map<String, Set<String>> result) throws IOException {
        try (BufferedReader reader = FileUtilities.openUTF8Reader(base, locale + suffix)) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.startsWith("Emoji,")) {
                    continue; // header row
                }
                String[] split = FileUtilities.splitCommaSeparated(line);
                if (split.length < 2) {
                    continue;
                }
                Set<String> values = new TreeSet<>(collator);
                for (int i = 1; i < split.length; ++i) {
                    values.add(split[i]);
                }
                result.put(split[0], ImmutableSet.copyOf(values));
            }
            return result;
        }
    }
}