xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareEmoji.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.Sets;
7 import com.ibm.icu.text.Collator;
8 import java.io.BufferedReader;
9 import java.io.File;
10 import java.io.IOException;
11 import java.util.HashMap;
12 import java.util.Map;
13 import java.util.Set;
14 import java.util.TreeSet;
15 import org.unicode.cldr.draft.FileUtilities;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRPaths;
19 import org.unicode.cldr.util.Emoji;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.SimpleFactory;
22 import org.unicode.cldr.util.XPathParts;
23 
24 public class CompareEmoji {
25     private static final Splitter BAR_SPLITTER = Splitter.on("|").trimResults().omitEmptyStrings();
26     static final CLDRConfig CONFIG = CLDRConfig.getInstance();
27     static final Factory FACTORY = CONFIG.getAnnotationsFactory();
28     private static final File[] paths = {new File(CLDRPaths.ANNOTATIONS_DERIVED_DIRECTORY)};
29     static final Factory FACTORY_DERIVED = SimpleFactory.make(paths, ".*");
30 
31     private static final Joiner BAR_JOINER = Joiner.on(" | ");
32     private static final Collator collator = CLDRConfig.getInstance().getCollator();
33     private static final String base =
34             "/Users/markdavis/github/private/DATA/cldr-private/emoji_diff/";
35     private static final Set<String> sorted =
36             ImmutableSet.copyOf(Emoji.getAllRgi().addAllTo(new TreeSet<>(collator)));
37 
38     enum Status {
39         regular,
40         constructed,
41         missing;
42 
abbreviation()43         char abbreviation() {
44             return Character.toUpperCase(name().charAt(0));
45         }
46     }
47 
48     private static class EmojiData {
49         String shortName;
50         Set<String> searchKeywords;
51         Status status;
52 
53         @Override
toString()54         public String toString() {
55             return shortName + "; " + searchKeywords + "; " + status;
56         }
57     }
58 
main(String[] args)59     public static void main(String[] args) throws IOException {
60         final String locale = "zh_Hant";
61 
62         Map<String, EmojiData> annotations = getDataFor(locale);
63 
64         Map<String, Set<String>> removed = loadItems(locale, "_removed.csv", new HashMap<>());
65         Map<String, Set<String>> added = loadItems(locale, "_added.csv", new HashMap<>());
66 
67         int count = 0;
68         System.out.println("No.\tEmoji\tType\tName\tCommon\tRemoved\tAdded");
69         for (String key : sorted) {
70             String minimal = key.replace(Emoji.EMOJI_VARIANT, "");
71             EmojiData v = annotations.get(minimal);
72             Set<String> commonSet;
73             String shortName;
74             Status status;
75             if (v == null) {
76                 commonSet = Set.of();
77                 shortName = "<constructed>";
78                 status = Status.missing;
79             } else {
80                 commonSet = v.searchKeywords;
81                 shortName = v.shortName;
82                 status = v.status;
83             }
84 
85             Set<String> removedSet = removed.get(key);
86             Set<String> addedSet = added.get(key);
87             if (removedSet == null && addedSet == null) {
88                 continue;
89             }
90             if (removedSet != null) {
91                 commonSet = Sets.difference(commonSet, removedSet);
92             }
93             System.out.println(
94                     ++count //
95                             + "\t"
96                             + key //
97                             + "\t"
98                             + status.abbreviation() //
99                             + "\t"
100                             + shortName //
101                             + "\t"
102                             + BAR_JOINER.join(commonSet) //
103                             + "\t"
104                             + (removedSet == null ? "" : BAR_JOINER.join(removedSet)) //
105                             + "\t"
106                             + (addedSet == null ? "" : BAR_JOINER.join(addedSet)) //
107                     );
108         }
109     }
110 
getDataFor(String locale)111     private static Map<String, EmojiData> getDataFor(String locale) {
112         Map<String, EmojiData> result = new HashMap<>();
113         CLDRFile cldrfile = FACTORY.make(locale, true);
114         getDataIn(cldrfile, result, Status.regular);
115         CLDRFile cldrfileDerived = FACTORY_DERIVED.make(locale, true);
116         getDataIn(cldrfileDerived, result, Status.constructed);
117         return result;
118     }
119 
getDataIn(CLDRFile cldrfile, Map<String, EmojiData> result, Status status)120     public static void getDataIn(CLDRFile cldrfile, Map<String, EmojiData> result, Status status) {
121         for (String path : cldrfile) {
122             XPathParts parts = XPathParts.getFrozenInstance(path);
123             String cp = parts.getAttributeValue(-1, "cp");
124             if (cp == null) {
125                 continue;
126             }
127             EmojiData record = result.get(cp);
128             if (record == null) {
129                 result.put(cp, record = new EmojiData());
130                 record.status = status;
131             }
132             boolean istts = parts.getAttributeValue(-1, "type") != null;
133             String value = cldrfile.getStringValue(path);
134             if (istts) {
135                 record.shortName = value;
136             } else {
137                 record.searchKeywords = ImmutableSet.copyOf(BAR_SPLITTER.splitToList(value));
138             }
139         }
140     }
141 
loadItems( String locale, String suffix, Map<String, Set<String>> result)142     public static Map<String, Set<String>> loadItems(
143             String locale, String suffix, Map<String, Set<String>> result) throws IOException {
144         try (BufferedReader reader = FileUtilities.openUTF8Reader(base, locale + suffix)) {
145             while (true) {
146                 String line = reader.readLine();
147                 if (line == null) {
148                     return result;
149                 }
150                 if (line.startsWith("Emoji,")) {
151                     continue;
152                 }
153                 String[] split = FileUtilities.splitCommaSeparated(line);
154                 if (split.length < 2) {
155                     continue;
156                 }
157                 String key = split[0];
158                 Set<String> values = new TreeSet<>(collator);
159                 for (int i = 1; i < split.length; ++i) {
160                     values.add(split[i]);
161                 }
162                 values = ImmutableSet.copyOf(values);
163                 result.put(key, values);
164             }
165         }
166     }
167 }
168