1 package org.unicode.cldr.util; 2 3 import com.google.common.base.CharMatcher; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.ImmutableMap; 6 import com.google.common.collect.ImmutableSet; 7 import com.ibm.icu.dev.util.UnicodeMap; 8 import com.ibm.icu.impl.Utility; 9 import com.ibm.icu.lang.CharSequences; 10 import com.ibm.icu.text.UnicodeSet; 11 import com.ibm.icu.util.ICUException; 12 import java.util.ArrayList; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.LinkedHashMap; 16 import java.util.LinkedHashSet; 17 import java.util.List; 18 import java.util.Map; 19 import java.util.Map.Entry; 20 import java.util.Set; 21 import org.unicode.cldr.draft.FileUtilities; 22 import org.unicode.cldr.util.PathHeader.PageId; 23 24 public class Emoji { 25 public static final String EMOJI_VARIANT = "\uFE0F"; 26 public static final String COMBINING_ENCLOSING_KEYCAP = "\u20E3"; 27 public static final String ZWJ = "\u200D"; 28 public static final UnicodeSet REGIONAL_INDICATORS = new UnicodeSet(0x1F1E6, 0x1F1FF).freeze(); 29 public static final UnicodeSet MODIFIERS = new UnicodeSet("[-]").freeze(); 30 public static final UnicodeSet TAGS = new UnicodeSet(0xE0000, 0xE007F).freeze(); 31 public static final UnicodeSet FAMILY = new UnicodeSet("[\u200D - ❤]").freeze(); 32 public static final UnicodeSet GENDER = new UnicodeSet().add(0x2640).add(0x2642).freeze(); 33 public static final UnicodeSet SPECIALS = 34 new UnicodeSet( 35 "[" 36 + "{⬛}{❄}{}{}{}{}{}{} {} {☠} {} {} {} {} {} {} {}" 37 + "{⚧}{⚕}{⚖}{✈}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}" 38 + "{❤}, {❤}, {}, {}" // #E13.1 39 + "]") 40 .freeze(); 41 // May have to add from above, if there is a failure in testAnnotationPaths. Failure will be 42 // like: 43 // got java.util.TreeSet<[//ldml/annotations/annotation[@cp="⚧"][@type="tts"], 44 // //ldml/annotations/annotation[@cp="⚕"][@type="tts"], ... 45 // just extract the items in "...", and change into {...} for adding above. 46 // Example: //ldml/annotations/annotation[@cp="⚕"] ==> {⚕} 47 public static final UnicodeSet MAN_WOMAN = new UnicodeSet("[ ]").freeze(); 48 public static final UnicodeSet OBJECT = 49 new UnicodeSet("[ ✈ ⚖ ⚕]").freeze(); 50 51 static final UnicodeMap<String> emojiToMajorCategory = new UnicodeMap<>(); 52 static final UnicodeMap<String> emojiToMinorCategory = new UnicodeMap<>(); 53 static final UnicodeMap<String> toName = new UnicodeMap<>(); 54 55 static { 56 emojiToMajorCategory.setErrorOnReset(true); 57 emojiToMinorCategory.setErrorOnReset(true); 58 toName.setErrorOnReset(true); 59 } 60 /** 61 * A mapping from a majorCategory to a unique ordering number, based on the first time it is 62 * encountered. 63 */ 64 static final Map<String, Long> majorToOrder = new HashMap<>(); 65 /** 66 * A mapping from a minorCategory to a unique ordering number, based on the first time it is 67 * encountered. 68 */ 69 static final Map<String, Long> minorToOrder = new HashMap<>(); 70 71 static final Map<String, Long> emojiToOrder = new LinkedHashMap<>(); 72 static final UnicodeSet nonConstructed = new UnicodeSet(); 73 static final UnicodeSet allRgi = new UnicodeSet(); 74 static final UnicodeSet allRgiNoES = new UnicodeSet(); 75 76 static { 77 /* 78 * Example from emoji-test.txt: 79 * # group: Smileys & Emotion 80 * # subgroup: face-smiling 81 * 1F600 ; fully-qualified # grinning face 82 */ 83 Splitter semi = Splitter.on(CharMatcher.anyOf(";#")).trimResults(); 84 String majorCategory = null; 85 String minorCategory = null; 86 for (String line : FileUtilities.in(Emoji.class, "data/emoji/emoji-test.txt")) { 87 if (line.startsWith("#")) { 88 line = line.substring(1).trim(); 89 if (line.startsWith("group:")) { 90 majorCategory = line.substring("group:".length()).trim(); majorToOrder.computeIfAbsent(majorCategory, k -> (long) majorToOrder.size())91 majorToOrder.computeIfAbsent(majorCategory, k -> (long) majorToOrder.size()); 92 } else if (line.startsWith("subgroup:")) { 93 minorCategory = line.substring("subgroup:".length()).trim(); minorToOrder.computeIfAbsent(minorCategory, k -> (long) minorToOrder.size())94 minorToOrder.computeIfAbsent(minorCategory, k -> (long) minorToOrder.size()); 95 } 96 continue; 97 } 98 line = line.trim(); 99 if (line.isEmpty()) { 100 continue; 101 } 102 Iterator<String> it = semi.split(line).iterator(); 103 104 String emojiHex = it.next(); 105 String original = Utility.fromHex(emojiHex, 4, " "); 106 String type = it.next(); 107 if (type.startsWith("fully-qualified")) { 108 allRgi.add(original); original.replace(Emoji.EMOJI_VARIANT, "")109 allRgiNoES.add(original.replace(Emoji.EMOJI_VARIANT, "")); 110 } emojiToMajorCategory.put(original, majorCategory)111 emojiToMajorCategory.put(original, majorCategory); emojiToMinorCategory.put(original, minorCategory)112 emojiToMinorCategory.put(original, minorCategory); 113 String comment = it.next(); 114 // The comment is now of the form: # E0.6 beaming face with smiling eyes 115 int spacePos = comment.indexOf(' '); 116 // The format changed in v15.1, so there is no version number. 117 // Thus the following is commented out: 118 // spacePos = comment.indexOf(' ', spacePos + 1); // get second space 119 String name = comment.substring(spacePos + 1).trim(); toName.put(original, name)120 toName.put(original, name); 121 122 // add all the non-constructed values to a set for annotations 123 124 String minimal = original.replace(EMOJI_VARIANT, ""); 125 126 // Add the order. If it is not minimal, add that also. 127 if (!emojiToOrder.containsKey(original)) { putUnique(emojiToOrder, original, emojiToOrder.size() * 100L)128 putUnique(emojiToOrder, original, emojiToOrder.size() * 100L); 129 } 130 if (!emojiToOrder.containsKey(minimal)) { putUnique(emojiToOrder, minimal, emojiToOrder.size() * 100L)131 putUnique(emojiToOrder, minimal, emojiToOrder.size() * 100L); 132 } 133 134 boolean singleton = CharSequences.getSingleCodePoint(minimal) != Integer.MAX_VALUE; 135 136 // skip constructed values 137 if (minimal.contains(COMBINING_ENCLOSING_KEYCAP) 138 || REGIONAL_INDICATORS.containsSome(minimal) 139 || TAGS.containsSome(minimal) 140 || !singleton && MODIFIERS.containsSome(minimal) 141 || !singleton && FAMILY.containsAll(minimal)) { 142 // do nothing 143 } else if (minimal.contains(ZWJ)) { // only do certain ZWJ sequences 144 if (SPECIALS.contains(minimal) 145 || GENDER.containsSome(minimal) 146 || MAN_WOMAN.contains(minimal.codePointAt(0)) 147 && OBJECT.contains(minimal.codePointBefore(minimal.length()))) { 148 nonConstructed.add(minimal); 149 } 150 } else if (!minimal.contains("")) { 151 nonConstructed.add(minimal); 152 } 153 } emojiToMajorCategory.freeze()154 emojiToMajorCategory.freeze(); emojiToMinorCategory.freeze()155 emojiToMinorCategory.freeze(); 156 nonConstructed.add(MODIFIERS); // needed for names nonConstructed.freeze()157 nonConstructed.freeze(); toName.freeze()158 toName.freeze(); allRgi.freeze()159 allRgi.freeze(); allRgiNoES.freeze()160 allRgiNoES.freeze(); 161 } 162 putUnique(Map<K, V> map, K key, V value)163 private static <K, V> void putUnique(Map<K, V> map, K key, V value) { 164 V oldValue = map.put(key, value); 165 if (oldValue != null) { 166 throw new ICUException( 167 "Attempt to change value of " 168 + map 169 + " for " 170 + key 171 + " from " 172 + oldValue 173 + " to " 174 + value); 175 } 176 } 177 getAllRgi()178 public static UnicodeSet getAllRgi() { 179 return allRgi; 180 } 181 getAllRgiNoES()182 public static UnicodeSet getAllRgiNoES() { 183 return allRgiNoES; 184 } 185 186 public static final UnicodeMap<String> EXTRA_SYMBOL_MINOR_CATEGORIES = new UnicodeMap<>(); 187 public static final Map<String, Long> EXTRA_SYMBOL_ORDER; 188 private static final boolean DEBUG = false; 189 190 static { 191 String[][] data = { 192 {"arrow", "→ ↓ ↑ ← ↔ ↕ ⇆ ⇅"}, 193 {"alphanum", "© ® ℗ ™ µ"}, 194 {"geometric", "▼ ▶ ▲ ◀ ● ○ ◯ ◊"}, 195 {"math", "× ÷ √ ∞ ∆ ∇ ⁻ ¹ ² ³ ≡ ∈ ⊂ ∩ ∪ ° + ± − = ≈ ≠ > < ≤ ≥ ¬ | ~"}, 196 { 197 "punctuation", 198 "§ † ‡ \\u0020 , 、 ، ; : ؛ ! ¡ ? ¿ ؟ ¶ ※ / \\ & # % ‰ ′ ″ ‴ @ * ♪ ♭ ♯ ` ´ ^ ¨ ‐ ― _ - – — • · . … 。 ‧ ・ ‘ ’ ‚ ' “ ” „ » « ( ) [ ] { } 〔 〕 〈 〉 《 》 「 」 『 』 〖 〗 【 】" 199 }, 200 {"currency", "€ £ ¥ ₹ ₽ $ ¢ ฿ ₪ ₺ ₫ ₱ ₩ ₡ ₦ ₮ ৳ ₴ ₸ ₲ ₵ ៛ ₭ ֏ ₥ ₾ ₼ ₿ ؋ ₧ ¤"}, 201 { 202 "other-symbol", 203 "‾‽‸⁂↚↛↮↙↜↝↞↟↠↡↢↣↤↥↦↧↨↫↬↭↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇇⇈⇉⇊⇋⇌⇐⇍⇑⇒⇏⇓⇔⇎⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇵∀∂∃∅∉∋∎∏∑≮≯∓∕⁄∗∘∙∝∟∠∣∥∧∫∬∮∴∵∶∷∼∽∾≃≅≌≒≖≣≦≧≪≫≬≳≺≻⊁⊃⊆⊇⊕⊖⊗⊘⊙⊚⊛⊞⊟⊥⊮⊰⊱⋭⊶⊹⊿⋁⋂⋃⋅⋆⋈⋒⋘⋙⋮⋯⋰⋱■□▢▣▤▥▦▧▨▩▬▭▮▰△▴▵▷▸▹►▻▽▾▿◁◂◃◄◅◆◇◈◉◌◍◎◐◑◒◓◔◕◖◗◘◙◜◝◞◟◠◡◢◣◤◥◦◳◷◻◽◿⨧⨯⨼⩣⩽⪍⪚⪺₢₣₤₰₳₶₷₨﷼" 204 }, 205 }; 206 // get the maximum suborder for each subcategory 207 Map<String, Long> subcategoryToMaxSuborder = new HashMap<>(); 208 for (String[] row : data) { 209 final String subcategory = row[0]; 210 for (Entry<String, String> entry : emojiToMinorCategory.entrySet()) { 211 if (entry.getValue().equals(subcategory)) { 212 String emoji = entry.getKey(); 213 Long order = emojiToOrder.get(emoji); 214 Long currentMax = subcategoryToMaxSuborder.get(subcategory); 215 if (currentMax == null || currentMax < order) { subcategoryToMaxSuborder.put(subcategory, order)216 subcategoryToMaxSuborder.put(subcategory, order); 217 } 218 } 219 } 220 } 221 if (DEBUG) System.out.println(subcategoryToMaxSuborder); 222 Map<String, Long> _EXTRA_SYMBOL_ORDER = new LinkedHashMap<>(); 223 for (String[] row : data) { 224 final String subcategory = row[0]; 225 final String characters = row[1]; 226 227 List<String> items = new ArrayList<>(); 228 for (int cp : With.codePointArray(characters)) { 229 if (cp != ' ') { With.fromCodePoint(cp)230 items.add(With.fromCodePoint(cp)); 231 } 232 } 233 final UnicodeSet uset = new UnicodeSet().addAll(items); 234 if (uset.containsSome(EXTRA_SYMBOL_MINOR_CATEGORIES.keySet())) { 235 throw new IllegalArgumentException( 236 "Duplicate values in " + EXTRA_SYMBOL_MINOR_CATEGORIES); 237 } EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory)238 EXTRA_SYMBOL_MINOR_CATEGORIES.putAll(uset, subcategory); 239 long count = subcategoryToMaxSuborder.get(subcategory); 240 for (String s : items) { 241 ++count; _EXTRA_SYMBOL_ORDER.put(s, count)242 _EXTRA_SYMBOL_ORDER.put(s, count); 243 } subcategoryToMaxSuborder.put(subcategory, count)244 subcategoryToMaxSuborder.put(subcategory, count); 245 } 246 if (DEBUG) System.out.println(_EXTRA_SYMBOL_ORDER); EXTRA_SYMBOL_MINOR_CATEGORIES.freeze()247 EXTRA_SYMBOL_MINOR_CATEGORIES.freeze(); 248 EXTRA_SYMBOL_ORDER = ImmutableMap.copyOf(_EXTRA_SYMBOL_ORDER); 249 } 250 getMinorCategory(String emoji)251 public static String getMinorCategory(String emoji) { 252 String minorCat = emojiToMinorCategory.get(emoji); 253 if (minorCat == null) { 254 minorCat = EXTRA_SYMBOL_MINOR_CATEGORIES.get(emoji); 255 if (minorCat == null) { 256 throw new InternalCldrException( 257 "No minor category (aka subgroup) found for " 258 + emoji 259 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ..."); 260 } 261 } 262 return minorCat; 263 } 264 getName(String emoji)265 public static String getName(String emoji) { 266 return toName.get(emoji); 267 } 268 getEmojiToOrder(String emoji)269 public static long getEmojiToOrder(String emoji) { 270 Long result = emojiToOrder.get(emoji); 271 if (result == null) { 272 result = EXTRA_SYMBOL_ORDER.get(emoji); 273 if (result == null) { 274 throw new InternalCldrException( 275 "No Order found for " 276 + emoji 277 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ..."); 278 } 279 } 280 return result; 281 } 282 getEmojiMinorOrder(String minor)283 public static long getEmojiMinorOrder(String minor) { 284 Long result = minorToOrder.get(minor); 285 if (result == null) { 286 throw new InternalCldrException( 287 "No minor category (aka subgroup) found for " 288 + minor 289 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"minor\", ..."); 290 } 291 return result; 292 } 293 getMajorCategory(String emoji)294 public static String getMajorCategory(String emoji) { 295 String majorCat = emojiToMajorCategory.get(emoji); 296 if (majorCat == null) { 297 if (EXTRA_SYMBOL_MINOR_CATEGORIES.containsKey(emoji)) { 298 majorCat = "Symbols"; 299 } else { 300 throw new InternalCldrException( 301 "No minor category (aka subgroup) found for " 302 + emoji 303 + ". Update emoji-test.txt to latest, and adjust PathHeader.. functionMap.put(\"major\", ..."); 304 } 305 } 306 return majorCat; 307 } 308 getMinorCategoriesWithExtras()309 public static Set<String> getMinorCategoriesWithExtras() { 310 Set<String> result = new LinkedHashSet<>(emojiToMinorCategory.values()); 311 result.addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getAvailableValues()); 312 return ImmutableSet.copyOf(result); 313 } 314 getEmojiInMinorCategoriesWithExtras(String minorCategory)315 public static UnicodeSet getEmojiInMinorCategoriesWithExtras(String minorCategory) { 316 return new UnicodeSet(emojiToMinorCategory.getSet(minorCategory)) 317 .addAll(EXTRA_SYMBOL_MINOR_CATEGORIES.getSet(minorCategory)) 318 .freeze(); 319 } 320 getNonConstructed()321 public static UnicodeSet getNonConstructed() { 322 return nonConstructed; 323 } 324 325 private static Set<String> NAME_PATHS = null; 326 public static final String TYPE_TTS = "[@type=\"tts\"]"; 327 getNamePaths()328 public static synchronized Set<String> getNamePaths() { 329 return NAME_PATHS != null ? NAME_PATHS : (NAME_PATHS = buildPaths(TYPE_TTS)); 330 } 331 buildPaths(String suffix)332 private static ImmutableSet<String> buildPaths(String suffix) { 333 ImmutableSet.Builder<String> builder = ImmutableSet.builder(); 334 for (String s : Emoji.getNonConstructed()) { 335 String base = "//ldml/annotations/annotation[@cp=\"" + s + "\"]" + suffix; 336 builder.add(base); 337 } 338 return builder.build(); 339 } 340 341 /** 342 * Return the PageId for the given emoji, making adjustments for pages that are united in 343 * emoji-test.txt but divided in Survey Tool, such as Symbols, Symbols2, and Symbols3 344 * 345 * @param emoji the emoji as a string 346 * @return the adjusted PageId 347 */ getPageId(String emoji)348 public static PageId getPageId(String emoji) { 349 final String major = getMajorCategory(emoji); 350 final String minor = getMinorCategory(emoji); 351 final PageId pageId = PageId.forString(major); 352 final Long minorOrder = minorToOrder.get(minor); 353 switch (pageId) { 354 case Objects: 355 return (minorOrder < minorToOrder.get("money")) ? PageId.Objects : PageId.Objects2; 356 case People: 357 return (minorOrder < minorToOrder.get("person-fantasy")) 358 ? PageId.People 359 : PageId.People2; 360 case Symbols: 361 return (minorOrder < minorToOrder.get("transport-sign")) 362 ? PageId.Symbols 363 : PageId.EmojiSymbols; 364 case Travel_Places: 365 return (minorOrder < minorToOrder.get("transport-ground")) 366 ? PageId.Travel_Places 367 : PageId.Travel_Places2; 368 default: 369 return pageId; 370 } 371 } 372 } 373