1 package org.unicode.cldr.util; 2 3 import java.nio.charset.StandardCharsets; 4 import java.security.MessageDigest; 5 import java.util.Map; 6 import java.util.concurrent.ConcurrentHashMap; 7 8 /** 9 * Produce an ID for a string based on a long hash. When used properly, the odds of collision are so 10 * low that the ID can be used as a proxy for the original string. The ID is non-negative. The 11 * algorithm uses SHA-1 over the UTF-8 bytes in the string. Also provides lookup for long previously 12 * generated for string. 13 * 14 * @author markdavis 15 */ 16 public final class StringId { 17 private static final Map<String, Long> STRING_TO_ID = new ConcurrentHashMap<>(); 18 private static final Map<Long, String> ID_TO_STRING = new ConcurrentHashMap<>(); 19 private static final MessageDigest digest; 20 private static final int RETRY_LIMIT = 9; 21 22 static { 23 try { 24 digest = MessageDigest.getInstance("SHA-1"); 25 } catch (Exception e) { 26 throw new IllegalArgumentException(e); // darn'd checked exceptions 27 } 28 } 29 30 /** 31 * Get the ID for a string. 32 * 33 * @param string input string. 34 * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL. 35 */ getId(CharSequence charSequence)36 public static long getId(CharSequence charSequence) { 37 String string = charSequence.toString(); 38 Long resultLong = STRING_TO_ID.get(string); 39 if (resultLong != null) { 40 return resultLong; 41 } 42 int retryCount = RETRY_LIMIT; 43 while (true) { 44 try { 45 synchronized (digest) { 46 byte[] hash = digest.digest(string.getBytes(StandardCharsets.UTF_8)); 47 long result = 0; 48 for (int i = 0; i < 8; ++i) { 49 result <<= 8; 50 result ^= hash[i]; 51 } 52 // mash the top bit to make things easier 53 result &= 0x7FFFFFFFFFFFFFFFL; 54 STRING_TO_ID.put(string, result); 55 ID_TO_STRING.put(result, string); 56 return result; 57 } 58 } catch (RuntimeException e) { 59 if (--retryCount < 0) { 60 throw e; 61 } 62 } 63 } 64 } 65 66 /** 67 * Get the hex ID for a string. 68 * 69 * @param string input string. 70 * @return a string with the hex value 71 */ getHexId(CharSequence string)72 public static String getHexId(CharSequence string) { 73 return Long.toHexString(getId(string)); 74 } 75 76 /** 77 * Get the hex ID for a string. 78 * 79 * @param string input string. 80 * @return a string with the hex value 81 */ getStringFromHexId(String string)82 public static String getStringFromHexId(String string) { 83 return getStringFromId(Long.parseLong(string, 16)); 84 } 85 86 /** 87 * Returns string previously used to generate the longValue with getId. 88 * 89 * @param longValue 90 * @return String previously used to generate the longValue with getId. 91 */ getStringFromId(long longValue)92 public static String getStringFromId(long longValue) { 93 return ID_TO_STRING.get(longValue); 94 } 95 } 96