xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import java.nio.charset.StandardCharsets;
4 import java.security.MessageDigest;
5 import java.util.Map;
6 import java.util.concurrent.ConcurrentHashMap;
7 
/**
 * Produces a non-negative {@code long} ID for a string, derived from the SHA-1 hash of the
 * string's UTF-8 bytes. When used properly, the odds of collision are so low that the ID can be
 * used as a proxy for the original string. Also provides the reverse lookup: given an ID that was
 * previously produced by {@link #getId(CharSequence)}, return the string it was produced from.
 *
 * <p>Every string passed to {@link #getId(CharSequence)} is retained in two static maps for the
 * lifetime of the class. Hashing is serialized on a single shared {@link MessageDigest}; cache
 * lookups use {@link ConcurrentHashMap} and do not take that lock.
 *
 * @author markdavis
 */
public final class StringId {
    /** Cache of string to ID; consulted without locking on the fast path of getId. */
    private static final Map<String, Long> STRING_TO_ID = new ConcurrentHashMap<>();

    /** Reverse cache of ID to the string that produced it. */
    private static final Map<Long, String> ID_TO_STRING = new ConcurrentHashMap<>();

    /** Shared SHA-1 instance; it is also the monitor that guards its own use. */
    private static final MessageDigest digest;

    /** Number of additional attempts made after a RuntimeException while hashing. */
    private static final int RETRY_LIMIT = 9;

    static {
        try {
            digest = MessageDigest.getInstance("SHA-1");
        } catch (Exception e) {
            // getInstance declares a checked exception; rethrow it unchecked.
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Get the ID for a string, computing and caching it on first use.
     *
     * @param charSequence input text; its {@code toString()} value is what gets hashed and cached.
     * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL.
     */
    public static long getId(CharSequence charSequence) {
        String string = charSequence.toString();
        Long cached = STRING_TO_ID.get(string);
        if (cached != null) {
            return cached;
        }
        // Encode once, outside the lock, so the digest monitor is held only while hashing.
        byte[] utf8 = string.getBytes(StandardCharsets.UTF_8);
        int retryCount = RETRY_LIMIT;
        while (true) {
            try {
                synchronized (digest) {
                    // Drop any input left behind by an attempt that failed part-way through, so
                    // a retry can never hash stale bytes and cache a wrong ID.
                    digest.reset();
                    long result = foldHash(digest.digest(utf8));
                    // Publish the reverse mapping first. The fast path above reads STRING_TO_ID
                    // without the lock, so once an ID is visible there, getStringFromId must
                    // already be able to resolve it.
                    ID_TO_STRING.put(result, string);
                    STRING_TO_ID.put(string, result);
                    return result;
                }
            } catch (RuntimeException e) {
                if (--retryCount < 0) {
                    throw e;
                }
            }
        }
    }

    /**
     * Folds the first 8 bytes of a hash into a non-negative long.
     *
     * @param hash digest bytes; at least 8 are required.
     * @return the folded value with the sign bit cleared.
     */
    private static long foldHash(byte[] hash) {
        long result = 0;
        for (int i = 0; i < 8; ++i) {
            result <<= 8;
            // hash[i] is a signed byte, so a negative byte sign-extends and also flips the bits
            // accumulated so far. This is kept as is: masking with 0xFF here would change the
            // IDs returned for existing strings.
            result ^= hash[i];
        }
        // Clear the top bit so the ID is never negative.
        return result & 0x7FFFFFFFFFFFFFFFL;
    }

    /**
     * Get the hex ID for a string.
     *
     * @param string input string.
     * @return the ID from {@link #getId(CharSequence)} as lowercase hex without leading zeros.
     */
    public static String getHexId(CharSequence string) {
        return Long.toHexString(getId(string));
    }

    /**
     * Get the string for a hex ID, i.e. the inverse of {@link #getHexId(CharSequence)}.
     *
     * @param string a hex ID as produced by {@link #getHexId(CharSequence)}.
     * @return the string previously used to generate that ID, or null if no such ID has been
     *     generated.
     * @throws NumberFormatException if {@code string} cannot be parsed as a hexadecimal long.
     */
    public static String getStringFromHexId(String string) {
        return getStringFromId(Long.parseLong(string, 16));
    }

    /**
     * Returns string previously used to generate the longValue with getId.
     *
     * @param longValue an ID as returned by {@link #getId(CharSequence)}.
     * @return String previously used to generate the longValue with getId, or null if getId has
     *     not produced that value.
     */
    public static String getStringFromId(long longValue) {
        return ID_TO_STRING.get(longValue);
    }
}
96