xref: /aosp_15_r20/external/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/localedistance/Trie.java (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu.localedistance;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkState;
7 
8 import java.nio.ByteBuffer;
9 import java.util.function.Consumer;
10 
11 import com.ibm.icu.impl.locale.LocaleDistance;
12 import com.ibm.icu.util.BytesTrieBuilder;
13 
14 /**
15  * Trie constructed by adding "spans" of data representing prefix
16  * sequences with mapped values.
17  *
18  * <p>When a prefix needs to be added to a Trie, a new span is created to
19  * represents the additional data. If a final value is added to a span, then
20  * the current prefix data is committed to the underlying Trie as its key.
21  *
22  * <p>Typical use might look like:
23  * <pre>{@code
24  * Trie trie = new Trie();
25  * mappedData.forEach(
26  *     (prefix, subValues) -> trie.root().with(prefix, subSpan -> process(subSpan, subValues));
27  * byte[] bytes = trie.toByteArray();
28  * }
29  * }</pre>
30  * where the {@code process} method may create more sub-spans, and eventually
31  * calls {@link Span#putPrefixAndValue(int)} to commit the current sequence
32  * of prefixes and the given value to the Trie.
33  *
34  * <p>Since spans share a common buffer for prefix data, it is important
35  * that extended spans are consumed before the parent span is used again.
36  * This is one reason why the API requires a consumer to be passed when a
37  * span is extended.
38  */
39 final class Trie {
40     private final BytesTrieBuilder trieBuilder = new BytesTrieBuilder();
41     private final byte[] spanBytes = new byte[24];
42 
43     /**
44      * Represents a sequence of prefixes to be added to the underlying Trie
45      * when a value is specified.
46      *
47      * <p>The position of a span cannot be modified, but they are not thread
48      * safe (since they share the same underlying buffer).
49      */
50     final class Span {
51         // The index *after* the last prefix was added.
52         private final int index;
53 
54         // The root span.
Span()55         private Span() {
56             this.index = 0;
57         }
58 
59         // An extended span with the given prefix included.
Span(int index, String prefix)60         private Span(int index, String prefix) {
61             checkArgument(index >= 0, "bad index: %s", index);
62             checkState(!prefix.isEmpty(), "invalid subtag: %s", prefix);
63             checkState(index + prefix.length() <= spanBytes.length, "span too long");
64             if (prefix.equals("*")) {
65                 spanBytes[index++] = '*';
66             } else {
67                 checkArgument(!prefix.contains("*"), "prefix must not contain '*': %s", prefix);
68                 for (int i = 0; i < prefix.length(); i++) {
69                     char c = prefix.charAt(i);
70                     checkArgument(c < LocaleDistance.END_OF_SUBTAG, "invalid trie character: %s", c);
71                     spanBytes[index++] = (byte) c;
72                 }
73                 // Mark the final character as a terminator to avoid overlap matches.
74                 spanBytes[index - 1] |= (byte) LocaleDistance.END_OF_SUBTAG;
75             }
76             this.index = index;
77         }
78 
79         /**
80          * Extends the current span by creating a new span with the given ASCII
81          * prefix data, and passing it to the given consumer. The original span is
82          * not modified, but must not be used again until the consumer is finished.
83          *
84          * <p>The prefix string must contain only 7-bit ASCII characters.
85          */
86         public void with(String prefix, Consumer<Span> withFn) {
87             withFn.accept(new Span(index, prefix));
88         }
89 
90         /**
91          * Commits the current prefix data and the given value to the underlying Trie.
92          */
93         public void putPrefixAndValue(int value) {
94             checkArgument(value >= 0, "bad trie value: %s", value);
95             checkState(index > 0, "missing prefix for value: %s", value);
96             trieBuilder.add(spanBytes, index, value);
97         }
98     }
99 
100     /** Returns the root span with no current prefix data. */
root()101     public Span root() {
102         return new Span();
103     }
104 
105     /** Serializes the underlying Trie data to a byte array (see also {@link BytesTrieBuilder}). */
toByteArray()106     public byte[] toByteArray() {
107         ByteBuffer buffer = trieBuilder.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
108         byte[] bytes = new byte[buffer.remaining()];
109         buffer.get(bytes);
110         return bytes;
111     }
112 }
113