1 // © 2017 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu.localedistance; 4 5 import static com.google.common.base.Preconditions.checkArgument; 6 import static com.google.common.base.Preconditions.checkState; 7 8 import java.nio.ByteBuffer; 9 import java.util.function.Consumer; 10 11 import com.ibm.icu.impl.locale.LocaleDistance; 12 import com.ibm.icu.util.BytesTrieBuilder; 13 14 /** 15 * Trie constructed by adding "spans" of data representing prefix 16 * sequences with mapped values. 17 * 18 * <p>When a prefix needs to be added to a Trie, a new span is created to 19 * represents the additional data. If a final value is added to a span, then 20 * the current prefix data is committed to the underlying Trie as its key. 21 * 22 * <p>Typical use might look like: 23 * <pre>{@code 24 * Trie trie = new Trie(); 25 * mappedData.forEach( 26 * (prefix, subValues) -> trie.root().with(prefix, subSpan -> process(subSpan, subValues)); 27 * byte[] bytes = trie.toByteArray(); 28 * } 29 * }</pre> 30 * where the {@code process} method may create more sub-spans, and eventually 31 * calls {@link Span#putPrefixAndValue(int)} to commit the current sequence 32 * of prefixes and the given value to the Trie. 33 * 34 * <p>Since spans share a common buffer for prefix data, it is important 35 * that extended spans are consumed before the parent span is used again. 36 * This is one reason why the API requires a consumer to be passed when a 37 * span is extended. 38 */ 39 final class Trie { 40 private final BytesTrieBuilder trieBuilder = new BytesTrieBuilder(); 41 private final byte[] spanBytes = new byte[24]; 42 43 /** 44 * Represents a sequence of prefixes to be added to the underlying Trie 45 * when a value is specified. 46 * 47 * <p>The position of a span cannot be modified, but they are not thread 48 * safe (since they share the same underlying buffer). 49 */ 50 final class Span { 51 // The index *after* the last prefix was added. 52 private final int index; 53 54 // The root span. Span()55 private Span() { 56 this.index = 0; 57 } 58 59 // An extended span with the given prefix included. Span(int index, String prefix)60 private Span(int index, String prefix) { 61 checkArgument(index >= 0, "bad index: %s", index); 62 checkState(!prefix.isEmpty(), "invalid subtag: %s", prefix); 63 checkState(index + prefix.length() <= spanBytes.length, "span too long"); 64 if (prefix.equals("*")) { 65 spanBytes[index++] = '*'; 66 } else { 67 checkArgument(!prefix.contains("*"), "prefix must not contain '*': %s", prefix); 68 for (int i = 0; i < prefix.length(); i++) { 69 char c = prefix.charAt(i); 70 checkArgument(c < LocaleDistance.END_OF_SUBTAG, "invalid trie character: %s", c); 71 spanBytes[index++] = (byte) c; 72 } 73 // Mark the final character as a terminator to avoid overlap matches. 74 spanBytes[index - 1] |= (byte) LocaleDistance.END_OF_SUBTAG; 75 } 76 this.index = index; 77 } 78 79 /** 80 * Extends the current span by creating a new span with the given ASCII 81 * prefix data, and passing it to the given consumer. The original span is 82 * not modified, but must not be used again until the consumer is finished. 83 * 84 * <p>The prefix string must contain only 7-bit ASCII characters. 85 */ 86 public void with(String prefix, Consumer<Span> withFn) { 87 withFn.accept(new Span(index, prefix)); 88 } 89 90 /** 91 * Commits the current prefix data and the given value to the underlying Trie. 92 */ 93 public void putPrefixAndValue(int value) { 94 checkArgument(value >= 0, "bad trie value: %s", value); 95 checkState(index > 0, "missing prefix for value: %s", value); 96 trieBuilder.add(spanBytes, index, value); 97 } 98 } 99 100 /** Returns the root span with no current prefix data. */ root()101 public Span root() { 102 return new Span(); 103 } 104 105 /** Serializes the underlying Trie data to a byte array (see also {@link BytesTrieBuilder}). */ toByteArray()106 public byte[] toByteArray() { 107 ByteBuffer buffer = trieBuilder.buildByteBuffer(BytesTrieBuilder.Option.SMALL); 108 byte[] bytes = new byte[buffer.remaining()]; 109 buffer.get(bytes); 110 return bytes; 111 } 112 } 113