xref: /aosp_15_r20/external/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.CharMatcher.whitespace;
6 import static com.google.common.base.Preconditions.checkArgument;
7 import static com.google.common.base.Preconditions.checkState;
8 import static com.google.common.collect.ImmutableList.toImmutableList;
9 
10 import java.util.Arrays;
11 import java.util.Comparator;
12 import java.util.Objects;
13 import java.util.function.Function;
14 
15 import com.google.common.base.CharMatcher;
16 import com.google.common.base.Splitter;
17 import com.google.common.collect.Comparators;
18 import com.google.common.collect.ImmutableList;
19 import com.google.common.collect.Iterables;
20 
21 /**
22  * A resource bundle path, used to identify entries in ICU data.
23  *
24  * <p>Immutable and thread safe.
25  */
26 public final class RbPath implements Comparable<RbPath> {
27     private static final Splitter PATH_SPLITTER = Splitter.on('/').trimResults();
28 
29     // This defines ordering of paths in IcuData instances and thus the order in ICU data files.
30     // If there's ever a reason to have a different "natural" order for paths, this Comparator
31     // should be moved into the ICU file writer class(es).
32     private static final Comparator<RbPath> ORDERING =
33         Comparator.comparing(
34             p -> p.segments,
35             Comparators.lexicographical(Comparator.<String>naturalOrder()));
36 
37     // Matches the definition of invariant characters in "uinvchar.cpp". We can make this all much
38     // faster if needed with a custom matcher (it's just a 128 way bit lookup via 2 longs).
39     private static final CharMatcher INVARIANT_CHARS =
40         CharMatcher.ascii().and(CharMatcher.anyOf("!#$@[\\]^`{|}~").negate());
41 
42     // Note that we must also prohibit double-quote from appearing anywhere other than surrounding
43     // segment values. This is because some segment values can contain special ICU data characters
44     // (e.g. ':') but must be treated as literals. There is not proper "escaping" mechanism in ICU
45     // data for key values (since '\' is not an invariant, things like \\uxxxx are not possible).
46     //
47     // Ideally quoting would be done when the file is written, but that would require additional
48     // complexity in RbPath, since suffixes like ":intvector" must not be quoted and must somehow
49     // be distinguished from timezone "metazone" names which also contain ':'.
50     private static final CharMatcher QUOTED_SEGMENT_CHARS =
51         INVARIANT_CHARS
52             .and(CharMatcher.javaIsoControl().negate())
53             .and(CharMatcher.isNot('"'));
54     private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
55         QUOTED_SEGMENT_CHARS.and(whitespace().negate());
56 
57     /**
58      * Returns a path with the specified segments in (possibly empty). Note that unlike
59      * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
60      * element constructed by this method.
61      */
of(String... segments)62     public static RbPath of(String... segments) {
63         return of(Arrays.asList(segments));
64     }
65 
66     /**
67      * Returns a path with the specified segments in (possibly empty). Note that unlike
68      * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
69      * element constructed by this method.
70      */
of(Iterable<String> segments)71     public static RbPath of(Iterable<String> segments) {
72         return new RbPath(segments);
73     }
74 
75     /** Parses the given path string, assuming {@code '/'} as a path separator. */
parse(String path)76     public static RbPath parse(String path) {
77         checkArgument(!path.isEmpty(), "cannot parse an empty path string");
78         // Allow leading '/', but don't allow empty segments anywhere else.
79         if (path.startsWith("/")) {
80             path = path.substring(1);
81         }
82         return new RbPath(PATH_SPLITTER.split(path));
83     }
84 
85     /** Returns the common prefix length of two paths (useful when thinking of path hierarchies). */
getCommonPrefixLength(RbPath lhs, RbPath rhs)86     public static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
87         int maxLength = Math.min(lhs.length(), rhs.length());
88         int n = 0;
89         while (n < maxLength && lhs.getSegment(n).equals(rhs.getSegment(n))) {
90             n++;
91         }
92         return n;
93     }
94 
95     private final ImmutableList<String> segments;
96     private final int hashCode;
97 
RbPath(Iterable<String> segments)98     private RbPath(Iterable<String> segments) {
99         this.segments = ImmutableList.copyOf(segments);
100         // Use "this.segments" since the incoming list can have a different hash!
101         this.hashCode = Objects.hash(this.segments);
102         for (String segment : this.segments) {
103             checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
104             // Either the label is quoted (e.g. "foo") or it is bar (e.g. foo) but it can only
105             // contain double quotes at either end, or not at all. If the string is quoted, only
106             // validate the content, and not the quotes themselves.
107             switch (segment.charAt(0)) {
108             case '<':
109                 // Allow anything in hidden labels, since they will be removed later and never
110                 // appear in the final ICU data.
111                 checkArgument(segment.endsWith(">"),
112                     "mismatched quoting for hidden label: %s", segment);
113                 continue;
114 
115             case '"':
116                 checkArgument(segment.endsWith("\""),
117                     "mismatched quoting for segment: %s", segment);
118                 checkArgument(
119                     QUOTED_SEGMENT_CHARS.matchesAllOf(segment.substring(1, segment.length() - 1)),
120                     "invalid character in unquoted resource bundle path segment: %s", segment);
121                 break;
122 
123             default:
124                 checkArgument(
125                     UNQUOTED_SEGMENT_CHARS.matchesAllOf(segment),
126                     "invalid character in unquoted resource bundle path segment: %s", segment);
127                 break;
128             }
129         }
130     }
131 
132     /** Returns the number of segments in this path. */
length()133     public int length() {
134         return segments.size();
135     }
136 
137     /** Returns the Nth segments in this path. */
getSegment(int n)138     public String getSegment(int n) {
139         return segments.get(n);
140     }
141 
142     /** Returns a new path extended at the end by the specified segments. */
extendBy(String... parts)143     public RbPath extendBy(String... parts) {
144         return new RbPath(Iterables.concat(segments, Arrays.asList(parts)));
145     }
146 
147     /** Returns whether this path starts with the specified prefix. */
startsWith(RbPath prefix)148     public boolean startsWith(RbPath prefix) {
149         return prefix.length() <= length() && matchesSublist(prefix, 0);
150     }
151 
152     /** Returns whether this path ends with the specified suffix. */
endsWith(RbPath suffix)153     public boolean endsWith(RbPath suffix) {
154         return suffix.length() <= length() && matchesSublist(suffix, length() - suffix.length());
155     }
156 
157     /** Returns whether this path contains the specified path. */
contains(RbPath path)158     public boolean contains(RbPath path) {
159         int maxOffset = length() - path.length();
160         for (int i = 0; i <= maxOffset; i++) {
161             if (matchesSublist(path, i)) {
162                 return true;
163             }
164         }
165         return false;
166     }
167 
168     // Assume length check has been done.
matchesSublist(RbPath path, int offset)169     private boolean matchesSublist(RbPath path, int offset) {
170         for (int i = 0; i < path.length(); i++) {
171             if (!path.getSegment(i).equals(getSegment(i + offset))) {
172                 return false;
173             }
174         }
175         return true;
176     }
177 
178     // TODO: Remove this and isAnonymous() since they are only called once each, in the same place.
getParent()179     public RbPath getParent() {
180         checkState(length() > 0, "cannot get parent of the empty path");
181         return new RbPath(segments.subList(0, length() - 1));
182     }
183 
isAnonymous()184     public boolean isAnonymous() {
185         return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
186     }
187 
188     // TODO: Remove this special case code (called exactly once).
mapSegments(Function<? super String, String> fn)189     public RbPath mapSegments(Function<? super String, String> fn) {
190         return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
191     }
192 
193     // TODO: Remove this and isAlias() in favour of having properly typed paths.
isIntPath()194     public boolean isIntPath() {
195         return typeSuffixIsAnyOf(":int", ":intvector");
196     }
197 
isBinPath()198     public boolean isBinPath() {
199         return typeSuffixIsAnyOf(":bin");
200     }
201 
isAlias()202     public boolean isAlias() {
203         return typeSuffixIsAnyOf(":alias");
204     }
205 
typeSuffixIsAnyOf(String... types)206     private boolean typeSuffixIsAnyOf(String... types) {
207         String lastElement = getSegment(length() - 1);
208         for (String type : types) {
209             if (lastElement.endsWith(type)) {
210                 return true;
211             }
212         }
213         return false;
214     }
215 
compareTo(RbPath other)216     @Override public int compareTo(RbPath other) {
217         return ORDERING.compare(this, other);
218     }
219 
equals(Object other)220     @Override public boolean equals(Object other) {
221         return (other instanceof RbPath) && segments.equals(((RbPath) other).segments);
222     }
223 
hashCode()224     @Override public int hashCode() {
225         return hashCode;
226     }
227 
toString()228     @Override public String toString() {
229         return String.join("/", segments);
230     }
231 }
232