xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/json/CldrNode.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.json;
2 
3 import com.ibm.icu.impl.Utility;
4 import java.text.ParseException;
5 import java.util.HashMap;
6 import java.util.Map;
7 
8 /** CldrNode represent a Element in XML as it appears in a CldrItem's path. */
9 public class CldrNode {
10 
createNode(String parent, String pathSegment, String fullPathSegment)11     public static CldrNode createNode(String parent, String pathSegment, String fullPathSegment)
12             throws ParseException {
13         return createNode(parent, pathSegment, fullPathSegment, null);
14     }
15 
createNode( String parent, String pathSegment, String fullPathSegment, CldrItem item)16     public static CldrNode createNode(
17             String parent, String pathSegment, String fullPathSegment, CldrItem item)
18             throws ParseException {
19         CldrNode node = new CldrNode();
20         node.item = item;
21         node.parent = parent;
22         node.name = extractAttrs(pathSegment, node.distinguishingAttributes);
23         String fullTrunk = extractAttrs(fullPathSegment, node.nondistinguishingAttributes);
24         if (!node.name.equals(fullTrunk)) {
25             throw new ParseException(
26                     "Error in parsing \"" + pathSegment + " \":\"" + fullPathSegment, 0);
27         }
28 
29         for (String key : node.distinguishingAttributes.keySet()) {
30             node.nondistinguishingAttributes.remove(key);
31         }
32 
33         String[] suppressList = LdmlConvertRules.ATTR_SUPPRESS_LIST;
34 
35         // let's check if there is anything that can be suppressed
36         // TODO: should hash the parent and pathSegment values so we don't have to linear
37         // search.
38         for (int i = 0; i < suppressList.length; i += 3) {
39             if (node.name.equals(suppressList[i])) {
40                 String key = suppressList[i + 2];
41                 String value = node.distinguishingAttributes.get(key);
42                 if (value != null && value.equals(suppressList[i + 1])) {
43                     node.distinguishingAttributes.remove(key);
44                 }
45             }
46         }
47         return node;
48     }
49 
50     /**
51      * Extract all the attributes and their value in the path.
52      *
53      * @param pathSegment A complete or partial path.
54      * @param attributes String map to receive attribute mapping.
55      * @return Part of the string before the first attribute.
56      * @throws ParseException
57      */
extractAttrs(String pathSegment, Map<String, String> attributes)58     private static String extractAttrs(String pathSegment, Map<String, String> attributes)
59             throws ParseException {
60         int start = 0;
61 
62         String trunk = new String();
63         while (true) {
64             int ind1 = pathSegment.indexOf("[@", start);
65             if (ind1 < 0) {
66                 if (trunk.isEmpty()) {
67                     trunk = pathSegment;
68                 }
69                 break;
70             }
71             if (trunk.isEmpty()) {
72                 trunk = pathSegment.substring(0, ind1);
73             }
74             ind1 += 2;
75             int ind2 = pathSegment.indexOf("=", ind1);
76             if (ind2 < 0) {
77                 throw new ParseException("Missing '=' in attribute specification.", ind1);
78             }
79             String attr = pathSegment.substring(ind1, ind2);
80 
81             ind1 = ind2 + 1;
82             if (pathSegment.charAt(ind1) == '"') {
83                 ind1 += 1;
84                 ind2 = pathSegment.indexOf("\"]", ind1);
85             } else {
86                 ind2 = pathSegment.indexOf("]", ind1);
87             }
88 
89             if (ind2 < 0) {
90                 throw new ParseException("Unexpected end in attribute specification.", ind1);
91             }
92 
93             String value = pathSegment.substring(ind1, ind2);
94 
95             start = ind2;
96 
97             attributes.put(attr, value);
98         }
99 
100         return trunk;
101     }
102 
103     /** distinguishing attributes as identified by CLDR tools. */
104     private Map<String, String> distinguishingAttributes;
105 
106     /** non-distinguishing attributes as identified by CLDR tools. */
107     private Map<String, String> nondistinguishingAttributes;
108 
109     /** name of the element. */
110     private String name;
111 
112     /** parent element for this element. */
113     private String parent;
114 
115     /** CldrItem, if any */
116     private CldrItem item;
117 
getUntransformedPath()118     public String getUntransformedPath() {
119         if (item != null) {
120             return item.getUntransformedPath();
121         } else {
122             return "noitem";
123         }
124     }
125 
126     /**
127      * This name is derived from element name and attributes. Once it is calculated, it is cached in
128      * this variable.
129      */
130     private String uniqueNodeName;
131 
CldrNode()132     private CldrNode() {
133         distinguishingAttributes = new HashMap<>();
134         nondistinguishingAttributes = new HashMap<>();
135     }
136 
137     /**
138      * Get the string map for attributes that should be treated as values.
139      *
140      * @return String map.
141      */
getAttrAsValueMap()142     public Map<String, String> getAttrAsValueMap() {
143         Map<String, String> attributesAsValues = new HashMap<>();
144         for (String key : distinguishingAttributes.keySet()) {
145             String keyStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
146             String keyStrMidStar = LdmlConvertRules.getKeyStr(getParent(), "*", key);
147             String keyStr2 = LdmlConvertRules.getKeyStr(name, key);
148             if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(keyStr)
149                     || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(keyStr2)) {
150                 if (LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStr)
151                         || LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStrMidStar)) {
152                     attributesAsValues.put(
153                             LdmlConvertRules.ANONYMOUS_KEY, distinguishingAttributes.get(key));
154                 } else {
155                     attributesAsValues.put(key, distinguishingAttributes.get(key));
156                 }
157             }
158         }
159 
160         for (String key : nondistinguishingAttributes.keySet()) {
161             if (LdmlConvertRules.IGNORABLE_NONDISTINGUISHING_ATTR_SET.contains(key)) {
162                 continue;
163             }
164             String keyStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
165             String keyStrMidStar = LdmlConvertRules.getKeyStr(getParent(), "*", key);
166             if (LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStr)
167                     || LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStrMidStar)) {
168                 attributesAsValues.put(
169                         LdmlConvertRules.ANONYMOUS_KEY, nondistinguishingAttributes.get(key));
170             } else {
171                 attributesAsValues.put(key, nondistinguishingAttributes.get(key));
172             }
173         }
174 
175         // ADJUST RADIX BASED ON ICU RULE
176         final String radixValue = attributesAsValues.get("radix");
177         if (radixValue != null) {
178             attributesAsValues.remove("radix");
179             for (Map.Entry<String, String> attributes : attributesAsValues.entrySet()) {
180                 String oldKey = attributes.getKey();
181                 String newValue = attributes.getValue();
182                 String newKey = oldKey + "/" + radixValue;
183                 attributesAsValues.remove(oldKey);
184                 attributesAsValues.put(newKey, newValue);
185             }
186         }
187         return attributesAsValues;
188     }
189 
setDistinguishingAttributes(Map<String, String> distinguishingAttributes)190     public void setDistinguishingAttributes(Map<String, String> distinguishingAttributes) {
191         this.distinguishingAttributes = distinguishingAttributes;
192     }
193 
setNondistinguishingAttributes(Map<String, String> nondistinguishingAttributes)194     public void setNondistinguishingAttributes(Map<String, String> nondistinguishingAttributes) {
195         this.nondistinguishingAttributes = nondistinguishingAttributes;
196     }
197 
getDistinguishingAttributes()198     public Map<String, String> getDistinguishingAttributes() {
199         return distinguishingAttributes;
200     }
201 
getName()202     public String getName() {
203         return name;
204     }
205 
getNondistinguishingAttributes()206     public Map<String, String> getNondistinguishingAttributes() {
207         return nondistinguishingAttributes;
208     }
209 
210     /**
211      * Construct a name that can be used as key in its container (by incorporating distinguishing
212      * attributes).
213      *
214      * <p>Each segment in CLDR path corresponding to a XML element. Element name itself can not be
215      * used as JSON key because it might not be unique in its container. A set of rules is used here
216      * to construct this key name. Some of the attributes will be used in constructing the key name,
217      * the remaining attributes are returned and should be used to fill the mapping.
218      *
219      * <p>The basic mapping is from <element_name>[@<attr_name>=<attr_value>]+ to
220      * <element_name>-<attr_name>-<attr_value>
221      *
222      * @return A unique name that can be used as key in its container.
223      */
getNodeKeyName()224     public String getNodeKeyName() {
225         if (uniqueNodeName != null) {
226             return uniqueNodeName;
227         }
228 
229         // decide the main name
230         StringBuffer strbuf = new StringBuffer();
231         String lastKey = null; // for err message
232         for (String key : distinguishingAttributes.keySet()) {
233             String attrIdStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
234             String attrIdStr2 = LdmlConvertRules.getKeyStr(name, key);
235             if (LdmlConvertRules.IsSuppresedAttr(attrIdStr)) {
236                 continue;
237             }
238             if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr)
239                     || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr2)) { // with *
240                 continue;
241             }
242 
243             if (!key.equals("alt")
244                     && !key.equals("count")
245                     && !LdmlConvertRules.NAME_PART_DISTINGUISHING_ATTR_SET.contains(attrIdStr)) {
246                 if (strbuf.length() != 0) {
247                     throw new IllegalArgumentException(
248                             "Can not have more than 1 key values in name: "
249                                     + "both '"
250                                     + strbuf
251                                     + "' ("
252                                     + lastKey
253                                     + ") and '"
254                                     + distinguishingAttributes.get(key)
255                                     + "' ("
256                                     + key
257                                     + "). attrIdStr="
258                                     + attrIdStr
259                                     + " - check LdmlConvertRules.java#NAME_PART_DISTINGUISHING_ATTR_SET");
260                 }
261                 strbuf.append(distinguishingAttributes.get(key));
262                 lastKey = key;
263             }
264         }
265         if (strbuf.length() == 0) {
266             strbuf.append(name);
267         }
268 
269         // append distinguishing attributes
270         for (String key : distinguishingAttributes.keySet()) {
271             String attrIdStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
272             String attrIdStr2 = LdmlConvertRules.getKeyStr(name, key);
273             if (LdmlConvertRules.IsSuppresedAttr(attrIdStr)) {
274                 continue;
275             }
276             if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr)
277                     || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr2)) {
278                 continue;
279             }
280 
281             if (!key.equals("alt")
282                     && !LdmlConvertRules.NAME_PART_DISTINGUISHING_ATTR_SET.contains(attrIdStr)) {
283                 continue;
284             }
285             strbuf.append("-");
286             strbuf.append(key);
287             strbuf.append("-");
288             strbuf.append(distinguishingAttributes.get(key));
289         }
290         uniqueNodeName = strbuf.toString();
291 
292         if (uniqueNodeName.length() == 1 && name.equals("character")) {
293             // character attribute has value that can be any unicode character. Those
294             // might not be url safe and can be difficult for user to specify. It is
295             // converted to hex string here.
296             uniqueNodeName = "U+" + Utility.hex(uniqueNodeName.charAt(0), 4);
297         } else if (isTimezoneType()) {
298             // time zone name has GMT+9 type of thing. "+" need to be removed to make
299             // it URL safe.
300             uniqueNodeName = uniqueNodeName.replaceFirst("\\+", "");
301         }
302 
303         return uniqueNodeName;
304     }
305 
306     /**
307      * Construct a name that has all distinguishing attributes that should not be ignored.
308      *
309      * <p>Different from getNodeKeyName, this name has include those distinguishing attributes that
310      * will be treated as values.
311      *
312      * @return A distinguishing name for differentiating element.
313      */
getNodeDistinguishingName()314     public String getNodeDistinguishingName() {
315         // decide the main name
316         StringBuffer strbuf = new StringBuffer();
317         strbuf.append(name);
318 
319         // append distinguishing attributes
320         for (String key : distinguishingAttributes.keySet()) {
321             strbuf.append("-");
322             strbuf.append(key);
323             strbuf.append("-");
324             strbuf.append(distinguishingAttributes.get(key));
325         }
326         return strbuf.toString();
327     }
328 
isTimezoneType()329     public boolean isTimezoneType() {
330         return LdmlConvertRules.TIMEZONE_ELEMENT_NAME_SET.contains(name);
331     }
332 
333     @Override
toString()334     public String toString() {
335         return "[CldrNode " + getParent() + "/" + getNodeDistinguishingName() + "]";
336     }
337 
getParent()338     public String getParent() {
339         return parent;
340     }
341 }
342