xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/json/CldrItem.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.json;
2 
3 import java.text.ParseException;
4 import java.util.ArrayList;
5 import java.util.Set;
6 import java.util.TreeSet;
7 import org.unicode.cldr.json.LdmlConvertRules.SplittableAttributeSpec;
8 import org.unicode.cldr.util.DtdData;
9 import org.unicode.cldr.util.DtdType;
10 import org.unicode.cldr.util.XPathParts;
11 import org.unicode.cldr.util.ZoneParser;
12 
/** An object representing a CLDR XML item. */
14 public class CldrItem implements Comparable<CldrItem> {
15 
16     private static boolean DEBUG = false;
17 
18     /**
19      * Split the path to an array of string, each string represent a segment.
20      *
21      * @param path The path of XML element.
22      * @return array of segments.
23      */
splitPathToSegments(String path)24     private static String[] splitPathToSegments(String path) {
25         // remove leading //
26         if (path.startsWith("//")) {
27             path = path.substring(2);
28         }
29 
30         int start = 0;
31         ArrayList<String> segments = new ArrayList<>();
32         boolean inBracket = false;
33         boolean inBracketQuote = false;
34         for (int pos = start; pos < path.length(); ++pos) {
35             char ch = path.charAt(pos);
36             if (inBracketQuote) {
37                 if (ch == '"') {
38                     inBracketQuote = false;
39                 }
40             } else if (inBracket) {
41                 if (ch == ']') {
42                     inBracket = false;
43                 } else if (ch == '"') {
44                     inBracketQuote = true;
45                 }
46             } else {
47                 if (ch == '[') {
48                     inBracket = true;
49                 } else if (ch == '/') {
50                     segments.add(path.substring(start, pos));
51                     start = pos + 1;
52                 }
53             }
54         }
55         segments.add(path.substring(start, path.length()));
56 
57         return segments.toArray(new String[segments.size()]);
58     }
59 
    /**
     * The full path of a CLDR item.
     *
     * <p>Compared to {@code path}, the full path also contains non-distinguishing attributes.
     */
65     private String fullPath;
66 
67     /**
68      * The resolution path of a CLDR item.
69      *
70      * <p>This path only contains distinguishable attributes that are necessary to identify a CLDR
71      * XML item in the CLDR tree.
72      */
73     private String path;
74 
    /**
     * The untransformed full path of a CLDR item.
     *
     * <p>Counterpart of {@code fullPath}; judging by the name, this is the full path as it was
     * before any path transformation was applied.
     */
80     private String untransformedFullPath;
81 
    /**
     * The untransformed resolution path of a CLDR item.
     *
     * <p>Counterpart of {@code path}; judging by the name, this is the resolution path as it was
     * before any path transformation was applied. It is the path used when comparing items and
     * when printing them.
     */
88     private String untransformedPath;
89 
getUntransformedPath()90     protected String getUntransformedPath() {
91         return untransformedPath;
92     }
93 
94     @Override
toString()95     public String toString() {
96         return "[CldrItem " + getUntransformedPath() + "]";
97     }
98 
99     /** The value of this CLDR item. */
100     private String value;
101 
CldrItem( final String path, String fullPath, String untransformedPath, String untransformedFullPath, String value)102     CldrItem(
103             final String path,
104             String fullPath,
105             String untransformedPath,
106             String untransformedFullPath,
107             String value) {
108 
109         if (DEBUG) {
110             System.out.println("---");
111             System.out.println("    PATH => " + path);
112             System.out.println("FULLPATH => " + fullPath);
113             System.out.println("   VALUE => " + value);
114             System.out.println("---");
115         }
116 
117         if (path.isEmpty()) {
118             // Should not happen
119             throw new IllegalArgumentException(
120                     "empty path with "
121                             + fullPath
122                             + "|"
123                             + untransformedPath
124                             + "|"
125                             + untransformedFullPath
126                             + " = "
127                             + value);
128         }
129 
130         this.path = path;
131         this.fullPath = fullPath;
132         this.untransformedPath = untransformedPath;
133         this.untransformedFullPath = untransformedFullPath;
134 
135         if (value == null) {
136             this.value = "";
137         } else {
138             this.value = value;
139         }
140     }
141 
getFullPath()142     public String getFullPath() {
143         return fullPath;
144     }
145 
getPath()146     public String getPath() {
147         return path;
148     }
149 
150     /**
151      * Obtain the sortKey string, construct it if not yet.
152      *
153      * @return sort key string.
154      */
getValue()155     public String getValue() {
156         return value;
157     }
158 
    // Note: zone and time zone elements have '/' in their attribute value, like
    // .../zone[@type="America/Adak"]/...
    // Such an element cannot be converted to "zone-type-America/Adak" because that is
    // not URL safe. To deal with this, several segments are generated, as if
    // the original path had been written as:
    // .../zone/America/Adak/...
    // (This handling is implemented in getNodesInPath().)
165 
setValue(String value)166     public void setValue(String value) {
167         this.value = value;
168     }
169 
setFullPath(String fullPath)170     public void setFullPath(String fullPath) {
171         this.fullPath = fullPath;
172     }
173 
174     /**
175      * This function create a node list from a CLDR path.
176      *
177      * <p>Mostly, the node has one-to-one correspondence with path segment. But there are special
178      * cases where one segment can be split to multiple nodes. If necessary, several segments can
179      * also be combined to one node.
180      *
181      * @return A list of node in strict parent-to-child order.
182      * @throws ParseException
183      */
getNodesInPath()184     public ArrayList<CldrNode> getNodesInPath() throws ParseException {
185         String[] pathSegments = splitPathToSegments(path);
186         String[] fullPathSegments = splitPathToSegments(fullPath);
187         assert (pathSegments.length == fullPathSegments.length);
188         ArrayList<CldrNode> nodesInPath = new ArrayList<>();
189 
190         String parent = "";
191         for (int i = 0; i < pathSegments.length; i++) {
192             CldrNode node = CldrNode.createNode(parent, pathSegments[i], fullPathSegments[i], this);
193 
194             // Zone and time zone element has '/' in attribute value, like
195             // .../zone[@type="America/Adak"]/...
196             // Such element can not be converted to "zone-type-America/Adak" as it is
197             // not url safe. To deal with such issue, two segment are generated. It is
198             // like the original path is written as:
199             // .../zone/America/Adak/...
200             String nodeName = node.getName();
201             if (node.isTimezoneType()) {
202                 nodesInPath.add(CldrNode.createNode(parent, node.getName(), node.getName(), this));
203                 String typeValue = node.getDistinguishingAttributes().get("type");
204                 typeValue = typeValue.replaceAll("Asia:Taipei", "Asia/Taipei");
205                 String[] segments = typeValue.split("/");
206                 for (int j = 0; j < segments.length; j++) {
207                     CldrNode newNode =
208                             CldrNode.createNode(parent, node.getName(), node.getName(), this);
209                     if (j == segments.length - 1) {
210                         newNode.getDistinguishingAttributes()
211                                 .putAll(node.getDistinguishingAttributes());
212                         newNode.getDistinguishingAttributes().remove("type");
213                     }
214                     newNode.getDistinguishingAttributes().put("type", segments[j]);
215                     nodesInPath.add(newNode);
216                 }
217             } else {
218                 nodesInPath.add(node);
219             }
220             parent = nodeName;
221         }
222         return nodesInPath;
223     }
224 
setPath(String path)225     public void setPath(String path) {
226         if (path.isEmpty()) {
227             throw new IllegalArgumentException("empty path");
228         }
229         this.path = path;
230     }
231 
232     /**
233      * Some CLDR items have attributes that should be split before transformation. For examples,
234      * item like: <calendarPreference territories="CN CX" ordering="gregorian chinese"/> should
235      * really be treated as 2 separate items: <calendarPreference territories="CN"
236      * ordering="gregorian chinese"/> <calendarPreference territories="CX" ordering="gregorian
237      * chinese"/>
238      *
239      * @return Array of CldrItem if it can be split, otherwise null if nothing to split.
240      */
split()241     public CldrItem[] split() {
242         XPathParts xpp = XPathParts.getFrozenInstance(path);
243         XPathParts fullxpp = XPathParts.getFrozenInstance(fullPath);
244         XPathParts untransformedxpp = XPathParts.getFrozenInstance(untransformedPath);
245         XPathParts untransformedfullxpp = XPathParts.getFrozenInstance(untransformedFullPath);
246 
247         for (SplittableAttributeSpec s : LdmlConvertRules.getSplittableAttrs()) {
248             if (fullxpp.containsElement(s.element) && fullxpp.containsAttribute(s.attribute)) {
249                 ArrayList<CldrItem> list = new ArrayList<>();
250                 String wordString = fullxpp.findAttributeValue(s.element, s.attribute);
251                 String[] words = wordString.trim().split("\\s+");
252                 Set<String> hadWords = new TreeSet<>();
253                 for (String word : words) {
254                     if (hadWords.add(word) == false) {
255                         System.err.println(
256                                 "Warning: Duplicate attribute " + word + " in " + fullPath);
257                         continue;
258                     }
259                     // TODO: Ideally, there would be a separate post-split path transform.
260 
261                     XPathParts newxpp = xpp.cloneAsThawed();
262                     XPathParts newfullxpp = fullxpp.cloneAsThawed();
263                     XPathParts untransformednewxpp = untransformedxpp.cloneAsThawed();
264                     XPathParts untransformednewfullxpp = untransformedfullxpp.cloneAsThawed();
265 
266                     newxpp.setAttribute(s.element, s.attribute, word);
267                     newfullxpp.setAttribute(s.element, s.attribute, word);
268                     untransformednewxpp.setAttribute(s.element, s.attribute, word);
269                     untransformednewfullxpp.setAttribute(s.element, s.attribute, word);
270 
271                     if (s.attrAsValueAfterSplit != null) {
272                         String newValue =
273                                 fullxpp.findAttributeValue(s.element, s.attrAsValueAfterSplit);
274                         newxpp.removeAttribute(s.element, s.attrAsValueAfterSplit);
275                         newxpp.removeAttribute(s.element, s.attribute);
276                         newxpp.addElement(word);
277                         newfullxpp.removeAttribute(s.element, s.attrAsValueAfterSplit);
278                         newfullxpp.removeAttribute(s.element, s.attribute);
279                         newfullxpp.addElement(word);
280                         list.add(
281                                 new CldrItem(
282                                         newxpp.toString(),
283                                         newfullxpp.toString(),
284                                         untransformednewxpp.toString(),
285                                         untransformednewfullxpp.toString(),
286                                         newValue));
287                     } else {
288                         list.add(
289                                 new CldrItem(
290                                         newxpp.toString(),
291                                         newfullxpp.toString(),
292                                         untransformednewxpp.toString(),
293                                         untransformednewfullxpp.toString(),
294                                         value));
295                     }
296                 }
297                 return list.toArray(new CldrItem[list.size()]);
298             }
299         }
300         return null; // nothing to split
301     }
302 
303     /**
304      * Check if the element path contains any item that need to be sorted first.
305      *
306      * @return True if the element need to be sorted before further process.
307      */
needsSort()308     public boolean needsSort() {
309         for (String item : LdmlConvertRules.ELEMENT_NEED_SORT) {
310             XPathParts xpp = XPathParts.getFrozenInstance(path);
311             if (xpp.containsElement(item)) {
312                 return true;
313             }
314         }
315         return false;
316     }
317 
isAliasItem()318     public boolean isAliasItem() {
319         return path.endsWith("/alias");
320     }
321 
322     @Override
compareTo(CldrItem otherItem)323     public int compareTo(CldrItem otherItem) {
324         XPathParts thisxpp = XPathParts.getFrozenInstance(untransformedPath);
325         XPathParts otherxpp = XPathParts.getFrozenInstance(otherItem.untransformedFullPath);
326         if (thisxpp.containsElement("zone") && otherxpp.containsElement("zone")) {
327             String[] thisZonePieces = thisxpp.findAttributeValue("zone", "type").split("/");
328             String[] otherZonePieces = otherxpp.findAttributeValue("zone", "type").split("/");
329             int result = ZoneParser.regionalCompare.compare(thisZonePieces[0], otherZonePieces[0]);
330             if (result != 0) {
331                 return result;
332             }
333             result = thisZonePieces[1].compareTo(otherZonePieces[1]);
334             if (result != 0) {
335                 return result;
336             }
337         }
338 
339         DtdType fileDtdType;
340         if (thisxpp.getElement(0).equals("supplementalData")) {
341             fileDtdType = DtdType.supplementalData;
342         } else {
343             fileDtdType = DtdType.ldml;
344         }
345         int result = 0;
346         if (thisxpp.getElement(1).equals("weekData")
347                 && thisxpp.getElement(2).equals(otherxpp.getElement(2))) {
348             String thisTerritory = thisxpp.findFirstAttributeValue("territories");
349             String otherTerritory = otherxpp.findFirstAttributeValue("territories");
350             if (thisTerritory != null && otherTerritory != null) {
351                 result = thisTerritory.compareTo(otherTerritory);
352             }
353             if (result != 0) {
354                 return result;
355             }
356         }
357         if (thisxpp.getElement(1).equals("measurementData")
358                 && thisxpp.getElement(2).equals(otherxpp.getElement(2))) {
359             String thisCategory = thisxpp.findAttributeValue("measurementSystem", "category");
360             if (thisCategory == null) {
361                 thisCategory = "";
362             }
363             String otherCategory = otherxpp.findAttributeValue("measurementSystem", "category");
364             if (otherCategory == null) {
365                 otherCategory = "";
366             }
367             if (!thisCategory.equals(otherCategory)) {
368                 result = thisCategory.compareTo(otherCategory);
369                 return result;
370             }
371             String thisTerritory = thisxpp.findFirstAttributeValue("territories");
372             String otherTerritory = otherxpp.findFirstAttributeValue("territories");
373             if (thisTerritory != null && otherTerritory != null) {
374                 result = thisTerritory.compareTo(otherTerritory);
375             }
376             if (result != 0) {
377                 return result;
378             }
379         }
380         result =
381                 DtdData.getInstance(fileDtdType)
382                         .getDtdComparator(null)
383                         .compare(untransformedPath, otherItem.untransformedPath);
384         return result;
385     }
386 
adjustRbnfPath()387     void adjustRbnfPath() {
388         XPathParts xpp = XPathParts.getFrozenInstance(getFullPath());
389         final String sub = xpp.findAttributeValue("rbnfrule", "value");
390         if (sub != null) {
391             xpp = xpp.cloneAsThawed();
392             final String value = getValue();
393             xpp.removeAttribute(-1, "value");
394             xpp.addAttribute(sub, value);
395             setFullPath(xpp.toString());
396             setValue("");
397         }
398         // ADJUST ACCESS=PRIVATE/PUBLIC BASED ON ICU RULE
399         String fullpath = getFullPath();
400         if (fullpath.contains("/ruleset")) {
401             int ruleStartIndex = fullpath.indexOf("/ruleset[");
402             String checkString = fullpath.substring(ruleStartIndex);
403 
404             int ruleEndIndex = 0;
405             if (checkString.contains("/")) {
406                 ruleEndIndex = fullpath.indexOf("/", ruleStartIndex + 1);
407             }
408             if (ruleEndIndex > ruleStartIndex) {
409                 String oldRulePath = fullpath.substring(ruleStartIndex, ruleEndIndex);
410 
411                 String newRulePath = oldRulePath;
412                 if (newRulePath.contains("@type")) {
413                     int typeIndexStart = newRulePath.indexOf("\"", newRulePath.indexOf("@type"));
414                     int typeIndexEnd = newRulePath.indexOf("\"", typeIndexStart + 1);
415                     String type = newRulePath.substring(typeIndexStart + 1, typeIndexEnd);
416 
417                     String newType = "";
418                     if (newRulePath.contains("@access")) {
419                         newType = "%%" + type;
420                     } else {
421                         newType = "%" + type;
422                     }
423                     newRulePath = newRulePath.replace(type, newType);
424                     setPath(getPath().replace(type, newType));
425                 }
426                 fullpath = fullpath.replace(oldRulePath, newRulePath);
427                 setFullPath(fullpath);
428             }
429         }
430     }
431 }
432