package org.unicode.cldr.json;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.json.LdmlConvertRules.SplittableAttributeSpec;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.XPathParts;
import org.unicode.cldr.util.ZoneParser;

/**
 * An object representing a single CLDR XML item: its path (kept in four forms) and its value.
 */
public class CldrItem implements Comparable<CldrItem> {

    /** When true, the constructor dumps the path, full path and value to stdout. */
    private static final boolean DEBUG = false;

    /**
     * Split the path to an array of strings, each string representing a segment.
     *
     * <p>A leading {@code //} is dropped. A {@code /} only separates segments when it is outside
     * of {@code [...]}; inside brackets, a double-quoted run is skipped as a whole so that
     * {@code ]} and {@code /} inside attribute values (for example {@code
     * zone[@type="America/Adak"]}) do not end the segment.
     *
     * @param path The path of XML element.
     * @return array of segments.
     */
    private static String[] splitPathToSegments(String path) {
        // remove leading //
        if (path.startsWith("//")) {
            path = path.substring(2);
        }

        int start = 0;
        ArrayList<String> segments = new ArrayList<>();
        boolean inBracket = false;
        boolean inBracketQuote = false;
        for (int pos = start; pos < path.length(); ++pos) {
            char ch = path.charAt(pos);
            if (inBracketQuote) {
                if (ch == '"') {
                    inBracketQuote = false;
                }
            } else if (inBracket) {
                if (ch == ']') {
                    inBracket = false;
                } else if (ch == '"') {
                    inBracketQuote = true;
                }
            } else {
                if (ch == '[') {
                    inBracket = true;
                } else if (ch == '/') {
                    segments.add(path.substring(start, pos));
                    start = pos + 1;
                }
            }
        }
        // Whatever follows the last separator is the final segment.
        segments.add(path.substring(start));

        return segments.toArray(new String[0]);
    }

    /**
     * The full path of a CLDR item.
     *
     * <p>Compared to {@link #path}, the full path also contains non-distinguishing attributes.
     */
    private String fullPath;

    /**
     * The resolution path of a CLDR item.
     *
     * <p>This path only contains distinguishing attributes that are necessary to identify a CLDR
     * XML item in the CLDR tree.
     */
    private String path;

    /**
     * The untransformed counterpart of {@link #fullPath}, stored as passed to the constructor.
     *
     * <p>NOTE(review): presumably this is the full path before any path transformation was
     * applied; confirm against the code that constructs items.
     */
    private String untransformedFullPath;

    /**
     * The untransformed counterpart of {@link #path}, stored as passed to the constructor. It is
     * the path used for ordering in {@link #compareTo(CldrItem)} and for {@link #toString()}.
     *
     * <p>NOTE(review): presumably this is the resolution path before any path transformation was
     * applied; confirm against the code that constructs items.
     */
    private String untransformedPath;

    /**
     * @return the untransformed resolution path of this item.
     */
    protected String getUntransformedPath() {
        return untransformedPath;
    }

    @Override
    public String toString() {
        return "[CldrItem " + getUntransformedPath() + "]";
    }

    /** The value of this CLDR item. The constructor stores "" when it is given null. */
    private String value;

    /**
     * Creates an item from its four path forms and its value.
     *
     * @param path the resolution path; must not be empty.
     * @param fullPath the full path.
     * @param untransformedPath the untransformed resolution path.
     * @param untransformedFullPath the untransformed full path.
     * @param value the item value; null is stored as the empty string.
     * @throws IllegalArgumentException if {@code path} is empty.
     */
    CldrItem(
            final String path,
            String fullPath,
            String untransformedPath,
            String untransformedFullPath,
            String value) {

        if (DEBUG) {
            System.out.println("---");
            System.out.println("    PATH => " + path);
            System.out.println("FULLPATH => " + fullPath);
            System.out.println("   VALUE => " + value);
            System.out.println("---");
        }

        if (path.isEmpty()) {
            // Should not happen
            throw new IllegalArgumentException(
                    "empty path with "
                            + fullPath
                            + "|"
                            + untransformedPath
                            + "|"
                            + untransformedFullPath
                            + " = "
                            + value);
        }

        this.path = path;
        this.fullPath = fullPath;
        this.untransformedPath = untransformedPath;
        this.untransformedFullPath = untransformedFullPath;
        this.value = (value == null) ? "" : value;
    }

    /**
     * @return the full path of this item.
     */
    public String getFullPath() {
        return fullPath;
    }

    /**
     * @return the resolution path of this item.
     */
    public String getPath() {
        return path;
    }

    /**
     * @return the value of this item.
     */
    public String getValue() {
        return value;
    }

    /**
     * Replaces the value of this item. Unlike the constructor, null is stored as-is.
     *
     * @param value the new value.
     */
    public void setValue(String value) {
        this.value = value;
    }

    /**
     * Replaces the full path of this item.
     *
     * @param fullPath the new full path.
     */
    public void setFullPath(String fullPath) {
        this.fullPath = fullPath;
    }

    /**
     * This function creates a node list from a CLDR path.
     *
     * <p>Mostly, the node has one-to-one correspondence with path segment. But there are special
     * cases where one segment can be split to multiple nodes. If necessary, several segments can
     * also be combined to one node.
     *
     * @return A list of nodes in strict parent-to-child order.
     * @throws ParseException declared for the node creation calls made here.
     */
    public ArrayList<CldrNode> getNodesInPath() throws ParseException {
        String[] pathSegments = splitPathToSegments(path);
        String[] fullPathSegments = splitPathToSegments(fullPath);
        assert (pathSegments.length == fullPathSegments.length);
        ArrayList<CldrNode> nodesInPath = new ArrayList<>();

        String parent = "";
        for (int i = 0; i < pathSegments.length; i++) {
            CldrNode node = CldrNode.createNode(parent, pathSegments[i], fullPathSegments[i], this);
            String nodeName = node.getName();

            // Zone and time zone element has '/' in attribute value, like
            // .../zone[@type="America/Adak"]/...
            // Such element can not be converted to "zone-type-America/Adak" as it is
            // not url safe. To deal with such issue, two segment are generated. It is
            // like the original path is written as:
            // .../zone/America/Adak/...
            if (node.isTimezoneType()) {
                // A bare node for the element itself, followed by one node per type piece.
                nodesInPath.add(CldrNode.createNode(parent, nodeName, nodeName, this));
                String typeValue = node.getDistinguishingAttributes().get("type");
                // Literal replacement; no regular expression is needed here.
                typeValue = typeValue.replace("Asia:Taipei", "Asia/Taipei");
                String[] segments = typeValue.split("/");
                for (int j = 0; j < segments.length; j++) {
                    CldrNode newNode = CldrNode.createNode(parent, nodeName, nodeName, this);
                    if (j == segments.length - 1) {
                        // Only the last piece carries the original node's other
                        // distinguishing attributes; "type" is overwritten just below.
                        newNode.getDistinguishingAttributes()
                                .putAll(node.getDistinguishingAttributes());
                        newNode.getDistinguishingAttributes().remove("type");
                    }
                    newNode.getDistinguishingAttributes().put("type", segments[j]);
                    nodesInPath.add(newNode);
                }
            } else {
                nodesInPath.add(node);
            }
            parent = nodeName;
        }
        return nodesInPath;
    }

    /**
     * Replaces the resolution path of this item.
     *
     * @param path the new path; must not be empty.
     * @throws IllegalArgumentException if {@code path} is empty.
     */
    public void setPath(String path) {
        if (path.isEmpty()) {
            throw new IllegalArgumentException("empty path");
        }
        this.path = path;
    }

    /**
     * Some CLDR items have attributes that should be split before transformation. For example, an
     * item like {@code <calendarPreference territories="CN CX" ordering="gregorian chinese"/>}
     * should really be treated as 2 separate items: {@code <calendarPreference territories="CN"
     * ordering="gregorian chinese"/>} and {@code <calendarPreference territories="CX"
     * ordering="gregorian chinese"/>}.
     *
     * <p>Only the first applicable entry of {@code LdmlConvertRules.getSplittableAttrs()} is
     * used. Duplicate words in the attribute value are reported on stderr and skipped.
     *
     * @return Array of CldrItem if it can be split, otherwise null if nothing to split.
     */
    public CldrItem[] split() {
        XPathParts xpp = XPathParts.getFrozenInstance(path);
        XPathParts fullxpp = XPathParts.getFrozenInstance(fullPath);
        XPathParts untransformedxpp = XPathParts.getFrozenInstance(untransformedPath);
        XPathParts untransformedfullxpp = XPathParts.getFrozenInstance(untransformedFullPath);

        for (SplittableAttributeSpec s : LdmlConvertRules.getSplittableAttrs()) {
            if (!fullxpp.containsElement(s.element) || !fullxpp.containsAttribute(s.attribute)) {
                continue;
            }
            String wordString = fullxpp.findAttributeValue(s.element, s.attribute);
            if (wordString == null) {
                // The attribute exists in the path, but not on this element: the spec does
                // not apply. Previously this case failed with a NullPointerException.
                continue;
            }

            ArrayList<CldrItem> list = new ArrayList<>();
            String[] words = wordString.trim().split("\\s+");
            Set<String> hadWords = new TreeSet<>();
            for (String word : words) {
                if (!hadWords.add(word)) {
                    System.err.println("Warning: Duplicate attribute " + word + " in " + fullPath);
                    continue;
                }
                // TODO: Ideally, there would be a separate post-split path transform.

                XPathParts newxpp = xpp.cloneAsThawed();
                XPathParts newfullxpp = fullxpp.cloneAsThawed();
                XPathParts untransformednewxpp = untransformedxpp.cloneAsThawed();
                XPathParts untransformednewfullxpp = untransformedfullxpp.cloneAsThawed();

                newxpp.setAttribute(s.element, s.attribute, word);
                newfullxpp.setAttribute(s.element, s.attribute, word);
                untransformednewxpp.setAttribute(s.element, s.attribute, word);
                untransformednewfullxpp.setAttribute(s.element, s.attribute, word);

                String newValue = value;
                if (s.attrAsValueAfterSplit != null) {
                    // The named attribute becomes the value of the new item, and the split
                    // word becomes a trailing element of the (transformed) paths. The
                    // untransformed paths keep their attributes.
                    newValue = fullxpp.findAttributeValue(s.element, s.attrAsValueAfterSplit);
                    newxpp.removeAttribute(s.element, s.attrAsValueAfterSplit);
                    newxpp.removeAttribute(s.element, s.attribute);
                    newxpp.addElement(word);
                    newfullxpp.removeAttribute(s.element, s.attrAsValueAfterSplit);
                    newfullxpp.removeAttribute(s.element, s.attribute);
                    newfullxpp.addElement(word);
                }
                list.add(
                        new CldrItem(
                                newxpp.toString(),
                                newfullxpp.toString(),
                                untransformednewxpp.toString(),
                                untransformednewfullxpp.toString(),
                                newValue));
            }
            return list.toArray(new CldrItem[0]);
        }
        return null; // nothing to split
    }

    /**
     * Check if the element path contains any item that need to be sorted first.
     *
     * @return True if the element need to be sorted before further process.
     */
    public boolean needsSort() {
        // Parse once; the path does not change while iterating over the element names.
        XPathParts xpp = XPathParts.getFrozenInstance(path);
        for (String item : LdmlConvertRules.ELEMENT_NEED_SORT) {
            if (xpp.containsElement(item)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return true if the resolution path ends with {@code /alias}.
     */
    public boolean isAliasItem() {
        return path.endsWith("/alias");
    }

    /**
     * Orders items by, in turn: zone type (when both paths contain a {@code zone} element),
     * territories for {@code weekData}, category then territories for {@code measurementData},
     * and finally the DTD comparator applied to the untransformed paths.
     *
     * @param otherItem the item to compare with.
     * @return negative, zero or positive as this item sorts before, with, or after the other.
     */
    @Override
    public int compareTo(CldrItem otherItem) {
        // NOTE(review): this side is parsed from untransformedPath while the other side is
        // parsed from untransformedFullPath. That asymmetry is preserved from the existing
        // behavior; confirm whether it is intentional.
        XPathParts thisxpp = XPathParts.getFrozenInstance(untransformedPath);
        XPathParts otherxpp = XPathParts.getFrozenInstance(otherItem.untransformedFullPath);
        if (thisxpp.containsElement("zone") && otherxpp.containsElement("zone")) {
            int zoneResult = compareZoneTypes(thisxpp, otherxpp);
            if (zoneResult != 0) {
                return zoneResult;
            }
        }

        DtdType fileDtdType;
        if (thisxpp.getElement(0).equals("supplementalData")) {
            fileDtdType = DtdType.supplementalData;
        } else {
            fileDtdType = DtdType.ldml;
        }

        if (thisxpp.getElement(1).equals("weekData")
                && thisxpp.getElement(2).equals(otherxpp.getElement(2))) {
            int result = compareTerritories(thisxpp, otherxpp);
            if (result != 0) {
                return result;
            }
        }
        if (thisxpp.getElement(1).equals("measurementData")
                && thisxpp.getElement(2).equals(otherxpp.getElement(2))) {
            String thisCategory = thisxpp.findAttributeValue("measurementSystem", "category");
            if (thisCategory == null) {
                thisCategory = "";
            }
            String otherCategory = otherxpp.findAttributeValue("measurementSystem", "category");
            if (otherCategory == null) {
                otherCategory = "";
            }
            if (!thisCategory.equals(otherCategory)) {
                return thisCategory.compareTo(otherCategory);
            }
            int result = compareTerritories(thisxpp, otherxpp);
            if (result != 0) {
                return result;
            }
        }
        return DtdData.getInstance(fileDtdType)
                .getDtdComparator(null)
                .compare(untransformedPath, otherItem.untransformedPath);
    }

    /**
     * Compares the {@code type} attribute of the {@code zone} element of two paths: first the
     * piece before the first '/' using {@code ZoneParser.regionalCompare}, then the second piece
     * by natural string order.
     *
     * <p>Returns 0 (undecided) when either type is missing, or when the first pieces tie and
     * either type has no second piece, instead of failing with an exception.
     */
    private static int compareZoneTypes(XPathParts thisxpp, XPathParts otherxpp) {
        String thisType = thisxpp.findAttributeValue("zone", "type");
        String otherType = otherxpp.findAttributeValue("zone", "type");
        if (thisType == null || otherType == null) {
            return 0;
        }
        String[] thisZonePieces = thisType.split("/");
        String[] otherZonePieces = otherType.split("/");
        int result = ZoneParser.regionalCompare.compare(thisZonePieces[0], otherZonePieces[0]);
        if (result != 0) {
            return result;
        }
        if (thisZonePieces.length < 2 || otherZonePieces.length < 2) {
            return 0;
        }
        return thisZonePieces[1].compareTo(otherZonePieces[1]);
    }

    /**
     * Compares the first {@code territories} attribute value found in each path; returns 0 when
     * either path has none.
     */
    private static int compareTerritories(XPathParts thisxpp, XPathParts otherxpp) {
        String thisTerritory = thisxpp.findFirstAttributeValue("territories");
        String otherTerritory = otherxpp.findFirstAttributeValue("territories");
        if (thisTerritory == null || otherTerritory == null) {
            return 0;
        }
        return thisTerritory.compareTo(otherTerritory);
    }

    /**
     * Rewrites this item's paths and value for RBNF rules.
     *
     * <p>First, if the full path has an {@code rbnfrule} element with a {@code value} attribute,
     * that attribute is removed, an attribute named after its content is added carrying the
     * item's current value, and the item's value is set to the empty string.
     *
     * <p>Second, if the full path has a {@code /ruleset[...]} segment that is followed by
     * another segment and contains {@code @type}, the type is prefixed with {@code %%} when the
     * segment also contains {@code @access}, otherwise with {@code %}. The change is applied to
     * both the path and the full path.
     */
    void adjustRbnfPath() {
        XPathParts xpp = XPathParts.getFrozenInstance(getFullPath());
        final String sub = xpp.findAttributeValue("rbnfrule", "value");
        if (sub != null) {
            xpp = xpp.cloneAsThawed();
            final String value = getValue();
            xpp.removeAttribute(-1, "value");
            xpp.addAttribute(sub, value);
            setFullPath(xpp.toString());
            setValue("");
        }

        // ADJUST ACCESS=PRIVATE/PUBLIC BASED ON ICU RULE
        String fullpath = getFullPath();
        int ruleStartIndex = fullpath.indexOf("/ruleset[");
        if (ruleStartIndex < 0) {
            // No ruleset segment with attributes. A plain contains("/ruleset") test is not
            // enough: it also matches "/rulesetGrouping", which previously led to
            // substring(-1) and a StringIndexOutOfBoundsException.
            return;
        }
        int ruleEndIndex = fullpath.indexOf("/", ruleStartIndex + 1);
        if (ruleEndIndex <= ruleStartIndex) {
            // The ruleset segment is the last one; nothing is adjusted in that case.
            return;
        }

        String oldRulePath = fullpath.substring(ruleStartIndex, ruleEndIndex);
        String newRulePath = oldRulePath;
        if (newRulePath.contains("@type")) {
            int typeIndexStart = newRulePath.indexOf("\"", newRulePath.indexOf("@type"));
            int typeIndexEnd = newRulePath.indexOf("\"", typeIndexStart + 1);
            String type = newRulePath.substring(typeIndexStart + 1, typeIndexEnd);

            String newType = newRulePath.contains("@access") ? "%%" + type : "%" + type;

            // NOTE(review): replace() rewrites every occurrence of the type text, not only
            // the @type attribute value; a type that also occurs elsewhere in the path would
            // be rewritten there as well. Kept as-is because the exact form of the
            // transformed path cannot be determined from this class alone.
            newRulePath = newRulePath.replace(type, newType);
            setPath(getPath().replace(type, newType));
        }
        fullpath = fullpath.replace(oldRulePath, newRulePath);
        setFullPath(fullpath);
    }
}