package org.unicode.cldr.json;

import com.ibm.icu.impl.Utility;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;

/**
 * A CldrNode represents one XML element as it appears in a CldrItem's path.
 *
 * <p>A node carries the element name, the name of its parent element, and two attribute maps
 * (distinguishing and non-distinguishing) parsed from the path segment text.
 */
public class CldrNode {

    /**
     * Creates a node that is not associated with any {@link CldrItem}.
     *
     * @param parent name of the parent element.
     * @param pathSegment path segment carrying the distinguishing attributes.
     * @param fullPathSegment path segment carrying all attributes.
     * @return the newly created node.
     * @throws ParseException if either segment is malformed or their element names differ.
     */
    public static CldrNode createNode(String parent, String pathSegment, String fullPathSegment)
            throws ParseException {
        return createNode(parent, pathSegment, fullPathSegment, null);
    }

    /**
     * Creates a node from a path segment and its corresponding full path segment.
     *
     * <p>Attributes found in {@code pathSegment} are stored as distinguishing attributes; those
     * found only in {@code fullPathSegment} are stored as non-distinguishing attributes.
     * Distinguishing attributes matching an entry of {@code LdmlConvertRules.ATTR_SUPPRESS_LIST}
     * are dropped.
     *
     * @param parent name of the parent element.
     * @param pathSegment path segment carrying the distinguishing attributes.
     * @param fullPathSegment path segment carrying all attributes.
     * @param item the item this node belongs to, may be {@code null}.
     * @return the newly created node.
     * @throws ParseException if either segment is malformed or their element names differ.
     */
    public static CldrNode createNode(
            String parent, String pathSegment, String fullPathSegment, CldrItem item)
            throws ParseException {
        CldrNode node = new CldrNode();
        node.item = item;
        node.parent = parent;
        node.name = extractAttrs(pathSegment, node.distinguishingAttributes);
        String fullTrunk = extractAttrs(fullPathSegment, node.nondistinguishingAttributes);
        if (!node.name.equals(fullTrunk)) {
            throw new ParseException(
                    "Error in parsing \"" + pathSegment + " \":\"" + fullPathSegment, 0);
        }

        // Anything already known as distinguishing must not also be listed as
        // non-distinguishing.
        node.nondistinguishingAttributes
                .keySet()
                .removeAll(node.distinguishingAttributes.keySet());

        String[] suppressList = LdmlConvertRules.ATTR_SUPPRESS_LIST;

        // let's check if there is anything that can be suppressed
        // TODO: should hash the parent and pathSegment values so we don't have to linear
        // search.
        // The list is read as flat triples: element name, attribute value, attribute name.
        for (int i = 0; i < suppressList.length; i += 3) {
            if (node.name.equals(suppressList[i])) {
                String key = suppressList[i + 2];
                String value = node.distinguishingAttributes.get(key);
                if (value != null && value.equals(suppressList[i + 1])) {
                    node.distinguishingAttributes.remove(key);
                }
            }
        }
        return node;
    }

    /**
     * Extract all the attributes and their value in the path.
     *
     * <p>Attributes are expected in the form {@code [@name="value"]} or {@code [@name=value]}.
     *
     * @param pathSegment A complete or partial path.
     * @param attributes String map to receive attribute mapping.
     * @return Part of the string before the first attribute, or the whole string if that part
     *     would be empty.
     * @throws ParseException if an attribute has no {@code '='} or is not terminated.
     */
    private static String extractAttrs(String pathSegment, Map<String, String> attributes)
            throws ParseException {
        int start = 0;

        String trunk = "";
        while (true) {
            int ind1 = pathSegment.indexOf("[@", start);
            if (ind1 < 0) {
                if (trunk.isEmpty()) {
                    trunk = pathSegment;
                }
                break;
            }
            if (trunk.isEmpty()) {
                trunk = pathSegment.substring(0, ind1);
            }
            ind1 += 2;
            int ind2 = pathSegment.indexOf("=", ind1);
            if (ind2 < 0) {
                throw new ParseException("Missing '=' in attribute specification.", ind1);
            }
            String attr = pathSegment.substring(ind1, ind2);

            ind1 = ind2 + 1;
            if (ind1 >= pathSegment.length()) {
                // '=' was the last character: report it as a parse error rather than letting
                // charAt() fail with an unchecked StringIndexOutOfBoundsException.
                throw new ParseException("Unexpected end in attribute specification.", ind1);
            }
            if (pathSegment.charAt(ind1) == '"') {
                ind1 += 1;
                ind2 = pathSegment.indexOf("\"]", ind1);
            } else {
                ind2 = pathSegment.indexOf("]", ind1);
            }

            if (ind2 < 0) {
                throw new ParseException("Unexpected end in attribute specification.", ind1);
            }

            String value = pathSegment.substring(ind1, ind2);

            // Resume the search for the next "[@" at the end of this attribute's value.
            start = ind2;

            attributes.put(attr, value);
        }

        return trunk;
    }

    /** distinguishing attributes as identified by CLDR tools. */
    private Map<String, String> distinguishingAttributes;

    /** non-distinguishing attributes as identified by CLDR tools. */
    private Map<String, String> nondistinguishingAttributes;

    /** name of the element. */
    private String name;

    /** parent element for this element. */
    private String parent;

    /** CldrItem, if any */
    private CldrItem item;

    /**
     * Returns the untransformed path of the associated item.
     *
     * @return the item's untransformed path, or the literal {@code "noitem"} when this node has
     *     no item.
     */
    public String getUntransformedPath() {
        if (item != null) {
            return item.getUntransformedPath();
        } else {
            return "noitem";
        }
    }

    /**
     * This name is derived from element name and attributes. Once it is calculated, it is cached
     * in this variable.
     */
    private String uniqueNodeName;

    /** Creates an empty node; instances are obtained through {@code createNode}. */
    private CldrNode() {
        distinguishingAttributes = new HashMap<>();
        nondistinguishingAttributes = new HashMap<>();
    }

    /**
     * Get the string map for attributes that should be treated as values.
     *
     * <p>If a {@code radix} attribute ends up in the result, it is removed and every remaining
     * key is suffixed with {@code "/"} followed by the radix value.
     *
     * @return String map.
     */
    public Map<String, String> getAttrAsValueMap() {
        Map<String, String> attributesAsValues = new HashMap<>();
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            String keyStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String keyStrMidStar = LdmlConvertRules.getKeyStr(getParent(), "*", key);
            String keyStr2 = LdmlConvertRules.getKeyStr(name, key);
            if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(keyStr)
                    || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(keyStr2)) {
                if (LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStr)
                        || LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(
                                keyStrMidStar)) {
                    attributesAsValues.put(LdmlConvertRules.ANONYMOUS_KEY, entry.getValue());
                } else {
                    attributesAsValues.put(key, entry.getValue());
                }
            }
        }

        for (Map.Entry<String, String> entry : nondistinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            if (LdmlConvertRules.IGNORABLE_NONDISTINGUISHING_ATTR_SET.contains(key)) {
                continue;
            }
            String keyStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String keyStrMidStar = LdmlConvertRules.getKeyStr(getParent(), "*", key);
            if (LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStr)
                    || LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStrMidStar)) {
                attributesAsValues.put(LdmlConvertRules.ANONYMOUS_KEY, entry.getValue());
            } else {
                attributesAsValues.put(key, entry.getValue());
            }
        }

        // ADJUST RADIX BASED ON ICU RULE
        final String radixValue = attributesAsValues.get("radix");
        if (radixValue != null) {
            attributesAsValues.remove("radix");
            // Re-key into a separate map: renaming keys of a HashMap while iterating over its
            // entry set fails with ConcurrentModificationException once more than one entry
            // is present.
            Map<String, String> rekeyed = new HashMap<>();
            for (Map.Entry<String, String> entry : attributesAsValues.entrySet()) {
                rekeyed.put(entry.getKey() + "/" + radixValue, entry.getValue());
            }
            return rekeyed;
        }
        return attributesAsValues;
    }

    /**
     * Replaces the distinguishing attribute map. The given map is stored as-is, not copied.
     *
     * @param distinguishingAttributes the new distinguishing attributes.
     */
    public void setDistinguishingAttributes(Map<String, String> distinguishingAttributes) {
        this.distinguishingAttributes = distinguishingAttributes;
    }

    /**
     * Replaces the non-distinguishing attribute map. The given map is stored as-is, not copied.
     *
     * @param nondistinguishingAttributes the new non-distinguishing attributes.
     */
    public void setNondistinguishingAttributes(Map<String, String> nondistinguishingAttributes) {
        this.nondistinguishingAttributes = nondistinguishingAttributes;
    }

    /**
     * @return the live map of distinguishing attributes (not a copy).
     */
    public Map<String, String> getDistinguishingAttributes() {
        return distinguishingAttributes;
    }

    /**
     * @return the element name.
     */
    public String getName() {
        return name;
    }

    /**
     * @return the live map of non-distinguishing attributes (not a copy).
     */
    public Map<String, String> getNondistinguishingAttributes() {
        return nondistinguishingAttributes;
    }

    /**
     * Construct a name that can be used as key in its container (by incorporating distinguishing
     * attributes).
     *
     * <p>Each segment in a CLDR path corresponds to an XML element. The element name itself can
     * not be used as JSON key because it might not be unique in its container. A set of rules is
     * used here to construct this key name. Some of the attributes will be used in constructing
     * the key name, the remaining attributes are returned and should be used to fill the
     * mapping.
     *
     * <p>The basic mapping is from {@code <element_name>[@<attr_name>=<attr_value>]+} to
     * {@code <element_name>-<attr_name>-<attr_value>}
     *
     * <p>The result is cached after the first call.
     *
     * @return A unique name that can be used as key in its container.
     * @throws IllegalArgumentException if more than one attribute value competes to be the main
     *     name.
     */
    public String getNodeKeyName() {
        if (uniqueNodeName != null) {
            return uniqueNodeName;
        }

        // decide the main name
        StringBuilder strbuf = new StringBuilder();
        String lastKey = null; // for err message
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            String attrIdStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String attrIdStr2 = LdmlConvertRules.getKeyStr(name, key);
            if (LdmlConvertRules.IsSuppresedAttr(attrIdStr)) {
                continue;
            }
            if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr)
                    || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr2)) { // with *
                continue;
            }

            if (!key.equals("alt")
                    && !key.equals("count")
                    && !LdmlConvertRules.NAME_PART_DISTINGUISHING_ATTR_SET.contains(attrIdStr)) {
                if (strbuf.length() != 0) {
                    throw new IllegalArgumentException(
                            "Can not have more than 1 key values in name: "
                                    + "both '"
                                    + strbuf
                                    + "' ("
                                    + lastKey
                                    + ") and '"
                                    + entry.getValue()
                                    + "' ("
                                    + key
                                    + "). attrIdStr="
                                    + attrIdStr
                                    + " - check LdmlConvertRules.java#NAME_PART_DISTINGUISHING_ATTR_SET");
                }
                strbuf.append(entry.getValue());
                lastKey = key;
            }
        }
        // No attribute value qualified as the main name: fall back to the element name.
        if (strbuf.length() == 0) {
            strbuf.append(name);
        }

        // append distinguishing attributes
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            String attrIdStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String attrIdStr2 = LdmlConvertRules.getKeyStr(name, key);
            if (LdmlConvertRules.IsSuppresedAttr(attrIdStr)) {
                continue;
            }
            if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr)
                    || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr2)) {
                continue;
            }

            if (!key.equals("alt")
                    && !LdmlConvertRules.NAME_PART_DISTINGUISHING_ATTR_SET.contains(attrIdStr)) {
                continue;
            }
            strbuf.append("-");
            strbuf.append(key);
            strbuf.append("-");
            strbuf.append(entry.getValue());
        }
        uniqueNodeName = strbuf.toString();

        if (uniqueNodeName.length() == 1 && name.equals("character")) {
            // character attribute has value that can be any unicode character. Those
            // might not be url safe and can be difficult for user to specify. It is
            // converted to hex string here.
            uniqueNodeName = "U+" + Utility.hex(uniqueNodeName.charAt(0), 4);
        } else if (isTimezoneType()) {
            // time zone name has GMT+9 type of thing. "+" need to be removed to make
            // it URL safe.
            uniqueNodeName = uniqueNodeName.replaceFirst("\\+", "");
        }

        return uniqueNodeName;
    }

    /**
     * Construct a name that has all distinguishing attributes that should not be ignored.
     *
     * <p>Different from getNodeKeyName, this name also includes those distinguishing attributes
     * that will be treated as values.
     *
     * @return A distinguishing name for differentiating element.
     */
    public String getNodeDistinguishingName() {
        // decide the main name
        StringBuilder strbuf = new StringBuilder();
        strbuf.append(name);

        // append distinguishing attributes
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            strbuf.append("-");
            strbuf.append(entry.getKey());
            strbuf.append("-");
            strbuf.append(entry.getValue());
        }
        return strbuf.toString();
    }

    /**
     * @return whether the element name is listed in {@code
     *     LdmlConvertRules.TIMEZONE_ELEMENT_NAME_SET}.
     */
    public boolean isTimezoneType() {
        return LdmlConvertRules.TIMEZONE_ELEMENT_NAME_SET.contains(name);
    }

    @Override
    public String toString() {
        return "[CldrNode " + getParent() + "/" + getNodeDistinguishingName() + "]";
    }

    /**
     * @return the name of the parent element.
     */
    public String getParent() {
        return parent;
    }
}