/*
 **********************************************************************
 * Copyright (c) 2002-2013, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: John Emmons
 **********************************************************************
 */
package org.unicode.cldr.posix;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import java.text.StringCharacterIterator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import org.unicode.cldr.util.CLDRFile;

/**
 * Static helpers that turn characters, date/time patterns, number patterns and yes/no strings
 * into the textual forms used in POSIX locale source.
 *
 * <p>The target repertoire and the character-fallback data are held in static fields that are
 * configured through {@link #setRepertoire(UnicodeSet)} and {@link #setCharFallback(CLDRFile)}.
 */
public class POSIXUtilities {

    /** Characters available in the target codeset; defaults to every code point. */
    private static UnicodeSet repertoire = new UnicodeSet(0x0000, 0x10FFFF);

    /** Source of character-fallback substitutes; may be null until configured. */
    private static CLDRFile char_fallbk;

    // Since UCharacter.getExtendedName() in ICU doesn't provide the names for control characters
    // we have to force the issue here. Required elements for the POSIX portable character set
    // will be used when necessary (in lower case). Otherwise, the name from the Unicode data
    // file is used.
    //
    // The table is built once during class initialization and never modified afterwards, so
    // readers can never observe a partially filled map.
    private static final Map<Integer, String> CONTROL_CODE_NAMES = buildControlCodeNames();

    // POSIX date / time field descriptors and their corresponding representations in LDML.
    // Column 0 is the LDML field, column 1 the default POSIX descriptor, column 2 the descriptor
    // used with alternate digits, column 3 the descriptor used for the Solaris variant.
    // Order matters: the first entry that matches at the current position wins, so longer
    // fields are listed before their shorter prefixes.
    private static final String[][] FIELD_DESCRIPTORS = {
        {"/d/", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>"},
        {"/", "<SOLIDUS>", "<SOLIDUS>", "<SOLIDUS>"},
        {"DDD", "%j", "%j", "%j"},
        {"EEEE", "%A", "%A", "%A"},
        {"EEE", "%a", "%a", "%a"},
        {"G", "%N", "%N", "%N"},
        {"HH", "%H", "%OH", "%H"},
        {"H", "%H", "%OH", "%k"}, // solaris defines exact mapping for "H"
        {"KK", "%I", "%OI", "%I"},
        {"K", "%I", "%OI", "%l"},
        {"MMMM", "%B", "%B", "%B"},
        {"MMM", "%b", "%b", "%b"},
        {"MM", "%m", "%Om", "%m"},
        {"M", "%m", "%Om", "%m"},
        {"VVVV", "%Z", "%Z", "%Z"},
        {"V", "%Z", "%Z", "%Z"},
        {"a", "%p", "%p", "%p"},
        {"dd", "%d", "%Od", "%d"},
        {"d", "%e", "%Oe", "%e"},
        {"hh", "%I", "%OI", "%I"},
        {"h", "%I", "%OI", "%l"}, // solaris defines exact mapping for "h"
        {"kk", "%H", "%OH", "%H"},
        {"k", "%H", "%OH", "%k"},
        {"mm", "%M", "%OM", "%M"},
        {"m", "%M", "%OM", "%M"},
        {"vvvv", "%Z", "%Z", "%Z"},
        {"v", "%Z", "%Z", "%Z"},
        {"yyyy", "%Y", "%Oy", "%Y"},
        {"yy", "%y", "%Oy", "%y"},
        {"y", "%Y", "%Oy", "%Y"},
        {"zzzz", "%Z", "%Z", "%Z"},
        {"zzz", "%Z", "%Z", "%Z"},
        {"zz", "%Z", "%Z", "%Z"},
        {"z", "%Z", "%Z", "%Z"},
        {"ss", "%S", "%OS", "%S"},
        {"s", "%S", "%OS", "%S"}
    };

    /** Builds the read-only table of names for U+0000..U+001F and U+007F..U+009F. */
    private static Map<Integer, String> buildControlCodeNames() {
        Map<Integer, String> names = new HashMap<>();
        names.put(0x0000, "NULL");
        names.put(0x0001, "START_OF_HEADING");
        names.put(0x0002, "START_OF_TEXT");
        names.put(0x0003, "END_OF_TEXT");
        names.put(0x0004, "END_OF_TRANSMISSION");
        names.put(0x0005, "ENQUIRY");
        names.put(0x0006, "ACKNOWLEDGE");
        names.put(0x0007, "ALERT");
        names.put(0x0008, "BACKSPACE");
        names.put(0x0009, "tab"); // Required element for POSIX portable character set
        names.put(0x000A, "newline"); // Required element for POSIX portable character set
        names.put(0x000B, "vertical-tab"); // Required element for POSIX portable character set
        names.put(0x000C, "form-feed"); // Required element for POSIX portable character set
        names.put(0x000D, "carriage-return"); // Required element for POSIX portable character set
        names.put(0x000E, "SHIFT_OUT");
        names.put(0x000F, "SHIFT_IN");
        names.put(0x0010, "DATA_LINK_ESCAPE");
        names.put(0x0011, "DEVICE_CONTROL_ONE");
        names.put(0x0012, "DEVICE_CONTROL_TWO");
        names.put(0x0013, "DEVICE_CONTROL_THREE");
        names.put(0x0014, "DEVICE_CONTROL_FOUR");
        names.put(0x0015, "NEGATIVE_ACKNOWLEDGE");
        names.put(0x0016, "SYNCHRONOUS_IDLE");
        names.put(0x0017, "END_OF_TRANSMISSION_BLOCK");
        names.put(0x0018, "CANCEL");
        names.put(0x0019, "END_OF_MEDIUM");
        names.put(0x001A, "SUBSTITUTE");
        names.put(0x001B, "ESCAPE");
        names.put(0x001C, "INFORMATION_SEPARATOR_FOUR");
        names.put(0x001D, "INFORMATION_SEPARATOR_THREE");
        names.put(0x001E, "INFORMATION_SEPARATOR_TWO");
        names.put(0x001F, "INFORMATION_SEPARATOR_ONE");
        names.put(0x007F, "DELETE");
        names.put(0x0080, "CONTROL-0080");
        names.put(0x0081, "CONTROL-0081");
        names.put(0x0082, "BREAK_PERMITTED_HERE");
        names.put(0x0083, "NO_BREAK_HERE");
        names.put(0x0084, "CONTROL-0084");
        names.put(0x0085, "NEXT_LINE");
        names.put(0x0086, "START_OF_SELECTED_AREA");
        names.put(0x0087, "END_OF_SELECTED_AREA");
        names.put(0x0088, "CHARACTER_TABULATION_SET");
        names.put(0x0089, "CHARACTER_TABULATION_WITH_JUSTIFICATION");
        names.put(0x008A, "LINE_TABULATION_SET");
        names.put(0x008B, "PARTIAL_LINE_FORWARD");
        names.put(0x008C, "PARTIAL_LINE_BACKWARD");
        names.put(0x008D, "REVERSE_LINE_FEED");
        names.put(0x008E, "SINGLE_SHIFT_TWO");
        names.put(0x008F, "SINGLE_SHIFT_THREE");
        names.put(0x0090, "DEVICE_CONTROL_STRING");
        names.put(0x0091, "PRIVATE_USE_ONE");
        names.put(0x0092, "PRIVATE_USE_TWO");
        names.put(0x0093, "SET_TRANSMIT_STATE");
        names.put(0x0094, "CANCEL_CHARACTER");
        names.put(0x0095, "MESSAGE_WAITING");
        names.put(0x0096, "START_OF_GUARDED_AREA");
        names.put(0x0097, "END_OF_GUARDED_AREA");
        names.put(0x0098, "START_OF_STRING");
        names.put(0x0099, "CONTROL-0099");
        names.put(0x009A, "SINGLE_CHARACTER_INTRODUCER");
        names.put(0x009B, "CONTROL_SEQUENCE_INTRODUCER");
        names.put(0x009C, "STRING_TERMINATOR");
        names.put(0x009D, "OPERATING_SYSTEM_COMMAND");
        names.put(0x009E, "PRIVACY_MESSAGE");
        names.put(0x009F, "APPLICATION_PROGRAM_COMMAND");
        return Collections.unmodifiableMap(names);
    }

    /**
     * Sets the repertoire of the target codeset. Characters outside it trigger a warning and a
     * fallback lookup in {@link #POSIXCharName(int)}.
     *
     * @param rep the set of characters available in the target codeset
     */
    public static void setRepertoire(UnicodeSet rep) {
        repertoire = rep;
    }

    /**
     * Sets the file that is searched for character-fallback substitutes.
     *
     * @param fallbk file queried under
     *     {@code //supplementalData/characters/character-fallback}
     */
    public static void setCharFallback(CLDRFile fallbk) {
        char_fallbk = fallbk;
    }

    /**
     * Returns the POSIX name of a contraction: the names of the individual code points of
     * {@code s}, with each adjacent {@code "><"} boundary collapsed to {@code "-"}.
     *
     * @param s the characters forming the contraction
     * @return a single bracketed name such as {@code <c-h>}
     */
    public static String POSIXContraction(String s) {
        return POSIXCharName(s).replace("><", "-");
    }

    /**
     * Returns the concatenated POSIX names of every code point in {@code s}.
     *
     * @param s the string to convert
     * @return the result of {@link #POSIXCharName(int)} for each code point, in order
     */
    public static String POSIXCharName(String s) {
        StringBuilder result = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            result.append(POSIXCharName(cp));
        }
        return result.toString();
    }

    /**
     * Returns the POSIX symbolic name of a code point, wrapped in angle brackets.
     *
     * <p>ASCII Latin letters are used as-is, ASCII digits use their lower-case Unicode name
     * without the {@code DIGIT_} prefix, controls use the built-in control-name table, U+0020 is
     * {@code space}, and everything else uses the upper-cased Unicode extended name with spaces
     * replaced by underscores. Anything from the first {@code "_("} onward is dropped.
     *
     * <p>If the code point is not in the configured repertoire, a warning is printed to standard
     * output and the fallback file is searched for a substitute that is entirely inside the
     * repertoire; when one is found its name is returned instead.
     *
     * @param cp the code point to name
     * @return the bracketed name of {@code cp} or of its substitute
     */
    public static String POSIXCharName(int cp) {
        StringBuilder result = new StringBuilder("<");
        if ((cp >= 0x0041 && cp <= 0x005A) || (cp >= 0x0061 && cp <= 0x007A)) {
            // Latin letters
            result.append((char) cp);
        } else if (cp >= 0x0030 && cp <= 0x0039) {
            // digits; Locale.ROOT keeps e.g. "SIX" -> "six" under a Turkish default locale
            String n = UCharacter.getExtendedName(cp);
            result.append(n.replace(' ', '_').replace("DIGIT_", "").toLowerCase(Locale.ROOT));
        } else if ((cp >= 0x0000 && cp <= 0x001F) || (cp >= 0x007F && cp <= 0x009F)) {
            // Controls
            result.append(CONTROL_CODE_NAMES.get(cp));
        } else if (cp == 0x0020) {
            result.append("space"); // Required elements for POSIX portable character set
        } else {
            // everything else
            result.append(upperCaseUnicodeName(cp));
        }

        int i = result.indexOf("_(");
        if (i >= 0) {
            result.setLength(i);
        }
        result.append(">");

        if (repertoire.contains(cp)) {
            return result.toString();
        }

        System.out.println(
                "WARNING: character " + result.toString() + " is not in the target codeset.");

        String substitute = findSubstitute(cp);
        if (substitute != null) {
            // Every character of the substitute is in the repertoire, so this call cannot warn.
            String substituteName = POSIXCharName(substitute);
            System.out.println(" Substituted: " + substituteName);
            return substituteName;
        }
        System.out.println(
                " No acceptable substitute found. The resulting locale source may not compile.");
        return result.toString();
    }

    /**
     * Looks up the first character-fallback substitute for {@code cp} whose characters are all
     * in the repertoire.
     *
     * @return the substitute, or null if none qualifies or no fallback file has been set
     */
    private static String findSubstitute(int cp) {
        CLDRFile fallback = char_fallbk;
        if (fallback == null) {
            // No fallback data configured: report "no substitute" instead of failing.
            return null;
        }
        String searchLocation =
                "//supplementalData/characters/character-fallback/character[@value=\""
                        + UCharacter.toString(cp)
                        + "\"]/substitute";
        Iterator<String> it = fallback.iterator(searchLocation, fallback.getComparator());
        while (it.hasNext()) {
            String candidate = fallback.getStringValue(it.next());
            if (repertoire.containsAll(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the Unicode extended name of {@code cp} with spaces turned into underscores, angle
     * brackets removed, and the result upper-cased independently of the default locale.
     */
    private static String upperCaseUnicodeName(int cp) {
        String n = UCharacter.getExtendedName(cp);
        return n.replace(' ', '_').replace("<", "").replace(">", "").toUpperCase(Locale.ROOT);
    }

    /**
     * Returns the concatenated full names of every code point in {@code s}.
     *
     * @param s the string to convert
     * @return the result of {@link #POSIXCharFullName(int)} for each code point, in order
     */
    public static String POSIXCharFullName(String s) {
        StringBuilder result = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            result.append(POSIXCharFullName(cp));
        }
        return result.toString();
    }

    /**
     * Returns the bracketed, upper-cased Unicode extended name of a code point, with spaces
     * replaced by underscores and anything from the first {@code "_("} onward dropped. Unlike
     * {@link #POSIXCharName(int)} no POSIX short names are used and the repertoire is not
     * consulted.
     *
     * @param cp the code point to name
     * @return the bracketed name
     */
    public static String POSIXCharFullName(int cp) {
        StringBuilder result = new StringBuilder("<");
        result.append(upperCaseUnicodeName(cp));

        int i = result.indexOf("_(");
        if (i >= 0) {
            result.setLength(i);
        }

        result.append(">");
        return result.toString();
    }

    /**
     * Replaces all non-portable characters with their expanded POSIX character name. Code points
     * up to U+007F are copied unchanged; all others go through {@link #POSIXCharName(int)}.
     *
     * @param s the string to convert
     * @return {@code s} with every code point above U+007F replaced by its bracketed name
     */
    public static String POSIXCharNameNP(String s) {
        StringBuilder result = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            if (cp <= 0x007F) {
                result.append((char) cp);
            } else {
                result.append(POSIXCharName(cp));
            }
        }
        return result.toString();
    }

    /**
     * Converts an LDML date/time pattern into a POSIX format string by replacing LDML fields
     * with POSIX field descriptors.
     *
     * <p>Text between single quotes is copied without field replacement and the quotes
     * themselves are dropped; two consecutive single quotes produce one literal quote.
     * Characters that match no known field are copied unchanged.
     *
     * @param s the LDML pattern
     * @param UseAltDigits if true, the alternate-digits descriptors (such as {@code %Od}) are
     *     used and {@code variant} is not consulted
     * @param variant selects the Solaris-specific descriptors when its platform equals {@code
     *     POSIXVariant.SOLARIS}
     * @return the POSIX format string
     */
    public static String POSIXDateTimeFormat(String s, boolean UseAltDigits, POSIXVariant variant) {
        boolean inquotes = false;
        StringBuilder result = new StringBuilder();

        int pos = 0;
        while (pos < s.length()) {
            String[] match = inquotes ? null : findFieldDescriptor(s, pos);
            if (match != null) {
                if (UseAltDigits) {
                    result.append(match[2]);
                } else if (variant.platform.equals(POSIXVariant.SOLARIS)) {
                    result.append(match[3]);
                } else {
                    result.append(match[1]);
                }
                pos += match[0].length();
                continue;
            }

            char c = s.charAt(pos);
            if (c != '\'') {
                result.append(c);
            } else if (pos < (s.length() - 1) && s.charAt(pos + 1) == '\'') {
                // Doubled quote: emit one literal quote and skip the second one.
                result.append('\'');
                pos++;
            } else {
                inquotes = !inquotes;
            }
            pos++;
        }
        return result.toString();
    }

    /**
     * Returns the first row of the field table whose LDML field occurs in {@code s} exactly at
     * {@code pos}, or null if none does.
     */
    private static String[] findFieldDescriptor(String s, int pos) {
        for (String[] descriptor : FIELD_DESCRIPTORS) {
            if (s.startsWith(descriptor[0], pos)) {
                return descriptor;
            }
        }
        return null;
    }

    /**
     * Derives a POSIX grouping string from a decimal number pattern.
     *
     * <p>Group sizes are measured from the first {@code '.'} leftwards, one per {@code ','}, and
     * joined with {@code ';'} (for example {@code "#,##,##0.00"} yields {@code "3;2"}). A comma
     * at index 0 is not counted.
     *
     * @param grouping_pattern the decimal pattern to parse
     * @return the group sizes, or {@code "-1"} when the pattern has no {@code '.'} or no
     *     grouping separator is found before it
     */
    public static String POSIXGrouping(String grouping_pattern) {
        // Parse the decimal pattern to get the number of digits to use in the POSIX style
        // pattern.
        int i = grouping_pattern.indexOf(".");
        if (i < 0) {
            return "-1";
        }

        StringBuilder result = new StringBuilder();
        int j;
        while ((j = grouping_pattern.lastIndexOf(",", i - 1)) > 0) {
            if (result.length() > 0) {
                result.append(';');
            }
            result.append(i - j - 1);
            i = j;
        }

        return result.length() == 0 ? "-1" : result.toString();
    }

    /**
     * Tells whether {@code b} lies strictly between {@code a} and {@code c}, in either order.
     *
     * @return true if {@code a < b < c} or {@code c < b < a}
     */
    public static boolean isBetween(int a, int b, int c) {
        return ((a < b && b < c) || (c < b && b < a));
    }

    /**
     * Builds a case-insensitive POSIX yes/no expression from a colon-separated list of
     * strings.
     *
     * <p>Only non-empty elements that are already entirely lower case are used. Each becomes
     * one alternative in which every cased character {@code c} is written as {@code [cC]}; when
     * the character at index 1 is cased, everything from that character on is wrapped in an
     * optional group. The expression is finally passed through {@link
     * #POSIXCharNameNP(String)}.
     *
     * <p>If no element qualifies, only the closing {@code "))"} is produced.
     *
     * @param s colon-separated yes (or no) strings
     * @return the expression, e.g. {@code ^(([yY]([eE][sS])?)|([yY]))} for {@code "yes:y"}
     */
    public static String POSIXYesNoExpr(String s) {
        StringBuilder result = new StringBuilder();
        for (String cur : s.split(":")) {
            // Locale.ROOT so the lower-case test does not depend on the default locale.
            if (cur.length() < 1 || !cur.toLowerCase(Locale.ROOT).equals(cur)) {
                continue;
            }
            result.append(result.length() > 0 ? ")|(" : "^((");

            StringCharacterIterator si = new StringCharacterIterator(cur);
            boolean optLastChars = false;
            for (char c = si.first(); c != StringCharacterIterator.DONE; c = si.next()) {
                char upper = Character.toUpperCase(c);
                if (c == upper) {
                    result.append(c);
                    continue;
                }
                if (si.getIndex() == 1) {
                    // Everything after the first character becomes optional.
                    result.append("(");
                    optLastChars = true;
                }
                result.append("[").append(c).append(upper).append("]");
            }
            if (optLastChars) {
                result.append(")?");
            }
        }
        result.append("))");
        return POSIXCharNameNP(result.toString());
    }
}