xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/posix/POSIXUtilities.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2013, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: John Emmons
7  **********************************************************************
8  */
9 package org.unicode.cldr.posix;
10 
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import java.text.StringCharacterIterator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import org.unicode.cldr.util.CLDRFile;
19 
20 public class POSIXUtilities {
21 
22     private static UnicodeSet repertoire = new UnicodeSet(0x0000, 0x10FFFF);
23     private static CLDRFile char_fallbk;
24     private static Map<Integer, String> controlCodeNames = new HashMap<>();
25 
26     // Since UCharacter.getExtendedName() in ICU doesn't provide the names for control characters
27     // we have to force the issue here. Required elements for the POSIX portable character set will
28     // be
29     // used when necessary (in lower case). Otherwise, the name from the Unicode data file is used.
initControlCodeNames()30     private static void initControlCodeNames() {
31         controlCodeNames.put(0x0000, "NULL");
32         controlCodeNames.put(0x0001, "START_OF_HEADING");
33         controlCodeNames.put(0x0002, "START_OF_TEXT");
34         controlCodeNames.put(0x0003, "END_OF_TEXT");
35         controlCodeNames.put(0x0004, "END_OF_TRANSMISSION");
36         controlCodeNames.put(0x0005, "ENQUIRY");
37         controlCodeNames.put(0x0006, "ACKNOWLEDGE");
38         controlCodeNames.put(0x0007, "ALERT");
39         controlCodeNames.put(0x0008, "BACKSPACE");
40         controlCodeNames.put(0x0009, "tab"); // Required element for POSIX portable character set
41         controlCodeNames.put(
42                 0x000A, "newline"); // Required element for POSIX portable character set
43         controlCodeNames.put(
44                 0x000B, "vertical-tab"); // Required element for POSIX portable character set
45         controlCodeNames.put(
46                 0x000C, "form-feed"); // Required element for POSIX portable character set
47         controlCodeNames.put(
48                 0x000D, "carriage-return"); // Required element for POSIX portable character set
49         controlCodeNames.put(0x000E, "SHIFT_OUT");
50         controlCodeNames.put(0x000F, "SHIFT_IN");
51         controlCodeNames.put(0x0010, "DATA_LINK_ESCAPE");
52         controlCodeNames.put(0x0011, "DEVICE_CONTROL_ONE");
53         controlCodeNames.put(0x0012, "DEVICE_CONTROL_TWO");
54         controlCodeNames.put(0x0013, "DEVICE_CONTROL_THREE");
55         controlCodeNames.put(0x0014, "DEVICE_CONTROL_FOUR");
56         controlCodeNames.put(0x0015, "NEGATIVE_ACKNOWLEDGE");
57         controlCodeNames.put(0x0016, "SYNCHRONOUS_IDLE");
58         controlCodeNames.put(0x0017, "END_OF_TRANSMISSION_BLOCK");
59         controlCodeNames.put(0x0018, "CANCEL");
60         controlCodeNames.put(0x0019, "END_OF_MEDIUM");
61         controlCodeNames.put(0x001A, "SUBSTITUTE");
62         controlCodeNames.put(0x001B, "ESCAPE");
63         controlCodeNames.put(0x001C, "INFORMATION_SEPARATOR_FOUR");
64         controlCodeNames.put(0x001D, "INFORMATION_SEPARATOR_THREE");
65         controlCodeNames.put(0x001E, "INFORMATION_SEPARATOR_TWO");
66         controlCodeNames.put(0x001F, "INFORMATION_SEPARATOR_ONE");
67         controlCodeNames.put(0x007F, "DELETE");
68         controlCodeNames.put(0x0080, "CONTROL-0080");
69         controlCodeNames.put(0x0081, "CONTROL-0081");
70         controlCodeNames.put(0x0082, "BREAK_PERMITTED_HERE");
71         controlCodeNames.put(0x0083, "NO_BREAK_HERE");
72         controlCodeNames.put(0x0084, "CONTROL-0084");
73         controlCodeNames.put(0x0085, "NEXT_LINE");
74         controlCodeNames.put(0x0086, "START_OF_SELECTED_AREA");
75         controlCodeNames.put(0x0087, "END_OF_SELECTED_AREA");
76         controlCodeNames.put(0x0088, "CHARACTER_TABULATION_SET");
77         controlCodeNames.put(0x0089, "CHARACTER_TABULATION_WITH_JUSTIFICATION");
78         controlCodeNames.put(0x008A, "LINE_TABULATION_SET");
79         controlCodeNames.put(0x008B, "PARTIAL_LINE_FORWARD");
80         controlCodeNames.put(0x008C, "PARTIAL_LINE_BACKWARD");
81         controlCodeNames.put(0x008D, "REVERSE_LINE_FEED");
82         controlCodeNames.put(0x008E, "SINGLE_SHIFT_TWO");
83         controlCodeNames.put(0x008F, "SINGLE_SHIFT_THREE");
84         controlCodeNames.put(0x0090, "DEVICE_CONTROL_STRING");
85         controlCodeNames.put(0x0091, "PRIVATE_USE_ONE");
86         controlCodeNames.put(0x0092, "PRIVATE_USE_TWO");
87         controlCodeNames.put(0x0093, "SET_TRANSMIT_STATE");
88         controlCodeNames.put(0x0094, "CANCEL_CHARACTER");
89         controlCodeNames.put(0x0095, "MESSAGE_WAITING");
90         controlCodeNames.put(0x0096, "START_OF_GUARDED_AREA");
91         controlCodeNames.put(0x0097, "END_OF_GUARDED_AREA");
92         controlCodeNames.put(0x0098, "START_OF_STRING");
93         controlCodeNames.put(0x0099, "CONTROL-0099");
94         controlCodeNames.put(0x009A, "SINGLE_CHARACTER_INTRODUCER");
95         controlCodeNames.put(0x009B, "CONTROL_SEQUENCE_INTRODUCER");
96         controlCodeNames.put(0x009C, "STRING_TERMINATOR");
97         controlCodeNames.put(0x009D, "OPERATING_SYSTEM_COMMAND");
98         controlCodeNames.put(0x009E, "PRIVACY_MESSAGE");
99         controlCodeNames.put(0x009F, "APPLICATION_PROGRAM_COMMAND");
100     }
101 
setRepertoire(UnicodeSet rep)102     public static void setRepertoire(UnicodeSet rep) {
103         repertoire = rep;
104     }
105 
setCharFallback(CLDRFile fallbk)106     public static void setCharFallback(CLDRFile fallbk) {
107         char_fallbk = fallbk;
108     }
109 
POSIXContraction(String s)110     public static String POSIXContraction(String s) {
111         int cp;
112         StringBuffer result = new StringBuffer();
113         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
114             cp = UTF16.charAt(s, i);
115             result.append(POSIXCharName(cp));
116         }
117         return result.toString().replaceAll("><", "-");
118     }
119 
POSIXCharName(String s)120     public static String POSIXCharName(String s) {
121         int cp;
122         StringBuffer result = new StringBuffer();
123         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
124             cp = UTF16.charAt(s, i);
125             result.append(POSIXCharName(cp));
126         }
127         return result.toString();
128     }
129 
POSIXCharName(int cp)130     public static String POSIXCharName(int cp) {
131 
132         StringBuffer result = new StringBuffer();
133         result.append("<");
134         if ((cp >= 0x0041 && cp <= 0x005A) || (cp >= 0x0061 && cp <= 0x007A)) // Latin letters
135         result.append((char) cp);
136         else if (cp >= 0x0030 && cp <= 0x0039) // digits
137         {
138             String n = UCharacter.getExtendedName(cp);
139             result.append(n.replaceAll(" ", "_").replaceAll("DIGIT_", "").toLowerCase());
140         } else if ((cp >= 0x0000 && cp <= 0x001F) || (cp >= 0x007F && cp <= 0x009F)) { // Controls
141             if (controlCodeNames.isEmpty()) {
142                 initControlCodeNames();
143             }
144             result.append(controlCodeNames.get(cp));
145         } else if (cp == 0x0020)
146             result.append("space"); // Required elements for POSIX portable character set
147         else // everything else
148         {
149             String n = UCharacter.getExtendedName(cp);
150             result.append(
151                     n.replaceAll(" ", "_").replaceAll("<", "").replaceAll(">", "").toUpperCase());
152         }
153 
154         int i = result.indexOf("_(");
155         if (i >= 0) result.setLength(i);
156 
157         result.append(">");
158 
159         if (!repertoire.contains(cp)) {
160             System.out.println(
161                     "WARNING: character " + result.toString() + " is not in the target codeset.");
162 
163             String substituteString = "";
164             boolean SubFound = false;
165             String SearchLocation =
166                     "//supplementalData/characters/character-fallback/character[@value=\""
167                             + UCharacter.toString(cp)
168                             + "\"]/substitute";
169 
170             for (Iterator<String> it =
171                             char_fallbk.iterator(SearchLocation, char_fallbk.getComparator());
172                     it.hasNext() && !SubFound; ) {
173                 String path = it.next();
174                 substituteString = char_fallbk.getStringValue(path);
175                 if (repertoire.containsAll(substituteString)) SubFound = true;
176             }
177 
178             if (SubFound) {
179                 System.out.println(
180                         "	Substituted: " + POSIXUtilities.POSIXCharName(substituteString));
181                 result = new StringBuffer(POSIXUtilities.POSIXCharName(substituteString));
182             } else
183                 System.out.println(
184                         "	No acceptable substitute found. The resulting locale source may not compile.");
185         }
186 
187         return result.toString();
188     }
189 
POSIXCharFullName(String s)190     public static String POSIXCharFullName(String s) {
191         int cp;
192         StringBuffer result = new StringBuffer();
193         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
194             cp = UTF16.charAt(s, i);
195             result.append(POSIXCharFullName(cp));
196         }
197         return result.toString();
198     }
199 
POSIXCharFullName(int cp)200     public static String POSIXCharFullName(int cp) {
201         StringBuffer result = new StringBuffer();
202         result.append("<");
203         String n = UCharacter.getExtendedName(cp);
204         result.append(n.replaceAll(" ", "_").replaceAll("<", "").replaceAll(">", "").toUpperCase());
205 
206         int i = result.indexOf("_(");
207         if (i >= 0) result.setLength(i);
208 
209         result.append(">");
210 
211         return result.toString();
212     }
213 
214     // POSIXCharNameNP replaces all non-portable characters with their expanded POSIX character
215     // name.
216 
POSIXCharNameNP(String s)217     public static String POSIXCharNameNP(String s) {
218         int cp;
219         StringBuffer result = new StringBuffer();
220         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
221             cp = UTF16.charAt(s, i);
222             if (cp <= 0x007F) result.append((char) cp);
223             else result.append(POSIXCharName(cp));
224         }
225         return result.toString();
226     }
227 
POSIXDateTimeFormat(String s, boolean UseAltDigits, POSIXVariant variant)228     public static String POSIXDateTimeFormat(String s, boolean UseAltDigits, POSIXVariant variant) {
229 
230         // This is an array of the POSIX date / time field descriptors and their corresponding
231         // representations
232         // in LDML. We use these to replace the LDML fields with POSIX field descriptors.
233 
234         String[][] FieldDescriptors = {
235             {"/d/", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>"},
236             {"/", "<SOLIDUS>", "<SOLIDUS>", "<SOLIDUS>"},
237             {"DDD", "%j", "%j", "%j"},
238             {"EEEE", "%A", "%A", "%A"},
239             {"EEE", "%a", "%a", "%a"},
240             {"G", "%N", "%N", "%N"},
241             {"HH", "%H", "%OH", "%H"},
242             {"H", "%H", "%OH", "%k"}, // solaris defines exact mapping for "H""
243             {"KK", "%I", "%OI", "%I"},
244             {"K", "%I", "%OI", "%l"},
245             {"MMMM", "%B", "%B", "%B"},
246             {"MMM", "%b", "%b", "%b"},
247             {"MM", "%m", "%Om", "%m"},
248             {"M", "%m", "%Om", "%m"},
249             {"VVVV", "%Z", "%Z", "%Z"},
250             {"V", "%Z", "%Z", "%Z"},
251             {"a", "%p", "%p", "%p"},
252             {"dd", "%d", "%Od", "%d"},
253             {"d", "%e", "%Oe", "%e"},
254             {"hh", "%I", "%OI", "%I"},
255             {"h", "%I", "%OI", "%l"}, // solaris defines exact mapping for "h"
256             {"kk", "%H", "%OH", "%H"},
257             {"k", "%H", "%OH", "%k"},
258             {"mm", "%M", "%OM", "%M"},
259             {"m", "%M", "%OM", "%M"},
260             {"vvvv", "%Z", "%Z", "%Z"},
261             {"v", "%Z", "%Z", "%Z"},
262             {"yyyy", "%Y", "%Oy", "%Y"},
263             {"yy", "%y", "%Oy", "%y"},
264             {"y", "%Y", "%Oy", "%Y"},
265             {"zzzz", "%Z", "%Z", "%Z"},
266             {"zzz", "%Z", "%Z", "%Z"},
267             {"zz", "%Z", "%Z", "%Z"},
268             {"z", "%Z", "%Z", "%Z"},
269             {"ss", "%S", "%OS", "%S"},
270             {"s", "%S", "%OS", "%S"}
271         };
272 
273         boolean inquotes = false;
274         StringBuffer result = new StringBuffer("");
275 
276         for (int pos = 0; pos < s.length(); ) {
277             boolean replaced = false;
278             for (int i = 0; i < FieldDescriptors.length && !replaced && !inquotes; i++) {
279                 if (s.indexOf(FieldDescriptors[i][0], pos) == pos) {
280                     if (UseAltDigits) result.append(FieldDescriptors[i][2]);
281                     else if (variant.platform.equals(POSIXVariant.SOLARIS))
282                         result.append(FieldDescriptors[i][3]);
283                     else result.append(FieldDescriptors[i][1]);
284                     replaced = true;
285                     pos += FieldDescriptors[i][0].length();
286                 }
287             }
288 
289             if (!replaced) {
290                 if (s.charAt(pos) == '\'') {
291                     if (pos < (s.length() - 1) && s.charAt(pos + 1) == '\'') {
292                         result.append('\'');
293                         pos++;
294                     } else inquotes = !inquotes;
295                 } else result.append(s.charAt(pos));
296                 pos++;
297             }
298         }
299         return result.toString();
300     }
301 
POSIXGrouping(String grouping_pattern)302     public static String POSIXGrouping(String grouping_pattern) {
303 
304         // Parse the decimal pattern to get the number of digits to use in the POSIX style pattern.
305 
306         int i = grouping_pattern.indexOf(".");
307         int j;
308         boolean first_grouping = true;
309         String result;
310 
311         if (i < 0) result = "-1";
312         else {
313             result = new String();
314             while ((j = grouping_pattern.lastIndexOf(",", i - 1)) > 0) {
315                 if (!first_grouping) result = result.concat(";");
316                 Integer num_digits = i - j - 1;
317                 result = result.concat(num_digits.toString());
318 
319                 first_grouping = false;
320                 i = j;
321             }
322         }
323 
324         if (result.length() == 0) result = "-1";
325 
326         return result;
327     }
328 
isBetween(int a, int b, int c)329     public static boolean isBetween(int a, int b, int c) {
330         return ((a < b && b < c) || (c < b && b < a));
331     }
332 
POSIXYesNoExpr(String s)333     public static String POSIXYesNoExpr(String s) {
334         StringBuffer result = new StringBuffer();
335         String[] YesNoElements;
336         YesNoElements = s.split(":");
337         for (int i = 0; i < YesNoElements.length; i++) {
338             String cur = YesNoElements[i];
339             if (cur.length() >= 1 && cur.toLowerCase().equals(cur)) {
340                 if (result.length() > 0) result.append(")|(");
341                 else result.append("^((");
342 
343                 StringCharacterIterator si = new StringCharacterIterator(cur);
344                 boolean OptLastChars = false;
345                 for (char c = si.first(); c != StringCharacterIterator.DONE; c = si.next()) {
346                     if (c != Character.toUpperCase(c)) {
347                         if (si.getIndex() == 1) {
348                             result.append("(");
349                             OptLastChars = true;
350                         }
351                         result.append("[");
352                         result.append(c);
353                         result.append(Character.toUpperCase(c));
354                         result.append("]");
355                     } else result.append(c);
356                 }
357                 if (OptLastChars) result.append(")?");
358             }
359         }
360         result.append("))");
361         return (POSIXCharNameNP(result.toString()));
362     }
363 }
364