xref: /aosp_15_r20/external/icu/icu4c/source/common/util.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/19/2001  aliu        Creation.
**********************************************************************
*/
12*0e209d39SAndroid Build Coastguard Worker 
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/unimatch.h"
14*0e209d39SAndroid Build Coastguard Worker #include "unicode/utf16.h"
15*0e209d39SAndroid Build Coastguard Worker #include "patternprops.h"
16*0e209d39SAndroid Build Coastguard Worker #include "util.h"
17*0e209d39SAndroid Build Coastguard Worker 
// The char16_t constants below are written as numeric code units rather
// than character literals, for EBCDIC compatibility.

static constexpr char16_t BACKSLASH  = 0x005C; // backslash
static constexpr char16_t UPPER_U    = 0x0055; // 'U'
static constexpr char16_t LOWER_U    = 0x0075; // 'u'
static constexpr char16_t APOSTROPHE = 0x0027; // apostrophe (single quote)
static constexpr char16_t SPACE      = 0x0020; // ' '
25*0e209d39SAndroid Build Coastguard Worker 
// Digit characters for radixes up to 36, indexed by digit value:
// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" (36 code units, no terminator).
static constexpr char16_t DIGITS[] = {
    // '0'..'9'
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    // 'A'..'J'
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,
    // 'K'..'T'
    0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54,
    // 'U'..'Z'
    0x55, 0x56, 0x57, 0x58, 0x59, 0x5A
};
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
35*0e209d39SAndroid Build Coastguard Worker 
appendNumber(UnicodeString & result,int32_t n,int32_t radix,int32_t minDigits)36*0e209d39SAndroid Build Coastguard Worker UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n,
37*0e209d39SAndroid Build Coastguard Worker                                      int32_t radix, int32_t minDigits) {
38*0e209d39SAndroid Build Coastguard Worker     if (radix < 2 || radix > 36) {
39*0e209d39SAndroid Build Coastguard Worker         // Bogus radix
40*0e209d39SAndroid Build Coastguard Worker         return result.append((char16_t)63/*?*/);
41*0e209d39SAndroid Build Coastguard Worker     }
42*0e209d39SAndroid Build Coastguard Worker     // Handle negatives
43*0e209d39SAndroid Build Coastguard Worker     if (n < 0) {
44*0e209d39SAndroid Build Coastguard Worker         n = -n;
45*0e209d39SAndroid Build Coastguard Worker         result.append((char16_t)45/*-*/);
46*0e209d39SAndroid Build Coastguard Worker     }
47*0e209d39SAndroid Build Coastguard Worker     // First determine the number of digits
48*0e209d39SAndroid Build Coastguard Worker     int32_t nn = n;
49*0e209d39SAndroid Build Coastguard Worker     int32_t r = 1;
50*0e209d39SAndroid Build Coastguard Worker     while (nn >= radix) {
51*0e209d39SAndroid Build Coastguard Worker         nn /= radix;
52*0e209d39SAndroid Build Coastguard Worker         r *= radix;
53*0e209d39SAndroid Build Coastguard Worker         --minDigits;
54*0e209d39SAndroid Build Coastguard Worker     }
55*0e209d39SAndroid Build Coastguard Worker     // Now generate the digits
56*0e209d39SAndroid Build Coastguard Worker     while (--minDigits > 0) {
57*0e209d39SAndroid Build Coastguard Worker         result.append(DIGITS[0]);
58*0e209d39SAndroid Build Coastguard Worker     }
59*0e209d39SAndroid Build Coastguard Worker     while (r > 0) {
60*0e209d39SAndroid Build Coastguard Worker         int32_t digit = n / r;
61*0e209d39SAndroid Build Coastguard Worker         result.append(DIGITS[digit]);
62*0e209d39SAndroid Build Coastguard Worker         n -= digit * r;
63*0e209d39SAndroid Build Coastguard Worker         r /= radix;
64*0e209d39SAndroid Build Coastguard Worker     }
65*0e209d39SAndroid Build Coastguard Worker     return result;
66*0e209d39SAndroid Build Coastguard Worker }
67*0e209d39SAndroid Build Coastguard Worker 
isUnprintable(UChar32 c)68*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::isUnprintable(UChar32 c) {
69*0e209d39SAndroid Build Coastguard Worker     return !(c >= 0x20 && c <= 0x7E);
70*0e209d39SAndroid Build Coastguard Worker }
71*0e209d39SAndroid Build Coastguard Worker 
shouldAlwaysBeEscaped(UChar32 c)72*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::shouldAlwaysBeEscaped(UChar32 c) {
73*0e209d39SAndroid Build Coastguard Worker     if (c < 0x20) {
74*0e209d39SAndroid Build Coastguard Worker         return true;  // C0 control codes
75*0e209d39SAndroid Build Coastguard Worker     } else if (c <= 0x7e) {
76*0e209d39SAndroid Build Coastguard Worker         return false;  // printable ASCII
77*0e209d39SAndroid Build Coastguard Worker     } else if (c <= 0x9f) {
78*0e209d39SAndroid Build Coastguard Worker         return true;  // C1 control codes
79*0e209d39SAndroid Build Coastguard Worker     } else if (c < 0xd800) {
80*0e209d39SAndroid Build Coastguard Worker         return false;  // most of the BMP
81*0e209d39SAndroid Build Coastguard Worker     } else if (c <= 0xdfff || (0xfdd0 <= c && c <= 0xfdef) || (c & 0xfffe) == 0xfffe) {
82*0e209d39SAndroid Build Coastguard Worker         return true;  // surrogate or noncharacter code points
83*0e209d39SAndroid Build Coastguard Worker     } else if (c <= 0x10ffff) {
84*0e209d39SAndroid Build Coastguard Worker         return false;  // all else
85*0e209d39SAndroid Build Coastguard Worker     } else {
86*0e209d39SAndroid Build Coastguard Worker         return true;  // not a code point
87*0e209d39SAndroid Build Coastguard Worker     }
88*0e209d39SAndroid Build Coastguard Worker }
89*0e209d39SAndroid Build Coastguard Worker 
escapeUnprintable(UnicodeString & result,UChar32 c)90*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
91*0e209d39SAndroid Build Coastguard Worker     if (isUnprintable(c)) {
92*0e209d39SAndroid Build Coastguard Worker         escape(result, c);
93*0e209d39SAndroid Build Coastguard Worker         return true;
94*0e209d39SAndroid Build Coastguard Worker     }
95*0e209d39SAndroid Build Coastguard Worker     return false;
96*0e209d39SAndroid Build Coastguard Worker }
97*0e209d39SAndroid Build Coastguard Worker 
escape(UnicodeString & result,UChar32 c)98*0e209d39SAndroid Build Coastguard Worker UnicodeString &ICU_Utility::escape(UnicodeString& result, UChar32 c) {
99*0e209d39SAndroid Build Coastguard Worker     result.append(BACKSLASH);
100*0e209d39SAndroid Build Coastguard Worker     if (c & ~0xFFFF) {
101*0e209d39SAndroid Build Coastguard Worker         result.append(UPPER_U);
102*0e209d39SAndroid Build Coastguard Worker         result.append(DIGITS[0xF&(c>>28)]);
103*0e209d39SAndroid Build Coastguard Worker         result.append(DIGITS[0xF&(c>>24)]);
104*0e209d39SAndroid Build Coastguard Worker         result.append(DIGITS[0xF&(c>>20)]);
105*0e209d39SAndroid Build Coastguard Worker         result.append(DIGITS[0xF&(c>>16)]);
106*0e209d39SAndroid Build Coastguard Worker     } else {
107*0e209d39SAndroid Build Coastguard Worker         result.append(LOWER_U);
108*0e209d39SAndroid Build Coastguard Worker     }
109*0e209d39SAndroid Build Coastguard Worker     result.append(DIGITS[0xF&(c>>12)]);
110*0e209d39SAndroid Build Coastguard Worker     result.append(DIGITS[0xF&(c>>8)]);
111*0e209d39SAndroid Build Coastguard Worker     result.append(DIGITS[0xF&(c>>4)]);
112*0e209d39SAndroid Build Coastguard Worker     result.append(DIGITS[0xF&c]);
113*0e209d39SAndroid Build Coastguard Worker     return result;
114*0e209d39SAndroid Build Coastguard Worker }
115*0e209d39SAndroid Build Coastguard Worker 
/**
 * Returns the index of a character, ignoring quoted text.
 * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
 * found by a search for 'h'.
 */
// FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
/*
int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text,
                               int32_t start, int32_t limit,
                               char16_t charToFind) {
    for (int32_t i=start; i<limit; ++i) {
        char16_t c = text.charAt(i);
        if (c == BACKSLASH) {
            ++i;
        } else if (c == APOSTROPHE) {
            while (++i < limit
                   && text.charAt(i) != APOSTROPHE) {}
        } else if (c == charToFind) {
            return i;
        }
    }
    return -1;
}
*/
140*0e209d39SAndroid Build Coastguard Worker 
141*0e209d39SAndroid Build Coastguard Worker /**
142*0e209d39SAndroid Build Coastguard Worker  * Skip over a sequence of zero or more white space characters at pos.
143*0e209d39SAndroid Build Coastguard Worker  * @param advance if true, advance pos to the first non-white-space
144*0e209d39SAndroid Build Coastguard Worker  * character at or after pos, or str.length(), if there is none.
145*0e209d39SAndroid Build Coastguard Worker  * Otherwise leave pos unchanged.
146*0e209d39SAndroid Build Coastguard Worker  * @return the index of the first non-white-space character at or
147*0e209d39SAndroid Build Coastguard Worker  * after pos, or str.length(), if there is none.
148*0e209d39SAndroid Build Coastguard Worker  */
skipWhitespace(const UnicodeString & str,int32_t & pos,UBool advance)149*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
150*0e209d39SAndroid Build Coastguard Worker                                     UBool advance) {
151*0e209d39SAndroid Build Coastguard Worker     int32_t p = pos;
152*0e209d39SAndroid Build Coastguard Worker     const char16_t* s = str.getBuffer();
153*0e209d39SAndroid Build Coastguard Worker     p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s);
154*0e209d39SAndroid Build Coastguard Worker     if (advance) {
155*0e209d39SAndroid Build Coastguard Worker         pos = p;
156*0e209d39SAndroid Build Coastguard Worker     }
157*0e209d39SAndroid Build Coastguard Worker     return p;
158*0e209d39SAndroid Build Coastguard Worker }
159*0e209d39SAndroid Build Coastguard Worker 
/**
 * Skip over Pattern_White_Space in a Replaceable.
 * Skipping may be done in the forward or
 * reverse direction.  In either case, the leftmost index will be
 * inclusive, and the rightmost index will be exclusive.  That is,
 * given a range defined as [start, limit), the call
 * skipWhitespace(text, start, limit) will advance start past leading
 * whitespace, whereas the call skipWhitespace(text, limit, start),
 * will back up limit past trailing whitespace.
 * @param text the text to be analyzed
 * @param pos either the start or limit of a range of 'text', to skip
 * leading or trailing whitespace, respectively
 * @param stop either the limit or start of a range of 'text', to skip
 * leading or trailing whitespace, respectively
 * @return the new start or limit, depending on what was passed in to
 * 'pos'
 */
//?FOR FUTURE USE.  DISABLE FOR NOW for coverage reasons.
//?int32_t ICU_Utility::skipWhitespace(const Replaceable& text,
//?                                    int32_t pos, int32_t stop) {
//?    UChar32 c;
//?    UBool isForward = (stop >= pos);
//?
//?    if (!isForward) {
//?        --pos; // pos is a limit, so back up by one
//?    }
//?
//?    while (pos != stop &&
//?           PatternProps::isWhiteSpace(c = text.char32At(pos))) {
//?        if (isForward) {
//?            pos += U16_LENGTH(c);
//?        } else {
//?            pos -= U16_LENGTH(c);
//?        }
//?    }
//?
//?    if (!isForward) {
//?        ++pos; // make pos back into a limit
//?    }
//?
//?    return pos;
//?}
202*0e209d39SAndroid Build Coastguard Worker 
203*0e209d39SAndroid Build Coastguard Worker /**
204*0e209d39SAndroid Build Coastguard Worker  * Parse a single non-whitespace character 'ch', optionally
205*0e209d39SAndroid Build Coastguard Worker  * preceded by whitespace.
206*0e209d39SAndroid Build Coastguard Worker  * @param id the string to be parsed
207*0e209d39SAndroid Build Coastguard Worker  * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
208*0e209d39SAndroid Build Coastguard Worker  * offset of the first character to be parsed.  On output, pos[0]
209*0e209d39SAndroid Build Coastguard Worker  * is the index after the last parsed character.  If the parse
210*0e209d39SAndroid Build Coastguard Worker  * fails, pos[0] will be unchanged.
211*0e209d39SAndroid Build Coastguard Worker  * @param ch the non-whitespace character to be parsed.
212*0e209d39SAndroid Build Coastguard Worker  * @return true if 'ch' is seen preceded by zero or more
213*0e209d39SAndroid Build Coastguard Worker  * whitespace characters.
214*0e209d39SAndroid Build Coastguard Worker  */
parseChar(const UnicodeString & id,int32_t & pos,char16_t ch)215*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, char16_t ch) {
216*0e209d39SAndroid Build Coastguard Worker     int32_t start = pos;
217*0e209d39SAndroid Build Coastguard Worker     skipWhitespace(id, pos, true);
218*0e209d39SAndroid Build Coastguard Worker     if (pos == id.length() ||
219*0e209d39SAndroid Build Coastguard Worker         id.charAt(pos) != ch) {
220*0e209d39SAndroid Build Coastguard Worker         pos = start;
221*0e209d39SAndroid Build Coastguard Worker         return false;
222*0e209d39SAndroid Build Coastguard Worker     }
223*0e209d39SAndroid Build Coastguard Worker     ++pos;
224*0e209d39SAndroid Build Coastguard Worker     return true;
225*0e209d39SAndroid Build Coastguard Worker }
226*0e209d39SAndroid Build Coastguard Worker 
227*0e209d39SAndroid Build Coastguard Worker /**
228*0e209d39SAndroid Build Coastguard Worker  * Parse a pattern string within the given Replaceable and a parsing
229*0e209d39SAndroid Build Coastguard Worker  * pattern.  Characters are matched literally and case-sensitively
230*0e209d39SAndroid Build Coastguard Worker  * except for the following special characters:
231*0e209d39SAndroid Build Coastguard Worker  *
232*0e209d39SAndroid Build Coastguard Worker  * ~  zero or more Pattern_White_Space chars
233*0e209d39SAndroid Build Coastguard Worker  *
234*0e209d39SAndroid Build Coastguard Worker  * If end of pattern is reached with all matches along the way,
235*0e209d39SAndroid Build Coastguard Worker  * pos is advanced to the first unparsed index and returned.
236*0e209d39SAndroid Build Coastguard Worker  * Otherwise -1 is returned.
237*0e209d39SAndroid Build Coastguard Worker  * @param pat pattern that controls parsing
238*0e209d39SAndroid Build Coastguard Worker  * @param text text to be parsed, starting at index
239*0e209d39SAndroid Build Coastguard Worker  * @param index offset to first character to parse
240*0e209d39SAndroid Build Coastguard Worker  * @param limit offset after last character to parse
241*0e209d39SAndroid Build Coastguard Worker  * @return index after last parsed character, or -1 on parse failure.
242*0e209d39SAndroid Build Coastguard Worker  */
parsePattern(const UnicodeString & pat,const Replaceable & text,int32_t index,int32_t limit)243*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
244*0e209d39SAndroid Build Coastguard Worker                                   const Replaceable& text,
245*0e209d39SAndroid Build Coastguard Worker                                   int32_t index,
246*0e209d39SAndroid Build Coastguard Worker                                   int32_t limit) {
247*0e209d39SAndroid Build Coastguard Worker     int32_t ipat = 0;
248*0e209d39SAndroid Build Coastguard Worker 
249*0e209d39SAndroid Build Coastguard Worker     // empty pattern matches immediately
250*0e209d39SAndroid Build Coastguard Worker     if (ipat == pat.length()) {
251*0e209d39SAndroid Build Coastguard Worker         return index;
252*0e209d39SAndroid Build Coastguard Worker     }
253*0e209d39SAndroid Build Coastguard Worker 
254*0e209d39SAndroid Build Coastguard Worker     UChar32 cpat = pat.char32At(ipat);
255*0e209d39SAndroid Build Coastguard Worker 
256*0e209d39SAndroid Build Coastguard Worker     while (index < limit) {
257*0e209d39SAndroid Build Coastguard Worker         UChar32 c = text.char32At(index);
258*0e209d39SAndroid Build Coastguard Worker 
259*0e209d39SAndroid Build Coastguard Worker         // parse \s*
260*0e209d39SAndroid Build Coastguard Worker         if (cpat == 126 /*~*/) {
261*0e209d39SAndroid Build Coastguard Worker             if (PatternProps::isWhiteSpace(c)) {
262*0e209d39SAndroid Build Coastguard Worker                 index += U16_LENGTH(c);
263*0e209d39SAndroid Build Coastguard Worker                 continue;
264*0e209d39SAndroid Build Coastguard Worker             } else {
265*0e209d39SAndroid Build Coastguard Worker                 if (++ipat == pat.length()) {
266*0e209d39SAndroid Build Coastguard Worker                     return index; // success; c unparsed
267*0e209d39SAndroid Build Coastguard Worker                 }
268*0e209d39SAndroid Build Coastguard Worker                 // fall thru; process c again with next cpat
269*0e209d39SAndroid Build Coastguard Worker             }
270*0e209d39SAndroid Build Coastguard Worker         }
271*0e209d39SAndroid Build Coastguard Worker 
272*0e209d39SAndroid Build Coastguard Worker         // parse literal
273*0e209d39SAndroid Build Coastguard Worker         else if (c == cpat) {
274*0e209d39SAndroid Build Coastguard Worker             index += U16_LENGTH(c);
275*0e209d39SAndroid Build Coastguard Worker             ipat += U16_LENGTH(cpat);
276*0e209d39SAndroid Build Coastguard Worker             if (ipat == pat.length()) {
277*0e209d39SAndroid Build Coastguard Worker                 return index; // success; c parsed
278*0e209d39SAndroid Build Coastguard Worker             }
279*0e209d39SAndroid Build Coastguard Worker             // fall thru; get next cpat
280*0e209d39SAndroid Build Coastguard Worker         }
281*0e209d39SAndroid Build Coastguard Worker 
282*0e209d39SAndroid Build Coastguard Worker         // match failure of literal
283*0e209d39SAndroid Build Coastguard Worker         else {
284*0e209d39SAndroid Build Coastguard Worker             return -1;
285*0e209d39SAndroid Build Coastguard Worker         }
286*0e209d39SAndroid Build Coastguard Worker 
287*0e209d39SAndroid Build Coastguard Worker         cpat = pat.char32At(ipat);
288*0e209d39SAndroid Build Coastguard Worker     }
289*0e209d39SAndroid Build Coastguard Worker 
290*0e209d39SAndroid Build Coastguard Worker     return -1; // text ended before end of pat
291*0e209d39SAndroid Build Coastguard Worker }
292*0e209d39SAndroid Build Coastguard Worker 
parseAsciiInteger(const UnicodeString & str,int32_t & pos)293*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::parseAsciiInteger(const UnicodeString& str, int32_t& pos) {
294*0e209d39SAndroid Build Coastguard Worker     int32_t result = 0;
295*0e209d39SAndroid Build Coastguard Worker     char16_t c;
296*0e209d39SAndroid Build Coastguard Worker     while (pos < str.length() && (c = str.charAt(pos)) >= u'0' && c <= u'9') {
297*0e209d39SAndroid Build Coastguard Worker         result = result * 10 + (c - u'0');
298*0e209d39SAndroid Build Coastguard Worker         pos++;
299*0e209d39SAndroid Build Coastguard Worker     }
300*0e209d39SAndroid Build Coastguard Worker     return result;
301*0e209d39SAndroid Build Coastguard Worker }
302*0e209d39SAndroid Build Coastguard Worker 
303*0e209d39SAndroid Build Coastguard Worker /**
304*0e209d39SAndroid Build Coastguard Worker  * Append a character to a rule that is being built up.  To flush
305*0e209d39SAndroid Build Coastguard Worker  * the quoteBuf to rule, make one final call with isLiteral == true.
306*0e209d39SAndroid Build Coastguard Worker  * If there is no final character, pass in (UChar32)-1 as c.
307*0e209d39SAndroid Build Coastguard Worker  * @param rule the string to append the character to
308*0e209d39SAndroid Build Coastguard Worker  * @param c the character to append, or (UChar32)-1 if none.
309*0e209d39SAndroid Build Coastguard Worker  * @param isLiteral if true, then the given character should not be
310*0e209d39SAndroid Build Coastguard Worker  * quoted or escaped.  Usually this means it is a syntactic element
311*0e209d39SAndroid Build Coastguard Worker  * such as > or $
312*0e209d39SAndroid Build Coastguard Worker  * @param escapeUnprintable if true, then unprintable characters
313*0e209d39SAndroid Build Coastguard Worker  * should be escaped using \uxxxx or \Uxxxxxxxx.  These escapes will
314*0e209d39SAndroid Build Coastguard Worker  * appear outside of quotes.
315*0e209d39SAndroid Build Coastguard Worker  * @param quoteBuf a buffer which is used to build up quoted
316*0e209d39SAndroid Build Coastguard Worker  * substrings.  The caller should initially supply an empty buffer,
317*0e209d39SAndroid Build Coastguard Worker  * and thereafter should not modify the buffer.  The buffer should be
318*0e209d39SAndroid Build Coastguard Worker  * cleared out by, at the end, calling this method with a literal
319*0e209d39SAndroid Build Coastguard Worker  * character.
320*0e209d39SAndroid Build Coastguard Worker  */
appendToRule(UnicodeString & rule,UChar32 c,UBool isLiteral,UBool escapeUnprintable,UnicodeString & quoteBuf)321*0e209d39SAndroid Build Coastguard Worker void ICU_Utility::appendToRule(UnicodeString& rule,
322*0e209d39SAndroid Build Coastguard Worker                                UChar32 c,
323*0e209d39SAndroid Build Coastguard Worker                                UBool isLiteral,
324*0e209d39SAndroid Build Coastguard Worker                                UBool escapeUnprintable,
325*0e209d39SAndroid Build Coastguard Worker                                UnicodeString& quoteBuf) {
326*0e209d39SAndroid Build Coastguard Worker     // If we are escaping unprintables, then escape them outside
327*0e209d39SAndroid Build Coastguard Worker     // quotes.  \u and \U are not recognized within quotes.  The same
328*0e209d39SAndroid Build Coastguard Worker     // logic applies to literals, but literals are never escaped.
329*0e209d39SAndroid Build Coastguard Worker     if (isLiteral ||
330*0e209d39SAndroid Build Coastguard Worker         (escapeUnprintable && ICU_Utility::isUnprintable(c))) {
331*0e209d39SAndroid Build Coastguard Worker         if (quoteBuf.length() > 0) {
332*0e209d39SAndroid Build Coastguard Worker             // We prefer backslash APOSTROPHE to double APOSTROPHE
333*0e209d39SAndroid Build Coastguard Worker             // (more readable, less similar to ") so if there are
334*0e209d39SAndroid Build Coastguard Worker             // double APOSTROPHEs at the ends, we pull them outside
335*0e209d39SAndroid Build Coastguard Worker             // of the quote.
336*0e209d39SAndroid Build Coastguard Worker 
337*0e209d39SAndroid Build Coastguard Worker             // If the first thing in the quoteBuf is APOSTROPHE
338*0e209d39SAndroid Build Coastguard Worker             // (doubled) then pull it out.
339*0e209d39SAndroid Build Coastguard Worker             while (quoteBuf.length() >= 2 &&
340*0e209d39SAndroid Build Coastguard Worker                    quoteBuf.charAt(0) == APOSTROPHE &&
341*0e209d39SAndroid Build Coastguard Worker                    quoteBuf.charAt(1) == APOSTROPHE) {
342*0e209d39SAndroid Build Coastguard Worker                 rule.append(BACKSLASH).append(APOSTROPHE);
343*0e209d39SAndroid Build Coastguard Worker                 quoteBuf.remove(0, 2);
344*0e209d39SAndroid Build Coastguard Worker             }
345*0e209d39SAndroid Build Coastguard Worker             // If the last thing in the quoteBuf is APOSTROPHE
346*0e209d39SAndroid Build Coastguard Worker             // (doubled) then remove and count it and add it after.
347*0e209d39SAndroid Build Coastguard Worker             int32_t trailingCount = 0;
348*0e209d39SAndroid Build Coastguard Worker             while (quoteBuf.length() >= 2 &&
349*0e209d39SAndroid Build Coastguard Worker                    quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
350*0e209d39SAndroid Build Coastguard Worker                    quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
351*0e209d39SAndroid Build Coastguard Worker                 quoteBuf.truncate(quoteBuf.length()-2);
352*0e209d39SAndroid Build Coastguard Worker                 ++trailingCount;
353*0e209d39SAndroid Build Coastguard Worker             }
354*0e209d39SAndroid Build Coastguard Worker             if (quoteBuf.length() > 0) {
355*0e209d39SAndroid Build Coastguard Worker                 rule.append(APOSTROPHE);
356*0e209d39SAndroid Build Coastguard Worker                 rule.append(quoteBuf);
357*0e209d39SAndroid Build Coastguard Worker                 rule.append(APOSTROPHE);
358*0e209d39SAndroid Build Coastguard Worker                 quoteBuf.truncate(0);
359*0e209d39SAndroid Build Coastguard Worker             }
360*0e209d39SAndroid Build Coastguard Worker             while (trailingCount-- > 0) {
361*0e209d39SAndroid Build Coastguard Worker                 rule.append(BACKSLASH).append(APOSTROPHE);
362*0e209d39SAndroid Build Coastguard Worker             }
363*0e209d39SAndroid Build Coastguard Worker         }
364*0e209d39SAndroid Build Coastguard Worker         if (c != (UChar32)-1) {
365*0e209d39SAndroid Build Coastguard Worker             /* Since spaces are ignored during parsing, they are
366*0e209d39SAndroid Build Coastguard Worker              * emitted only for readability.  We emit one here
367*0e209d39SAndroid Build Coastguard Worker              * only if there isn't already one at the end of the
368*0e209d39SAndroid Build Coastguard Worker              * rule.
369*0e209d39SAndroid Build Coastguard Worker              */
370*0e209d39SAndroid Build Coastguard Worker             if (c == SPACE) {
371*0e209d39SAndroid Build Coastguard Worker                 int32_t len = rule.length();
372*0e209d39SAndroid Build Coastguard Worker                 if (len > 0 && rule.charAt(len-1) != c) {
373*0e209d39SAndroid Build Coastguard Worker                     rule.append(c);
374*0e209d39SAndroid Build Coastguard Worker                 }
375*0e209d39SAndroid Build Coastguard Worker             } else if (!escapeUnprintable || !ICU_Utility::escapeUnprintable(rule, c)) {
376*0e209d39SAndroid Build Coastguard Worker                 rule.append(c);
377*0e209d39SAndroid Build Coastguard Worker             }
378*0e209d39SAndroid Build Coastguard Worker         }
379*0e209d39SAndroid Build Coastguard Worker     }
380*0e209d39SAndroid Build Coastguard Worker 
381*0e209d39SAndroid Build Coastguard Worker     // Escape ' and '\' and don't begin a quote just for them
382*0e209d39SAndroid Build Coastguard Worker     else if (quoteBuf.length() == 0 &&
383*0e209d39SAndroid Build Coastguard Worker              (c == APOSTROPHE || c == BACKSLASH)) {
384*0e209d39SAndroid Build Coastguard Worker         rule.append(BACKSLASH);
385*0e209d39SAndroid Build Coastguard Worker         rule.append(c);
386*0e209d39SAndroid Build Coastguard Worker     }
387*0e209d39SAndroid Build Coastguard Worker 
388*0e209d39SAndroid Build Coastguard Worker     // Specials (printable ascii that isn't [0-9a-zA-Z]) and
389*0e209d39SAndroid Build Coastguard Worker     // whitespace need quoting.  Also append stuff to quotes if we are
390*0e209d39SAndroid Build Coastguard Worker     // building up a quoted substring already.
391*0e209d39SAndroid Build Coastguard Worker     else if (quoteBuf.length() > 0 ||
392*0e209d39SAndroid Build Coastguard Worker              (c >= 0x0021 && c <= 0x007E &&
393*0e209d39SAndroid Build Coastguard Worker               !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
394*0e209d39SAndroid Build Coastguard Worker                 (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
395*0e209d39SAndroid Build Coastguard Worker                 (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
396*0e209d39SAndroid Build Coastguard Worker              PatternProps::isWhiteSpace(c)) {
397*0e209d39SAndroid Build Coastguard Worker         quoteBuf.append(c);
398*0e209d39SAndroid Build Coastguard Worker         // Double ' within a quote
399*0e209d39SAndroid Build Coastguard Worker         if (c == APOSTROPHE) {
400*0e209d39SAndroid Build Coastguard Worker             quoteBuf.append(c);
401*0e209d39SAndroid Build Coastguard Worker         }
402*0e209d39SAndroid Build Coastguard Worker     }
403*0e209d39SAndroid Build Coastguard Worker 
404*0e209d39SAndroid Build Coastguard Worker     // Otherwise just append
405*0e209d39SAndroid Build Coastguard Worker     else {
406*0e209d39SAndroid Build Coastguard Worker         rule.append(c);
407*0e209d39SAndroid Build Coastguard Worker     }
408*0e209d39SAndroid Build Coastguard Worker }
409*0e209d39SAndroid Build Coastguard Worker 
appendToRule(UnicodeString & rule,const UnicodeString & text,UBool isLiteral,UBool escapeUnprintable,UnicodeString & quoteBuf)410*0e209d39SAndroid Build Coastguard Worker void ICU_Utility::appendToRule(UnicodeString& rule,
411*0e209d39SAndroid Build Coastguard Worker                                const UnicodeString& text,
412*0e209d39SAndroid Build Coastguard Worker                                UBool isLiteral,
413*0e209d39SAndroid Build Coastguard Worker                                UBool escapeUnprintable,
414*0e209d39SAndroid Build Coastguard Worker                                UnicodeString& quoteBuf) {
415*0e209d39SAndroid Build Coastguard Worker     for (int32_t i=0; i<text.length(); ++i) {
416*0e209d39SAndroid Build Coastguard Worker         appendToRule(rule, text[i], isLiteral, escapeUnprintable, quoteBuf);
417*0e209d39SAndroid Build Coastguard Worker     }
418*0e209d39SAndroid Build Coastguard Worker }
419*0e209d39SAndroid Build Coastguard Worker 
420*0e209d39SAndroid Build Coastguard Worker /**
421*0e209d39SAndroid Build Coastguard Worker  * Given a matcher reference, which may be null, append its
422*0e209d39SAndroid Build Coastguard Worker  * pattern as a literal to the given rule.
423*0e209d39SAndroid Build Coastguard Worker  */
appendToRule(UnicodeString & rule,const UnicodeMatcher * matcher,UBool escapeUnprintable,UnicodeString & quoteBuf)424*0e209d39SAndroid Build Coastguard Worker void ICU_Utility::appendToRule(UnicodeString& rule,
425*0e209d39SAndroid Build Coastguard Worker                                const UnicodeMatcher* matcher,
426*0e209d39SAndroid Build Coastguard Worker                                UBool escapeUnprintable,
427*0e209d39SAndroid Build Coastguard Worker                                UnicodeString& quoteBuf) {
428*0e209d39SAndroid Build Coastguard Worker     if (matcher != nullptr) {
429*0e209d39SAndroid Build Coastguard Worker         UnicodeString pat;
430*0e209d39SAndroid Build Coastguard Worker         appendToRule(rule, matcher->toPattern(pat, escapeUnprintable),
431*0e209d39SAndroid Build Coastguard Worker                      true, escapeUnprintable, quoteBuf);
432*0e209d39SAndroid Build Coastguard Worker     }
433*0e209d39SAndroid Build Coastguard Worker }
434*0e209d39SAndroid Build Coastguard Worker 
435*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
436