1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker **********************************************************************
5*0e209d39SAndroid Build Coastguard Worker * Copyright (c) 2001-2011, International Business Machines
6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved.
7*0e209d39SAndroid Build Coastguard Worker **********************************************************************
8*0e209d39SAndroid Build Coastguard Worker * Date Name Description
9*0e209d39SAndroid Build Coastguard Worker * 11/19/2001 aliu Creation.
10*0e209d39SAndroid Build Coastguard Worker **********************************************************************
11*0e209d39SAndroid Build Coastguard Worker */
12*0e209d39SAndroid Build Coastguard Worker
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/unimatch.h"
14*0e209d39SAndroid Build Coastguard Worker #include "unicode/utf16.h"
15*0e209d39SAndroid Build Coastguard Worker #include "patternprops.h"
16*0e209d39SAndroid Build Coastguard Worker #include "util.h"
17*0e209d39SAndroid Build Coastguard Worker
// All character constants are spelled as numeric code points rather than
// character literals so that they stay correct on EBCDIC platforms.

static constexpr char16_t BACKSLASH  = 0x005C; // '\\'
static constexpr char16_t UPPER_U    = 0x0055; // 'U'
static constexpr char16_t LOWER_U    = 0x0075; // 'u'
static constexpr char16_t APOSTROPHE = 0x0027; // '\''
static constexpr char16_t SPACE      = 0x0020; // ' '

// Digit characters for radixes up to 36, indexed by digit value:
// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
static constexpr char16_t DIGITS[] = {
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, // 0-9
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, // A-J
    0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, // K-T
    0x55, 0x56, 0x57, 0x58, 0x59, 0x5A                          // U-Z
};
33*0e209d39SAndroid Build Coastguard Worker
34*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
35*0e209d39SAndroid Build Coastguard Worker
appendNumber(UnicodeString & result,int32_t n,int32_t radix,int32_t minDigits)36*0e209d39SAndroid Build Coastguard Worker UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n,
37*0e209d39SAndroid Build Coastguard Worker int32_t radix, int32_t minDigits) {
38*0e209d39SAndroid Build Coastguard Worker if (radix < 2 || radix > 36) {
39*0e209d39SAndroid Build Coastguard Worker // Bogus radix
40*0e209d39SAndroid Build Coastguard Worker return result.append((char16_t)63/*?*/);
41*0e209d39SAndroid Build Coastguard Worker }
42*0e209d39SAndroid Build Coastguard Worker // Handle negatives
43*0e209d39SAndroid Build Coastguard Worker if (n < 0) {
44*0e209d39SAndroid Build Coastguard Worker n = -n;
45*0e209d39SAndroid Build Coastguard Worker result.append((char16_t)45/*-*/);
46*0e209d39SAndroid Build Coastguard Worker }
47*0e209d39SAndroid Build Coastguard Worker // First determine the number of digits
48*0e209d39SAndroid Build Coastguard Worker int32_t nn = n;
49*0e209d39SAndroid Build Coastguard Worker int32_t r = 1;
50*0e209d39SAndroid Build Coastguard Worker while (nn >= radix) {
51*0e209d39SAndroid Build Coastguard Worker nn /= radix;
52*0e209d39SAndroid Build Coastguard Worker r *= radix;
53*0e209d39SAndroid Build Coastguard Worker --minDigits;
54*0e209d39SAndroid Build Coastguard Worker }
55*0e209d39SAndroid Build Coastguard Worker // Now generate the digits
56*0e209d39SAndroid Build Coastguard Worker while (--minDigits > 0) {
57*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0]);
58*0e209d39SAndroid Build Coastguard Worker }
59*0e209d39SAndroid Build Coastguard Worker while (r > 0) {
60*0e209d39SAndroid Build Coastguard Worker int32_t digit = n / r;
61*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[digit]);
62*0e209d39SAndroid Build Coastguard Worker n -= digit * r;
63*0e209d39SAndroid Build Coastguard Worker r /= radix;
64*0e209d39SAndroid Build Coastguard Worker }
65*0e209d39SAndroid Build Coastguard Worker return result;
66*0e209d39SAndroid Build Coastguard Worker }
67*0e209d39SAndroid Build Coastguard Worker
isUnprintable(UChar32 c)68*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::isUnprintable(UChar32 c) {
69*0e209d39SAndroid Build Coastguard Worker return !(c >= 0x20 && c <= 0x7E);
70*0e209d39SAndroid Build Coastguard Worker }
71*0e209d39SAndroid Build Coastguard Worker
shouldAlwaysBeEscaped(UChar32 c)72*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::shouldAlwaysBeEscaped(UChar32 c) {
73*0e209d39SAndroid Build Coastguard Worker if (c < 0x20) {
74*0e209d39SAndroid Build Coastguard Worker return true; // C0 control codes
75*0e209d39SAndroid Build Coastguard Worker } else if (c <= 0x7e) {
76*0e209d39SAndroid Build Coastguard Worker return false; // printable ASCII
77*0e209d39SAndroid Build Coastguard Worker } else if (c <= 0x9f) {
78*0e209d39SAndroid Build Coastguard Worker return true; // C1 control codes
79*0e209d39SAndroid Build Coastguard Worker } else if (c < 0xd800) {
80*0e209d39SAndroid Build Coastguard Worker return false; // most of the BMP
81*0e209d39SAndroid Build Coastguard Worker } else if (c <= 0xdfff || (0xfdd0 <= c && c <= 0xfdef) || (c & 0xfffe) == 0xfffe) {
82*0e209d39SAndroid Build Coastguard Worker return true; // surrogate or noncharacter code points
83*0e209d39SAndroid Build Coastguard Worker } else if (c <= 0x10ffff) {
84*0e209d39SAndroid Build Coastguard Worker return false; // all else
85*0e209d39SAndroid Build Coastguard Worker } else {
86*0e209d39SAndroid Build Coastguard Worker return true; // not a code point
87*0e209d39SAndroid Build Coastguard Worker }
88*0e209d39SAndroid Build Coastguard Worker }
89*0e209d39SAndroid Build Coastguard Worker
escapeUnprintable(UnicodeString & result,UChar32 c)90*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) {
91*0e209d39SAndroid Build Coastguard Worker if (isUnprintable(c)) {
92*0e209d39SAndroid Build Coastguard Worker escape(result, c);
93*0e209d39SAndroid Build Coastguard Worker return true;
94*0e209d39SAndroid Build Coastguard Worker }
95*0e209d39SAndroid Build Coastguard Worker return false;
96*0e209d39SAndroid Build Coastguard Worker }
97*0e209d39SAndroid Build Coastguard Worker
escape(UnicodeString & result,UChar32 c)98*0e209d39SAndroid Build Coastguard Worker UnicodeString &ICU_Utility::escape(UnicodeString& result, UChar32 c) {
99*0e209d39SAndroid Build Coastguard Worker result.append(BACKSLASH);
100*0e209d39SAndroid Build Coastguard Worker if (c & ~0xFFFF) {
101*0e209d39SAndroid Build Coastguard Worker result.append(UPPER_U);
102*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>28)]);
103*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>24)]);
104*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>20)]);
105*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>16)]);
106*0e209d39SAndroid Build Coastguard Worker } else {
107*0e209d39SAndroid Build Coastguard Worker result.append(LOWER_U);
108*0e209d39SAndroid Build Coastguard Worker }
109*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>12)]);
110*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>8)]);
111*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&(c>>4)]);
112*0e209d39SAndroid Build Coastguard Worker result.append(DIGITS[0xF&c]);
113*0e209d39SAndroid Build Coastguard Worker return result;
114*0e209d39SAndroid Build Coastguard Worker }
115*0e209d39SAndroid Build Coastguard Worker
116*0e209d39SAndroid Build Coastguard Worker /**
117*0e209d39SAndroid Build Coastguard Worker * Returns the index of a character, ignoring quoted text.
118*0e209d39SAndroid Build Coastguard Worker * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
119*0e209d39SAndroid Build Coastguard Worker * found by a search for 'h'.
120*0e209d39SAndroid Build Coastguard Worker */
121*0e209d39SAndroid Build Coastguard Worker // FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
122*0e209d39SAndroid Build Coastguard Worker /*
123*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text,
124*0e209d39SAndroid Build Coastguard Worker int32_t start, int32_t limit,
125*0e209d39SAndroid Build Coastguard Worker char16_t charToFind) {
126*0e209d39SAndroid Build Coastguard Worker for (int32_t i=start; i<limit; ++i) {
127*0e209d39SAndroid Build Coastguard Worker char16_t c = text.charAt(i);
128*0e209d39SAndroid Build Coastguard Worker if (c == BACKSLASH) {
129*0e209d39SAndroid Build Coastguard Worker ++i;
130*0e209d39SAndroid Build Coastguard Worker } else if (c == APOSTROPHE) {
131*0e209d39SAndroid Build Coastguard Worker while (++i < limit
132*0e209d39SAndroid Build Coastguard Worker && text.charAt(i) != APOSTROPHE) {}
133*0e209d39SAndroid Build Coastguard Worker } else if (c == charToFind) {
134*0e209d39SAndroid Build Coastguard Worker return i;
135*0e209d39SAndroid Build Coastguard Worker }
136*0e209d39SAndroid Build Coastguard Worker }
137*0e209d39SAndroid Build Coastguard Worker return -1;
138*0e209d39SAndroid Build Coastguard Worker }
139*0e209d39SAndroid Build Coastguard Worker */
140*0e209d39SAndroid Build Coastguard Worker
141*0e209d39SAndroid Build Coastguard Worker /**
142*0e209d39SAndroid Build Coastguard Worker * Skip over a sequence of zero or more white space characters at pos.
143*0e209d39SAndroid Build Coastguard Worker * @param advance if true, advance pos to the first non-white-space
144*0e209d39SAndroid Build Coastguard Worker * character at or after pos, or str.length(), if there is none.
145*0e209d39SAndroid Build Coastguard Worker * Otherwise leave pos unchanged.
146*0e209d39SAndroid Build Coastguard Worker * @return the index of the first non-white-space character at or
147*0e209d39SAndroid Build Coastguard Worker * after pos, or str.length(), if there is none.
148*0e209d39SAndroid Build Coastguard Worker */
skipWhitespace(const UnicodeString & str,int32_t & pos,UBool advance)149*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
150*0e209d39SAndroid Build Coastguard Worker UBool advance) {
151*0e209d39SAndroid Build Coastguard Worker int32_t p = pos;
152*0e209d39SAndroid Build Coastguard Worker const char16_t* s = str.getBuffer();
153*0e209d39SAndroid Build Coastguard Worker p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s);
154*0e209d39SAndroid Build Coastguard Worker if (advance) {
155*0e209d39SAndroid Build Coastguard Worker pos = p;
156*0e209d39SAndroid Build Coastguard Worker }
157*0e209d39SAndroid Build Coastguard Worker return p;
158*0e209d39SAndroid Build Coastguard Worker }
159*0e209d39SAndroid Build Coastguard Worker
160*0e209d39SAndroid Build Coastguard Worker /**
161*0e209d39SAndroid Build Coastguard Worker * Skip over Pattern_White_Space in a Replaceable.
162*0e209d39SAndroid Build Coastguard Worker * Skipping may be done in the forward or
163*0e209d39SAndroid Build Coastguard Worker * reverse direction. In either case, the leftmost index will be
164*0e209d39SAndroid Build Coastguard Worker * inclusive, and the rightmost index will be exclusive. That is,
165*0e209d39SAndroid Build Coastguard Worker * given a range defined as [start, limit), the call
166*0e209d39SAndroid Build Coastguard Worker * skipWhitespace(text, start, limit) will advance start past leading
167*0e209d39SAndroid Build Coastguard Worker * whitespace, whereas the call skipWhitespace(text, limit, start),
168*0e209d39SAndroid Build Coastguard Worker * will back up limit past trailing whitespace.
169*0e209d39SAndroid Build Coastguard Worker * @param text the text to be analyzed
170*0e209d39SAndroid Build Coastguard Worker * @param pos either the start or limit of a range of 'text', to skip
171*0e209d39SAndroid Build Coastguard Worker * leading or trailing whitespace, respectively
172*0e209d39SAndroid Build Coastguard Worker * @param stop either the limit or start of a range of 'text', to skip
173*0e209d39SAndroid Build Coastguard Worker * leading or trailing whitespace, respectively
174*0e209d39SAndroid Build Coastguard Worker * @return the new start or limit, depending on what was passed in to
175*0e209d39SAndroid Build Coastguard Worker * 'pos'
176*0e209d39SAndroid Build Coastguard Worker */
177*0e209d39SAndroid Build Coastguard Worker //?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
178*0e209d39SAndroid Build Coastguard Worker //?int32_t ICU_Utility::skipWhitespace(const Replaceable& text,
179*0e209d39SAndroid Build Coastguard Worker //? int32_t pos, int32_t stop) {
180*0e209d39SAndroid Build Coastguard Worker //? UChar32 c;
181*0e209d39SAndroid Build Coastguard Worker //? UBool isForward = (stop >= pos);
182*0e209d39SAndroid Build Coastguard Worker //?
183*0e209d39SAndroid Build Coastguard Worker //? if (!isForward) {
184*0e209d39SAndroid Build Coastguard Worker //? --pos; // pos is a limit, so back up by one
185*0e209d39SAndroid Build Coastguard Worker //? }
186*0e209d39SAndroid Build Coastguard Worker //?
187*0e209d39SAndroid Build Coastguard Worker //? while (pos != stop &&
188*0e209d39SAndroid Build Coastguard Worker //? PatternProps::isWhiteSpace(c = text.char32At(pos))) {
189*0e209d39SAndroid Build Coastguard Worker //? if (isForward) {
190*0e209d39SAndroid Build Coastguard Worker //? pos += U16_LENGTH(c);
191*0e209d39SAndroid Build Coastguard Worker //? } else {
192*0e209d39SAndroid Build Coastguard Worker //? pos -= U16_LENGTH(c);
193*0e209d39SAndroid Build Coastguard Worker //? }
194*0e209d39SAndroid Build Coastguard Worker //? }
195*0e209d39SAndroid Build Coastguard Worker //?
196*0e209d39SAndroid Build Coastguard Worker //? if (!isForward) {
197*0e209d39SAndroid Build Coastguard Worker //? ++pos; // make pos back into a limit
198*0e209d39SAndroid Build Coastguard Worker //? }
199*0e209d39SAndroid Build Coastguard Worker //?
200*0e209d39SAndroid Build Coastguard Worker //? return pos;
201*0e209d39SAndroid Build Coastguard Worker //?}
202*0e209d39SAndroid Build Coastguard Worker
203*0e209d39SAndroid Build Coastguard Worker /**
204*0e209d39SAndroid Build Coastguard Worker * Parse a single non-whitespace character 'ch', optionally
205*0e209d39SAndroid Build Coastguard Worker * preceded by whitespace.
206*0e209d39SAndroid Build Coastguard Worker * @param id the string to be parsed
207*0e209d39SAndroid Build Coastguard Worker * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the
208*0e209d39SAndroid Build Coastguard Worker * offset of the first character to be parsed. On output, pos[0]
209*0e209d39SAndroid Build Coastguard Worker * is the index after the last parsed character. If the parse
210*0e209d39SAndroid Build Coastguard Worker * fails, pos[0] will be unchanged.
211*0e209d39SAndroid Build Coastguard Worker * @param ch the non-whitespace character to be parsed.
212*0e209d39SAndroid Build Coastguard Worker * @return true if 'ch' is seen preceded by zero or more
213*0e209d39SAndroid Build Coastguard Worker * whitespace characters.
214*0e209d39SAndroid Build Coastguard Worker */
parseChar(const UnicodeString & id,int32_t & pos,char16_t ch)215*0e209d39SAndroid Build Coastguard Worker UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, char16_t ch) {
216*0e209d39SAndroid Build Coastguard Worker int32_t start = pos;
217*0e209d39SAndroid Build Coastguard Worker skipWhitespace(id, pos, true);
218*0e209d39SAndroid Build Coastguard Worker if (pos == id.length() ||
219*0e209d39SAndroid Build Coastguard Worker id.charAt(pos) != ch) {
220*0e209d39SAndroid Build Coastguard Worker pos = start;
221*0e209d39SAndroid Build Coastguard Worker return false;
222*0e209d39SAndroid Build Coastguard Worker }
223*0e209d39SAndroid Build Coastguard Worker ++pos;
224*0e209d39SAndroid Build Coastguard Worker return true;
225*0e209d39SAndroid Build Coastguard Worker }
226*0e209d39SAndroid Build Coastguard Worker
227*0e209d39SAndroid Build Coastguard Worker /**
228*0e209d39SAndroid Build Coastguard Worker * Parse a pattern string within the given Replaceable and a parsing
229*0e209d39SAndroid Build Coastguard Worker * pattern. Characters are matched literally and case-sensitively
230*0e209d39SAndroid Build Coastguard Worker * except for the following special characters:
231*0e209d39SAndroid Build Coastguard Worker *
232*0e209d39SAndroid Build Coastguard Worker * ~ zero or more Pattern_White_Space chars
233*0e209d39SAndroid Build Coastguard Worker *
234*0e209d39SAndroid Build Coastguard Worker * If end of pattern is reached with all matches along the way,
235*0e209d39SAndroid Build Coastguard Worker * pos is advanced to the first unparsed index and returned.
236*0e209d39SAndroid Build Coastguard Worker * Otherwise -1 is returned.
237*0e209d39SAndroid Build Coastguard Worker * @param pat pattern that controls parsing
238*0e209d39SAndroid Build Coastguard Worker * @param text text to be parsed, starting at index
239*0e209d39SAndroid Build Coastguard Worker * @param index offset to first character to parse
240*0e209d39SAndroid Build Coastguard Worker * @param limit offset after last character to parse
241*0e209d39SAndroid Build Coastguard Worker * @return index after last parsed character, or -1 on parse failure.
242*0e209d39SAndroid Build Coastguard Worker */
parsePattern(const UnicodeString & pat,const Replaceable & text,int32_t index,int32_t limit)243*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
244*0e209d39SAndroid Build Coastguard Worker const Replaceable& text,
245*0e209d39SAndroid Build Coastguard Worker int32_t index,
246*0e209d39SAndroid Build Coastguard Worker int32_t limit) {
247*0e209d39SAndroid Build Coastguard Worker int32_t ipat = 0;
248*0e209d39SAndroid Build Coastguard Worker
249*0e209d39SAndroid Build Coastguard Worker // empty pattern matches immediately
250*0e209d39SAndroid Build Coastguard Worker if (ipat == pat.length()) {
251*0e209d39SAndroid Build Coastguard Worker return index;
252*0e209d39SAndroid Build Coastguard Worker }
253*0e209d39SAndroid Build Coastguard Worker
254*0e209d39SAndroid Build Coastguard Worker UChar32 cpat = pat.char32At(ipat);
255*0e209d39SAndroid Build Coastguard Worker
256*0e209d39SAndroid Build Coastguard Worker while (index < limit) {
257*0e209d39SAndroid Build Coastguard Worker UChar32 c = text.char32At(index);
258*0e209d39SAndroid Build Coastguard Worker
259*0e209d39SAndroid Build Coastguard Worker // parse \s*
260*0e209d39SAndroid Build Coastguard Worker if (cpat == 126 /*~*/) {
261*0e209d39SAndroid Build Coastguard Worker if (PatternProps::isWhiteSpace(c)) {
262*0e209d39SAndroid Build Coastguard Worker index += U16_LENGTH(c);
263*0e209d39SAndroid Build Coastguard Worker continue;
264*0e209d39SAndroid Build Coastguard Worker } else {
265*0e209d39SAndroid Build Coastguard Worker if (++ipat == pat.length()) {
266*0e209d39SAndroid Build Coastguard Worker return index; // success; c unparsed
267*0e209d39SAndroid Build Coastguard Worker }
268*0e209d39SAndroid Build Coastguard Worker // fall thru; process c again with next cpat
269*0e209d39SAndroid Build Coastguard Worker }
270*0e209d39SAndroid Build Coastguard Worker }
271*0e209d39SAndroid Build Coastguard Worker
272*0e209d39SAndroid Build Coastguard Worker // parse literal
273*0e209d39SAndroid Build Coastguard Worker else if (c == cpat) {
274*0e209d39SAndroid Build Coastguard Worker index += U16_LENGTH(c);
275*0e209d39SAndroid Build Coastguard Worker ipat += U16_LENGTH(cpat);
276*0e209d39SAndroid Build Coastguard Worker if (ipat == pat.length()) {
277*0e209d39SAndroid Build Coastguard Worker return index; // success; c parsed
278*0e209d39SAndroid Build Coastguard Worker }
279*0e209d39SAndroid Build Coastguard Worker // fall thru; get next cpat
280*0e209d39SAndroid Build Coastguard Worker }
281*0e209d39SAndroid Build Coastguard Worker
282*0e209d39SAndroid Build Coastguard Worker // match failure of literal
283*0e209d39SAndroid Build Coastguard Worker else {
284*0e209d39SAndroid Build Coastguard Worker return -1;
285*0e209d39SAndroid Build Coastguard Worker }
286*0e209d39SAndroid Build Coastguard Worker
287*0e209d39SAndroid Build Coastguard Worker cpat = pat.char32At(ipat);
288*0e209d39SAndroid Build Coastguard Worker }
289*0e209d39SAndroid Build Coastguard Worker
290*0e209d39SAndroid Build Coastguard Worker return -1; // text ended before end of pat
291*0e209d39SAndroid Build Coastguard Worker }
292*0e209d39SAndroid Build Coastguard Worker
parseAsciiInteger(const UnicodeString & str,int32_t & pos)293*0e209d39SAndroid Build Coastguard Worker int32_t ICU_Utility::parseAsciiInteger(const UnicodeString& str, int32_t& pos) {
294*0e209d39SAndroid Build Coastguard Worker int32_t result = 0;
295*0e209d39SAndroid Build Coastguard Worker char16_t c;
296*0e209d39SAndroid Build Coastguard Worker while (pos < str.length() && (c = str.charAt(pos)) >= u'0' && c <= u'9') {
297*0e209d39SAndroid Build Coastguard Worker result = result * 10 + (c - u'0');
298*0e209d39SAndroid Build Coastguard Worker pos++;
299*0e209d39SAndroid Build Coastguard Worker }
300*0e209d39SAndroid Build Coastguard Worker return result;
301*0e209d39SAndroid Build Coastguard Worker }
302*0e209d39SAndroid Build Coastguard Worker
303*0e209d39SAndroid Build Coastguard Worker /**
304*0e209d39SAndroid Build Coastguard Worker * Append a character to a rule that is being built up. To flush
305*0e209d39SAndroid Build Coastguard Worker * the quoteBuf to rule, make one final call with isLiteral == true.
306*0e209d39SAndroid Build Coastguard Worker * If there is no final character, pass in (UChar32)-1 as c.
307*0e209d39SAndroid Build Coastguard Worker * @param rule the string to append the character to
308*0e209d39SAndroid Build Coastguard Worker * @param c the character to append, or (UChar32)-1 if none.
309*0e209d39SAndroid Build Coastguard Worker * @param isLiteral if true, then the given character should not be
310*0e209d39SAndroid Build Coastguard Worker * quoted or escaped. Usually this means it is a syntactic element
311*0e209d39SAndroid Build Coastguard Worker * such as > or $
312*0e209d39SAndroid Build Coastguard Worker * @param escapeUnprintable if true, then unprintable characters
313*0e209d39SAndroid Build Coastguard Worker * should be escaped using \uxxxx or \Uxxxxxxxx. These escapes will
314*0e209d39SAndroid Build Coastguard Worker * appear outside of quotes.
315*0e209d39SAndroid Build Coastguard Worker * @param quoteBuf a buffer which is used to build up quoted
316*0e209d39SAndroid Build Coastguard Worker * substrings. The caller should initially supply an empty buffer,
317*0e209d39SAndroid Build Coastguard Worker * and thereafter should not modify the buffer. The buffer should be
318*0e209d39SAndroid Build Coastguard Worker * cleared out by, at the end, calling this method with a literal
319*0e209d39SAndroid Build Coastguard Worker * character.
320*0e209d39SAndroid Build Coastguard Worker */
appendToRule(UnicodeString & rule,UChar32 c,UBool isLiteral,UBool escapeUnprintable,UnicodeString & quoteBuf)321*0e209d39SAndroid Build Coastguard Worker void ICU_Utility::appendToRule(UnicodeString& rule,
322*0e209d39SAndroid Build Coastguard Worker UChar32 c,
323*0e209d39SAndroid Build Coastguard Worker UBool isLiteral,
324*0e209d39SAndroid Build Coastguard Worker UBool escapeUnprintable,
325*0e209d39SAndroid Build Coastguard Worker UnicodeString& quoteBuf) {
326*0e209d39SAndroid Build Coastguard Worker // If we are escaping unprintables, then escape them outside
327*0e209d39SAndroid Build Coastguard Worker // quotes. \u and \U are not recognized within quotes. The same
328*0e209d39SAndroid Build Coastguard Worker // logic applies to literals, but literals are never escaped.
329*0e209d39SAndroid Build Coastguard Worker if (isLiteral ||
330*0e209d39SAndroid Build Coastguard Worker (escapeUnprintable && ICU_Utility::isUnprintable(c))) {
331*0e209d39SAndroid Build Coastguard Worker if (quoteBuf.length() > 0) {
332*0e209d39SAndroid Build Coastguard Worker // We prefer backslash APOSTROPHE to double APOSTROPHE
333*0e209d39SAndroid Build Coastguard Worker // (more readable, less similar to ") so if there are
334*0e209d39SAndroid Build Coastguard Worker // double APOSTROPHEs at the ends, we pull them outside
335*0e209d39SAndroid Build Coastguard Worker // of the quote.
336*0e209d39SAndroid Build Coastguard Worker
337*0e209d39SAndroid Build Coastguard Worker // If the first thing in the quoteBuf is APOSTROPHE
338*0e209d39SAndroid Build Coastguard Worker // (doubled) then pull it out.
339*0e209d39SAndroid Build Coastguard Worker while (quoteBuf.length() >= 2 &&
340*0e209d39SAndroid Build Coastguard Worker quoteBuf.charAt(0) == APOSTROPHE &&
341*0e209d39SAndroid Build Coastguard Worker quoteBuf.charAt(1) == APOSTROPHE) {
342*0e209d39SAndroid Build Coastguard Worker rule.append(BACKSLASH).append(APOSTROPHE);
343*0e209d39SAndroid Build Coastguard Worker quoteBuf.remove(0, 2);
344*0e209d39SAndroid Build Coastguard Worker }
345*0e209d39SAndroid Build Coastguard Worker // If the last thing in the quoteBuf is APOSTROPHE
346*0e209d39SAndroid Build Coastguard Worker // (doubled) then remove and count it and add it after.
347*0e209d39SAndroid Build Coastguard Worker int32_t trailingCount = 0;
348*0e209d39SAndroid Build Coastguard Worker while (quoteBuf.length() >= 2 &&
349*0e209d39SAndroid Build Coastguard Worker quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
350*0e209d39SAndroid Build Coastguard Worker quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
351*0e209d39SAndroid Build Coastguard Worker quoteBuf.truncate(quoteBuf.length()-2);
352*0e209d39SAndroid Build Coastguard Worker ++trailingCount;
353*0e209d39SAndroid Build Coastguard Worker }
354*0e209d39SAndroid Build Coastguard Worker if (quoteBuf.length() > 0) {
355*0e209d39SAndroid Build Coastguard Worker rule.append(APOSTROPHE);
356*0e209d39SAndroid Build Coastguard Worker rule.append(quoteBuf);
357*0e209d39SAndroid Build Coastguard Worker rule.append(APOSTROPHE);
358*0e209d39SAndroid Build Coastguard Worker quoteBuf.truncate(0);
359*0e209d39SAndroid Build Coastguard Worker }
360*0e209d39SAndroid Build Coastguard Worker while (trailingCount-- > 0) {
361*0e209d39SAndroid Build Coastguard Worker rule.append(BACKSLASH).append(APOSTROPHE);
362*0e209d39SAndroid Build Coastguard Worker }
363*0e209d39SAndroid Build Coastguard Worker }
364*0e209d39SAndroid Build Coastguard Worker if (c != (UChar32)-1) {
365*0e209d39SAndroid Build Coastguard Worker /* Since spaces are ignored during parsing, they are
366*0e209d39SAndroid Build Coastguard Worker * emitted only for readability. We emit one here
367*0e209d39SAndroid Build Coastguard Worker * only if there isn't already one at the end of the
368*0e209d39SAndroid Build Coastguard Worker * rule.
369*0e209d39SAndroid Build Coastguard Worker */
370*0e209d39SAndroid Build Coastguard Worker if (c == SPACE) {
371*0e209d39SAndroid Build Coastguard Worker int32_t len = rule.length();
372*0e209d39SAndroid Build Coastguard Worker if (len > 0 && rule.charAt(len-1) != c) {
373*0e209d39SAndroid Build Coastguard Worker rule.append(c);
374*0e209d39SAndroid Build Coastguard Worker }
375*0e209d39SAndroid Build Coastguard Worker } else if (!escapeUnprintable || !ICU_Utility::escapeUnprintable(rule, c)) {
376*0e209d39SAndroid Build Coastguard Worker rule.append(c);
377*0e209d39SAndroid Build Coastguard Worker }
378*0e209d39SAndroid Build Coastguard Worker }
379*0e209d39SAndroid Build Coastguard Worker }
380*0e209d39SAndroid Build Coastguard Worker
381*0e209d39SAndroid Build Coastguard Worker // Escape ' and '\' and don't begin a quote just for them
382*0e209d39SAndroid Build Coastguard Worker else if (quoteBuf.length() == 0 &&
383*0e209d39SAndroid Build Coastguard Worker (c == APOSTROPHE || c == BACKSLASH)) {
384*0e209d39SAndroid Build Coastguard Worker rule.append(BACKSLASH);
385*0e209d39SAndroid Build Coastguard Worker rule.append(c);
386*0e209d39SAndroid Build Coastguard Worker }
387*0e209d39SAndroid Build Coastguard Worker
388*0e209d39SAndroid Build Coastguard Worker // Specials (printable ascii that isn't [0-9a-zA-Z]) and
389*0e209d39SAndroid Build Coastguard Worker // whitespace need quoting. Also append stuff to quotes if we are
390*0e209d39SAndroid Build Coastguard Worker // building up a quoted substring already.
391*0e209d39SAndroid Build Coastguard Worker else if (quoteBuf.length() > 0 ||
392*0e209d39SAndroid Build Coastguard Worker (c >= 0x0021 && c <= 0x007E &&
393*0e209d39SAndroid Build Coastguard Worker !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
394*0e209d39SAndroid Build Coastguard Worker (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
395*0e209d39SAndroid Build Coastguard Worker (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
396*0e209d39SAndroid Build Coastguard Worker PatternProps::isWhiteSpace(c)) {
397*0e209d39SAndroid Build Coastguard Worker quoteBuf.append(c);
398*0e209d39SAndroid Build Coastguard Worker // Double ' within a quote
399*0e209d39SAndroid Build Coastguard Worker if (c == APOSTROPHE) {
400*0e209d39SAndroid Build Coastguard Worker quoteBuf.append(c);
401*0e209d39SAndroid Build Coastguard Worker }
402*0e209d39SAndroid Build Coastguard Worker }
403*0e209d39SAndroid Build Coastguard Worker
404*0e209d39SAndroid Build Coastguard Worker // Otherwise just append
405*0e209d39SAndroid Build Coastguard Worker else {
406*0e209d39SAndroid Build Coastguard Worker rule.append(c);
407*0e209d39SAndroid Build Coastguard Worker }
408*0e209d39SAndroid Build Coastguard Worker }
409*0e209d39SAndroid Build Coastguard Worker
appendToRule(UnicodeString & rule,const UnicodeString & text,UBool isLiteral,UBool escapeUnprintable,UnicodeString & quoteBuf)410*0e209d39SAndroid Build Coastguard Worker void ICU_Utility::appendToRule(UnicodeString& rule,
411*0e209d39SAndroid Build Coastguard Worker const UnicodeString& text,
412*0e209d39SAndroid Build Coastguard Worker UBool isLiteral,
413*0e209d39SAndroid Build Coastguard Worker UBool escapeUnprintable,
414*0e209d39SAndroid Build Coastguard Worker UnicodeString& quoteBuf) {
415*0e209d39SAndroid Build Coastguard Worker for (int32_t i=0; i<text.length(); ++i) {
416*0e209d39SAndroid Build Coastguard Worker appendToRule(rule, text[i], isLiteral, escapeUnprintable, quoteBuf);
417*0e209d39SAndroid Build Coastguard Worker }
418*0e209d39SAndroid Build Coastguard Worker }
419*0e209d39SAndroid Build Coastguard Worker
420*0e209d39SAndroid Build Coastguard Worker /**
421*0e209d39SAndroid Build Coastguard Worker * Given a matcher reference, which may be null, append its
422*0e209d39SAndroid Build Coastguard Worker * pattern as a literal to the given rule.
423*0e209d39SAndroid Build Coastguard Worker */
appendToRule(UnicodeString & rule,const UnicodeMatcher * matcher,UBool escapeUnprintable,UnicodeString & quoteBuf)424*0e209d39SAndroid Build Coastguard Worker void ICU_Utility::appendToRule(UnicodeString& rule,
425*0e209d39SAndroid Build Coastguard Worker const UnicodeMatcher* matcher,
426*0e209d39SAndroid Build Coastguard Worker UBool escapeUnprintable,
427*0e209d39SAndroid Build Coastguard Worker UnicodeString& quoteBuf) {
428*0e209d39SAndroid Build Coastguard Worker if (matcher != nullptr) {
429*0e209d39SAndroid Build Coastguard Worker UnicodeString pat;
430*0e209d39SAndroid Build Coastguard Worker appendToRule(rule, matcher->toPattern(pat, escapeUnprintable),
431*0e209d39SAndroid Build Coastguard Worker true, escapeUnprintable, quoteBuf);
432*0e209d39SAndroid Build Coastguard Worker }
433*0e209d39SAndroid Build Coastguard Worker }
434*0e209d39SAndroid Build Coastguard Worker
435*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
436