xref: /aosp_15_r20/external/pcre/src/pcre2_convert.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2022 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
42*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
43*22dc650dSSadaf Ebrahimi #include "config.h"
44*22dc650dSSadaf Ebrahimi #endif
45*22dc650dSSadaf Ebrahimi 
46*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
47*22dc650dSSadaf Ebrahimi 
/* Option bits that name the kind of pattern being converted. */

#define TYPE_OPTIONS \
  (PCRE2_CONVERT_GLOB | \
   PCRE2_CONVERT_POSIX_BASIC | \
   PCRE2_CONVERT_POSIX_EXTENDED)

/* The type bits together with the UTF, no-UTF-check and glob modifier bits.
NOTE(review): presumably used to reject unknown option bits - confirm against
the caller, which is outside this part of the file. */

#define ALL_OPTIONS \
  (PCRE2_CONVERT_UTF | \
   PCRE2_CONVERT_NO_UTF_CHECK | \
   PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR | \
   PCRE2_CONVERT_GLOB_NO_STARSTAR | \
   TYPE_OPTIONS)

/* NOTE(review): presumably the size, in code units, of the scratch buffer
used for a dummy (length-measuring) run - confirm against the caller. */

#define DUMMY_BUFFER_SIZE 100
57*22dc650dSSadaf Ebrahimi 
/* Generated pattern fragments. Each one is a concatenation of the single
character STR_xxx string macros; going by those macro names the fragments
spell, in order: backslash-A, backslash-z, ":]", ".*(?<=", "(?!" backslash
".)", "(?s)" and "(*NUL)". */

#define STR_BACKSLASH_A \
  STR_BACKSLASH STR_A

#define STR_BACKSLASH_z \
  STR_BACKSLASH STR_z

#define STR_COLON_RIGHT_SQUARE_BRACKET \
  STR_COLON STR_RIGHT_SQUARE_BRACKET

#define STR_DOT_STAR_LOOKBEHIND \
  STR_DOT STR_ASTERISK \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN

#define STR_LOOKAHEAD_NOT_DOT \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK \
  STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS

#define STR_QUERY_s \
  STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS

#define STR_STAR_NUL \
  STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
67*22dc650dSSadaf Ebrahimi 
/* States for POSIX processing. The numeric order is significant: the
converter tests "state >= POSIX_CLASS_NOT_STARTED" to mean "inside a bracket
expression" and "state < POSIX_NOT_BRACKET" to mean "nothing that can be
repeated has been seen yet". */

enum {
  POSIX_START_REGEX,        /* Initial state, nothing processed yet */
  POSIX_ANCHORED,           /* Just after a leading ^ in a basic pattern */
  POSIX_NOT_BRACKET,        /* Outside brackets, after some other item */
  POSIX_CLASS_NOT_STARTED,  /* In brackets, not inside a [:name:] item */
  POSIX_CLASS_STARTING,     /* In brackets, an inner [ has been seen */
  POSIX_CLASS_STARTED       /* In brackets, after the colon of a [: item */
};
72*22dc650dSSadaf Ebrahimi 
/* Macro to add a character string to the output buffer, checking for overflow.
It relies on three variables of the function in which it is expanded: "s" (a
char * scratch cursor), "p" (the current output position) and "endp" (the
output limit). If the limit is reached before the whole string has been
copied, the enclosing function returns PCRE2_ERROR_NOMEMORY.

The body is wrapped in do ... while (0) so that "PUTCHARS(x);" is exactly one
statement. A bare brace block followed by a semicolon is two statements, which
breaks when the macro is used as the body of an unbraced if that has an else. */

#define PUTCHARS(string) \
  do \
    { \
    for (s = (char *)(string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
83*22dc650dSSadaf Ebrahimi 
84*22dc650dSSadaf Ebrahimi /* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
85*22dc650dSSadaf Ebrahimi 
86*22dc650dSSadaf Ebrahimi static const char *pcre2_escaped_literals =
87*22dc650dSSadaf Ebrahimi   STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
88*22dc650dSSadaf Ebrahimi   STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
89*22dc650dSSadaf Ebrahimi   STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
90*22dc650dSSadaf Ebrahimi   STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
91*22dc650dSSadaf Ebrahimi   STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
92*22dc650dSSadaf Ebrahimi 
93*22dc650dSSadaf Ebrahimi /* Recognized escaped metacharacters in POSIX basic patterns. */
94*22dc650dSSadaf Ebrahimi 
95*22dc650dSSadaf Ebrahimi static const char *posix_meta_escapes =
96*22dc650dSSadaf Ebrahimi   STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
97*22dc650dSSadaf Ebrahimi   STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
98*22dc650dSSadaf Ebrahimi   STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
99*22dc650dSSadaf Ebrahimi 
100*22dc650dSSadaf Ebrahimi 
101*22dc650dSSadaf Ebrahimi 
102*22dc650dSSadaf Ebrahimi /*************************************************
103*22dc650dSSadaf Ebrahimi *           Convert a POSIX pattern              *
104*22dc650dSSadaf Ebrahimi *************************************************/
105*22dc650dSSadaf Ebrahimi 
106*22dc650dSSadaf Ebrahimi /* This function handles both basic and extended POSIX patterns.
107*22dc650dSSadaf Ebrahimi 
108*22dc650dSSadaf Ebrahimi Arguments:
109*22dc650dSSadaf Ebrahimi   pattype        the pattern type
110*22dc650dSSadaf Ebrahimi   pattern        the pattern
111*22dc650dSSadaf Ebrahimi   plength        length in code units
112*22dc650dSSadaf Ebrahimi   utf            TRUE if UTF
113*22dc650dSSadaf Ebrahimi   use_buffer     where to put the output
114*22dc650dSSadaf Ebrahimi   use_length     length of use_buffer
115*22dc650dSSadaf Ebrahimi   bufflenptr     where to put the used length
116*22dc650dSSadaf Ebrahimi   dummyrun       TRUE if a dummy run
117*22dc650dSSadaf Ebrahimi   ccontext       the convert context
118*22dc650dSSadaf Ebrahimi 
119*22dc650dSSadaf Ebrahimi Returns:         0 => success
120*22dc650dSSadaf Ebrahimi                 !0 => error code
121*22dc650dSSadaf Ebrahimi */
122*22dc650dSSadaf Ebrahimi 
123*22dc650dSSadaf Ebrahimi static int
convert_posix(uint32_t pattype,PCRE2_SPTR pattern,PCRE2_SIZE plength,BOOL utf,PCRE2_UCHAR * use_buffer,PCRE2_SIZE use_length,PCRE2_SIZE * bufflenptr,BOOL dummyrun,pcre2_convert_context * ccontext)124*22dc650dSSadaf Ebrahimi convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
125*22dc650dSSadaf Ebrahimi   BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
126*22dc650dSSadaf Ebrahimi   PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
127*22dc650dSSadaf Ebrahimi {
128*22dc650dSSadaf Ebrahimi char *s;
129*22dc650dSSadaf Ebrahimi PCRE2_SPTR posix = pattern;
130*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *p = use_buffer;
131*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *pp = p;
132*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
133*22dc650dSSadaf Ebrahimi PCRE2_SIZE convlength = 0;
134*22dc650dSSadaf Ebrahimi 
135*22dc650dSSadaf Ebrahimi uint32_t bracount = 0;
136*22dc650dSSadaf Ebrahimi uint32_t posix_state = POSIX_START_REGEX;
137*22dc650dSSadaf Ebrahimi uint32_t lastspecial = 0;
138*22dc650dSSadaf Ebrahimi BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
139*22dc650dSSadaf Ebrahimi BOOL nextisliteral = FALSE;
140*22dc650dSSadaf Ebrahimi 
141*22dc650dSSadaf Ebrahimi (void)utf;       /* Not used when Unicode not supported */
142*22dc650dSSadaf Ebrahimi (void)ccontext;  /* Not currently used */
143*22dc650dSSadaf Ebrahimi 
144*22dc650dSSadaf Ebrahimi /* Initialize default for error offset as end of input. */
145*22dc650dSSadaf Ebrahimi 
146*22dc650dSSadaf Ebrahimi *bufflenptr = plength;
147*22dc650dSSadaf Ebrahimi PUTCHARS(STR_STAR_NUL);
148*22dc650dSSadaf Ebrahimi 
149*22dc650dSSadaf Ebrahimi /* Now scan the input. */
150*22dc650dSSadaf Ebrahimi 
151*22dc650dSSadaf Ebrahimi while (plength > 0)
152*22dc650dSSadaf Ebrahimi   {
153*22dc650dSSadaf Ebrahimi   uint32_t c, sc;
154*22dc650dSSadaf Ebrahimi   int clength = 1;
155*22dc650dSSadaf Ebrahimi 
156*22dc650dSSadaf Ebrahimi   /* Add in the length of the last item, then, if in the dummy run, pull the
157*22dc650dSSadaf Ebrahimi   pointer back to the start of the (temporary) buffer and then remember the
158*22dc650dSSadaf Ebrahimi   start of the next item. */
159*22dc650dSSadaf Ebrahimi 
160*22dc650dSSadaf Ebrahimi   convlength += p - pp;
161*22dc650dSSadaf Ebrahimi   if (dummyrun) p = use_buffer;
162*22dc650dSSadaf Ebrahimi   pp = p;
163*22dc650dSSadaf Ebrahimi 
164*22dc650dSSadaf Ebrahimi   /* Pick up the next character */
165*22dc650dSSadaf Ebrahimi 
166*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE
167*22dc650dSSadaf Ebrahimi   c = *posix;
168*22dc650dSSadaf Ebrahimi #else
169*22dc650dSSadaf Ebrahimi   GETCHARLENTEST(c, posix, clength);
170*22dc650dSSadaf Ebrahimi #endif
171*22dc650dSSadaf Ebrahimi   posix += clength;
172*22dc650dSSadaf Ebrahimi   plength -= clength;
173*22dc650dSSadaf Ebrahimi 
174*22dc650dSSadaf Ebrahimi   sc = nextisliteral? 0 : c;
175*22dc650dSSadaf Ebrahimi   nextisliteral = FALSE;
176*22dc650dSSadaf Ebrahimi 
177*22dc650dSSadaf Ebrahimi   /* Handle a character within a class. */
178*22dc650dSSadaf Ebrahimi 
179*22dc650dSSadaf Ebrahimi   if (posix_state >= POSIX_CLASS_NOT_STARTED)
180*22dc650dSSadaf Ebrahimi     {
181*22dc650dSSadaf Ebrahimi     if (c == CHAR_RIGHT_SQUARE_BRACKET)
182*22dc650dSSadaf Ebrahimi       {
183*22dc650dSSadaf Ebrahimi       PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
184*22dc650dSSadaf Ebrahimi       posix_state = POSIX_NOT_BRACKET;
185*22dc650dSSadaf Ebrahimi       }
186*22dc650dSSadaf Ebrahimi 
187*22dc650dSSadaf Ebrahimi     /* Not the end of the class */
188*22dc650dSSadaf Ebrahimi 
189*22dc650dSSadaf Ebrahimi     else
190*22dc650dSSadaf Ebrahimi       {
191*22dc650dSSadaf Ebrahimi       switch (posix_state)
192*22dc650dSSadaf Ebrahimi         {
193*22dc650dSSadaf Ebrahimi         case POSIX_CLASS_STARTED:
194*22dc650dSSadaf Ebrahimi         if (c <= 127 && islower(c)) break;  /* Remain in started state */
195*22dc650dSSadaf Ebrahimi         posix_state = POSIX_CLASS_NOT_STARTED;
196*22dc650dSSadaf Ebrahimi         if (c == CHAR_COLON  && plength > 0 &&
197*22dc650dSSadaf Ebrahimi             *posix == CHAR_RIGHT_SQUARE_BRACKET)
198*22dc650dSSadaf Ebrahimi           {
199*22dc650dSSadaf Ebrahimi           PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
200*22dc650dSSadaf Ebrahimi           plength--;
201*22dc650dSSadaf Ebrahimi           posix++;
202*22dc650dSSadaf Ebrahimi           continue;    /* With next character after :] */
203*22dc650dSSadaf Ebrahimi           }
204*22dc650dSSadaf Ebrahimi         /* Fall through */
205*22dc650dSSadaf Ebrahimi 
206*22dc650dSSadaf Ebrahimi         case POSIX_CLASS_NOT_STARTED:
207*22dc650dSSadaf Ebrahimi         if (c == CHAR_LEFT_SQUARE_BRACKET)
208*22dc650dSSadaf Ebrahimi           posix_state = POSIX_CLASS_STARTING;
209*22dc650dSSadaf Ebrahimi         break;
210*22dc650dSSadaf Ebrahimi 
211*22dc650dSSadaf Ebrahimi         case POSIX_CLASS_STARTING:
212*22dc650dSSadaf Ebrahimi         if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
213*22dc650dSSadaf Ebrahimi         break;
214*22dc650dSSadaf Ebrahimi         }
215*22dc650dSSadaf Ebrahimi 
216*22dc650dSSadaf Ebrahimi       if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
217*22dc650dSSadaf Ebrahimi       if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
218*22dc650dSSadaf Ebrahimi       memcpy(p, posix - clength, CU2BYTES(clength));
219*22dc650dSSadaf Ebrahimi       p += clength;
220*22dc650dSSadaf Ebrahimi       }
221*22dc650dSSadaf Ebrahimi     }
222*22dc650dSSadaf Ebrahimi 
223*22dc650dSSadaf Ebrahimi   /* Handle a character not within a class. */
224*22dc650dSSadaf Ebrahimi 
225*22dc650dSSadaf Ebrahimi   else switch(sc)
226*22dc650dSSadaf Ebrahimi     {
227*22dc650dSSadaf Ebrahimi     case CHAR_LEFT_SQUARE_BRACKET:
228*22dc650dSSadaf Ebrahimi     PUTCHARS(STR_LEFT_SQUARE_BRACKET);
229*22dc650dSSadaf Ebrahimi 
230*22dc650dSSadaf Ebrahimi #ifdef NEVER
231*22dc650dSSadaf Ebrahimi     /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
232*22dc650dSSadaf Ebrahimi     support) but they are not part of POSIX 1003.1. */
233*22dc650dSSadaf Ebrahimi 
234*22dc650dSSadaf Ebrahimi     if (plength >= 6)
235*22dc650dSSadaf Ebrahimi       {
236*22dc650dSSadaf Ebrahimi       if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
237*22dc650dSSadaf Ebrahimi           posix[1] == CHAR_COLON &&
238*22dc650dSSadaf Ebrahimi           (posix[2] == CHAR_LESS_THAN_SIGN ||
239*22dc650dSSadaf Ebrahimi            posix[2] == CHAR_GREATER_THAN_SIGN) &&
240*22dc650dSSadaf Ebrahimi           posix[3] == CHAR_COLON &&
241*22dc650dSSadaf Ebrahimi           posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
242*22dc650dSSadaf Ebrahimi           posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
243*22dc650dSSadaf Ebrahimi         {
244*22dc650dSSadaf Ebrahimi         if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
245*22dc650dSSadaf Ebrahimi         memcpy(p, posix, CU2BYTES(6));
246*22dc650dSSadaf Ebrahimi         p += 6;
247*22dc650dSSadaf Ebrahimi         posix += 6;
248*22dc650dSSadaf Ebrahimi         plength -= 6;
249*22dc650dSSadaf Ebrahimi         continue;  /* With next character */
250*22dc650dSSadaf Ebrahimi         }
251*22dc650dSSadaf Ebrahimi       }
252*22dc650dSSadaf Ebrahimi #endif
253*22dc650dSSadaf Ebrahimi 
254*22dc650dSSadaf Ebrahimi     /* Handle start of "normal" character classes */
255*22dc650dSSadaf Ebrahimi 
256*22dc650dSSadaf Ebrahimi     posix_state = POSIX_CLASS_NOT_STARTED;
257*22dc650dSSadaf Ebrahimi 
258*22dc650dSSadaf Ebrahimi     /* Handle ^ and ] as first characters */
259*22dc650dSSadaf Ebrahimi 
260*22dc650dSSadaf Ebrahimi     if (plength > 0)
261*22dc650dSSadaf Ebrahimi       {
262*22dc650dSSadaf Ebrahimi       if (*posix == CHAR_CIRCUMFLEX_ACCENT)
263*22dc650dSSadaf Ebrahimi         {
264*22dc650dSSadaf Ebrahimi         posix++;
265*22dc650dSSadaf Ebrahimi         plength--;
266*22dc650dSSadaf Ebrahimi         PUTCHARS(STR_CIRCUMFLEX_ACCENT);
267*22dc650dSSadaf Ebrahimi         }
268*22dc650dSSadaf Ebrahimi       if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
269*22dc650dSSadaf Ebrahimi         {
270*22dc650dSSadaf Ebrahimi         posix++;
271*22dc650dSSadaf Ebrahimi         plength--;
272*22dc650dSSadaf Ebrahimi         PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
273*22dc650dSSadaf Ebrahimi         }
274*22dc650dSSadaf Ebrahimi       }
275*22dc650dSSadaf Ebrahimi     break;
276*22dc650dSSadaf Ebrahimi 
277*22dc650dSSadaf Ebrahimi     case CHAR_BACKSLASH:
278*22dc650dSSadaf Ebrahimi     if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
279*22dc650dSSadaf Ebrahimi     if (extended) nextisliteral = TRUE; else
280*22dc650dSSadaf Ebrahimi       {
281*22dc650dSSadaf Ebrahimi       if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
282*22dc650dSSadaf Ebrahimi         {
283*22dc650dSSadaf Ebrahimi         if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
284*22dc650dSSadaf Ebrahimi         if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
285*22dc650dSSadaf Ebrahimi         lastspecial = *p++ = *posix++;
286*22dc650dSSadaf Ebrahimi         plength--;
287*22dc650dSSadaf Ebrahimi         }
288*22dc650dSSadaf Ebrahimi       else nextisliteral = TRUE;
289*22dc650dSSadaf Ebrahimi       }
290*22dc650dSSadaf Ebrahimi     break;
291*22dc650dSSadaf Ebrahimi 
292*22dc650dSSadaf Ebrahimi     case CHAR_RIGHT_PARENTHESIS:
293*22dc650dSSadaf Ebrahimi     if (!extended || bracount == 0) goto ESCAPE_LITERAL;
294*22dc650dSSadaf Ebrahimi     bracount--;
295*22dc650dSSadaf Ebrahimi     goto COPY_SPECIAL;
296*22dc650dSSadaf Ebrahimi 
297*22dc650dSSadaf Ebrahimi     case CHAR_LEFT_PARENTHESIS:
298*22dc650dSSadaf Ebrahimi     bracount++;
299*22dc650dSSadaf Ebrahimi     /* Fall through */
300*22dc650dSSadaf Ebrahimi 
301*22dc650dSSadaf Ebrahimi     case CHAR_QUESTION_MARK:
302*22dc650dSSadaf Ebrahimi     case CHAR_PLUS:
303*22dc650dSSadaf Ebrahimi     case CHAR_LEFT_CURLY_BRACKET:
304*22dc650dSSadaf Ebrahimi     case CHAR_RIGHT_CURLY_BRACKET:
305*22dc650dSSadaf Ebrahimi     case CHAR_VERTICAL_LINE:
306*22dc650dSSadaf Ebrahimi     if (!extended) goto ESCAPE_LITERAL;
307*22dc650dSSadaf Ebrahimi     /* Fall through */
308*22dc650dSSadaf Ebrahimi 
309*22dc650dSSadaf Ebrahimi     case CHAR_DOT:
310*22dc650dSSadaf Ebrahimi     case CHAR_DOLLAR_SIGN:
311*22dc650dSSadaf Ebrahimi     posix_state = POSIX_NOT_BRACKET;
312*22dc650dSSadaf Ebrahimi     COPY_SPECIAL:
313*22dc650dSSadaf Ebrahimi     lastspecial = c;
314*22dc650dSSadaf Ebrahimi     if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
315*22dc650dSSadaf Ebrahimi     *p++ = c;
316*22dc650dSSadaf Ebrahimi     break;
317*22dc650dSSadaf Ebrahimi 
318*22dc650dSSadaf Ebrahimi     case CHAR_ASTERISK:
319*22dc650dSSadaf Ebrahimi     if (lastspecial != CHAR_ASTERISK)
320*22dc650dSSadaf Ebrahimi       {
321*22dc650dSSadaf Ebrahimi       if (!extended && (posix_state < POSIX_NOT_BRACKET ||
322*22dc650dSSadaf Ebrahimi           lastspecial == CHAR_LEFT_PARENTHESIS))
323*22dc650dSSadaf Ebrahimi         goto ESCAPE_LITERAL;
324*22dc650dSSadaf Ebrahimi       goto COPY_SPECIAL;
325*22dc650dSSadaf Ebrahimi       }
326*22dc650dSSadaf Ebrahimi     break;   /* Ignore second and subsequent asterisks */
327*22dc650dSSadaf Ebrahimi 
328*22dc650dSSadaf Ebrahimi     case CHAR_CIRCUMFLEX_ACCENT:
329*22dc650dSSadaf Ebrahimi     if (extended) goto COPY_SPECIAL;
330*22dc650dSSadaf Ebrahimi     if (posix_state == POSIX_START_REGEX ||
331*22dc650dSSadaf Ebrahimi         lastspecial == CHAR_LEFT_PARENTHESIS)
332*22dc650dSSadaf Ebrahimi       {
333*22dc650dSSadaf Ebrahimi       posix_state = POSIX_ANCHORED;
334*22dc650dSSadaf Ebrahimi       goto COPY_SPECIAL;
335*22dc650dSSadaf Ebrahimi       }
336*22dc650dSSadaf Ebrahimi     /* Fall through */
337*22dc650dSSadaf Ebrahimi 
338*22dc650dSSadaf Ebrahimi     default:
339*22dc650dSSadaf Ebrahimi     if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
340*22dc650dSSadaf Ebrahimi       {
341*22dc650dSSadaf Ebrahimi       ESCAPE_LITERAL:
342*22dc650dSSadaf Ebrahimi       PUTCHARS(STR_BACKSLASH);
343*22dc650dSSadaf Ebrahimi       }
344*22dc650dSSadaf Ebrahimi     lastspecial = 0xff;  /* Indicates nothing special */
345*22dc650dSSadaf Ebrahimi     if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
346*22dc650dSSadaf Ebrahimi     memcpy(p, posix - clength, CU2BYTES(clength));
347*22dc650dSSadaf Ebrahimi     p += clength;
348*22dc650dSSadaf Ebrahimi     posix_state = POSIX_NOT_BRACKET;
349*22dc650dSSadaf Ebrahimi     break;
350*22dc650dSSadaf Ebrahimi     }
351*22dc650dSSadaf Ebrahimi   }
352*22dc650dSSadaf Ebrahimi 
353*22dc650dSSadaf Ebrahimi if (posix_state >= POSIX_CLASS_NOT_STARTED)
354*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
355*22dc650dSSadaf Ebrahimi convlength += p - pp;        /* Final segment */
356*22dc650dSSadaf Ebrahimi *bufflenptr = convlength;
357*22dc650dSSadaf Ebrahimi *p++ = 0;
358*22dc650dSSadaf Ebrahimi return 0;
359*22dc650dSSadaf Ebrahimi }
360*22dc650dSSadaf Ebrahimi 
361*22dc650dSSadaf Ebrahimi 
362*22dc650dSSadaf Ebrahimi /*************************************************
363*22dc650dSSadaf Ebrahimi *           Convert a glob pattern               *
364*22dc650dSSadaf Ebrahimi *************************************************/
365*22dc650dSSadaf Ebrahimi 
366*22dc650dSSadaf Ebrahimi /* Context for writing the output into a buffer. */
367*22dc650dSSadaf Ebrahimi 
368*22dc650dSSadaf Ebrahimi typedef struct pcre2_output_context {
369*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR *output;                  /* current output position */
370*22dc650dSSadaf Ebrahimi   PCRE2_SPTR output_end;                /* output end */
371*22dc650dSSadaf Ebrahimi   PCRE2_SIZE output_size;               /* size of the output */
372*22dc650dSSadaf Ebrahimi   uint8_t out_str[8];                   /* string copied to the output */
373*22dc650dSSadaf Ebrahimi } pcre2_output_context;
374*22dc650dSSadaf Ebrahimi 
375*22dc650dSSadaf Ebrahimi 
376*22dc650dSSadaf Ebrahimi /* Write a character into the output.
377*22dc650dSSadaf Ebrahimi 
378*22dc650dSSadaf Ebrahimi Arguments:
379*22dc650dSSadaf Ebrahimi   out            output context
380*22dc650dSSadaf Ebrahimi   chr            the next character
381*22dc650dSSadaf Ebrahimi */
382*22dc650dSSadaf Ebrahimi 
383*22dc650dSSadaf Ebrahimi static void
convert_glob_write(pcre2_output_context * out,PCRE2_UCHAR chr)384*22dc650dSSadaf Ebrahimi convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
385*22dc650dSSadaf Ebrahimi {
386*22dc650dSSadaf Ebrahimi out->output_size++;
387*22dc650dSSadaf Ebrahimi 
388*22dc650dSSadaf Ebrahimi if (out->output < out->output_end)
389*22dc650dSSadaf Ebrahimi   *out->output++ = chr;
390*22dc650dSSadaf Ebrahimi }
391*22dc650dSSadaf Ebrahimi 
392*22dc650dSSadaf Ebrahimi 
393*22dc650dSSadaf Ebrahimi /* Write a string into the output.
394*22dc650dSSadaf Ebrahimi 
395*22dc650dSSadaf Ebrahimi Arguments:
396*22dc650dSSadaf Ebrahimi   out            output context
397*22dc650dSSadaf Ebrahimi   length         length of out->out_str
398*22dc650dSSadaf Ebrahimi */
399*22dc650dSSadaf Ebrahimi 
400*22dc650dSSadaf Ebrahimi static void
convert_glob_write_str(pcre2_output_context * out,PCRE2_SIZE length)401*22dc650dSSadaf Ebrahimi convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
402*22dc650dSSadaf Ebrahimi {
403*22dc650dSSadaf Ebrahimi uint8_t *out_str = out->out_str;
404*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *output = out->output;
405*22dc650dSSadaf Ebrahimi PCRE2_SPTR output_end = out->output_end;
406*22dc650dSSadaf Ebrahimi PCRE2_SIZE output_size = out->output_size;
407*22dc650dSSadaf Ebrahimi 
408*22dc650dSSadaf Ebrahimi do
409*22dc650dSSadaf Ebrahimi   {
410*22dc650dSSadaf Ebrahimi   output_size++;
411*22dc650dSSadaf Ebrahimi 
412*22dc650dSSadaf Ebrahimi   if (output < output_end)
413*22dc650dSSadaf Ebrahimi     *output++ = *out_str++;
414*22dc650dSSadaf Ebrahimi   }
415*22dc650dSSadaf Ebrahimi while (--length != 0);
416*22dc650dSSadaf Ebrahimi 
417*22dc650dSSadaf Ebrahimi out->output = output;
418*22dc650dSSadaf Ebrahimi out->output_size = output_size;
419*22dc650dSSadaf Ebrahimi }
420*22dc650dSSadaf Ebrahimi 
421*22dc650dSSadaf Ebrahimi 
422*22dc650dSSadaf Ebrahimi /* Prints the separator into the output.
423*22dc650dSSadaf Ebrahimi 
424*22dc650dSSadaf Ebrahimi Arguments:
425*22dc650dSSadaf Ebrahimi   out            output context
426*22dc650dSSadaf Ebrahimi   separator      glob separator
427*22dc650dSSadaf Ebrahimi   with_escape    backslash is needed before separator
428*22dc650dSSadaf Ebrahimi */
429*22dc650dSSadaf Ebrahimi 
430*22dc650dSSadaf Ebrahimi static void
convert_glob_print_separator(pcre2_output_context * out,PCRE2_UCHAR separator,BOOL with_escape)431*22dc650dSSadaf Ebrahimi convert_glob_print_separator(pcre2_output_context *out,
432*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR separator, BOOL with_escape)
433*22dc650dSSadaf Ebrahimi {
434*22dc650dSSadaf Ebrahimi if (with_escape)
435*22dc650dSSadaf Ebrahimi   convert_glob_write(out, CHAR_BACKSLASH);
436*22dc650dSSadaf Ebrahimi 
437*22dc650dSSadaf Ebrahimi convert_glob_write(out, separator);
438*22dc650dSSadaf Ebrahimi }
439*22dc650dSSadaf Ebrahimi 
440*22dc650dSSadaf Ebrahimi 
441*22dc650dSSadaf Ebrahimi /* Prints a wildcard into the output.
442*22dc650dSSadaf Ebrahimi 
443*22dc650dSSadaf Ebrahimi Arguments:
444*22dc650dSSadaf Ebrahimi   out            output context
445*22dc650dSSadaf Ebrahimi   separator      glob separator
446*22dc650dSSadaf Ebrahimi   with_escape    backslash is needed before separator
447*22dc650dSSadaf Ebrahimi */
448*22dc650dSSadaf Ebrahimi 
449*22dc650dSSadaf Ebrahimi static void
convert_glob_print_wildcard(pcre2_output_context * out,PCRE2_UCHAR separator,BOOL with_escape)450*22dc650dSSadaf Ebrahimi convert_glob_print_wildcard(pcre2_output_context *out,
451*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR separator, BOOL with_escape)
452*22dc650dSSadaf Ebrahimi {
453*22dc650dSSadaf Ebrahimi out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
454*22dc650dSSadaf Ebrahimi out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
455*22dc650dSSadaf Ebrahimi convert_glob_write_str(out, 2);
456*22dc650dSSadaf Ebrahimi 
457*22dc650dSSadaf Ebrahimi convert_glob_print_separator(out, separator, with_escape);
458*22dc650dSSadaf Ebrahimi 
459*22dc650dSSadaf Ebrahimi convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
460*22dc650dSSadaf Ebrahimi }
461*22dc650dSSadaf Ebrahimi 
462*22dc650dSSadaf Ebrahimi 
463*22dc650dSSadaf Ebrahimi /* Parse a posix class.
464*22dc650dSSadaf Ebrahimi 
465*22dc650dSSadaf Ebrahimi Arguments:
466*22dc650dSSadaf Ebrahimi   from           starting point of scanning the range
467*22dc650dSSadaf Ebrahimi   pattern_end    end of pattern
468*22dc650dSSadaf Ebrahimi   out            output context
469*22dc650dSSadaf Ebrahimi 
470*22dc650dSSadaf Ebrahimi Returns:  >0 => class index
471*22dc650dSSadaf Ebrahimi           0  => malformed class
472*22dc650dSSadaf Ebrahimi */
473*22dc650dSSadaf Ebrahimi 
474*22dc650dSSadaf Ebrahimi static int
convert_glob_parse_class(PCRE2_SPTR * from,PCRE2_SPTR pattern_end,pcre2_output_context * out)475*22dc650dSSadaf Ebrahimi convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
476*22dc650dSSadaf Ebrahimi   pcre2_output_context *out)
477*22dc650dSSadaf Ebrahimi {
478*22dc650dSSadaf Ebrahimi static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
479*22dc650dSSadaf Ebrahimi   "graph:lower:print:punct:space:upper:word:xdigit:";
480*22dc650dSSadaf Ebrahimi PCRE2_SPTR start = *from + 1;
481*22dc650dSSadaf Ebrahimi PCRE2_SPTR pattern = start;
482*22dc650dSSadaf Ebrahimi const char *class_ptr;
483*22dc650dSSadaf Ebrahimi PCRE2_UCHAR c;
484*22dc650dSSadaf Ebrahimi int class_index;
485*22dc650dSSadaf Ebrahimi 
486*22dc650dSSadaf Ebrahimi while (TRUE)
487*22dc650dSSadaf Ebrahimi   {
488*22dc650dSSadaf Ebrahimi   if (pattern >= pattern_end) return 0;
489*22dc650dSSadaf Ebrahimi 
490*22dc650dSSadaf Ebrahimi   c = *pattern++;
491*22dc650dSSadaf Ebrahimi 
492*22dc650dSSadaf Ebrahimi   if (c < CHAR_a || c > CHAR_z) break;
493*22dc650dSSadaf Ebrahimi   }
494*22dc650dSSadaf Ebrahimi 
495*22dc650dSSadaf Ebrahimi if (c != CHAR_COLON || pattern >= pattern_end ||
496*22dc650dSSadaf Ebrahimi     *pattern != CHAR_RIGHT_SQUARE_BRACKET)
497*22dc650dSSadaf Ebrahimi   return 0;
498*22dc650dSSadaf Ebrahimi 
499*22dc650dSSadaf Ebrahimi class_ptr = posix_classes;
500*22dc650dSSadaf Ebrahimi class_index = 1;
501*22dc650dSSadaf Ebrahimi 
502*22dc650dSSadaf Ebrahimi while (TRUE)
503*22dc650dSSadaf Ebrahimi   {
504*22dc650dSSadaf Ebrahimi   if (*class_ptr == CHAR_NUL) return 0;
505*22dc650dSSadaf Ebrahimi 
506*22dc650dSSadaf Ebrahimi   pattern = start;
507*22dc650dSSadaf Ebrahimi 
508*22dc650dSSadaf Ebrahimi   while (*pattern == (PCRE2_UCHAR) *class_ptr)
509*22dc650dSSadaf Ebrahimi     {
510*22dc650dSSadaf Ebrahimi     if (*pattern == CHAR_COLON)
511*22dc650dSSadaf Ebrahimi       {
512*22dc650dSSadaf Ebrahimi       pattern += 2;
513*22dc650dSSadaf Ebrahimi       start -= 2;
514*22dc650dSSadaf Ebrahimi 
515*22dc650dSSadaf Ebrahimi       do convert_glob_write(out, *start++); while (start < pattern);
516*22dc650dSSadaf Ebrahimi 
517*22dc650dSSadaf Ebrahimi       *from = pattern;
518*22dc650dSSadaf Ebrahimi       return class_index;
519*22dc650dSSadaf Ebrahimi       }
520*22dc650dSSadaf Ebrahimi     pattern++;
521*22dc650dSSadaf Ebrahimi     class_ptr++;
522*22dc650dSSadaf Ebrahimi     }
523*22dc650dSSadaf Ebrahimi 
524*22dc650dSSadaf Ebrahimi   while (*class_ptr != CHAR_COLON) class_ptr++;
525*22dc650dSSadaf Ebrahimi   class_ptr++;
526*22dc650dSSadaf Ebrahimi   class_index++;
527*22dc650dSSadaf Ebrahimi   }
528*22dc650dSSadaf Ebrahimi }
529*22dc650dSSadaf Ebrahimi 
530*22dc650dSSadaf Ebrahimi /* Checks whether the character is in the class.
531*22dc650dSSadaf Ebrahimi 
532*22dc650dSSadaf Ebrahimi Arguments:
533*22dc650dSSadaf Ebrahimi   class_index    class index
534*22dc650dSSadaf Ebrahimi   c              character
535*22dc650dSSadaf Ebrahimi 
536*22dc650dSSadaf Ebrahimi Returns:   !0 => character is found in the class
537*22dc650dSSadaf Ebrahimi             0 => otherwise
538*22dc650dSSadaf Ebrahimi */
539*22dc650dSSadaf Ebrahimi 
540*22dc650dSSadaf Ebrahimi static BOOL
convert_glob_char_in_class(int class_index,PCRE2_UCHAR c)541*22dc650dSSadaf Ebrahimi convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
542*22dc650dSSadaf Ebrahimi {
543*22dc650dSSadaf Ebrahimi #if PCRE2_CODE_UNIT_WIDTH != 8
544*22dc650dSSadaf Ebrahimi if (c > 0xff)
545*22dc650dSSadaf Ebrahimi   {
546*22dc650dSSadaf Ebrahimi   /* ctype functions are not sane for c > 0xff */
547*22dc650dSSadaf Ebrahimi   return 0;
548*22dc650dSSadaf Ebrahimi   }
549*22dc650dSSadaf Ebrahimi #endif
550*22dc650dSSadaf Ebrahimi 
551*22dc650dSSadaf Ebrahimi switch (class_index)
552*22dc650dSSadaf Ebrahimi   {
553*22dc650dSSadaf Ebrahimi   case 1: return isalnum(c);
554*22dc650dSSadaf Ebrahimi   case 2: return isalpha(c);
555*22dc650dSSadaf Ebrahimi   case 3: return 1;
556*22dc650dSSadaf Ebrahimi   case 4: return c == CHAR_HT || c == CHAR_SPACE;
557*22dc650dSSadaf Ebrahimi   case 5: return iscntrl(c);
558*22dc650dSSadaf Ebrahimi   case 6: return isdigit(c);
559*22dc650dSSadaf Ebrahimi   case 7: return isgraph(c);
560*22dc650dSSadaf Ebrahimi   case 8: return islower(c);
561*22dc650dSSadaf Ebrahimi   case 9: return isprint(c);
562*22dc650dSSadaf Ebrahimi   case 10: return ispunct(c);
563*22dc650dSSadaf Ebrahimi   case 11: return isspace(c);
564*22dc650dSSadaf Ebrahimi   case 12: return isupper(c);
565*22dc650dSSadaf Ebrahimi   case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
566*22dc650dSSadaf Ebrahimi   default: return isxdigit(c);
567*22dc650dSSadaf Ebrahimi   }
568*22dc650dSSadaf Ebrahimi }
569*22dc650dSSadaf Ebrahimi 
570*22dc650dSSadaf Ebrahimi /* Parse a range of characters.
571*22dc650dSSadaf Ebrahimi 
572*22dc650dSSadaf Ebrahimi Arguments:
573*22dc650dSSadaf Ebrahimi   from           starting point of scanning the range
574*22dc650dSSadaf Ebrahimi   pattern_end    end of pattern
575*22dc650dSSadaf Ebrahimi   out            output context
576*22dc650dSSadaf Ebrahimi   separator      glob separator
577*22dc650dSSadaf Ebrahimi   with_escape    backslash is needed before separator
578*22dc650dSSadaf Ebrahimi 
579*22dc650dSSadaf Ebrahimi Returns:         0 => success
580*22dc650dSSadaf Ebrahimi                 !0 => error code
581*22dc650dSSadaf Ebrahimi */
582*22dc650dSSadaf Ebrahimi 
583*22dc650dSSadaf Ebrahimi static int
convert_glob_parse_range(PCRE2_SPTR * from,PCRE2_SPTR pattern_end,pcre2_output_context * out,BOOL utf,PCRE2_UCHAR separator,BOOL with_escape,PCRE2_UCHAR escape,BOOL no_wildsep)584*22dc650dSSadaf Ebrahimi convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
585*22dc650dSSadaf Ebrahimi   pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
586*22dc650dSSadaf Ebrahimi   BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
587*22dc650dSSadaf Ebrahimi {
588*22dc650dSSadaf Ebrahimi BOOL is_negative = FALSE;
589*22dc650dSSadaf Ebrahimi BOOL separator_seen = FALSE;
590*22dc650dSSadaf Ebrahimi BOOL has_prev_c;
591*22dc650dSSadaf Ebrahimi PCRE2_SPTR pattern = *from;
592*22dc650dSSadaf Ebrahimi PCRE2_SPTR char_start = NULL;
593*22dc650dSSadaf Ebrahimi uint32_t c, prev_c;
594*22dc650dSSadaf Ebrahimi int len, class_index;
595*22dc650dSSadaf Ebrahimi 
596*22dc650dSSadaf Ebrahimi (void)utf; /* Avoid compiler warning. */
597*22dc650dSSadaf Ebrahimi 
598*22dc650dSSadaf Ebrahimi if (pattern >= pattern_end)
599*22dc650dSSadaf Ebrahimi   {
600*22dc650dSSadaf Ebrahimi   *from = pattern;
601*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
602*22dc650dSSadaf Ebrahimi   }
603*22dc650dSSadaf Ebrahimi 
604*22dc650dSSadaf Ebrahimi if (*pattern == CHAR_EXCLAMATION_MARK
605*22dc650dSSadaf Ebrahimi     || *pattern == CHAR_CIRCUMFLEX_ACCENT)
606*22dc650dSSadaf Ebrahimi   {
607*22dc650dSSadaf Ebrahimi   pattern++;
608*22dc650dSSadaf Ebrahimi 
609*22dc650dSSadaf Ebrahimi   if (pattern >= pattern_end)
610*22dc650dSSadaf Ebrahimi     {
611*22dc650dSSadaf Ebrahimi     *from = pattern;
612*22dc650dSSadaf Ebrahimi     return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
613*22dc650dSSadaf Ebrahimi     }
614*22dc650dSSadaf Ebrahimi 
615*22dc650dSSadaf Ebrahimi   is_negative = TRUE;
616*22dc650dSSadaf Ebrahimi 
617*22dc650dSSadaf Ebrahimi   out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
618*22dc650dSSadaf Ebrahimi   out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
619*22dc650dSSadaf Ebrahimi   len = 2;
620*22dc650dSSadaf Ebrahimi 
621*22dc650dSSadaf Ebrahimi   if (!no_wildsep)
622*22dc650dSSadaf Ebrahimi     {
623*22dc650dSSadaf Ebrahimi     if (with_escape)
624*22dc650dSSadaf Ebrahimi       {
625*22dc650dSSadaf Ebrahimi       out->out_str[len] = CHAR_BACKSLASH;
626*22dc650dSSadaf Ebrahimi       len++;
627*22dc650dSSadaf Ebrahimi       }
628*22dc650dSSadaf Ebrahimi     out->out_str[len] = (uint8_t) separator;
629*22dc650dSSadaf Ebrahimi     }
630*22dc650dSSadaf Ebrahimi 
631*22dc650dSSadaf Ebrahimi   convert_glob_write_str(out, len + 1);
632*22dc650dSSadaf Ebrahimi   }
633*22dc650dSSadaf Ebrahimi else
634*22dc650dSSadaf Ebrahimi   convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
635*22dc650dSSadaf Ebrahimi 
636*22dc650dSSadaf Ebrahimi has_prev_c = FALSE;
637*22dc650dSSadaf Ebrahimi prev_c = 0;
638*22dc650dSSadaf Ebrahimi 
639*22dc650dSSadaf Ebrahimi if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
640*22dc650dSSadaf Ebrahimi   {
641*22dc650dSSadaf Ebrahimi   out->out_str[0] = CHAR_BACKSLASH;
642*22dc650dSSadaf Ebrahimi   out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
643*22dc650dSSadaf Ebrahimi   convert_glob_write_str(out, 2);
644*22dc650dSSadaf Ebrahimi   has_prev_c = TRUE;
645*22dc650dSSadaf Ebrahimi   prev_c = CHAR_RIGHT_SQUARE_BRACKET;
646*22dc650dSSadaf Ebrahimi   pattern++;
647*22dc650dSSadaf Ebrahimi   }
648*22dc650dSSadaf Ebrahimi 
649*22dc650dSSadaf Ebrahimi while (pattern < pattern_end)
650*22dc650dSSadaf Ebrahimi   {
651*22dc650dSSadaf Ebrahimi   char_start = pattern;
652*22dc650dSSadaf Ebrahimi   GETCHARINCTEST(c, pattern);
653*22dc650dSSadaf Ebrahimi 
654*22dc650dSSadaf Ebrahimi   if (c == CHAR_RIGHT_SQUARE_BRACKET)
655*22dc650dSSadaf Ebrahimi     {
656*22dc650dSSadaf Ebrahimi     convert_glob_write(out, c);
657*22dc650dSSadaf Ebrahimi 
658*22dc650dSSadaf Ebrahimi     if (!is_negative && !no_wildsep && separator_seen)
659*22dc650dSSadaf Ebrahimi       {
660*22dc650dSSadaf Ebrahimi       out->out_str[0] = CHAR_LEFT_PARENTHESIS;
661*22dc650dSSadaf Ebrahimi       out->out_str[1] = CHAR_QUESTION_MARK;
662*22dc650dSSadaf Ebrahimi       out->out_str[2] = CHAR_LESS_THAN_SIGN;
663*22dc650dSSadaf Ebrahimi       out->out_str[3] = CHAR_EXCLAMATION_MARK;
664*22dc650dSSadaf Ebrahimi       convert_glob_write_str(out, 4);
665*22dc650dSSadaf Ebrahimi 
666*22dc650dSSadaf Ebrahimi       convert_glob_print_separator(out, separator, with_escape);
667*22dc650dSSadaf Ebrahimi       convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
668*22dc650dSSadaf Ebrahimi       }
669*22dc650dSSadaf Ebrahimi 
670*22dc650dSSadaf Ebrahimi     *from = pattern;
671*22dc650dSSadaf Ebrahimi     return 0;
672*22dc650dSSadaf Ebrahimi     }
673*22dc650dSSadaf Ebrahimi 
674*22dc650dSSadaf Ebrahimi   if (pattern >= pattern_end) break;
675*22dc650dSSadaf Ebrahimi 
676*22dc650dSSadaf Ebrahimi   if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
677*22dc650dSSadaf Ebrahimi     {
678*22dc650dSSadaf Ebrahimi     *from = pattern;
679*22dc650dSSadaf Ebrahimi     class_index = convert_glob_parse_class(from, pattern_end, out);
680*22dc650dSSadaf Ebrahimi 
681*22dc650dSSadaf Ebrahimi     if (class_index != 0)
682*22dc650dSSadaf Ebrahimi       {
683*22dc650dSSadaf Ebrahimi       pattern = *from;
684*22dc650dSSadaf Ebrahimi 
685*22dc650dSSadaf Ebrahimi       has_prev_c = FALSE;
686*22dc650dSSadaf Ebrahimi       prev_c = 0;
687*22dc650dSSadaf Ebrahimi 
688*22dc650dSSadaf Ebrahimi       if (!is_negative &&
689*22dc650dSSadaf Ebrahimi           convert_glob_char_in_class (class_index, separator))
690*22dc650dSSadaf Ebrahimi         separator_seen = TRUE;
691*22dc650dSSadaf Ebrahimi       continue;
692*22dc650dSSadaf Ebrahimi       }
693*22dc650dSSadaf Ebrahimi     }
694*22dc650dSSadaf Ebrahimi   else if (c == CHAR_MINUS && has_prev_c &&
695*22dc650dSSadaf Ebrahimi            *pattern != CHAR_RIGHT_SQUARE_BRACKET)
696*22dc650dSSadaf Ebrahimi     {
697*22dc650dSSadaf Ebrahimi     convert_glob_write(out, CHAR_MINUS);
698*22dc650dSSadaf Ebrahimi 
699*22dc650dSSadaf Ebrahimi     char_start = pattern;
700*22dc650dSSadaf Ebrahimi     GETCHARINCTEST(c, pattern);
701*22dc650dSSadaf Ebrahimi 
702*22dc650dSSadaf Ebrahimi     if (pattern >= pattern_end) break;
703*22dc650dSSadaf Ebrahimi 
704*22dc650dSSadaf Ebrahimi     if (escape != 0 && c == escape)
705*22dc650dSSadaf Ebrahimi       {
706*22dc650dSSadaf Ebrahimi       char_start = pattern;
707*22dc650dSSadaf Ebrahimi       GETCHARINCTEST(c, pattern);
708*22dc650dSSadaf Ebrahimi       }
709*22dc650dSSadaf Ebrahimi     else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
710*22dc650dSSadaf Ebrahimi       {
711*22dc650dSSadaf Ebrahimi       *from = pattern;
712*22dc650dSSadaf Ebrahimi       return PCRE2_ERROR_CONVERT_SYNTAX;
713*22dc650dSSadaf Ebrahimi       }
714*22dc650dSSadaf Ebrahimi 
715*22dc650dSSadaf Ebrahimi     if (prev_c > c)
716*22dc650dSSadaf Ebrahimi       {
717*22dc650dSSadaf Ebrahimi       *from = pattern;
718*22dc650dSSadaf Ebrahimi       return PCRE2_ERROR_CONVERT_SYNTAX;
719*22dc650dSSadaf Ebrahimi       }
720*22dc650dSSadaf Ebrahimi 
721*22dc650dSSadaf Ebrahimi     if (prev_c < separator && separator < c) separator_seen = TRUE;
722*22dc650dSSadaf Ebrahimi 
723*22dc650dSSadaf Ebrahimi     has_prev_c = FALSE;
724*22dc650dSSadaf Ebrahimi     prev_c = 0;
725*22dc650dSSadaf Ebrahimi     }
726*22dc650dSSadaf Ebrahimi   else
727*22dc650dSSadaf Ebrahimi     {
728*22dc650dSSadaf Ebrahimi     if (escape != 0 && c == escape)
729*22dc650dSSadaf Ebrahimi       {
730*22dc650dSSadaf Ebrahimi       char_start = pattern;
731*22dc650dSSadaf Ebrahimi       GETCHARINCTEST(c, pattern);
732*22dc650dSSadaf Ebrahimi 
733*22dc650dSSadaf Ebrahimi       if (pattern >= pattern_end) break;
734*22dc650dSSadaf Ebrahimi       }
735*22dc650dSSadaf Ebrahimi 
736*22dc650dSSadaf Ebrahimi     has_prev_c = TRUE;
737*22dc650dSSadaf Ebrahimi     prev_c = c;
738*22dc650dSSadaf Ebrahimi     }
739*22dc650dSSadaf Ebrahimi 
740*22dc650dSSadaf Ebrahimi   if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
741*22dc650dSSadaf Ebrahimi       c == CHAR_BACKSLASH || c == CHAR_MINUS)
742*22dc650dSSadaf Ebrahimi     convert_glob_write(out, CHAR_BACKSLASH);
743*22dc650dSSadaf Ebrahimi 
744*22dc650dSSadaf Ebrahimi   if (c == separator) separator_seen = TRUE;
745*22dc650dSSadaf Ebrahimi 
746*22dc650dSSadaf Ebrahimi   do convert_glob_write(out, *char_start++); while (char_start < pattern);
747*22dc650dSSadaf Ebrahimi   }
748*22dc650dSSadaf Ebrahimi 
749*22dc650dSSadaf Ebrahimi *from = pattern;
750*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
751*22dc650dSSadaf Ebrahimi }
752*22dc650dSSadaf Ebrahimi 
753*22dc650dSSadaf Ebrahimi 
754*22dc650dSSadaf Ebrahimi /* Prints a (*COMMIT) into the output.
755*22dc650dSSadaf Ebrahimi 
756*22dc650dSSadaf Ebrahimi Arguments:
757*22dc650dSSadaf Ebrahimi   out            output context
758*22dc650dSSadaf Ebrahimi */
759*22dc650dSSadaf Ebrahimi 
760*22dc650dSSadaf Ebrahimi static void
convert_glob_print_commit(pcre2_output_context * out)761*22dc650dSSadaf Ebrahimi convert_glob_print_commit(pcre2_output_context *out)
762*22dc650dSSadaf Ebrahimi {
763*22dc650dSSadaf Ebrahimi out->out_str[0] = CHAR_LEFT_PARENTHESIS;
764*22dc650dSSadaf Ebrahimi out->out_str[1] = CHAR_ASTERISK;
765*22dc650dSSadaf Ebrahimi out->out_str[2] = CHAR_C;
766*22dc650dSSadaf Ebrahimi out->out_str[3] = CHAR_O;
767*22dc650dSSadaf Ebrahimi out->out_str[4] = CHAR_M;
768*22dc650dSSadaf Ebrahimi out->out_str[5] = CHAR_M;
769*22dc650dSSadaf Ebrahimi out->out_str[6] = CHAR_I;
770*22dc650dSSadaf Ebrahimi out->out_str[7] = CHAR_T;
771*22dc650dSSadaf Ebrahimi convert_glob_write_str(out, 8);
772*22dc650dSSadaf Ebrahimi convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
773*22dc650dSSadaf Ebrahimi }
774*22dc650dSSadaf Ebrahimi 
775*22dc650dSSadaf Ebrahimi 
776*22dc650dSSadaf Ebrahimi /* Bash glob converter.
777*22dc650dSSadaf Ebrahimi 
778*22dc650dSSadaf Ebrahimi Arguments:
779*22dc650dSSadaf Ebrahimi   pattype        the pattern type
780*22dc650dSSadaf Ebrahimi   pattern        the pattern
781*22dc650dSSadaf Ebrahimi   plength        length in code units
782*22dc650dSSadaf Ebrahimi   utf            TRUE if UTF
783*22dc650dSSadaf Ebrahimi   use_buffer     where to put the output
784*22dc650dSSadaf Ebrahimi   use_length     length of use_buffer
785*22dc650dSSadaf Ebrahimi   bufflenptr     where to put the used length
786*22dc650dSSadaf Ebrahimi   dummyrun       TRUE if a dummy run
787*22dc650dSSadaf Ebrahimi   ccontext       the convert context
788*22dc650dSSadaf Ebrahimi 
789*22dc650dSSadaf Ebrahimi Returns:         0 => success
790*22dc650dSSadaf Ebrahimi                 !0 => error code
791*22dc650dSSadaf Ebrahimi */
792*22dc650dSSadaf Ebrahimi 
793*22dc650dSSadaf Ebrahimi static int
convert_glob(uint32_t options,PCRE2_SPTR pattern,PCRE2_SIZE plength,BOOL utf,PCRE2_UCHAR * use_buffer,PCRE2_SIZE use_length,PCRE2_SIZE * bufflenptr,BOOL dummyrun,pcre2_convert_context * ccontext)794*22dc650dSSadaf Ebrahimi convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
795*22dc650dSSadaf Ebrahimi   BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
796*22dc650dSSadaf Ebrahimi   PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
797*22dc650dSSadaf Ebrahimi {
798*22dc650dSSadaf Ebrahimi pcre2_output_context out;
799*22dc650dSSadaf Ebrahimi PCRE2_SPTR pattern_start = pattern;
800*22dc650dSSadaf Ebrahimi PCRE2_SPTR pattern_end = pattern + plength;
801*22dc650dSSadaf Ebrahimi PCRE2_UCHAR separator = ccontext->glob_separator;
802*22dc650dSSadaf Ebrahimi PCRE2_UCHAR escape = ccontext->glob_escape;
803*22dc650dSSadaf Ebrahimi PCRE2_UCHAR c;
804*22dc650dSSadaf Ebrahimi BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
805*22dc650dSSadaf Ebrahimi BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
806*22dc650dSSadaf Ebrahimi BOOL in_atomic = FALSE;
807*22dc650dSSadaf Ebrahimi BOOL after_starstar = FALSE;
808*22dc650dSSadaf Ebrahimi BOOL no_slash_z = FALSE;
809*22dc650dSSadaf Ebrahimi BOOL with_escape, is_start, after_separator;
810*22dc650dSSadaf Ebrahimi int result = 0;
811*22dc650dSSadaf Ebrahimi 
812*22dc650dSSadaf Ebrahimi (void)utf; /* Avoid compiler warning. */
813*22dc650dSSadaf Ebrahimi 
814*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_UNICODE
815*22dc650dSSadaf Ebrahimi if (utf && (separator >= 128 || escape >= 128))
816*22dc650dSSadaf Ebrahimi   {
817*22dc650dSSadaf Ebrahimi   /* Currently only ASCII characters are supported. */
818*22dc650dSSadaf Ebrahimi   *bufflenptr = 0;
819*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_CONVERT_SYNTAX;
820*22dc650dSSadaf Ebrahimi   }
821*22dc650dSSadaf Ebrahimi #endif
822*22dc650dSSadaf Ebrahimi 
823*22dc650dSSadaf Ebrahimi with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
824*22dc650dSSadaf Ebrahimi 
825*22dc650dSSadaf Ebrahimi /* Initialize default for error offset as end of input. */
826*22dc650dSSadaf Ebrahimi out.output = use_buffer;
827*22dc650dSSadaf Ebrahimi out.output_end = use_buffer + use_length;
828*22dc650dSSadaf Ebrahimi out.output_size = 0;
829*22dc650dSSadaf Ebrahimi 
830*22dc650dSSadaf Ebrahimi out.out_str[0] = CHAR_LEFT_PARENTHESIS;
831*22dc650dSSadaf Ebrahimi out.out_str[1] = CHAR_QUESTION_MARK;
832*22dc650dSSadaf Ebrahimi out.out_str[2] = CHAR_s;
833*22dc650dSSadaf Ebrahimi out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
834*22dc650dSSadaf Ebrahimi convert_glob_write_str(&out, 4);
835*22dc650dSSadaf Ebrahimi 
836*22dc650dSSadaf Ebrahimi is_start = TRUE;
837*22dc650dSSadaf Ebrahimi 
838*22dc650dSSadaf Ebrahimi if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
839*22dc650dSSadaf Ebrahimi   {
840*22dc650dSSadaf Ebrahimi   if (no_wildsep)
841*22dc650dSSadaf Ebrahimi     is_start = FALSE;
842*22dc650dSSadaf Ebrahimi   else if (!no_starstar && pattern + 1 < pattern_end &&
843*22dc650dSSadaf Ebrahimi            pattern[1] == CHAR_ASTERISK)
844*22dc650dSSadaf Ebrahimi     is_start = FALSE;
845*22dc650dSSadaf Ebrahimi   }
846*22dc650dSSadaf Ebrahimi 
847*22dc650dSSadaf Ebrahimi if (is_start)
848*22dc650dSSadaf Ebrahimi   {
849*22dc650dSSadaf Ebrahimi   out.out_str[0] = CHAR_BACKSLASH;
850*22dc650dSSadaf Ebrahimi   out.out_str[1] = CHAR_A;
851*22dc650dSSadaf Ebrahimi   convert_glob_write_str(&out, 2);
852*22dc650dSSadaf Ebrahimi   }
853*22dc650dSSadaf Ebrahimi 
854*22dc650dSSadaf Ebrahimi while (pattern < pattern_end)
855*22dc650dSSadaf Ebrahimi   {
856*22dc650dSSadaf Ebrahimi   c = *pattern++;
857*22dc650dSSadaf Ebrahimi 
858*22dc650dSSadaf Ebrahimi   if (c == CHAR_ASTERISK)
859*22dc650dSSadaf Ebrahimi     {
860*22dc650dSSadaf Ebrahimi     is_start = pattern == pattern_start + 1;
861*22dc650dSSadaf Ebrahimi 
862*22dc650dSSadaf Ebrahimi     if (in_atomic)
863*22dc650dSSadaf Ebrahimi       {
864*22dc650dSSadaf Ebrahimi       convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
865*22dc650dSSadaf Ebrahimi       in_atomic = FALSE;
866*22dc650dSSadaf Ebrahimi       }
867*22dc650dSSadaf Ebrahimi 
868*22dc650dSSadaf Ebrahimi     if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
869*22dc650dSSadaf Ebrahimi       {
870*22dc650dSSadaf Ebrahimi       after_separator = is_start || (pattern[-2] == separator);
871*22dc650dSSadaf Ebrahimi 
872*22dc650dSSadaf Ebrahimi       do pattern++; while (pattern < pattern_end &&
873*22dc650dSSadaf Ebrahimi                            *pattern == CHAR_ASTERISK);
874*22dc650dSSadaf Ebrahimi 
875*22dc650dSSadaf Ebrahimi       if (pattern >= pattern_end)
876*22dc650dSSadaf Ebrahimi         {
877*22dc650dSSadaf Ebrahimi         no_slash_z = TRUE;
878*22dc650dSSadaf Ebrahimi         break;
879*22dc650dSSadaf Ebrahimi         }
880*22dc650dSSadaf Ebrahimi 
881*22dc650dSSadaf Ebrahimi       after_starstar = TRUE;
882*22dc650dSSadaf Ebrahimi 
883*22dc650dSSadaf Ebrahimi       if (after_separator && escape != 0 && *pattern == escape &&
884*22dc650dSSadaf Ebrahimi           pattern + 1 < pattern_end && pattern[1] == separator)
885*22dc650dSSadaf Ebrahimi         pattern++;
886*22dc650dSSadaf Ebrahimi 
887*22dc650dSSadaf Ebrahimi       if (is_start)
888*22dc650dSSadaf Ebrahimi         {
889*22dc650dSSadaf Ebrahimi         if (*pattern != separator) continue;
890*22dc650dSSadaf Ebrahimi 
891*22dc650dSSadaf Ebrahimi         out.out_str[0] = CHAR_LEFT_PARENTHESIS;
892*22dc650dSSadaf Ebrahimi         out.out_str[1] = CHAR_QUESTION_MARK;
893*22dc650dSSadaf Ebrahimi         out.out_str[2] = CHAR_COLON;
894*22dc650dSSadaf Ebrahimi         out.out_str[3] = CHAR_BACKSLASH;
895*22dc650dSSadaf Ebrahimi         out.out_str[4] = CHAR_A;
896*22dc650dSSadaf Ebrahimi         out.out_str[5] = CHAR_VERTICAL_LINE;
897*22dc650dSSadaf Ebrahimi         convert_glob_write_str(&out, 6);
898*22dc650dSSadaf Ebrahimi 
899*22dc650dSSadaf Ebrahimi         convert_glob_print_separator(&out, separator, with_escape);
900*22dc650dSSadaf Ebrahimi         convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
901*22dc650dSSadaf Ebrahimi 
902*22dc650dSSadaf Ebrahimi         pattern++;
903*22dc650dSSadaf Ebrahimi         continue;
904*22dc650dSSadaf Ebrahimi         }
905*22dc650dSSadaf Ebrahimi 
906*22dc650dSSadaf Ebrahimi       convert_glob_print_commit(&out);
907*22dc650dSSadaf Ebrahimi 
908*22dc650dSSadaf Ebrahimi       if (!after_separator || *pattern != separator)
909*22dc650dSSadaf Ebrahimi         {
910*22dc650dSSadaf Ebrahimi         out.out_str[0] = CHAR_DOT;
911*22dc650dSSadaf Ebrahimi         out.out_str[1] = CHAR_ASTERISK;
912*22dc650dSSadaf Ebrahimi         out.out_str[2] = CHAR_QUESTION_MARK;
913*22dc650dSSadaf Ebrahimi         convert_glob_write_str(&out, 3);
914*22dc650dSSadaf Ebrahimi         continue;
915*22dc650dSSadaf Ebrahimi         }
916*22dc650dSSadaf Ebrahimi 
917*22dc650dSSadaf Ebrahimi       out.out_str[0] = CHAR_LEFT_PARENTHESIS;
918*22dc650dSSadaf Ebrahimi       out.out_str[1] = CHAR_QUESTION_MARK;
919*22dc650dSSadaf Ebrahimi       out.out_str[2] = CHAR_COLON;
920*22dc650dSSadaf Ebrahimi       out.out_str[3] = CHAR_DOT;
921*22dc650dSSadaf Ebrahimi       out.out_str[4] = CHAR_ASTERISK;
922*22dc650dSSadaf Ebrahimi       out.out_str[5] = CHAR_QUESTION_MARK;
923*22dc650dSSadaf Ebrahimi 
924*22dc650dSSadaf Ebrahimi       convert_glob_write_str(&out, 6);
925*22dc650dSSadaf Ebrahimi 
926*22dc650dSSadaf Ebrahimi       convert_glob_print_separator(&out, separator, with_escape);
927*22dc650dSSadaf Ebrahimi 
928*22dc650dSSadaf Ebrahimi       out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
929*22dc650dSSadaf Ebrahimi       out.out_str[1] = CHAR_QUESTION_MARK;
930*22dc650dSSadaf Ebrahimi       out.out_str[2] = CHAR_QUESTION_MARK;
931*22dc650dSSadaf Ebrahimi       convert_glob_write_str(&out, 3);
932*22dc650dSSadaf Ebrahimi 
933*22dc650dSSadaf Ebrahimi       pattern++;
934*22dc650dSSadaf Ebrahimi       continue;
935*22dc650dSSadaf Ebrahimi       }
936*22dc650dSSadaf Ebrahimi 
937*22dc650dSSadaf Ebrahimi     if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
938*22dc650dSSadaf Ebrahimi       {
939*22dc650dSSadaf Ebrahimi       do pattern++; while (pattern < pattern_end &&
940*22dc650dSSadaf Ebrahimi                            *pattern == CHAR_ASTERISK);
941*22dc650dSSadaf Ebrahimi       }
942*22dc650dSSadaf Ebrahimi 
943*22dc650dSSadaf Ebrahimi     if (no_wildsep)
944*22dc650dSSadaf Ebrahimi       {
945*22dc650dSSadaf Ebrahimi       if (pattern >= pattern_end)
946*22dc650dSSadaf Ebrahimi         {
947*22dc650dSSadaf Ebrahimi         no_slash_z = TRUE;
948*22dc650dSSadaf Ebrahimi         break;
949*22dc650dSSadaf Ebrahimi         }
950*22dc650dSSadaf Ebrahimi 
951*22dc650dSSadaf Ebrahimi       /* Start check must be after the end check. */
952*22dc650dSSadaf Ebrahimi       if (is_start) continue;
953*22dc650dSSadaf Ebrahimi       }
954*22dc650dSSadaf Ebrahimi 
955*22dc650dSSadaf Ebrahimi     if (!is_start)
956*22dc650dSSadaf Ebrahimi       {
957*22dc650dSSadaf Ebrahimi       if (after_starstar)
958*22dc650dSSadaf Ebrahimi         {
959*22dc650dSSadaf Ebrahimi         out.out_str[0] = CHAR_LEFT_PARENTHESIS;
960*22dc650dSSadaf Ebrahimi         out.out_str[1] = CHAR_QUESTION_MARK;
961*22dc650dSSadaf Ebrahimi         out.out_str[2] = CHAR_GREATER_THAN_SIGN;
962*22dc650dSSadaf Ebrahimi         convert_glob_write_str(&out, 3);
963*22dc650dSSadaf Ebrahimi         in_atomic = TRUE;
964*22dc650dSSadaf Ebrahimi         }
965*22dc650dSSadaf Ebrahimi       else
966*22dc650dSSadaf Ebrahimi         convert_glob_print_commit(&out);
967*22dc650dSSadaf Ebrahimi       }
968*22dc650dSSadaf Ebrahimi 
969*22dc650dSSadaf Ebrahimi     if (no_wildsep)
970*22dc650dSSadaf Ebrahimi       convert_glob_write(&out, CHAR_DOT);
971*22dc650dSSadaf Ebrahimi     else
972*22dc650dSSadaf Ebrahimi       convert_glob_print_wildcard(&out, separator, with_escape);
973*22dc650dSSadaf Ebrahimi 
974*22dc650dSSadaf Ebrahimi     out.out_str[0] = CHAR_ASTERISK;
975*22dc650dSSadaf Ebrahimi     out.out_str[1] = CHAR_QUESTION_MARK;
976*22dc650dSSadaf Ebrahimi     if (pattern >= pattern_end)
977*22dc650dSSadaf Ebrahimi       out.out_str[1] = CHAR_PLUS;
978*22dc650dSSadaf Ebrahimi     convert_glob_write_str(&out, 2);
979*22dc650dSSadaf Ebrahimi     continue;
980*22dc650dSSadaf Ebrahimi     }
981*22dc650dSSadaf Ebrahimi 
982*22dc650dSSadaf Ebrahimi   if (c == CHAR_QUESTION_MARK)
983*22dc650dSSadaf Ebrahimi     {
984*22dc650dSSadaf Ebrahimi     if (no_wildsep)
985*22dc650dSSadaf Ebrahimi       convert_glob_write(&out, CHAR_DOT);
986*22dc650dSSadaf Ebrahimi     else
987*22dc650dSSadaf Ebrahimi       convert_glob_print_wildcard(&out, separator, with_escape);
988*22dc650dSSadaf Ebrahimi     continue;
989*22dc650dSSadaf Ebrahimi     }
990*22dc650dSSadaf Ebrahimi 
991*22dc650dSSadaf Ebrahimi   if (c == CHAR_LEFT_SQUARE_BRACKET)
992*22dc650dSSadaf Ebrahimi     {
993*22dc650dSSadaf Ebrahimi     result = convert_glob_parse_range(&pattern, pattern_end,
994*22dc650dSSadaf Ebrahimi       &out, utf, separator, with_escape, escape, no_wildsep);
995*22dc650dSSadaf Ebrahimi     if (result != 0) break;
996*22dc650dSSadaf Ebrahimi     continue;
997*22dc650dSSadaf Ebrahimi     }
998*22dc650dSSadaf Ebrahimi 
999*22dc650dSSadaf Ebrahimi   if (escape != 0 && c == escape)
1000*22dc650dSSadaf Ebrahimi     {
1001*22dc650dSSadaf Ebrahimi     if (pattern >= pattern_end)
1002*22dc650dSSadaf Ebrahimi       {
1003*22dc650dSSadaf Ebrahimi       result = PCRE2_ERROR_CONVERT_SYNTAX;
1004*22dc650dSSadaf Ebrahimi       break;
1005*22dc650dSSadaf Ebrahimi       }
1006*22dc650dSSadaf Ebrahimi     c = *pattern++;
1007*22dc650dSSadaf Ebrahimi     }
1008*22dc650dSSadaf Ebrahimi 
1009*22dc650dSSadaf Ebrahimi   if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
1010*22dc650dSSadaf Ebrahimi     convert_glob_write(&out, CHAR_BACKSLASH);
1011*22dc650dSSadaf Ebrahimi 
1012*22dc650dSSadaf Ebrahimi   convert_glob_write(&out, c);
1013*22dc650dSSadaf Ebrahimi   }
1014*22dc650dSSadaf Ebrahimi 
1015*22dc650dSSadaf Ebrahimi if (result == 0)
1016*22dc650dSSadaf Ebrahimi   {
1017*22dc650dSSadaf Ebrahimi   if (!no_slash_z)
1018*22dc650dSSadaf Ebrahimi     {
1019*22dc650dSSadaf Ebrahimi     out.out_str[0] = CHAR_BACKSLASH;
1020*22dc650dSSadaf Ebrahimi     out.out_str[1] = CHAR_z;
1021*22dc650dSSadaf Ebrahimi     convert_glob_write_str(&out, 2);
1022*22dc650dSSadaf Ebrahimi     }
1023*22dc650dSSadaf Ebrahimi 
1024*22dc650dSSadaf Ebrahimi   if (in_atomic)
1025*22dc650dSSadaf Ebrahimi     convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
1026*22dc650dSSadaf Ebrahimi 
1027*22dc650dSSadaf Ebrahimi   convert_glob_write(&out, CHAR_NUL);
1028*22dc650dSSadaf Ebrahimi 
1029*22dc650dSSadaf Ebrahimi   if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
1030*22dc650dSSadaf Ebrahimi     result = PCRE2_ERROR_NOMEMORY;
1031*22dc650dSSadaf Ebrahimi   }
1032*22dc650dSSadaf Ebrahimi 
1033*22dc650dSSadaf Ebrahimi if (result != 0)
1034*22dc650dSSadaf Ebrahimi   {
1035*22dc650dSSadaf Ebrahimi   *bufflenptr = pattern - pattern_start;
1036*22dc650dSSadaf Ebrahimi   return result;
1037*22dc650dSSadaf Ebrahimi   }
1038*22dc650dSSadaf Ebrahimi 
1039*22dc650dSSadaf Ebrahimi *bufflenptr = out.output_size - 1;
1040*22dc650dSSadaf Ebrahimi return 0;
1041*22dc650dSSadaf Ebrahimi }
1042*22dc650dSSadaf Ebrahimi 
1043*22dc650dSSadaf Ebrahimi 
1044*22dc650dSSadaf Ebrahimi /*************************************************
1045*22dc650dSSadaf Ebrahimi *                Convert pattern                 *
1046*22dc650dSSadaf Ebrahimi *************************************************/
1047*22dc650dSSadaf Ebrahimi 
1048*22dc650dSSadaf Ebrahimi /* This is the external-facing function for converting other forms of pattern
1049*22dc650dSSadaf Ebrahimi into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1050*22dc650dSSadaf Ebrahimi used to return an offset in the original pattern.
1051*22dc650dSSadaf Ebrahimi 
1052*22dc650dSSadaf Ebrahimi Arguments:
1053*22dc650dSSadaf Ebrahimi   pattern     the input pattern
1054*22dc650dSSadaf Ebrahimi   plength     length of input, or PCRE2_ZERO_TERMINATED
1055*22dc650dSSadaf Ebrahimi   options     options bits
1056*22dc650dSSadaf Ebrahimi   buffptr     pointer to pointer to output buffer
1057*22dc650dSSadaf Ebrahimi   bufflenptr  pointer to length of output buffer
1058*22dc650dSSadaf Ebrahimi   ccontext    convert context or NULL
1059*22dc650dSSadaf Ebrahimi 
1060*22dc650dSSadaf Ebrahimi Returns:      0 for success, else an error code (+ve or -ve)
1061*22dc650dSSadaf Ebrahimi */
1062*22dc650dSSadaf Ebrahimi 
1063*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_convert(PCRE2_SPTR pattern,PCRE2_SIZE plength,uint32_t options,PCRE2_UCHAR ** buffptr,PCRE2_SIZE * bufflenptr,pcre2_convert_context * ccontext)1064*22dc650dSSadaf Ebrahimi pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
1065*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1066*22dc650dSSadaf Ebrahimi   pcre2_convert_context *ccontext)
1067*22dc650dSSadaf Ebrahimi {
1068*22dc650dSSadaf Ebrahimi int i, rc;
1069*22dc650dSSadaf Ebrahimi PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1070*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *use_buffer = dummy_buffer;
1071*22dc650dSSadaf Ebrahimi PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1072*22dc650dSSadaf Ebrahimi BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1073*22dc650dSSadaf Ebrahimi uint32_t pattype = options & TYPE_OPTIONS;
1074*22dc650dSSadaf Ebrahimi 
1075*22dc650dSSadaf Ebrahimi if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
1076*22dc650dSSadaf Ebrahimi 
1077*22dc650dSSadaf Ebrahimi if ((options & ~ALL_OPTIONS) != 0 ||        /* Undefined bit set */
1078*22dc650dSSadaf Ebrahimi     (pattype & (~pattype+1)) != pattype ||  /* More than one type set */
1079*22dc650dSSadaf Ebrahimi     pattype == 0)                           /* No type set */
1080*22dc650dSSadaf Ebrahimi   {
1081*22dc650dSSadaf Ebrahimi   *bufflenptr = 0;                          /* Error offset */
1082*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_BADOPTION;
1083*22dc650dSSadaf Ebrahimi   }
1084*22dc650dSSadaf Ebrahimi 
1085*22dc650dSSadaf Ebrahimi if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1086*22dc650dSSadaf Ebrahimi if (ccontext == NULL) ccontext =
1087*22dc650dSSadaf Ebrahimi   (pcre2_convert_context *)(&PRIV(default_convert_context));
1088*22dc650dSSadaf Ebrahimi 
1089*22dc650dSSadaf Ebrahimi /* Check UTF if required. */
1090*22dc650dSSadaf Ebrahimi 
1091*22dc650dSSadaf Ebrahimi #ifndef SUPPORT_UNICODE
1092*22dc650dSSadaf Ebrahimi if (utf)
1093*22dc650dSSadaf Ebrahimi   {
1094*22dc650dSSadaf Ebrahimi   *bufflenptr = 0;  /* Error offset */
1095*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
1096*22dc650dSSadaf Ebrahimi   }
1097*22dc650dSSadaf Ebrahimi #else
1098*22dc650dSSadaf Ebrahimi if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1099*22dc650dSSadaf Ebrahimi   {
1100*22dc650dSSadaf Ebrahimi   PCRE2_SIZE erroroffset;
1101*22dc650dSSadaf Ebrahimi   rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1102*22dc650dSSadaf Ebrahimi   if (rc != 0)
1103*22dc650dSSadaf Ebrahimi     {
1104*22dc650dSSadaf Ebrahimi     *bufflenptr = erroroffset;
1105*22dc650dSSadaf Ebrahimi     return rc;
1106*22dc650dSSadaf Ebrahimi     }
1107*22dc650dSSadaf Ebrahimi   }
1108*22dc650dSSadaf Ebrahimi #endif
1109*22dc650dSSadaf Ebrahimi 
1110*22dc650dSSadaf Ebrahimi /* If buffptr is not NULL, and what it points to is not NULL, we are being
1111*22dc650dSSadaf Ebrahimi provided with a buffer and a length, so set them as the buffer to use. */
1112*22dc650dSSadaf Ebrahimi 
1113*22dc650dSSadaf Ebrahimi if (buffptr != NULL && *buffptr != NULL)
1114*22dc650dSSadaf Ebrahimi   {
1115*22dc650dSSadaf Ebrahimi   use_buffer = *buffptr;
1116*22dc650dSSadaf Ebrahimi   use_length = *bufflenptr;
1117*22dc650dSSadaf Ebrahimi   }
1118*22dc650dSSadaf Ebrahimi 
1119*22dc650dSSadaf Ebrahimi /* Call an individual converter, either just once (if a buffer was provided or
1120*22dc650dSSadaf Ebrahimi just the length is needed), or twice (if a memory allocation is required). */
1121*22dc650dSSadaf Ebrahimi 
1122*22dc650dSSadaf Ebrahimi for (i = 0; i < 2; i++)
1123*22dc650dSSadaf Ebrahimi   {
1124*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR *allocated;
1125*22dc650dSSadaf Ebrahimi   BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1126*22dc650dSSadaf Ebrahimi 
1127*22dc650dSSadaf Ebrahimi   switch(pattype)
1128*22dc650dSSadaf Ebrahimi     {
1129*22dc650dSSadaf Ebrahimi     case PCRE2_CONVERT_GLOB:
1130*22dc650dSSadaf Ebrahimi     rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1131*22dc650dSSadaf Ebrahimi       use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1132*22dc650dSSadaf Ebrahimi     break;
1133*22dc650dSSadaf Ebrahimi 
1134*22dc650dSSadaf Ebrahimi     case PCRE2_CONVERT_POSIX_BASIC:
1135*22dc650dSSadaf Ebrahimi     case PCRE2_CONVERT_POSIX_EXTENDED:
1136*22dc650dSSadaf Ebrahimi     rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1137*22dc650dSSadaf Ebrahimi       bufflenptr, dummyrun, ccontext);
1138*22dc650dSSadaf Ebrahimi     break;
1139*22dc650dSSadaf Ebrahimi 
1140*22dc650dSSadaf Ebrahimi     default:
1141*22dc650dSSadaf Ebrahimi     *bufflenptr = 0;  /* Error offset */
1142*22dc650dSSadaf Ebrahimi     return PCRE2_ERROR_INTERNAL;
1143*22dc650dSSadaf Ebrahimi     }
1144*22dc650dSSadaf Ebrahimi 
1145*22dc650dSSadaf Ebrahimi   if (rc != 0 ||           /* Error */
1146*22dc650dSSadaf Ebrahimi       buffptr == NULL ||   /* Just the length is required */
1147*22dc650dSSadaf Ebrahimi       *buffptr != NULL)    /* Buffer was provided or allocated */
1148*22dc650dSSadaf Ebrahimi     return rc;
1149*22dc650dSSadaf Ebrahimi 
1150*22dc650dSSadaf Ebrahimi   /* Allocate memory for the buffer, with hidden space for an allocator at
1151*22dc650dSSadaf Ebrahimi   the start. The next time round the loop runs the conversion for real. */
1152*22dc650dSSadaf Ebrahimi 
1153*22dc650dSSadaf Ebrahimi   allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1154*22dc650dSSadaf Ebrahimi     (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1155*22dc650dSSadaf Ebrahimi   if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
1156*22dc650dSSadaf Ebrahimi   *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1157*22dc650dSSadaf Ebrahimi 
1158*22dc650dSSadaf Ebrahimi   use_buffer = *buffptr;
1159*22dc650dSSadaf Ebrahimi   use_length = *bufflenptr + 1;
1160*22dc650dSSadaf Ebrahimi   }
1161*22dc650dSSadaf Ebrahimi 
1162*22dc650dSSadaf Ebrahimi /* Control should never get here. */
1163*22dc650dSSadaf Ebrahimi 
1164*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_INTERNAL;
1165*22dc650dSSadaf Ebrahimi }
1166*22dc650dSSadaf Ebrahimi 
1167*22dc650dSSadaf Ebrahimi 
1168*22dc650dSSadaf Ebrahimi /*************************************************
1169*22dc650dSSadaf Ebrahimi *            Free converted pattern              *
1170*22dc650dSSadaf Ebrahimi *************************************************/
1171*22dc650dSSadaf Ebrahimi 
1172*22dc650dSSadaf Ebrahimi /* This frees a converted pattern that was put in newly-allocated memory.
1173*22dc650dSSadaf Ebrahimi 
1174*22dc650dSSadaf Ebrahimi Argument:   the converted pattern
1175*22dc650dSSadaf Ebrahimi Returns:    nothing
1176*22dc650dSSadaf Ebrahimi */
1177*22dc650dSSadaf Ebrahimi 
1178*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_converted_pattern_free(PCRE2_UCHAR * converted)1179*22dc650dSSadaf Ebrahimi pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
1180*22dc650dSSadaf Ebrahimi {
1181*22dc650dSSadaf Ebrahimi if (converted != NULL)
1182*22dc650dSSadaf Ebrahimi   {
1183*22dc650dSSadaf Ebrahimi   pcre2_memctl *memctl =
1184*22dc650dSSadaf Ebrahimi     (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
1185*22dc650dSSadaf Ebrahimi   memctl->free(memctl, memctl->memory_data);
1186*22dc650dSSadaf Ebrahimi   }
1187*22dc650dSSadaf Ebrahimi }
1188*22dc650dSSadaf Ebrahimi 
1189*22dc650dSSadaf Ebrahimi /* End of pcre2_convert.c */
1190