xref: /aosp_15_r20/external/pcre/src/pcre2_substring.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /*************************************************
2*22dc650dSSadaf Ebrahimi *      Perl-Compatible Regular Expressions       *
3*22dc650dSSadaf Ebrahimi *************************************************/
4*22dc650dSSadaf Ebrahimi 
5*22dc650dSSadaf Ebrahimi /* PCRE is a library of functions to support regular expressions whose syntax
6*22dc650dSSadaf Ebrahimi and semantics are as close as possible to those of the Perl 5 language.
7*22dc650dSSadaf Ebrahimi 
8*22dc650dSSadaf Ebrahimi                        Written by Philip Hazel
9*22dc650dSSadaf Ebrahimi      Original API code Copyright (c) 1997-2012 University of Cambridge
10*22dc650dSSadaf Ebrahimi           New API code Copyright (c) 2016-2023 University of Cambridge
11*22dc650dSSadaf Ebrahimi 
12*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
13*22dc650dSSadaf Ebrahimi Redistribution and use in source and binary forms, with or without
14*22dc650dSSadaf Ebrahimi modification, are permitted provided that the following conditions are met:
15*22dc650dSSadaf Ebrahimi 
16*22dc650dSSadaf Ebrahimi     * Redistributions of source code must retain the above copyright notice,
17*22dc650dSSadaf Ebrahimi       this list of conditions and the following disclaimer.
18*22dc650dSSadaf Ebrahimi 
19*22dc650dSSadaf Ebrahimi     * Redistributions in binary form must reproduce the above copyright
20*22dc650dSSadaf Ebrahimi       notice, this list of conditions and the following disclaimer in the
21*22dc650dSSadaf Ebrahimi       documentation and/or other materials provided with the distribution.
22*22dc650dSSadaf Ebrahimi 
23*22dc650dSSadaf Ebrahimi     * Neither the name of the University of Cambridge nor the names of its
24*22dc650dSSadaf Ebrahimi       contributors may be used to endorse or promote products derived from
25*22dc650dSSadaf Ebrahimi       this software without specific prior written permission.
26*22dc650dSSadaf Ebrahimi 
27*22dc650dSSadaf Ebrahimi THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28*22dc650dSSadaf Ebrahimi AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29*22dc650dSSadaf Ebrahimi IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30*22dc650dSSadaf Ebrahimi ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31*22dc650dSSadaf Ebrahimi LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32*22dc650dSSadaf Ebrahimi CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33*22dc650dSSadaf Ebrahimi SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34*22dc650dSSadaf Ebrahimi INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35*22dc650dSSadaf Ebrahimi CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36*22dc650dSSadaf Ebrahimi ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37*22dc650dSSadaf Ebrahimi POSSIBILITY OF SUCH DAMAGE.
38*22dc650dSSadaf Ebrahimi -----------------------------------------------------------------------------
39*22dc650dSSadaf Ebrahimi */
40*22dc650dSSadaf Ebrahimi 
41*22dc650dSSadaf Ebrahimi 
42*22dc650dSSadaf Ebrahimi #ifdef HAVE_CONFIG_H
43*22dc650dSSadaf Ebrahimi #include "config.h"
44*22dc650dSSadaf Ebrahimi #endif
45*22dc650dSSadaf Ebrahimi 
46*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
47*22dc650dSSadaf Ebrahimi 
48*22dc650dSSadaf Ebrahimi 
49*22dc650dSSadaf Ebrahimi 
50*22dc650dSSadaf Ebrahimi /*************************************************
51*22dc650dSSadaf Ebrahimi *   Copy named captured string to given buffer   *
52*22dc650dSSadaf Ebrahimi *************************************************/
53*22dc650dSSadaf Ebrahimi 
54*22dc650dSSadaf Ebrahimi /* This function copies a single captured substring into a given buffer,
55*22dc650dSSadaf Ebrahimi identifying it by name. If the regex permits duplicate names, the first
56*22dc650dSSadaf Ebrahimi substring that is set is chosen.
57*22dc650dSSadaf Ebrahimi 
58*22dc650dSSadaf Ebrahimi Arguments:
59*22dc650dSSadaf Ebrahimi   match_data     points to the match data
60*22dc650dSSadaf Ebrahimi   stringname     the name of the required substring
61*22dc650dSSadaf Ebrahimi   buffer         where to put the substring
62*22dc650dSSadaf Ebrahimi   sizeptr        the size of the buffer, updated to the size of the substring
63*22dc650dSSadaf Ebrahimi 
64*22dc650dSSadaf Ebrahimi Returns:         if successful: zero
65*22dc650dSSadaf Ebrahimi                  if not successful, a negative error code:
66*22dc650dSSadaf Ebrahimi                    (1) an error from nametable_scan()
67*22dc650dSSadaf Ebrahimi                    (2) an error from copy_bynumber()
68*22dc650dSSadaf Ebrahimi                    (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
69*22dc650dSSadaf Ebrahimi                    (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
70*22dc650dSSadaf Ebrahimi */
71*22dc650dSSadaf Ebrahimi 
72*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_byname(pcre2_match_data * match_data,PCRE2_SPTR stringname,PCRE2_UCHAR * buffer,PCRE2_SIZE * sizeptr)73*22dc650dSSadaf Ebrahimi pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
74*22dc650dSSadaf Ebrahimi   PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
75*22dc650dSSadaf Ebrahimi {
76*22dc650dSSadaf Ebrahimi PCRE2_SPTR first, last, entry;
77*22dc650dSSadaf Ebrahimi int failrc, entrysize;
78*22dc650dSSadaf Ebrahimi if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
79*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_DFA_UFUNC;
80*22dc650dSSadaf Ebrahimi entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
81*22dc650dSSadaf Ebrahimi   &first, &last);
82*22dc650dSSadaf Ebrahimi if (entrysize < 0) return entrysize;
83*22dc650dSSadaf Ebrahimi failrc = PCRE2_ERROR_UNAVAILABLE;
84*22dc650dSSadaf Ebrahimi for (entry = first; entry <= last; entry += entrysize)
85*22dc650dSSadaf Ebrahimi   {
86*22dc650dSSadaf Ebrahimi   uint32_t n = GET2(entry, 0);
87*22dc650dSSadaf Ebrahimi   if (n < match_data->oveccount)
88*22dc650dSSadaf Ebrahimi     {
89*22dc650dSSadaf Ebrahimi     if (match_data->ovector[n*2] != PCRE2_UNSET)
90*22dc650dSSadaf Ebrahimi       return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
91*22dc650dSSadaf Ebrahimi     failrc = PCRE2_ERROR_UNSET;
92*22dc650dSSadaf Ebrahimi     }
93*22dc650dSSadaf Ebrahimi   }
94*22dc650dSSadaf Ebrahimi return failrc;
95*22dc650dSSadaf Ebrahimi }
96*22dc650dSSadaf Ebrahimi 
97*22dc650dSSadaf Ebrahimi 
98*22dc650dSSadaf Ebrahimi 
99*22dc650dSSadaf Ebrahimi /*************************************************
100*22dc650dSSadaf Ebrahimi *  Copy numbered captured string to given buffer *
101*22dc650dSSadaf Ebrahimi *************************************************/
102*22dc650dSSadaf Ebrahimi 
103*22dc650dSSadaf Ebrahimi /* This function copies a single captured substring into a given buffer,
104*22dc650dSSadaf Ebrahimi identifying it by number.
105*22dc650dSSadaf Ebrahimi 
106*22dc650dSSadaf Ebrahimi Arguments:
107*22dc650dSSadaf Ebrahimi   match_data     points to the match data
108*22dc650dSSadaf Ebrahimi   stringnumber   the number of the required substring
109*22dc650dSSadaf Ebrahimi   buffer         where to put the substring
110*22dc650dSSadaf Ebrahimi   sizeptr        the size of the buffer, updated to the size of the substring
111*22dc650dSSadaf Ebrahimi 
112*22dc650dSSadaf Ebrahimi Returns:         if successful: 0
113*22dc650dSSadaf Ebrahimi                  if not successful, a negative error code:
114*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_NOMEMORY: buffer too small
115*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_NOSUBSTRING: no such substring
116*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_UNAVAILABLE: ovector too small
117*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_UNSET: substring is not set
118*22dc650dSSadaf Ebrahimi */
119*22dc650dSSadaf Ebrahimi 
120*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data * match_data,uint32_t stringnumber,PCRE2_UCHAR * buffer,PCRE2_SIZE * sizeptr)121*22dc650dSSadaf Ebrahimi pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
122*22dc650dSSadaf Ebrahimi   uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
123*22dc650dSSadaf Ebrahimi {
124*22dc650dSSadaf Ebrahimi int rc;
125*22dc650dSSadaf Ebrahimi PCRE2_SIZE size;
126*22dc650dSSadaf Ebrahimi rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
127*22dc650dSSadaf Ebrahimi if (rc < 0) return rc;
128*22dc650dSSadaf Ebrahimi if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
129*22dc650dSSadaf Ebrahimi memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
130*22dc650dSSadaf Ebrahimi   CU2BYTES(size));
131*22dc650dSSadaf Ebrahimi buffer[size] = 0;
132*22dc650dSSadaf Ebrahimi *sizeptr = size;
133*22dc650dSSadaf Ebrahimi return 0;
134*22dc650dSSadaf Ebrahimi }
135*22dc650dSSadaf Ebrahimi 
136*22dc650dSSadaf Ebrahimi 
137*22dc650dSSadaf Ebrahimi 
138*22dc650dSSadaf Ebrahimi /*************************************************
139*22dc650dSSadaf Ebrahimi *          Extract named captured string         *
140*22dc650dSSadaf Ebrahimi *************************************************/
141*22dc650dSSadaf Ebrahimi 
142*22dc650dSSadaf Ebrahimi /* This function copies a single captured substring, identified by name, into
143*22dc650dSSadaf Ebrahimi new memory. If the regex permits duplicate names, the first substring that is
144*22dc650dSSadaf Ebrahimi set is chosen.
145*22dc650dSSadaf Ebrahimi 
146*22dc650dSSadaf Ebrahimi Arguments:
147*22dc650dSSadaf Ebrahimi   match_data     pointer to match_data
148*22dc650dSSadaf Ebrahimi   stringname     the name of the required substring
149*22dc650dSSadaf Ebrahimi   stringptr      where to put the pointer to the new memory
150*22dc650dSSadaf Ebrahimi   sizeptr        where to put the length of the substring
151*22dc650dSSadaf Ebrahimi 
152*22dc650dSSadaf Ebrahimi Returns:         if successful: zero
153*22dc650dSSadaf Ebrahimi                  if not successful, a negative value:
154*22dc650dSSadaf Ebrahimi                    (1) an error from nametable_scan()
155*22dc650dSSadaf Ebrahimi                    (2) an error from get_bynumber()
156*22dc650dSSadaf Ebrahimi                    (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
157*22dc650dSSadaf Ebrahimi                    (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
158*22dc650dSSadaf Ebrahimi */
159*22dc650dSSadaf Ebrahimi 
160*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_byname(pcre2_match_data * match_data,PCRE2_SPTR stringname,PCRE2_UCHAR ** stringptr,PCRE2_SIZE * sizeptr)161*22dc650dSSadaf Ebrahimi pcre2_substring_get_byname(pcre2_match_data *match_data,
162*22dc650dSSadaf Ebrahimi   PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
163*22dc650dSSadaf Ebrahimi {
164*22dc650dSSadaf Ebrahimi PCRE2_SPTR first, last, entry;
165*22dc650dSSadaf Ebrahimi int failrc, entrysize;
166*22dc650dSSadaf Ebrahimi if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
167*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_DFA_UFUNC;
168*22dc650dSSadaf Ebrahimi entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
169*22dc650dSSadaf Ebrahimi   &first, &last);
170*22dc650dSSadaf Ebrahimi if (entrysize < 0) return entrysize;
171*22dc650dSSadaf Ebrahimi failrc = PCRE2_ERROR_UNAVAILABLE;
172*22dc650dSSadaf Ebrahimi for (entry = first; entry <= last; entry += entrysize)
173*22dc650dSSadaf Ebrahimi   {
174*22dc650dSSadaf Ebrahimi   uint32_t n = GET2(entry, 0);
175*22dc650dSSadaf Ebrahimi   if (n < match_data->oveccount)
176*22dc650dSSadaf Ebrahimi     {
177*22dc650dSSadaf Ebrahimi     if (match_data->ovector[n*2] != PCRE2_UNSET)
178*22dc650dSSadaf Ebrahimi       return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
179*22dc650dSSadaf Ebrahimi     failrc = PCRE2_ERROR_UNSET;
180*22dc650dSSadaf Ebrahimi     }
181*22dc650dSSadaf Ebrahimi   }
182*22dc650dSSadaf Ebrahimi return failrc;
183*22dc650dSSadaf Ebrahimi }
184*22dc650dSSadaf Ebrahimi 
185*22dc650dSSadaf Ebrahimi 
186*22dc650dSSadaf Ebrahimi 
187*22dc650dSSadaf Ebrahimi /*************************************************
188*22dc650dSSadaf Ebrahimi *      Extract captured string to new memory     *
189*22dc650dSSadaf Ebrahimi *************************************************/
190*22dc650dSSadaf Ebrahimi 
191*22dc650dSSadaf Ebrahimi /* This function copies a single captured substring into a piece of new
192*22dc650dSSadaf Ebrahimi memory.
193*22dc650dSSadaf Ebrahimi 
194*22dc650dSSadaf Ebrahimi Arguments:
195*22dc650dSSadaf Ebrahimi   match_data     points to match data
196*22dc650dSSadaf Ebrahimi   stringnumber   the number of the required substring
197*22dc650dSSadaf Ebrahimi   stringptr      where to put a pointer to the new memory
198*22dc650dSSadaf Ebrahimi   sizeptr        where to put the size of the substring
199*22dc650dSSadaf Ebrahimi 
200*22dc650dSSadaf Ebrahimi Returns:         if successful: 0
201*22dc650dSSadaf Ebrahimi                  if not successful, a negative error code:
202*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_NOMEMORY: failed to get memory
203*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_NOSUBSTRING: no such substring
204*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_UNAVAILABLE: ovector too small
205*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_UNSET: substring is not set
206*22dc650dSSadaf Ebrahimi */
207*22dc650dSSadaf Ebrahimi 
208*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data * match_data,uint32_t stringnumber,PCRE2_UCHAR ** stringptr,PCRE2_SIZE * sizeptr)209*22dc650dSSadaf Ebrahimi pcre2_substring_get_bynumber(pcre2_match_data *match_data,
210*22dc650dSSadaf Ebrahimi   uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
211*22dc650dSSadaf Ebrahimi {
212*22dc650dSSadaf Ebrahimi int rc;
213*22dc650dSSadaf Ebrahimi PCRE2_SIZE size;
214*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *yield;
215*22dc650dSSadaf Ebrahimi rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
216*22dc650dSSadaf Ebrahimi if (rc < 0) return rc;
217*22dc650dSSadaf Ebrahimi yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
218*22dc650dSSadaf Ebrahimi   (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
219*22dc650dSSadaf Ebrahimi if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
220*22dc650dSSadaf Ebrahimi yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
221*22dc650dSSadaf Ebrahimi memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
222*22dc650dSSadaf Ebrahimi   CU2BYTES(size));
223*22dc650dSSadaf Ebrahimi yield[size] = 0;
224*22dc650dSSadaf Ebrahimi *stringptr = yield;
225*22dc650dSSadaf Ebrahimi *sizeptr = size;
226*22dc650dSSadaf Ebrahimi return 0;
227*22dc650dSSadaf Ebrahimi }
228*22dc650dSSadaf Ebrahimi 
229*22dc650dSSadaf Ebrahimi 
230*22dc650dSSadaf Ebrahimi 
231*22dc650dSSadaf Ebrahimi /*************************************************
232*22dc650dSSadaf Ebrahimi *       Free memory obtained by get_substring    *
233*22dc650dSSadaf Ebrahimi *************************************************/
234*22dc650dSSadaf Ebrahimi 
235*22dc650dSSadaf Ebrahimi /*
236*22dc650dSSadaf Ebrahimi Argument:     the result of a previous pcre2_substring_get_byxxx()
237*22dc650dSSadaf Ebrahimi Returns:      nothing
238*22dc650dSSadaf Ebrahimi */
239*22dc650dSSadaf Ebrahimi 
240*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_free(PCRE2_UCHAR * string)241*22dc650dSSadaf Ebrahimi pcre2_substring_free(PCRE2_UCHAR *string)
242*22dc650dSSadaf Ebrahimi {
243*22dc650dSSadaf Ebrahimi if (string != NULL)
244*22dc650dSSadaf Ebrahimi   {
245*22dc650dSSadaf Ebrahimi   pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
246*22dc650dSSadaf Ebrahimi   memctl->free(memctl, memctl->memory_data);
247*22dc650dSSadaf Ebrahimi   }
248*22dc650dSSadaf Ebrahimi }
249*22dc650dSSadaf Ebrahimi 
250*22dc650dSSadaf Ebrahimi 
251*22dc650dSSadaf Ebrahimi 
252*22dc650dSSadaf Ebrahimi /*************************************************
253*22dc650dSSadaf Ebrahimi *         Get length of a named substring        *
254*22dc650dSSadaf Ebrahimi *************************************************/
255*22dc650dSSadaf Ebrahimi 
256*22dc650dSSadaf Ebrahimi /* This function returns the length of a named captured substring. If the regex
257*22dc650dSSadaf Ebrahimi permits duplicate names, the first substring that is set is chosen.
258*22dc650dSSadaf Ebrahimi 
259*22dc650dSSadaf Ebrahimi Arguments:
260*22dc650dSSadaf Ebrahimi   match_data      pointer to match data
261*22dc650dSSadaf Ebrahimi   stringname      the name of the required substring
262*22dc650dSSadaf Ebrahimi   sizeptr         where to put the length
263*22dc650dSSadaf Ebrahimi 
264*22dc650dSSadaf Ebrahimi Returns:          0 if successful, else a negative error number
265*22dc650dSSadaf Ebrahimi */
266*22dc650dSSadaf Ebrahimi 
267*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_byname(pcre2_match_data * match_data,PCRE2_SPTR stringname,PCRE2_SIZE * sizeptr)268*22dc650dSSadaf Ebrahimi pcre2_substring_length_byname(pcre2_match_data *match_data,
269*22dc650dSSadaf Ebrahimi   PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
270*22dc650dSSadaf Ebrahimi {
271*22dc650dSSadaf Ebrahimi PCRE2_SPTR first, last, entry;
272*22dc650dSSadaf Ebrahimi int failrc, entrysize;
273*22dc650dSSadaf Ebrahimi if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
274*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_DFA_UFUNC;
275*22dc650dSSadaf Ebrahimi entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
276*22dc650dSSadaf Ebrahimi   &first, &last);
277*22dc650dSSadaf Ebrahimi if (entrysize < 0) return entrysize;
278*22dc650dSSadaf Ebrahimi failrc = PCRE2_ERROR_UNAVAILABLE;
279*22dc650dSSadaf Ebrahimi for (entry = first; entry <= last; entry += entrysize)
280*22dc650dSSadaf Ebrahimi   {
281*22dc650dSSadaf Ebrahimi   uint32_t n = GET2(entry, 0);
282*22dc650dSSadaf Ebrahimi   if (n < match_data->oveccount)
283*22dc650dSSadaf Ebrahimi     {
284*22dc650dSSadaf Ebrahimi     if (match_data->ovector[n*2] != PCRE2_UNSET)
285*22dc650dSSadaf Ebrahimi       return pcre2_substring_length_bynumber(match_data, n, sizeptr);
286*22dc650dSSadaf Ebrahimi     failrc = PCRE2_ERROR_UNSET;
287*22dc650dSSadaf Ebrahimi     }
288*22dc650dSSadaf Ebrahimi   }
289*22dc650dSSadaf Ebrahimi return failrc;
290*22dc650dSSadaf Ebrahimi }
291*22dc650dSSadaf Ebrahimi 
292*22dc650dSSadaf Ebrahimi 
293*22dc650dSSadaf Ebrahimi 
294*22dc650dSSadaf Ebrahimi /*************************************************
295*22dc650dSSadaf Ebrahimi *        Get length of a numbered substring      *
296*22dc650dSSadaf Ebrahimi *************************************************/
297*22dc650dSSadaf Ebrahimi 
298*22dc650dSSadaf Ebrahimi /* This function returns the length of a captured substring. If the start is
299*22dc650dSSadaf Ebrahimi beyond the end (which can happen when \K is used in an assertion), it sets the
300*22dc650dSSadaf Ebrahimi length to zero.
301*22dc650dSSadaf Ebrahimi 
302*22dc650dSSadaf Ebrahimi Arguments:
303*22dc650dSSadaf Ebrahimi   match_data      pointer to match data
304*22dc650dSSadaf Ebrahimi   stringnumber    the number of the required substring
305*22dc650dSSadaf Ebrahimi   sizeptr         where to put the length, if not NULL
306*22dc650dSSadaf Ebrahimi 
307*22dc650dSSadaf Ebrahimi Returns:         if successful: 0
308*22dc650dSSadaf Ebrahimi                  if not successful, a negative error code:
309*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_NOSUBSTRING: no such substring
310*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_UNAVAILABLE: ovector is too small
311*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_UNSET: substring is not set
312*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
313*22dc650dSSadaf Ebrahimi */
314*22dc650dSSadaf Ebrahimi 
315*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_bynumber(pcre2_match_data * match_data,uint32_t stringnumber,PCRE2_SIZE * sizeptr)316*22dc650dSSadaf Ebrahimi pcre2_substring_length_bynumber(pcre2_match_data *match_data,
317*22dc650dSSadaf Ebrahimi   uint32_t stringnumber, PCRE2_SIZE *sizeptr)
318*22dc650dSSadaf Ebrahimi {
319*22dc650dSSadaf Ebrahimi PCRE2_SIZE left, right;
320*22dc650dSSadaf Ebrahimi int count = match_data->rc;
321*22dc650dSSadaf Ebrahimi if (count == PCRE2_ERROR_PARTIAL)
322*22dc650dSSadaf Ebrahimi   {
323*22dc650dSSadaf Ebrahimi   if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
324*22dc650dSSadaf Ebrahimi   count = 0;
325*22dc650dSSadaf Ebrahimi   }
326*22dc650dSSadaf Ebrahimi else if (count < 0) return count;            /* Match failed */
327*22dc650dSSadaf Ebrahimi 
328*22dc650dSSadaf Ebrahimi if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
329*22dc650dSSadaf Ebrahimi   {
330*22dc650dSSadaf Ebrahimi   if (stringnumber > match_data->code->top_bracket)
331*22dc650dSSadaf Ebrahimi     return PCRE2_ERROR_NOSUBSTRING;
332*22dc650dSSadaf Ebrahimi   if (stringnumber >= match_data->oveccount)
333*22dc650dSSadaf Ebrahimi     return PCRE2_ERROR_UNAVAILABLE;
334*22dc650dSSadaf Ebrahimi   if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
335*22dc650dSSadaf Ebrahimi     return PCRE2_ERROR_UNSET;
336*22dc650dSSadaf Ebrahimi   }
337*22dc650dSSadaf Ebrahimi else  /* Matched using pcre2_dfa_match() */
338*22dc650dSSadaf Ebrahimi   {
339*22dc650dSSadaf Ebrahimi   if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
340*22dc650dSSadaf Ebrahimi   if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
341*22dc650dSSadaf Ebrahimi   }
342*22dc650dSSadaf Ebrahimi 
343*22dc650dSSadaf Ebrahimi left = match_data->ovector[stringnumber*2];
344*22dc650dSSadaf Ebrahimi right = match_data->ovector[stringnumber*2+1];
345*22dc650dSSadaf Ebrahimi if (left > match_data->subject_length || right > match_data->subject_length)
346*22dc650dSSadaf Ebrahimi   return PCRE2_ERROR_INVALIDOFFSET;
347*22dc650dSSadaf Ebrahimi if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
348*22dc650dSSadaf Ebrahimi return 0;
349*22dc650dSSadaf Ebrahimi }
350*22dc650dSSadaf Ebrahimi 
351*22dc650dSSadaf Ebrahimi 
352*22dc650dSSadaf Ebrahimi 
353*22dc650dSSadaf Ebrahimi /*************************************************
354*22dc650dSSadaf Ebrahimi *    Extract all captured strings to new memory  *
355*22dc650dSSadaf Ebrahimi *************************************************/
356*22dc650dSSadaf Ebrahimi 
357*22dc650dSSadaf Ebrahimi /* This function gets one chunk of memory and builds a list of pointers and all
358*22dc650dSSadaf Ebrahimi the captured substrings in it. A NULL pointer is put on the end of the list.
359*22dc650dSSadaf Ebrahimi The substrings are zero-terminated, but also, if the final argument is
360*22dc650dSSadaf Ebrahimi non-NULL, a list of lengths is also returned. This allows binary data to be
361*22dc650dSSadaf Ebrahimi handled.
362*22dc650dSSadaf Ebrahimi 
363*22dc650dSSadaf Ebrahimi Arguments:
364*22dc650dSSadaf Ebrahimi   match_data     points to the match data
365*22dc650dSSadaf Ebrahimi   listptr        set to point to the list of pointers
366*22dc650dSSadaf Ebrahimi   lengthsptr     set to point to the list of lengths (may be NULL)
367*22dc650dSSadaf Ebrahimi 
368*22dc650dSSadaf Ebrahimi Returns:         if successful: 0
369*22dc650dSSadaf Ebrahimi                  if not successful, a negative error code:
370*22dc650dSSadaf Ebrahimi                    PCRE2_ERROR_NOMEMORY: failed to get memory,
371*22dc650dSSadaf Ebrahimi                    or a match failure code
372*22dc650dSSadaf Ebrahimi */
373*22dc650dSSadaf Ebrahimi 
374*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data * match_data,PCRE2_UCHAR *** listptr,PCRE2_SIZE ** lengthsptr)375*22dc650dSSadaf Ebrahimi pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
376*22dc650dSSadaf Ebrahimi   PCRE2_SIZE **lengthsptr)
377*22dc650dSSadaf Ebrahimi {
378*22dc650dSSadaf Ebrahimi int i, count, count2;
379*22dc650dSSadaf Ebrahimi PCRE2_SIZE size;
380*22dc650dSSadaf Ebrahimi PCRE2_SIZE *lensp;
381*22dc650dSSadaf Ebrahimi pcre2_memctl *memp;
382*22dc650dSSadaf Ebrahimi PCRE2_UCHAR **listp;
383*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *sp;
384*22dc650dSSadaf Ebrahimi PCRE2_SIZE *ovector;
385*22dc650dSSadaf Ebrahimi 
386*22dc650dSSadaf Ebrahimi if ((count = match_data->rc) < 0) return count;   /* Match failed */
387*22dc650dSSadaf Ebrahimi if (count == 0) count = match_data->oveccount;    /* Ovector too small */
388*22dc650dSSadaf Ebrahimi 
389*22dc650dSSadaf Ebrahimi count2 = 2*count;
390*22dc650dSSadaf Ebrahimi ovector = match_data->ovector;
391*22dc650dSSadaf Ebrahimi size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);      /* For final NULL */
392*22dc650dSSadaf Ebrahimi if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count;  /* For lengths */
393*22dc650dSSadaf Ebrahimi 
394*22dc650dSSadaf Ebrahimi for (i = 0; i < count2; i += 2)
395*22dc650dSSadaf Ebrahimi   {
396*22dc650dSSadaf Ebrahimi   size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
397*22dc650dSSadaf Ebrahimi   if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
398*22dc650dSSadaf Ebrahimi   }
399*22dc650dSSadaf Ebrahimi 
400*22dc650dSSadaf Ebrahimi memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
401*22dc650dSSadaf Ebrahimi if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
402*22dc650dSSadaf Ebrahimi 
403*22dc650dSSadaf Ebrahimi *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
404*22dc650dSSadaf Ebrahimi lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
405*22dc650dSSadaf Ebrahimi 
406*22dc650dSSadaf Ebrahimi if (lengthsptr == NULL)
407*22dc650dSSadaf Ebrahimi   {
408*22dc650dSSadaf Ebrahimi   sp = (PCRE2_UCHAR *)lensp;
409*22dc650dSSadaf Ebrahimi   lensp = NULL;
410*22dc650dSSadaf Ebrahimi   }
411*22dc650dSSadaf Ebrahimi else
412*22dc650dSSadaf Ebrahimi   {
413*22dc650dSSadaf Ebrahimi   *lengthsptr = lensp;
414*22dc650dSSadaf Ebrahimi   sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
415*22dc650dSSadaf Ebrahimi   }
416*22dc650dSSadaf Ebrahimi 
417*22dc650dSSadaf Ebrahimi for (i = 0; i < count2; i += 2)
418*22dc650dSSadaf Ebrahimi   {
419*22dc650dSSadaf Ebrahimi   size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
420*22dc650dSSadaf Ebrahimi 
421*22dc650dSSadaf Ebrahimi   /* Size == 0 includes the case when the capture is unset. Avoid adding
422*22dc650dSSadaf Ebrahimi   PCRE2_UNSET to match_data->subject because it overflows, even though with
423*22dc650dSSadaf Ebrahimi   zero size calling memcpy() is harmless. */
424*22dc650dSSadaf Ebrahimi 
425*22dc650dSSadaf Ebrahimi   if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
426*22dc650dSSadaf Ebrahimi   *listp++ = sp;
427*22dc650dSSadaf Ebrahimi   if (lensp != NULL) *lensp++ = size;
428*22dc650dSSadaf Ebrahimi   sp += size;
429*22dc650dSSadaf Ebrahimi   *sp++ = 0;
430*22dc650dSSadaf Ebrahimi   }
431*22dc650dSSadaf Ebrahimi 
432*22dc650dSSadaf Ebrahimi *listp = NULL;
433*22dc650dSSadaf Ebrahimi return 0;
434*22dc650dSSadaf Ebrahimi }
435*22dc650dSSadaf Ebrahimi 
436*22dc650dSSadaf Ebrahimi 
437*22dc650dSSadaf Ebrahimi 
438*22dc650dSSadaf Ebrahimi /*************************************************
439*22dc650dSSadaf Ebrahimi *   Free memory obtained by substring_list_get   *
440*22dc650dSSadaf Ebrahimi *************************************************/
441*22dc650dSSadaf Ebrahimi 
442*22dc650dSSadaf Ebrahimi /*
443*22dc650dSSadaf Ebrahimi Argument:     the result of a previous pcre2_substring_list_get()
444*22dc650dSSadaf Ebrahimi Returns:      nothing
445*22dc650dSSadaf Ebrahimi */
446*22dc650dSSadaf Ebrahimi 
447*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_list_free(PCRE2_UCHAR ** list)448*22dc650dSSadaf Ebrahimi pcre2_substring_list_free(PCRE2_UCHAR **list)
449*22dc650dSSadaf Ebrahimi {
450*22dc650dSSadaf Ebrahimi if (list != NULL)
451*22dc650dSSadaf Ebrahimi   {
452*22dc650dSSadaf Ebrahimi   pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
453*22dc650dSSadaf Ebrahimi   memctl->free(memctl, memctl->memory_data);
454*22dc650dSSadaf Ebrahimi   }
455*22dc650dSSadaf Ebrahimi }
456*22dc650dSSadaf Ebrahimi 
457*22dc650dSSadaf Ebrahimi 
458*22dc650dSSadaf Ebrahimi 
459*22dc650dSSadaf Ebrahimi /*************************************************
460*22dc650dSSadaf Ebrahimi *     Find (multiple) entries for named string   *
461*22dc650dSSadaf Ebrahimi *************************************************/
462*22dc650dSSadaf Ebrahimi 
463*22dc650dSSadaf Ebrahimi /* This function scans the nametable for a given name, using binary chop. It
464*22dc650dSSadaf Ebrahimi returns either two pointers to the entries in the table, or, if no pointers are
465*22dc650dSSadaf Ebrahimi given, the number of a unique group with the given name. If duplicate names are
466*22dc650dSSadaf Ebrahimi permitted, and the name is not unique, an error is generated.
467*22dc650dSSadaf Ebrahimi 
468*22dc650dSSadaf Ebrahimi Arguments:
469*22dc650dSSadaf Ebrahimi   code        the compiled regex
470*22dc650dSSadaf Ebrahimi   stringname  the name whose entries required
471*22dc650dSSadaf Ebrahimi   firstptr    where to put the pointer to the first entry
472*22dc650dSSadaf Ebrahimi   lastptr     where to put the pointer to the last entry
473*22dc650dSSadaf Ebrahimi 
474*22dc650dSSadaf Ebrahimi Returns:      PCRE2_ERROR_NOSUBSTRING if the name is not found
475*22dc650dSSadaf Ebrahimi               otherwise, if firstptr and lastptr are NULL:
476*22dc650dSSadaf Ebrahimi                 a group number for a unique substring
477*22dc650dSSadaf Ebrahimi                 else PCRE2_ERROR_NOUNIQUESUBSTRING
478*22dc650dSSadaf Ebrahimi               otherwise:
479*22dc650dSSadaf Ebrahimi                 the length of each entry, having set firstptr and lastptr
480*22dc650dSSadaf Ebrahimi */
481*22dc650dSSadaf Ebrahimi 
482*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_nametable_scan(const pcre2_code * code,PCRE2_SPTR stringname,PCRE2_SPTR * firstptr,PCRE2_SPTR * lastptr)483*22dc650dSSadaf Ebrahimi pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
484*22dc650dSSadaf Ebrahimi   PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
485*22dc650dSSadaf Ebrahimi {
486*22dc650dSSadaf Ebrahimi uint16_t bot = 0;
487*22dc650dSSadaf Ebrahimi uint16_t top = code->name_count;
488*22dc650dSSadaf Ebrahimi uint16_t entrysize = code->name_entry_size;
489*22dc650dSSadaf Ebrahimi PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
490*22dc650dSSadaf Ebrahimi 
491*22dc650dSSadaf Ebrahimi while (top > bot)
492*22dc650dSSadaf Ebrahimi   {
493*22dc650dSSadaf Ebrahimi   uint16_t mid = (top + bot) / 2;
494*22dc650dSSadaf Ebrahimi   PCRE2_SPTR entry = nametable + entrysize*mid;
495*22dc650dSSadaf Ebrahimi   int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
496*22dc650dSSadaf Ebrahimi   if (c == 0)
497*22dc650dSSadaf Ebrahimi     {
498*22dc650dSSadaf Ebrahimi     PCRE2_SPTR first;
499*22dc650dSSadaf Ebrahimi     PCRE2_SPTR last;
500*22dc650dSSadaf Ebrahimi     PCRE2_SPTR lastentry;
501*22dc650dSSadaf Ebrahimi     lastentry = nametable + entrysize * (code->name_count - 1);
502*22dc650dSSadaf Ebrahimi     first = last = entry;
503*22dc650dSSadaf Ebrahimi     while (first > nametable)
504*22dc650dSSadaf Ebrahimi       {
505*22dc650dSSadaf Ebrahimi       if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
506*22dc650dSSadaf Ebrahimi       first -= entrysize;
507*22dc650dSSadaf Ebrahimi       }
508*22dc650dSSadaf Ebrahimi     while (last < lastentry)
509*22dc650dSSadaf Ebrahimi       {
510*22dc650dSSadaf Ebrahimi       if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
511*22dc650dSSadaf Ebrahimi       last += entrysize;
512*22dc650dSSadaf Ebrahimi       }
513*22dc650dSSadaf Ebrahimi     if (firstptr == NULL) return (first == last)?
514*22dc650dSSadaf Ebrahimi       (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
515*22dc650dSSadaf Ebrahimi     *firstptr = first;
516*22dc650dSSadaf Ebrahimi     *lastptr = last;
517*22dc650dSSadaf Ebrahimi     return entrysize;
518*22dc650dSSadaf Ebrahimi     }
519*22dc650dSSadaf Ebrahimi   if (c > 0) bot = mid + 1; else top = mid;
520*22dc650dSSadaf Ebrahimi   }
521*22dc650dSSadaf Ebrahimi 
522*22dc650dSSadaf Ebrahimi return PCRE2_ERROR_NOSUBSTRING;
523*22dc650dSSadaf Ebrahimi }
524*22dc650dSSadaf Ebrahimi 
525*22dc650dSSadaf Ebrahimi 
526*22dc650dSSadaf Ebrahimi /*************************************************
527*22dc650dSSadaf Ebrahimi *           Find number for named string         *
528*22dc650dSSadaf Ebrahimi *************************************************/
529*22dc650dSSadaf Ebrahimi 
530*22dc650dSSadaf Ebrahimi /* This function is a convenience wrapper for pcre2_substring_nametable_scan()
531*22dc650dSSadaf Ebrahimi when it is known that names are unique. If there are duplicate names, it is not
532*22dc650dSSadaf Ebrahimi defined which number is returned.
533*22dc650dSSadaf Ebrahimi 
534*22dc650dSSadaf Ebrahimi Arguments:
535*22dc650dSSadaf Ebrahimi   code        the compiled regex
536*22dc650dSSadaf Ebrahimi   stringname  the name whose number is required
537*22dc650dSSadaf Ebrahimi 
538*22dc650dSSadaf Ebrahimi Returns:      the number of the named parenthesis, or a negative number
539*22dc650dSSadaf Ebrahimi                 PCRE2_ERROR_NOSUBSTRING if not found
540*22dc650dSSadaf Ebrahimi                 PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
541*22dc650dSSadaf Ebrahimi */
542*22dc650dSSadaf Ebrahimi 
543*22dc650dSSadaf Ebrahimi PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_number_from_name(const pcre2_code * code,PCRE2_SPTR stringname)544*22dc650dSSadaf Ebrahimi pcre2_substring_number_from_name(const pcre2_code *code,
545*22dc650dSSadaf Ebrahimi   PCRE2_SPTR stringname)
546*22dc650dSSadaf Ebrahimi {
547*22dc650dSSadaf Ebrahimi return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
548*22dc650dSSadaf Ebrahimi }
549*22dc650dSSadaf Ebrahimi 
550*22dc650dSSadaf Ebrahimi /* End of pcre2_substring.c */
551