1*c9945492SAndroid Build Coastguard Worker /*
2*c9945492SAndroid Build Coastguard Worker regexec.c - TRE POSIX compatible matching functions (and more).
3*c9945492SAndroid Build Coastguard Worker
4*c9945492SAndroid Build Coastguard Worker Copyright (c) 2001-2009 Ville Laurikari <[email protected]>
5*c9945492SAndroid Build Coastguard Worker All rights reserved.
6*c9945492SAndroid Build Coastguard Worker
7*c9945492SAndroid Build Coastguard Worker Redistribution and use in source and binary forms, with or without
8*c9945492SAndroid Build Coastguard Worker modification, are permitted provided that the following conditions
9*c9945492SAndroid Build Coastguard Worker are met:
10*c9945492SAndroid Build Coastguard Worker
11*c9945492SAndroid Build Coastguard Worker 1. Redistributions of source code must retain the above copyright
12*c9945492SAndroid Build Coastguard Worker notice, this list of conditions and the following disclaimer.
13*c9945492SAndroid Build Coastguard Worker
14*c9945492SAndroid Build Coastguard Worker 2. Redistributions in binary form must reproduce the above copyright
15*c9945492SAndroid Build Coastguard Worker notice, this list of conditions and the following disclaimer in the
16*c9945492SAndroid Build Coastguard Worker documentation and/or other materials provided with the distribution.
17*c9945492SAndroid Build Coastguard Worker
18*c9945492SAndroid Build Coastguard Worker THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19*c9945492SAndroid Build Coastguard Worker ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20*c9945492SAndroid Build Coastguard Worker LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21*c9945492SAndroid Build Coastguard Worker A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22*c9945492SAndroid Build Coastguard Worker HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23*c9945492SAndroid Build Coastguard Worker SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24*c9945492SAndroid Build Coastguard Worker LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25*c9945492SAndroid Build Coastguard Worker DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26*c9945492SAndroid Build Coastguard Worker THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27*c9945492SAndroid Build Coastguard Worker (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28*c9945492SAndroid Build Coastguard Worker OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*c9945492SAndroid Build Coastguard Worker
30*c9945492SAndroid Build Coastguard Worker */
31*c9945492SAndroid Build Coastguard Worker
32*c9945492SAndroid Build Coastguard Worker #include <stdlib.h>
33*c9945492SAndroid Build Coastguard Worker #include <string.h>
34*c9945492SAndroid Build Coastguard Worker #include <wchar.h>
35*c9945492SAndroid Build Coastguard Worker #include <wctype.h>
36*c9945492SAndroid Build Coastguard Worker #include <limits.h>
37*c9945492SAndroid Build Coastguard Worker #include <stdint.h>
38*c9945492SAndroid Build Coastguard Worker
39*c9945492SAndroid Build Coastguard Worker #include <regex.h>
40*c9945492SAndroid Build Coastguard Worker
41*c9945492SAndroid Build Coastguard Worker #include "tre.h"
42*c9945492SAndroid Build Coastguard Worker
43*c9945492SAndroid Build Coastguard Worker #include <assert.h>
44*c9945492SAndroid Build Coastguard Worker
45*c9945492SAndroid Build Coastguard Worker static void
46*c9945492SAndroid Build Coastguard Worker tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
47*c9945492SAndroid Build Coastguard Worker const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
48*c9945492SAndroid Build Coastguard Worker
49*c9945492SAndroid Build Coastguard Worker /***********************************************************************
50*c9945492SAndroid Build Coastguard Worker from tre-match-utils.h
51*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
52*c9945492SAndroid Build Coastguard Worker
53*c9945492SAndroid Build Coastguard Worker #define GET_NEXT_WCHAR() do { \
54*c9945492SAndroid Build Coastguard Worker prev_c = next_c; pos += pos_add_next; \
55*c9945492SAndroid Build Coastguard Worker if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \
56*c9945492SAndroid Build Coastguard Worker if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \
57*c9945492SAndroid Build Coastguard Worker else pos_add_next++; \
58*c9945492SAndroid Build Coastguard Worker } \
59*c9945492SAndroid Build Coastguard Worker str_byte += pos_add_next; \
60*c9945492SAndroid Build Coastguard Worker } while (0)
61*c9945492SAndroid Build Coastguard Worker
62*c9945492SAndroid Build Coastguard Worker #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
63*c9945492SAndroid Build Coastguard Worker
64*c9945492SAndroid Build Coastguard Worker #define CHECK_ASSERTIONS(assertions) \
65*c9945492SAndroid Build Coastguard Worker (((assertions & ASSERT_AT_BOL) \
66*c9945492SAndroid Build Coastguard Worker && (pos > 0 || reg_notbol) \
67*c9945492SAndroid Build Coastguard Worker && (prev_c != L'\n' || !reg_newline)) \
68*c9945492SAndroid Build Coastguard Worker || ((assertions & ASSERT_AT_EOL) \
69*c9945492SAndroid Build Coastguard Worker && (next_c != L'\0' || reg_noteol) \
70*c9945492SAndroid Build Coastguard Worker && (next_c != L'\n' || !reg_newline)) \
71*c9945492SAndroid Build Coastguard Worker || ((assertions & ASSERT_AT_BOW) \
72*c9945492SAndroid Build Coastguard Worker && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
73*c9945492SAndroid Build Coastguard Worker || ((assertions & ASSERT_AT_EOW) \
74*c9945492SAndroid Build Coastguard Worker && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
75*c9945492SAndroid Build Coastguard Worker || ((assertions & ASSERT_AT_WB) \
76*c9945492SAndroid Build Coastguard Worker && (pos != 0 && next_c != L'\0' \
77*c9945492SAndroid Build Coastguard Worker && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
78*c9945492SAndroid Build Coastguard Worker || ((assertions & ASSERT_AT_WB_NEG) \
79*c9945492SAndroid Build Coastguard Worker && (pos == 0 || next_c == L'\0' \
80*c9945492SAndroid Build Coastguard Worker || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
81*c9945492SAndroid Build Coastguard Worker
82*c9945492SAndroid Build Coastguard Worker #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
83*c9945492SAndroid Build Coastguard Worker (((trans_i->assertions & ASSERT_CHAR_CLASS) \
84*c9945492SAndroid Build Coastguard Worker && !(tnfa->cflags & REG_ICASE) \
85*c9945492SAndroid Build Coastguard Worker && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \
86*c9945492SAndroid Build Coastguard Worker || ((trans_i->assertions & ASSERT_CHAR_CLASS) \
87*c9945492SAndroid Build Coastguard Worker && (tnfa->cflags & REG_ICASE) \
88*c9945492SAndroid Build Coastguard Worker && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \
89*c9945492SAndroid Build Coastguard Worker && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
90*c9945492SAndroid Build Coastguard Worker || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
91*c9945492SAndroid Build Coastguard Worker && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
92*c9945492SAndroid Build Coastguard Worker tnfa->cflags & REG_ICASE)))
93*c9945492SAndroid Build Coastguard Worker
94*c9945492SAndroid Build Coastguard Worker
95*c9945492SAndroid Build Coastguard Worker
96*c9945492SAndroid Build Coastguard Worker
97*c9945492SAndroid Build Coastguard Worker /* Returns 1 if `t1' wins `t2', 0 otherwise. */
98*c9945492SAndroid Build Coastguard Worker static int
tre_tag_order(int num_tags,tre_tag_direction_t * tag_directions,regoff_t * t1,regoff_t * t2)99*c9945492SAndroid Build Coastguard Worker tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
100*c9945492SAndroid Build Coastguard Worker regoff_t *t1, regoff_t *t2)
101*c9945492SAndroid Build Coastguard Worker {
102*c9945492SAndroid Build Coastguard Worker int i;
103*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
104*c9945492SAndroid Build Coastguard Worker {
105*c9945492SAndroid Build Coastguard Worker if (tag_directions[i] == TRE_TAG_MINIMIZE)
106*c9945492SAndroid Build Coastguard Worker {
107*c9945492SAndroid Build Coastguard Worker if (t1[i] < t2[i])
108*c9945492SAndroid Build Coastguard Worker return 1;
109*c9945492SAndroid Build Coastguard Worker if (t1[i] > t2[i])
110*c9945492SAndroid Build Coastguard Worker return 0;
111*c9945492SAndroid Build Coastguard Worker }
112*c9945492SAndroid Build Coastguard Worker else
113*c9945492SAndroid Build Coastguard Worker {
114*c9945492SAndroid Build Coastguard Worker if (t1[i] > t2[i])
115*c9945492SAndroid Build Coastguard Worker return 1;
116*c9945492SAndroid Build Coastguard Worker if (t1[i] < t2[i])
117*c9945492SAndroid Build Coastguard Worker return 0;
118*c9945492SAndroid Build Coastguard Worker }
119*c9945492SAndroid Build Coastguard Worker }
120*c9945492SAndroid Build Coastguard Worker /* assert(0);*/
121*c9945492SAndroid Build Coastguard Worker return 0;
122*c9945492SAndroid Build Coastguard Worker }
123*c9945492SAndroid Build Coastguard Worker
124*c9945492SAndroid Build Coastguard Worker static int
tre_neg_char_classes_match(tre_ctype_t * classes,tre_cint_t wc,int icase)125*c9945492SAndroid Build Coastguard Worker tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
126*c9945492SAndroid Build Coastguard Worker {
127*c9945492SAndroid Build Coastguard Worker while (*classes != (tre_ctype_t)0)
128*c9945492SAndroid Build Coastguard Worker if ((!icase && tre_isctype(wc, *classes))
129*c9945492SAndroid Build Coastguard Worker || (icase && (tre_isctype(tre_toupper(wc), *classes)
130*c9945492SAndroid Build Coastguard Worker || tre_isctype(tre_tolower(wc), *classes))))
131*c9945492SAndroid Build Coastguard Worker return 1; /* Match. */
132*c9945492SAndroid Build Coastguard Worker else
133*c9945492SAndroid Build Coastguard Worker classes++;
134*c9945492SAndroid Build Coastguard Worker return 0; /* No match. */
135*c9945492SAndroid Build Coastguard Worker }
136*c9945492SAndroid Build Coastguard Worker
137*c9945492SAndroid Build Coastguard Worker
138*c9945492SAndroid Build Coastguard Worker /***********************************************************************
139*c9945492SAndroid Build Coastguard Worker from tre-match-parallel.c
140*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
141*c9945492SAndroid Build Coastguard Worker
142*c9945492SAndroid Build Coastguard Worker /*
143*c9945492SAndroid Build Coastguard Worker This algorithm searches for matches basically by reading characters
144*c9945492SAndroid Build Coastguard Worker in the searched string one by one, starting at the beginning. All
145*c9945492SAndroid Build Coastguard Worker matching paths in the TNFA are traversed in parallel. When two or
146*c9945492SAndroid Build Coastguard Worker more paths reach the same state, exactly one is chosen according to
147*c9945492SAndroid Build Coastguard Worker tag ordering rules; if returning submatches is not required it does
148*c9945492SAndroid Build Coastguard Worker not matter which path is chosen.
149*c9945492SAndroid Build Coastguard Worker
150*c9945492SAndroid Build Coastguard Worker The worst case time required for finding the leftmost and longest
151*c9945492SAndroid Build Coastguard Worker match, or determining that there is no match, is always linearly
152*c9945492SAndroid Build Coastguard Worker dependent on the length of the text being searched.
153*c9945492SAndroid Build Coastguard Worker
154*c9945492SAndroid Build Coastguard Worker This algorithm cannot handle TNFAs with back referencing nodes.
155*c9945492SAndroid Build Coastguard Worker See `tre-match-backtrack.c'.
156*c9945492SAndroid Build Coastguard Worker */
157*c9945492SAndroid Build Coastguard Worker
158*c9945492SAndroid Build Coastguard Worker typedef struct {
159*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *state;
160*c9945492SAndroid Build Coastguard Worker regoff_t *tags;
161*c9945492SAndroid Build Coastguard Worker } tre_tnfa_reach_t;
162*c9945492SAndroid Build Coastguard Worker
163*c9945492SAndroid Build Coastguard Worker typedef struct {
164*c9945492SAndroid Build Coastguard Worker regoff_t pos;
165*c9945492SAndroid Build Coastguard Worker regoff_t **tags;
166*c9945492SAndroid Build Coastguard Worker } tre_reach_pos_t;
167*c9945492SAndroid Build Coastguard Worker
168*c9945492SAndroid Build Coastguard Worker
169*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t * tnfa,const void * string,regoff_t * match_tags,int eflags,regoff_t * match_end_ofs)170*c9945492SAndroid Build Coastguard Worker tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
171*c9945492SAndroid Build Coastguard Worker regoff_t *match_tags, int eflags,
172*c9945492SAndroid Build Coastguard Worker regoff_t *match_end_ofs)
173*c9945492SAndroid Build Coastguard Worker {
174*c9945492SAndroid Build Coastguard Worker /* State variables required by GET_NEXT_WCHAR. */
175*c9945492SAndroid Build Coastguard Worker tre_char_t prev_c = 0, next_c = 0;
176*c9945492SAndroid Build Coastguard Worker const char *str_byte = string;
177*c9945492SAndroid Build Coastguard Worker regoff_t pos = -1;
178*c9945492SAndroid Build Coastguard Worker regoff_t pos_add_next = 1;
179*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
180*c9945492SAndroid Build Coastguard Worker mbstate_t mbstate;
181*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
182*c9945492SAndroid Build Coastguard Worker int reg_notbol = eflags & REG_NOTBOL;
183*c9945492SAndroid Build Coastguard Worker int reg_noteol = eflags & REG_NOTEOL;
184*c9945492SAndroid Build Coastguard Worker int reg_newline = tnfa->cflags & REG_NEWLINE;
185*c9945492SAndroid Build Coastguard Worker reg_errcode_t ret;
186*c9945492SAndroid Build Coastguard Worker
187*c9945492SAndroid Build Coastguard Worker char *buf;
188*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *trans_i;
189*c9945492SAndroid Build Coastguard Worker tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
190*c9945492SAndroid Build Coastguard Worker tre_reach_pos_t *reach_pos;
191*c9945492SAndroid Build Coastguard Worker int *tag_i;
192*c9945492SAndroid Build Coastguard Worker int num_tags, i;
193*c9945492SAndroid Build Coastguard Worker
194*c9945492SAndroid Build Coastguard Worker regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */
195*c9945492SAndroid Build Coastguard Worker int new_match = 0;
196*c9945492SAndroid Build Coastguard Worker regoff_t *tmp_tags = NULL;
197*c9945492SAndroid Build Coastguard Worker regoff_t *tmp_iptr;
198*c9945492SAndroid Build Coastguard Worker
199*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
200*c9945492SAndroid Build Coastguard Worker memset(&mbstate, '\0', sizeof(mbstate));
201*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
202*c9945492SAndroid Build Coastguard Worker
203*c9945492SAndroid Build Coastguard Worker if (!match_tags)
204*c9945492SAndroid Build Coastguard Worker num_tags = 0;
205*c9945492SAndroid Build Coastguard Worker else
206*c9945492SAndroid Build Coastguard Worker num_tags = tnfa->num_tags;
207*c9945492SAndroid Build Coastguard Worker
208*c9945492SAndroid Build Coastguard Worker /* Allocate memory for temporary data required for matching. This needs to
209*c9945492SAndroid Build Coastguard Worker be done for every matching operation to be thread safe. This allocates
210*c9945492SAndroid Build Coastguard Worker everything in a single large block with calloc(). */
211*c9945492SAndroid Build Coastguard Worker {
212*c9945492SAndroid Build Coastguard Worker size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
213*c9945492SAndroid Build Coastguard Worker char *tmp_buf;
214*c9945492SAndroid Build Coastguard Worker
215*c9945492SAndroid Build Coastguard Worker /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
216*c9945492SAndroid Build Coastguard Worker * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
217*c9945492SAndroid Build Coastguard Worker if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
218*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
219*c9945492SAndroid Build Coastguard Worker
220*c9945492SAndroid Build Coastguard Worker /* Likewise check rbytes. */
221*c9945492SAndroid Build Coastguard Worker if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
222*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
223*c9945492SAndroid Build Coastguard Worker
224*c9945492SAndroid Build Coastguard Worker /* Likewise check pbytes. */
225*c9945492SAndroid Build Coastguard Worker if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
226*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
227*c9945492SAndroid Build Coastguard Worker
228*c9945492SAndroid Build Coastguard Worker /* Compute the length of the block we need. */
229*c9945492SAndroid Build Coastguard Worker tbytes = sizeof(*tmp_tags) * num_tags;
230*c9945492SAndroid Build Coastguard Worker rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
231*c9945492SAndroid Build Coastguard Worker pbytes = sizeof(*reach_pos) * tnfa->num_states;
232*c9945492SAndroid Build Coastguard Worker xbytes = sizeof(regoff_t) * num_tags;
233*c9945492SAndroid Build Coastguard Worker total_bytes =
234*c9945492SAndroid Build Coastguard Worker (sizeof(long) - 1) * 4 /* for alignment paddings */
235*c9945492SAndroid Build Coastguard Worker + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
236*c9945492SAndroid Build Coastguard Worker
237*c9945492SAndroid Build Coastguard Worker /* Allocate the memory. */
238*c9945492SAndroid Build Coastguard Worker buf = calloc(total_bytes, 1);
239*c9945492SAndroid Build Coastguard Worker if (buf == NULL)
240*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
241*c9945492SAndroid Build Coastguard Worker
242*c9945492SAndroid Build Coastguard Worker /* Get the various pointers within tmp_buf (properly aligned). */
243*c9945492SAndroid Build Coastguard Worker tmp_tags = (void *)buf;
244*c9945492SAndroid Build Coastguard Worker tmp_buf = buf + tbytes;
245*c9945492SAndroid Build Coastguard Worker tmp_buf += ALIGN(tmp_buf, long);
246*c9945492SAndroid Build Coastguard Worker reach_next = (void *)tmp_buf;
247*c9945492SAndroid Build Coastguard Worker tmp_buf += rbytes;
248*c9945492SAndroid Build Coastguard Worker tmp_buf += ALIGN(tmp_buf, long);
249*c9945492SAndroid Build Coastguard Worker reach = (void *)tmp_buf;
250*c9945492SAndroid Build Coastguard Worker tmp_buf += rbytes;
251*c9945492SAndroid Build Coastguard Worker tmp_buf += ALIGN(tmp_buf, long);
252*c9945492SAndroid Build Coastguard Worker reach_pos = (void *)tmp_buf;
253*c9945492SAndroid Build Coastguard Worker tmp_buf += pbytes;
254*c9945492SAndroid Build Coastguard Worker tmp_buf += ALIGN(tmp_buf, long);
255*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_states; i++)
256*c9945492SAndroid Build Coastguard Worker {
257*c9945492SAndroid Build Coastguard Worker reach[i].tags = (void *)tmp_buf;
258*c9945492SAndroid Build Coastguard Worker tmp_buf += xbytes;
259*c9945492SAndroid Build Coastguard Worker reach_next[i].tags = (void *)tmp_buf;
260*c9945492SAndroid Build Coastguard Worker tmp_buf += xbytes;
261*c9945492SAndroid Build Coastguard Worker }
262*c9945492SAndroid Build Coastguard Worker }
263*c9945492SAndroid Build Coastguard Worker
264*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_states; i++)
265*c9945492SAndroid Build Coastguard Worker reach_pos[i].pos = -1;
266*c9945492SAndroid Build Coastguard Worker
267*c9945492SAndroid Build Coastguard Worker GET_NEXT_WCHAR();
268*c9945492SAndroid Build Coastguard Worker pos = 0;
269*c9945492SAndroid Build Coastguard Worker
270*c9945492SAndroid Build Coastguard Worker reach_next_i = reach_next;
271*c9945492SAndroid Build Coastguard Worker while (1)
272*c9945492SAndroid Build Coastguard Worker {
273*c9945492SAndroid Build Coastguard Worker /* If no match found yet, add the initial states to `reach_next'. */
274*c9945492SAndroid Build Coastguard Worker if (match_eo < 0)
275*c9945492SAndroid Build Coastguard Worker {
276*c9945492SAndroid Build Coastguard Worker trans_i = tnfa->initial;
277*c9945492SAndroid Build Coastguard Worker while (trans_i->state != NULL)
278*c9945492SAndroid Build Coastguard Worker {
279*c9945492SAndroid Build Coastguard Worker if (reach_pos[trans_i->state_id].pos < pos)
280*c9945492SAndroid Build Coastguard Worker {
281*c9945492SAndroid Build Coastguard Worker if (trans_i->assertions
282*c9945492SAndroid Build Coastguard Worker && CHECK_ASSERTIONS(trans_i->assertions))
283*c9945492SAndroid Build Coastguard Worker {
284*c9945492SAndroid Build Coastguard Worker trans_i++;
285*c9945492SAndroid Build Coastguard Worker continue;
286*c9945492SAndroid Build Coastguard Worker }
287*c9945492SAndroid Build Coastguard Worker
288*c9945492SAndroid Build Coastguard Worker reach_next_i->state = trans_i->state;
289*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
290*c9945492SAndroid Build Coastguard Worker reach_next_i->tags[i] = -1;
291*c9945492SAndroid Build Coastguard Worker tag_i = trans_i->tags;
292*c9945492SAndroid Build Coastguard Worker if (tag_i)
293*c9945492SAndroid Build Coastguard Worker while (*tag_i >= 0)
294*c9945492SAndroid Build Coastguard Worker {
295*c9945492SAndroid Build Coastguard Worker if (*tag_i < num_tags)
296*c9945492SAndroid Build Coastguard Worker reach_next_i->tags[*tag_i] = pos;
297*c9945492SAndroid Build Coastguard Worker tag_i++;
298*c9945492SAndroid Build Coastguard Worker }
299*c9945492SAndroid Build Coastguard Worker if (reach_next_i->state == tnfa->final)
300*c9945492SAndroid Build Coastguard Worker {
301*c9945492SAndroid Build Coastguard Worker match_eo = pos;
302*c9945492SAndroid Build Coastguard Worker new_match = 1;
303*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
304*c9945492SAndroid Build Coastguard Worker match_tags[i] = reach_next_i->tags[i];
305*c9945492SAndroid Build Coastguard Worker }
306*c9945492SAndroid Build Coastguard Worker reach_pos[trans_i->state_id].pos = pos;
307*c9945492SAndroid Build Coastguard Worker reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
308*c9945492SAndroid Build Coastguard Worker reach_next_i++;
309*c9945492SAndroid Build Coastguard Worker }
310*c9945492SAndroid Build Coastguard Worker trans_i++;
311*c9945492SAndroid Build Coastguard Worker }
312*c9945492SAndroid Build Coastguard Worker reach_next_i->state = NULL;
313*c9945492SAndroid Build Coastguard Worker }
314*c9945492SAndroid Build Coastguard Worker else
315*c9945492SAndroid Build Coastguard Worker {
316*c9945492SAndroid Build Coastguard Worker if (num_tags == 0 || reach_next_i == reach_next)
317*c9945492SAndroid Build Coastguard Worker /* We have found a match. */
318*c9945492SAndroid Build Coastguard Worker break;
319*c9945492SAndroid Build Coastguard Worker }
320*c9945492SAndroid Build Coastguard Worker
321*c9945492SAndroid Build Coastguard Worker /* Check for end of string. */
322*c9945492SAndroid Build Coastguard Worker if (!next_c) break;
323*c9945492SAndroid Build Coastguard Worker
324*c9945492SAndroid Build Coastguard Worker GET_NEXT_WCHAR();
325*c9945492SAndroid Build Coastguard Worker
326*c9945492SAndroid Build Coastguard Worker /* Swap `reach' and `reach_next'. */
327*c9945492SAndroid Build Coastguard Worker reach_i = reach;
328*c9945492SAndroid Build Coastguard Worker reach = reach_next;
329*c9945492SAndroid Build Coastguard Worker reach_next = reach_i;
330*c9945492SAndroid Build Coastguard Worker
331*c9945492SAndroid Build Coastguard Worker /* For each state in `reach', weed out states that don't fulfill the
332*c9945492SAndroid Build Coastguard Worker minimal matching conditions. */
333*c9945492SAndroid Build Coastguard Worker if (tnfa->num_minimals && new_match)
334*c9945492SAndroid Build Coastguard Worker {
335*c9945492SAndroid Build Coastguard Worker new_match = 0;
336*c9945492SAndroid Build Coastguard Worker reach_next_i = reach_next;
337*c9945492SAndroid Build Coastguard Worker for (reach_i = reach; reach_i->state; reach_i++)
338*c9945492SAndroid Build Coastguard Worker {
339*c9945492SAndroid Build Coastguard Worker int skip = 0;
340*c9945492SAndroid Build Coastguard Worker for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
341*c9945492SAndroid Build Coastguard Worker {
342*c9945492SAndroid Build Coastguard Worker int end = tnfa->minimal_tags[i];
343*c9945492SAndroid Build Coastguard Worker int start = tnfa->minimal_tags[i + 1];
344*c9945492SAndroid Build Coastguard Worker if (end >= num_tags)
345*c9945492SAndroid Build Coastguard Worker {
346*c9945492SAndroid Build Coastguard Worker skip = 1;
347*c9945492SAndroid Build Coastguard Worker break;
348*c9945492SAndroid Build Coastguard Worker }
349*c9945492SAndroid Build Coastguard Worker else if (reach_i->tags[start] == match_tags[start]
350*c9945492SAndroid Build Coastguard Worker && reach_i->tags[end] < match_tags[end])
351*c9945492SAndroid Build Coastguard Worker {
352*c9945492SAndroid Build Coastguard Worker skip = 1;
353*c9945492SAndroid Build Coastguard Worker break;
354*c9945492SAndroid Build Coastguard Worker }
355*c9945492SAndroid Build Coastguard Worker }
356*c9945492SAndroid Build Coastguard Worker if (!skip)
357*c9945492SAndroid Build Coastguard Worker {
358*c9945492SAndroid Build Coastguard Worker reach_next_i->state = reach_i->state;
359*c9945492SAndroid Build Coastguard Worker tmp_iptr = reach_next_i->tags;
360*c9945492SAndroid Build Coastguard Worker reach_next_i->tags = reach_i->tags;
361*c9945492SAndroid Build Coastguard Worker reach_i->tags = tmp_iptr;
362*c9945492SAndroid Build Coastguard Worker reach_next_i++;
363*c9945492SAndroid Build Coastguard Worker }
364*c9945492SAndroid Build Coastguard Worker }
365*c9945492SAndroid Build Coastguard Worker reach_next_i->state = NULL;
366*c9945492SAndroid Build Coastguard Worker
367*c9945492SAndroid Build Coastguard Worker /* Swap `reach' and `reach_next'. */
368*c9945492SAndroid Build Coastguard Worker reach_i = reach;
369*c9945492SAndroid Build Coastguard Worker reach = reach_next;
370*c9945492SAndroid Build Coastguard Worker reach_next = reach_i;
371*c9945492SAndroid Build Coastguard Worker }
372*c9945492SAndroid Build Coastguard Worker
373*c9945492SAndroid Build Coastguard Worker /* For each state in `reach' see if there is a transition leaving with
374*c9945492SAndroid Build Coastguard Worker the current input symbol to a state not yet in `reach_next', and
375*c9945492SAndroid Build Coastguard Worker add the destination states to `reach_next'. */
376*c9945492SAndroid Build Coastguard Worker reach_next_i = reach_next;
377*c9945492SAndroid Build Coastguard Worker for (reach_i = reach; reach_i->state; reach_i++)
378*c9945492SAndroid Build Coastguard Worker {
379*c9945492SAndroid Build Coastguard Worker for (trans_i = reach_i->state; trans_i->state; trans_i++)
380*c9945492SAndroid Build Coastguard Worker {
381*c9945492SAndroid Build Coastguard Worker /* Does this transition match the input symbol? */
382*c9945492SAndroid Build Coastguard Worker if (trans_i->code_min <= (tre_cint_t)prev_c &&
383*c9945492SAndroid Build Coastguard Worker trans_i->code_max >= (tre_cint_t)prev_c)
384*c9945492SAndroid Build Coastguard Worker {
385*c9945492SAndroid Build Coastguard Worker if (trans_i->assertions
386*c9945492SAndroid Build Coastguard Worker && (CHECK_ASSERTIONS(trans_i->assertions)
387*c9945492SAndroid Build Coastguard Worker || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
388*c9945492SAndroid Build Coastguard Worker {
389*c9945492SAndroid Build Coastguard Worker continue;
390*c9945492SAndroid Build Coastguard Worker }
391*c9945492SAndroid Build Coastguard Worker
392*c9945492SAndroid Build Coastguard Worker /* Compute the tags after this transition. */
393*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
394*c9945492SAndroid Build Coastguard Worker tmp_tags[i] = reach_i->tags[i];
395*c9945492SAndroid Build Coastguard Worker tag_i = trans_i->tags;
396*c9945492SAndroid Build Coastguard Worker if (tag_i != NULL)
397*c9945492SAndroid Build Coastguard Worker while (*tag_i >= 0)
398*c9945492SAndroid Build Coastguard Worker {
399*c9945492SAndroid Build Coastguard Worker if (*tag_i < num_tags)
400*c9945492SAndroid Build Coastguard Worker tmp_tags[*tag_i] = pos;
401*c9945492SAndroid Build Coastguard Worker tag_i++;
402*c9945492SAndroid Build Coastguard Worker }
403*c9945492SAndroid Build Coastguard Worker
404*c9945492SAndroid Build Coastguard Worker if (reach_pos[trans_i->state_id].pos < pos)
405*c9945492SAndroid Build Coastguard Worker {
406*c9945492SAndroid Build Coastguard Worker /* Found an unvisited node. */
407*c9945492SAndroid Build Coastguard Worker reach_next_i->state = trans_i->state;
408*c9945492SAndroid Build Coastguard Worker tmp_iptr = reach_next_i->tags;
409*c9945492SAndroid Build Coastguard Worker reach_next_i->tags = tmp_tags;
410*c9945492SAndroid Build Coastguard Worker tmp_tags = tmp_iptr;
411*c9945492SAndroid Build Coastguard Worker reach_pos[trans_i->state_id].pos = pos;
412*c9945492SAndroid Build Coastguard Worker reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
413*c9945492SAndroid Build Coastguard Worker
414*c9945492SAndroid Build Coastguard Worker if (reach_next_i->state == tnfa->final
415*c9945492SAndroid Build Coastguard Worker && (match_eo == -1
416*c9945492SAndroid Build Coastguard Worker || (num_tags > 0
417*c9945492SAndroid Build Coastguard Worker && reach_next_i->tags[0] <= match_tags[0])))
418*c9945492SAndroid Build Coastguard Worker {
419*c9945492SAndroid Build Coastguard Worker match_eo = pos;
420*c9945492SAndroid Build Coastguard Worker new_match = 1;
421*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
422*c9945492SAndroid Build Coastguard Worker match_tags[i] = reach_next_i->tags[i];
423*c9945492SAndroid Build Coastguard Worker }
424*c9945492SAndroid Build Coastguard Worker reach_next_i++;
425*c9945492SAndroid Build Coastguard Worker
426*c9945492SAndroid Build Coastguard Worker }
427*c9945492SAndroid Build Coastguard Worker else
428*c9945492SAndroid Build Coastguard Worker {
429*c9945492SAndroid Build Coastguard Worker assert(reach_pos[trans_i->state_id].pos == pos);
430*c9945492SAndroid Build Coastguard Worker /* Another path has also reached this state. We choose
431*c9945492SAndroid Build Coastguard Worker the winner by examining the tag values for both
432*c9945492SAndroid Build Coastguard Worker paths. */
433*c9945492SAndroid Build Coastguard Worker if (tre_tag_order(num_tags, tnfa->tag_directions,
434*c9945492SAndroid Build Coastguard Worker tmp_tags,
435*c9945492SAndroid Build Coastguard Worker *reach_pos[trans_i->state_id].tags))
436*c9945492SAndroid Build Coastguard Worker {
437*c9945492SAndroid Build Coastguard Worker /* The new path wins. */
438*c9945492SAndroid Build Coastguard Worker tmp_iptr = *reach_pos[trans_i->state_id].tags;
439*c9945492SAndroid Build Coastguard Worker *reach_pos[trans_i->state_id].tags = tmp_tags;
440*c9945492SAndroid Build Coastguard Worker if (trans_i->state == tnfa->final)
441*c9945492SAndroid Build Coastguard Worker {
442*c9945492SAndroid Build Coastguard Worker match_eo = pos;
443*c9945492SAndroid Build Coastguard Worker new_match = 1;
444*c9945492SAndroid Build Coastguard Worker for (i = 0; i < num_tags; i++)
445*c9945492SAndroid Build Coastguard Worker match_tags[i] = tmp_tags[i];
446*c9945492SAndroid Build Coastguard Worker }
447*c9945492SAndroid Build Coastguard Worker tmp_tags = tmp_iptr;
448*c9945492SAndroid Build Coastguard Worker }
449*c9945492SAndroid Build Coastguard Worker }
450*c9945492SAndroid Build Coastguard Worker }
451*c9945492SAndroid Build Coastguard Worker }
452*c9945492SAndroid Build Coastguard Worker }
453*c9945492SAndroid Build Coastguard Worker reach_next_i->state = NULL;
454*c9945492SAndroid Build Coastguard Worker }
455*c9945492SAndroid Build Coastguard Worker
456*c9945492SAndroid Build Coastguard Worker *match_end_ofs = match_eo;
457*c9945492SAndroid Build Coastguard Worker ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
458*c9945492SAndroid Build Coastguard Worker error_exit:
459*c9945492SAndroid Build Coastguard Worker xfree(buf);
460*c9945492SAndroid Build Coastguard Worker return ret;
461*c9945492SAndroid Build Coastguard Worker }
462*c9945492SAndroid Build Coastguard Worker
463*c9945492SAndroid Build Coastguard Worker
464*c9945492SAndroid Build Coastguard Worker
465*c9945492SAndroid Build Coastguard Worker /***********************************************************************
466*c9945492SAndroid Build Coastguard Worker from tre-match-backtrack.c
467*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
468*c9945492SAndroid Build Coastguard Worker
469*c9945492SAndroid Build Coastguard Worker /*
470*c9945492SAndroid Build Coastguard Worker This matcher is for regexps that use back referencing. Regexp matching
471*c9945492SAndroid Build Coastguard Worker with back referencing is an NP-complete problem on the number of back
472*c9945492SAndroid Build Coastguard Worker references. The easiest way to match them is to use a backtracking
473*c9945492SAndroid Build Coastguard Worker routine which basically goes through all possible paths in the TNFA
474*c9945492SAndroid Build Coastguard Worker and chooses the one which results in the best (leftmost and longest)
475*c9945492SAndroid Build Coastguard Worker match. This can be spectacularly expensive and may run out of stack
476*c9945492SAndroid Build Coastguard Worker space, but there really is no better known generic algorithm. Quoting
477*c9945492SAndroid Build Coastguard Worker Henry Spencer from comp.compilers:
478*c9945492SAndroid Build Coastguard Worker <URL: http://compilers.iecc.com/comparch/article/93-03-102>
479*c9945492SAndroid Build Coastguard Worker
480*c9945492SAndroid Build Coastguard Worker POSIX.2 REs require longest match, which is really exciting to
481*c9945492SAndroid Build Coastguard Worker implement since the obsolete ("basic") variant also includes
482*c9945492SAndroid Build Coastguard Worker \<digit>. I haven't found a better way of tackling this than doing
483*c9945492SAndroid Build Coastguard Worker a preliminary match using a DFA (or simulation) on a modified RE
484*c9945492SAndroid Build Coastguard Worker that just replicates subREs for \<digit>, and then doing a
485*c9945492SAndroid Build Coastguard Worker backtracking match to determine whether the subRE matches were
486*c9945492SAndroid Build Coastguard Worker right. This can be rather slow, but I console myself with the
487*c9945492SAndroid Build Coastguard Worker thought that people who use \<digit> deserve very slow execution.
488*c9945492SAndroid Build Coastguard Worker (Pun unintentional but very appropriate.)
489*c9945492SAndroid Build Coastguard Worker
490*c9945492SAndroid Build Coastguard Worker */
491*c9945492SAndroid Build Coastguard Worker
492*c9945492SAndroid Build Coastguard Worker typedef struct {
493*c9945492SAndroid Build Coastguard Worker regoff_t pos;
494*c9945492SAndroid Build Coastguard Worker const char *str_byte;
495*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *state;
496*c9945492SAndroid Build Coastguard Worker int state_id;
497*c9945492SAndroid Build Coastguard Worker int next_c;
498*c9945492SAndroid Build Coastguard Worker regoff_t *tags;
499*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
500*c9945492SAndroid Build Coastguard Worker mbstate_t mbstate;
501*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
502*c9945492SAndroid Build Coastguard Worker } tre_backtrack_item_t;
503*c9945492SAndroid Build Coastguard Worker
504*c9945492SAndroid Build Coastguard Worker typedef struct tre_backtrack_struct {
505*c9945492SAndroid Build Coastguard Worker tre_backtrack_item_t item;
506*c9945492SAndroid Build Coastguard Worker struct tre_backtrack_struct *prev;
507*c9945492SAndroid Build Coastguard Worker struct tre_backtrack_struct *next;
508*c9945492SAndroid Build Coastguard Worker } *tre_backtrack_t;
509*c9945492SAndroid Build Coastguard Worker
510*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
511*c9945492SAndroid Build Coastguard Worker #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
512*c9945492SAndroid Build Coastguard Worker #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
513*c9945492SAndroid Build Coastguard Worker #else /* !TRE_MBSTATE */
514*c9945492SAndroid Build Coastguard Worker #define BT_STACK_MBSTATE_IN
515*c9945492SAndroid Build Coastguard Worker #define BT_STACK_MBSTATE_OUT
516*c9945492SAndroid Build Coastguard Worker #endif /* !TRE_MBSTATE */
517*c9945492SAndroid Build Coastguard Worker
518*c9945492SAndroid Build Coastguard Worker #define tre_bt_mem_new tre_mem_new
519*c9945492SAndroid Build Coastguard Worker #define tre_bt_mem_alloc tre_mem_alloc
520*c9945492SAndroid Build Coastguard Worker #define tre_bt_mem_destroy tre_mem_destroy
521*c9945492SAndroid Build Coastguard Worker
522*c9945492SAndroid Build Coastguard Worker
523*c9945492SAndroid Build Coastguard Worker #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
524*c9945492SAndroid Build Coastguard Worker do \
525*c9945492SAndroid Build Coastguard Worker { \
526*c9945492SAndroid Build Coastguard Worker int i; \
527*c9945492SAndroid Build Coastguard Worker if (!stack->next) \
528*c9945492SAndroid Build Coastguard Worker { \
529*c9945492SAndroid Build Coastguard Worker tre_backtrack_t s; \
530*c9945492SAndroid Build Coastguard Worker s = tre_bt_mem_alloc(mem, sizeof(*s)); \
531*c9945492SAndroid Build Coastguard Worker if (!s) \
532*c9945492SAndroid Build Coastguard Worker { \
533*c9945492SAndroid Build Coastguard Worker tre_bt_mem_destroy(mem); \
534*c9945492SAndroid Build Coastguard Worker if (tags) \
535*c9945492SAndroid Build Coastguard Worker xfree(tags); \
536*c9945492SAndroid Build Coastguard Worker if (pmatch) \
537*c9945492SAndroid Build Coastguard Worker xfree(pmatch); \
538*c9945492SAndroid Build Coastguard Worker if (states_seen) \
539*c9945492SAndroid Build Coastguard Worker xfree(states_seen); \
540*c9945492SAndroid Build Coastguard Worker return REG_ESPACE; \
541*c9945492SAndroid Build Coastguard Worker } \
542*c9945492SAndroid Build Coastguard Worker s->prev = stack; \
543*c9945492SAndroid Build Coastguard Worker s->next = NULL; \
544*c9945492SAndroid Build Coastguard Worker s->item.tags = tre_bt_mem_alloc(mem, \
545*c9945492SAndroid Build Coastguard Worker sizeof(*tags) * tnfa->num_tags); \
546*c9945492SAndroid Build Coastguard Worker if (!s->item.tags) \
547*c9945492SAndroid Build Coastguard Worker { \
548*c9945492SAndroid Build Coastguard Worker tre_bt_mem_destroy(mem); \
549*c9945492SAndroid Build Coastguard Worker if (tags) \
550*c9945492SAndroid Build Coastguard Worker xfree(tags); \
551*c9945492SAndroid Build Coastguard Worker if (pmatch) \
552*c9945492SAndroid Build Coastguard Worker xfree(pmatch); \
553*c9945492SAndroid Build Coastguard Worker if (states_seen) \
554*c9945492SAndroid Build Coastguard Worker xfree(states_seen); \
555*c9945492SAndroid Build Coastguard Worker return REG_ESPACE; \
556*c9945492SAndroid Build Coastguard Worker } \
557*c9945492SAndroid Build Coastguard Worker stack->next = s; \
558*c9945492SAndroid Build Coastguard Worker stack = s; \
559*c9945492SAndroid Build Coastguard Worker } \
560*c9945492SAndroid Build Coastguard Worker else \
561*c9945492SAndroid Build Coastguard Worker stack = stack->next; \
562*c9945492SAndroid Build Coastguard Worker stack->item.pos = (_pos); \
563*c9945492SAndroid Build Coastguard Worker stack->item.str_byte = (_str_byte); \
564*c9945492SAndroid Build Coastguard Worker stack->item.state = (_state); \
565*c9945492SAndroid Build Coastguard Worker stack->item.state_id = (_state_id); \
566*c9945492SAndroid Build Coastguard Worker stack->item.next_c = (_next_c); \
567*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_tags; i++) \
568*c9945492SAndroid Build Coastguard Worker stack->item.tags[i] = (_tags)[i]; \
569*c9945492SAndroid Build Coastguard Worker BT_STACK_MBSTATE_IN; \
570*c9945492SAndroid Build Coastguard Worker } \
571*c9945492SAndroid Build Coastguard Worker while (0)
572*c9945492SAndroid Build Coastguard Worker
573*c9945492SAndroid Build Coastguard Worker #define BT_STACK_POP() \
574*c9945492SAndroid Build Coastguard Worker do \
575*c9945492SAndroid Build Coastguard Worker { \
576*c9945492SAndroid Build Coastguard Worker int i; \
577*c9945492SAndroid Build Coastguard Worker assert(stack->prev); \
578*c9945492SAndroid Build Coastguard Worker pos = stack->item.pos; \
579*c9945492SAndroid Build Coastguard Worker str_byte = stack->item.str_byte; \
580*c9945492SAndroid Build Coastguard Worker state = stack->item.state; \
581*c9945492SAndroid Build Coastguard Worker next_c = stack->item.next_c; \
582*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_tags; i++) \
583*c9945492SAndroid Build Coastguard Worker tags[i] = stack->item.tags[i]; \
584*c9945492SAndroid Build Coastguard Worker BT_STACK_MBSTATE_OUT; \
585*c9945492SAndroid Build Coastguard Worker stack = stack->prev; \
586*c9945492SAndroid Build Coastguard Worker } \
587*c9945492SAndroid Build Coastguard Worker while (0)
588*c9945492SAndroid Build Coastguard Worker
589*c9945492SAndroid Build Coastguard Worker #undef MIN
590*c9945492SAndroid Build Coastguard Worker #define MIN(a, b) ((a) <= (b) ? (a) : (b))
591*c9945492SAndroid Build Coastguard Worker
592*c9945492SAndroid Build Coastguard Worker static reg_errcode_t
tre_tnfa_run_backtrack(const tre_tnfa_t * tnfa,const void * string,regoff_t * match_tags,int eflags,regoff_t * match_end_ofs)593*c9945492SAndroid Build Coastguard Worker tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
594*c9945492SAndroid Build Coastguard Worker regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
595*c9945492SAndroid Build Coastguard Worker {
596*c9945492SAndroid Build Coastguard Worker /* State variables required by GET_NEXT_WCHAR. */
597*c9945492SAndroid Build Coastguard Worker tre_char_t prev_c = 0, next_c = 0;
598*c9945492SAndroid Build Coastguard Worker const char *str_byte = string;
599*c9945492SAndroid Build Coastguard Worker regoff_t pos = 0;
600*c9945492SAndroid Build Coastguard Worker regoff_t pos_add_next = 1;
601*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
602*c9945492SAndroid Build Coastguard Worker mbstate_t mbstate;
603*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
604*c9945492SAndroid Build Coastguard Worker int reg_notbol = eflags & REG_NOTBOL;
605*c9945492SAndroid Build Coastguard Worker int reg_noteol = eflags & REG_NOTEOL;
606*c9945492SAndroid Build Coastguard Worker int reg_newline = tnfa->cflags & REG_NEWLINE;
607*c9945492SAndroid Build Coastguard Worker
608*c9945492SAndroid Build Coastguard Worker /* These are used to remember the necessary values of the above
609*c9945492SAndroid Build Coastguard Worker variables to return to the position where the current search
610*c9945492SAndroid Build Coastguard Worker started from. */
611*c9945492SAndroid Build Coastguard Worker int next_c_start;
612*c9945492SAndroid Build Coastguard Worker const char *str_byte_start;
613*c9945492SAndroid Build Coastguard Worker regoff_t pos_start = -1;
614*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
615*c9945492SAndroid Build Coastguard Worker mbstate_t mbstate_start;
616*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
617*c9945492SAndroid Build Coastguard Worker
618*c9945492SAndroid Build Coastguard Worker /* End offset of best match so far, or -1 if no match found yet. */
619*c9945492SAndroid Build Coastguard Worker regoff_t match_eo = -1;
620*c9945492SAndroid Build Coastguard Worker /* Tag arrays. */
621*c9945492SAndroid Build Coastguard Worker int *next_tags;
622*c9945492SAndroid Build Coastguard Worker regoff_t *tags = NULL;
623*c9945492SAndroid Build Coastguard Worker /* Current TNFA state. */
624*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *state;
625*c9945492SAndroid Build Coastguard Worker int *states_seen = NULL;
626*c9945492SAndroid Build Coastguard Worker
627*c9945492SAndroid Build Coastguard Worker /* Memory allocator to for allocating the backtracking stack. */
628*c9945492SAndroid Build Coastguard Worker tre_mem_t mem = tre_bt_mem_new();
629*c9945492SAndroid Build Coastguard Worker
630*c9945492SAndroid Build Coastguard Worker /* The backtracking stack. */
631*c9945492SAndroid Build Coastguard Worker tre_backtrack_t stack;
632*c9945492SAndroid Build Coastguard Worker
633*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *trans_i;
634*c9945492SAndroid Build Coastguard Worker regmatch_t *pmatch = NULL;
635*c9945492SAndroid Build Coastguard Worker int ret;
636*c9945492SAndroid Build Coastguard Worker
637*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
638*c9945492SAndroid Build Coastguard Worker memset(&mbstate, '\0', sizeof(mbstate));
639*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
640*c9945492SAndroid Build Coastguard Worker
641*c9945492SAndroid Build Coastguard Worker if (!mem)
642*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
643*c9945492SAndroid Build Coastguard Worker stack = tre_bt_mem_alloc(mem, sizeof(*stack));
644*c9945492SAndroid Build Coastguard Worker if (!stack)
645*c9945492SAndroid Build Coastguard Worker {
646*c9945492SAndroid Build Coastguard Worker ret = REG_ESPACE;
647*c9945492SAndroid Build Coastguard Worker goto error_exit;
648*c9945492SAndroid Build Coastguard Worker }
649*c9945492SAndroid Build Coastguard Worker stack->prev = NULL;
650*c9945492SAndroid Build Coastguard Worker stack->next = NULL;
651*c9945492SAndroid Build Coastguard Worker
652*c9945492SAndroid Build Coastguard Worker if (tnfa->num_tags)
653*c9945492SAndroid Build Coastguard Worker {
654*c9945492SAndroid Build Coastguard Worker tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
655*c9945492SAndroid Build Coastguard Worker if (!tags)
656*c9945492SAndroid Build Coastguard Worker {
657*c9945492SAndroid Build Coastguard Worker ret = REG_ESPACE;
658*c9945492SAndroid Build Coastguard Worker goto error_exit;
659*c9945492SAndroid Build Coastguard Worker }
660*c9945492SAndroid Build Coastguard Worker }
661*c9945492SAndroid Build Coastguard Worker if (tnfa->num_submatches)
662*c9945492SAndroid Build Coastguard Worker {
663*c9945492SAndroid Build Coastguard Worker pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
664*c9945492SAndroid Build Coastguard Worker if (!pmatch)
665*c9945492SAndroid Build Coastguard Worker {
666*c9945492SAndroid Build Coastguard Worker ret = REG_ESPACE;
667*c9945492SAndroid Build Coastguard Worker goto error_exit;
668*c9945492SAndroid Build Coastguard Worker }
669*c9945492SAndroid Build Coastguard Worker }
670*c9945492SAndroid Build Coastguard Worker if (tnfa->num_states)
671*c9945492SAndroid Build Coastguard Worker {
672*c9945492SAndroid Build Coastguard Worker states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
673*c9945492SAndroid Build Coastguard Worker if (!states_seen)
674*c9945492SAndroid Build Coastguard Worker {
675*c9945492SAndroid Build Coastguard Worker ret = REG_ESPACE;
676*c9945492SAndroid Build Coastguard Worker goto error_exit;
677*c9945492SAndroid Build Coastguard Worker }
678*c9945492SAndroid Build Coastguard Worker }
679*c9945492SAndroid Build Coastguard Worker
680*c9945492SAndroid Build Coastguard Worker retry:
681*c9945492SAndroid Build Coastguard Worker {
682*c9945492SAndroid Build Coastguard Worker int i;
683*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_tags; i++)
684*c9945492SAndroid Build Coastguard Worker {
685*c9945492SAndroid Build Coastguard Worker tags[i] = -1;
686*c9945492SAndroid Build Coastguard Worker if (match_tags)
687*c9945492SAndroid Build Coastguard Worker match_tags[i] = -1;
688*c9945492SAndroid Build Coastguard Worker }
689*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_states; i++)
690*c9945492SAndroid Build Coastguard Worker states_seen[i] = 0;
691*c9945492SAndroid Build Coastguard Worker }
692*c9945492SAndroid Build Coastguard Worker
693*c9945492SAndroid Build Coastguard Worker state = NULL;
694*c9945492SAndroid Build Coastguard Worker pos = pos_start;
695*c9945492SAndroid Build Coastguard Worker GET_NEXT_WCHAR();
696*c9945492SAndroid Build Coastguard Worker pos_start = pos;
697*c9945492SAndroid Build Coastguard Worker next_c_start = next_c;
698*c9945492SAndroid Build Coastguard Worker str_byte_start = str_byte;
699*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
700*c9945492SAndroid Build Coastguard Worker mbstate_start = mbstate;
701*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
702*c9945492SAndroid Build Coastguard Worker
703*c9945492SAndroid Build Coastguard Worker /* Handle initial states. */
704*c9945492SAndroid Build Coastguard Worker next_tags = NULL;
705*c9945492SAndroid Build Coastguard Worker for (trans_i = tnfa->initial; trans_i->state; trans_i++)
706*c9945492SAndroid Build Coastguard Worker {
707*c9945492SAndroid Build Coastguard Worker if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
708*c9945492SAndroid Build Coastguard Worker {
709*c9945492SAndroid Build Coastguard Worker continue;
710*c9945492SAndroid Build Coastguard Worker }
711*c9945492SAndroid Build Coastguard Worker if (state == NULL)
712*c9945492SAndroid Build Coastguard Worker {
713*c9945492SAndroid Build Coastguard Worker /* Start from this state. */
714*c9945492SAndroid Build Coastguard Worker state = trans_i->state;
715*c9945492SAndroid Build Coastguard Worker next_tags = trans_i->tags;
716*c9945492SAndroid Build Coastguard Worker }
717*c9945492SAndroid Build Coastguard Worker else
718*c9945492SAndroid Build Coastguard Worker {
719*c9945492SAndroid Build Coastguard Worker /* Backtrack to this state. */
720*c9945492SAndroid Build Coastguard Worker BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
721*c9945492SAndroid Build Coastguard Worker trans_i->state_id, next_c, tags, mbstate);
722*c9945492SAndroid Build Coastguard Worker {
723*c9945492SAndroid Build Coastguard Worker int *tmp = trans_i->tags;
724*c9945492SAndroid Build Coastguard Worker if (tmp)
725*c9945492SAndroid Build Coastguard Worker while (*tmp >= 0)
726*c9945492SAndroid Build Coastguard Worker stack->item.tags[*tmp++] = pos;
727*c9945492SAndroid Build Coastguard Worker }
728*c9945492SAndroid Build Coastguard Worker }
729*c9945492SAndroid Build Coastguard Worker }
730*c9945492SAndroid Build Coastguard Worker
731*c9945492SAndroid Build Coastguard Worker if (next_tags)
732*c9945492SAndroid Build Coastguard Worker for (; *next_tags >= 0; next_tags++)
733*c9945492SAndroid Build Coastguard Worker tags[*next_tags] = pos;
734*c9945492SAndroid Build Coastguard Worker
735*c9945492SAndroid Build Coastguard Worker
736*c9945492SAndroid Build Coastguard Worker if (state == NULL)
737*c9945492SAndroid Build Coastguard Worker goto backtrack;
738*c9945492SAndroid Build Coastguard Worker
739*c9945492SAndroid Build Coastguard Worker while (1)
740*c9945492SAndroid Build Coastguard Worker {
741*c9945492SAndroid Build Coastguard Worker tre_tnfa_transition_t *next_state;
742*c9945492SAndroid Build Coastguard Worker int empty_br_match;
743*c9945492SAndroid Build Coastguard Worker
744*c9945492SAndroid Build Coastguard Worker if (state == tnfa->final)
745*c9945492SAndroid Build Coastguard Worker {
746*c9945492SAndroid Build Coastguard Worker if (match_eo < pos
747*c9945492SAndroid Build Coastguard Worker || (match_eo == pos
748*c9945492SAndroid Build Coastguard Worker && match_tags
749*c9945492SAndroid Build Coastguard Worker && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
750*c9945492SAndroid Build Coastguard Worker tags, match_tags)))
751*c9945492SAndroid Build Coastguard Worker {
752*c9945492SAndroid Build Coastguard Worker int i;
753*c9945492SAndroid Build Coastguard Worker /* This match wins the previous match. */
754*c9945492SAndroid Build Coastguard Worker match_eo = pos;
755*c9945492SAndroid Build Coastguard Worker if (match_tags)
756*c9945492SAndroid Build Coastguard Worker for (i = 0; i < tnfa->num_tags; i++)
757*c9945492SAndroid Build Coastguard Worker match_tags[i] = tags[i];
758*c9945492SAndroid Build Coastguard Worker }
759*c9945492SAndroid Build Coastguard Worker /* Our TNFAs never have transitions leaving from the final state,
760*c9945492SAndroid Build Coastguard Worker so we jump right to backtracking. */
761*c9945492SAndroid Build Coastguard Worker goto backtrack;
762*c9945492SAndroid Build Coastguard Worker }
763*c9945492SAndroid Build Coastguard Worker
764*c9945492SAndroid Build Coastguard Worker /* Go to the next character in the input string. */
765*c9945492SAndroid Build Coastguard Worker empty_br_match = 0;
766*c9945492SAndroid Build Coastguard Worker trans_i = state;
767*c9945492SAndroid Build Coastguard Worker if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
768*c9945492SAndroid Build Coastguard Worker {
769*c9945492SAndroid Build Coastguard Worker /* This is a back reference state. All transitions leaving from
770*c9945492SAndroid Build Coastguard Worker this state have the same back reference "assertion". Instead
771*c9945492SAndroid Build Coastguard Worker of reading the next character, we match the back reference. */
772*c9945492SAndroid Build Coastguard Worker regoff_t so, eo;
773*c9945492SAndroid Build Coastguard Worker int bt = trans_i->u.backref;
774*c9945492SAndroid Build Coastguard Worker regoff_t bt_len;
775*c9945492SAndroid Build Coastguard Worker int result;
776*c9945492SAndroid Build Coastguard Worker
777*c9945492SAndroid Build Coastguard Worker /* Get the substring we need to match against. Remember to
778*c9945492SAndroid Build Coastguard Worker turn off REG_NOSUB temporarily. */
779*c9945492SAndroid Build Coastguard Worker tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
780*c9945492SAndroid Build Coastguard Worker tnfa, tags, pos);
781*c9945492SAndroid Build Coastguard Worker so = pmatch[bt].rm_so;
782*c9945492SAndroid Build Coastguard Worker eo = pmatch[bt].rm_eo;
783*c9945492SAndroid Build Coastguard Worker bt_len = eo - so;
784*c9945492SAndroid Build Coastguard Worker
785*c9945492SAndroid Build Coastguard Worker result = strncmp((const char*)string + so, str_byte - 1,
786*c9945492SAndroid Build Coastguard Worker (size_t)bt_len);
787*c9945492SAndroid Build Coastguard Worker
788*c9945492SAndroid Build Coastguard Worker if (result == 0)
789*c9945492SAndroid Build Coastguard Worker {
790*c9945492SAndroid Build Coastguard Worker /* Back reference matched. Check for infinite loop. */
791*c9945492SAndroid Build Coastguard Worker if (bt_len == 0)
792*c9945492SAndroid Build Coastguard Worker empty_br_match = 1;
793*c9945492SAndroid Build Coastguard Worker if (empty_br_match && states_seen[trans_i->state_id])
794*c9945492SAndroid Build Coastguard Worker {
795*c9945492SAndroid Build Coastguard Worker goto backtrack;
796*c9945492SAndroid Build Coastguard Worker }
797*c9945492SAndroid Build Coastguard Worker
798*c9945492SAndroid Build Coastguard Worker states_seen[trans_i->state_id] = empty_br_match;
799*c9945492SAndroid Build Coastguard Worker
800*c9945492SAndroid Build Coastguard Worker /* Advance in input string and resync `prev_c', `next_c'
801*c9945492SAndroid Build Coastguard Worker and pos. */
802*c9945492SAndroid Build Coastguard Worker str_byte += bt_len - 1;
803*c9945492SAndroid Build Coastguard Worker pos += bt_len - 1;
804*c9945492SAndroid Build Coastguard Worker GET_NEXT_WCHAR();
805*c9945492SAndroid Build Coastguard Worker }
806*c9945492SAndroid Build Coastguard Worker else
807*c9945492SAndroid Build Coastguard Worker {
808*c9945492SAndroid Build Coastguard Worker goto backtrack;
809*c9945492SAndroid Build Coastguard Worker }
810*c9945492SAndroid Build Coastguard Worker }
811*c9945492SAndroid Build Coastguard Worker else
812*c9945492SAndroid Build Coastguard Worker {
813*c9945492SAndroid Build Coastguard Worker /* Check for end of string. */
814*c9945492SAndroid Build Coastguard Worker if (next_c == L'\0')
815*c9945492SAndroid Build Coastguard Worker goto backtrack;
816*c9945492SAndroid Build Coastguard Worker
817*c9945492SAndroid Build Coastguard Worker /* Read the next character. */
818*c9945492SAndroid Build Coastguard Worker GET_NEXT_WCHAR();
819*c9945492SAndroid Build Coastguard Worker }
820*c9945492SAndroid Build Coastguard Worker
821*c9945492SAndroid Build Coastguard Worker next_state = NULL;
822*c9945492SAndroid Build Coastguard Worker for (trans_i = state; trans_i->state; trans_i++)
823*c9945492SAndroid Build Coastguard Worker {
824*c9945492SAndroid Build Coastguard Worker if (trans_i->code_min <= (tre_cint_t)prev_c
825*c9945492SAndroid Build Coastguard Worker && trans_i->code_max >= (tre_cint_t)prev_c)
826*c9945492SAndroid Build Coastguard Worker {
827*c9945492SAndroid Build Coastguard Worker if (trans_i->assertions
828*c9945492SAndroid Build Coastguard Worker && (CHECK_ASSERTIONS(trans_i->assertions)
829*c9945492SAndroid Build Coastguard Worker || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
830*c9945492SAndroid Build Coastguard Worker {
831*c9945492SAndroid Build Coastguard Worker continue;
832*c9945492SAndroid Build Coastguard Worker }
833*c9945492SAndroid Build Coastguard Worker
834*c9945492SAndroid Build Coastguard Worker if (next_state == NULL)
835*c9945492SAndroid Build Coastguard Worker {
836*c9945492SAndroid Build Coastguard Worker /* First matching transition. */
837*c9945492SAndroid Build Coastguard Worker next_state = trans_i->state;
838*c9945492SAndroid Build Coastguard Worker next_tags = trans_i->tags;
839*c9945492SAndroid Build Coastguard Worker }
840*c9945492SAndroid Build Coastguard Worker else
841*c9945492SAndroid Build Coastguard Worker {
842*c9945492SAndroid Build Coastguard Worker /* Second matching transition. We may need to backtrack here
843*c9945492SAndroid Build Coastguard Worker to take this transition instead of the first one, so we
844*c9945492SAndroid Build Coastguard Worker push this transition in the backtracking stack so we can
845*c9945492SAndroid Build Coastguard Worker jump back here if needed. */
846*c9945492SAndroid Build Coastguard Worker BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
847*c9945492SAndroid Build Coastguard Worker trans_i->state_id, next_c, tags, mbstate);
848*c9945492SAndroid Build Coastguard Worker {
849*c9945492SAndroid Build Coastguard Worker int *tmp;
850*c9945492SAndroid Build Coastguard Worker for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
851*c9945492SAndroid Build Coastguard Worker stack->item.tags[*tmp] = pos;
852*c9945492SAndroid Build Coastguard Worker }
853*c9945492SAndroid Build Coastguard Worker #if 0 /* XXX - it's important not to look at all transitions here to keep
854*c9945492SAndroid Build Coastguard Worker the stack small! */
855*c9945492SAndroid Build Coastguard Worker break;
856*c9945492SAndroid Build Coastguard Worker #endif
857*c9945492SAndroid Build Coastguard Worker }
858*c9945492SAndroid Build Coastguard Worker }
859*c9945492SAndroid Build Coastguard Worker }
860*c9945492SAndroid Build Coastguard Worker
861*c9945492SAndroid Build Coastguard Worker if (next_state != NULL)
862*c9945492SAndroid Build Coastguard Worker {
863*c9945492SAndroid Build Coastguard Worker /* Matching transitions were found. Take the first one. */
864*c9945492SAndroid Build Coastguard Worker state = next_state;
865*c9945492SAndroid Build Coastguard Worker
866*c9945492SAndroid Build Coastguard Worker /* Update the tag values. */
867*c9945492SAndroid Build Coastguard Worker if (next_tags)
868*c9945492SAndroid Build Coastguard Worker while (*next_tags >= 0)
869*c9945492SAndroid Build Coastguard Worker tags[*next_tags++] = pos;
870*c9945492SAndroid Build Coastguard Worker }
871*c9945492SAndroid Build Coastguard Worker else
872*c9945492SAndroid Build Coastguard Worker {
873*c9945492SAndroid Build Coastguard Worker backtrack:
874*c9945492SAndroid Build Coastguard Worker /* A matching transition was not found. Try to backtrack. */
875*c9945492SAndroid Build Coastguard Worker if (stack->prev)
876*c9945492SAndroid Build Coastguard Worker {
877*c9945492SAndroid Build Coastguard Worker if (stack->item.state->assertions & ASSERT_BACKREF)
878*c9945492SAndroid Build Coastguard Worker {
879*c9945492SAndroid Build Coastguard Worker states_seen[stack->item.state_id] = 0;
880*c9945492SAndroid Build Coastguard Worker }
881*c9945492SAndroid Build Coastguard Worker
882*c9945492SAndroid Build Coastguard Worker BT_STACK_POP();
883*c9945492SAndroid Build Coastguard Worker }
884*c9945492SAndroid Build Coastguard Worker else if (match_eo < 0)
885*c9945492SAndroid Build Coastguard Worker {
886*c9945492SAndroid Build Coastguard Worker /* Try starting from a later position in the input string. */
887*c9945492SAndroid Build Coastguard Worker /* Check for end of string. */
888*c9945492SAndroid Build Coastguard Worker if (next_c == L'\0')
889*c9945492SAndroid Build Coastguard Worker {
890*c9945492SAndroid Build Coastguard Worker break;
891*c9945492SAndroid Build Coastguard Worker }
892*c9945492SAndroid Build Coastguard Worker next_c = next_c_start;
893*c9945492SAndroid Build Coastguard Worker #ifdef TRE_MBSTATE
894*c9945492SAndroid Build Coastguard Worker mbstate = mbstate_start;
895*c9945492SAndroid Build Coastguard Worker #endif /* TRE_MBSTATE */
896*c9945492SAndroid Build Coastguard Worker str_byte = str_byte_start;
897*c9945492SAndroid Build Coastguard Worker goto retry;
898*c9945492SAndroid Build Coastguard Worker }
899*c9945492SAndroid Build Coastguard Worker else
900*c9945492SAndroid Build Coastguard Worker {
901*c9945492SAndroid Build Coastguard Worker break;
902*c9945492SAndroid Build Coastguard Worker }
903*c9945492SAndroid Build Coastguard Worker }
904*c9945492SAndroid Build Coastguard Worker }
905*c9945492SAndroid Build Coastguard Worker
906*c9945492SAndroid Build Coastguard Worker ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
907*c9945492SAndroid Build Coastguard Worker *match_end_ofs = match_eo;
908*c9945492SAndroid Build Coastguard Worker
909*c9945492SAndroid Build Coastguard Worker error_exit:
910*c9945492SAndroid Build Coastguard Worker tre_bt_mem_destroy(mem);
911*c9945492SAndroid Build Coastguard Worker #ifndef TRE_USE_ALLOCA
912*c9945492SAndroid Build Coastguard Worker if (tags)
913*c9945492SAndroid Build Coastguard Worker xfree(tags);
914*c9945492SAndroid Build Coastguard Worker if (pmatch)
915*c9945492SAndroid Build Coastguard Worker xfree(pmatch);
916*c9945492SAndroid Build Coastguard Worker if (states_seen)
917*c9945492SAndroid Build Coastguard Worker xfree(states_seen);
918*c9945492SAndroid Build Coastguard Worker #endif /* !TRE_USE_ALLOCA */
919*c9945492SAndroid Build Coastguard Worker
920*c9945492SAndroid Build Coastguard Worker return ret;
921*c9945492SAndroid Build Coastguard Worker }
922*c9945492SAndroid Build Coastguard Worker
923*c9945492SAndroid Build Coastguard Worker /***********************************************************************
924*c9945492SAndroid Build Coastguard Worker from regexec.c
925*c9945492SAndroid Build Coastguard Worker ***********************************************************************/
926*c9945492SAndroid Build Coastguard Worker
927*c9945492SAndroid Build Coastguard Worker /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
928*c9945492SAndroid Build Coastguard Worker endpoint values. */
929*c9945492SAndroid Build Coastguard Worker static void
tre_fill_pmatch(size_t nmatch,regmatch_t pmatch[],int cflags,const tre_tnfa_t * tnfa,regoff_t * tags,regoff_t match_eo)930*c9945492SAndroid Build Coastguard Worker tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
931*c9945492SAndroid Build Coastguard Worker const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
932*c9945492SAndroid Build Coastguard Worker {
933*c9945492SAndroid Build Coastguard Worker tre_submatch_data_t *submatch_data;
934*c9945492SAndroid Build Coastguard Worker unsigned int i, j;
935*c9945492SAndroid Build Coastguard Worker int *parents;
936*c9945492SAndroid Build Coastguard Worker
937*c9945492SAndroid Build Coastguard Worker i = 0;
938*c9945492SAndroid Build Coastguard Worker if (match_eo >= 0 && !(cflags & REG_NOSUB))
939*c9945492SAndroid Build Coastguard Worker {
940*c9945492SAndroid Build Coastguard Worker /* Construct submatch offsets from the tags. */
941*c9945492SAndroid Build Coastguard Worker submatch_data = tnfa->submatch_data;
942*c9945492SAndroid Build Coastguard Worker while (i < tnfa->num_submatches && i < nmatch)
943*c9945492SAndroid Build Coastguard Worker {
944*c9945492SAndroid Build Coastguard Worker if (submatch_data[i].so_tag == tnfa->end_tag)
945*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_so = match_eo;
946*c9945492SAndroid Build Coastguard Worker else
947*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_so = tags[submatch_data[i].so_tag];
948*c9945492SAndroid Build Coastguard Worker
949*c9945492SAndroid Build Coastguard Worker if (submatch_data[i].eo_tag == tnfa->end_tag)
950*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_eo = match_eo;
951*c9945492SAndroid Build Coastguard Worker else
952*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
953*c9945492SAndroid Build Coastguard Worker
954*c9945492SAndroid Build Coastguard Worker /* If either of the endpoints were not used, this submatch
955*c9945492SAndroid Build Coastguard Worker was not part of the match. */
956*c9945492SAndroid Build Coastguard Worker if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
957*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_so = pmatch[i].rm_eo = -1;
958*c9945492SAndroid Build Coastguard Worker
959*c9945492SAndroid Build Coastguard Worker i++;
960*c9945492SAndroid Build Coastguard Worker }
961*c9945492SAndroid Build Coastguard Worker /* Reset all submatches that are not within all of their parent
962*c9945492SAndroid Build Coastguard Worker submatches. */
963*c9945492SAndroid Build Coastguard Worker i = 0;
964*c9945492SAndroid Build Coastguard Worker while (i < tnfa->num_submatches && i < nmatch)
965*c9945492SAndroid Build Coastguard Worker {
966*c9945492SAndroid Build Coastguard Worker if (pmatch[i].rm_eo == -1)
967*c9945492SAndroid Build Coastguard Worker assert(pmatch[i].rm_so == -1);
968*c9945492SAndroid Build Coastguard Worker assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
969*c9945492SAndroid Build Coastguard Worker
970*c9945492SAndroid Build Coastguard Worker parents = submatch_data[i].parents;
971*c9945492SAndroid Build Coastguard Worker if (parents != NULL)
972*c9945492SAndroid Build Coastguard Worker for (j = 0; parents[j] >= 0; j++)
973*c9945492SAndroid Build Coastguard Worker {
974*c9945492SAndroid Build Coastguard Worker if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
975*c9945492SAndroid Build Coastguard Worker || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
976*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_so = pmatch[i].rm_eo = -1;
977*c9945492SAndroid Build Coastguard Worker }
978*c9945492SAndroid Build Coastguard Worker i++;
979*c9945492SAndroid Build Coastguard Worker }
980*c9945492SAndroid Build Coastguard Worker }
981*c9945492SAndroid Build Coastguard Worker
982*c9945492SAndroid Build Coastguard Worker while (i < nmatch)
983*c9945492SAndroid Build Coastguard Worker {
984*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_so = -1;
985*c9945492SAndroid Build Coastguard Worker pmatch[i].rm_eo = -1;
986*c9945492SAndroid Build Coastguard Worker i++;
987*c9945492SAndroid Build Coastguard Worker }
988*c9945492SAndroid Build Coastguard Worker }
989*c9945492SAndroid Build Coastguard Worker
990*c9945492SAndroid Build Coastguard Worker
991*c9945492SAndroid Build Coastguard Worker /*
992*c9945492SAndroid Build Coastguard Worker Wrapper functions for POSIX compatible regexp matching.
993*c9945492SAndroid Build Coastguard Worker */
994*c9945492SAndroid Build Coastguard Worker
995*c9945492SAndroid Build Coastguard Worker int
regexec(const regex_t * restrict preg,const char * restrict string,size_t nmatch,regmatch_t pmatch[restrict],int eflags)996*c9945492SAndroid Build Coastguard Worker regexec(const regex_t *restrict preg, const char *restrict string,
997*c9945492SAndroid Build Coastguard Worker size_t nmatch, regmatch_t pmatch[restrict], int eflags)
998*c9945492SAndroid Build Coastguard Worker {
999*c9945492SAndroid Build Coastguard Worker tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
1000*c9945492SAndroid Build Coastguard Worker reg_errcode_t status;
1001*c9945492SAndroid Build Coastguard Worker regoff_t *tags = NULL, eo;
1002*c9945492SAndroid Build Coastguard Worker if (tnfa->cflags & REG_NOSUB) nmatch = 0;
1003*c9945492SAndroid Build Coastguard Worker if (tnfa->num_tags > 0 && nmatch > 0)
1004*c9945492SAndroid Build Coastguard Worker {
1005*c9945492SAndroid Build Coastguard Worker tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
1006*c9945492SAndroid Build Coastguard Worker if (tags == NULL)
1007*c9945492SAndroid Build Coastguard Worker return REG_ESPACE;
1008*c9945492SAndroid Build Coastguard Worker }
1009*c9945492SAndroid Build Coastguard Worker
1010*c9945492SAndroid Build Coastguard Worker /* Dispatch to the appropriate matcher. */
1011*c9945492SAndroid Build Coastguard Worker if (tnfa->have_backrefs)
1012*c9945492SAndroid Build Coastguard Worker {
1013*c9945492SAndroid Build Coastguard Worker /* The regex has back references, use the backtracking matcher. */
1014*c9945492SAndroid Build Coastguard Worker status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
1015*c9945492SAndroid Build Coastguard Worker }
1016*c9945492SAndroid Build Coastguard Worker else
1017*c9945492SAndroid Build Coastguard Worker {
1018*c9945492SAndroid Build Coastguard Worker /* Exact matching, no back references, use the parallel matcher. */
1019*c9945492SAndroid Build Coastguard Worker status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1020*c9945492SAndroid Build Coastguard Worker }
1021*c9945492SAndroid Build Coastguard Worker
1022*c9945492SAndroid Build Coastguard Worker if (status == REG_OK)
1023*c9945492SAndroid Build Coastguard Worker /* A match was found, so fill the submatch registers. */
1024*c9945492SAndroid Build Coastguard Worker tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1025*c9945492SAndroid Build Coastguard Worker if (tags)
1026*c9945492SAndroid Build Coastguard Worker xfree(tags);
1027*c9945492SAndroid Build Coastguard Worker return status;
1028*c9945492SAndroid Build Coastguard Worker }
1029