/***************************************************************************
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
tries to compile and match it, deriving options from the string itself. If
STANDALONE is defined, a main program that calls the driver with the contents
of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it is taken as a literal string
rather than a file name. This allows easy testing of short strings.

Written by Philip Hazel, October 2016
Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
Further updates March/April/May 2024 by PH
***************************************************************************/
13*22dc650dSSadaf Ebrahimi
14*22dc650dSSadaf Ebrahimi #include <errno.h>
15*22dc650dSSadaf Ebrahimi #include <stdarg.h>
16*22dc650dSSadaf Ebrahimi #include <stdio.h>
17*22dc650dSSadaf Ebrahimi #include <stdlib.h>
18*22dc650dSSadaf Ebrahimi #include <string.h>
19*22dc650dSSadaf Ebrahimi #include <unistd.h>
20*22dc650dSSadaf Ebrahimi
21*22dc650dSSadaf Ebrahimi /* stack size adjustment */
22*22dc650dSSadaf Ebrahimi #include <sys/time.h>
23*22dc650dSSadaf Ebrahimi #include <sys/resource.h>
24*22dc650dSSadaf Ebrahimi
25*22dc650dSSadaf Ebrahimi #define STACK_SIZE_MB 256
26*22dc650dSSadaf Ebrahimi #define JIT_SIZE_LIMIT (200 * 1024)
27*22dc650dSSadaf Ebrahimi
28*22dc650dSSadaf Ebrahimi #ifndef PCRE2_CODE_UNIT_WIDTH
29*22dc650dSSadaf Ebrahimi #define PCRE2_CODE_UNIT_WIDTH 8
30*22dc650dSSadaf Ebrahimi #endif
31*22dc650dSSadaf Ebrahimi
32*22dc650dSSadaf Ebrahimi #include "config.h"
33*22dc650dSSadaf Ebrahimi #include "pcre2.h"
34*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
35*22dc650dSSadaf Ebrahimi
36*22dc650dSSadaf Ebrahimi #define MAX_MATCH_SIZE 1000
37*22dc650dSSadaf Ebrahimi
38*22dc650dSSadaf Ebrahimi #define DFA_WORKSPACE_COUNT 100
39*22dc650dSSadaf Ebrahimi
40*22dc650dSSadaf Ebrahimi /* When adding new compile or match options, remember to update the functions
41*22dc650dSSadaf Ebrahimi below that output them. */
42*22dc650dSSadaf Ebrahimi
43*22dc650dSSadaf Ebrahimi #define ALLOWED_COMPILE_OPTIONS \
44*22dc650dSSadaf Ebrahimi (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
45*22dc650dSSadaf Ebrahimi PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
46*22dc650dSSadaf Ebrahimi PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
47*22dc650dSSadaf Ebrahimi PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
48*22dc650dSSadaf Ebrahimi PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
49*22dc650dSSadaf Ebrahimi PCRE2_NO_AUTO_CAPTURE| \
50*22dc650dSSadaf Ebrahimi PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
51*22dc650dSSadaf Ebrahimi PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
52*22dc650dSSadaf Ebrahimi PCRE2_UTF)
53*22dc650dSSadaf Ebrahimi
54*22dc650dSSadaf Ebrahimi #define ALLOWED_MATCH_OPTIONS \
55*22dc650dSSadaf Ebrahimi (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
56*22dc650dSSadaf Ebrahimi PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
57*22dc650dSSadaf Ebrahimi PCRE2_PARTIAL_SOFT)
58*22dc650dSSadaf Ebrahimi
59*22dc650dSSadaf Ebrahimi #define BASE_MATCH_OPTIONS \
60*22dc650dSSadaf Ebrahimi (PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
61*22dc650dSSadaf Ebrahimi
62*22dc650dSSadaf Ebrahimi
63*22dc650dSSadaf Ebrahimi #if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
print_compile_options(FILE * stream,uint32_t compile_options)64*22dc650dSSadaf Ebrahimi static void print_compile_options(FILE *stream, uint32_t compile_options)
65*22dc650dSSadaf Ebrahimi {
66*22dc650dSSadaf Ebrahimi fprintf(stream, "Compile options %s%.8x =",
67*22dc650dSSadaf Ebrahimi (compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
68*22dc650dSSadaf Ebrahimi compile_options);
69*22dc650dSSadaf Ebrahimi
70*22dc650dSSadaf Ebrahimi fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
71*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
72*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
73*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
74*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
75*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
76*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
77*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
78*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
79*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
80*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
81*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
82*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
83*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
84*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
85*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
86*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
87*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
88*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
89*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
90*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
91*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
92*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
93*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
94*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
95*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
96*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
97*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
98*22dc650dSSadaf Ebrahimi ((compile_options & PCRE2_UTF) != 0)? " utf" : "");
99*22dc650dSSadaf Ebrahimi }
100*22dc650dSSadaf Ebrahimi
print_match_options(FILE * stream,uint32_t match_options)101*22dc650dSSadaf Ebrahimi static void print_match_options(FILE *stream, uint32_t match_options)
102*22dc650dSSadaf Ebrahimi {
103*22dc650dSSadaf Ebrahimi fprintf(stream, "Match options %s%.8x =",
104*22dc650dSSadaf Ebrahimi (match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
105*22dc650dSSadaf Ebrahimi
106*22dc650dSSadaf Ebrahimi fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
107*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
108*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
109*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
110*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
111*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
112*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
113*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
114*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
115*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
116*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
117*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
118*22dc650dSSadaf Ebrahimi }
119*22dc650dSSadaf Ebrahimi
120*22dc650dSSadaf Ebrahimi
121*22dc650dSSadaf Ebrahimi /* This function can print an error message at all code unit widths. */
122*22dc650dSSadaf Ebrahimi
print_error(FILE * f,int errorcode,const char * text,...)123*22dc650dSSadaf Ebrahimi static void print_error(FILE *f, int errorcode, const char *text, ...)
124*22dc650dSSadaf Ebrahimi {
125*22dc650dSSadaf Ebrahimi PCRE2_UCHAR buffer[256];
126*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *p = buffer;
127*22dc650dSSadaf Ebrahimi va_list ap;
128*22dc650dSSadaf Ebrahimi va_start(ap, text);
129*22dc650dSSadaf Ebrahimi vfprintf(f, text, ap);
130*22dc650dSSadaf Ebrahimi va_end(ap);
131*22dc650dSSadaf Ebrahimi pcre2_get_error_message(errorcode, buffer, 256);
132*22dc650dSSadaf Ebrahimi while (*p != 0) fprintf(f, "%c", *p++);
133*22dc650dSSadaf Ebrahimi printf("\n");
134*22dc650dSSadaf Ebrahimi }
135*22dc650dSSadaf Ebrahimi #endif /* defined(SUPPORT_DIFF_FUZZ || defined(STANDALONE) */
136*22dc650dSSadaf Ebrahimi
137*22dc650dSSadaf Ebrahimi
138*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
139*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_DIFF_FUZZ
/* Write the first "count" captured substrings held in match_data to the given
stream. Each substring that can be extracted is shown as a run of hexadecimal
code unit values; for each one that cannot, the PCRE2 error message is shown
instead.

Arguments:
  stream      the stream to write to
  count       the number of substrings to show, starting from number 0
  match_data  the match data block to extract the substrings from
*/

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
int errorcode;

for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;

  errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr,
    &bufflen);

  if (errorcode >= 0)
    {
    fprintf(stream, "Match %d (hex encoded): ", index);
    for (PCRE2_SIZE i = 0; i < bufflen; i++)
      {
      fprintf(stream, "%02x", (unsigned int)bufferptr[i]);
      }
    fprintf(stream, "\n");

    /* The substring was copied into newly obtained memory, which has to be
    handed back once it has been printed. */

    pcre2_substring_free(bufferptr);
    }
  else
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    }
  }
}
167*22dc650dSSadaf Ebrahimi
168*22dc650dSSadaf Ebrahimi /* This function describes the current test case being evaluated, then aborts */
169*22dc650dSSadaf Ebrahimi
describe_failure(const char * task,const unsigned char * data,size_t size,uint32_t compile_options,uint32_t match_options,int errorcode,int errorcode_jit,int matches,int matches_jit,pcre2_match_data * match_data,pcre2_match_data * match_data_jit)170*22dc650dSSadaf Ebrahimi static void describe_failure(
171*22dc650dSSadaf Ebrahimi const char *task,
172*22dc650dSSadaf Ebrahimi const unsigned char *data,
173*22dc650dSSadaf Ebrahimi size_t size,
174*22dc650dSSadaf Ebrahimi uint32_t compile_options,
175*22dc650dSSadaf Ebrahimi uint32_t match_options,
176*22dc650dSSadaf Ebrahimi int errorcode,
177*22dc650dSSadaf Ebrahimi int errorcode_jit,
178*22dc650dSSadaf Ebrahimi int matches,
179*22dc650dSSadaf Ebrahimi int matches_jit,
180*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data,
181*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data_jit
182*22dc650dSSadaf Ebrahimi ) {
183*22dc650dSSadaf Ebrahimi
184*22dc650dSSadaf Ebrahimi fprintf(stderr, "Encountered failure while performing %s; context:\n", task);
185*22dc650dSSadaf Ebrahimi
186*22dc650dSSadaf Ebrahimi fprintf(stderr, "Pattern/sample string (hex encoded): ");
187*22dc650dSSadaf Ebrahimi for (size_t i = 0; i < size; i++)
188*22dc650dSSadaf Ebrahimi {
189*22dc650dSSadaf Ebrahimi fprintf(stderr, "%02x", data[i]);
190*22dc650dSSadaf Ebrahimi }
191*22dc650dSSadaf Ebrahimi fprintf(stderr, "\n");
192*22dc650dSSadaf Ebrahimi
193*22dc650dSSadaf Ebrahimi print_compile_options(stderr, compile_options);
194*22dc650dSSadaf Ebrahimi print_match_options(stderr, match_options);
195*22dc650dSSadaf Ebrahimi
196*22dc650dSSadaf Ebrahimi if (errorcode < 0)
197*22dc650dSSadaf Ebrahimi {
198*22dc650dSSadaf Ebrahimi print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");
199*22dc650dSSadaf Ebrahimi }
200*22dc650dSSadaf Ebrahimi
201*22dc650dSSadaf Ebrahimi if (matches >= 0)
202*22dc650dSSadaf Ebrahimi {
203*22dc650dSSadaf Ebrahimi fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
204*22dc650dSSadaf Ebrahimi if (match_data != NULL)
205*22dc650dSSadaf Ebrahimi {
206*22dc650dSSadaf Ebrahimi fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
207*22dc650dSSadaf Ebrahimi dump_matches(stderr, matches, match_data);
208*22dc650dSSadaf Ebrahimi fprintf(stderr, "\n");
209*22dc650dSSadaf Ebrahimi }
210*22dc650dSSadaf Ebrahimi }
211*22dc650dSSadaf Ebrahimi
212*22dc650dSSadaf Ebrahimi if (errorcode_jit < 0)
213*22dc650dSSadaf Ebrahimi {
214*22dc650dSSadaf Ebrahimi print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
215*22dc650dSSadaf Ebrahimi errorcode_jit);
216*22dc650dSSadaf Ebrahimi }
217*22dc650dSSadaf Ebrahimi
218*22dc650dSSadaf Ebrahimi if (matches_jit >= 0)
219*22dc650dSSadaf Ebrahimi {
220*22dc650dSSadaf Ebrahimi fprintf(stderr, "JIT'd operation did not emit an error.\n");
221*22dc650dSSadaf Ebrahimi if (match_data_jit != NULL)
222*22dc650dSSadaf Ebrahimi {
223*22dc650dSSadaf Ebrahimi fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
224*22dc650dSSadaf Ebrahimi dump_matches(stderr, matches_jit, match_data_jit);
225*22dc650dSSadaf Ebrahimi fprintf(stderr, "\n");
226*22dc650dSSadaf Ebrahimi }
227*22dc650dSSadaf Ebrahimi }
228*22dc650dSSadaf Ebrahimi
229*22dc650dSSadaf Ebrahimi abort();
230*22dc650dSSadaf Ebrahimi }
231*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_DIFF_FUZZ */
232*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_JIT */
233*22dc650dSSadaf Ebrahimi
234*22dc650dSSadaf Ebrahimi /* This is the callout function. Its only purpose is to halt matching if there
235*22dc650dSSadaf Ebrahimi are more than 100 callouts, as one way of stopping too much time being spent on
236*22dc650dSSadaf Ebrahimi fruitless matches. The callout data is a pointer to the counter. */
237*22dc650dSSadaf Ebrahimi
callout_function(pcre2_callout_block * cb,void * callout_data)238*22dc650dSSadaf Ebrahimi static int callout_function(pcre2_callout_block *cb, void *callout_data)
239*22dc650dSSadaf Ebrahimi {
240*22dc650dSSadaf Ebrahimi (void)cb; /* Avoid unused parameter warning */
241*22dc650dSSadaf Ebrahimi *((uint32_t *)callout_data) += 1;
242*22dc650dSSadaf Ebrahimi return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
243*22dc650dSSadaf Ebrahimi }
244*22dc650dSSadaf Ebrahimi
245*22dc650dSSadaf Ebrahimi /* Putting in this apparently unnecessary prototype prevents gcc from giving a
246*22dc650dSSadaf Ebrahimi "no previous prototype" warning when compiling at high warning level. */
247*22dc650dSSadaf Ebrahimi
248*22dc650dSSadaf Ebrahimi int LLVMFuzzerInitialize(int *, char ***);
249*22dc650dSSadaf Ebrahimi
250*22dc650dSSadaf Ebrahimi int LLVMFuzzerTestOneInput(unsigned char *, size_t);
251*22dc650dSSadaf Ebrahimi
LLVMFuzzerInitialize(int * argc,char *** argv)252*22dc650dSSadaf Ebrahimi int LLVMFuzzerInitialize(int *argc, char ***argv)
253*22dc650dSSadaf Ebrahimi {
254*22dc650dSSadaf Ebrahimi int rc;
255*22dc650dSSadaf Ebrahimi struct rlimit rlim;
256*22dc650dSSadaf Ebrahimi getrlimit(RLIMIT_STACK, &rlim);
257*22dc650dSSadaf Ebrahimi rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
258*22dc650dSSadaf Ebrahimi if (rlim.rlim_cur > rlim.rlim_max)
259*22dc650dSSadaf Ebrahimi {
260*22dc650dSSadaf Ebrahimi fprintf(stderr, "Hard stack size limit is too small (needed 8MiB)!\n");
261*22dc650dSSadaf Ebrahimi _exit(1);
262*22dc650dSSadaf Ebrahimi }
263*22dc650dSSadaf Ebrahimi rc = setrlimit(RLIMIT_STACK, &rlim);
264*22dc650dSSadaf Ebrahimi if (rc != 0)
265*22dc650dSSadaf Ebrahimi {
266*22dc650dSSadaf Ebrahimi fprintf(stderr, "Failed to expand stack size\n");
267*22dc650dSSadaf Ebrahimi _exit(1);
268*22dc650dSSadaf Ebrahimi }
269*22dc650dSSadaf Ebrahimi
270*22dc650dSSadaf Ebrahimi (void)argc; /* Avoid "unused parameter" warnings */
271*22dc650dSSadaf Ebrahimi (void)argv;
272*22dc650dSSadaf Ebrahimi return 0;
273*22dc650dSSadaf Ebrahimi }
274*22dc650dSSadaf Ebrahimi
275*22dc650dSSadaf Ebrahimi /* Here's the driving function. */
276*22dc650dSSadaf Ebrahimi
LLVMFuzzerTestOneInput(unsigned char * data,size_t size)277*22dc650dSSadaf Ebrahimi int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
278*22dc650dSSadaf Ebrahimi {
279*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *wdata;
280*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *newwdata = NULL;
281*22dc650dSSadaf Ebrahimi uint32_t compile_options;
282*22dc650dSSadaf Ebrahimi uint32_t match_options;
283*22dc650dSSadaf Ebrahimi uint64_t random_options;
284*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data = NULL;
285*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
286*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data_jit = NULL;
287*22dc650dSSadaf Ebrahimi #endif
288*22dc650dSSadaf Ebrahimi pcre2_compile_context *compile_context = NULL;
289*22dc650dSSadaf Ebrahimi pcre2_match_context *match_context = NULL;
290*22dc650dSSadaf Ebrahimi size_t match_size;
291*22dc650dSSadaf Ebrahimi int dfa_workspace[DFA_WORKSPACE_COUNT];
292*22dc650dSSadaf Ebrahimi
293*22dc650dSSadaf Ebrahimi if (size < sizeof(random_options)) return -1;
294*22dc650dSSadaf Ebrahimi
295*22dc650dSSadaf Ebrahimi random_options = *(uint64_t *)(data);
296*22dc650dSSadaf Ebrahimi data += sizeof(random_options);
297*22dc650dSSadaf Ebrahimi wdata = (PCRE2_UCHAR *)data;
298*22dc650dSSadaf Ebrahimi size -= sizeof(random_options);
299*22dc650dSSadaf Ebrahimi size /= PCRE2_CODE_UNIT_WIDTH / 8;
300*22dc650dSSadaf Ebrahimi
301*22dc650dSSadaf Ebrahimi /* PCRE2 compiles quantified groups by replicating them. In certain cases of
302*22dc650dSSadaf Ebrahimi very large quantifiers this can lead to unacceptably long JIT compile times. To
303*22dc650dSSadaf Ebrahimi get around this, we scan the data string for large quantifiers that follow a
304*22dc650dSSadaf Ebrahimi closing parenthesis, and reduce the value of the quantifier to 10, assuming
305*22dc650dSSadaf Ebrahimi that this will make minimal difference to the detection of bugs.
306*22dc650dSSadaf Ebrahimi
307*22dc650dSSadaf Ebrahimi Do the same for quantifiers that follow a closing square bracket, because
308*22dc650dSSadaf Ebrahimi classes that contain a number of non-ascii characters can take a lot of time
309*22dc650dSSadaf Ebrahimi when matching.
310*22dc650dSSadaf Ebrahimi
311*22dc650dSSadaf Ebrahimi We have to make a copy of the input because oss-fuzz complains if we overwrite
312*22dc650dSSadaf Ebrahimi the original. Start the scan at the second character so there can be a
313*22dc650dSSadaf Ebrahimi lookbehind for a backslash, and end it before the end so that the next
314*22dc650dSSadaf Ebrahimi character can be checked for an opening brace. */
315*22dc650dSSadaf Ebrahimi
316*22dc650dSSadaf Ebrahimi if (size > 3)
317*22dc650dSSadaf Ebrahimi {
318*22dc650dSSadaf Ebrahimi newwdata = malloc(size * sizeof(PCRE2_UCHAR));
319*22dc650dSSadaf Ebrahimi memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
320*22dc650dSSadaf Ebrahimi wdata = newwdata;
321*22dc650dSSadaf Ebrahimi
322*22dc650dSSadaf Ebrahimi for (size_t i = 1; i < size - 2; i++)
323*22dc650dSSadaf Ebrahimi {
324*22dc650dSSadaf Ebrahimi size_t j;
325*22dc650dSSadaf Ebrahimi
326*22dc650dSSadaf Ebrahimi if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
327*22dc650dSSadaf Ebrahimi wdata[i+1] != '{')
328*22dc650dSSadaf Ebrahimi continue;
329*22dc650dSSadaf Ebrahimi i++; /* Points to '{' */
330*22dc650dSSadaf Ebrahimi
    /* Loop for the two values of a quantifier. Offset i points to the brace or
    comma at the start of the loop. */
333*22dc650dSSadaf Ebrahimi
334*22dc650dSSadaf Ebrahimi for (int ii = 0; ii < 2; ii++)
335*22dc650dSSadaf Ebrahimi {
336*22dc650dSSadaf Ebrahimi int q = 0;
337*22dc650dSSadaf Ebrahimi
338*22dc650dSSadaf Ebrahimi if (i >= size - 1) goto END_QSCAN; /* Can happen for , */
339*22dc650dSSadaf Ebrahimi
340*22dc650dSSadaf Ebrahimi /* Ignore leading spaces */
341*22dc650dSSadaf Ebrahimi
342*22dc650dSSadaf Ebrahimi while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
343*22dc650dSSadaf Ebrahimi {
344*22dc650dSSadaf Ebrahimi i++;
345*22dc650dSSadaf Ebrahimi if (i >= size - 1) goto END_QSCAN;
346*22dc650dSSadaf Ebrahimi }
347*22dc650dSSadaf Ebrahimi
348*22dc650dSSadaf Ebrahimi /* Scan for a number ending in brace or comma in the first iteration,
349*22dc650dSSadaf Ebrahimi optionally preceded by space. */
350*22dc650dSSadaf Ebrahimi
351*22dc650dSSadaf Ebrahimi for (j = i + 1; j < size && j < i + 7; j++)
352*22dc650dSSadaf Ebrahimi {
353*22dc650dSSadaf Ebrahimi if (wdata[j] == ' ' || wdata[j] == '\t')
354*22dc650dSSadaf Ebrahimi {
355*22dc650dSSadaf Ebrahimi j++;
356*22dc650dSSadaf Ebrahimi while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
357*22dc650dSSadaf Ebrahimi if (j >= size) goto OUTERLOOP;
358*22dc650dSSadaf Ebrahimi if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
359*22dc650dSSadaf Ebrahimi }
360*22dc650dSSadaf Ebrahimi if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
361*22dc650dSSadaf Ebrahimi if (wdata[j] < '0' || wdata[j] > '9')
362*22dc650dSSadaf Ebrahimi {
363*22dc650dSSadaf Ebrahimi j--; /* Ensure this character is checked next. The */
364*22dc650dSSadaf Ebrahimi goto OUTERLOOP; /* string might be (e.g.) "){9){234}" */
365*22dc650dSSadaf Ebrahimi }
366*22dc650dSSadaf Ebrahimi q = q * 10 + wdata[j] - '0';
367*22dc650dSSadaf Ebrahimi }
368*22dc650dSSadaf Ebrahimi
369*22dc650dSSadaf Ebrahimi if (j >= size) goto END_QSCAN; /* End of data */
370*22dc650dSSadaf Ebrahimi
371*22dc650dSSadaf Ebrahimi /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65536 which is
372*22dc650dSSadaf Ebrahimi the maximum quantifier. Leave such numbers alone. */
373*22dc650dSSadaf Ebrahimi
374*22dc650dSSadaf Ebrahimi if (j >= i + 7 || q > 65535) goto OUTERLOOP;
375*22dc650dSSadaf Ebrahimi
376*22dc650dSSadaf Ebrahimi /* Limit the quantifier size to 10 */
377*22dc650dSSadaf Ebrahimi
378*22dc650dSSadaf Ebrahimi if (q > 10)
379*22dc650dSSadaf Ebrahimi {
380*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
381*22dc650dSSadaf Ebrahimi printf("Reduced quantifier value %d to 10.\n", q);
382*22dc650dSSadaf Ebrahimi #endif
383*22dc650dSSadaf Ebrahimi for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
384*22dc650dSSadaf Ebrahimi wdata[j - 2] = '1';
385*22dc650dSSadaf Ebrahimi }
386*22dc650dSSadaf Ebrahimi
387*22dc650dSSadaf Ebrahimi /* Advance to end of number and break if reached closing brace (continue
388*22dc650dSSadaf Ebrahimi after comma, which is only valid in the first time round this loop). */
389*22dc650dSSadaf Ebrahimi
390*22dc650dSSadaf Ebrahimi i = j;
391*22dc650dSSadaf Ebrahimi if (wdata[i] == '}') break;
392*22dc650dSSadaf Ebrahimi }
393*22dc650dSSadaf Ebrahimi
394*22dc650dSSadaf Ebrahimi /* Continue along the data string */
395*22dc650dSSadaf Ebrahimi
396*22dc650dSSadaf Ebrahimi OUTERLOOP:
397*22dc650dSSadaf Ebrahimi i = j;
398*22dc650dSSadaf Ebrahimi continue;
399*22dc650dSSadaf Ebrahimi }
400*22dc650dSSadaf Ebrahimi }
401*22dc650dSSadaf Ebrahimi END_QSCAN:
402*22dc650dSSadaf Ebrahimi
403*22dc650dSSadaf Ebrahimi /* Limiting the length of the subject for matching stops fruitless searches
404*22dc650dSSadaf Ebrahimi in large trees taking too much time. */
405*22dc650dSSadaf Ebrahimi
406*22dc650dSSadaf Ebrahimi match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
407*22dc650dSSadaf Ebrahimi
408*22dc650dSSadaf Ebrahimi /* Create a compile context, and set a limit on the size of the compiled
409*22dc650dSSadaf Ebrahimi pattern. This stops the fuzzer using vast amounts of memory. */
410*22dc650dSSadaf Ebrahimi
411*22dc650dSSadaf Ebrahimi compile_context = pcre2_compile_context_create(NULL);
412*22dc650dSSadaf Ebrahimi if (compile_context == NULL)
413*22dc650dSSadaf Ebrahimi {
414*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
415*22dc650dSSadaf Ebrahimi fprintf(stderr, "** Failed to create compile context block\n");
416*22dc650dSSadaf Ebrahimi #endif
417*22dc650dSSadaf Ebrahimi abort();
418*22dc650dSSadaf Ebrahimi }
419*22dc650dSSadaf Ebrahimi pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
420*22dc650dSSadaf Ebrahimi
421*22dc650dSSadaf Ebrahimi /* Ensure that all undefined option bits are zero (waste of time trying them)
422*22dc650dSSadaf Ebrahimi and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
423*22dc650dSSadaf Ebrahimi input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
424*22dc650dSSadaf Ebrahimi no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
425*22dc650dSSadaf Ebrahimi because \C in random patterns is highly likely to cause a crash. */
426*22dc650dSSadaf Ebrahimi
427*22dc650dSSadaf Ebrahimi compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
428*22dc650dSSadaf Ebrahimi PCRE2_NEVER_BACKSLASH_C;
429*22dc650dSSadaf Ebrahimi match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
430*22dc650dSSadaf Ebrahimi BASE_MATCH_OPTIONS;
431*22dc650dSSadaf Ebrahimi
432*22dc650dSSadaf Ebrahimi /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
433*22dc650dSSadaf Ebrahimi allowed together and just give an immediate error return. */
434*22dc650dSSadaf Ebrahimi
435*22dc650dSSadaf Ebrahimi if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
436*22dc650dSSadaf Ebrahimi match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
437*22dc650dSSadaf Ebrahimi
438*22dc650dSSadaf Ebrahimi /* Do the compile with and without the options, and after a successful compile,
439*22dc650dSSadaf Ebrahimi likewise do the match with and without the options. */
440*22dc650dSSadaf Ebrahimi
441*22dc650dSSadaf Ebrahimi for (int i = 0; i < 2; i++)
442*22dc650dSSadaf Ebrahimi {
443*22dc650dSSadaf Ebrahimi uint32_t callout_count;
444*22dc650dSSadaf Ebrahimi int errorcode;
445*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
446*22dc650dSSadaf Ebrahimi int errorcode_jit;
447*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_DIFF_FUZZ
448*22dc650dSSadaf Ebrahimi int matches = 0;
449*22dc650dSSadaf Ebrahimi int matches_jit = 0;
450*22dc650dSSadaf Ebrahimi #endif
451*22dc650dSSadaf Ebrahimi #endif
452*22dc650dSSadaf Ebrahimi PCRE2_SIZE erroroffset;
453*22dc650dSSadaf Ebrahimi pcre2_code *code;
454*22dc650dSSadaf Ebrahimi
455*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
456*22dc650dSSadaf Ebrahimi printf("\n");
457*22dc650dSSadaf Ebrahimi print_compile_options(stdout, compile_options);
458*22dc650dSSadaf Ebrahimi #endif
459*22dc650dSSadaf Ebrahimi
460*22dc650dSSadaf Ebrahimi code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
461*22dc650dSSadaf Ebrahimi &errorcode, &erroroffset, compile_context);
462*22dc650dSSadaf Ebrahimi
463*22dc650dSSadaf Ebrahimi /* Compilation succeeded */
464*22dc650dSSadaf Ebrahimi
465*22dc650dSSadaf Ebrahimi if (code != NULL)
466*22dc650dSSadaf Ebrahimi {
467*22dc650dSSadaf Ebrahimi int j;
468*22dc650dSSadaf Ebrahimi uint32_t save_match_options = match_options;
469*22dc650dSSadaf Ebrahimi
470*22dc650dSSadaf Ebrahimi /* Call JIT compile only if the compiled pattern is not too big. */
471*22dc650dSSadaf Ebrahimi
472*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
473*22dc650dSSadaf Ebrahimi int jit_ret = -1;
474*22dc650dSSadaf Ebrahimi if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
475*22dc650dSSadaf Ebrahimi {
476*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
477*22dc650dSSadaf Ebrahimi printf("Compile succeeded; calling JIT compile\n");
478*22dc650dSSadaf Ebrahimi #endif
479*22dc650dSSadaf Ebrahimi jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
480*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
481*22dc650dSSadaf Ebrahimi if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
482*22dc650dSSadaf Ebrahimi #endif
483*22dc650dSSadaf Ebrahimi }
484*22dc650dSSadaf Ebrahimi else
485*22dc650dSSadaf Ebrahimi {
486*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
487*22dc650dSSadaf Ebrahimi printf("Not calling JIT: compiled pattern is too long "
488*22dc650dSSadaf Ebrahimi "(%ld bytes; limit=%d)\n",
489*22dc650dSSadaf Ebrahimi ((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
490*22dc650dSSadaf Ebrahimi #endif
491*22dc650dSSadaf Ebrahimi }
492*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_JIT */
493*22dc650dSSadaf Ebrahimi
494*22dc650dSSadaf Ebrahimi /* Create match data and context blocks only when we first need them. Set
495*22dc650dSSadaf Ebrahimi low match and depth limits to avoid wasting too much searching large
496*22dc650dSSadaf Ebrahimi pattern trees. Almost all matches are going to fail. */
497*22dc650dSSadaf Ebrahimi
498*22dc650dSSadaf Ebrahimi if (match_data == NULL)
499*22dc650dSSadaf Ebrahimi {
500*22dc650dSSadaf Ebrahimi match_data = pcre2_match_data_create(32, NULL);
501*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
502*22dc650dSSadaf Ebrahimi match_data_jit = pcre2_match_data_create(32, NULL);
503*22dc650dSSadaf Ebrahimi if (match_data == NULL || match_data_jit == NULL)
504*22dc650dSSadaf Ebrahimi #else
505*22dc650dSSadaf Ebrahimi if (match_data == NULL)
506*22dc650dSSadaf Ebrahimi #endif
507*22dc650dSSadaf Ebrahimi {
508*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
509*22dc650dSSadaf Ebrahimi fprintf(stderr, "** Failed to create match data block\n");
510*22dc650dSSadaf Ebrahimi #endif
511*22dc650dSSadaf Ebrahimi abort();
512*22dc650dSSadaf Ebrahimi }
513*22dc650dSSadaf Ebrahimi }
514*22dc650dSSadaf Ebrahimi
515*22dc650dSSadaf Ebrahimi if (match_context == NULL)
516*22dc650dSSadaf Ebrahimi {
517*22dc650dSSadaf Ebrahimi match_context = pcre2_match_context_create(NULL);
518*22dc650dSSadaf Ebrahimi if (match_context == NULL)
519*22dc650dSSadaf Ebrahimi {
520*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
521*22dc650dSSadaf Ebrahimi fprintf(stderr, "** Failed to create match context block\n");
522*22dc650dSSadaf Ebrahimi #endif
523*22dc650dSSadaf Ebrahimi abort();
524*22dc650dSSadaf Ebrahimi }
525*22dc650dSSadaf Ebrahimi (void)pcre2_set_match_limit(match_context, 100);
526*22dc650dSSadaf Ebrahimi (void)pcre2_set_depth_limit(match_context, 100);
527*22dc650dSSadaf Ebrahimi (void)pcre2_set_callout(match_context, callout_function, &callout_count);
528*22dc650dSSadaf Ebrahimi }
529*22dc650dSSadaf Ebrahimi
530*22dc650dSSadaf Ebrahimi /* Match twice, with and without options. */
531*22dc650dSSadaf Ebrahimi
532*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
533*22dc650dSSadaf Ebrahimi printf("\n");
534*22dc650dSSadaf Ebrahimi #endif
535*22dc650dSSadaf Ebrahimi for (j = 0; j < 2; j++)
536*22dc650dSSadaf Ebrahimi {
537*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
538*22dc650dSSadaf Ebrahimi print_match_options(stdout, match_options);
539*22dc650dSSadaf Ebrahimi #endif
540*22dc650dSSadaf Ebrahimi
541*22dc650dSSadaf Ebrahimi callout_count = 0;
542*22dc650dSSadaf Ebrahimi errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
543*22dc650dSSadaf Ebrahimi match_options, match_data, match_context);
544*22dc650dSSadaf Ebrahimi
545*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
546*22dc650dSSadaf Ebrahimi if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
547*22dc650dSSadaf Ebrahimi print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
548*22dc650dSSadaf Ebrahimi #endif
549*22dc650dSSadaf Ebrahimi
550*22dc650dSSadaf Ebrahimi /* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
551*22dc650dSSadaf Ebrahimi with the interpreter. */
552*22dc650dSSadaf Ebrahimi
553*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
554*22dc650dSSadaf Ebrahimi if (jit_ret >= 0)
555*22dc650dSSadaf Ebrahimi {
556*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
557*22dc650dSSadaf Ebrahimi printf("Matching with JIT\n");
558*22dc650dSSadaf Ebrahimi #endif
559*22dc650dSSadaf Ebrahimi callout_count = 0;
560*22dc650dSSadaf Ebrahimi errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
561*22dc650dSSadaf Ebrahimi match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
562*22dc650dSSadaf Ebrahimi
563*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
564*22dc650dSSadaf Ebrahimi if (errorcode_jit >= 0)
565*22dc650dSSadaf Ebrahimi printf("Match returned %d\n", errorcode_jit);
566*22dc650dSSadaf Ebrahimi else
567*22dc650dSSadaf Ebrahimi print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
568*22dc650dSSadaf Ebrahimi errorcode_jit);
569*22dc650dSSadaf Ebrahimi #else
570*22dc650dSSadaf Ebrahimi (void)errorcode_jit; /* Avoid compiler warning */
571*22dc650dSSadaf Ebrahimi #endif /* STANDALONE */
572*22dc650dSSadaf Ebrahimi
573*22dc650dSSadaf Ebrahimi /* With differential matching enabled, compare with interpreter. */
574*22dc650dSSadaf Ebrahimi
575*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_DIFF_FUZZ
576*22dc650dSSadaf Ebrahimi matches = errorcode;
577*22dc650dSSadaf Ebrahimi matches_jit = errorcode_jit;
578*22dc650dSSadaf Ebrahimi
579*22dc650dSSadaf Ebrahimi if (errorcode_jit != errorcode)
580*22dc650dSSadaf Ebrahimi {
581*22dc650dSSadaf Ebrahimi if (!(errorcode < 0 && errorcode_jit < 0) &&
582*22dc650dSSadaf Ebrahimi errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
583*22dc650dSSadaf Ebrahimi errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
584*22dc650dSSadaf Ebrahimi {
585*22dc650dSSadaf Ebrahimi describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
586*22dc650dSSadaf Ebrahimi }
587*22dc650dSSadaf Ebrahimi }
588*22dc650dSSadaf Ebrahimi else
589*22dc650dSSadaf Ebrahimi {
590*22dc650dSSadaf Ebrahimi for (int index = 0; index < errorcode; index++)
591*22dc650dSSadaf Ebrahimi {
592*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *bufferptr, *bufferptr_jit;
593*22dc650dSSadaf Ebrahimi PCRE2_SIZE bufflen, bufflen_jit;
594*22dc650dSSadaf Ebrahimi
595*22dc650dSSadaf Ebrahimi bufferptr = bufferptr_jit = NULL;
596*22dc650dSSadaf Ebrahimi bufflen = bufflen_jit = 0;
597*22dc650dSSadaf Ebrahimi
598*22dc650dSSadaf Ebrahimi errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
599*22dc650dSSadaf Ebrahimi errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
600*22dc650dSSadaf Ebrahimi
601*22dc650dSSadaf Ebrahimi if (errorcode != errorcode_jit)
602*22dc650dSSadaf Ebrahimi {
603*22dc650dSSadaf Ebrahimi describe_failure("match entry errorcode comparison", wdata, size,
604*22dc650dSSadaf Ebrahimi compile_options, match_options, errorcode, errorcode_jit,
605*22dc650dSSadaf Ebrahimi matches, matches_jit, match_data, match_data_jit);
606*22dc650dSSadaf Ebrahimi }
607*22dc650dSSadaf Ebrahimi
608*22dc650dSSadaf Ebrahimi if (errorcode >= 0)
609*22dc650dSSadaf Ebrahimi {
610*22dc650dSSadaf Ebrahimi if (bufflen != bufflen_jit)
611*22dc650dSSadaf Ebrahimi {
612*22dc650dSSadaf Ebrahimi describe_failure("match entry length comparison", wdata, size,
613*22dc650dSSadaf Ebrahimi compile_options, match_options, errorcode, errorcode_jit,
614*22dc650dSSadaf Ebrahimi matches, matches_jit, match_data, match_data_jit);
615*22dc650dSSadaf Ebrahimi }
616*22dc650dSSadaf Ebrahimi
617*22dc650dSSadaf Ebrahimi if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
618*22dc650dSSadaf Ebrahimi {
619*22dc650dSSadaf Ebrahimi describe_failure("match entry content comparison", wdata, size,
620*22dc650dSSadaf Ebrahimi compile_options, match_options, errorcode, errorcode_jit,
621*22dc650dSSadaf Ebrahimi matches, matches_jit, match_data, match_data_jit);
622*22dc650dSSadaf Ebrahimi }
623*22dc650dSSadaf Ebrahimi }
624*22dc650dSSadaf Ebrahimi
625*22dc650dSSadaf Ebrahimi pcre2_substring_free(bufferptr);
626*22dc650dSSadaf Ebrahimi pcre2_substring_free(bufferptr_jit);
627*22dc650dSSadaf Ebrahimi }
628*22dc650dSSadaf Ebrahimi }
629*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_DIFF_FUZZ */
630*22dc650dSSadaf Ebrahimi }
631*22dc650dSSadaf Ebrahimi #endif /* SUPPORT_JIT */
632*22dc650dSSadaf Ebrahimi
633*22dc650dSSadaf Ebrahimi if (match_options == BASE_MATCH_OPTIONS) break; /* Don't do same twice */
634*22dc650dSSadaf Ebrahimi match_options = BASE_MATCH_OPTIONS; /* For second time */
635*22dc650dSSadaf Ebrahimi }
636*22dc650dSSadaf Ebrahimi
637*22dc650dSSadaf Ebrahimi /* Match with DFA twice, with and without options, but remove options that
638*22dc650dSSadaf Ebrahimi are not allowed with DFA. */
639*22dc650dSSadaf Ebrahimi
640*22dc650dSSadaf Ebrahimi match_options = save_match_options & ~BASE_MATCH_OPTIONS;
641*22dc650dSSadaf Ebrahimi
642*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
643*22dc650dSSadaf Ebrahimi printf("\n");
644*22dc650dSSadaf Ebrahimi #endif
645*22dc650dSSadaf Ebrahimi
646*22dc650dSSadaf Ebrahimi for (j = 0; j < 2; j++)
647*22dc650dSSadaf Ebrahimi {
648*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
649*22dc650dSSadaf Ebrahimi printf("DFA match options %.8x =", match_options);
650*22dc650dSSadaf Ebrahimi printf("%s%s%s%s%s%s%s%s%s\n",
651*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
652*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
653*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
654*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
655*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
656*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
657*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
658*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
659*22dc650dSSadaf Ebrahimi ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
660*22dc650dSSadaf Ebrahimi #endif
661*22dc650dSSadaf Ebrahimi
662*22dc650dSSadaf Ebrahimi callout_count = 0;
663*22dc650dSSadaf Ebrahimi errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
664*22dc650dSSadaf Ebrahimi (PCRE2_SIZE)match_size, 0, match_options, match_data,
665*22dc650dSSadaf Ebrahimi match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
666*22dc650dSSadaf Ebrahimi
667*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
668*22dc650dSSadaf Ebrahimi if (errorcode >= 0)
669*22dc650dSSadaf Ebrahimi printf("Match returned %d\n", errorcode);
670*22dc650dSSadaf Ebrahimi else
671*22dc650dSSadaf Ebrahimi print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
672*22dc650dSSadaf Ebrahimi #endif
673*22dc650dSSadaf Ebrahimi
674*22dc650dSSadaf Ebrahimi if (match_options == 0) break; /* No point doing same twice */
675*22dc650dSSadaf Ebrahimi match_options = 0; /* For second time */
676*22dc650dSSadaf Ebrahimi }
677*22dc650dSSadaf Ebrahimi
678*22dc650dSSadaf Ebrahimi match_options = save_match_options; /* Reset for the second compile */
679*22dc650dSSadaf Ebrahimi pcre2_code_free(code);
680*22dc650dSSadaf Ebrahimi }
681*22dc650dSSadaf Ebrahimi
682*22dc650dSSadaf Ebrahimi /* Compilation failed */
683*22dc650dSSadaf Ebrahimi
684*22dc650dSSadaf Ebrahimi else
685*22dc650dSSadaf Ebrahimi {
686*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
687*22dc650dSSadaf Ebrahimi print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
688*22dc650dSSadaf Ebrahimi erroroffset);
689*22dc650dSSadaf Ebrahimi #else
690*22dc650dSSadaf Ebrahimi if (errorcode == PCRE2_ERROR_INTERNAL) abort();
691*22dc650dSSadaf Ebrahimi #endif
692*22dc650dSSadaf Ebrahimi }
693*22dc650dSSadaf Ebrahimi
694*22dc650dSSadaf Ebrahimi if (compile_options == PCRE2_NEVER_BACKSLASH_C) break; /* Avoid same twice */
695*22dc650dSSadaf Ebrahimi compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
696*22dc650dSSadaf Ebrahimi }
697*22dc650dSSadaf Ebrahimi
698*22dc650dSSadaf Ebrahimi /* Tidy up before exiting */
699*22dc650dSSadaf Ebrahimi
700*22dc650dSSadaf Ebrahimi if (match_data != NULL) pcre2_match_data_free(match_data);
701*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
702*22dc650dSSadaf Ebrahimi if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
703*22dc650dSSadaf Ebrahimi free(newwdata);
704*22dc650dSSadaf Ebrahimi #endif
705*22dc650dSSadaf Ebrahimi if (match_context != NULL) pcre2_match_context_free(match_context);
706*22dc650dSSadaf Ebrahimi if (compile_context != NULL) pcre2_compile_context_free(compile_context);
707*22dc650dSSadaf Ebrahimi return 0;
708*22dc650dSSadaf Ebrahimi }
709*22dc650dSSadaf Ebrahimi
710*22dc650dSSadaf Ebrahimi
711*22dc650dSSadaf Ebrahimi /* Optional main program. */
712*22dc650dSSadaf Ebrahimi
713*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
/* Standalone driver, compiled only when STANDALONE is defined. After calling
LLVMFuzzerInitialize(), each command-line argument is handled in turn: an
argument that starts with '=' supplies the rest of itself as a literal test
string; any other argument is taken as the name of a file whose entire
contents are the test string. Each string is copied into a heap buffer and
passed to LLVMFuzzerTestOneInput(). A problem with one argument is reported
on stdout and that argument is skipped; processing continues with the next.

Arguments:
  argc      argument count
  argv      argument vector

Returns:    always 0
*/

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long endpos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. The leading '=' is not part of the data. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %zu\n", readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc(readsize);
    if (buffer == NULL)
      printf("** Failed to allocate %zu bytes of memory\n", readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file length by seeking to the end. ftell() returns -1 on failure,
  which must not be converted to a (huge) size_t and passed to malloc(). */

  if (fseek(f, 0, SEEK_END) != 0 ||
      (endpos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to determine size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)endpos;

  buffer = (unsigned char *)malloc(filelen);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %zu bytes of memory\n", filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  /* Only run the test when the whole file was read. */

  if (readsize != filelen)
    printf("** File size is %zu but fread() returned %zu\n", filelen, readsize);
  else
    {
    printf("Length = %zu\n", filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
790*22dc650dSSadaf Ebrahimi #endif /* STANDALONE */
791*22dc650dSSadaf Ebrahimi
792*22dc650dSSadaf Ebrahimi /* End */
793