xref: /aosp_15_r20/external/pcre/src/pcre2_fuzzsupport.c (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1*22dc650dSSadaf Ebrahimi /***************************************************************************
2*22dc650dSSadaf Ebrahimi Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
3*22dc650dSSadaf Ebrahimi tries to compile and match it, deriving options from the string itself. If
4*22dc650dSSadaf Ebrahimi STANDALONE is defined, a main program that calls the driver with the contents
5*22dc650dSSadaf Ebrahimi of specified files is compiled, and commentary on what is happening is output.
6*22dc650dSSadaf Ebrahimi If an argument starts with '=' the rest of it is taken as a literal string
7*22dc650dSSadaf Ebrahimi rather than a file name. This allows easy testing of short strings.
8*22dc650dSSadaf Ebrahimi 
9*22dc650dSSadaf Ebrahimi Written by Philip Hazel, October 2016
10*22dc650dSSadaf Ebrahimi Updated February 2024 (Addison Crump added 16-bit/32-bit and JIT support)
11*22dc650dSSadaf Ebrahimi Further updates March/April/May 2024 by PH
12*22dc650dSSadaf Ebrahimi ***************************************************************************/
13*22dc650dSSadaf Ebrahimi 
14*22dc650dSSadaf Ebrahimi #include <errno.h>
15*22dc650dSSadaf Ebrahimi #include <stdarg.h>
16*22dc650dSSadaf Ebrahimi #include <stdio.h>
17*22dc650dSSadaf Ebrahimi #include <stdlib.h>
18*22dc650dSSadaf Ebrahimi #include <string.h>
19*22dc650dSSadaf Ebrahimi #include <unistd.h>
20*22dc650dSSadaf Ebrahimi 
21*22dc650dSSadaf Ebrahimi /* stack size adjustment */
22*22dc650dSSadaf Ebrahimi #include <sys/time.h>
23*22dc650dSSadaf Ebrahimi #include <sys/resource.h>
24*22dc650dSSadaf Ebrahimi 
25*22dc650dSSadaf Ebrahimi #define STACK_SIZE_MB 256
26*22dc650dSSadaf Ebrahimi #define JIT_SIZE_LIMIT (200 * 1024)
27*22dc650dSSadaf Ebrahimi 
28*22dc650dSSadaf Ebrahimi #ifndef PCRE2_CODE_UNIT_WIDTH
29*22dc650dSSadaf Ebrahimi #define PCRE2_CODE_UNIT_WIDTH 8
30*22dc650dSSadaf Ebrahimi #endif
31*22dc650dSSadaf Ebrahimi 
32*22dc650dSSadaf Ebrahimi #include "config.h"
33*22dc650dSSadaf Ebrahimi #include "pcre2.h"
34*22dc650dSSadaf Ebrahimi #include "pcre2_internal.h"
35*22dc650dSSadaf Ebrahimi 
36*22dc650dSSadaf Ebrahimi #define MAX_MATCH_SIZE 1000
37*22dc650dSSadaf Ebrahimi 
38*22dc650dSSadaf Ebrahimi #define DFA_WORKSPACE_COUNT 100
39*22dc650dSSadaf Ebrahimi 
40*22dc650dSSadaf Ebrahimi /* When adding new compile or match options, remember to update the functions
41*22dc650dSSadaf Ebrahimi below that output them. */
42*22dc650dSSadaf Ebrahimi 
43*22dc650dSSadaf Ebrahimi #define ALLOWED_COMPILE_OPTIONS \
44*22dc650dSSadaf Ebrahimi   (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
45*22dc650dSSadaf Ebrahimi    PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
46*22dc650dSSadaf Ebrahimi    PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
47*22dc650dSSadaf Ebrahimi    PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
48*22dc650dSSadaf Ebrahimi    PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
49*22dc650dSSadaf Ebrahimi    PCRE2_NO_AUTO_CAPTURE| \
50*22dc650dSSadaf Ebrahimi    PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
51*22dc650dSSadaf Ebrahimi    PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
52*22dc650dSSadaf Ebrahimi    PCRE2_UTF)
53*22dc650dSSadaf Ebrahimi 
54*22dc650dSSadaf Ebrahimi #define ALLOWED_MATCH_OPTIONS \
55*22dc650dSSadaf Ebrahimi   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
56*22dc650dSSadaf Ebrahimi    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
57*22dc650dSSadaf Ebrahimi    PCRE2_PARTIAL_SOFT)
58*22dc650dSSadaf Ebrahimi 
59*22dc650dSSadaf Ebrahimi #define BASE_MATCH_OPTIONS \
60*22dc650dSSadaf Ebrahimi   (PCRE2_NO_JIT|PCRE2_DISABLE_RECURSELOOP_CHECK)
61*22dc650dSSadaf Ebrahimi 
62*22dc650dSSadaf Ebrahimi 
63*22dc650dSSadaf Ebrahimi #if defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE)
print_compile_options(FILE * stream,uint32_t compile_options)64*22dc650dSSadaf Ebrahimi static void print_compile_options(FILE *stream, uint32_t compile_options)
65*22dc650dSSadaf Ebrahimi {
66*22dc650dSSadaf Ebrahimi fprintf(stream, "Compile options %s%.8x =",
67*22dc650dSSadaf Ebrahimi   (compile_options == PCRE2_NEVER_BACKSLASH_C)? "(base) " : "",
68*22dc650dSSadaf Ebrahimi   compile_options);
69*22dc650dSSadaf Ebrahimi 
70*22dc650dSSadaf Ebrahimi fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
71*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
72*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
73*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
74*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
75*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
76*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
77*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_CASELESS) != 0)? " caseless" : "",
78*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
79*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_DOTALL) != 0)? " dotall" : "",
80*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
81*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
82*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_EXTENDED) != 0)? " extended" : "",
83*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
84*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
85*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
86*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_MULTILINE) != 0)? " multiline" : "",
87*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
88*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
89*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
90*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
91*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
92*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
93*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
94*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
95*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_UCP) != 0)? " ucp" : "",
96*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
97*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
98*22dc650dSSadaf Ebrahimi   ((compile_options & PCRE2_UTF) != 0)? " utf" : "");
99*22dc650dSSadaf Ebrahimi }
100*22dc650dSSadaf Ebrahimi 
print_match_options(FILE * stream,uint32_t match_options)101*22dc650dSSadaf Ebrahimi static void print_match_options(FILE *stream, uint32_t match_options)
102*22dc650dSSadaf Ebrahimi {
103*22dc650dSSadaf Ebrahimi fprintf(stream, "Match options %s%.8x =",
104*22dc650dSSadaf Ebrahimi   (match_options == BASE_MATCH_OPTIONS)? "(base) " : "", match_options);
105*22dc650dSSadaf Ebrahimi 
106*22dc650dSSadaf Ebrahimi fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s\n",
107*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
108*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_DISABLE_RECURSELOOP_CHECK) != 0)? " disable_recurseloop_check" : "",
109*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
110*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
111*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
112*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
113*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
114*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
115*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
116*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
117*22dc650dSSadaf Ebrahimi   ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
118*22dc650dSSadaf Ebrahimi }
119*22dc650dSSadaf Ebrahimi 
120*22dc650dSSadaf Ebrahimi 
121*22dc650dSSadaf Ebrahimi /* This function can print an error message at all code unit widths. */
122*22dc650dSSadaf Ebrahimi 
print_error(FILE * f,int errorcode,const char * text,...)123*22dc650dSSadaf Ebrahimi static void print_error(FILE *f, int errorcode, const char *text, ...)
124*22dc650dSSadaf Ebrahimi {
125*22dc650dSSadaf Ebrahimi PCRE2_UCHAR buffer[256];
126*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *p = buffer;
127*22dc650dSSadaf Ebrahimi va_list ap;
128*22dc650dSSadaf Ebrahimi va_start(ap, text);
129*22dc650dSSadaf Ebrahimi vfprintf(f, text, ap);
130*22dc650dSSadaf Ebrahimi va_end(ap);
131*22dc650dSSadaf Ebrahimi pcre2_get_error_message(errorcode, buffer, 256);
132*22dc650dSSadaf Ebrahimi while (*p != 0) fprintf(f, "%c", *p++);
133*22dc650dSSadaf Ebrahimi printf("\n");
134*22dc650dSSadaf Ebrahimi }
135*22dc650dSSadaf Ebrahimi #endif /* defined(SUPPORT_DIFF_FUZZ) || defined(STANDALONE) */
136*22dc650dSSadaf Ebrahimi 
137*22dc650dSSadaf Ebrahimi 
138*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
139*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_DIFF_FUZZ
/* Write the first count captured substrings held in match_data to the given
stream, one per line, as hex-encoded code units. A substring that cannot be
extracted is reported with its PCRE2 error message instead. */

static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data)
{
for (int index = 0; index < count; index++)
  {
  PCRE2_UCHAR *bufferptr = NULL;
  PCRE2_SIZE bufflen = 0;
  int errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr,
    &bufflen);

  if (errorcode < 0)
    {
    print_error(stream, errorcode, "Match %d failed: ", index);
    continue;
    }

  fprintf(stream, "Match %d (hex encoded): ", index);
  for (PCRE2_SIZE i = 0; i < bufflen; i++)
    {
    fprintf(stream, "%02x", bufferptr[i]);
    }
  fprintf(stream, "\n");

  /* pcre2_substring_get_bynumber() returns the substring in newly obtained
  memory; release it once it has been printed. */

  pcre2_substring_free(bufferptr);
  }
}
167*22dc650dSSadaf Ebrahimi 
168*22dc650dSSadaf Ebrahimi /* This function describes the current test case being evaluated, then aborts */
169*22dc650dSSadaf Ebrahimi 
/* Report a failed differential test on stderr and abort. The report contains
the task name, the raw input in hex, the compile and match options, and, for
both the non-JIT and JIT runs, the error (if the error code is negative)
and/or the captured substrings (if the match count is non-negative and a match
data block was supplied). This function does not return. */

static void describe_failure(
  const char *task,
  const unsigned char *data,
  size_t size,
  uint32_t compile_options,
  uint32_t match_options,
  int errorcode,
  int errorcode_jit,
  int matches,
  int matches_jit,
  pcre2_match_data *match_data,
  pcre2_match_data *match_data_jit
) {
size_t pos = 0;

fprintf(stderr, "Encountered failure while performing %s; context:\n", task);

/* The input that triggered the failure, byte by byte */

fprintf(stderr, "Pattern/sample string (hex encoded): ");
while (pos < size)
  {
  fprintf(stderr, "%02x", data[pos]);
  pos++;
  }
fprintf(stderr, "\n");

print_compile_options(stderr, compile_options);
print_match_options(stderr, match_options);

/* Outcome of the interpreted run */

if (errorcode < 0)
  print_error(stderr, errorcode, "Non-JIT'd operation emitted an error: ");

if (matches >= 0)
  {
  fprintf(stderr, "Non-JIT'd operation did not emit an error.\n");
  if (match_data != NULL)
    {
    fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches);
    dump_matches(stderr, matches, match_data);
    fprintf(stderr, "\n");
    }
  }

/* Outcome of the JIT run */

if (errorcode_jit < 0)
  print_error(stderr, errorcode_jit, "JIT'd operation emitted error %d:",
    errorcode_jit);

if (matches_jit >= 0)
  {
  fprintf(stderr, "JIT'd operation did not emit an error.\n");
  if (match_data_jit != NULL)
    {
    fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit);
    dump_matches(stderr, matches_jit, match_data_jit);
    fprintf(stderr, "\n");
    }
  }

abort();
}
231*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_DIFF_FUZZ */
232*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_JIT */
233*22dc650dSSadaf Ebrahimi 
234*22dc650dSSadaf Ebrahimi /* This is the callout function. Its only purpose is to halt matching if there
235*22dc650dSSadaf Ebrahimi are more than 100 callouts, as one way of stopping too much time being spent on
236*22dc650dSSadaf Ebrahimi fruitless matches. The callout data is a pointer to the counter. */
237*22dc650dSSadaf Ebrahimi 
/* Callout handler: callout_data points to a uint32_t counter, which is
incremented on every call. Once the counter exceeds 100 the handler returns
PCRE2_ERROR_CALLOUT; otherwise it returns 0. The callout block itself is not
inspected. */

static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
uint32_t *counter = (uint32_t *)callout_data;

(void)cb;  /* Avoid unused parameter warning */

++(*counter);
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
244*22dc650dSSadaf Ebrahimi 
245*22dc650dSSadaf Ebrahimi /* Putting in this apparently unnecessary prototype prevents gcc from giving a
246*22dc650dSSadaf Ebrahimi "no previous prototype" warning when compiling at high warning level. */
247*22dc650dSSadaf Ebrahimi 
int LLVMFuzzerInitialize(int *argc, char ***argv);
int LLVMFuzzerTestOneInput(unsigned char *data, size_t size);
251*22dc650dSSadaf Ebrahimi 
LLVMFuzzerInitialize(int * argc,char *** argv)252*22dc650dSSadaf Ebrahimi int LLVMFuzzerInitialize(int *argc, char ***argv)
253*22dc650dSSadaf Ebrahimi {
254*22dc650dSSadaf Ebrahimi int rc;
255*22dc650dSSadaf Ebrahimi struct rlimit rlim;
256*22dc650dSSadaf Ebrahimi getrlimit(RLIMIT_STACK, &rlim);
257*22dc650dSSadaf Ebrahimi rlim.rlim_cur = STACK_SIZE_MB * 1024 * 1024;
258*22dc650dSSadaf Ebrahimi if (rlim.rlim_cur > rlim.rlim_max)
259*22dc650dSSadaf Ebrahimi   {
260*22dc650dSSadaf Ebrahimi   fprintf(stderr, "Hard stack size limit is too small (needed 8MiB)!\n");
261*22dc650dSSadaf Ebrahimi   _exit(1);
262*22dc650dSSadaf Ebrahimi   }
263*22dc650dSSadaf Ebrahimi rc = setrlimit(RLIMIT_STACK, &rlim);
264*22dc650dSSadaf Ebrahimi if (rc != 0)
265*22dc650dSSadaf Ebrahimi   {
266*22dc650dSSadaf Ebrahimi   fprintf(stderr, "Failed to expand stack size\n");
267*22dc650dSSadaf Ebrahimi   _exit(1);
268*22dc650dSSadaf Ebrahimi   }
269*22dc650dSSadaf Ebrahimi 
270*22dc650dSSadaf Ebrahimi (void)argc;  /* Avoid "unused parameter" warnings */
271*22dc650dSSadaf Ebrahimi (void)argv;
272*22dc650dSSadaf Ebrahimi return 0;
273*22dc650dSSadaf Ebrahimi }
274*22dc650dSSadaf Ebrahimi 
275*22dc650dSSadaf Ebrahimi /* Here's the driving function. */
276*22dc650dSSadaf Ebrahimi 
LLVMFuzzerTestOneInput(unsigned char * data,size_t size)277*22dc650dSSadaf Ebrahimi int LLVMFuzzerTestOneInput(unsigned char *data, size_t size)
278*22dc650dSSadaf Ebrahimi {
279*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *wdata;
280*22dc650dSSadaf Ebrahimi PCRE2_UCHAR *newwdata = NULL;
281*22dc650dSSadaf Ebrahimi uint32_t compile_options;
282*22dc650dSSadaf Ebrahimi uint32_t match_options;
283*22dc650dSSadaf Ebrahimi uint64_t random_options;
284*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data = NULL;
285*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
286*22dc650dSSadaf Ebrahimi pcre2_match_data *match_data_jit = NULL;
287*22dc650dSSadaf Ebrahimi #endif
288*22dc650dSSadaf Ebrahimi pcre2_compile_context *compile_context = NULL;
289*22dc650dSSadaf Ebrahimi pcre2_match_context *match_context = NULL;
290*22dc650dSSadaf Ebrahimi size_t match_size;
291*22dc650dSSadaf Ebrahimi int dfa_workspace[DFA_WORKSPACE_COUNT];
292*22dc650dSSadaf Ebrahimi 
293*22dc650dSSadaf Ebrahimi if (size < sizeof(random_options)) return -1;
294*22dc650dSSadaf Ebrahimi 
295*22dc650dSSadaf Ebrahimi random_options = *(uint64_t *)(data);
296*22dc650dSSadaf Ebrahimi data += sizeof(random_options);
297*22dc650dSSadaf Ebrahimi wdata = (PCRE2_UCHAR *)data;
298*22dc650dSSadaf Ebrahimi size -= sizeof(random_options);
299*22dc650dSSadaf Ebrahimi size /= PCRE2_CODE_UNIT_WIDTH / 8;
300*22dc650dSSadaf Ebrahimi 
301*22dc650dSSadaf Ebrahimi /* PCRE2 compiles quantified groups by replicating them. In certain cases of
302*22dc650dSSadaf Ebrahimi very large quantifiers this can lead to unacceptably long JIT compile times. To
303*22dc650dSSadaf Ebrahimi get around this, we scan the data string for large quantifiers that follow a
304*22dc650dSSadaf Ebrahimi closing parenthesis, and reduce the value of the quantifier to 10, assuming
305*22dc650dSSadaf Ebrahimi that this will make minimal difference to the detection of bugs.
306*22dc650dSSadaf Ebrahimi 
307*22dc650dSSadaf Ebrahimi Do the same for quantifiers that follow a closing square bracket, because
308*22dc650dSSadaf Ebrahimi classes that contain a number of non-ascii characters can take a lot of time
309*22dc650dSSadaf Ebrahimi when matching.
310*22dc650dSSadaf Ebrahimi 
311*22dc650dSSadaf Ebrahimi We have to make a copy of the input because oss-fuzz complains if we overwrite
312*22dc650dSSadaf Ebrahimi the original. Start the scan at the second character so there can be a
313*22dc650dSSadaf Ebrahimi lookbehind for a backslash, and end it before the end so that the next
314*22dc650dSSadaf Ebrahimi character can be checked for an opening brace. */
315*22dc650dSSadaf Ebrahimi 
316*22dc650dSSadaf Ebrahimi if (size > 3)
317*22dc650dSSadaf Ebrahimi   {
318*22dc650dSSadaf Ebrahimi   newwdata = malloc(size * sizeof(PCRE2_UCHAR));
319*22dc650dSSadaf Ebrahimi   memcpy(newwdata, wdata, size * sizeof(PCRE2_UCHAR));
320*22dc650dSSadaf Ebrahimi   wdata = newwdata;
321*22dc650dSSadaf Ebrahimi 
322*22dc650dSSadaf Ebrahimi   for (size_t i = 1; i < size - 2; i++)
323*22dc650dSSadaf Ebrahimi     {
324*22dc650dSSadaf Ebrahimi     size_t j;
325*22dc650dSSadaf Ebrahimi 
326*22dc650dSSadaf Ebrahimi     if ((wdata[i] != ')' && wdata[i] != ']') || wdata[i-1] == '\\' ||
327*22dc650dSSadaf Ebrahimi          wdata[i+1] != '{')
328*22dc650dSSadaf Ebrahimi       continue;
329*22dc650dSSadaf Ebrahimi     i++;  /* Points to '{' */
330*22dc650dSSadaf Ebrahimi 
331*22dc650dSSadaf Ebrahimi     /* Loop for the two values of a quantifier. Offset i points to the brace or
332*22dc650dSSadaf Ebrahimi     comma at the start of the loop. */
333*22dc650dSSadaf Ebrahimi 
334*22dc650dSSadaf Ebrahimi     for (int ii = 0; ii < 2; ii++)
335*22dc650dSSadaf Ebrahimi       {
336*22dc650dSSadaf Ebrahimi       int q = 0;
337*22dc650dSSadaf Ebrahimi 
338*22dc650dSSadaf Ebrahimi       if (i >= size - 1) goto END_QSCAN;  /* Can happen for , */
339*22dc650dSSadaf Ebrahimi 
340*22dc650dSSadaf Ebrahimi       /* Ignore leading spaces */
341*22dc650dSSadaf Ebrahimi 
342*22dc650dSSadaf Ebrahimi       while (wdata[i+1] == ' ' || wdata[i+1] == '\t')
343*22dc650dSSadaf Ebrahimi         {
344*22dc650dSSadaf Ebrahimi         i++;
345*22dc650dSSadaf Ebrahimi         if (i >= size - 1) goto END_QSCAN;
346*22dc650dSSadaf Ebrahimi         }
347*22dc650dSSadaf Ebrahimi 
348*22dc650dSSadaf Ebrahimi       /* Scan for a number ending in brace or comma in the first iteration,
349*22dc650dSSadaf Ebrahimi       optionally preceded by space. */
350*22dc650dSSadaf Ebrahimi 
351*22dc650dSSadaf Ebrahimi       for (j = i + 1; j < size && j < i + 7; j++)
352*22dc650dSSadaf Ebrahimi         {
353*22dc650dSSadaf Ebrahimi         if (wdata[j] == ' ' || wdata[j] == '\t')
354*22dc650dSSadaf Ebrahimi           {
355*22dc650dSSadaf Ebrahimi           j++;
356*22dc650dSSadaf Ebrahimi           while (j < size && (wdata[j] == ' ' || wdata[j] == '\t')) j++;
357*22dc650dSSadaf Ebrahimi           if (j >= size) goto OUTERLOOP;
358*22dc650dSSadaf Ebrahimi           if (wdata[j] != '}' && wdata[j] != ',') goto OUTERLOOP;
359*22dc650dSSadaf Ebrahimi           }
360*22dc650dSSadaf Ebrahimi         if (wdata[j] == '}' || (ii == 0 && wdata[j] == ',')) break;
361*22dc650dSSadaf Ebrahimi         if (wdata[j] < '0' || wdata[j] > '9')
362*22dc650dSSadaf Ebrahimi           {
363*22dc650dSSadaf Ebrahimi           j--;               /* Ensure this character is checked next. The */
364*22dc650dSSadaf Ebrahimi           goto OUTERLOOP;    /* string might be (e.g.) "){9){234}" */
365*22dc650dSSadaf Ebrahimi           }
366*22dc650dSSadaf Ebrahimi         q = q * 10 + wdata[j] - '0';
367*22dc650dSSadaf Ebrahimi         }
368*22dc650dSSadaf Ebrahimi 
369*22dc650dSSadaf Ebrahimi       if (j >= size) goto END_QSCAN;  /* End of data */
370*22dc650dSSadaf Ebrahimi 
371*22dc650dSSadaf Ebrahimi       /* Hit ',' or '}' or read 6 digits. Six digits is a number > 65535 which is
372*22dc650dSSadaf Ebrahimi       the maximum quantifier. Leave such numbers alone. */
373*22dc650dSSadaf Ebrahimi 
374*22dc650dSSadaf Ebrahimi       if (j >= i + 7 || q > 65535) goto OUTERLOOP;
375*22dc650dSSadaf Ebrahimi 
376*22dc650dSSadaf Ebrahimi       /* Limit the quantifier size to 10 */
377*22dc650dSSadaf Ebrahimi 
378*22dc650dSSadaf Ebrahimi       if (q > 10)
379*22dc650dSSadaf Ebrahimi         {
380*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
381*22dc650dSSadaf Ebrahimi         printf("Reduced quantifier value %d to 10.\n", q);
382*22dc650dSSadaf Ebrahimi #endif
383*22dc650dSSadaf Ebrahimi         for (size_t k = i + 1; k < j; k++) wdata[k] = '0';
384*22dc650dSSadaf Ebrahimi         wdata[j - 2] = '1';
385*22dc650dSSadaf Ebrahimi         }
386*22dc650dSSadaf Ebrahimi 
387*22dc650dSSadaf Ebrahimi       /* Advance to end of number and break if reached closing brace (continue
388*22dc650dSSadaf Ebrahimi       after comma, which is only valid in the first time round this loop). */
389*22dc650dSSadaf Ebrahimi 
390*22dc650dSSadaf Ebrahimi       i = j;
391*22dc650dSSadaf Ebrahimi       if (wdata[i] == '}') break;
392*22dc650dSSadaf Ebrahimi       }
393*22dc650dSSadaf Ebrahimi 
394*22dc650dSSadaf Ebrahimi     /* Continue along the data string */
395*22dc650dSSadaf Ebrahimi 
396*22dc650dSSadaf Ebrahimi     OUTERLOOP:
397*22dc650dSSadaf Ebrahimi     i = j;
398*22dc650dSSadaf Ebrahimi     continue;
399*22dc650dSSadaf Ebrahimi     }
400*22dc650dSSadaf Ebrahimi   }
401*22dc650dSSadaf Ebrahimi END_QSCAN:
402*22dc650dSSadaf Ebrahimi 
403*22dc650dSSadaf Ebrahimi /* Limiting the length of the subject for matching stops fruitless searches
404*22dc650dSSadaf Ebrahimi in large trees taking too much time. */
405*22dc650dSSadaf Ebrahimi 
406*22dc650dSSadaf Ebrahimi match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
407*22dc650dSSadaf Ebrahimi 
408*22dc650dSSadaf Ebrahimi /* Create a compile context, and set a limit on the size of the compiled
409*22dc650dSSadaf Ebrahimi pattern. This stops the fuzzer using vast amounts of memory. */
410*22dc650dSSadaf Ebrahimi 
411*22dc650dSSadaf Ebrahimi compile_context = pcre2_compile_context_create(NULL);
412*22dc650dSSadaf Ebrahimi if (compile_context == NULL)
413*22dc650dSSadaf Ebrahimi   {
414*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
415*22dc650dSSadaf Ebrahimi   fprintf(stderr, "** Failed to create compile context block\n");
416*22dc650dSSadaf Ebrahimi #endif
417*22dc650dSSadaf Ebrahimi   abort();
418*22dc650dSSadaf Ebrahimi   }
419*22dc650dSSadaf Ebrahimi pcre2_set_max_pattern_compiled_length(compile_context, 10*1024*1024);
420*22dc650dSSadaf Ebrahimi 
421*22dc650dSSadaf Ebrahimi /* Ensure that all undefined option bits are zero (waste of time trying them)
422*22dc650dSSadaf Ebrahimi and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
423*22dc650dSSadaf Ebrahimi input is valid UTF. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is
424*22dc650dSSadaf Ebrahimi no reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set
425*22dc650dSSadaf Ebrahimi because \C in random patterns is highly likely to cause a crash. */
426*22dc650dSSadaf Ebrahimi 
427*22dc650dSSadaf Ebrahimi compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) |
428*22dc650dSSadaf Ebrahimi   PCRE2_NEVER_BACKSLASH_C;
429*22dc650dSSadaf Ebrahimi match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) |
430*22dc650dSSadaf Ebrahimi   BASE_MATCH_OPTIONS;
431*22dc650dSSadaf Ebrahimi 
432*22dc650dSSadaf Ebrahimi /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
433*22dc650dSSadaf Ebrahimi allowed together and just give an immediate error return. */
434*22dc650dSSadaf Ebrahimi 
435*22dc650dSSadaf Ebrahimi if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
436*22dc650dSSadaf Ebrahimi   match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
437*22dc650dSSadaf Ebrahimi 
438*22dc650dSSadaf Ebrahimi /* Do the compile with and without the options, and after a successful compile,
439*22dc650dSSadaf Ebrahimi likewise do the match with and without the options. */
440*22dc650dSSadaf Ebrahimi 
441*22dc650dSSadaf Ebrahimi for (int i = 0; i < 2; i++)
442*22dc650dSSadaf Ebrahimi   {
443*22dc650dSSadaf Ebrahimi   uint32_t callout_count;
444*22dc650dSSadaf Ebrahimi   int errorcode;
445*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
446*22dc650dSSadaf Ebrahimi   int errorcode_jit;
447*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_DIFF_FUZZ
448*22dc650dSSadaf Ebrahimi   int matches = 0;
449*22dc650dSSadaf Ebrahimi   int matches_jit = 0;
450*22dc650dSSadaf Ebrahimi #endif
451*22dc650dSSadaf Ebrahimi #endif
452*22dc650dSSadaf Ebrahimi   PCRE2_SIZE erroroffset;
453*22dc650dSSadaf Ebrahimi   pcre2_code *code;
454*22dc650dSSadaf Ebrahimi 
455*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
456*22dc650dSSadaf Ebrahimi   printf("\n");
457*22dc650dSSadaf Ebrahimi   print_compile_options(stdout, compile_options);
458*22dc650dSSadaf Ebrahimi #endif
459*22dc650dSSadaf Ebrahimi 
460*22dc650dSSadaf Ebrahimi   code = pcre2_compile((PCRE2_SPTR)wdata, (PCRE2_SIZE)size, compile_options,
461*22dc650dSSadaf Ebrahimi     &errorcode, &erroroffset, compile_context);
462*22dc650dSSadaf Ebrahimi 
463*22dc650dSSadaf Ebrahimi   /* Compilation succeeded */
464*22dc650dSSadaf Ebrahimi 
465*22dc650dSSadaf Ebrahimi   if (code != NULL)
466*22dc650dSSadaf Ebrahimi     {
467*22dc650dSSadaf Ebrahimi     int j;
468*22dc650dSSadaf Ebrahimi     uint32_t save_match_options = match_options;
469*22dc650dSSadaf Ebrahimi 
470*22dc650dSSadaf Ebrahimi     /* Call JIT compile only if the compiled pattern is not too big. */
471*22dc650dSSadaf Ebrahimi 
472*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
473*22dc650dSSadaf Ebrahimi     int jit_ret = -1;
474*22dc650dSSadaf Ebrahimi     if (((struct pcre2_real_code *)code)->blocksize <= JIT_SIZE_LIMIT)
475*22dc650dSSadaf Ebrahimi       {
476*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
477*22dc650dSSadaf Ebrahimi       printf("Compile succeeded; calling JIT compile\n");
478*22dc650dSSadaf Ebrahimi #endif
479*22dc650dSSadaf Ebrahimi       jit_ret = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
480*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
481*22dc650dSSadaf Ebrahimi       if (jit_ret < 0) printf("JIT compile error %d\n", jit_ret);
482*22dc650dSSadaf Ebrahimi #endif
483*22dc650dSSadaf Ebrahimi       }
484*22dc650dSSadaf Ebrahimi     else
485*22dc650dSSadaf Ebrahimi       {
486*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
487*22dc650dSSadaf Ebrahimi       printf("Not calling JIT: compiled pattern is too long "
488*22dc650dSSadaf Ebrahimi         "(%ld bytes; limit=%d)\n",
489*22dc650dSSadaf Ebrahimi         ((struct pcre2_real_code *)code)->blocksize, JIT_SIZE_LIMIT);
490*22dc650dSSadaf Ebrahimi #endif
491*22dc650dSSadaf Ebrahimi       }
492*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_JIT */
493*22dc650dSSadaf Ebrahimi 
494*22dc650dSSadaf Ebrahimi     /* Create match data and context blocks only when we first need them. Set
495*22dc650dSSadaf Ebrahimi     low match and depth limits to avoid wasting too much searching large
496*22dc650dSSadaf Ebrahimi     pattern trees. Almost all matches are going to fail. */
497*22dc650dSSadaf Ebrahimi 
498*22dc650dSSadaf Ebrahimi     if (match_data == NULL)
499*22dc650dSSadaf Ebrahimi       {
500*22dc650dSSadaf Ebrahimi       match_data = pcre2_match_data_create(32, NULL);
501*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
502*22dc650dSSadaf Ebrahimi       match_data_jit = pcre2_match_data_create(32, NULL);
503*22dc650dSSadaf Ebrahimi       if (match_data == NULL || match_data_jit == NULL)
504*22dc650dSSadaf Ebrahimi #else
505*22dc650dSSadaf Ebrahimi       if (match_data == NULL)
506*22dc650dSSadaf Ebrahimi #endif
507*22dc650dSSadaf Ebrahimi         {
508*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
509*22dc650dSSadaf Ebrahimi         fprintf(stderr, "** Failed to create match data block\n");
510*22dc650dSSadaf Ebrahimi #endif
511*22dc650dSSadaf Ebrahimi         abort();
512*22dc650dSSadaf Ebrahimi         }
513*22dc650dSSadaf Ebrahimi       }
514*22dc650dSSadaf Ebrahimi 
515*22dc650dSSadaf Ebrahimi     if (match_context == NULL)
516*22dc650dSSadaf Ebrahimi       {
517*22dc650dSSadaf Ebrahimi       match_context = pcre2_match_context_create(NULL);
518*22dc650dSSadaf Ebrahimi       if (match_context == NULL)
519*22dc650dSSadaf Ebrahimi         {
520*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
521*22dc650dSSadaf Ebrahimi         fprintf(stderr, "** Failed to create match context block\n");
522*22dc650dSSadaf Ebrahimi #endif
523*22dc650dSSadaf Ebrahimi         abort();
524*22dc650dSSadaf Ebrahimi         }
525*22dc650dSSadaf Ebrahimi       (void)pcre2_set_match_limit(match_context, 100);
526*22dc650dSSadaf Ebrahimi       (void)pcre2_set_depth_limit(match_context, 100);
527*22dc650dSSadaf Ebrahimi       (void)pcre2_set_callout(match_context, callout_function, &callout_count);
528*22dc650dSSadaf Ebrahimi       }
529*22dc650dSSadaf Ebrahimi 
530*22dc650dSSadaf Ebrahimi     /* Match twice, with and without options. */
531*22dc650dSSadaf Ebrahimi 
532*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
533*22dc650dSSadaf Ebrahimi     printf("\n");
534*22dc650dSSadaf Ebrahimi #endif
535*22dc650dSSadaf Ebrahimi     for (j = 0; j < 2; j++)
536*22dc650dSSadaf Ebrahimi       {
537*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
538*22dc650dSSadaf Ebrahimi       print_match_options(stdout, match_options);
539*22dc650dSSadaf Ebrahimi #endif
540*22dc650dSSadaf Ebrahimi 
541*22dc650dSSadaf Ebrahimi       callout_count = 0;
542*22dc650dSSadaf Ebrahimi       errorcode = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
543*22dc650dSSadaf Ebrahimi         match_options, match_data, match_context);
544*22dc650dSSadaf Ebrahimi 
545*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
546*22dc650dSSadaf Ebrahimi       if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
547*22dc650dSSadaf Ebrahimi         print_error(stdout, errorcode, "Match failed: error %d: ", errorcode);
548*22dc650dSSadaf Ebrahimi #endif
549*22dc650dSSadaf Ebrahimi 
550*22dc650dSSadaf Ebrahimi /* If JIT is enabled, do a JIT match and, if appropriately compiled, compare
551*22dc650dSSadaf Ebrahimi with the interpreter. */
552*22dc650dSSadaf Ebrahimi 
553*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
554*22dc650dSSadaf Ebrahimi       if (jit_ret >= 0)
555*22dc650dSSadaf Ebrahimi         {
556*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
557*22dc650dSSadaf Ebrahimi         printf("Matching with JIT\n");
558*22dc650dSSadaf Ebrahimi #endif
559*22dc650dSSadaf Ebrahimi         callout_count = 0;
560*22dc650dSSadaf Ebrahimi         errorcode_jit = pcre2_match(code, (PCRE2_SPTR)wdata, (PCRE2_SIZE)match_size, 0,
561*22dc650dSSadaf Ebrahimi           match_options & ~PCRE2_NO_JIT, match_data_jit, match_context);
562*22dc650dSSadaf Ebrahimi 
563*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
564*22dc650dSSadaf Ebrahimi         if (errorcode_jit >= 0)
565*22dc650dSSadaf Ebrahimi           printf("Match returned %d\n", errorcode_jit);
566*22dc650dSSadaf Ebrahimi         else
567*22dc650dSSadaf Ebrahimi           print_error(stdout, errorcode_jit, "JIT match failed: error %d: ",
568*22dc650dSSadaf Ebrahimi             errorcode_jit);
569*22dc650dSSadaf Ebrahimi #else
570*22dc650dSSadaf Ebrahimi         (void)errorcode_jit;   /* Avoid compiler warning */
571*22dc650dSSadaf Ebrahimi #endif  /* STANDALONE */
572*22dc650dSSadaf Ebrahimi 
573*22dc650dSSadaf Ebrahimi /* With differential matching enabled, compare with interpreter. */
574*22dc650dSSadaf Ebrahimi 
575*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_DIFF_FUZZ
576*22dc650dSSadaf Ebrahimi         matches = errorcode;
577*22dc650dSSadaf Ebrahimi         matches_jit = errorcode_jit;
578*22dc650dSSadaf Ebrahimi 
579*22dc650dSSadaf Ebrahimi         if (errorcode_jit != errorcode)
580*22dc650dSSadaf Ebrahimi           {
581*22dc650dSSadaf Ebrahimi           if (!(errorcode < 0 && errorcode_jit < 0) &&
582*22dc650dSSadaf Ebrahimi                 errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT &&
583*22dc650dSSadaf Ebrahimi                 errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT)
584*22dc650dSSadaf Ebrahimi             {
585*22dc650dSSadaf Ebrahimi             describe_failure("match errorcode comparison", wdata, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit);
586*22dc650dSSadaf Ebrahimi             }
587*22dc650dSSadaf Ebrahimi           }
588*22dc650dSSadaf Ebrahimi         else
589*22dc650dSSadaf Ebrahimi           {
590*22dc650dSSadaf Ebrahimi           for (int index = 0; index < errorcode; index++)
591*22dc650dSSadaf Ebrahimi             {
592*22dc650dSSadaf Ebrahimi             PCRE2_UCHAR *bufferptr, *bufferptr_jit;
593*22dc650dSSadaf Ebrahimi             PCRE2_SIZE bufflen, bufflen_jit;
594*22dc650dSSadaf Ebrahimi 
595*22dc650dSSadaf Ebrahimi             bufferptr = bufferptr_jit = NULL;
596*22dc650dSSadaf Ebrahimi             bufflen = bufflen_jit = 0;
597*22dc650dSSadaf Ebrahimi 
598*22dc650dSSadaf Ebrahimi             errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen);
599*22dc650dSSadaf Ebrahimi             errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit);
600*22dc650dSSadaf Ebrahimi 
601*22dc650dSSadaf Ebrahimi             if (errorcode != errorcode_jit)
602*22dc650dSSadaf Ebrahimi               {
603*22dc650dSSadaf Ebrahimi               describe_failure("match entry errorcode comparison", wdata, size,
604*22dc650dSSadaf Ebrahimi                 compile_options, match_options, errorcode, errorcode_jit,
605*22dc650dSSadaf Ebrahimi                 matches, matches_jit, match_data, match_data_jit);
606*22dc650dSSadaf Ebrahimi               }
607*22dc650dSSadaf Ebrahimi 
608*22dc650dSSadaf Ebrahimi             if (errorcode >= 0)
609*22dc650dSSadaf Ebrahimi               {
610*22dc650dSSadaf Ebrahimi               if (bufflen != bufflen_jit)
611*22dc650dSSadaf Ebrahimi                 {
612*22dc650dSSadaf Ebrahimi                 describe_failure("match entry length comparison", wdata, size,
613*22dc650dSSadaf Ebrahimi                   compile_options, match_options, errorcode, errorcode_jit,
614*22dc650dSSadaf Ebrahimi                   matches, matches_jit, match_data, match_data_jit);
615*22dc650dSSadaf Ebrahimi                 }
616*22dc650dSSadaf Ebrahimi 
617*22dc650dSSadaf Ebrahimi               if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0)
618*22dc650dSSadaf Ebrahimi                 {
619*22dc650dSSadaf Ebrahimi                 describe_failure("match entry content comparison", wdata, size,
620*22dc650dSSadaf Ebrahimi                   compile_options, match_options, errorcode, errorcode_jit,
621*22dc650dSSadaf Ebrahimi                   matches, matches_jit, match_data, match_data_jit);
622*22dc650dSSadaf Ebrahimi                 }
623*22dc650dSSadaf Ebrahimi               }
624*22dc650dSSadaf Ebrahimi 
625*22dc650dSSadaf Ebrahimi               pcre2_substring_free(bufferptr);
626*22dc650dSSadaf Ebrahimi               pcre2_substring_free(bufferptr_jit);
627*22dc650dSSadaf Ebrahimi             }
628*22dc650dSSadaf Ebrahimi           }
629*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_DIFF_FUZZ */
630*22dc650dSSadaf Ebrahimi         }
631*22dc650dSSadaf Ebrahimi #endif  /* SUPPORT_JIT */
632*22dc650dSSadaf Ebrahimi 
633*22dc650dSSadaf Ebrahimi       if (match_options == BASE_MATCH_OPTIONS) break;  /* Don't do same twice */
634*22dc650dSSadaf Ebrahimi       match_options = BASE_MATCH_OPTIONS;              /* For second time */
635*22dc650dSSadaf Ebrahimi       }
636*22dc650dSSadaf Ebrahimi 
637*22dc650dSSadaf Ebrahimi     /* Match with DFA twice, with and without options, but remove options that
638*22dc650dSSadaf Ebrahimi     are not allowed with DFA. */
639*22dc650dSSadaf Ebrahimi 
640*22dc650dSSadaf Ebrahimi     match_options = save_match_options & ~BASE_MATCH_OPTIONS;
641*22dc650dSSadaf Ebrahimi 
642*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
643*22dc650dSSadaf Ebrahimi     printf("\n");
644*22dc650dSSadaf Ebrahimi #endif
645*22dc650dSSadaf Ebrahimi 
646*22dc650dSSadaf Ebrahimi     for (j = 0; j < 2; j++)
647*22dc650dSSadaf Ebrahimi       {
648*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
649*22dc650dSSadaf Ebrahimi       printf("DFA match options %.8x =", match_options);
650*22dc650dSSadaf Ebrahimi       printf("%s%s%s%s%s%s%s%s%s\n",
651*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_ANCHORED) != 0)? " anchored" : "",
652*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
653*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
654*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_NOTBOL) != 0)? " notbol" : "",
655*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
656*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
657*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_NOTEOL) != 0)? " noteol" : "",
658*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
659*22dc650dSSadaf Ebrahimi         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
660*22dc650dSSadaf Ebrahimi #endif
661*22dc650dSSadaf Ebrahimi 
662*22dc650dSSadaf Ebrahimi       callout_count = 0;
663*22dc650dSSadaf Ebrahimi       errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)wdata,
664*22dc650dSSadaf Ebrahimi         (PCRE2_SIZE)match_size, 0, match_options, match_data,
665*22dc650dSSadaf Ebrahimi         match_context, dfa_workspace, DFA_WORKSPACE_COUNT);
666*22dc650dSSadaf Ebrahimi 
667*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
668*22dc650dSSadaf Ebrahimi       if (errorcode >= 0)
669*22dc650dSSadaf Ebrahimi         printf("Match returned %d\n", errorcode);
670*22dc650dSSadaf Ebrahimi       else
671*22dc650dSSadaf Ebrahimi         print_error(stdout, errorcode, "DFA match failed: error %d: ", errorcode);
672*22dc650dSSadaf Ebrahimi #endif
673*22dc650dSSadaf Ebrahimi 
674*22dc650dSSadaf Ebrahimi       if (match_options == 0) break;  /* No point doing same twice */
675*22dc650dSSadaf Ebrahimi       match_options = 0;              /* For second time */
676*22dc650dSSadaf Ebrahimi       }
677*22dc650dSSadaf Ebrahimi 
678*22dc650dSSadaf Ebrahimi     match_options = save_match_options;  /* Reset for the second compile */
679*22dc650dSSadaf Ebrahimi     pcre2_code_free(code);
680*22dc650dSSadaf Ebrahimi     }
681*22dc650dSSadaf Ebrahimi 
682*22dc650dSSadaf Ebrahimi   /* Compilation failed */
683*22dc650dSSadaf Ebrahimi 
684*22dc650dSSadaf Ebrahimi   else
685*22dc650dSSadaf Ebrahimi     {
686*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
687*22dc650dSSadaf Ebrahimi     print_error(stdout, errorcode, "Error %d at offset %lu: ", errorcode,
688*22dc650dSSadaf Ebrahimi       erroroffset);
689*22dc650dSSadaf Ebrahimi #else
690*22dc650dSSadaf Ebrahimi     if (errorcode == PCRE2_ERROR_INTERNAL) abort();
691*22dc650dSSadaf Ebrahimi #endif
692*22dc650dSSadaf Ebrahimi     }
693*22dc650dSSadaf Ebrahimi 
694*22dc650dSSadaf Ebrahimi   if (compile_options == PCRE2_NEVER_BACKSLASH_C) break;  /* Avoid same twice */
695*22dc650dSSadaf Ebrahimi   compile_options = PCRE2_NEVER_BACKSLASH_C;              /* For second time */
696*22dc650dSSadaf Ebrahimi   }
697*22dc650dSSadaf Ebrahimi 
698*22dc650dSSadaf Ebrahimi /* Tidy up before exiting */
699*22dc650dSSadaf Ebrahimi 
700*22dc650dSSadaf Ebrahimi if (match_data != NULL) pcre2_match_data_free(match_data);
701*22dc650dSSadaf Ebrahimi #ifdef SUPPORT_JIT
702*22dc650dSSadaf Ebrahimi if (match_data_jit != NULL) pcre2_match_data_free(match_data_jit);
703*22dc650dSSadaf Ebrahimi free(newwdata);
704*22dc650dSSadaf Ebrahimi #endif
705*22dc650dSSadaf Ebrahimi if (match_context != NULL) pcre2_match_context_free(match_context);
706*22dc650dSSadaf Ebrahimi if (compile_context != NULL) pcre2_compile_context_free(compile_context);
707*22dc650dSSadaf Ebrahimi return 0;
708*22dc650dSSadaf Ebrahimi }
709*22dc650dSSadaf Ebrahimi 
710*22dc650dSSadaf Ebrahimi 
711*22dc650dSSadaf Ebrahimi /* Optional main program.  */
712*22dc650dSSadaf Ebrahimi 
713*22dc650dSSadaf Ebrahimi #ifdef STANDALONE
/* Standalone driver. The fuzzer entry point is called once for each
command-line argument. An argument that starts with '=' supplies the input
directly (the text after the '='); any other argument is taken as the name of
a file whose entire contents are the input. A problem with one argument is
reported on stdout and the remaining arguments are still processed.

Arguments:
  argc        the argument count
  argv        the argument vector

Returns:      always 0
*/

int main(int argc, char **argv)
{
LLVMFuzzerInitialize(&argc, &argv);

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (int i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long endpos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. A zero-length literal still requests one byte, because
  malloc(0) is allowed to return NULL and that must not be mistaken for an
  allocation failure. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %zu\n", readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
    if (buffer == NULL)
      printf("** Failed to allocate %zu bytes of memory\n", readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file size by seeking to the end. ftell() returns -1 on failure,
  which must be caught before the value is converted to an unsigned size. */

  if (fseek(f, 0, SEEK_END) != 0 || (endpos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to find the size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)endpos;

  /* As for a literal, an empty file still gets a one-byte allocation. */

  buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %zu bytes of memory\n", filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  /* Only run the fuzzer when the whole file was read. */

  if (readsize != filelen)
    printf("** File size is %zu but fread() returned %zu\n", filelen, readsize);
  else
    {
    printf("Length = %zu\n", filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
790*22dc650dSSadaf Ebrahimi #endif  /* STANDALONE */
791*22dc650dSSadaf Ebrahimi 
792*22dc650dSSadaf Ebrahimi /* End */
793