1 /* A fuzz test for CPython.
2 
3   The only exposed function is LLVMFuzzerTestOneInput, which is called by
4   fuzzers and by the _fuzz module for smoke tests.
5 
6   To build exactly one fuzz test, as when running in oss-fuzz etc.,
7   build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
8   LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
9       -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
10 
11   See the source code for LLVMFuzzerTestOneInput for details. */
12 
13 #include <Python.h>
14 #include <stdlib.h>
15 #include <inttypes.h>
16 
17 /*  Fuzz PyFloat_FromString as a proxy for float(str). */
fuzz_builtin_float(const char * data,size_t size)18 static int fuzz_builtin_float(const char* data, size_t size) {
19     PyObject* s = PyBytes_FromStringAndSize(data, size);
20     if (s == NULL) return 0;
21     PyObject* f = PyFloat_FromString(s);
22     if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
23         PyErr_Clear();
24     }
25 
26     Py_XDECREF(f);
27     Py_DECREF(s);
28     return 0;
29 }
30 
31 #define MAX_INT_TEST_SIZE 0x10000
32 
33 /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
fuzz_builtin_int(const char * data,size_t size)34 static int fuzz_builtin_int(const char* data, size_t size) {
35     /* Ignore test cases with very long ints to avoid timeouts
36        int("9" * 1000000) is not a very interesting test caase */
37     if (size > MAX_INT_TEST_SIZE) {
38         return 0;
39     }
40     /* Pick a random valid base. (When the fuzzed function takes extra
41        parameters, it's somewhat normal to hash the input to generate those
42        parameters. We want to exercise all code paths, so we do so here.) */
43     int base = _Py_HashBytes(data, size) % 37;
44     if (base == 1) {
45         // 1 is the only number between 0 and 36 that is not a valid base.
46         base = 0;
47     }
48     if (base == -1) {
49         return 0;  // An error occurred, bail early.
50     }
51     if (base < 0) {
52         base = -base;
53     }
54 
55     PyObject* s = PyUnicode_FromStringAndSize(data, size);
56     if (s == NULL) {
57         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
58             PyErr_Clear();
59         }
60         return 0;
61     }
62     PyObject* l = PyLong_FromUnicodeObject(s, base);
63     if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
64         PyErr_Clear();
65     }
66     PyErr_Clear();
67     Py_XDECREF(l);
68     Py_DECREF(s);
69     return 0;
70 }
71 
72 /* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
fuzz_builtin_unicode(const char * data,size_t size)73 static int fuzz_builtin_unicode(const char* data, size_t size) {
74     PyObject* s = PyUnicode_FromStringAndSize(data, size);
75     if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
76         PyErr_Clear();
77     }
78     Py_XDECREF(s);
79     return 0;
80 }
81 
82 
83 PyObject* struct_unpack_method = NULL;
84 PyObject* struct_error = NULL;
85 /* Called by LLVMFuzzerTestOneInput for initialization */
init_struct_unpack(void)86 static int init_struct_unpack(void) {
87     /* Import struct.unpack */
88     PyObject* struct_module = PyImport_ImportModule("struct");
89     if (struct_module == NULL) {
90         return 0;
91     }
92     struct_error = PyObject_GetAttrString(struct_module, "error");
93     if (struct_error == NULL) {
94         return 0;
95     }
96     struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack");
97     return struct_unpack_method != NULL;
98 }
99 /* Fuzz struct.unpack(x, y) */
fuzz_struct_unpack(const char * data,size_t size)100 static int fuzz_struct_unpack(const char* data, size_t size) {
101     /* Everything up to the first null byte is considered the
102        format. Everything after is the buffer */
103     const char* first_null = memchr(data, '\0', size);
104     if (first_null == NULL) {
105         return 0;
106     }
107 
108     size_t format_length = first_null - data;
109     size_t buffer_length = size - format_length - 1;
110 
111     PyObject* pattern = PyBytes_FromStringAndSize(data, format_length);
112     if (pattern == NULL) {
113         return 0;
114     }
115     PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length);
116     if (buffer == NULL) {
117         Py_DECREF(pattern);
118         return 0;
119     }
120 
121     PyObject* unpacked = PyObject_CallFunctionObjArgs(
122         struct_unpack_method, pattern, buffer, NULL);
123     /* Ignore any overflow errors, these are easily triggered accidentally */
124     if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
125         PyErr_Clear();
126     }
127     /* The pascal format string will throw a negative size when passing 0
128        like: struct.unpack('0p', b'') */
129     if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
130         PyErr_Clear();
131     }
132     /* Ignore any struct.error exceptions, these can be caused by invalid
133        formats or incomplete buffers both of which are common. */
134     if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
135         PyErr_Clear();
136     }
137 
138     Py_XDECREF(unpacked);
139     Py_DECREF(pattern);
140     Py_DECREF(buffer);
141     return 0;
142 }
143 
144 
145 #define MAX_JSON_TEST_SIZE 0x10000
146 
147 PyObject* json_loads_method = NULL;
148 /* Called by LLVMFuzzerTestOneInput for initialization */
init_json_loads(void)149 static int init_json_loads(void) {
150     /* Import json.loads */
151     PyObject* json_module = PyImport_ImportModule("json");
152     if (json_module == NULL) {
153         return 0;
154     }
155     json_loads_method = PyObject_GetAttrString(json_module, "loads");
156     return json_loads_method != NULL;
157 }
158 /* Fuzz json.loads(x) */
fuzz_json_loads(const char * data,size_t size)159 static int fuzz_json_loads(const char* data, size_t size) {
160     /* Since python supports arbitrarily large ints in JSON,
161        long inputs can lead to timeouts on boring inputs like
162        `json.loads("9" * 100000)` */
163     if (size > MAX_JSON_TEST_SIZE) {
164         return 0;
165     }
166     PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
167     if (input_bytes == NULL) {
168         return 0;
169     }
170     PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes);
171     if (parsed == NULL) {
172         /* Ignore ValueError as the fuzzer will more than likely
173            generate some invalid json and values */
174         if (PyErr_ExceptionMatches(PyExc_ValueError) ||
175         /* Ignore RecursionError as the fuzzer generates long sequences of
176            arrays such as `[[[...` */
177             PyErr_ExceptionMatches(PyExc_RecursionError) ||
178         /* Ignore unicode errors, invalid byte sequences are common */
179             PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
180         ) {
181             PyErr_Clear();
182         }
183     }
184     Py_DECREF(input_bytes);
185     Py_XDECREF(parsed);
186     return 0;
187 }
188 
189 #define MAX_RE_TEST_SIZE 0x10000
190 
191 PyObject* sre_compile_method = NULL;
192 PyObject* sre_error_exception = NULL;
193 int SRE_FLAG_DEBUG = 0;
194 /* Called by LLVMFuzzerTestOneInput for initialization */
init_sre_compile(void)195 static int init_sre_compile(void) {
196     /* Import sre_compile.compile and sre.error */
197     PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
198     if (sre_compile_module == NULL) {
199         return 0;
200     }
201     sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
202     if (sre_compile_method == NULL) {
203         return 0;
204     }
205 
206     PyObject* sre_constants = PyImport_ImportModule("sre_constants");
207     if (sre_constants == NULL) {
208         return 0;
209     }
210     sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
211     if (sre_error_exception == NULL) {
212         return 0;
213     }
214     PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
215     if (debug_flag == NULL) {
216         return 0;
217     }
218     SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
219     return 1;
220 }
221 /* Fuzz _sre.compile(x) */
fuzz_sre_compile(const char * data,size_t size)222 static int fuzz_sre_compile(const char* data, size_t size) {
223     /* Ignore really long regex patterns that will timeout the fuzzer */
224     if (size > MAX_RE_TEST_SIZE) {
225         return 0;
226     }
227     /* We treat the first 2 bytes of the input as a number for the flags */
228     if (size < 2) {
229         return 0;
230     }
231     uint16_t flags = ((uint16_t*) data)[0];
232     /* We remove the SRE_FLAG_DEBUG if present. This is because it
233        prints to stdout which greatly decreases fuzzing speed */
234     flags &= ~SRE_FLAG_DEBUG;
235 
236     /* Pull the pattern from the remaining bytes */
237     PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
238     if (pattern_bytes == NULL) {
239         return 0;
240     }
241     PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
242     if (flags_obj == NULL) {
243         Py_DECREF(pattern_bytes);
244         return 0;
245     }
246 
247     /* compiled = _sre.compile(data[2:], data[0:2] */
248     PyObject* compiled = PyObject_CallFunctionObjArgs(
249         sre_compile_method, pattern_bytes, flags_obj, NULL);
250     /* Ignore ValueError as the fuzzer will more than likely
251        generate some invalid combination of flags */
252     if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
253         PyErr_Clear();
254     }
255     /* Ignore some common errors thrown by sre_parse:
256        Overflow, Assertion, Recursion and Index */
257     if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
258                              PyErr_ExceptionMatches(PyExc_AssertionError) ||
259                              PyErr_ExceptionMatches(PyExc_RecursionError) ||
260                              PyErr_ExceptionMatches(PyExc_IndexError))
261     ) {
262         PyErr_Clear();
263     }
264     /* Ignore re.error */
265     if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
266         PyErr_Clear();
267     }
268 
269     Py_DECREF(pattern_bytes);
270     Py_DECREF(flags_obj);
271     Py_XDECREF(compiled);
272     return 0;
273 }
274 
275 /* Some random patterns used to test re.match.
276    Be careful not to add catostraphically slow regexes here, we want to
277    exercise the matching code without causing timeouts.*/
278 static const char* regex_patterns[] = {
279     ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
280     "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
281     "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
282     "(?:a*)*", "a{1,2}?"
283 };
284 const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
285 PyObject** compiled_patterns = NULL;
286 /* Called by LLVMFuzzerTestOneInput for initialization */
init_sre_match(void)287 static int init_sre_match(void) {
288     PyObject* re_module = PyImport_ImportModule("re");
289     if (re_module == NULL) {
290         return 0;
291     }
292     compiled_patterns = (PyObject**) PyMem_RawMalloc(
293         sizeof(PyObject*) * NUM_PATTERNS);
294     if (compiled_patterns == NULL) {
295         PyErr_NoMemory();
296         return 0;
297     }
298 
299     /* Precompile all the regex patterns on the first run for faster fuzzing */
300     for (size_t i = 0; i < NUM_PATTERNS; i++) {
301         PyObject* compiled = PyObject_CallMethod(
302             re_module, "compile", "y", regex_patterns[i]);
303         /* Bail if any of the patterns fail to compile */
304         if (compiled == NULL) {
305             return 0;
306         }
307         compiled_patterns[i] = compiled;
308     }
309     return 1;
310 }
311 /* Fuzz re.match(x) */
fuzz_sre_match(const char * data,size_t size)312 static int fuzz_sre_match(const char* data, size_t size) {
313     if (size < 1 || size > MAX_RE_TEST_SIZE) {
314         return 0;
315     }
316     /* Use the first byte as a uint8_t specifying the index of the
317        regex to use */
318     unsigned char idx = (unsigned char) data[0];
319     idx = idx % NUM_PATTERNS;
320 
321     /* Pull the string to match from the remaining bytes */
322     PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
323     if (to_match == NULL) {
324         return 0;
325     }
326 
327     PyObject* pattern = compiled_patterns[idx];
328     PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
329 
330     PyObject* matches = PyObject_CallOneArg(match_callable, to_match);
331 
332     Py_XDECREF(matches);
333     Py_DECREF(match_callable);
334     Py_DECREF(to_match);
335     return 0;
336 }
337 
338 #define MAX_CSV_TEST_SIZE 0x10000
339 PyObject* csv_module = NULL;
340 PyObject* csv_error = NULL;
341 /* Called by LLVMFuzzerTestOneInput for initialization */
init_csv_reader(void)342 static int init_csv_reader(void) {
343     /* Import csv and csv.Error */
344     csv_module = PyImport_ImportModule("csv");
345     if (csv_module == NULL) {
346         return 0;
347     }
348     csv_error = PyObject_GetAttrString(csv_module, "Error");
349     return csv_error != NULL;
350 }
351 /* Fuzz csv.reader([x]) */
fuzz_csv_reader(const char * data,size_t size)352 static int fuzz_csv_reader(const char* data, size_t size) {
353     if (size < 1 || size > MAX_CSV_TEST_SIZE) {
354         return 0;
355     }
356     /* Ignore non null-terminated strings since _csv can't handle
357        embedded nulls */
358     if (memchr(data, '\0', size) == NULL) {
359         return 0;
360     }
361 
362     PyObject* s = PyUnicode_FromString(data);
363     /* Ignore exceptions until we have a valid string */
364     if (s == NULL) {
365         PyErr_Clear();
366         return 0;
367     }
368 
369     /* Split on \n so we can test multiple lines */
370     PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
371     if (lines == NULL) {
372         Py_DECREF(s);
373         return 0;
374     }
375 
376     PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
377     if (reader) {
378         /* Consume all of the reader as an iterator */
379         PyObject* parsed_line;
380         while ((parsed_line = PyIter_Next(reader))) {
381             Py_DECREF(parsed_line);
382         }
383     }
384 
385     /* Ignore csv.Error because we're probably going to generate
386        some bad files (embedded new-lines, unterminated quotes etc) */
387     if (PyErr_ExceptionMatches(csv_error)) {
388         PyErr_Clear();
389     }
390 
391     Py_XDECREF(reader);
392     Py_DECREF(s);
393     return 0;
394 }
395 
396 #define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x10000
397 PyObject* ast_literal_eval_method = NULL;
398 /* Called by LLVMFuzzerTestOneInput for initialization */
init_ast_literal_eval(void)399 static int init_ast_literal_eval(void) {
400     PyObject* ast_module = PyImport_ImportModule("ast");
401     if (ast_module == NULL) {
402         return 0;
403     }
404     ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval");
405     return ast_literal_eval_method != NULL;
406 }
407 /* Fuzz ast.literal_eval(x) */
fuzz_ast_literal_eval(const char * data,size_t size)408 static int fuzz_ast_literal_eval(const char* data, size_t size) {
409     if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) {
410         return 0;
411     }
412     /* Ignore non null-terminated strings since ast can't handle
413        embedded nulls */
414     if (memchr(data, '\0', size) == NULL) {
415         return 0;
416     }
417 
418     PyObject* s = PyUnicode_FromString(data);
419     /* Ignore exceptions until we have a valid string */
420     if (s == NULL) {
421         PyErr_Clear();
422         return 0;
423     }
424 
425     PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s);
426     /* Ignore some common errors thrown by ast.literal_eval */
427     if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) ||
428                             PyErr_ExceptionMatches(PyExc_TypeError) ||
429                             PyErr_ExceptionMatches(PyExc_SyntaxError) ||
430                             PyErr_ExceptionMatches(PyExc_MemoryError) ||
431                             PyErr_ExceptionMatches(PyExc_RecursionError))
432     ) {
433         PyErr_Clear();
434     }
435 
436     Py_XDECREF(literal);
437     Py_DECREF(s);
438     return 0;
439 }
440 
/* Run fuzzer and abort on failure.

   Calls fuzzer with data reinterpreted as const char* plus size. If a
   Python exception is still pending afterwards, it is printed and the
   process aborts; otherwise the fuzzer's return value is passed through. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
    const char* bytes = (const char*) data;
    int rv = fuzzer(bytes, size);

    if (!PyErr_Occurred()) {
        /* Someday the return value might mean something, propagate it. */
        return rv;
    }

    /* Fuzz tests should handle expected errors for themselves.
       This is last-ditch check in case they didn't. */
    PyErr_Print();
    abort();
}
453 
/* CPython generates a lot of leak warnings for whatever reason.
   Always returns 1 (presumably the hook LeakSanitizer consults to decide
   whether leak checking is disabled -- confirm against the sanitizer docs). */
int __lsan_is_turned_off(void)
{
    return 1;
}
456 
457 
LLVMFuzzerInitialize(int * argc,char *** argv)458 int LLVMFuzzerInitialize(int *argc, char ***argv) {
459     PyConfig config;
460     PyConfig_InitPythonConfig(&config);
461     config.install_signal_handlers = 0;
462     PyStatus status;
463     status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]);
464     if (PyStatus_Exception(status)) {
465         goto fail;
466     }
467 
468     status = Py_InitializeFromConfig(&config);
469     if (PyStatus_Exception(status)) {
470         goto fail;
471     }
472     PyConfig_Clear(&config);
473 
474     return 0;
475 
476 fail:
477     PyConfig_Clear(&config);
478     Py_ExitStatusException(status);
479 }
480 
481 /* Fuzz test interface.
482    This returns the bitwise or of all fuzz test's return values.
483 
484    All fuzz tests must return 0, as all nonzero return codes are reserved for
485    future use -- we propagate the return values for that future case.
486    (And we bitwise or when running multiple tests to verify that normally we
487    only return 0.) */
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)488 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
489     assert(Py_IsInitialized());
490 
491     int rv = 0;
492 
493 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
494     rv |= _run_fuzz(data, size, fuzz_builtin_float);
495 #endif
496 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
497     rv |= _run_fuzz(data, size, fuzz_builtin_int);
498 #endif
499 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
500     rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
501 #endif
502 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack)
503     static int STRUCT_UNPACK_INITIALIZED = 0;
504     if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) {
505         PyErr_Print();
506         abort();
507     } else {
508         STRUCT_UNPACK_INITIALIZED = 1;
509     }
510     rv |= _run_fuzz(data, size, fuzz_struct_unpack);
511 #endif
512 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
513     static int JSON_LOADS_INITIALIZED = 0;
514     if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
515         PyErr_Print();
516         abort();
517     } else {
518         JSON_LOADS_INITIALIZED = 1;
519     }
520 
521     rv |= _run_fuzz(data, size, fuzz_json_loads);
522 #endif
523 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
524     static int SRE_COMPILE_INITIALIZED = 0;
525     if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
526         PyErr_Print();
527         abort();
528     } else {
529         SRE_COMPILE_INITIALIZED = 1;
530     }
531 
532     rv |= _run_fuzz(data, size, fuzz_sre_compile);
533 #endif
534 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
535     static int SRE_MATCH_INITIALIZED = 0;
536     if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
537         PyErr_Print();
538         abort();
539     } else {
540         SRE_MATCH_INITIALIZED = 1;
541     }
542 
543     rv |= _run_fuzz(data, size, fuzz_sre_match);
544 #endif
545 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
546     static int CSV_READER_INITIALIZED = 0;
547     if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
548         PyErr_Print();
549         abort();
550     } else {
551         CSV_READER_INITIALIZED = 1;
552     }
553 
554     rv |= _run_fuzz(data, size, fuzz_csv_reader);
555 #endif
556 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval)
557     static int AST_LITERAL_EVAL_INITIALIZED = 0;
558     if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) {
559         PyErr_Print();
560         abort();
561     } else {
562         AST_LITERAL_EVAL_INITIALIZED = 1;
563     }
564 
565     rv |= _run_fuzz(data, size, fuzz_ast_literal_eval);
566 #endif
567   return rv;
568 }
569