/* A fuzz test for CPython.

  The fuzzing entry point is LLVMFuzzerTestOneInput, which is called by
  fuzzers and by the _fuzz module for smoke tests.

  To build exactly one fuzz test, as when running in oss-fuzz etc.,
  build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. For example, to
  build LLVMFuzzerTestOneInput so that it only runs "fuzz_builtin_float",
  build this file with
      -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.

  See the source code for LLVMFuzzerTestOneInput for details. */
12
#include <Python.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
16
17 /* Fuzz PyFloat_FromString as a proxy for float(str). */
fuzz_builtin_float(const char * data,size_t size)18 static int fuzz_builtin_float(const char* data, size_t size) {
19 PyObject* s = PyBytes_FromStringAndSize(data, size);
20 if (s == NULL) return 0;
21 PyObject* f = PyFloat_FromString(s);
22 if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
23 PyErr_Clear();
24 }
25
26 Py_XDECREF(f);
27 Py_DECREF(s);
28 return 0;
29 }
30
31 #define MAX_INT_TEST_SIZE 0x10000
32
33 /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
fuzz_builtin_int(const char * data,size_t size)34 static int fuzz_builtin_int(const char* data, size_t size) {
35 /* Ignore test cases with very long ints to avoid timeouts
36 int("9" * 1000000) is not a very interesting test caase */
37 if (size > MAX_INT_TEST_SIZE) {
38 return 0;
39 }
40 /* Pick a random valid base. (When the fuzzed function takes extra
41 parameters, it's somewhat normal to hash the input to generate those
42 parameters. We want to exercise all code paths, so we do so here.) */
43 int base = _Py_HashBytes(data, size) % 37;
44 if (base == 1) {
45 // 1 is the only number between 0 and 36 that is not a valid base.
46 base = 0;
47 }
48 if (base == -1) {
49 return 0; // An error occurred, bail early.
50 }
51 if (base < 0) {
52 base = -base;
53 }
54
55 PyObject* s = PyUnicode_FromStringAndSize(data, size);
56 if (s == NULL) {
57 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
58 PyErr_Clear();
59 }
60 return 0;
61 }
62 PyObject* l = PyLong_FromUnicodeObject(s, base);
63 if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
64 PyErr_Clear();
65 }
66 PyErr_Clear();
67 Py_XDECREF(l);
68 Py_DECREF(s);
69 return 0;
70 }
71
72 /* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
fuzz_builtin_unicode(const char * data,size_t size)73 static int fuzz_builtin_unicode(const char* data, size_t size) {
74 PyObject* s = PyUnicode_FromStringAndSize(data, size);
75 if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
76 PyErr_Clear();
77 }
78 Py_XDECREF(s);
79 return 0;
80 }
81
82
83 PyObject* struct_unpack_method = NULL;
84 PyObject* struct_error = NULL;
85 /* Called by LLVMFuzzerTestOneInput for initialization */
init_struct_unpack(void)86 static int init_struct_unpack(void) {
87 /* Import struct.unpack */
88 PyObject* struct_module = PyImport_ImportModule("struct");
89 if (struct_module == NULL) {
90 return 0;
91 }
92 struct_error = PyObject_GetAttrString(struct_module, "error");
93 if (struct_error == NULL) {
94 return 0;
95 }
96 struct_unpack_method = PyObject_GetAttrString(struct_module, "unpack");
97 return struct_unpack_method != NULL;
98 }
99 /* Fuzz struct.unpack(x, y) */
fuzz_struct_unpack(const char * data,size_t size)100 static int fuzz_struct_unpack(const char* data, size_t size) {
101 /* Everything up to the first null byte is considered the
102 format. Everything after is the buffer */
103 const char* first_null = memchr(data, '\0', size);
104 if (first_null == NULL) {
105 return 0;
106 }
107
108 size_t format_length = first_null - data;
109 size_t buffer_length = size - format_length - 1;
110
111 PyObject* pattern = PyBytes_FromStringAndSize(data, format_length);
112 if (pattern == NULL) {
113 return 0;
114 }
115 PyObject* buffer = PyBytes_FromStringAndSize(first_null + 1, buffer_length);
116 if (buffer == NULL) {
117 Py_DECREF(pattern);
118 return 0;
119 }
120
121 PyObject* unpacked = PyObject_CallFunctionObjArgs(
122 struct_unpack_method, pattern, buffer, NULL);
123 /* Ignore any overflow errors, these are easily triggered accidentally */
124 if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
125 PyErr_Clear();
126 }
127 /* The pascal format string will throw a negative size when passing 0
128 like: struct.unpack('0p', b'') */
129 if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
130 PyErr_Clear();
131 }
132 /* Ignore any struct.error exceptions, these can be caused by invalid
133 formats or incomplete buffers both of which are common. */
134 if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
135 PyErr_Clear();
136 }
137
138 Py_XDECREF(unpacked);
139 Py_DECREF(pattern);
140 Py_DECREF(buffer);
141 return 0;
142 }
143
144
145 #define MAX_JSON_TEST_SIZE 0x10000
146
147 PyObject* json_loads_method = NULL;
148 /* Called by LLVMFuzzerTestOneInput for initialization */
init_json_loads(void)149 static int init_json_loads(void) {
150 /* Import json.loads */
151 PyObject* json_module = PyImport_ImportModule("json");
152 if (json_module == NULL) {
153 return 0;
154 }
155 json_loads_method = PyObject_GetAttrString(json_module, "loads");
156 return json_loads_method != NULL;
157 }
158 /* Fuzz json.loads(x) */
fuzz_json_loads(const char * data,size_t size)159 static int fuzz_json_loads(const char* data, size_t size) {
160 /* Since python supports arbitrarily large ints in JSON,
161 long inputs can lead to timeouts on boring inputs like
162 `json.loads("9" * 100000)` */
163 if (size > MAX_JSON_TEST_SIZE) {
164 return 0;
165 }
166 PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
167 if (input_bytes == NULL) {
168 return 0;
169 }
170 PyObject* parsed = PyObject_CallOneArg(json_loads_method, input_bytes);
171 if (parsed == NULL) {
172 /* Ignore ValueError as the fuzzer will more than likely
173 generate some invalid json and values */
174 if (PyErr_ExceptionMatches(PyExc_ValueError) ||
175 /* Ignore RecursionError as the fuzzer generates long sequences of
176 arrays such as `[[[...` */
177 PyErr_ExceptionMatches(PyExc_RecursionError) ||
178 /* Ignore unicode errors, invalid byte sequences are common */
179 PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
180 ) {
181 PyErr_Clear();
182 }
183 }
184 Py_DECREF(input_bytes);
185 Py_XDECREF(parsed);
186 return 0;
187 }
188
189 #define MAX_RE_TEST_SIZE 0x10000
190
191 PyObject* sre_compile_method = NULL;
192 PyObject* sre_error_exception = NULL;
193 int SRE_FLAG_DEBUG = 0;
194 /* Called by LLVMFuzzerTestOneInput for initialization */
init_sre_compile(void)195 static int init_sre_compile(void) {
196 /* Import sre_compile.compile and sre.error */
197 PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
198 if (sre_compile_module == NULL) {
199 return 0;
200 }
201 sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
202 if (sre_compile_method == NULL) {
203 return 0;
204 }
205
206 PyObject* sre_constants = PyImport_ImportModule("sre_constants");
207 if (sre_constants == NULL) {
208 return 0;
209 }
210 sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
211 if (sre_error_exception == NULL) {
212 return 0;
213 }
214 PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
215 if (debug_flag == NULL) {
216 return 0;
217 }
218 SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
219 return 1;
220 }
221 /* Fuzz _sre.compile(x) */
fuzz_sre_compile(const char * data,size_t size)222 static int fuzz_sre_compile(const char* data, size_t size) {
223 /* Ignore really long regex patterns that will timeout the fuzzer */
224 if (size > MAX_RE_TEST_SIZE) {
225 return 0;
226 }
227 /* We treat the first 2 bytes of the input as a number for the flags */
228 if (size < 2) {
229 return 0;
230 }
231 uint16_t flags = ((uint16_t*) data)[0];
232 /* We remove the SRE_FLAG_DEBUG if present. This is because it
233 prints to stdout which greatly decreases fuzzing speed */
234 flags &= ~SRE_FLAG_DEBUG;
235
236 /* Pull the pattern from the remaining bytes */
237 PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
238 if (pattern_bytes == NULL) {
239 return 0;
240 }
241 PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
242 if (flags_obj == NULL) {
243 Py_DECREF(pattern_bytes);
244 return 0;
245 }
246
247 /* compiled = _sre.compile(data[2:], data[0:2] */
248 PyObject* compiled = PyObject_CallFunctionObjArgs(
249 sre_compile_method, pattern_bytes, flags_obj, NULL);
250 /* Ignore ValueError as the fuzzer will more than likely
251 generate some invalid combination of flags */
252 if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
253 PyErr_Clear();
254 }
255 /* Ignore some common errors thrown by sre_parse:
256 Overflow, Assertion, Recursion and Index */
257 if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
258 PyErr_ExceptionMatches(PyExc_AssertionError) ||
259 PyErr_ExceptionMatches(PyExc_RecursionError) ||
260 PyErr_ExceptionMatches(PyExc_IndexError))
261 ) {
262 PyErr_Clear();
263 }
264 /* Ignore re.error */
265 if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
266 PyErr_Clear();
267 }
268
269 Py_DECREF(pattern_bytes);
270 Py_DECREF(flags_obj);
271 Py_XDECREF(compiled);
272 return 0;
273 }
274
275 /* Some random patterns used to test re.match.
276 Be careful not to add catostraphically slow regexes here, we want to
277 exercise the matching code without causing timeouts.*/
278 static const char* regex_patterns[] = {
279 ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
280 "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
281 "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
282 "(?:a*)*", "a{1,2}?"
283 };
284 const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
285 PyObject** compiled_patterns = NULL;
286 /* Called by LLVMFuzzerTestOneInput for initialization */
init_sre_match(void)287 static int init_sre_match(void) {
288 PyObject* re_module = PyImport_ImportModule("re");
289 if (re_module == NULL) {
290 return 0;
291 }
292 compiled_patterns = (PyObject**) PyMem_RawMalloc(
293 sizeof(PyObject*) * NUM_PATTERNS);
294 if (compiled_patterns == NULL) {
295 PyErr_NoMemory();
296 return 0;
297 }
298
299 /* Precompile all the regex patterns on the first run for faster fuzzing */
300 for (size_t i = 0; i < NUM_PATTERNS; i++) {
301 PyObject* compiled = PyObject_CallMethod(
302 re_module, "compile", "y", regex_patterns[i]);
303 /* Bail if any of the patterns fail to compile */
304 if (compiled == NULL) {
305 return 0;
306 }
307 compiled_patterns[i] = compiled;
308 }
309 return 1;
310 }
311 /* Fuzz re.match(x) */
fuzz_sre_match(const char * data,size_t size)312 static int fuzz_sre_match(const char* data, size_t size) {
313 if (size < 1 || size > MAX_RE_TEST_SIZE) {
314 return 0;
315 }
316 /* Use the first byte as a uint8_t specifying the index of the
317 regex to use */
318 unsigned char idx = (unsigned char) data[0];
319 idx = idx % NUM_PATTERNS;
320
321 /* Pull the string to match from the remaining bytes */
322 PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
323 if (to_match == NULL) {
324 return 0;
325 }
326
327 PyObject* pattern = compiled_patterns[idx];
328 PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
329
330 PyObject* matches = PyObject_CallOneArg(match_callable, to_match);
331
332 Py_XDECREF(matches);
333 Py_DECREF(match_callable);
334 Py_DECREF(to_match);
335 return 0;
336 }
337
338 #define MAX_CSV_TEST_SIZE 0x10000
339 PyObject* csv_module = NULL;
340 PyObject* csv_error = NULL;
341 /* Called by LLVMFuzzerTestOneInput for initialization */
init_csv_reader(void)342 static int init_csv_reader(void) {
343 /* Import csv and csv.Error */
344 csv_module = PyImport_ImportModule("csv");
345 if (csv_module == NULL) {
346 return 0;
347 }
348 csv_error = PyObject_GetAttrString(csv_module, "Error");
349 return csv_error != NULL;
350 }
351 /* Fuzz csv.reader([x]) */
fuzz_csv_reader(const char * data,size_t size)352 static int fuzz_csv_reader(const char* data, size_t size) {
353 if (size < 1 || size > MAX_CSV_TEST_SIZE) {
354 return 0;
355 }
356 /* Ignore non null-terminated strings since _csv can't handle
357 embedded nulls */
358 if (memchr(data, '\0', size) == NULL) {
359 return 0;
360 }
361
362 PyObject* s = PyUnicode_FromString(data);
363 /* Ignore exceptions until we have a valid string */
364 if (s == NULL) {
365 PyErr_Clear();
366 return 0;
367 }
368
369 /* Split on \n so we can test multiple lines */
370 PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
371 if (lines == NULL) {
372 Py_DECREF(s);
373 return 0;
374 }
375
376 PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
377 if (reader) {
378 /* Consume all of the reader as an iterator */
379 PyObject* parsed_line;
380 while ((parsed_line = PyIter_Next(reader))) {
381 Py_DECREF(parsed_line);
382 }
383 }
384
385 /* Ignore csv.Error because we're probably going to generate
386 some bad files (embedded new-lines, unterminated quotes etc) */
387 if (PyErr_ExceptionMatches(csv_error)) {
388 PyErr_Clear();
389 }
390
391 Py_XDECREF(reader);
392 Py_DECREF(s);
393 return 0;
394 }
395
396 #define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x10000
397 PyObject* ast_literal_eval_method = NULL;
398 /* Called by LLVMFuzzerTestOneInput for initialization */
init_ast_literal_eval(void)399 static int init_ast_literal_eval(void) {
400 PyObject* ast_module = PyImport_ImportModule("ast");
401 if (ast_module == NULL) {
402 return 0;
403 }
404 ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval");
405 return ast_literal_eval_method != NULL;
406 }
407 /* Fuzz ast.literal_eval(x) */
fuzz_ast_literal_eval(const char * data,size_t size)408 static int fuzz_ast_literal_eval(const char* data, size_t size) {
409 if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) {
410 return 0;
411 }
412 /* Ignore non null-terminated strings since ast can't handle
413 embedded nulls */
414 if (memchr(data, '\0', size) == NULL) {
415 return 0;
416 }
417
418 PyObject* s = PyUnicode_FromString(data);
419 /* Ignore exceptions until we have a valid string */
420 if (s == NULL) {
421 PyErr_Clear();
422 return 0;
423 }
424
425 PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s);
426 /* Ignore some common errors thrown by ast.literal_eval */
427 if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) ||
428 PyErr_ExceptionMatches(PyExc_TypeError) ||
429 PyErr_ExceptionMatches(PyExc_SyntaxError) ||
430 PyErr_ExceptionMatches(PyExc_MemoryError) ||
431 PyErr_ExceptionMatches(PyExc_RecursionError))
432 ) {
433 PyErr_Clear();
434 }
435
436 Py_XDECREF(literal);
437 Py_DECREF(s);
438 return 0;
439 }
440
/* Run one fuzz test and abort the process if it leaves an exception set.

   Returns whatever the fuzz test returned. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
    const int result = fuzzer((const char*) data, size);
    if (!PyErr_Occurred()) {
        /* Someday the return value might mean something, propagate it. */
        return result;
    }
    /* Fuzz tests should handle expected errors for themselves.
       This is a last-ditch check in case they didn't. */
    PyErr_Print();
    abort();
}
453
/* CPython generates a lot of leak warnings for whatever reason, so this
   hook unconditionally returns 1. */
int __lsan_is_turned_off(void)
{
    return 1;
}
456
457
LLVMFuzzerInitialize(int * argc,char *** argv)458 int LLVMFuzzerInitialize(int *argc, char ***argv) {
459 PyConfig config;
460 PyConfig_InitPythonConfig(&config);
461 config.install_signal_handlers = 0;
462 PyStatus status;
463 status = PyConfig_SetBytesString(&config, &config.program_name, *argv[0]);
464 if (PyStatus_Exception(status)) {
465 goto fail;
466 }
467
468 status = Py_InitializeFromConfig(&config);
469 if (PyStatus_Exception(status)) {
470 goto fail;
471 }
472 PyConfig_Clear(&config);
473
474 return 0;
475
476 fail:
477 PyConfig_Clear(&config);
478 Py_ExitStatusException(status);
479 }
480
481 /* Fuzz test interface.
482 This returns the bitwise or of all fuzz test's return values.
483
484 All fuzz tests must return 0, as all nonzero return codes are reserved for
485 future use -- we propagate the return values for that future case.
486 (And we bitwise or when running multiple tests to verify that normally we
487 only return 0.) */
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)488 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
489 assert(Py_IsInitialized());
490
491 int rv = 0;
492
493 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
494 rv |= _run_fuzz(data, size, fuzz_builtin_float);
495 #endif
496 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
497 rv |= _run_fuzz(data, size, fuzz_builtin_int);
498 #endif
499 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
500 rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
501 #endif
502 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_struct_unpack)
503 static int STRUCT_UNPACK_INITIALIZED = 0;
504 if (!STRUCT_UNPACK_INITIALIZED && !init_struct_unpack()) {
505 PyErr_Print();
506 abort();
507 } else {
508 STRUCT_UNPACK_INITIALIZED = 1;
509 }
510 rv |= _run_fuzz(data, size, fuzz_struct_unpack);
511 #endif
512 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
513 static int JSON_LOADS_INITIALIZED = 0;
514 if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
515 PyErr_Print();
516 abort();
517 } else {
518 JSON_LOADS_INITIALIZED = 1;
519 }
520
521 rv |= _run_fuzz(data, size, fuzz_json_loads);
522 #endif
523 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
524 static int SRE_COMPILE_INITIALIZED = 0;
525 if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
526 PyErr_Print();
527 abort();
528 } else {
529 SRE_COMPILE_INITIALIZED = 1;
530 }
531
532 rv |= _run_fuzz(data, size, fuzz_sre_compile);
533 #endif
534 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
535 static int SRE_MATCH_INITIALIZED = 0;
536 if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
537 PyErr_Print();
538 abort();
539 } else {
540 SRE_MATCH_INITIALIZED = 1;
541 }
542
543 rv |= _run_fuzz(data, size, fuzz_sre_match);
544 #endif
545 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
546 static int CSV_READER_INITIALIZED = 0;
547 if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
548 PyErr_Print();
549 abort();
550 } else {
551 CSV_READER_INITIALIZED = 1;
552 }
553
554 rv |= _run_fuzz(data, size, fuzz_csv_reader);
555 #endif
556 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval)
557 static int AST_LITERAL_EVAL_INITIALIZED = 0;
558 if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) {
559 PyErr_Print();
560 abort();
561 } else {
562 AST_LITERAL_EVAL_INITIALIZED = 1;
563 }
564
565 rv |= _run_fuzz(data, size, fuzz_ast_literal_eval);
566 #endif
567 return rv;
568 }
569