#!/usr/bin/env python

# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################

import argparse
import contextlib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile


def abs_join(a, *p):
    """Join the path components and return the absolute path."""
    return os.path.abspath(os.path.join(a, *p))


class InputType(object):
    """Kind of input data a fuzz target consumes."""
    RAW_DATA = 1
    COMPRESSED_DATA = 2
    DICTIONARY_DATA = 3


class FrameType(object):
    """Kind of data decodecorpus is asked to generate for a target."""
    ZSTD = 1
    BLOCK = 2


class TargetInfo(object):
    """Input and frame type associated with one fuzz target."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type


# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')


def create(r):
    """Return the absolute path of directory r, creating it if missing."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d


def check(r):
    """Return the absolute path of r if it is a directory, else None."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d


@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a fresh temporary directory.

    The directory is removed on exit; removal errors are ignored.
    """
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)


def parse_targets(in_targets):
    """Expand and validate a list of target names.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Returns a de-duplicated list (order is unspecified).
    Raises RuntimeError for a name that is not a known target.
    """
    targets = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            targets = targets.union(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return list(targets)


def targets_parser(args, description):
    """Parse a command line made of zero or more TARGET names.

    args[0] is consumed as the program name. Unrecognized arguments are kept
    in the returned namespace as `extra`, and TARGET is replaced by the
    validated list from parse_targets().
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args


def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans `flags` for -fsanitize= / -fno-sanitize= options and updates
    args.asan, args.msan and args.ubsan accordingly; a sanitizer that is not
    mentioned keeps the value already stored in args. Also sets
    args.sanitize to whether any of the three is enabled.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
    nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = set(san_flags.split(','))
    nosan = set(nosan_flags.split(','))

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args


def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Returns (compiler, version) where compiler is 'clang', 'gcc' or None and
    version is a (major, minor, patch) tuple, or None when the compiler was
    not recognized.
    Raises RuntimeError if a recognized compiler prints no x.y.z version.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only printed for information; never crash on non-ASCII
    # bytes (e.g. a localized copyright notice).
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'replace')))
    if b'clang' in cc_version_bytes:
        assert b'clang' in cxx_version_bytes
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        assert b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes
        compiler = 'gcc'
    if compiler is not None:
        # Raw literal: '\.' in a non-raw literal is an invalid escape.
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                "Unable to find a version number in '{} --version'".format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version


def overflow_ubsan_flags(cc, cxx):
    """Return the -fno-sanitize flag matching the detected compiler.

    gcc older than 8.0.0 gets signed-integer-overflow disabled; any other gcc
    and clang >= 5.0.0 get pointer-overflow disabled; otherwise no flag.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc' and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
        return ['-fno-sanitize=pointer-overflow']
    return []


def build_parser(args):
    """Parse the arguments of the `build` command.

    args[0] is consumed as the program name. Sanitizer settings found in the
    cpp/c/cxx/ld flags are merged in through parse_env_flags().
    Raises RuntimeError on an inconsistent sanitizer configuration.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{})".format(LIB_FUZZING_ENGINE)))

    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args


def build(args):
    """Run `make clean` and then build the requested targets with make.

    Returns 1 if the arguments cannot be parsed and 0 after a successful
    build; a failing make invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = [
        '-Werror',
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated'  # C files are sometimes compiled with CXX
    ]

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup)
        cppflags += [args.msan_extra_cppflags]
        cflags += [args.msan_extra_cflags]
        cxxflags += [args.msan_extra_cxxflags]
        ldflags += [args.msan_extra_ldflags]

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.third_party_seq_prod_obj:
        cppflags += ['-DFUZZ_THIRD_PARTY_SEQ_PROD']
        mflags += ['THIRD_PARTY_SEQ_PROD_OBJ={}'.format(args.third_party_seq_prod_obj)]

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        '-j',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0


def libfuzzer_parser(args):
    """Parse the arguments of the `libfuzzer` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    as `extra`. Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args


def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """Run the libFuzzer binary for `target`.

    Directories default to CORPORA_DIR/<target>, <target>-crash and
    <target>-seed. The corpora and artifact directories are created when
    missing; the seed directory is only passed along if it already exists.
    Raises subprocess.CalledProcessError if the fuzzer exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)


def libfuzzer_cmd(args):
    """Entry point of the `libfuzzer` command; returns the exit status."""
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0


def afl_parser(args):
    """Parse the arguments of the `afl` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    as `extra`. Missing --corpora/--output are filled in with defaults under
    CORPORA_DIR. Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args


def afl(args):
    """Entry point of the `afl` command; returns the exit status.

    The exit status of afl-fuzz itself is not propagated.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0


def regression(args):
    """Entry point of the `regression` command; returns the exit status.

    Runs each target binary on its corpora directory (created if missing).
    Raises subprocess.CalledProcessError if a target exits non-zero.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0


def gen_parser(args):
    """Parse the arguments of the `gen` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    as `extra`. Raises RuntimeError for an unknown TARGET or when the
    decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args


def gen(args):
    """Entry point of the `gen` command; returns the exit status.

    Runs decodecorpus into temporary directories, picks the decompressed,
    compressed or trained-dictionary output according to the target's input
    type, and copies those files into the seed directory.
    Raises subprocess.CalledProcessError if decodecorpus or zstd fails.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets always generate at least 1000 samples.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            # Train one dictionary per power-of-two size limit.
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged.
        # NOTE: no RNG seed bytes are written here even though the help text
        # mentions them; args.fuzz_rng_seed_size is parsed but not used.
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, 131072)
    return 0


def minimize(args):
    """Entry point of the `minimize` command; returns the exit status.

    For each target, merges its corpus into CORPORA_DIR/<target>_seed_corpus
    with libFuzzer's -merge=1 and then copies in any crash files that are
    not already present.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0


def zip_cmd(args):
    """Entry point of the `zip` command; returns the exit status.

    Zips CORPORA_DIR/<target>_seed_corpus into <target>_seed_corpus.zip for
    each target. Raises subprocess.CalledProcessError if zip fails.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0


def list_cmd(args):
    """Entry point of the `list` command: print one target per line."""
    print("\n".join(TARGETS))
    return 0


def short_help(args):
    """Print the one-line usage message; args[0] is the program name."""
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))


def help(args):
    """Print the usage message followed by the list of commands."""
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")


def main():
    """Dispatch sys.argv to the selected command; returns the exit status."""
    # Work on a copy: the command parsers pop from the list they are given.
    args = list(sys.argv)
    if len(args) < 2:
        help(args)
        return 1
    if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
        help(args)
        return 1
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)
    if command == "build":
        return build(args)
    if command == "libfuzzer":
        return libfuzzer_cmd(args)
    if command == "regression":
        return regression(args)
    if command == "afl":
        return afl(args)
    if command == "gen":
        return gen(args)
    if command == "minimize":
        return minimize(args)
    if command == "zip":
        return zip_cmd(args)
    if command == "list":
        return list_cmd(args)
    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1


if __name__ == "__main__":
    sys.exit(main())