xref: /aosp_15_r20/external/zstd/tests/fuzz/fuzz.py (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) Meta Platforms, Inc. and affiliates.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# You may select, at your option, one of the above-listed licenses.
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
17import shlex
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return the absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
class InputType(object):
    """Integer tags for the kind of input data a fuzz target consumes."""
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32
33
class FrameType(object):
    """Integer tags for the framing of a target's data (zstd frame or block)."""
    ZSTD, BLOCK = 1, 2
37
38
class TargetInfo(object):
    """Record of the input type and frame type associated with a target."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        # Both values are stored untouched; frame_type defaults to ZSTD.
        self.input_type, self.frame_type = input_type, frame_type
43
44
# Constants
# Directory containing this script; fuzz target binaries are resolved here.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root of the per-target directories (TARGET, TARGET-seed, TARGET-crash, ...).
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to a TargetInfo describing the input it takes.
# `gen` consults this table to decide which generated samples to keep.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
    'huf_round_trip': TargetInfo(InputType.RAW_DATA),
    'huf_decompress': TargetInfo(InputType.RAW_DATA),
    'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
    'generate_sequences': TargetInfo(InputType.RAW_DATA),
}
# Every valid target name; ALL_TARGETS additionally accepts the pseudo-target
# 'all', which parse_targets expands to the full list.
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
# Same value as the default of the --fuzz-rng-seed-size options below.
FUZZ_RNG_SEED_SIZE = 4
74
# Standard environment variables
# Each value is read once at import time and used as the default of the
# matching command line option.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')
THIRD_PARTY_SEQ_PROD_OBJ = os.environ.get('THIRD_PARTY_SEQ_PROD_OBJ', '')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default tool locations are relative to this script's directory.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Extra flags appended by `build` only when MSAN is enabled.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
97
98
def create(r):
    """Return the absolute path of ``r``, creating the directory if missing."""
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
104
105
def check(r):
    """Return the absolute path of ``r`` if it is a directory, else None."""
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
111
112
@contextlib.contextmanager
def tmpdir():
    """Yield a freshly created temporary directory, removing it on exit."""
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Best-effort cleanup: deletion failures are deliberately ignored.
        shutil.rmtree(scratch, ignore_errors=True)
120
121
def parse_targets(in_targets):
    """Expand and validate a list of target names.

    Falsy entries are skipped, 'all' expands to every entry of TARGETS, and
    any other name must be in TARGETS or RuntimeError is raised. The result
    is a de-duplicated list (built from a set, so order is unspecified).
    """
    selected = set()
    for name in filter(None, in_targets):
        if name == 'all':
            selected |= set(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
134
135
def targets_parser(args, description):
    """Parse a command line that takes zero or more TARGET names.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are stored on the result as ``extra``; ``TARGET`` is replaced
    by the validated list from parse_targets.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
149
150
def parse_env_flags(args, flags):
    """
    Reconcile sanitizer options with -fsanitize/-fno-sanitize in ``flags``.

    ``args.asan``, ``args.msan`` and ``args.ubsan`` are overridden when the
    matching sanitizer ('address', 'memory', 'undefined') is named in
    ``flags``; ``args.sanitize`` is set to their disjunction. Raises
    RuntimeError if a sanitizer is both enabled and disabled. Returns
    ``args``.
    """
    def named_in(pattern):
        # Merge every comma-separated list captured by the pattern into a set.
        joined = ','.join(re.findall(pattern, flags))
        return set(joined.split(','))

    enabled = named_in('-fsanitize=((?:[a-z]+,?)+)')
    disabled = named_in('-fno-sanitize=((?:[a-z]+,?)+)')

    def resolve(sanitizer, default):
        turned_on = sanitizer in enabled
        turned_off = sanitizer in disabled
        if turned_on and turned_off:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if turned_on:
            return True
        return False if turned_off else default

    pairs = (('asan', 'address'), ('msan', 'memory'), ('ubsan', 'undefined'))
    for attr, sanitizer in pairs:
        setattr(args, attr, resolve(sanitizer, getattr(args, attr)))

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
178
179
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and inspects the output.
    Returns ``(compiler, version)`` where ``compiler`` is 'clang', 'gcc' or
    None (unrecognized) and ``version`` is a ``(major, minor, patch)`` tuple
    of ints, or None when the compiler is unrecognized.

    Raises subprocess.CalledProcessError if either command fails,
    AssertionError if cc and cxx appear to be different compiler families,
    and RuntimeError if a recognized compiler prints no X.Y.Z version.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only echoed for the user, so decode leniently: version
    # output may contain non-ASCII bytes (e.g. localized text).
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('utf-8', 'replace')))
    if b'clang' in cc_version_bytes:
        assert b'clang' in cxx_version_bytes
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        assert b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes
        compiler = 'gcc'
    if compiler is not None:
        # Raw bytes literal so that `\.` is a regex escape rather than an
        # (invalid) string escape sequence.
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to find an X.Y.Z version in `{} --version`'.format(cc))
        version = tuple(int(part) for part in version_match.groups())
    return compiler, version
201
202
def overflow_ubsan_flags(cc, cxx):
    """Return the -fno-sanitize flag list for overflow checks, per compiler.

    gcc older than 8.0.0 gets the signed-integer-overflow opt-out; newer gcc
    and clang 5.0.0 or later get the pointer-overflow opt-out; anything else
    gets an empty list.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        if version < (8, 0, 0):
            return ['-fno-sanitize=signed-integer-overflow']
        return ['-fno-sanitize=pointer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
210
211
def build_parser(args):
    """Parse the command line of the ``build`` command.

    ``args[0]`` is popped and used as the program name. After parsing, the
    sanitizer options are reconciled with any -fsanitize/-fno-sanitize flags
    found in the cppflags/cflags/cxxflags/ldflags strings (parse_env_flags).

    Returns the argparse namespace. Raises RuntimeError for inconsistent
    option combinations (MSAN with another sanitizer, origin tracking
    without MSAN, sanitize-recover without any sanitizer).
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer are mutually exclusive.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    # store_false: ubsan_pointer_overflow is True unless the flag is passed.
    parser.add_argument(
        '--disable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_false',
        help='Disable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    # store_false: fuzzing_mode is True unless the flag is passed.
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--custom-seq-prod',
        dest='third_party_seq_prod_obj',
        type=str,
        default=THIRD_PARTY_SEQ_PROD_OBJ,
        help='Path to an object file with symbols for fuzzing your sequence producer plugin.')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through raw compiler flags count as enabled too.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
392
393
def build(args):
    """Run ``make clean`` followed by ``make`` with the requested options.

    Options come from build_parser; if parsing raises, the error is printed
    and 1 is returned. Otherwise the assembled flags and both make command
    lines are printed before being run, and 0 is returned. A failing make
    raises subprocess.CalledProcessError.
    """
    try:
        opts = build_parser(args)
    except Exception as err:
        print(err)
        return 1

    # Start from the user supplied flags, split shell-style into lists.
    targets = opts.TARGET
    cc = opts.cc
    cxx = opts.cxx
    cppflags = shlex.split(opts.cppflags)
    cflags = shlex.split(opts.cflags)
    ldflags = shlex.split(opts.ldflags)
    cxxflags = shlex.split(opts.cxxflags)
    mflags = shlex.split(opts.mflags)

    # Flags shared by the C and C++ compilers
    shared_flags = [
        '-Werror',
        '-Wno-error=declaration-after-statement',
        '-Wno-error=c++-compat',
        '-Wno-error=deprecated',  # C files are sometimes compiled with CXX
    ]

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(opts.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(opts.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(opts.fuzz_rng_seed_size),
    ])

    # Fuzzing engine selection
    assert not (opts.fuzzer and opts.coverage)
    if opts.coverage:
        shared_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if opts.fuzzer:
        shared_flags.append('-fsanitize=fuzzer')
        # The compiler provides the engine, so no library is passed to make.
        opts.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(opts.lib_fuzzing_engine))

    # Sanitizers
    if opts.sanitize:
        shared_flags.append('-fsanitize-recover=all' if opts.sanitize_recover
                            else '-fno-sanitize-recover=all')

    if opts.msan:
        shared_flags.append('-fsanitize=memory')
        if opts.msan_track_origins:
            shared_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(opts.msan_extra_cppflags)
        cflags.append(opts.msan_extra_cflags)
        cxxflags.append(opts.msan_extra_cxxflags)
        ldflags.append(opts.msan_extra_ldflags)

    if opts.asan:
        shared_flags.append('-fsanitize=address')

    if opts.ubsan:
        shared_flags.append('-fsanitize=undefined')
        if not opts.ubsan_pointer_overflow:
            shared_flags.extend(overflow_ubsan_flags(cc, cxx))

    # Preprocessor defines and extra make variables
    if opts.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if opts.third_party_seq_prod_obj:
        cppflags.append('-DFUZZ_THIRD_PARTY_SEQ_PROD')
        mflags.append('THIRD_PARTY_SEQ_PROD_OBJ={}'.format(
            opts.third_party_seq_prod_obj))

    if opts.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    # The regression driver library is listed first among the make targets.
    if opts.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    cflags.extend(shared_flags)
    cxxflags.extend(shared_flags)

    # Variable assignments passed to make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make', '-j'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
513
514
def libfuzzer_parser(args):
    """Parse the command line of the ``libfuzzer`` command.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept on the result as ``extra``. Raises RuntimeError when
    TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    # (option, help template, default directory name shown in the help)
    dir_options = (
        ('--corpora', 'Override the default corpora dir (default: {})',
         'TARGET'),
        ('--artifact', 'Override the default artifact dir (default: {})',
         'TARGET-crash'),
        ('--seed', 'Override the default seed dir (default: {})',
         'TARGET-seed'),
    )
    for option, help_template, dir_name in dir_options:
        parser.add_argument(
            option,
            type=str,
            help=help_template.format(abs_join(CORPORA_DIR, dir_name)))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
553
554
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """Run the libFuzzer binary for ``target``.

    Missing directories default to CORPORA_DIR/<target>,
    CORPORA_DIR/<target>-crash and CORPORA_DIR/<target>-seed. The corpora
    and artifact directories are created if needed; the seed directory is
    passed only if it already exists. The command line is printed before it
    runs; a non-zero exit raises subprocess.CalledProcessError.
    """
    def default_dir(value, name_format):
        if value is not None:
            return value
        return abs_join(CORPORA_DIR, name_format.format(target))

    corpora = default_dir(corpora, '{}')
    artifact = default_dir(artifact, '{}-crash')
    seed = default_dir(seed, '{}-seed')
    if extra_args is None:
        extra_args = []

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # The artifact directory doubles as an input corpus.
    inputs = [corpus_dir, artifact_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd += inputs + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)
579
580
def libfuzzer_cmd(args):
    """Entry point of the ``libfuzzer`` command; returns its exit status.

    Prints the error and returns 1 if argument parsing raises, else runs
    the fuzzer and returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(
        parsed.TARGET,
        parsed.corpora,
        parsed.artifact,
        parsed.seed,
        parsed.extra)
    return 0
589
590
def afl_parser(args):
    """Parse the command line of the ``afl`` command.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept on the result as ``extra``. ``corpora`` and
    ``output`` fall back to CORPORA_DIR/<TARGET> and CORPORA_DIR/<TARGET>-afl
    when not given. Raises RuntimeError when TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    afl_fuzz_help = 'AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument('--corpora', type=str, help=corpora_help)
    parser.add_argument('--output', type=str, help=output_help)
    parser.add_argument(
        '--afl-fuzz', type=str, default=AFL_FUZZ, help=afl_fuzz_help)
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    # Fill in the per-target default directories.
    parsed.corpora = parsed.corpora or abs_join(CORPORA_DIR, parsed.TARGET)
    parsed.output = parsed.output or abs_join(
        CORPORA_DIR, '{}-afl'.format(parsed.TARGET))

    return parsed
632
633
def afl(args):
    """Entry point of the ``afl`` command; returns its exit status.

    Prints the error and returns 1 if argument parsing raises. Otherwise
    creates the corpora/output directories, prints and runs afl-fuzz, and
    returns 0 regardless of afl-fuzz's own exit code.
    """
    try:
        parsed = afl_parser(args)
    except Exception as err:
        print(err)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    input_dir = create(parsed.corpora)
    output_dir = create(parsed.output)

    # '@@' is passed to afl-fuzz as the target's only argument.
    cmd = [parsed.afl_fuzz, '-i', input_dir, '-o', output_dir]
    cmd = cmd + parsed.extra + [binary, '@@']
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
650
651
def regression(args):
    """Entry point of the ``regression`` command; returns its exit status.

    Prints the error and returns 1 if argument parsing raises. Otherwise
    runs each selected target binary on its corpus directory (created if
    missing) and returns 0; a failing run raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        cmd = [abs_join(FUZZ_DIR, name), corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
671
672
def gen_parser(args):
    """Parse the command line of the ``gen`` command.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept on the result as ``extra``; ``seed`` falls back to
    CORPORA_DIR/<TARGET>-seed. Raises RuntimeError when TARGET is not a
    known target or the decodecorpus path is not an existing file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    decodecorpus_help = (
        "decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    zstd_help = "zstd binary (default: $ZSTD='{}')".format(ZSTD)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument(
        '--number', '-n', type=int, default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log', type=int, default=18,
        help='Maximum sample size to generate')
    parser.add_argument('--seed', type=str, help=seed_help)
    parser.add_argument(
        '--decodecorpus', type=str, default=DECODECORPUS,
        help=decodecorpus_help)
    parser.add_argument('--zstd', type=str, default=ZSTD, help=zstd_help)
    parser.add_argument(
        '--fuzz-rng-seed-size', type=int, default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)")
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    parsed.seed = parsed.seed or abs_join(
        CORPORA_DIR, '{}-seed'.format(parsed.TARGET))

    # The generator binary has to exist before any samples can be produced.
    if os.path.isfile(parsed.decodecorpus):
        return parsed
    raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                       format(parsed.decodecorpus, abs_join(FUZZ_DIR, '..')))
735
736
def gen(args):
    """Entry point of the ``gen`` command: generate a seed corpus.

    Runs decodecorpus into temporary directories, selects the samples that
    match the target's input type (decompressed data, compressed data, or
    dictionaries trained with ``zstd --train``), and copies them into the
    seed directory.

    Returns 1 (after printing the error) if argument parsing raises, else 0.
    A failing subprocess raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets get at least 1000 samples (presumably so that
        # `zstd --train` has enough data to work with).
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            # Block size log is capped at 17 for block generation.
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, from 2**9 bytes up
            # to (but excluding) 2**max_dict_size_log; at least one is made.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged.
        # NOTE(review): no RNG seed bytes are prepended here even though the
        # command description mentions them; confirm what the targets expect.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
806
807
def minimize(args):
    """Entry point of the ``minimize`` command; returns its exit status.

    For each selected target, runs libFuzzer with ``-merge=1`` to merge the
    target's corpus into CORPORA_DIR/<target>_seed_corpus, then copies any
    crash files not already present into that directory.

    Returns 1 (after printing the error) if argument parsing raises, else 0.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    # Explicit success status, matching the other command entry points.
    return 0
832
833
def zip_cmd(args):
    """Entry point of the ``zip`` command; returns its exit status.

    For each selected target, runs the external ``zip`` tool inside
    CORPORA_DIR/<target>_seed_corpus to produce <target>_seed_corpus.zip
    next to it.

    Returns 1 (after printing the error) if argument parsing raises, else 0.
    A failing zip raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    # Explicit success status, matching the other command entry points.
    return 0
851
852
def list_cmd(args):
    """Print every known target name, one per line; returns 0.

    ``args`` is accepted for symmetry with the other commands and ignored.
    """
    print("\n".join(TARGETS))
    # Explicit success status, matching the other command entry points.
    return 0
855
856
def short_help(args):
    """Print the one-line usage banner, using ``args[0]`` as program name."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
860
861
def help(args):
    """Print the usage banner followed by the option and command summary."""
    short_help(args)
    summary = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in summary:
        print(line)
877
878
def main():
    """Dispatch ``sys.argv`` to the selected command and return its status.

    With no command, or with -h/--help/-H, the help text is printed and 1
    is returned. The command word is removed from the argument list and
    folded into ``args[0]`` so sub-parsers show "prog command" as their
    program name. Unknown commands print an error and return 1.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
908
909
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
912