xref: /aosp_15_r20/external/AFLplusplus/src/afl-as.c (revision 08b48e0b10e97b33e7b60c5b6e2243bd915777f2)
1 /*
2    american fuzzy lop++ - wrapper for GNU as
3    -----------------------------------------
4 
5    Originally written by Michal Zalewski
6 
7    Now maintained by Marc Heuse <[email protected]>,
8                         Heiko Eißfeldt <[email protected]> and
9                         Andrea Fioraldi <[email protected]>
10 
11    Copyright 2016, 2017 Google Inc. All rights reserved.
12    Copyright 2019-2024 AFLplusplus Project. All rights reserved.
13 
14    Licensed under the Apache License, Version 2.0 (the "License");
15    you may not use this file except in compliance with the License.
16    You may obtain a copy of the License at:
17 
18      https://www.apache.org/licenses/LICENSE-2.0
19 
20    The sole purpose of this wrapper is to preprocess assembly files generated
21    by GCC / clang and inject the instrumentation bits included from afl-as.h. It
22    is automatically invoked by the toolchain when compiling programs using
23    afl-gcc / afl-clang.
24 
25    Note that it's an explicit non-goal to instrument hand-written assembly,
26    be it in separate .s files or in __asm__ blocks. The only aspiration this
27    utility has right now is to be able to skip them gracefully and allow the
28    compilation process to continue.
29 
30    That said, see utils/clang_asm_normalize/ for a solution that may
31    allow clang users to make things work even with hand-crafted assembly. Just
32    note that there is no equivalent for GCC.
33 
34  */
35 
36 #define AFL_MAIN
37 
38 #include "config.h"
39 #include "types.h"
40 #include "debug.h"
41 #include "alloc-inl.h"
42 
43 #include "afl-as.h"
44 
45 #include <stdio.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <ctype.h>
52 #include <fcntl.h>
53 
54 #include <sys/wait.h>
55 #include <sys/time.h>
56 
57 static u8 **as_params;              /* Parameters passed to the real 'as'   */
58 
59 static u8 *input_file;              /* Originally specified input file      */
60 static u8 *modified_file;           /* Instrumented file for the real 'as'  */
61 
62 static u8 be_quiet,                 /* Quiet mode (no stderr output)        */
63     clang_mode,                     /* Running in clang mode?               */
64     pass_thru,                      /* Just pass data through?              */
65     just_version,                   /* Just show version?                   */
66     sanitizer;                      /* Using ASAN / MSAN                    */
67 
68 static u32 inst_ratio = 100,        /* Instrumentation probability (%)      */
69     as_par_cnt = 1;                 /* Number of params to 'as'             */
70 
71 /* If we don't find --32 or --64 in the command line, default to
72    instrumentation for whichever mode we were compiled with. This is not
73    perfect, but should do the trick for almost all use cases. */
74 
75 #ifdef WORD_SIZE_64
76 
77 static u8 use_64bit = 1;
78 
79 #else
80 
81 static u8 use_64bit = 0;
82 
83   #ifdef __APPLE__
84     #error "Sorry, 32-bit Apple platforms are not supported."
85   #endif                                                       /* __APPLE__ */
86 
87 #endif                                                     /* ^WORD_SIZE_64 */
88 
89 /* Examine and modify parameters to pass to 'as'. Note that the file name
90    is always the last parameter passed by GCC, so we exploit this property
91    to keep the code simple. */
92 
edit_params(int argc,char ** argv)93 static void edit_params(int argc, char **argv) {
94 
95   u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
96   u32 i, input_index;
97 
98 #ifdef __APPLE__
99 
100   u8 use_clang_as = 0;
101 
102   /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
103      with the code generated by newer versions of clang that are hand-built
104      by the user. See the thread here: https://goo.gl/HBWDtn.
105 
106      To work around this, when using clang and running without AFL_AS
107      specified, we will actually call 'clang -c' instead of 'as -q' to
108      compile the assembly file.
109 
110      The tools aren't cmdline-compatible, but at least for now, we can
111      seemingly get away with this by making only very minor tweaks. Thanks
112      to Nico Weber for the idea. */
113 
114   if (clang_mode && !afl_as) {
115 
116     use_clang_as = 1;
117 
118     afl_as = getenv("AFL_CC");
119     if (!afl_as) afl_as = getenv("AFL_CXX");
120     if (!afl_as) afl_as = "clang";
121 
122   }
123 
124 #endif                                                         /* __APPLE__ */
125 
126   /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
127      is not set. We need to check these non-standard variables to properly
128      handle the pass_thru logic later on. */
129 
130   if (!tmp_dir) { tmp_dir = getenv("TEMP"); }
131   if (!tmp_dir) { tmp_dir = getenv("TMP"); }
132   if (!tmp_dir) { tmp_dir = "/tmp"; }
133 
134   as_params = ck_alloc((argc + 32) * sizeof(u8 *));
135   if (unlikely((INT_MAX - 32) < argc || !as_params)) {
136 
137     FATAL("Too many parameters passed to as");
138 
139   }
140 
141   as_params[0] = afl_as ? afl_as : (u8 *)"as";
142 
143   as_params[argc] = 0;
144 
145   /* Find the input file.  It's usually located near the end.
146      Assume there won't be any arguments referring to files after the input
147      file, e.g. as input.s -o output.o */
148   for (input_index = argc - 1; input_index > 0; input_index--) {
149 
150     input_file = argv[input_index];
151     /* Clang may add debug arguments after the input file. */
152     if (strncmp(input_file, "-g", 2)) break;
153 
154   }
155 
156   if (input_index == 0)
157     FATAL("Could not find input file (not called through afl-gcc?)");
158 
159   for (i = 1; (s32)i < argc; i++) {
160 
161     if (i == input_index) continue;
162 
163     if (!strcmp(argv[i], "--64")) {
164 
165       use_64bit = 1;
166 
167     } else if (!strcmp(argv[i], "--32")) {
168 
169       use_64bit = 0;
170 
171     }
172 
173 #ifdef __APPLE__
174 
175     /* The Apple case is a bit different... */
176 
177     if (!strcmp(argv[i], "-arch") && i + 1 < (u32)argc) {
178 
179       if (!strcmp(argv[i + 1], "x86_64"))
180         use_64bit = 1;
181       else if (!strcmp(argv[i + 1], "i386"))
182         FATAL("Sorry, 32-bit Apple platforms are not supported.");
183 
184     }
185 
186     /* Strip options that set the preference for a particular upstream
187        assembler in Xcode. */
188 
189     if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
190       continue;
191 
192 #endif                                                         /* __APPLE__ */
193 
194     as_params[as_par_cnt++] = argv[i];
195 
196   }
197 
198 #ifdef __APPLE__
199 
200   /* When calling clang as the upstream assembler, append -c -x assembler
201      and hope for the best. */
202 
203   if (use_clang_as) {
204 
205     as_params[as_par_cnt++] = "-c";
206     as_params[as_par_cnt++] = "-x";
207     as_params[as_par_cnt++] = "assembler";
208 
209   }
210 
211 #endif                                                         /* __APPLE__ */
212 
213   if (input_file[0] == '-') {
214 
215     if (!strcmp(input_file + 1, "-version")) {
216 
217       just_version = 1;
218       modified_file = input_file;
219       goto wrap_things_up;
220 
221     }
222 
223     if (input_file[1]) {
224 
225       FATAL("Incorrect use (not called through afl-gcc?)");
226 
227     } else {
228 
229       input_file = NULL;
230 
231     }
232 
233   } else {
234 
235     /* Check if this looks like a standard invocation as a part of an attempt
236        to compile a program, rather than using gcc on an ad-hoc .s file in
237        a format we may not understand. This works around an issue compiling
238        NSS. */
239 
240     if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
241         strncmp(input_file, "/var/tmp/", 9) &&
242         strncmp(input_file, "/tmp/", 5) &&
243         getenv("AFL_AS_FORCE_INSTRUMENT") == NULL) {
244 
245       pass_thru = 1;
246 
247     } else if (getenv("AFL_AS_FORCE_INSTRUMENT")) {
248 
249       unsetenv("AFL_AS_FORCE_INSTRUMENT");
250 
251     }
252 
253   }
254 
255   modified_file = alloc_printf("%s/.afl-%u-%u-%u.s", tmp_dir, (u32)getpid(),
256                                (u32)time(NULL), (u32)random());
257 
258 wrap_things_up:
259 
260   as_params[as_par_cnt++] = modified_file;
261   as_params[as_par_cnt] = NULL;
262 
263 }
264 
265 /* Process input file, generate modified_file. Insert instrumentation in all
266    the appropriate places. */
267 
add_instrumentation(void)268 static void add_instrumentation(void) {
269 
270   static u8 line[MAX_LINE];
271 
272   FILE *inf;
273   FILE *outf;
274   s32   outfd;
275   u32   ins_lines = 0;
276 
277   u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, skip_intel = 0,
278      skip_app = 0, instrument_next = 0;
279 
280 #ifdef __APPLE__
281 
282   u8 *colon_pos;
283 
284 #endif                                                         /* __APPLE__ */
285 
286   if (input_file) {
287 
288     inf = fopen(input_file, "r");
289     if (!inf) { PFATAL("Unable to read '%s'", input_file); }
290 
291   } else {
292 
293     inf = stdin;
294 
295   }
296 
297   outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, DEFAULT_PERMISSION);
298 
299   if (outfd < 0) { PFATAL("Unable to write to '%s'", modified_file); }
300 
301   outf = fdopen(outfd, "w");
302 
303   if (!outf) { PFATAL("fdopen() failed"); }
304 
305   while (fgets(line, MAX_LINE, inf)) {
306 
307     /* In some cases, we want to defer writing the instrumentation trampoline
308        until after all the labels, macros, comments, etc. If we're in this
309        mode, and if the line starts with a tab followed by a character, dump
310        the trampoline now. */
311 
312     if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
313         instrument_next && line[0] == '\t' && isalpha(line[1])) {
314 
315       fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
316               R(MAP_SIZE));
317 
318       instrument_next = 0;
319       ins_lines++;
320 
321     }
322 
323     /* Output the actual line, call it a day in pass-thru mode. */
324 
325     fputs(line, outf);
326 
327     if (pass_thru) { continue; }
328 
329     /* All right, this is where the actual fun begins. For one, we only want to
330        instrument the .text section. So, let's keep track of that in processed
331        files - and let's set instr_ok accordingly. */
332 
333     if (line[0] == '\t' && line[1] == '.') {
334 
335       /* OpenBSD puts jump tables directly inline with the code, which is
336          a bit annoying. They use a specific format of p2align directives
337          around them, so we use that as a signal. */
338 
339       if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
340           isdigit(line[10]) && line[11] == '\n') {
341 
342         skip_next_label = 1;
343 
344       }
345 
346       if (!strncmp(line + 2, "text\n", 5) ||
347           !strncmp(line + 2, "section\t.text", 13) ||
348           !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
349           !strncmp(line + 2, "section __TEXT,__text", 21)) {
350 
351         instr_ok = 1;
352         continue;
353 
354       }
355 
356       if (!strncmp(line + 2, "section\t", 8) ||
357           !strncmp(line + 2, "section ", 8) || !strncmp(line + 2, "bss\n", 4) ||
358           !strncmp(line + 2, "data\n", 5)) {
359 
360         instr_ok = 0;
361         continue;
362 
363       }
364 
365     }
366 
367     /* Detect off-flavor assembly (rare, happens in gdb). When this is
368        encountered, we set skip_csect until the opposite directive is
369        seen, and we do not instrument. */
370 
371     if (strstr(line, ".code")) {
372 
373       if (strstr(line, ".code32")) { skip_csect = use_64bit; }
374       if (strstr(line, ".code64")) { skip_csect = !use_64bit; }
375 
376     }
377 
378     /* Detect syntax changes, as could happen with hand-written assembly.
379        Skip Intel blocks, resume instrumentation when back to AT&T. */
380 
381     if (strstr(line, ".intel_syntax")) { skip_intel = 1; }
382     if (strstr(line, ".att_syntax")) { skip_intel = 0; }
383 
384     /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
385 
386     if (line[0] == '#' || line[1] == '#') {
387 
388       if (strstr(line, "#APP")) { skip_app = 1; }
389       if (strstr(line, "#NO_APP")) { skip_app = 0; }
390 
391     }
392 
393     /* If we're in the right mood for instrumenting, check for function
394        names or conditional labels. This is a bit messy, but in essence,
395        we want to catch:
396 
397          ^main:      - function entry point (always instrumented)
398          ^.L0:       - GCC branch label
399          ^.LBB0_0:   - clang branch label (but only in clang mode)
400          ^\tjnz foo  - conditional branches
401 
402        ...but not:
403 
404          ^# BB#0:    - clang comments
405          ^ # BB#0:   - ditto
406          ^.Ltmp0:    - clang non-branch labels
407          ^.LC0       - GCC non-branch labels
408          ^.LBB0_0:   - ditto (when in GCC mode)
409          ^\tjmp foo  - non-conditional jumps
410 
411        Additionally, clang and GCC on MacOS X follow a different convention
412        with no leading dots on labels, hence the weird maze of #ifdefs
413        later on.
414 
415      */
416 
417     if (skip_intel || skip_app || skip_csect || !instr_ok || line[0] == '#' ||
418         line[0] == ' ') {
419 
420       continue;
421 
422     }
423 
424     /* Conditional branch instruction (jnz, etc). We append the instrumentation
425        right after the branch (to instrument the not-taken path) and at the
426        branch destination label (handled later on). */
427 
428     if (line[0] == '\t') {
429 
430       if (line[1] == 'j' && line[2] != 'm' && R(100) < (long)inst_ratio) {
431 
432         fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
433                 R(MAP_SIZE));
434 
435         ins_lines++;
436 
437       }
438 
439       continue;
440 
441     }
442 
443     /* Label of some sort. This may be a branch destination, but we need to
444        read carefully and account for several different formatting
445        conventions. */
446 
447 #ifdef __APPLE__
448 
449     /* Apple: L<whatever><digit>: */
450 
451     if ((colon_pos = strstr(line, ":"))) {
452 
453       if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
454 
455 #else
456 
457     /* Everybody else: .L<whatever>: */
458 
459     if (strstr(line, ":")) {
460 
461       if (line[0] == '.') {
462 
463 #endif                                                         /* __APPLE__ */
464 
465         /* .L0: or LBB0_0: style jump destination */
466 
467 #ifdef __APPLE__
468 
469         /* Apple: L<num> / LBB<num> */
470 
471         if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) &&
472             R(100) < (long)inst_ratio) {
473 
474 #else
475 
476         /* Apple: .L<num> / .LBB<num> */
477 
478         if ((isdigit(line[2]) ||
479              (clang_mode && !strncmp(line + 1, "LBB", 3))) &&
480             R(100) < (long)inst_ratio) {
481 
482 #endif                                                         /* __APPLE__ */
483 
484           /* An optimization is possible here by adding the code only if the
485              label is mentioned in the code in contexts other than call / jmp.
486              That said, this complicates the code by requiring two-pass
487              processing (messy with stdin), and results in a speed gain
488              typically under 10%, because compilers are generally pretty good
489              about not generating spurious intra-function jumps.
490 
491              We use deferred output chiefly to avoid disrupting
492              .Lfunc_begin0-style exception handling calculations (a problem on
493              MacOS X). */
494 
495           if (!skip_next_label) {
496 
497             instrument_next = 1;
498 
499           } else {
500 
501             skip_next_label = 0;
502 
503           }
504 
505         }
506 
507       } else {
508 
509         /* Function label (always instrumented, deferred mode). */
510 
511         instrument_next = 1;
512 
513       }
514 
515     }
516 
517   }
518 
519   if (ins_lines) { fputs(use_64bit ? main_payload_64 : main_payload_32, outf); }
520 
521   if (input_file) { fclose(inf); }
522   fclose(outf);
523 
524   if (!be_quiet) {
525 
526     if (!ins_lines) {
527 
528       WARNF("No instrumentation targets found%s.",
529             pass_thru ? " (pass-thru mode)" : "");
530 
531     } else {
532 
533       char modeline[100];
534       snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s",
535                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
536                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
537                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
538                getenv("AFL_USE_TSAN") ? ", TSAN" : "",
539                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "",
540                getenv("AFL_USE_LSAN") ? ", LSAN" : "");
541 
542       OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", ins_lines,
543           use_64bit ? "64" : "32", modeline, inst_ratio);
544 
545     }
546 
547   }
548 
549 }
550 
551 /* Main entry point */
552 
553 int main(int argc, char **argv) {
554 
555   s32 pid;
556   u32 rand_seed, i, j;
557   int status;
558   u8 *inst_ratio_str = getenv("AFL_INST_RATIO");
559 
560   struct timeval  tv;
561   struct timezone tz;
562 
563   clang_mode = !!getenv(CLANG_ENV_VAR);
564 
565   if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
566 
567     SAYF(cCYA "afl-as" VERSION cRST " by Michal Zalewski\n");
568 
569   } else {
570 
571     be_quiet = 1;
572 
573   }
574 
575   if (argc < 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) {
576 
577     fprintf(
578         stdout,
579         "afl-as" VERSION
580         " by Michal Zalewski\n"
581         "\n%s [-h]\n\n"
582         "This is a helper application for afl-fuzz. It is a wrapper around GNU "
583         "'as',\n"
584         "executed by the toolchain whenever using afl-gcc or afl-clang. You "
585         "probably\n"
586         "don't want to run this program directly.\n\n"
587 
588         "Rarely, when dealing with extremely complex projects, it may be "
589         "advisable\n"
590         "to set AFL_INST_RATIO to a value less than 100 in order to reduce "
591         "the\n"
592         "odds of instrumenting every discovered branch.\n\n"
593         "Environment variables used:\n"
594         "AFL_AS: path to assembler to use for instrumented files\n"
595         "AFL_CC: fall back path to assembler\n"
596         "AFL_CXX: fall back path to assembler\n"
597         "TMPDIR: directory to use for temporary files\n"
598         "TEMP: fall back path to directory for temporary files\n"
599         "TMP: fall back path to directory for temporary files\n"
600         "AFL_INST_RATIO: user specified instrumentation ratio\n"
601         "AFL_QUIET: suppress verbose output\n"
602         "AFL_KEEP_ASSEMBLY: leave instrumented assembly files\n"
603         "AFL_AS_FORCE_INSTRUMENT: force instrumentation for asm sources\n"
604         "AFL_HARDEN, AFL_USE_ASAN, AFL_USE_MSAN, AFL_USE_UBSAN, AFL_USE_LSAN:\n"
605         "  used in the instrumentation summary message\n",
606         argv[0]);
607 
608     exit(1);
609 
610   }
611 
612   gettimeofday(&tv, &tz);
613 
614   rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
615   // in fast systems where pids can repeat in the same seconds we need this
616   for (i = 1; (s32)i < argc; i++)
617     for (j = 0; j < strlen(argv[i]); j++)
618       rand_seed += argv[i][j];
619 
620   srandom(rand_seed);
621 
622   edit_params(argc, argv);
623 
624   if (inst_ratio_str) {
625 
626     if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) {
627 
628       FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
629 
630     }
631 
632   }
633 
634   if (getenv(AS_LOOP_ENV_VAR)) {
635 
636     FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
637 
638   }
639 
640   setenv(AS_LOOP_ENV_VAR, "1", 1);
641 
642   /* When compiling with ASAN, we don't have a particularly elegant way to skip
643      ASAN-specific branches. But we can probabilistically compensate for
644      that... */
645 
646   if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
647 
648     sanitizer = 1;
649     if (!getenv("AFL_INST_RATIO")) { inst_ratio /= 3; }
650 
651   }
652 
653   if (!just_version) { add_instrumentation(); }
654 
655   if (!(pid = fork())) {
656 
657     execvp(as_params[0], (char **)as_params);
658     FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
659 
660   }
661 
662   if (pid < 0) { PFATAL("fork() failed"); }
663 
664   if (waitpid(pid, &status, 0) <= 0) { PFATAL("waitpid() failed"); }
665 
666   if (!getenv("AFL_KEEP_ASSEMBLY")) { unlink(modified_file); }
667 
668   exit(WEXITSTATUS(status));
669 
670 }
671 
672