1 /*
2 american fuzzy lop++ - wrapper for GNU as
3 -----------------------------------------
4
5 Originally written by Michal Zalewski
6
7 Now maintained by Marc Heuse <[email protected]>,
8 Heiko Eißfeldt <[email protected]> and
9 Andrea Fioraldi <[email protected]>
10
11 Copyright 2016, 2017 Google Inc. All rights reserved.
12 Copyright 2019-2024 AFLplusplus Project. All rights reserved.
13
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at:
17
18 https://www.apache.org/licenses/LICENSE-2.0
19
20 The sole purpose of this wrapper is to preprocess assembly files generated
21 by GCC / clang and inject the instrumentation bits included from afl-as.h. It
22 is automatically invoked by the toolchain when compiling programs using
23 afl-gcc / afl-clang.
24
25 Note that it's an explicit non-goal to instrument hand-written assembly,
26 be it in separate .s files or in __asm__ blocks. The only aspiration this
27 utility has right now is to be able to skip them gracefully and allow the
28 compilation process to continue.
29
30 That said, see utils/clang_asm_normalize/ for a solution that may
31 allow clang users to make things work even with hand-crafted assembly. Just
32 note that there is no equivalent for GCC.
33
34 */
35
36 #define AFL_MAIN
37
38 #include "config.h"
39 #include "types.h"
40 #include "debug.h"
41 #include "alloc-inl.h"
42
43 #include "afl-as.h"
44
45 #include <stdio.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <ctype.h>
52 #include <fcntl.h>
53
54 #include <sys/wait.h>
55 #include <sys/time.h>
56
/* File-scope state shared by edit_params(), add_instrumentation() and
   main(). */

/* Argument vector for the real assembler; allocated and filled in by
   edit_params(), handed to execvp() by main(). */

static u8 **as_params; /* Parameters passed to the real 'as' */

/* input_file is reset to NULL by edit_params() when the input argument is a
   bare "-", in which case add_instrumentation() reads from stdin. */

static u8 *input_file;    /* Originally specified input file */
static u8 *modified_file; /* Instrumented file for the real 'as' */

static u8 be_quiet, /* Quiet mode (no stderr output) */
    clang_mode,     /* Running in clang mode? */
    pass_thru,      /* Just pass data through? */
    just_version,   /* Just show version? */
    sanitizer;      /* Using ASAN / MSAN */

/* as_par_cnt starts at 1 because as_params[0] is reserved for the assembler
   binary itself. */

static u32 inst_ratio = 100, /* Instrumentation probability (%) */
    as_par_cnt = 1;          /* Number of params to 'as' */

/* If we don't find --32 or --64 in the command line, default to
   instrumentation for whichever mode we were compiled with. This is not
   perfect, but should do the trick for almost all use cases. */

#ifdef WORD_SIZE_64

static u8 use_64bit = 1;

#else

static u8 use_64bit = 0;

#ifdef __APPLE__
#error "Sorry, 32-bit Apple platforms are not supported."
#endif /* __APPLE__ */

#endif /* ^WORD_SIZE_64 */
88
89 /* Examine and modify parameters to pass to 'as'. Note that the file name
90 is always the last parameter passed by GCC, so we exploit this property
91 to keep the code simple. */
92
edit_params(int argc,char ** argv)93 static void edit_params(int argc, char **argv) {
94
95 u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
96 u32 i, input_index;
97
98 #ifdef __APPLE__
99
100 u8 use_clang_as = 0;
101
102 /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
103 with the code generated by newer versions of clang that are hand-built
104 by the user. See the thread here: https://goo.gl/HBWDtn.
105
106 To work around this, when using clang and running without AFL_AS
107 specified, we will actually call 'clang -c' instead of 'as -q' to
108 compile the assembly file.
109
110 The tools aren't cmdline-compatible, but at least for now, we can
111 seemingly get away with this by making only very minor tweaks. Thanks
112 to Nico Weber for the idea. */
113
114 if (clang_mode && !afl_as) {
115
116 use_clang_as = 1;
117
118 afl_as = getenv("AFL_CC");
119 if (!afl_as) afl_as = getenv("AFL_CXX");
120 if (!afl_as) afl_as = "clang";
121
122 }
123
124 #endif /* __APPLE__ */
125
126 /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
127 is not set. We need to check these non-standard variables to properly
128 handle the pass_thru logic later on. */
129
130 if (!tmp_dir) { tmp_dir = getenv("TEMP"); }
131 if (!tmp_dir) { tmp_dir = getenv("TMP"); }
132 if (!tmp_dir) { tmp_dir = "/tmp"; }
133
134 as_params = ck_alloc((argc + 32) * sizeof(u8 *));
135 if (unlikely((INT_MAX - 32) < argc || !as_params)) {
136
137 FATAL("Too many parameters passed to as");
138
139 }
140
141 as_params[0] = afl_as ? afl_as : (u8 *)"as";
142
143 as_params[argc] = 0;
144
145 /* Find the input file. It's usually located near the end.
146 Assume there won't be any arguments referring to files after the input
147 file, e.g. as input.s -o output.o */
148 for (input_index = argc - 1; input_index > 0; input_index--) {
149
150 input_file = argv[input_index];
151 /* Clang may add debug arguments after the input file. */
152 if (strncmp(input_file, "-g", 2)) break;
153
154 }
155
156 if (input_index == 0)
157 FATAL("Could not find input file (not called through afl-gcc?)");
158
159 for (i = 1; (s32)i < argc; i++) {
160
161 if (i == input_index) continue;
162
163 if (!strcmp(argv[i], "--64")) {
164
165 use_64bit = 1;
166
167 } else if (!strcmp(argv[i], "--32")) {
168
169 use_64bit = 0;
170
171 }
172
173 #ifdef __APPLE__
174
175 /* The Apple case is a bit different... */
176
177 if (!strcmp(argv[i], "-arch") && i + 1 < (u32)argc) {
178
179 if (!strcmp(argv[i + 1], "x86_64"))
180 use_64bit = 1;
181 else if (!strcmp(argv[i + 1], "i386"))
182 FATAL("Sorry, 32-bit Apple platforms are not supported.");
183
184 }
185
186 /* Strip options that set the preference for a particular upstream
187 assembler in Xcode. */
188
189 if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
190 continue;
191
192 #endif /* __APPLE__ */
193
194 as_params[as_par_cnt++] = argv[i];
195
196 }
197
198 #ifdef __APPLE__
199
200 /* When calling clang as the upstream assembler, append -c -x assembler
201 and hope for the best. */
202
203 if (use_clang_as) {
204
205 as_params[as_par_cnt++] = "-c";
206 as_params[as_par_cnt++] = "-x";
207 as_params[as_par_cnt++] = "assembler";
208
209 }
210
211 #endif /* __APPLE__ */
212
213 if (input_file[0] == '-') {
214
215 if (!strcmp(input_file + 1, "-version")) {
216
217 just_version = 1;
218 modified_file = input_file;
219 goto wrap_things_up;
220
221 }
222
223 if (input_file[1]) {
224
225 FATAL("Incorrect use (not called through afl-gcc?)");
226
227 } else {
228
229 input_file = NULL;
230
231 }
232
233 } else {
234
235 /* Check if this looks like a standard invocation as a part of an attempt
236 to compile a program, rather than using gcc on an ad-hoc .s file in
237 a format we may not understand. This works around an issue compiling
238 NSS. */
239
240 if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
241 strncmp(input_file, "/var/tmp/", 9) &&
242 strncmp(input_file, "/tmp/", 5) &&
243 getenv("AFL_AS_FORCE_INSTRUMENT") == NULL) {
244
245 pass_thru = 1;
246
247 } else if (getenv("AFL_AS_FORCE_INSTRUMENT")) {
248
249 unsetenv("AFL_AS_FORCE_INSTRUMENT");
250
251 }
252
253 }
254
255 modified_file = alloc_printf("%s/.afl-%u-%u-%u.s", tmp_dir, (u32)getpid(),
256 (u32)time(NULL), (u32)random());
257
258 wrap_things_up:
259
260 as_params[as_par_cnt++] = modified_file;
261 as_params[as_par_cnt] = NULL;
262
263 }
264
/* Process input file, generate modified_file. Insert instrumentation in all
   the appropriate places.

   Reads input_file (or stdin when input_file is NULL) line by line and
   copies every line unchanged into a freshly created modified_file. Unless
   pass_thru is set, it additionally emits a trampoline (trampoline_fmt_64 or
   trampoline_fmt_32, depending on use_64bit):

     - right after a conditional branch instruction, and
     - in front of the first instruction that follows a function label or a
       selected branch label (deferred mode, tracked by instrument_next).

   If at least one trampoline was written, the main payload is appended at
   the end of the file. A summary is printed unless be_quiet is set.

   Takes no arguments and returns nothing; failures to open the input or to
   create the output are reported through PFATAL(). */

static void add_instrumentation(void) {

  /* Static line buffer; fgets() below reads at most MAX_LINE - 1 bytes per
     call. */

  static u8 line[MAX_LINE];

  FILE *inf;
  FILE *outf;
  s32   outfd;
  u32   ins_lines = 0; /* Number of trampolines written so far */

  /* State flags:
       instr_ok        - currently inside a text section
       skip_csect      - inside a .code32 / .code64 block that does not match
                         use_64bit
       skip_next_label - do not instrument the next qualifying branch label
       skip_intel      - inside an .intel_syntax block
       skip_app        - inside an #APP ... #NO_APP block
       instrument_next - a trampoline is pending for the next instruction */

  u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, skip_intel = 0,
     skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

  u8 *colon_pos;

#endif /* __APPLE__ */

  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) { PFATAL("Unable to read '%s'", input_file); }

  } else {

    inf = stdin;

  }

  /* O_EXCL makes open() fail if the output path already exists. */

  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, DEFAULT_PERMISSION);

  if (outfd < 0) { PFATAL("Unable to write to '%s'", modified_file); }

  outf = fdopen(outfd, "w");

  if (!outf) { PFATAL("fdopen() failed"); }

  while (fgets(line, MAX_LINE, inf)) {

    /* In some cases, we want to defer writing the instrumentation trampoline
       until after all the labels, macros, comments, etc. If we're in this
       mode, and if the line starts with a tab followed by a character, dump
       the trampoline now. */

    if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
        instrument_next && line[0] == '\t' && isalpha(line[1])) {

      /* NOTE(review): R() is defined outside this file; presumably it yields
         a random value below its argument - confirm in the headers. */

      fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
              R(MAP_SIZE));

      instrument_next = 0;
      ins_lines++;

    }

    /* Output the actual line, call it a day in pass-thru mode. */

    fputs(line, outf);

    if (pass_thru) { continue; }

    /* All right, this is where the actual fun begins. For one, we only want to
       instrument the .text section. So, let's keep track of that in processed
       files - and let's set instr_ok accordingly. */

    if (line[0] == '\t' && line[1] == '.') {

      /* OpenBSD puts jump tables directly inline with the code, which is
         a bit annoying. They use a specific format of p2align directives
         around them, so we use that as a signal. */

      if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
          isdigit(line[10]) && line[11] == '\n') {

        skip_next_label = 1;

      }

      /* Directives that switch to a text section enable instrumentation... */

      if (!strncmp(line + 2, "text\n", 5) ||
          !strncmp(line + 2, "section\t.text", 13) ||
          !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
          !strncmp(line + 2, "section __TEXT,__text", 21)) {

        instr_ok = 1;
        continue;

      }

      /* ...and any other section directive (or .bss / .data) disables it.
         This test must come after the text-section test above, since the
         "section" prefixes checked here would match those lines as well. */

      if (!strncmp(line + 2, "section\t", 8) ||
          !strncmp(line + 2, "section ", 8) || !strncmp(line + 2, "bss\n", 4) ||
          !strncmp(line + 2, "data\n", 5)) {

        instr_ok = 0;
        continue;

      }

    }

    /* Detect off-flavor assembly (rare, happens in gdb). When this is
       encountered, we set skip_csect until the opposite directive is
       seen, and we do not instrument. */

    if (strstr(line, ".code")) {

      if (strstr(line, ".code32")) { skip_csect = use_64bit; }
      if (strstr(line, ".code64")) { skip_csect = !use_64bit; }

    }

    /* Detect syntax changes, as could happen with hand-written assembly.
       Skip Intel blocks, resume instrumentation when back to AT&T. */

    if (strstr(line, ".intel_syntax")) { skip_intel = 1; }
    if (strstr(line, ".att_syntax")) { skip_intel = 0; }

    /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */

    if (line[0] == '#' || line[1] == '#') {

      if (strstr(line, "#APP")) { skip_app = 1; }
      if (strstr(line, "#NO_APP")) { skip_app = 0; }

    }

    /* If we're in the right mood for instrumenting, check for function
       names or conditional labels. This is a bit messy, but in essence,
       we want to catch:

         ^main:      - function entry point (always instrumented)
         ^.L0:       - GCC branch label
         ^.LBB0_0:   - clang branch label (but only in clang mode)
         ^\tjnz foo  - conditional branches

       ...but not:

         ^# BB#0:    - clang comments
         ^ # BB#0:   - ditto
         ^.Ltmp0:    - clang non-branch labels
         ^.LC0       - GCC non-branch labels
         ^.LBB0_0:   - ditto (when in GCC mode)
         ^\tjmp foo  - non-conditional jumps

       Additionally, clang and GCC on MacOS X follow a different convention
       with no leading dots on labels, hence the weird maze of #ifdefs
       later on.

     */

    if (skip_intel || skip_app || skip_csect || !instr_ok || line[0] == '#' ||
        line[0] == ' ') {

      continue;

    }

    /* Conditional branch instruction (jnz, etc). We append the instrumentation
       right after the branch (to instrument the not-taken path) and at the
       branch destination label (handled later on). */

    if (line[0] == '\t') {

      /* "\tj" followed by anything but 'm' - i.e. every jump except jmp.
         The inst_ratio check instruments only a fraction of the candidates
         when AFL_INST_RATIO is below 100. */

      if (line[1] == 'j' && line[2] != 'm' && R(100) < (long)inst_ratio) {

        fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
                R(MAP_SIZE));

        ins_lines++;

      }

      continue;

    }

    /* Label of some sort. This may be a branch destination, but we need to
       read carefully and account for several different formatting
       conventions.

       Note that the two preprocessor arms below each open the same two
       braces; the closing braces are shared further down. */

#ifdef __APPLE__

    /* Apple: L<whatever><digit>: */

    if ((colon_pos = strstr(line, ":"))) {

      if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

    /* Everybody else: .L<whatever>: */

    if (strstr(line, ":")) {

      if (line[0] == '.') {

#endif /* __APPLE__ */

        /* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__

        /* Apple: L<num> / LBB<num> */

        if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) &&
            R(100) < (long)inst_ratio) {

#else

        /* Everybody else: .L<num> / .LBB<num> */

        if ((isdigit(line[2]) ||
             (clang_mode && !strncmp(line + 1, "LBB", 3))) &&
            R(100) < (long)inst_ratio) {

#endif /* __APPLE__ */

          /* An optimization is possible here by adding the code only if the
             label is mentioned in the code in contexts other than call / jmp.
             That said, this complicates the code by requiring two-pass
             processing (messy with stdin), and results in a speed gain
             typically under 10%, because compilers are generally pretty good
             about not generating spurious intra-function jumps.

             We use deferred output chiefly to avoid disrupting
             .Lfunc_begin0-style exception handling calculations (a problem on
             MacOS X). */

          if (!skip_next_label) {

            instrument_next = 1;

          } else {

            /* The p2align heuristic above flagged this label; skip it once
               and re-arm. */

            skip_next_label = 0;

          }

        }

      } else {

        /* Function label (always instrumented, deferred mode). */

        instrument_next = 1;

      }

    }

  }

  /* The main payload is only needed when at least one trampoline exists. */

  if (ins_lines) { fputs(use_64bit ? main_payload_64 : main_payload_32, outf); }

  /* stdin is left open; only a file we opened ourselves is closed. */

  if (input_file) { fclose(inf); }
  fclose(outf);

  if (!be_quiet) {

    if (!ins_lines) {

      WARNF("No instrumentation targets found%s.",
            pass_thru ? " (pass-thru mode)" : "");

    } else {

      /* Build a human-readable list of the hardening / sanitizer settings
         found in the environment; used for the summary message only. */

      char modeline[100];
      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s",
               getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
               getenv("AFL_USE_ASAN") ? ", ASAN" : "",
               getenv("AFL_USE_MSAN") ? ", MSAN" : "",
               getenv("AFL_USE_TSAN") ? ", TSAN" : "",
               getenv("AFL_USE_UBSAN") ? ", UBSAN" : "",
               getenv("AFL_USE_LSAN") ? ", LSAN" : "");

      OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", ins_lines,
          use_64bit ? "64" : "32", modeline, inst_ratio);

    }

  }

}
550
551 /* Main entry point */
552
553 int main(int argc, char **argv) {
554
555 s32 pid;
556 u32 rand_seed, i, j;
557 int status;
558 u8 *inst_ratio_str = getenv("AFL_INST_RATIO");
559
560 struct timeval tv;
561 struct timezone tz;
562
563 clang_mode = !!getenv(CLANG_ENV_VAR);
564
565 if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
566
567 SAYF(cCYA "afl-as" VERSION cRST " by Michal Zalewski\n");
568
569 } else {
570
571 be_quiet = 1;
572
573 }
574
575 if (argc < 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) {
576
577 fprintf(
578 stdout,
579 "afl-as" VERSION
580 " by Michal Zalewski\n"
581 "\n%s [-h]\n\n"
582 "This is a helper application for afl-fuzz. It is a wrapper around GNU "
583 "'as',\n"
584 "executed by the toolchain whenever using afl-gcc or afl-clang. You "
585 "probably\n"
586 "don't want to run this program directly.\n\n"
587
588 "Rarely, when dealing with extremely complex projects, it may be "
589 "advisable\n"
590 "to set AFL_INST_RATIO to a value less than 100 in order to reduce "
591 "the\n"
592 "odds of instrumenting every discovered branch.\n\n"
593 "Environment variables used:\n"
594 "AFL_AS: path to assembler to use for instrumented files\n"
595 "AFL_CC: fall back path to assembler\n"
596 "AFL_CXX: fall back path to assembler\n"
597 "TMPDIR: directory to use for temporary files\n"
598 "TEMP: fall back path to directory for temporary files\n"
599 "TMP: fall back path to directory for temporary files\n"
600 "AFL_INST_RATIO: user specified instrumentation ratio\n"
601 "AFL_QUIET: suppress verbose output\n"
602 "AFL_KEEP_ASSEMBLY: leave instrumented assembly files\n"
603 "AFL_AS_FORCE_INSTRUMENT: force instrumentation for asm sources\n"
604 "AFL_HARDEN, AFL_USE_ASAN, AFL_USE_MSAN, AFL_USE_UBSAN, AFL_USE_LSAN:\n"
605 " used in the instrumentation summary message\n",
606 argv[0]);
607
608 exit(1);
609
610 }
611
612 gettimeofday(&tv, &tz);
613
614 rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
615 // in fast systems where pids can repeat in the same seconds we need this
616 for (i = 1; (s32)i < argc; i++)
617 for (j = 0; j < strlen(argv[i]); j++)
618 rand_seed += argv[i][j];
619
620 srandom(rand_seed);
621
622 edit_params(argc, argv);
623
624 if (inst_ratio_str) {
625
626 if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) {
627
628 FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
629
630 }
631
632 }
633
634 if (getenv(AS_LOOP_ENV_VAR)) {
635
636 FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
637
638 }
639
640 setenv(AS_LOOP_ENV_VAR, "1", 1);
641
642 /* When compiling with ASAN, we don't have a particularly elegant way to skip
643 ASAN-specific branches. But we can probabilistically compensate for
644 that... */
645
646 if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
647
648 sanitizer = 1;
649 if (!getenv("AFL_INST_RATIO")) { inst_ratio /= 3; }
650
651 }
652
653 if (!just_version) { add_instrumentation(); }
654
655 if (!(pid = fork())) {
656
657 execvp(as_params[0], (char **)as_params);
658 FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
659
660 }
661
662 if (pid < 0) { PFATAL("fork() failed"); }
663
664 if (waitpid(pid, &status, 0) <= 0) { PFATAL("waitpid() failed"); }
665
666 if (!getenv("AFL_KEEP_ASSEMBLY")) { unlink(modified_file); }
667
668 exit(WEXITSTATUS(status));
669
670 }
671
672