1 /*
2 american fuzzy lop++ - extras relates routines
3 ----------------------------------------------
4
5 Originally written by Michal Zalewski
6
7 Now maintained by Marc Heuse <[email protected]>,
8 Heiko Eißfeldt <[email protected]> and
9 Andrea Fioraldi <[email protected]>
10
11 Copyright 2016, 2017 Google Inc. All rights reserved.
12 Copyright 2019-2024 AFLplusplus Project. All rights reserved.
13
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at:
17
18 https://www.apache.org/licenses/LICENSE-2.0
19
20 This is the real deal: the program takes an instrumented binary and
21 attempts a variety of basic fuzzing tricks, paying close attention to
22 how they affect the execution path.
23
24 */
25
26 #include "afl-fuzz.h"
27
28 /* helper function for auto_extras qsort */
compare_auto_extras_len(const void * ae1,const void * ae2)29 static int compare_auto_extras_len(const void *ae1, const void *ae2) {
30
31 return ((struct auto_extra_data *)ae1)->len -
32 ((struct auto_extra_data *)ae2)->len;
33
34 }
35
36 /* descending order */
37
compare_auto_extras_use_d(const void * ae1,const void * ae2)38 static int compare_auto_extras_use_d(const void *ae1, const void *ae2) {
39
40 return ((struct auto_extra_data *)ae2)->hit_cnt -
41 ((struct auto_extra_data *)ae1)->hit_cnt;
42
43 }
44
45 /* Helper function for load_extras. */
46
compare_extras_len(const void * e1,const void * e2)47 static int compare_extras_len(const void *e1, const void *e2) {
48
49 return ((struct extra_data *)e1)->len - ((struct extra_data *)e2)->len;
50
51 }
52
53 /* Read extras from a file, sort by size. */
54
load_extras_file(afl_state_t * afl,u8 * fname,u32 * min_len,u32 * max_len,u32 dict_level)55 void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
56 u32 dict_level) {
57
58 FILE *f;
59 u8 buf[MAX_LINE];
60 u8 *lptr;
61 u32 cur_line = 0;
62
63 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
64
65 f = fopen(fname, "r");
66
67 if (!f) { PFATAL("Unable to open '%s'", fname); }
68
69 while ((lptr = fgets(buf, MAX_LINE, f))) {
70
71 u8 *rptr, *wptr;
72 u32 klen = 0;
73
74 ++cur_line;
75
76 /* Trim on left and right. */
77
78 while (isspace(*lptr)) {
79
80 ++lptr;
81
82 }
83
84 rptr = lptr + strlen(lptr) - 1;
85 while (rptr >= lptr && isspace(*rptr)) {
86
87 --rptr;
88
89 }
90
91 ++rptr;
92 *rptr = 0;
93
94 /* Skip empty lines and comments. */
95
96 if (!*lptr || *lptr == '#') { continue; }
97
98 /* All other lines must end with '"', which we can consume. */
99
100 --rptr;
101
102 if (rptr < lptr || *rptr != '"') {
103
104 WARNF("Malformed name=\"value\" pair in line %u.", cur_line);
105 continue;
106
107 }
108
109 *rptr = 0;
110
111 /* Skip alphanumerics and dashes (label). */
112
113 while (isalnum(*lptr) || *lptr == '_') {
114
115 ++lptr;
116
117 }
118
119 /* If @number follows, parse that. */
120
121 if (*lptr == '@') {
122
123 ++lptr;
124 if (atoi(lptr) > (s32)dict_level) { continue; }
125 while (isdigit(*lptr)) {
126
127 ++lptr;
128
129 }
130
131 }
132
133 /* Skip [number] */
134
135 if (*lptr == '[') {
136
137 do {
138
139 ++lptr;
140
141 } while (*lptr >= '0' && *lptr <= '9');
142
143 if (*lptr == ']') { ++lptr; }
144
145 }
146
147 /* Skip whitespace and = signs. */
148
149 while (isspace(*lptr) || *lptr == '=') {
150
151 ++lptr;
152
153 }
154
155 /* Consume opening '"'. */
156
157 if (*lptr != '"') {
158
159 WARNF("Malformed name=\"keyword\" pair in line %u.", cur_line);
160 continue;
161
162 }
163
164 ++lptr;
165
166 if (!*lptr) {
167
168 WARNF("Empty keyword in line %u.", cur_line);
169 continue;
170
171 }
172
173 /* Okay, let's allocate memory and copy data between "...", handling
174 \xNN escaping, \\, and \". */
175
176 afl->extras =
177 afl_realloc((void **)&afl->extras,
178 (afl->extras_cnt + 1) * sizeof(struct extra_data));
179 char *hexdigits = "0123456789abcdef";
180
181 if (unlikely(!afl->extras)) { PFATAL("alloc"); }
182
183 wptr = afl->extras[afl->extras_cnt].data = ck_alloc(rptr - lptr);
184
185 if (!wptr) { PFATAL("no mem for data"); }
186
187 while (*lptr) {
188
189 switch (*lptr) {
190
191 case 1 ... 31:
192 case 128 ... 255:
193 WARNF("Non-printable characters in line %u.", cur_line);
194 ++lptr;
195 continue;
196 break;
197
198 case '\\':
199
200 ++lptr;
201
202 if (*lptr == '\\' || *lptr == '"') {
203
204 *(wptr++) = *(lptr++);
205 klen++;
206 break;
207
208 }
209
210 if (*lptr != 'x' || !isxdigit(lptr[1]) || !isxdigit(lptr[2])) {
211
212 WARNF("Invalid escaping (not \\xNN) in line %u.", cur_line);
213 continue;
214
215 }
216
217 *(wptr++) = ((strchr(hexdigits, tolower(lptr[1])) - hexdigits) << 4) |
218 (strchr(hexdigits, tolower(lptr[2])) - hexdigits);
219
220 lptr += 3;
221 ++klen;
222
223 break;
224
225 default:
226 *(wptr++) = *(lptr++);
227 ++klen;
228
229 }
230
231 }
232
233 afl->extras[afl->extras_cnt].len = klen;
234
235 if (afl->extras[afl->extras_cnt].len > MAX_DICT_FILE) {
236
237 WARNF(
238 "Keyword too big in line %u (%s, limit is %s)", cur_line,
239 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), klen),
240 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
241 continue;
242
243 }
244
245 if (*min_len > klen) { *min_len = klen; }
246 if (*max_len < klen) { *max_len = klen; }
247
248 ++afl->extras_cnt;
249
250 }
251
252 fclose(f);
253
254 }
255
extras_check_and_sort(afl_state_t * afl,u32 min_len,u32 max_len,u8 * dir)256 static void extras_check_and_sort(afl_state_t *afl, u32 min_len, u32 max_len,
257 u8 *dir) {
258
259 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
260
261 if (!afl->extras_cnt) {
262
263 WARNF("No usable data in '%s'", dir);
264 return;
265
266 }
267
268 qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
269 compare_extras_len);
270
271 ACTF("Loaded %u extra tokens, size range %s to %s.", afl->extras_cnt,
272 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), min_len),
273 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), max_len));
274
275 if (max_len > 32) {
276
277 WARNF("Some tokens are relatively large (%s) - consider trimming.",
278 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), max_len));
279
280 }
281
282 if (afl->extras_cnt > afl->max_det_extras) {
283
284 WARNF("More than %u tokens - will use them probabilistically.",
285 afl->max_det_extras);
286
287 }
288
289 }
290
291 /* Read extras from the extras directory and sort them by size. */
292
load_extras(afl_state_t * afl,u8 * dir)293 void load_extras(afl_state_t *afl, u8 *dir) {
294
295 DIR *d;
296 struct dirent *de;
297 u32 min_len = MAX_DICT_FILE, max_len = 0, dict_level = 0;
298 u8 *x;
299
300 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
301
302 /* If the name ends with @, extract level and continue. */
303
304 if ((x = strchr(dir, '@'))) {
305
306 *x = 0;
307 dict_level = atoi(x + 1);
308
309 }
310
311 ACTF("Loading extra dictionary from '%s' (level %u)...", dir, dict_level);
312
313 d = opendir(dir);
314
315 if (!d) {
316
317 if (errno == ENOTDIR) {
318
319 load_extras_file(afl, dir, &min_len, &max_len, dict_level);
320 extras_check_and_sort(afl, min_len, max_len, dir);
321 return;
322
323 }
324
325 PFATAL("Unable to open '%s'", dir);
326
327 }
328
329 if (x) { FATAL("Dictionary levels not supported for directories."); }
330
331 while ((de = readdir(d))) {
332
333 struct stat st;
334 u8 *fn = alloc_printf("%s/%s", dir, de->d_name);
335 s32 fd;
336
337 if (lstat(fn, &st) || access(fn, R_OK)) {
338
339 PFATAL("Unable to access '%s'", fn);
340
341 }
342
343 /* This also takes care of . and .. */
344 if (!S_ISREG(st.st_mode) || !st.st_size) {
345
346 ck_free(fn);
347 continue;
348
349 }
350
351 if (st.st_size > MAX_DICT_FILE) {
352
353 WARNF(
354 "Extra '%s' is too big (%s, limit is %s)", fn,
355 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), st.st_size),
356 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
357 continue;
358
359 }
360
361 if (min_len > st.st_size) { min_len = st.st_size; }
362 if (max_len < st.st_size) { max_len = st.st_size; }
363
364 afl->extras =
365 afl_realloc((void **)&afl->extras,
366 (afl->extras_cnt + 1) * sizeof(struct extra_data));
367 if (unlikely(!afl->extras)) { PFATAL("alloc"); }
368
369 afl->extras[afl->extras_cnt].data = ck_alloc(st.st_size);
370 afl->extras[afl->extras_cnt].len = st.st_size;
371
372 fd = open(fn, O_RDONLY);
373
374 if (fd < 0) { PFATAL("Unable to open '%s'", fn); }
375
376 ck_read(fd, afl->extras[afl->extras_cnt].data, st.st_size, fn);
377
378 close(fd);
379 ck_free(fn);
380
381 ++afl->extras_cnt;
382
383 }
384
385 closedir(d);
386
387 extras_check_and_sort(afl, min_len, max_len, dir);
388
389 }
390
391 /* Helper function for maybe_add_auto(afl, ) */
392
memcmp_nocase(u8 * m1,u8 * m2,u32 len)393 static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {
394
395 while (len--) {
396
397 if (tolower(*(m1++)) ^ tolower(*(m2++))) { return 1; }
398
399 }
400
401 return 0;
402
403 }
404
405 /* add an extra/dict/token - no checks performed, no sorting */
406
add_extra_nocheck(afl_state_t * afl,u8 * mem,u32 len)407 static void add_extra_nocheck(afl_state_t *afl, u8 *mem, u32 len) {
408
409 afl->extras = afl_realloc((void **)&afl->extras,
410 (afl->extras_cnt + 1) * sizeof(struct extra_data));
411
412 if (unlikely(!afl->extras)) { PFATAL("alloc"); }
413
414 afl->extras[afl->extras_cnt].data = ck_alloc(len);
415 afl->extras[afl->extras_cnt].len = len;
416 memcpy(afl->extras[afl->extras_cnt].data, mem, len);
417 afl->extras_cnt++;
418
419 /* We only want to print this once */
420
421 if (afl->extras_cnt == afl->max_det_extras + 1) {
422
423 WARNF("More than %u tokens - will use them probabilistically.",
424 afl->max_det_extras);
425
426 }
427
428 }
429
430 /* Sometimes strings in input is transformed to unicode internally, so for
431 fuzzing we should attempt to de-unicode if it looks like simple unicode */
432
deunicode_extras(afl_state_t * afl)433 void deunicode_extras(afl_state_t *afl) {
434
435 if (!afl->extras_cnt) return;
436
437 u32 i, j, orig_cnt = afl->extras_cnt;
438 u8 buf[64];
439
440 for (i = 0; i < orig_cnt; ++i) {
441
442 if (afl->extras[i].len < 6 || afl->extras[i].len > 64 ||
443 afl->extras[i].len % 2) {
444
445 continue;
446
447 }
448
449 u32 k = 0, z1 = 0, z2 = 0, z3 = 0, z4 = 0, half = afl->extras[i].len >> 1;
450 u32 quarter = half >> 1;
451
452 for (j = 0; j < afl->extras[i].len; ++j) {
453
454 switch (j % 4) {
455
456 case 2:
457 if (!afl->extras[i].data[j]) { ++z3; }
458 // fall through
459 case 0:
460 if (!afl->extras[i].data[j]) { ++z1; }
461 break;
462 case 3:
463 if (!afl->extras[i].data[j]) { ++z4; }
464 // fall through
465 case 1:
466 if (!afl->extras[i].data[j]) { ++z2; }
467 break;
468
469 }
470
471 }
472
473 if ((z1 < half && z2 < half) || z1 + z2 == afl->extras[i].len) { continue; }
474
475 // also maybe 32 bit unicode?
476 if (afl->extras[i].len % 4 == 0 && afl->extras[i].len >= 12 &&
477 (z3 == quarter || z4 == quarter) && z1 + z2 == quarter * 3) {
478
479 for (j = 0; j < afl->extras[i].len; ++j) {
480
481 if (z4 < quarter) {
482
483 if (j % 4 == 3) { buf[k++] = afl->extras[i].data[j]; }
484
485 } else if (z3 < quarter) {
486
487 if (j % 4 == 2) { buf[k++] = afl->extras[i].data[j]; }
488
489 } else if (z2 < half) {
490
491 if (j % 4 == 1) { buf[k++] = afl->extras[i].data[j]; }
492
493 } else {
494
495 if (j % 4 == 0) { buf[k++] = afl->extras[i].data[j]; }
496
497 }
498
499 }
500
501 add_extra_nocheck(afl, buf, k);
502 k = 0;
503
504 }
505
506 for (j = 0; j < afl->extras[i].len; ++j) {
507
508 if (z1 < half) {
509
510 if (j % 2 == 0) { buf[k++] = afl->extras[i].data[j]; }
511
512 } else {
513
514 if (j % 2 == 1) { buf[k++] = afl->extras[i].data[j]; }
515
516 }
517
518 }
519
520 add_extra_nocheck(afl, buf, k);
521
522 }
523
524 qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
525 compare_extras_len);
526
527 }
528
529 /* Removes duplicates from the loaded extras. This can happen if multiple files
530 are loaded */
531
dedup_extras(afl_state_t * afl)532 void dedup_extras(afl_state_t *afl) {
533
534 if (afl->extras_cnt < 2) return;
535
536 u32 i, j, orig_cnt = afl->extras_cnt;
537
538 for (i = 0; i < afl->extras_cnt - 1; ++i) {
539
540 for (j = i + 1; j < afl->extras_cnt; ++j) {
541
542 restart_dedup:
543
544 // if the goto was used we could be at the end of the list
545 if (j >= afl->extras_cnt || afl->extras[i].len != afl->extras[j].len)
546 break;
547
548 if (memcmp(afl->extras[i].data, afl->extras[j].data,
549 afl->extras[i].len) == 0) {
550
551 ck_free(afl->extras[j].data);
552 if (j + 1 < afl->extras_cnt) // not at the end of the list?
553 memmove((char *)&afl->extras[j], (char *)&afl->extras[j + 1],
554 (afl->extras_cnt - j - 1) * sizeof(struct extra_data));
555 --afl->extras_cnt;
556 goto restart_dedup; // restart if several duplicates are in a row
557
558 }
559
560 }
561
562 }
563
564 if (afl->extras_cnt != orig_cnt)
565 afl->extras = afl_realloc_exact(
566 (void **)&afl->extras, afl->extras_cnt * sizeof(struct extra_data));
567
568 }
569
570 /* Adds a new extra / dict entry. */
add_extra(afl_state_t * afl,u8 * mem,u32 len)571 void add_extra(afl_state_t *afl, u8 *mem, u32 len) {
572
573 u32 i, found = 0;
574
575 for (i = 0; i < afl->extras_cnt; i++) {
576
577 if (afl->extras[i].len == len) {
578
579 if (memcmp(afl->extras[i].data, mem, len) == 0) return;
580 found = 1;
581
582 } else {
583
584 if (found) break;
585
586 }
587
588 }
589
590 if (len > MAX_DICT_FILE) {
591
592 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
593 WARNF("Extra '%.*s' is too big (%s, limit is %s), skipping file!", (int)len,
594 mem, stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), len),
595 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
596 return;
597
598 } else if (len > 32) {
599
600 WARNF("Extra '%.*s' is pretty large, consider trimming.", (int)len, mem);
601
602 }
603
604 add_extra_nocheck(afl, mem, len);
605
606 qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
607 compare_extras_len);
608
609 }
610
611 /* Maybe add automatic extra. */
612
maybe_add_auto(afl_state_t * afl,u8 * mem,u32 len)613 void maybe_add_auto(afl_state_t *afl, u8 *mem, u32 len) {
614
615 u32 i;
616
617 /* Allow users to specify that they don't want auto dictionaries. */
618
619 if (!MAX_AUTO_EXTRAS || !USE_AUTO_EXTRAS) { return; }
620
621 /* Skip runs of identical bytes. */
622
623 for (i = 1; i < len; ++i) {
624
625 if (mem[0] ^ mem[i]) { break; }
626
627 }
628
629 if (i == len || unlikely(len > MAX_AUTO_EXTRA)) { return; }
630
631 /* Reject builtin interesting values. */
632
633 if (len == 2) {
634
635 i = sizeof(interesting_16) >> 1;
636
637 while (i--) {
638
639 if (*((u16 *)mem) == interesting_16[i] ||
640 *((u16 *)mem) == SWAP16(interesting_16[i])) {
641
642 return;
643
644 }
645
646 }
647
648 }
649
650 if (len == 4) {
651
652 i = sizeof(interesting_32) >> 2;
653
654 while (i--) {
655
656 if (*((u32 *)mem) == (u32)interesting_32[i] ||
657 *((u32 *)mem) == SWAP32(interesting_32[i])) {
658
659 return;
660
661 }
662
663 }
664
665 }
666
667 /* Reject anything that matches existing extras. Do a case-insensitive
668 match. We optimize by exploiting the fact that extras[] are sorted
669 by size. */
670
671 for (i = 0; i < afl->extras_cnt; ++i) {
672
673 if (afl->extras[i].len >= len) { break; }
674
675 }
676
677 for (; i < afl->extras_cnt && afl->extras[i].len == len; ++i) {
678
679 if (!memcmp_nocase(afl->extras[i].data, mem, len)) { return; }
680
681 }
682
683 /* Last but not least, check afl->a_extras[] for matches. There are no
684 guarantees of a particular sort order. */
685
686 afl->auto_changed = 1;
687
688 for (i = 0; i < afl->a_extras_cnt; ++i) {
689
690 if (afl->a_extras[i].len == len &&
691 !memcmp_nocase(afl->a_extras[i].data, mem, len)) {
692
693 afl->a_extras[i].hit_cnt++;
694 goto sort_a_extras;
695
696 }
697
698 }
699
700 /* At this point, looks like we're dealing with a new entry. So, let's
701 append it if we have room. Otherwise, let's randomly evict some other
702 entry from the bottom half of the list. */
703
704 if (afl->a_extras_cnt < MAX_AUTO_EXTRAS) {
705
706 memcpy(afl->a_extras[afl->a_extras_cnt].data, mem, len);
707 afl->a_extras[afl->a_extras_cnt].len = len;
708 ++afl->a_extras_cnt;
709
710 } else {
711
712 i = MAX_AUTO_EXTRAS / 2 + rand_below(afl, (MAX_AUTO_EXTRAS + 1) / 2);
713
714 memcpy(afl->a_extras[i].data, mem, len);
715 afl->a_extras[i].len = len;
716 afl->a_extras[i].hit_cnt = 0;
717
718 }
719
720 sort_a_extras:
721
722 /* First, sort all auto extras by use count, descending order. */
723
724 qsort(afl->a_extras, afl->a_extras_cnt, sizeof(struct auto_extra_data),
725 compare_auto_extras_use_d);
726
727 /* Then, sort the top USE_AUTO_EXTRAS entries by size. */
728
729 qsort(afl->a_extras, MIN((u32)USE_AUTO_EXTRAS, afl->a_extras_cnt),
730 sizeof(struct auto_extra_data), compare_auto_extras_len);
731
732 }
733
734 /* Save automatically generated extras. */
735
save_auto(afl_state_t * afl)736 void save_auto(afl_state_t *afl) {
737
738 u32 i;
739
740 if (!afl->auto_changed) { return; }
741 afl->auto_changed = 0;
742
743 for (i = 0; i < MIN((u32)USE_AUTO_EXTRAS, afl->a_extras_cnt); ++i) {
744
745 u8 *fn =
746 alloc_printf("%s/queue/.state/auto_extras/auto_%06u", afl->out_dir, i);
747 s32 fd;
748
749 fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
750
751 if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
752
753 ck_write(fd, afl->a_extras[i].data, afl->a_extras[i].len, fn);
754
755 close(fd);
756 ck_free(fn);
757
758 }
759
760 }
761
762 /* Load automatically generated extras. */
763
load_auto(afl_state_t * afl)764 void load_auto(afl_state_t *afl) {
765
766 u32 i;
767
768 for (i = 0; i < USE_AUTO_EXTRAS; ++i) {
769
770 u8 tmp[MAX_AUTO_EXTRA + 1];
771 u8 *fn = alloc_printf("%s/.state/auto_extras/auto_%06u", afl->in_dir, i);
772 s32 fd, len;
773
774 fd = open(fn, O_RDONLY);
775
776 if (fd < 0) {
777
778 if (errno != ENOENT) { PFATAL("Unable to open '%s'", fn); }
779 ck_free(fn);
780 break;
781
782 }
783
784 /* We read one byte more to cheaply detect tokens that are too
785 long (and skip them). */
786
787 len = read(fd, tmp, MAX_AUTO_EXTRA + 1);
788
789 if (len < 0) { PFATAL("Unable to read from '%s'", fn); }
790
791 if (len >= MIN_AUTO_EXTRA && len <= MAX_AUTO_EXTRA) {
792
793 maybe_add_auto(afl, tmp, len);
794
795 }
796
797 close(fd);
798 ck_free(fn);
799
800 }
801
802 if (i) {
803
804 OKF("Loaded %u auto-discovered dictionary tokens.", i);
805
806 } else {
807
808 ACTF("No auto-generated dictionary tokens to reuse.");
809
810 }
811
812 }
813
814 /* Destroy extras. */
815
destroy_extras(afl_state_t * afl)816 void destroy_extras(afl_state_t *afl) {
817
818 u32 i;
819
820 for (i = 0; i < afl->extras_cnt; ++i) {
821
822 ck_free(afl->extras[i].data);
823
824 }
825
826 afl_free(afl->extras);
827
828 }
829
830