AFLplusplus/src/afl-fuzz-extras.c

/*
   american fuzzy lop++ - extras relates routines
   ----------------------------------------------

   Originally written by Michal Zalewski

   Now maintained by Marc Heuse <[email protected]>,
                        Heiko Eißfeldt <[email protected]> and
                        Andrea Fioraldi <[email protected]>

   Copyright 2016, 2017 Google Inc. All rights reserved.
   Copyright 2019-2024 AFLplusplus Project. All rights reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at:

     https://www.apache.org/licenses/LICENSE-2.0

   This is the real deal: the program takes an instrumented binary and
   attempts a variety of basic fuzzing tricks, paying close attention to
   how they affect the execution path.

 */

#include "afl-fuzz.h"

/* helper function for auto_extras qsort */
static int compare_auto_extras_len(const void *ae1, const void *ae2) {

  return ((struct auto_extra_data *)ae1)->len -
         ((struct auto_extra_data *)ae2)->len;

}

/* descending order */

static int compare_auto_extras_use_d(const void *ae1, const void *ae2) {

  return ((struct auto_extra_data *)ae2)->hit_cnt -
         ((struct auto_extra_data *)ae1)->hit_cnt;

}

/* Helper function for load_extras. */

static int compare_extras_len(const void *e1, const void *e2) {

  return ((struct extra_data *)e1)->len - ((struct extra_data *)e2)->len;

}

/* Read extras from a file, sort by size. */

void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
                      u32 dict_level) {

  FILE *f;
  u8    buf[MAX_LINE];
  u8   *lptr;
  u32   cur_line = 0;

  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];

  f = fopen(fname, "r");

  if (!f) { PFATAL("Unable to open '%s'", fname); }

  while ((lptr = fgets(buf, MAX_LINE, f))) {

    u8 *rptr, *wptr;
    u32 klen = 0;

    ++cur_line;

    /* Trim on left and right. */

    while (isspace(*lptr)) {

      ++lptr;

    }

    rptr = lptr + strlen(lptr) - 1;
    while (rptr >= lptr && isspace(*rptr)) {

      --rptr;

    }

    ++rptr;
    *rptr = 0;

    /* Skip empty lines and comments. */

    if (!*lptr || *lptr == '#') { continue; }

    /* All other lines must end with '"', which we can consume. */

    --rptr;

    if (rptr < lptr || *rptr != '"') {

      WARNF("Malformed name=\"value\" pair in line %u.", cur_line);
      continue;

    }

    *rptr = 0;

    /* Skip alphanumerics and dashes (label). */

    while (isalnum(*lptr) || *lptr == '_') {

      ++lptr;

    }

    /* If @number follows, parse that. */

    if (*lptr == '@') {

      ++lptr;
      if (atoi(lptr) > (s32)dict_level) { continue; }
      while (isdigit(*lptr)) {

        ++lptr;

      }

    }

    /* Skip [number] */

    if (*lptr == '[') {

      do {

        ++lptr;

      } while (*lptr >= '0' && *lptr <= '9');

      if (*lptr == ']') { ++lptr; }

    }

    /* Skip whitespace and = signs. */

    while (isspace(*lptr) || *lptr == '=') {

      ++lptr;

    }

    /* Consume opening '"'. */

    if (*lptr != '"') {

      WARNF("Malformed name=\"keyword\" pair in line %u.", cur_line);
      continue;

    }

    ++lptr;

    if (!*lptr) {

      WARNF("Empty keyword in line %u.", cur_line);
      continue;

    }

    /* Okay, let's allocate memory and copy data between "...", handling
       \xNN escaping, \\, and \". */

    afl->extras =
        afl_realloc((void **)&afl->extras,
                    (afl->extras_cnt + 1) * sizeof(struct extra_data));
    char *hexdigits = "0123456789abcdef";

    if (unlikely(!afl->extras)) { PFATAL("alloc"); }

    wptr = afl->extras[afl->extras_cnt].data = ck_alloc(rptr - lptr);

    if (!wptr) { PFATAL("no mem for data"); }

    while (*lptr) {

      switch (*lptr) {

        case 1 ... 31:
        case 128 ... 255:
          WARNF("Non-printable characters in line %u.", cur_line);
          ++lptr;
          continue;
          break;

        case '\\':

          ++lptr;

          if (*lptr == '\\' || *lptr == '"') {

            *(wptr++) = *(lptr++);
            klen++;
            break;

          }

          if (*lptr != 'x' || !isxdigit(lptr[1]) || !isxdigit(lptr[2])) {

            WARNF("Invalid escaping (not \\xNN) in line %u.", cur_line);
            continue;

          }

          *(wptr++) = ((strchr(hexdigits, tolower(lptr[1])) - hexdigits) << 4) |
                      (strchr(hexdigits, tolower(lptr[2])) - hexdigits);

          lptr += 3;
          ++klen;

          break;

        default:
          *(wptr++) = *(lptr++);
          ++klen;

      }

    }

    afl->extras[afl->extras_cnt].len = klen;

    if (afl->extras[afl->extras_cnt].len > MAX_DICT_FILE) {

      WARNF(
          "Keyword too big in line %u (%s, limit is %s)", cur_line,
          stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), klen),
          stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
      continue;

    }

    if (*min_len > klen) { *min_len = klen; }
    if (*max_len < klen) { *max_len = klen; }

    ++afl->extras_cnt;

  }

  fclose(f);

}

static void extras_check_and_sort(afl_state_t *afl, u32 min_len, u32 max_len,
                                  u8 *dir) {

  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];

  if (!afl->extras_cnt) {

    WARNF("No usable data in '%s'", dir);
    return;

  }

  qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
        compare_extras_len);

  ACTF("Loaded %u extra tokens, size range %s to %s.", afl->extras_cnt,
       stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), min_len),
       stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), max_len));

  if (max_len > 32) {

    WARNF("Some tokens are relatively large (%s) - consider trimming.",
          stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), max_len));

  }

  if (afl->extras_cnt > afl->max_det_extras) {

    WARNF("More than %u tokens - will use them probabilistically.",
          afl->max_det_extras);

  }

}

/* Read extras from the extras directory and sort them by size. */

void load_extras(afl_state_t *afl, u8 *dir) {

  DIR           *d;
  struct dirent *de;
  u32            min_len = MAX_DICT_FILE, max_len = 0, dict_level = 0;
  u8            *x;

  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];

  /* If the name ends with @, extract level and continue. */

  if ((x = strchr(dir, '@'))) {

    *x = 0;
    dict_level = atoi(x + 1);

  }

  ACTF("Loading extra dictionary from '%s' (level %u)...", dir, dict_level);

  d = opendir(dir);

  if (!d) {

    if (errno == ENOTDIR) {

      load_extras_file(afl, dir, &min_len, &max_len, dict_level);
      extras_check_and_sort(afl, min_len, max_len, dir);
      return;

    }

    PFATAL("Unable to open '%s'", dir);

  }

  if (x) { FATAL("Dictionary levels not supported for directories."); }

  while ((de = readdir(d))) {

    struct stat st;
    u8         *fn = alloc_printf("%s/%s", dir, de->d_name);
    s32         fd;

    if (lstat(fn, &st) || access(fn, R_OK)) {

      PFATAL("Unable to access '%s'", fn);

    }

    /* This also takes care of . and .. */
    if (!S_ISREG(st.st_mode) || !st.st_size) {

      ck_free(fn);
      continue;

    }

    if (st.st_size > MAX_DICT_FILE) {

      WARNF(
          "Extra '%s' is too big (%s, limit is %s)", fn,
          stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), st.st_size),
          stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
      continue;

    }

    if (min_len > st.st_size) { min_len = st.st_size; }
    if (max_len < st.st_size) { max_len = st.st_size; }

    afl->extras =
        afl_realloc((void **)&afl->extras,
                    (afl->extras_cnt + 1) * sizeof(struct extra_data));
    if (unlikely(!afl->extras)) { PFATAL("alloc"); }

    afl->extras[afl->extras_cnt].data = ck_alloc(st.st_size);
    afl->extras[afl->extras_cnt].len = st.st_size;

    fd = open(fn, O_RDONLY);

    if (fd < 0) { PFATAL("Unable to open '%s'", fn); }

    ck_read(fd, afl->extras[afl->extras_cnt].data, st.st_size, fn);

    close(fd);
    ck_free(fn);

    ++afl->extras_cnt;

  }

  closedir(d);

  extras_check_and_sort(afl, min_len, max_len, dir);

}

/* Helper function for maybe_add_auto(afl, ) */

static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {

  while (len--) {

    if (tolower(*(m1++)) ^ tolower(*(m2++))) { return 1; }

  }

  return 0;

}

/* add an extra/dict/token - no checks performed, no sorting */

static void add_extra_nocheck(afl_state_t *afl, u8 *mem, u32 len) {

  afl->extras = afl_realloc((void **)&afl->extras,
                            (afl->extras_cnt + 1) * sizeof(struct extra_data));

  if (unlikely(!afl->extras)) { PFATAL("alloc"); }

  afl->extras[afl->extras_cnt].data = ck_alloc(len);
  afl->extras[afl->extras_cnt].len = len;
  memcpy(afl->extras[afl->extras_cnt].data, mem, len);
  afl->extras_cnt++;

  /* We only want to print this once */

  if (afl->extras_cnt == afl->max_det_extras + 1) {

    WARNF("More than %u tokens - will use them probabilistically.",
          afl->max_det_extras);

  }

}

/* Sometimes strings in input is transformed to unicode internally, so for
   fuzzing we should attempt to de-unicode if it looks like simple unicode */

void deunicode_extras(afl_state_t *afl) {

  if (!afl->extras_cnt) return;

  u32 i, j, orig_cnt = afl->extras_cnt;
  u8  buf[64];

  for (i = 0; i < orig_cnt; ++i) {

    if (afl->extras[i].len < 6 || afl->extras[i].len > 64 ||
        afl->extras[i].len % 2) {

      continue;

    }

    u32 k = 0, z1 = 0, z2 = 0, z3 = 0, z4 = 0, half = afl->extras[i].len >> 1;
    u32 quarter = half >> 1;

    for (j = 0; j < afl->extras[i].len; ++j) {

      switch (j % 4) {

        case 2:
          if (!afl->extras[i].data[j]) { ++z3; }
          // fall through
        case 0:
          if (!afl->extras[i].data[j]) { ++z1; }
          break;
        case 3:
          if (!afl->extras[i].data[j]) { ++z4; }
          // fall through
        case 1:
          if (!afl->extras[i].data[j]) { ++z2; }
          break;

      }

    }

    if ((z1 < half && z2 < half) || z1 + z2 == afl->extras[i].len) { continue; }

    // also maybe 32 bit unicode?
    if (afl->extras[i].len % 4 == 0 && afl->extras[i].len >= 12 &&
        (z3 == quarter || z4 == quarter) && z1 + z2 == quarter * 3) {

      for (j = 0; j < afl->extras[i].len; ++j) {

        if (z4 < quarter) {

          if (j % 4 == 3) { buf[k++] = afl->extras[i].data[j]; }

        } else if (z3 < quarter) {

          if (j % 4 == 2) { buf[k++] = afl->extras[i].data[j]; }

        } else if (z2 < half) {

          if (j % 4 == 1) { buf[k++] = afl->extras[i].data[j]; }

        } else {

          if (j % 4 == 0) { buf[k++] = afl->extras[i].data[j]; }

        }

      }

      add_extra_nocheck(afl, buf, k);
      k = 0;

    }

    for (j = 0; j < afl->extras[i].len; ++j) {

      if (z1 < half) {

        if (j % 2 == 0) { buf[k++] = afl->extras[i].data[j]; }

      } else {

        if (j % 2 == 1) { buf[k++] = afl->extras[i].data[j]; }

      }

    }

    add_extra_nocheck(afl, buf, k);

  }

  qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
        compare_extras_len);

}

/* Removes duplicates from the loaded extras. This can happen if multiple files
   are loaded */

void dedup_extras(afl_state_t *afl) {

  if (afl->extras_cnt < 2) return;

  u32 i, j, orig_cnt = afl->extras_cnt;

  for (i = 0; i < afl->extras_cnt - 1; ++i) {

    for (j = i + 1; j < afl->extras_cnt; ++j) {

    restart_dedup:

      // if the goto was used we could be at the end of the list
      if (j >= afl->extras_cnt || afl->extras[i].len != afl->extras[j].len)
        break;

      if (memcmp(afl->extras[i].data, afl->extras[j].data,
                 afl->extras[i].len) == 0) {

        ck_free(afl->extras[j].data);
        if (j + 1 < afl->extras_cnt)  // not at the end of the list?
          memmove((char *)&afl->extras[j], (char *)&afl->extras[j + 1],
                  (afl->extras_cnt - j - 1) * sizeof(struct extra_data));
        --afl->extras_cnt;
        goto restart_dedup;  // restart if several duplicates are in a row

      }

    }

  }

  if (afl->extras_cnt != orig_cnt)
    afl->extras = afl_realloc_exact(
        (void **)&afl->extras, afl->extras_cnt * sizeof(struct extra_data));

}

/* Adds a new extra / dict entry. */
void add_extra(afl_state_t *afl, u8 *mem, u32 len) {

  u32 i, found = 0;

  for (i = 0; i < afl->extras_cnt; i++) {

    if (afl->extras[i].len == len) {

      if (memcmp(afl->extras[i].data, mem, len) == 0) return;
      found = 1;

    } else {

      if (found) break;

    }

  }

  if (len > MAX_DICT_FILE) {

    u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
    WARNF("Extra '%.*s' is too big (%s, limit is %s), skipping file!", (int)len,
          mem, stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), len),
          stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
    return;

  } else if (len > 32) {

    WARNF("Extra '%.*s' is pretty large, consider trimming.", (int)len, mem);

  }

  add_extra_nocheck(afl, mem, len);

  qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
        compare_extras_len);

}

/* Maybe add automatic extra. */

void maybe_add_auto(afl_state_t *afl, u8 *mem, u32 len) {

  u32 i;

  /* Allow users to specify that they don't want auto dictionaries. */

  if (!MAX_AUTO_EXTRAS || !USE_AUTO_EXTRAS) { return; }

  /* Skip runs of identical bytes. */

  for (i = 1; i < len; ++i) {

    if (mem[0] ^ mem[i]) { break; }

  }

  if (i == len || unlikely(len > MAX_AUTO_EXTRA)) { return; }

  /* Reject builtin interesting values. */

  if (len == 2) {

    i = sizeof(interesting_16) >> 1;

    while (i--) {

      if (*((u16 *)mem) == interesting_16[i] ||
          *((u16 *)mem) == SWAP16(interesting_16[i])) {

        return;

      }

    }

  }

  if (len == 4) {

    i = sizeof(interesting_32) >> 2;

    while (i--) {

      if (*((u32 *)mem) == (u32)interesting_32[i] ||
          *((u32 *)mem) == SWAP32(interesting_32[i])) {

        return;

      }

    }

  }

  /* Reject anything that matches existing extras. Do a case-insensitive
     match. We optimize by exploiting the fact that extras[] are sorted
     by size. */

  for (i = 0; i < afl->extras_cnt; ++i) {

    if (afl->extras[i].len >= len) { break; }

  }

  for (; i < afl->extras_cnt && afl->extras[i].len == len; ++i) {

    if (!memcmp_nocase(afl->extras[i].data, mem, len)) { return; }

  }

  /* Last but not least, check afl->a_extras[] for matches. There are no
     guarantees of a particular sort order. */

  afl->auto_changed = 1;

  for (i = 0; i < afl->a_extras_cnt; ++i) {

    if (afl->a_extras[i].len == len &&
        !memcmp_nocase(afl->a_extras[i].data, mem, len)) {

      afl->a_extras[i].hit_cnt++;
      goto sort_a_extras;

    }

  }

  /* At this point, looks like we're dealing with a new entry. So, let's
     append it if we have room. Otherwise, let's randomly evict some other
     entry from the bottom half of the list. */

  if (afl->a_extras_cnt < MAX_AUTO_EXTRAS) {

    memcpy(afl->a_extras[afl->a_extras_cnt].data, mem, len);
    afl->a_extras[afl->a_extras_cnt].len = len;
    ++afl->a_extras_cnt;

  } else {

    i = MAX_AUTO_EXTRAS / 2 + rand_below(afl, (MAX_AUTO_EXTRAS + 1) / 2);

    memcpy(afl->a_extras[i].data, mem, len);
    afl->a_extras[i].len = len;
    afl->a_extras[i].hit_cnt = 0;

  }

sort_a_extras:

  /* First, sort all auto extras by use count, descending order. */

  qsort(afl->a_extras, afl->a_extras_cnt, sizeof(struct auto_extra_data),
        compare_auto_extras_use_d);

  /* Then, sort the top USE_AUTO_EXTRAS entries by size. */

  qsort(afl->a_extras, MIN((u32)USE_AUTO_EXTRAS, afl->a_extras_cnt),
        sizeof(struct auto_extra_data), compare_auto_extras_len);

}

/* Save automatically generated extras. */

void save_auto(afl_state_t *afl) {

  u32 i;

  if (!afl->auto_changed) { return; }
  afl->auto_changed = 0;

  for (i = 0; i < MIN((u32)USE_AUTO_EXTRAS, afl->a_extras_cnt); ++i) {

    u8 *fn =
        alloc_printf("%s/queue/.state/auto_extras/auto_%06u", afl->out_dir, i);
    s32 fd;

    fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);

    if (fd < 0) { PFATAL("Unable to create '%s'", fn); }

    ck_write(fd, afl->a_extras[i].data, afl->a_extras[i].len, fn);

    close(fd);
    ck_free(fn);

  }

}

/* Load automatically generated extras. */

void load_auto(afl_state_t *afl) {

  u32 i;

  for (i = 0; i < USE_AUTO_EXTRAS; ++i) {

    u8  tmp[MAX_AUTO_EXTRA + 1];
    u8 *fn = alloc_printf("%s/.state/auto_extras/auto_%06u", afl->in_dir, i);
    s32 fd, len;

    fd = open(fn, O_RDONLY);

    if (fd < 0) {

      if (errno != ENOENT) { PFATAL("Unable to open '%s'", fn); }
      ck_free(fn);
      break;

    }

    /* We read one byte more to cheaply detect tokens that are too
       long (and skip them). */

    len = read(fd, tmp, MAX_AUTO_EXTRA + 1);

    if (len < 0) { PFATAL("Unable to read from '%s'", fn); }

    if (len >= MIN_AUTO_EXTRA && len <= MAX_AUTO_EXTRA) {

      maybe_add_auto(afl, tmp, len);

    }

    close(fd);
    ck_free(fn);

  }

  if (i) {

    OKF("Loaded %u auto-discovered dictionary tokens.", i);

  } else {

    ACTF("No auto-generated dictionary tokens to reuse.");

  }

}

/* Destroy extras. */

void destroy_extras(afl_state_t *afl) {

  u32 i;

  for (i = 0; i < afl->extras_cnt; ++i) {

    ck_free(afl->extras[i].data);

  }

  afl_free(afl->extras);

}