xref: /aosp_15_r20/external/AFLplusplus/instrumentation/afl-llvm-common.cc (revision 08b48e0b10e97b33e7b60c5b6e2243bd915777f2)
1 #define AFL_LLVM_PASS
2 
3 #include "config.h"
4 #include "debug.h"
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <sys/time.h>
10 #include <fnmatch.h>
11 
12 #include <list>
13 #include <string>
14 #include <fstream>
15 #include <cmath>
16 
17 #include <llvm/Support/raw_ostream.h>
18 
19 #define IS_EXTERN extern
20 #include "afl-llvm-common.h"
21 
22 using namespace llvm;
23 
24 static std::list<std::string> allowListFiles;
25 static std::list<std::string> allowListFunctions;
26 static std::list<std::string> denyListFiles;
27 static std::list<std::string> denyListFunctions;
28 
getBBName(const llvm::BasicBlock * BB)29 char *getBBName(const llvm::BasicBlock *BB) {
30 
31   static char *name;
32 
33   if (!BB->getName().empty()) {
34 
35     name = strdup(BB->getName().str().c_str());
36     return name;
37 
38   }
39 
40   std::string        Str;
41   raw_string_ostream OS(Str);
42 
43 #if LLVM_VERSION_MAJOR >= 4 || \
44     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
45   BB->printAsOperand(OS, false);
46 #endif
47   name = strdup(OS.str().c_str());
48   return name;
49 
50 }
51 
52 /* Function that we never instrument or analyze */
53 /* Note: this ignore check is also called in isInInstrumentList() */
isIgnoreFunction(const llvm::Function * F)54 bool isIgnoreFunction(const llvm::Function *F) {
55 
56   // Starting from "LLVMFuzzer" these are functions used in libfuzzer based
57   // fuzzing campaign installations, e.g. oss-fuzz
58 
59   static constexpr const char *ignoreList[] = {
60 
61       "asan.",
62       "llvm.",
63       "sancov.",
64       "__ubsan",
65       "ign.",
66       "__afl",
67       "_fini",
68       "__libc_",
69       "__asan",
70       "__msan",
71       "__cmplog",
72       "__sancov",
73       "__san",
74       "__cxx_",
75       "__decide_deferred",
76       "_GLOBAL",
77       "_ZZN6__asan",
78       "_ZZN6__lsan",
79       "msan.",
80       "LLVMFuzzerM",
81       "LLVMFuzzerC",
82       "LLVMFuzzerI",
83       "maybe_duplicate_stderr",
84       "discard_output",
85       "close_stdout",
86       "dup_and_close_stderr",
87       "maybe_close_fd_mask",
88       "ExecuteFilesOnyByOne"
89 
90   };
91 
92   for (auto const &ignoreListFunc : ignoreList) {
93 
94     if (F->getName().startswith(ignoreListFunc)) { return true; }
95 
96   }
97 
98   static constexpr const char *ignoreSubstringList[] = {
99 
100       "__asan",     "__msan",       "__ubsan",    "__lsan",  "__san",
101       "__sanitize", "DebugCounter", "DwarfDebug", "DebugLoc"
102 
103   };
104 
105   // This check is very sensitive, we must be sure to not include patterns
106   // that are part of user-written C++ functions like the ones including
107   // std::string as parameter (see #1927) as the mangled type is inserted in the
108   // mangled name of the user-written function
109   for (auto const &ignoreListFunc : ignoreSubstringList) {
110 
111     // hexcoder: F->getName().contains() not avaiilable in llvm 3.8.0
112     if (StringRef::npos != F->getName().find(ignoreListFunc)) { return true; }
113 
114   }
115 
116   return false;
117 
118 }
119 
initInstrumentList()120 void initInstrumentList() {
121 
122   char *allowlist = getenv("AFL_LLVM_ALLOWLIST");
123   if (!allowlist) allowlist = getenv("AFL_LLVM_INSTRUMENT_FILE");
124   if (!allowlist) allowlist = getenv("AFL_LLVM_WHITELIST");
125   char *denylist = getenv("AFL_LLVM_DENYLIST");
126   if (!denylist) denylist = getenv("AFL_LLVM_BLOCKLIST");
127 
128   if (allowlist && denylist)
129     FATAL(
130         "You can only specify either AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST "
131         "but not both!");
132 
133   if (allowlist) {
134 
135     std::string   line;
136     std::ifstream fileStream;
137     fileStream.open(allowlist);
138     if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_ALLOWLIST");
139     getline(fileStream, line);
140 
141     while (fileStream) {
142 
143       int         is_file = -1;
144       std::size_t npos;
145       std::string original_line = line;
146 
147       line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
148                  line.end());
149 
150       // remove # and following
151       if ((npos = line.find("#")) != std::string::npos)
152         line = line.substr(0, npos);
153 
154       if (line.compare(0, 4, "fun:") == 0) {
155 
156         is_file = 0;
157         line = line.substr(4);
158 
159       } else if (line.compare(0, 9, "function:") == 0) {
160 
161         is_file = 0;
162         line = line.substr(9);
163 
164       } else if (line.compare(0, 4, "src:") == 0) {
165 
166         is_file = 1;
167         line = line.substr(4);
168 
169       } else if (line.compare(0, 7, "source:") == 0) {
170 
171         is_file = 1;
172         line = line.substr(7);
173 
174       }
175 
176       if (line.find(":") != std::string::npos) {
177 
178         FATAL("invalid line in AFL_LLVM_ALLOWLIST: %s", original_line.c_str());
179 
180       }
181 
182       if (line.length() > 0) {
183 
184         // if the entry contains / or . it must be a file
185         if (is_file == -1)
186           if (line.find("/") != std::string::npos ||
187               line.find(".") != std::string::npos)
188             is_file = 1;
189         // otherwise it is a function
190 
191         if (is_file == 1)
192           allowListFiles.push_back(line);
193         else
194           allowListFunctions.push_back(line);
195 
196       }
197 
198       getline(fileStream, line);
199 
200     }
201 
202     if (debug)
203       DEBUGF("loaded allowlist with %zu file and %zu function entries\n",
204              allowListFiles.size() / 4, allowListFunctions.size() / 4);
205 
206   }
207 
208   if (denylist) {
209 
210     std::string   line;
211     std::ifstream fileStream;
212     fileStream.open(denylist);
213     if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_DENYLIST");
214     getline(fileStream, line);
215 
216     while (fileStream) {
217 
218       int         is_file = -1;
219       std::size_t npos;
220       std::string original_line = line;
221 
222       line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
223                  line.end());
224 
225       // remove # and following
226       if ((npos = line.find("#")) != std::string::npos)
227         line = line.substr(0, npos);
228 
229       if (line.compare(0, 4, "fun:") == 0) {
230 
231         is_file = 0;
232         line = line.substr(4);
233 
234       } else if (line.compare(0, 9, "function:") == 0) {
235 
236         is_file = 0;
237         line = line.substr(9);
238 
239       } else if (line.compare(0, 4, "src:") == 0) {
240 
241         is_file = 1;
242         line = line.substr(4);
243 
244       } else if (line.compare(0, 7, "source:") == 0) {
245 
246         is_file = 1;
247         line = line.substr(7);
248 
249       }
250 
251       if (line.find(":") != std::string::npos) {
252 
253         FATAL("invalid line in AFL_LLVM_DENYLIST: %s", original_line.c_str());
254 
255       }
256 
257       if (line.length() > 0) {
258 
259         // if the entry contains / or . it must be a file
260         if (is_file == -1)
261           if (line.find("/") != std::string::npos ||
262               line.find(".") != std::string::npos)
263             is_file = 1;
264         // otherwise it is a function
265 
266         if (is_file == 1)
267           denyListFiles.push_back(line);
268         else
269           denyListFunctions.push_back(line);
270 
271       }
272 
273       getline(fileStream, line);
274 
275     }
276 
277     if (debug)
278       DEBUGF("loaded denylist with %zu file and %zu function entries\n",
279              denyListFiles.size() / 4, denyListFunctions.size() / 4);
280 
281   }
282 
283 }
284 
scanForDangerousFunctions(llvm::Module * M)285 void scanForDangerousFunctions(llvm::Module *M) {
286 
287   if (!M) return;
288 
289 #if LLVM_VERSION_MAJOR >= 4 || \
290     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
291 
292   for (GlobalIFunc &IF : M->ifuncs()) {
293 
294     StringRef ifunc_name = IF.getName();
295     Constant *r = IF.getResolver();
296     if (r->getNumOperands() == 0) { continue; }
297     StringRef r_name = cast<Function>(r->getOperand(0))->getName();
298     if (!be_quiet)
299       fprintf(stderr,
300               "Note: Found an ifunc with name %s that points to resolver "
301               "function %s, we will not instrument this, putting it into the "
302               "block list.\n",
303               ifunc_name.str().c_str(), r_name.str().c_str());
304     denyListFunctions.push_back(r_name.str());
305 
306   }
307 
308   GlobalVariable *GV = M->getNamedGlobal("llvm.global_ctors");
309   if (GV && !GV->isDeclaration() && !GV->hasLocalLinkage()) {
310 
311     ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
312 
313     if (InitList) {
314 
315       for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
316 
317         if (ConstantStruct *CS =
318                 dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
319 
320           if (CS->getNumOperands() >= 2) {
321 
322             if (CS->getOperand(1)->isNullValue())
323               break;  // Found a null terminator, stop here.
324 
325             ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
326             int          Priority = CI ? CI->getSExtValue() : 0;
327 
328             Constant *FP = CS->getOperand(1);
329             if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
330               if (CE->isCast()) FP = CE->getOperand(0);
331             if (Function *F = dyn_cast<Function>(FP)) {
332 
333               if (!F->isDeclaration() &&
334                   strncmp(F->getName().str().c_str(), "__afl", 5) != 0) {
335 
336                 if (!be_quiet)
337                   fprintf(stderr,
338                           "Note: Found constructor function %s with prio "
339                           "%u, we will not instrument this, putting it into a "
340                           "block list.\n",
341                           F->getName().str().c_str(), Priority);
342                 denyListFunctions.push_back(F->getName().str());
343 
344               }
345 
346             }
347 
348           }
349 
350         }
351 
352       }
353 
354     }
355 
356   }
357 
358 #endif
359 
360 }
361 
getSourceName(llvm::Function * F)362 static std::string getSourceName(llvm::Function *F) {
363 
364   // let's try to get the filename for the function
365   auto                 bb = &F->getEntryBlock();
366   BasicBlock::iterator IP = bb->getFirstInsertionPt();
367   IRBuilder<>          IRB(&(*IP));
368   DebugLoc             Loc = IP->getDebugLoc();
369 
370 #if LLVM_VERSION_MAJOR >= 4 || \
371     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
372   if (Loc) {
373 
374     StringRef   instFilename;
375     DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
376 
377     if (cDILoc) { instFilename = cDILoc->getFilename(); }
378 
379     if (instFilename.str().empty() && cDILoc) {
380 
381       /* If the original location is empty, try using the inlined location
382        */
383       DILocation *oDILoc = cDILoc->getInlinedAt();
384       if (oDILoc) { instFilename = oDILoc->getFilename(); }
385 
386     }
387 
388     return instFilename.str();
389 
390   }
391 
392 #else
393   if (!Loc.isUnknown()) {
394 
395     DILocation cDILoc(Loc.getAsMDNode(F->getContext()));
396 
397     StringRef instFilename = cDILoc.getFilename();
398 
399     /* Continue only if we know where we actually are */
400     return instFilename.str();
401 
402   }
403 
404 #endif
405 
406   return std::string("");
407 
408 }
409 
isInInstrumentList(llvm::Function * F,std::string Filename)410 bool isInInstrumentList(llvm::Function *F, std::string Filename) {
411 
412   bool return_default = true;
413 
414   // is this a function with code? If it is external we don't instrument it
415   // anyway and it can't be in the instrument file list. Or if it is it is
416   // ignored.
417   if (!F->size() || isIgnoreFunction(F)) return false;
418 
419   if (!denyListFiles.empty() || !denyListFunctions.empty()) {
420 
421     if (!denyListFunctions.empty()) {
422 
423       std::string instFunction = F->getName().str();
424 
425       for (std::list<std::string>::iterator it = denyListFunctions.begin();
426            it != denyListFunctions.end(); ++it) {
427 
428         /* We don't check for filename equality here because
429          * filenames might actually be full paths. Instead we
430          * check that the actual filename ends in the filename
431          * specified in the list. We also allow UNIX-style pattern
432          * matching */
433 
434         if (instFunction.length() >= it->length()) {
435 
436           if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) {
437 
438             if (debug)
439               DEBUGF(
440                   "Function %s is in the deny function list, not instrumenting "
441                   "... \n",
442                   instFunction.c_str());
443             return false;
444 
445           }
446 
447         }
448 
449       }
450 
451     }
452 
453     if (!denyListFiles.empty()) {
454 
455       std::string source_file = getSourceName(F);
456 
457       if (source_file.empty()) { source_file = Filename; }
458 
459       if (!source_file.empty()) {
460 
461         for (std::list<std::string>::iterator it = denyListFiles.begin();
462              it != denyListFiles.end(); ++it) {
463 
464           /* We don't check for filename equality here because
465            * filenames might actually be full paths. Instead we
466            * check that the actual filename ends in the filename
467            * specified in the list. We also allow UNIX-style pattern
468            * matching */
469 
470           if (source_file.length() >= it->length()) {
471 
472             if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) {
473 
474               return false;
475 
476             }
477 
478           }
479 
480         }
481 
482       } else {
483 
484         // we could not find out the location. in this case we say it is not
485         // in the instrument file list
486         if (!be_quiet)
487           WARNF(
488               "No debug information found for function %s, will be "
489               "instrumented (recompile with -g -O[1-3] and use a modern llvm).",
490               F->getName().str().c_str());
491 
492       }
493 
494     }
495 
496   }
497 
498   // if we do not have a instrument file list return true
499   if (!allowListFiles.empty() || !allowListFunctions.empty()) {
500 
501     return_default = false;
502 
503     if (!allowListFunctions.empty()) {
504 
505       std::string instFunction = F->getName().str();
506 
507       for (std::list<std::string>::iterator it = allowListFunctions.begin();
508            it != allowListFunctions.end(); ++it) {
509 
510         /* We don't check for filename equality here because
511          * filenames might actually be full paths. Instead we
512          * check that the actual filename ends in the filename
513          * specified in the list. We also allow UNIX-style pattern
514          * matching */
515 
516         if (instFunction.length() >= it->length()) {
517 
518           if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) {
519 
520             if (debug)
521               DEBUGF(
522                   "Function %s is in the allow function list, instrumenting "
523                   "... \n",
524                   instFunction.c_str());
525             return true;
526 
527           }
528 
529         }
530 
531       }
532 
533     }
534 
535     if (!allowListFiles.empty()) {
536 
537       std::string source_file = getSourceName(F);
538 
539       if (source_file.empty()) { source_file = Filename; }
540 
541       if (!source_file.empty()) {
542 
543         for (std::list<std::string>::iterator it = allowListFiles.begin();
544              it != allowListFiles.end(); ++it) {
545 
546           /* We don't check for filename equality here because
547            * filenames might actually be full paths. Instead we
548            * check that the actual filename ends in the filename
549            * specified in the list. We also allow UNIX-style pattern
550            * matching */
551 
552           if (source_file.length() >= it->length()) {
553 
554             if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) {
555 
556               if (debug)
557                 DEBUGF(
558                     "Function %s is in the allowlist (%s), instrumenting ... "
559                     "\n",
560                     F->getName().str().c_str(), source_file.c_str());
561               return true;
562 
563             }
564 
565           }
566 
567         }
568 
569       } else {
570 
571         // we could not find out the location. In this case we say it is not
572         // in the instrument file list
573         if (!be_quiet)
574           WARNF(
575               "No debug information found for function %s, will not be "
576               "instrumented (recompile with -g -O[1-3] and use a modern llvm).",
577               F->getName().str().c_str());
578         return false;
579 
580       }
581 
582     }
583 
584   }
585 
586   return return_default;
587 
588 }
589 
590 // Calculate the number of average collisions that would occur if all
591 // location IDs would be assigned randomly (like normal afl/AFL++).
592 // This uses the "balls in bins" algorithm.
calculateCollisions(uint32_t edges)593 unsigned long long int calculateCollisions(uint32_t edges) {
594 
595   double                 bins = MAP_SIZE;
596   double                 balls = edges;
597   double                 step1 = 1 - (1 / bins);
598   double                 step2 = pow(step1, balls);
599   double                 step3 = bins * step2;
600   double                 step4 = round(step3);
601   unsigned long long int empty = step4;
602   unsigned long long int collisions = edges - (MAP_SIZE - empty);
603   return collisions;
604 
605 }
606 
607