1*67e74705SXin Li //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This checker defines the attack surface for generic taint propagation.
11*67e74705SXin Li //
12*67e74705SXin Li // The taint information produced by it might be useful to other checkers. For
13*67e74705SXin Li // example, checkers should report errors which involve tainted data more
14*67e74705SXin Li // aggressively, even if the involved symbols are under constrained.
15*67e74705SXin Li //
16*67e74705SXin Li //===----------------------------------------------------------------------===//
17*67e74705SXin Li #include "ClangSACheckers.h"
18*67e74705SXin Li #include "clang/AST/Attr.h"
19*67e74705SXin Li #include "clang/Basic/Builtins.h"
20*67e74705SXin Li #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21*67e74705SXin Li #include "clang/StaticAnalyzer/Core/Checker.h"
22*67e74705SXin Li #include "clang/StaticAnalyzer/Core/CheckerManager.h"
23*67e74705SXin Li #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24*67e74705SXin Li #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25*67e74705SXin Li #include <climits>
26*67e74705SXin Li
27*67e74705SXin Li using namespace clang;
28*67e74705SXin Li using namespace ento;
29*67e74705SXin Li
30*67e74705SXin Li namespace {
31*67e74705SXin Li class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32*67e74705SXin Li check::PreStmt<CallExpr> > {
33*67e74705SXin Li public:
getTag()34*67e74705SXin Li static void *getTag() { static int Tag; return &Tag; }
35*67e74705SXin Li
36*67e74705SXin Li void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37*67e74705SXin Li
38*67e74705SXin Li void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39*67e74705SXin Li
40*67e74705SXin Li private:
41*67e74705SXin Li static const unsigned InvalidArgIndex = UINT_MAX;
42*67e74705SXin Li /// Denotes the return vale.
43*67e74705SXin Li static const unsigned ReturnValueIndex = UINT_MAX - 1;
44*67e74705SXin Li
45*67e74705SXin Li mutable std::unique_ptr<BugType> BT;
initBugType() const46*67e74705SXin Li inline void initBugType() const {
47*67e74705SXin Li if (!BT)
48*67e74705SXin Li BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49*67e74705SXin Li }
50*67e74705SXin Li
51*67e74705SXin Li /// \brief Catch taint related bugs. Check if tainted data is passed to a
52*67e74705SXin Li /// system call etc.
53*67e74705SXin Li bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54*67e74705SXin Li
55*67e74705SXin Li /// \brief Add taint sources on a pre-visit.
56*67e74705SXin Li void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57*67e74705SXin Li
58*67e74705SXin Li /// \brief Propagate taint generated at pre-visit.
59*67e74705SXin Li bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60*67e74705SXin Li
61*67e74705SXin Li /// \brief Add taint sources on a post visit.
62*67e74705SXin Li void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63*67e74705SXin Li
64*67e74705SXin Li /// Check if the region the expression evaluates to is the standard input,
65*67e74705SXin Li /// and thus, is tainted.
66*67e74705SXin Li static bool isStdin(const Expr *E, CheckerContext &C);
67*67e74705SXin Li
68*67e74705SXin Li /// \brief Given a pointer argument, get the symbol of the value it contains
69*67e74705SXin Li /// (points to).
70*67e74705SXin Li static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71*67e74705SXin Li
72*67e74705SXin Li /// Functions defining the attack surface.
73*67e74705SXin Li typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74*67e74705SXin Li CheckerContext &C) const;
75*67e74705SXin Li ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76*67e74705SXin Li ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77*67e74705SXin Li ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78*67e74705SXin Li
79*67e74705SXin Li /// Taint the scanned input if the file is tainted.
80*67e74705SXin Li ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81*67e74705SXin Li
82*67e74705SXin Li /// Check for CWE-134: Uncontrolled Format String.
83*67e74705SXin Li static const char MsgUncontrolledFormatString[];
84*67e74705SXin Li bool checkUncontrolledFormatString(const CallExpr *CE,
85*67e74705SXin Li CheckerContext &C) const;
86*67e74705SXin Li
87*67e74705SXin Li /// Check for:
88*67e74705SXin Li /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89*67e74705SXin Li /// CWE-78, "Failure to Sanitize Data into an OS Command"
90*67e74705SXin Li static const char MsgSanitizeSystemArgs[];
91*67e74705SXin Li bool checkSystemCall(const CallExpr *CE, StringRef Name,
92*67e74705SXin Li CheckerContext &C) const;
93*67e74705SXin Li
94*67e74705SXin Li /// Check if tainted data is used as a buffer size ins strn.. functions,
95*67e74705SXin Li /// and allocators.
96*67e74705SXin Li static const char MsgTaintedBufferSize[];
97*67e74705SXin Li bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98*67e74705SXin Li CheckerContext &C) const;
99*67e74705SXin Li
100*67e74705SXin Li /// Generate a report if the expression is tainted or points to tainted data.
101*67e74705SXin Li bool generateReportIfTainted(const Expr *E, const char Msg[],
102*67e74705SXin Li CheckerContext &C) const;
103*67e74705SXin Li
104*67e74705SXin Li
105*67e74705SXin Li typedef SmallVector<unsigned, 2> ArgVector;
106*67e74705SXin Li
107*67e74705SXin Li /// \brief A struct used to specify taint propagation rules for a function.
108*67e74705SXin Li ///
109*67e74705SXin Li /// If any of the possible taint source arguments is tainted, all of the
110*67e74705SXin Li /// destination arguments should also be tainted. Use InvalidArgIndex in the
111*67e74705SXin Li /// src list to specify that all of the arguments can introduce taint. Use
112*67e74705SXin Li /// InvalidArgIndex in the dst arguments to signify that all the non-const
113*67e74705SXin Li /// pointer and reference arguments might be tainted on return. If
114*67e74705SXin Li /// ReturnValueIndex is added to the dst list, the return value will be
115*67e74705SXin Li /// tainted.
116*67e74705SXin Li struct TaintPropagationRule {
117*67e74705SXin Li /// List of arguments which can be taint sources and should be checked.
118*67e74705SXin Li ArgVector SrcArgs;
119*67e74705SXin Li /// List of arguments which should be tainted on function return.
120*67e74705SXin Li ArgVector DstArgs;
121*67e74705SXin Li // TODO: Check if using other data structures would be more optimal.
122*67e74705SXin Li
TaintPropagationRule__anon9faa87580111::GenericTaintChecker::TaintPropagationRule123*67e74705SXin Li TaintPropagationRule() {}
124*67e74705SXin Li
TaintPropagationRule__anon9faa87580111::GenericTaintChecker::TaintPropagationRule125*67e74705SXin Li TaintPropagationRule(unsigned SArg,
126*67e74705SXin Li unsigned DArg, bool TaintRet = false) {
127*67e74705SXin Li SrcArgs.push_back(SArg);
128*67e74705SXin Li DstArgs.push_back(DArg);
129*67e74705SXin Li if (TaintRet)
130*67e74705SXin Li DstArgs.push_back(ReturnValueIndex);
131*67e74705SXin Li }
132*67e74705SXin Li
TaintPropagationRule__anon9faa87580111::GenericTaintChecker::TaintPropagationRule133*67e74705SXin Li TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134*67e74705SXin Li unsigned DArg, bool TaintRet = false) {
135*67e74705SXin Li SrcArgs.push_back(SArg1);
136*67e74705SXin Li SrcArgs.push_back(SArg2);
137*67e74705SXin Li DstArgs.push_back(DArg);
138*67e74705SXin Li if (TaintRet)
139*67e74705SXin Li DstArgs.push_back(ReturnValueIndex);
140*67e74705SXin Li }
141*67e74705SXin Li
142*67e74705SXin Li /// Get the propagation rule for a given function.
143*67e74705SXin Li static TaintPropagationRule
144*67e74705SXin Li getTaintPropagationRule(const FunctionDecl *FDecl,
145*67e74705SXin Li StringRef Name,
146*67e74705SXin Li CheckerContext &C);
147*67e74705SXin Li
addSrcArg__anon9faa87580111::GenericTaintChecker::TaintPropagationRule148*67e74705SXin Li inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon9faa87580111::GenericTaintChecker::TaintPropagationRule149*67e74705SXin Li inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
150*67e74705SXin Li
isNull__anon9faa87580111::GenericTaintChecker::TaintPropagationRule151*67e74705SXin Li inline bool isNull() const { return SrcArgs.empty(); }
152*67e74705SXin Li
isDestinationArgument__anon9faa87580111::GenericTaintChecker::TaintPropagationRule153*67e74705SXin Li inline bool isDestinationArgument(unsigned ArgNum) const {
154*67e74705SXin Li return (std::find(DstArgs.begin(),
155*67e74705SXin Li DstArgs.end(), ArgNum) != DstArgs.end());
156*67e74705SXin Li }
157*67e74705SXin Li
isTaintedOrPointsToTainted__anon9faa87580111::GenericTaintChecker::TaintPropagationRule158*67e74705SXin Li static inline bool isTaintedOrPointsToTainted(const Expr *E,
159*67e74705SXin Li ProgramStateRef State,
160*67e74705SXin Li CheckerContext &C) {
161*67e74705SXin Li return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162*67e74705SXin Li (E->getType().getTypePtr()->isPointerType() &&
163*67e74705SXin Li State->isTainted(getPointedToSymbol(C, E))));
164*67e74705SXin Li }
165*67e74705SXin Li
166*67e74705SXin Li /// \brief Pre-process a function which propagates taint according to the
167*67e74705SXin Li /// taint rule.
168*67e74705SXin Li ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169*67e74705SXin Li
170*67e74705SXin Li };
171*67e74705SXin Li };
172*67e74705SXin Li
173*67e74705SXin Li const unsigned GenericTaintChecker::ReturnValueIndex;
174*67e74705SXin Li const unsigned GenericTaintChecker::InvalidArgIndex;
175*67e74705SXin Li
176*67e74705SXin Li const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177*67e74705SXin Li "Untrusted data is used as a format string "
178*67e74705SXin Li "(CWE-134: Uncontrolled Format String)";
179*67e74705SXin Li
180*67e74705SXin Li const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181*67e74705SXin Li "Untrusted data is passed to a system call "
182*67e74705SXin Li "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183*67e74705SXin Li
184*67e74705SXin Li const char GenericTaintChecker::MsgTaintedBufferSize[] =
185*67e74705SXin Li "Untrusted data is used to specify the buffer size "
186*67e74705SXin Li "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187*67e74705SXin Li "character data and the null terminator)";
188*67e74705SXin Li
189*67e74705SXin Li } // end of anonymous namespace
190*67e74705SXin Li
191*67e74705SXin Li /// A set which is used to pass information from call pre-visit instruction
192*67e74705SXin Li /// to the call post-visit. The values are unsigned integers, which are either
193*67e74705SXin Li /// ReturnValueIndex, or indexes of the pointer/reference argument, which
194*67e74705SXin Li /// points to data, which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit,unsigned)195*67e74705SXin Li REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
196*67e74705SXin Li
197*67e74705SXin Li GenericTaintChecker::TaintPropagationRule
198*67e74705SXin Li GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199*67e74705SXin Li const FunctionDecl *FDecl,
200*67e74705SXin Li StringRef Name,
201*67e74705SXin Li CheckerContext &C) {
202*67e74705SXin Li // TODO: Currently, we might lose precision here: we always mark a return
203*67e74705SXin Li // value as tainted even if it's just a pointer, pointing to tainted data.
204*67e74705SXin Li
205*67e74705SXin Li // Check for exact name match for functions without builtin substitutes.
206*67e74705SXin Li TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207*67e74705SXin Li .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208*67e74705SXin Li .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209*67e74705SXin Li .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
210*67e74705SXin Li .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211*67e74705SXin Li .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212*67e74705SXin Li .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213*67e74705SXin Li .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214*67e74705SXin Li .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215*67e74705SXin Li .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216*67e74705SXin Li .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217*67e74705SXin Li .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
218*67e74705SXin Li .Case("read", TaintPropagationRule(0, 2, 1, true))
219*67e74705SXin Li .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
220*67e74705SXin Li .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221*67e74705SXin Li .Case("fgets", TaintPropagationRule(2, 0, true))
222*67e74705SXin Li .Case("getline", TaintPropagationRule(2, 0))
223*67e74705SXin Li .Case("getdelim", TaintPropagationRule(3, 0))
224*67e74705SXin Li .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
225*67e74705SXin Li .Default(TaintPropagationRule());
226*67e74705SXin Li
227*67e74705SXin Li if (!Rule.isNull())
228*67e74705SXin Li return Rule;
229*67e74705SXin Li
230*67e74705SXin Li // Check if it's one of the memory setting/copying functions.
231*67e74705SXin Li // This check is specialized but faster then calling isCLibraryFunction.
232*67e74705SXin Li unsigned BId = 0;
233*67e74705SXin Li if ( (BId = FDecl->getMemoryFunctionKind()) )
234*67e74705SXin Li switch(BId) {
235*67e74705SXin Li case Builtin::BImemcpy:
236*67e74705SXin Li case Builtin::BImemmove:
237*67e74705SXin Li case Builtin::BIstrncpy:
238*67e74705SXin Li case Builtin::BIstrncat:
239*67e74705SXin Li return TaintPropagationRule(1, 2, 0, true);
240*67e74705SXin Li case Builtin::BIstrlcpy:
241*67e74705SXin Li case Builtin::BIstrlcat:
242*67e74705SXin Li return TaintPropagationRule(1, 2, 0, false);
243*67e74705SXin Li case Builtin::BIstrndup:
244*67e74705SXin Li return TaintPropagationRule(0, 1, ReturnValueIndex);
245*67e74705SXin Li
246*67e74705SXin Li default:
247*67e74705SXin Li break;
248*67e74705SXin Li };
249*67e74705SXin Li
250*67e74705SXin Li // Process all other functions which could be defined as builtins.
251*67e74705SXin Li if (Rule.isNull()) {
252*67e74705SXin Li if (C.isCLibraryFunction(FDecl, "snprintf") ||
253*67e74705SXin Li C.isCLibraryFunction(FDecl, "sprintf"))
254*67e74705SXin Li return TaintPropagationRule(InvalidArgIndex, 0, true);
255*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256*67e74705SXin Li C.isCLibraryFunction(FDecl, "stpcpy") ||
257*67e74705SXin Li C.isCLibraryFunction(FDecl, "strcat"))
258*67e74705SXin Li return TaintPropagationRule(1, 0, true);
259*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "bcopy"))
260*67e74705SXin Li return TaintPropagationRule(0, 2, 1, false);
261*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "strdup") ||
262*67e74705SXin Li C.isCLibraryFunction(FDecl, "strdupa"))
263*67e74705SXin Li return TaintPropagationRule(0, ReturnValueIndex);
264*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265*67e74705SXin Li return TaintPropagationRule(0, ReturnValueIndex);
266*67e74705SXin Li }
267*67e74705SXin Li
268*67e74705SXin Li // Skipping the following functions, since they might be used for cleansing
269*67e74705SXin Li // or smart memory copy:
270*67e74705SXin Li // - memccpy - copying until hitting a special character.
271*67e74705SXin Li
272*67e74705SXin Li return TaintPropagationRule();
273*67e74705SXin Li }
274*67e74705SXin Li
checkPreStmt(const CallExpr * CE,CheckerContext & C) const275*67e74705SXin Li void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276*67e74705SXin Li CheckerContext &C) const {
277*67e74705SXin Li // Check for errors first.
278*67e74705SXin Li if (checkPre(CE, C))
279*67e74705SXin Li return;
280*67e74705SXin Li
281*67e74705SXin Li // Add taint second.
282*67e74705SXin Li addSourcesPre(CE, C);
283*67e74705SXin Li }
284*67e74705SXin Li
checkPostStmt(const CallExpr * CE,CheckerContext & C) const285*67e74705SXin Li void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286*67e74705SXin Li CheckerContext &C) const {
287*67e74705SXin Li if (propagateFromPre(CE, C))
288*67e74705SXin Li return;
289*67e74705SXin Li addSourcesPost(CE, C);
290*67e74705SXin Li }
291*67e74705SXin Li
addSourcesPre(const CallExpr * CE,CheckerContext & C) const292*67e74705SXin Li void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293*67e74705SXin Li CheckerContext &C) const {
294*67e74705SXin Li ProgramStateRef State = nullptr;
295*67e74705SXin Li const FunctionDecl *FDecl = C.getCalleeDecl(CE);
296*67e74705SXin Li if (!FDecl || FDecl->getKind() != Decl::Function)
297*67e74705SXin Li return;
298*67e74705SXin Li
299*67e74705SXin Li StringRef Name = C.getCalleeName(FDecl);
300*67e74705SXin Li if (Name.empty())
301*67e74705SXin Li return;
302*67e74705SXin Li
303*67e74705SXin Li // First, try generating a propagation rule for this function.
304*67e74705SXin Li TaintPropagationRule Rule =
305*67e74705SXin Li TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306*67e74705SXin Li if (!Rule.isNull()) {
307*67e74705SXin Li State = Rule.process(CE, C);
308*67e74705SXin Li if (!State)
309*67e74705SXin Li return;
310*67e74705SXin Li C.addTransition(State);
311*67e74705SXin Li return;
312*67e74705SXin Li }
313*67e74705SXin Li
314*67e74705SXin Li // Otherwise, check if we have custom pre-processing implemented.
315*67e74705SXin Li FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316*67e74705SXin Li .Case("fscanf", &GenericTaintChecker::preFscanf)
317*67e74705SXin Li .Default(nullptr);
318*67e74705SXin Li // Check and evaluate the call.
319*67e74705SXin Li if (evalFunction)
320*67e74705SXin Li State = (this->*evalFunction)(CE, C);
321*67e74705SXin Li if (!State)
322*67e74705SXin Li return;
323*67e74705SXin Li C.addTransition(State);
324*67e74705SXin Li
325*67e74705SXin Li }
326*67e74705SXin Li
propagateFromPre(const CallExpr * CE,CheckerContext & C) const327*67e74705SXin Li bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328*67e74705SXin Li CheckerContext &C) const {
329*67e74705SXin Li ProgramStateRef State = C.getState();
330*67e74705SXin Li
331*67e74705SXin Li // Depending on what was tainted at pre-visit, we determined a set of
332*67e74705SXin Li // arguments which should be tainted after the function returns. These are
333*67e74705SXin Li // stored in the state as TaintArgsOnPostVisit set.
334*67e74705SXin Li TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335*67e74705SXin Li if (TaintArgs.isEmpty())
336*67e74705SXin Li return false;
337*67e74705SXin Li
338*67e74705SXin Li for (llvm::ImmutableSet<unsigned>::iterator
339*67e74705SXin Li I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340*67e74705SXin Li unsigned ArgNum = *I;
341*67e74705SXin Li
342*67e74705SXin Li // Special handling for the tainted return value.
343*67e74705SXin Li if (ArgNum == ReturnValueIndex) {
344*67e74705SXin Li State = State->addTaint(CE, C.getLocationContext());
345*67e74705SXin Li continue;
346*67e74705SXin Li }
347*67e74705SXin Li
348*67e74705SXin Li // The arguments are pointer arguments. The data they are pointing at is
349*67e74705SXin Li // tainted after the call.
350*67e74705SXin Li if (CE->getNumArgs() < (ArgNum + 1))
351*67e74705SXin Li return false;
352*67e74705SXin Li const Expr* Arg = CE->getArg(ArgNum);
353*67e74705SXin Li SymbolRef Sym = getPointedToSymbol(C, Arg);
354*67e74705SXin Li if (Sym)
355*67e74705SXin Li State = State->addTaint(Sym);
356*67e74705SXin Li }
357*67e74705SXin Li
358*67e74705SXin Li // Clear up the taint info from the state.
359*67e74705SXin Li State = State->remove<TaintArgsOnPostVisit>();
360*67e74705SXin Li
361*67e74705SXin Li if (State != C.getState()) {
362*67e74705SXin Li C.addTransition(State);
363*67e74705SXin Li return true;
364*67e74705SXin Li }
365*67e74705SXin Li return false;
366*67e74705SXin Li }
367*67e74705SXin Li
addSourcesPost(const CallExpr * CE,CheckerContext & C) const368*67e74705SXin Li void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369*67e74705SXin Li CheckerContext &C) const {
370*67e74705SXin Li // Define the attack surface.
371*67e74705SXin Li // Set the evaluation function by switching on the callee name.
372*67e74705SXin Li const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373*67e74705SXin Li if (!FDecl || FDecl->getKind() != Decl::Function)
374*67e74705SXin Li return;
375*67e74705SXin Li
376*67e74705SXin Li StringRef Name = C.getCalleeName(FDecl);
377*67e74705SXin Li if (Name.empty())
378*67e74705SXin Li return;
379*67e74705SXin Li FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380*67e74705SXin Li .Case("scanf", &GenericTaintChecker::postScanf)
381*67e74705SXin Li // TODO: Add support for vfscanf & family.
382*67e74705SXin Li .Case("getchar", &GenericTaintChecker::postRetTaint)
383*67e74705SXin Li .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
384*67e74705SXin Li .Case("getenv", &GenericTaintChecker::postRetTaint)
385*67e74705SXin Li .Case("fopen", &GenericTaintChecker::postRetTaint)
386*67e74705SXin Li .Case("fdopen", &GenericTaintChecker::postRetTaint)
387*67e74705SXin Li .Case("freopen", &GenericTaintChecker::postRetTaint)
388*67e74705SXin Li .Case("getch", &GenericTaintChecker::postRetTaint)
389*67e74705SXin Li .Case("wgetch", &GenericTaintChecker::postRetTaint)
390*67e74705SXin Li .Case("socket", &GenericTaintChecker::postSocket)
391*67e74705SXin Li .Default(nullptr);
392*67e74705SXin Li
393*67e74705SXin Li // If the callee isn't defined, it is not of security concern.
394*67e74705SXin Li // Check and evaluate the call.
395*67e74705SXin Li ProgramStateRef State = nullptr;
396*67e74705SXin Li if (evalFunction)
397*67e74705SXin Li State = (this->*evalFunction)(CE, C);
398*67e74705SXin Li if (!State)
399*67e74705SXin Li return;
400*67e74705SXin Li
401*67e74705SXin Li C.addTransition(State);
402*67e74705SXin Li }
403*67e74705SXin Li
checkPre(const CallExpr * CE,CheckerContext & C) const404*67e74705SXin Li bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405*67e74705SXin Li
406*67e74705SXin Li if (checkUncontrolledFormatString(CE, C))
407*67e74705SXin Li return true;
408*67e74705SXin Li
409*67e74705SXin Li const FunctionDecl *FDecl = C.getCalleeDecl(CE);
410*67e74705SXin Li if (!FDecl || FDecl->getKind() != Decl::Function)
411*67e74705SXin Li return false;
412*67e74705SXin Li
413*67e74705SXin Li StringRef Name = C.getCalleeName(FDecl);
414*67e74705SXin Li if (Name.empty())
415*67e74705SXin Li return false;
416*67e74705SXin Li
417*67e74705SXin Li if (checkSystemCall(CE, Name, C))
418*67e74705SXin Li return true;
419*67e74705SXin Li
420*67e74705SXin Li if (checkTaintedBufferSize(CE, FDecl, C))
421*67e74705SXin Li return true;
422*67e74705SXin Li
423*67e74705SXin Li return false;
424*67e74705SXin Li }
425*67e74705SXin Li
getPointedToSymbol(CheckerContext & C,const Expr * Arg)426*67e74705SXin Li SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
427*67e74705SXin Li const Expr* Arg) {
428*67e74705SXin Li ProgramStateRef State = C.getState();
429*67e74705SXin Li SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
430*67e74705SXin Li if (AddrVal.isUnknownOrUndef())
431*67e74705SXin Li return nullptr;
432*67e74705SXin Li
433*67e74705SXin Li Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
434*67e74705SXin Li if (!AddrLoc)
435*67e74705SXin Li return nullptr;
436*67e74705SXin Li
437*67e74705SXin Li const PointerType *ArgTy =
438*67e74705SXin Li dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
439*67e74705SXin Li SVal Val = State->getSVal(*AddrLoc,
440*67e74705SXin Li ArgTy ? ArgTy->getPointeeType(): QualType());
441*67e74705SXin Li return Val.getAsSymbol();
442*67e74705SXin Li }
443*67e74705SXin Li
444*67e74705SXin Li ProgramStateRef
process(const CallExpr * CE,CheckerContext & C) const445*67e74705SXin Li GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446*67e74705SXin Li CheckerContext &C) const {
447*67e74705SXin Li ProgramStateRef State = C.getState();
448*67e74705SXin Li
449*67e74705SXin Li // Check for taint in arguments.
450*67e74705SXin Li bool IsTainted = false;
451*67e74705SXin Li for (ArgVector::const_iterator I = SrcArgs.begin(),
452*67e74705SXin Li E = SrcArgs.end(); I != E; ++I) {
453*67e74705SXin Li unsigned ArgNum = *I;
454*67e74705SXin Li
455*67e74705SXin Li if (ArgNum == InvalidArgIndex) {
456*67e74705SXin Li // Check if any of the arguments is tainted, but skip the
457*67e74705SXin Li // destination arguments.
458*67e74705SXin Li for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459*67e74705SXin Li if (isDestinationArgument(i))
460*67e74705SXin Li continue;
461*67e74705SXin Li if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
462*67e74705SXin Li break;
463*67e74705SXin Li }
464*67e74705SXin Li break;
465*67e74705SXin Li }
466*67e74705SXin Li
467*67e74705SXin Li if (CE->getNumArgs() < (ArgNum + 1))
468*67e74705SXin Li return State;
469*67e74705SXin Li if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
470*67e74705SXin Li break;
471*67e74705SXin Li }
472*67e74705SXin Li if (!IsTainted)
473*67e74705SXin Li return State;
474*67e74705SXin Li
475*67e74705SXin Li // Mark the arguments which should be tainted after the function returns.
476*67e74705SXin Li for (ArgVector::const_iterator I = DstArgs.begin(),
477*67e74705SXin Li E = DstArgs.end(); I != E; ++I) {
478*67e74705SXin Li unsigned ArgNum = *I;
479*67e74705SXin Li
480*67e74705SXin Li // Should we mark all arguments as tainted?
481*67e74705SXin Li if (ArgNum == InvalidArgIndex) {
482*67e74705SXin Li // For all pointer and references that were passed in:
483*67e74705SXin Li // If they are not pointing to const data, mark data as tainted.
484*67e74705SXin Li // TODO: So far we are just going one level down; ideally we'd need to
485*67e74705SXin Li // recurse here.
486*67e74705SXin Li for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487*67e74705SXin Li const Expr *Arg = CE->getArg(i);
488*67e74705SXin Li // Process pointer argument.
489*67e74705SXin Li const Type *ArgTy = Arg->getType().getTypePtr();
490*67e74705SXin Li QualType PType = ArgTy->getPointeeType();
491*67e74705SXin Li if ((!PType.isNull() && !PType.isConstQualified())
492*67e74705SXin Li || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493*67e74705SXin Li State = State->add<TaintArgsOnPostVisit>(i);
494*67e74705SXin Li }
495*67e74705SXin Li continue;
496*67e74705SXin Li }
497*67e74705SXin Li
498*67e74705SXin Li // Should mark the return value?
499*67e74705SXin Li if (ArgNum == ReturnValueIndex) {
500*67e74705SXin Li State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501*67e74705SXin Li continue;
502*67e74705SXin Li }
503*67e74705SXin Li
504*67e74705SXin Li // Mark the given argument.
505*67e74705SXin Li assert(ArgNum < CE->getNumArgs());
506*67e74705SXin Li State = State->add<TaintArgsOnPostVisit>(ArgNum);
507*67e74705SXin Li }
508*67e74705SXin Li
509*67e74705SXin Li return State;
510*67e74705SXin Li }
511*67e74705SXin Li
512*67e74705SXin Li
513*67e74705SXin Li // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514*67e74705SXin Li // and arg 1 should get taint.
preFscanf(const CallExpr * CE,CheckerContext & C) const515*67e74705SXin Li ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
516*67e74705SXin Li CheckerContext &C) const {
517*67e74705SXin Li assert(CE->getNumArgs() >= 2);
518*67e74705SXin Li ProgramStateRef State = C.getState();
519*67e74705SXin Li
520*67e74705SXin Li // Check is the file descriptor is tainted.
521*67e74705SXin Li if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
522*67e74705SXin Li isStdin(CE->getArg(0), C)) {
523*67e74705SXin Li // All arguments except for the first two should get taint.
524*67e74705SXin Li for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525*67e74705SXin Li State = State->add<TaintArgsOnPostVisit>(i);
526*67e74705SXin Li return State;
527*67e74705SXin Li }
528*67e74705SXin Li
529*67e74705SXin Li return nullptr;
530*67e74705SXin Li }
531*67e74705SXin Li
532*67e74705SXin Li
533*67e74705SXin Li // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(const CallExpr * CE,CheckerContext & C) const534*67e74705SXin Li ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535*67e74705SXin Li CheckerContext &C) const {
536*67e74705SXin Li ProgramStateRef State = C.getState();
537*67e74705SXin Li if (CE->getNumArgs() < 3)
538*67e74705SXin Li return State;
539*67e74705SXin Li
540*67e74705SXin Li SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541*67e74705SXin Li StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542*67e74705SXin Li // White list the internal communication protocols.
543*67e74705SXin Li if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544*67e74705SXin Li DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545*67e74705SXin Li return State;
546*67e74705SXin Li State = State->addTaint(CE, C.getLocationContext());
547*67e74705SXin Li return State;
548*67e74705SXin Li }
549*67e74705SXin Li
postScanf(const CallExpr * CE,CheckerContext & C) const550*67e74705SXin Li ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551*67e74705SXin Li CheckerContext &C) const {
552*67e74705SXin Li ProgramStateRef State = C.getState();
553*67e74705SXin Li if (CE->getNumArgs() < 2)
554*67e74705SXin Li return State;
555*67e74705SXin Li
556*67e74705SXin Li // All arguments except for the very first one should get taint.
557*67e74705SXin Li for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558*67e74705SXin Li // The arguments are pointer arguments. The data they are pointing at is
559*67e74705SXin Li // tainted after the call.
560*67e74705SXin Li const Expr* Arg = CE->getArg(i);
561*67e74705SXin Li SymbolRef Sym = getPointedToSymbol(C, Arg);
562*67e74705SXin Li if (Sym)
563*67e74705SXin Li State = State->addTaint(Sym);
564*67e74705SXin Li }
565*67e74705SXin Li return State;
566*67e74705SXin Li }
567*67e74705SXin Li
postRetTaint(const CallExpr * CE,CheckerContext & C) const568*67e74705SXin Li ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569*67e74705SXin Li CheckerContext &C) const {
570*67e74705SXin Li return C.getState()->addTaint(CE, C.getLocationContext());
571*67e74705SXin Li }
572*67e74705SXin Li
isStdin(const Expr * E,CheckerContext & C)573*67e74705SXin Li bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574*67e74705SXin Li ProgramStateRef State = C.getState();
575*67e74705SXin Li SVal Val = State->getSVal(E, C.getLocationContext());
576*67e74705SXin Li
577*67e74705SXin Li // stdin is a pointer, so it would be a region.
578*67e74705SXin Li const MemRegion *MemReg = Val.getAsRegion();
579*67e74705SXin Li
580*67e74705SXin Li // The region should be symbolic, we do not know it's value.
581*67e74705SXin Li const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582*67e74705SXin Li if (!SymReg)
583*67e74705SXin Li return false;
584*67e74705SXin Li
585*67e74705SXin Li // Get it's symbol and find the declaration region it's pointing to.
586*67e74705SXin Li const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587*67e74705SXin Li if (!Sm)
588*67e74705SXin Li return false;
589*67e74705SXin Li const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590*67e74705SXin Li if (!DeclReg)
591*67e74705SXin Li return false;
592*67e74705SXin Li
593*67e74705SXin Li // This region corresponds to a declaration, find out if it's a global/extern
594*67e74705SXin Li // variable named stdin with the proper type.
595*67e74705SXin Li if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596*67e74705SXin Li D = D->getCanonicalDecl();
597*67e74705SXin Li if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598*67e74705SXin Li if (const PointerType * PtrTy =
599*67e74705SXin Li dyn_cast<PointerType>(D->getType().getTypePtr()))
600*67e74705SXin Li if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601*67e74705SXin Li return true;
602*67e74705SXin Li }
603*67e74705SXin Li return false;
604*67e74705SXin Li }
605*67e74705SXin Li
getPrintfFormatArgumentNum(const CallExpr * CE,const CheckerContext & C,unsigned int & ArgNum)606*67e74705SXin Li static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607*67e74705SXin Li const CheckerContext &C,
608*67e74705SXin Li unsigned int &ArgNum) {
609*67e74705SXin Li // Find if the function contains a format string argument.
610*67e74705SXin Li // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611*67e74705SXin Li // vsnprintf, syslog, custom annotated functions.
612*67e74705SXin Li const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613*67e74705SXin Li if (!FDecl)
614*67e74705SXin Li return false;
615*67e74705SXin Li for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
616*67e74705SXin Li ArgNum = Format->getFormatIdx() - 1;
617*67e74705SXin Li if ((Format->getType()->getName() == "printf") &&
618*67e74705SXin Li CE->getNumArgs() > ArgNum)
619*67e74705SXin Li return true;
620*67e74705SXin Li }
621*67e74705SXin Li
622*67e74705SXin Li // Or if a function is named setproctitle (this is a heuristic).
623*67e74705SXin Li if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624*67e74705SXin Li ArgNum = 0;
625*67e74705SXin Li return true;
626*67e74705SXin Li }
627*67e74705SXin Li
628*67e74705SXin Li return false;
629*67e74705SXin Li }
630*67e74705SXin Li
generateReportIfTainted(const Expr * E,const char Msg[],CheckerContext & C) const631*67e74705SXin Li bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632*67e74705SXin Li const char Msg[],
633*67e74705SXin Li CheckerContext &C) const {
634*67e74705SXin Li assert(E);
635*67e74705SXin Li
636*67e74705SXin Li // Check for taint.
637*67e74705SXin Li ProgramStateRef State = C.getState();
638*67e74705SXin Li if (!State->isTainted(getPointedToSymbol(C, E)) &&
639*67e74705SXin Li !State->isTainted(E, C.getLocationContext()))
640*67e74705SXin Li return false;
641*67e74705SXin Li
642*67e74705SXin Li // Generate diagnostic.
643*67e74705SXin Li if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
644*67e74705SXin Li initBugType();
645*67e74705SXin Li auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
646*67e74705SXin Li report->addRange(E->getSourceRange());
647*67e74705SXin Li C.emitReport(std::move(report));
648*67e74705SXin Li return true;
649*67e74705SXin Li }
650*67e74705SXin Li return false;
651*67e74705SXin Li }
652*67e74705SXin Li
checkUncontrolledFormatString(const CallExpr * CE,CheckerContext & C) const653*67e74705SXin Li bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654*67e74705SXin Li CheckerContext &C) const{
655*67e74705SXin Li // Check if the function contains a format string argument.
656*67e74705SXin Li unsigned int ArgNum = 0;
657*67e74705SXin Li if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658*67e74705SXin Li return false;
659*67e74705SXin Li
660*67e74705SXin Li // If either the format string content or the pointer itself are tainted, warn.
661*67e74705SXin Li return generateReportIfTainted(CE->getArg(ArgNum),
662*67e74705SXin Li MsgUncontrolledFormatString, C);
663*67e74705SXin Li }
664*67e74705SXin Li
checkSystemCall(const CallExpr * CE,StringRef Name,CheckerContext & C) const665*67e74705SXin Li bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
666*67e74705SXin Li StringRef Name,
667*67e74705SXin Li CheckerContext &C) const {
668*67e74705SXin Li // TODO: It might make sense to run this check on demand. In some cases,
669*67e74705SXin Li // we should check if the environment has been cleansed here. We also might
670*67e74705SXin Li // need to know if the user was reset before these calls(seteuid).
671*67e74705SXin Li unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
672*67e74705SXin Li .Case("system", 0)
673*67e74705SXin Li .Case("popen", 0)
674*67e74705SXin Li .Case("execl", 0)
675*67e74705SXin Li .Case("execle", 0)
676*67e74705SXin Li .Case("execlp", 0)
677*67e74705SXin Li .Case("execv", 0)
678*67e74705SXin Li .Case("execvp", 0)
679*67e74705SXin Li .Case("execvP", 0)
680*67e74705SXin Li .Case("execve", 0)
681*67e74705SXin Li .Case("dlopen", 0)
682*67e74705SXin Li .Default(UINT_MAX);
683*67e74705SXin Li
684*67e74705SXin Li if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
685*67e74705SXin Li return false;
686*67e74705SXin Li
687*67e74705SXin Li return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
688*67e74705SXin Li }
689*67e74705SXin Li
690*67e74705SXin Li // TODO: Should this check be a part of the CString checker?
691*67e74705SXin Li // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallExpr * CE,const FunctionDecl * FDecl,CheckerContext & C) const692*67e74705SXin Li bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
693*67e74705SXin Li const FunctionDecl *FDecl,
694*67e74705SXin Li CheckerContext &C) const {
695*67e74705SXin Li // If the function has a buffer size argument, set ArgNum.
696*67e74705SXin Li unsigned ArgNum = InvalidArgIndex;
697*67e74705SXin Li unsigned BId = 0;
698*67e74705SXin Li if ( (BId = FDecl->getMemoryFunctionKind()) )
699*67e74705SXin Li switch(BId) {
700*67e74705SXin Li case Builtin::BImemcpy:
701*67e74705SXin Li case Builtin::BImemmove:
702*67e74705SXin Li case Builtin::BIstrncpy:
703*67e74705SXin Li ArgNum = 2;
704*67e74705SXin Li break;
705*67e74705SXin Li case Builtin::BIstrndup:
706*67e74705SXin Li ArgNum = 1;
707*67e74705SXin Li break;
708*67e74705SXin Li default:
709*67e74705SXin Li break;
710*67e74705SXin Li };
711*67e74705SXin Li
712*67e74705SXin Li if (ArgNum == InvalidArgIndex) {
713*67e74705SXin Li if (C.isCLibraryFunction(FDecl, "malloc") ||
714*67e74705SXin Li C.isCLibraryFunction(FDecl, "calloc") ||
715*67e74705SXin Li C.isCLibraryFunction(FDecl, "alloca"))
716*67e74705SXin Li ArgNum = 0;
717*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "memccpy"))
718*67e74705SXin Li ArgNum = 3;
719*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "realloc"))
720*67e74705SXin Li ArgNum = 1;
721*67e74705SXin Li else if (C.isCLibraryFunction(FDecl, "bcopy"))
722*67e74705SXin Li ArgNum = 2;
723*67e74705SXin Li }
724*67e74705SXin Li
725*67e74705SXin Li return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
726*67e74705SXin Li generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
727*67e74705SXin Li }
728*67e74705SXin Li
registerGenericTaintChecker(CheckerManager & mgr)729*67e74705SXin Li void ento::registerGenericTaintChecker(CheckerManager &mgr) {
730*67e74705SXin Li mgr.registerChecker<GenericTaintChecker>();
731*67e74705SXin Li }
732