27 using namespace clang;
31 class GenericTaintChecker :
public Checker< check::PostStmt<CallExpr>,
32 check::PreStmt<CallExpr> > {
/// Returns the address of a function-local static int. Because the variable
/// is static, every call yields the same pointer value.
/// NOTE(review): presumably this pointer serves as an identity tag for the
/// checker; the consumer of the tag is not visible here - confirm.
34 static void *getTag() {
static int Tag;
return &Tag; }
/// Sentinel argument index (UINT_MAX). Elsewhere in this file, an index
/// equal to InvalidArgIndex is compared against before looping over all of
/// the call's arguments instead of addressing a single one.
41 static const unsigned InvalidArgIndex =
UINT_MAX;
/// Sentinel index (UINT_MAX - 1) that stands for the call's return value
/// rather than one of its arguments.
43 static const unsigned ReturnValueIndex =
UINT_MAX - 1;
45 mutable std::unique_ptr<BugType> BT;
46 inline void initBugType()
const {
48 BT.reset(
new BugType(
this,
"Use of Untrusted Data",
"Untrusted Data"));
82 static const char MsgUncontrolledFormatString[];
83 bool checkUncontrolledFormatString(
const CallExpr *CE,
89 static const char MsgSanitizeSystemArgs[];
90 bool checkSystemCall(
const CallExpr *CE, StringRef Name,
95 static const char MsgTaintedBufferSize[];
96 bool checkTaintedBufferSize(
const CallExpr *CE,
const FunctionDecl *FDecl,
100 bool generateReportIfTainted(
const Expr *E,
const char Msg[],
105 class TaintBugVisitor
/// Constructs the visitor, copying the given SVal into the member V.
111 TaintBugVisitor(
const SVal V) : V(V) {}
/// Adds the stored value V to the supplied FoldingSetNodeID, so the ID
/// reflects which value this visitor was constructed with.
112 void Profile(llvm::FoldingSetNodeID &
ID)
const override { ID.Add(V); }
114 std::shared_ptr<PathDiagnosticPiece> VisitNode(
const ExplodedNode *N,
131 struct TaintPropagationRule {
/// Default constructor with an empty body: it adds no source or destination
/// argument indices.
/// NOTE(review): presumably a default-constructed rule reports isNull() ==
/// true, since isNull() tests SrcArgs.empty() - confirm against the member
/// declarations.
138 TaintPropagationRule() {}
140 TaintPropagationRule(
unsigned SArg,
141 unsigned DArg,
bool TaintRet =
false) {
142 SrcArgs.push_back(SArg);
143 DstArgs.push_back(DArg);
145 DstArgs.push_back(ReturnValueIndex);
148 TaintPropagationRule(
unsigned SArg1,
unsigned SArg2,
149 unsigned DArg,
bool TaintRet =
false) {
150 SrcArgs.push_back(SArg1);
151 SrcArgs.push_back(SArg2);
152 DstArgs.push_back(DArg);
154 DstArgs.push_back(ReturnValueIndex);
158 static TaintPropagationRule
/// Appends argument index \p A to the list of source arguments (SrcArgs).
163 inline void addSrcArg(
unsigned A) { SrcArgs.push_back(A); }
/// Appends argument index \p A to the list of destination arguments
/// (DstArgs).
164 inline void addDstArg(
unsigned A) { DstArgs.push_back(A); }
/// Returns true when the rule has no source arguments (SrcArgs is empty).
/// Callers in this file test !isNull() before applying a rule.
166 inline bool isNull()
const {
return SrcArgs.empty(); }
168 inline bool isDestinationArgument(
unsigned ArgNum)
const {
169 return (std::find(DstArgs.begin(),
170 DstArgs.end(), ArgNum) != DstArgs.end());
173 static inline bool isTaintedOrPointsToTainted(
const Expr *E,
183 return (V && State->isTainted(*V));
// Out-of-class definitions for the static const index members declared
// (with their initializers) inside GenericTaintChecker.
193 const unsigned GenericTaintChecker::ReturnValueIndex;
194 const unsigned GenericTaintChecker::InvalidArgIndex;
// Message passed to generateReportIfTainted by checkUncontrolledFormatString.
196 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
197 "Untrusted data is used as a format string " 198 "(CWE-134: Uncontrolled Format String)";
// Message passed to generateReportIfTainted by checkSystemCall.
200 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
201 "Untrusted data is passed to a system call " 202 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Message passed to generateReportIfTainted by checkTaintedBufferSize.
204 const char GenericTaintChecker::MsgTaintedBufferSize[] =
205 "Untrusted data is used to specify the buffer size " 206 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 207 "character data and the null terminator)";
217 std::shared_ptr<PathDiagnosticPiece>
218 GenericTaintChecker::TaintBugVisitor::VisitNode(
const ExplodedNode *N,
235 return std::make_shared<PathDiagnosticEventPiece>(
236 L,
"Taint originated here");
239 GenericTaintChecker::TaintPropagationRule
240 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
248 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
249 .Case(
"atoi", TaintPropagationRule(0, ReturnValueIndex))
250 .Case(
"atol", TaintPropagationRule(0, ReturnValueIndex))
251 .Case(
"atoll", TaintPropagationRule(0, ReturnValueIndex))
252 .Case(
"getc", TaintPropagationRule(0, ReturnValueIndex))
253 .Case(
"fgetc", TaintPropagationRule(0, ReturnValueIndex))
254 .Case(
"getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
255 .Case(
"getw", TaintPropagationRule(0, ReturnValueIndex))
256 .Case(
"toupper", TaintPropagationRule(0, ReturnValueIndex))
257 .Case(
"tolower", TaintPropagationRule(0, ReturnValueIndex))
258 .Case(
"strchr", TaintPropagationRule(0, ReturnValueIndex))
259 .Case(
"strrchr", TaintPropagationRule(0, ReturnValueIndex))
260 .Case(
"read", TaintPropagationRule(0, 2, 1,
true))
261 .Case(
"pread", TaintPropagationRule(InvalidArgIndex, 1,
true))
262 .Case(
"gets", TaintPropagationRule(InvalidArgIndex, 0,
true))
263 .Case(
"fgets", TaintPropagationRule(2, 0,
true))
264 .Case(
"getline", TaintPropagationRule(2, 0))
265 .Case(
"getdelim", TaintPropagationRule(3, 0))
266 .Case(
"fgetln", TaintPropagationRule(0, ReturnValueIndex))
267 .
Default(TaintPropagationRule());
277 case Builtin::BImemcpy:
278 case Builtin::BImemmove:
279 case Builtin::BIstrncpy:
280 case Builtin::BIstrncat:
281 return TaintPropagationRule(1, 2, 0,
true);
282 case Builtin::BIstrlcpy:
283 case Builtin::BIstrlcat:
284 return TaintPropagationRule(1, 2, 0,
false);
285 case Builtin::BIstrndup:
286 return TaintPropagationRule(0, 1, ReturnValueIndex);
296 return TaintPropagationRule(InvalidArgIndex, 0,
true);
300 return TaintPropagationRule(1, 0,
true);
302 return TaintPropagationRule(0, 2, 1,
false);
305 return TaintPropagationRule(0, ReturnValueIndex);
307 return TaintPropagationRule(0, ReturnValueIndex);
314 return TaintPropagationRule();
317 void GenericTaintChecker::checkPreStmt(
const CallExpr *CE,
324 addSourcesPre(CE, C);
327 void GenericTaintChecker::checkPostStmt(
const CallExpr *CE,
329 if (propagateFromPre(CE, C))
331 addSourcesPost(CE, C);
334 void GenericTaintChecker::addSourcesPre(
const CallExpr *CE,
338 if (!FDecl || FDecl->
getKind() != Decl::Function)
346 TaintPropagationRule Rule =
347 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
348 if (!Rule.isNull()) {
349 State = Rule.process(CE, C);
357 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
358 .Case(
"fscanf", &GenericTaintChecker::preFscanf)
362 State = (this->*evalFunction)(CE, C);
369 bool GenericTaintChecker::propagateFromPre(
const CallExpr *CE,
376 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
377 if (TaintArgs.isEmpty())
381 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
382 unsigned ArgNum = *I;
385 if (ArgNum == ReturnValueIndex) {
397 State = State->addTaint(*V);
401 State = State->remove<TaintArgsOnPostVisit>();
410 void GenericTaintChecker::addSourcesPost(
const CallExpr *CE,
415 if (!FDecl || FDecl->
getKind() != Decl::Function)
421 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
422 .Case(
"scanf", &GenericTaintChecker::postScanf)
424 .Case(
"getchar", &GenericTaintChecker::postRetTaint)
425 .Case(
"getchar_unlocked", &GenericTaintChecker::postRetTaint)
426 .Case(
"getenv", &GenericTaintChecker::postRetTaint)
427 .Case(
"fopen", &GenericTaintChecker::postRetTaint)
428 .Case(
"fdopen", &GenericTaintChecker::postRetTaint)
429 .Case(
"freopen", &GenericTaintChecker::postRetTaint)
430 .Case(
"getch", &GenericTaintChecker::postRetTaint)
431 .Case(
"wgetch", &GenericTaintChecker::postRetTaint)
432 .Case(
"socket", &GenericTaintChecker::postSocket)
439 State = (this->*evalFunction)(CE, C);
446 bool GenericTaintChecker::checkPre(
const CallExpr *CE,
CheckerContext &C)
const{
448 if (checkUncontrolledFormatString(CE, C))
452 if (!FDecl || FDecl->
getKind() != Decl::Function)
459 if (checkSystemCall(CE, Name, C))
462 if (checkTaintedBufferSize(CE, FDecl, C))
490 return State->getSVal(*AddrLoc, ValTy);
494 GenericTaintChecker::TaintPropagationRule::process(
const CallExpr *CE,
499 bool IsTainted =
false;
500 for (ArgVector::const_iterator I = SrcArgs.begin(),
501 E = SrcArgs.end(); I != E; ++I) {
502 unsigned ArgNum = *I;
504 if (ArgNum == InvalidArgIndex) {
507 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
508 if (isDestinationArgument(i))
510 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(i),
State, C)))
518 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(ArgNum),
State, C)))
525 for (ArgVector::const_iterator I = DstArgs.begin(),
526 E = DstArgs.end(); I != E; ++I) {
527 unsigned ArgNum = *I;
530 if (ArgNum == InvalidArgIndex) {
535 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
542 State = State->add<TaintArgsOnPostVisit>(i);
548 if (ArgNum == ReturnValueIndex) {
549 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
554 assert(ArgNum < CE->getNumArgs());
555 State = State->add<TaintArgsOnPostVisit>(ArgNum);
571 isStdin(CE->
getArg(0), C)) {
573 for (
unsigned int i = 2; i < CE->
getNumArgs(); ++i)
574 State = State->add<TaintArgsOnPostVisit>(i);
592 if (DomName.equals(
"AF_SYSTEM") || DomName.equals(
"AF_LOCAL") ||
593 DomName.equals(
"AF_UNIX") || DomName.equals(
"AF_RESERVED_36"))
606 for (
unsigned int i = 1; i < CE->
getNumArgs(); ++i) {
612 State = State->addTaint(*V);
630 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
644 if (
const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->
getDecl())) {
645 D = D->getCanonicalDecl();
646 if ((D->getName().find(
"stdin") != StringRef::npos) && D->isExternC())
648 dyn_cast<PointerType>(D->getType().getTypePtr()))
657 unsigned int &ArgNum) {
665 ArgNum = Format->getFormatIdx() - 1;
666 if ((Format->getType()->getName() ==
"printf") &&
672 if (C.
getCalleeName(CE).find(
"setproctitle") != StringRef::npos) {
680 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E,
689 if (PointedToSVal && State->isTainted(*PointedToSVal))
690 TaintedSVal = *PointedToSVal;
699 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
701 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
708 bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE,
711 unsigned int ArgNum = 0;
716 return generateReportIfTainted(CE->
getArg(ArgNum),
717 MsgUncontrolledFormatString, C);
720 bool GenericTaintChecker::checkSystemCall(
const CallExpr *CE,
726 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
742 return generateReportIfTainted(CE->
getArg(ArgNum), MsgSanitizeSystemArgs, C);
747 bool GenericTaintChecker::checkTaintedBufferSize(
const CallExpr *CE,
751 unsigned ArgNum = InvalidArgIndex;
755 case Builtin::BImemcpy:
756 case Builtin::BImemmove:
757 case Builtin::BIstrncpy:
760 case Builtin::BIstrndup:
767 if (ArgNum == InvalidArgIndex) {
780 return ArgNum != InvalidArgIndex && CE->
getNumArgs() > ArgNum &&
781 generateReportIfTainted(CE->
getArg(ArgNum), MsgTaintedBufferSize, C);
An instance of this class is created to represent a function declaration or definition.
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
MemRegion - The root abstract class for all memory regions.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Stmt - This represents one statement.
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
ExplodedNode * addTransition(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generates a new transition in the program state graph (ExplodedGraph).
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
const ProgramStateRef & getState() const
VarDecl - An instance of this class is created to represent a variable declaration or definition...
const FunctionDecl * getCalleeDecl(const CallExpr *CE) const
Get the declaration of the called function (path-sensitive).
SVal getSVal(const Stmt *S) const
Get the value of arbitrary expressions at this point in the path.
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
StringRef getCalleeName(const FunctionDecl *FunDecl) const
Get the name of the called function (path-sensitive).
This class provides a convenience implementation for clone() using the Curiously-Recurring Template Pattern.
bool isReferenceType() const
const LocationContext * getLocationContext() const
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
SymbolicRegion - A special, "non-concrete" region.
Expr - This represents one expression.
static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name=StringRef())
Returns true if the callee is an externally-visible function in the top-level namespace, such as malloc.
const Decl * getDecl() const
QualType getFILEType() const
Retrieve the C FILE type.
ExplodedNode * generateNonFatalErrorNode(ProgramStateRef State=nullptr, const ProgramPointTag *Tag=nullptr)
Generate a transition to a node that will be used to report an error.
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Optional< T > getAs() const
Convert to the specified SVal type, returning None if this SVal is not of the desired type...
bool isConstQualified() const
Determine whether this type is const-qualified.
void emitReport(std::unique_ptr< BugReport > R)
Emit the diagnostics report.
static const Stmt * getStmt(const ExplodedNode *N)
Given an exploded node, retrieve the statement that should be used for the diagnostic location...
static PathDiagnosticLocation createBegin(const Decl *D, const SourceManager &SM)
Create a location for the beginning of the declaration.
QualType getCanonicalType() const
CHECKER * registerChecker()
Used to register checkers.
const TypedValueRegion * getRegion() const
Encodes a location in the source.
SymbolRef getSymbol() const
const MemRegion * getAsRegion() const
SVal - This represents a symbolic expression, which can be either an L-value or an R-value...
FullSourceLoc asLocation() const
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
A symbol representing the value stored at a MemRegion.
Dataflow Directional Tag Classes.
ASTContext & getASTContext()
bool isValid() const
Return true if this is a valid SourceLocation object.
StringRef getMacroNameOrSpelling(SourceLocation &Loc)
Depending on whether the location corresponds to a macro, return either the macro name or the token spelling.
const ProgramStateRef & getState() const
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
bool isPointerType() const
This class provides an interface through which checkers can create individual bug reports...
const LocationContext * getLocationContext() const
Defines enum values for all the target-independent builtin functions.
SourceManager & getSourceManager()
bool isUnknownOrUndef() const
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.