27 using namespace clang;
31 class GenericTaintChecker
32 :
public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
34 static void *getTag() {
39 void checkPostStmt(
const CallExpr *CE, CheckerContext &C)
const;
41 void checkPreStmt(
const CallExpr *CE, CheckerContext &C)
const;
// Sentinel argument index. When it appears in a rule's source or destination
// list, TaintPropagationRule::process iterates over the call's arguments
// instead of addressing a single argument.
44 static const unsigned InvalidArgIndex =
UINT_MAX;
// Sentinel argument index that stands for the call's return value rather than
// one of its arguments (propagateFromPre taints the call expression itself
// when it encounters this index).
46 static const unsigned ReturnValueIndex =
UINT_MAX - 1;
48 mutable std::unique_ptr<BugType> BT;
49 inline void initBugType()
const {
51 BT.reset(
new BugType(
this,
"Use of Untrusted Data",
"Untrusted Data"));
56 bool checkPre(
const CallExpr *CE, CheckerContext &C)
const;
59 void addSourcesPre(
const CallExpr *CE, CheckerContext &C)
const;
62 bool propagateFromPre(
const CallExpr *CE, CheckerContext &C)
const;
65 void addSourcesPost(
const CallExpr *CE, CheckerContext &C)
const;
69 static bool isStdin(
const Expr *E, CheckerContext &C);
76 const CallExpr *, CheckerContext &
C)
const;
78 ProgramStateRef postSocket(
const CallExpr *CE, CheckerContext &C)
const;
79 ProgramStateRef postRetTaint(
const CallExpr *CE, CheckerContext &C)
const;
85 static const char MsgUncontrolledFormatString[];
86 bool checkUncontrolledFormatString(
const CallExpr *CE,
87 CheckerContext &C)
const;
92 static const char MsgSanitizeSystemArgs[];
93 bool checkSystemCall(
const CallExpr *CE, StringRef Name,
94 CheckerContext &C)
const;
98 static const char MsgTaintedBufferSize[];
99 bool checkTaintedBufferSize(
const CallExpr *CE,
const FunctionDecl *FDecl,
100 CheckerContext &C)
const;
103 bool generateReportIfTainted(
const Expr *E,
const char Msg[],
104 CheckerContext &C)
const;
117 struct TaintPropagationRule {
124 TaintPropagationRule() {}
126 TaintPropagationRule(
unsigned SArg,
unsigned DArg,
bool TaintRet =
false) {
127 SrcArgs.push_back(SArg);
128 DstArgs.push_back(DArg);
130 DstArgs.push_back(ReturnValueIndex);
133 TaintPropagationRule(
unsigned SArg1,
unsigned SArg2,
unsigned DArg,
134 bool TaintRet =
false) {
135 SrcArgs.push_back(SArg1);
136 SrcArgs.push_back(SArg2);
137 DstArgs.push_back(DArg);
139 DstArgs.push_back(ReturnValueIndex);
143 static TaintPropagationRule
144 getTaintPropagationRule(
const FunctionDecl *FDecl, StringRef Name,
// Appends argument index A to this rule's list of source arguments.
147 inline void addSrcArg(
unsigned A) { SrcArgs.push_back(A); }
// Appends argument index A to this rule's list of destination arguments.
148 inline void addDstArg(
unsigned A) { DstArgs.push_back(A); }
// A rule is "null" when it has no source arguments. addSourcesPre only calls
// process() on rules for which this returns false.
150 inline bool isNull()
const {
return SrcArgs.empty(); }
152 inline bool isDestinationArgument(
unsigned ArgNum)
const {
153 return (std::find(DstArgs.begin(), DstArgs.end(), ArgNum) !=
157 static inline bool isTaintedOrPointsToTainted(
const Expr *E,
160 if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
167 return (V && State->isTainted(*V));
// Namespace-scope definitions of the static const members; their values come
// from the in-class initializers (UINT_MAX - 1 and UINT_MAX respectively).
176 const unsigned GenericTaintChecker::ReturnValueIndex;
177 const unsigned GenericTaintChecker::InvalidArgIndex;
// Report message that checkUncontrolledFormatString hands to
// generateReportIfTainted for the format-string argument of a call.
179 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
180 "Untrusted data is used as a format string " 181 "(CWE-134: Uncontrolled Format String)";
// Report message that checkSystemCall hands to generateReportIfTainted for
// the selected argument of a call.
183 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
184 "Untrusted data is passed to a system call " 185 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
187 const char GenericTaintChecker::MsgTaintedBufferSize[] =
188 "Untrusted data is used to specify the buffer size " 189 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 191 "character data and the null terminator)";
201 GenericTaintChecker::TaintPropagationRule
202 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
203 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
// Look up a propagation rule by callee name.
//  - Two-argument rules are (source argument index, destination index);
//    ReturnValueIndex as the destination means the call's return value.
//  - A trailing bool is the constructor's TaintRet flag. Presumably it
//    controls whether ReturnValueIndex is also added as a destination; the
//    guarding line is not visible in this listing -- TODO confirm.
//  - The four-argument form ("read") lists two source indices before the
//    destination index.
//  - InvalidArgIndex as the source makes process() walk the call's arguments
//    in a loop instead of checking one fixed argument.
// Names that match no case yield a default-constructed rule, i.e. one to
// which no source arguments were added.
208 TaintPropagationRule Rule =
209 llvm::StringSwitch<TaintPropagationRule>(Name)
210 .Case(
"atoi", TaintPropagationRule(0, ReturnValueIndex))
211 .Case(
"atol", TaintPropagationRule(0, ReturnValueIndex))
212 .Case(
"atoll", TaintPropagationRule(0, ReturnValueIndex))
213 .Case(
"getc", TaintPropagationRule(0, ReturnValueIndex))
214 .Case(
"fgetc", TaintPropagationRule(0, ReturnValueIndex))
215 .Case(
"getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
216 .Case(
"getw", TaintPropagationRule(0, ReturnValueIndex))
217 .Case(
"toupper", TaintPropagationRule(0, ReturnValueIndex))
218 .Case(
"tolower", TaintPropagationRule(0, ReturnValueIndex))
219 .Case(
"strchr", TaintPropagationRule(0, ReturnValueIndex))
220 .Case(
"strrchr", TaintPropagationRule(0, ReturnValueIndex))
221 .Case(
"read", TaintPropagationRule(0, 2, 1,
true))
222 .Case(
"pread", TaintPropagationRule(InvalidArgIndex, 1,
true))
223 .Case(
"gets", TaintPropagationRule(InvalidArgIndex, 0,
true))
224 .Case(
"fgets", TaintPropagationRule(2, 0,
true))
225 .Case(
"getline", TaintPropagationRule(2, 0))
226 .Case(
"getdelim", TaintPropagationRule(3, 0))
227 .Case(
"fgetln", TaintPropagationRule(0, ReturnValueIndex))
228 .
Default(TaintPropagationRule());
238 case Builtin::BImemcpy:
239 case Builtin::BImemmove:
240 case Builtin::BIstrncpy:
241 case Builtin::BIstrncat:
242 return TaintPropagationRule(1, 2, 0,
true);
243 case Builtin::BIstrlcpy:
244 case Builtin::BIstrlcat:
245 return TaintPropagationRule(1, 2, 0,
false);
246 case Builtin::BIstrndup:
247 return TaintPropagationRule(0, 1, ReturnValueIndex);
255 if (C.isCLibraryFunction(FDecl,
"snprintf") ||
256 C.isCLibraryFunction(FDecl,
"sprintf"))
257 return TaintPropagationRule(InvalidArgIndex, 0,
true);
258 else if (C.isCLibraryFunction(FDecl,
"strcpy") ||
259 C.isCLibraryFunction(FDecl,
"stpcpy") ||
260 C.isCLibraryFunction(FDecl,
"strcat"))
261 return TaintPropagationRule(1, 0,
true);
262 else if (C.isCLibraryFunction(FDecl,
"bcopy"))
263 return TaintPropagationRule(0, 2, 1,
false);
264 else if (C.isCLibraryFunction(FDecl,
"strdup") ||
265 C.isCLibraryFunction(FDecl,
"strdupa"))
266 return TaintPropagationRule(0, ReturnValueIndex);
267 else if (C.isCLibraryFunction(FDecl,
"wcsdup"))
268 return TaintPropagationRule(0, ReturnValueIndex);
275 return TaintPropagationRule();
278 void GenericTaintChecker::checkPreStmt(
const CallExpr *CE,
279 CheckerContext &C)
const {
285 addSourcesPre(CE, C);
288 void GenericTaintChecker::checkPostStmt(
const CallExpr *CE,
289 CheckerContext &C)
const {
290 if (propagateFromPre(CE, C))
292 addSourcesPost(CE, C);
295 void GenericTaintChecker::addSourcesPre(
const CallExpr *CE,
296 CheckerContext &C)
const {
302 StringRef Name = C.getCalleeName(FDecl);
307 TaintPropagationRule Rule =
308 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
309 if (!Rule.isNull()) {
310 State = Rule.process(CE, C);
313 C.addTransition(State);
318 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
319 .Case(
"fscanf", &GenericTaintChecker::preFscanf)
323 State = (this->*evalFunction)(CE, C);
326 C.addTransition(State);
329 bool GenericTaintChecker::propagateFromPre(
const CallExpr *CE,
330 CheckerContext &C)
const {
336 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
337 if (TaintArgs.isEmpty())
340 for (llvm::ImmutableSet<unsigned>::iterator I = TaintArgs.begin(),
343 unsigned ArgNum = *I;
346 if (ArgNum == ReturnValueIndex) {
347 State = State->addTaint(CE, C.getLocationContext());
358 State = State->addTaint(*V);
362 State = State->remove<TaintArgsOnPostVisit>();
364 if (State != C.getState()) {
365 C.addTransition(State);
371 void GenericTaintChecker::addSourcesPost(
const CallExpr *CE,
372 CheckerContext &C)
const {
379 StringRef Name = C.getCalleeName(FDecl);
382 FnCheck evalFunction =
383 llvm::StringSwitch<FnCheck>(Name)
384 .Case(
"scanf", &GenericTaintChecker::postScanf)
386 .Case(
"getchar", &GenericTaintChecker::postRetTaint)
387 .Case(
"getchar_unlocked", &GenericTaintChecker::postRetTaint)
388 .Case(
"getenv", &GenericTaintChecker::postRetTaint)
389 .Case(
"fopen", &GenericTaintChecker::postRetTaint)
390 .Case(
"fdopen", &GenericTaintChecker::postRetTaint)
391 .Case(
"freopen", &GenericTaintChecker::postRetTaint)
392 .Case(
"getch", &GenericTaintChecker::postRetTaint)
393 .Case(
"wgetch", &GenericTaintChecker::postRetTaint)
394 .Case(
"socket", &GenericTaintChecker::postSocket)
401 State = (this->*evalFunction)(CE, C);
405 C.addTransition(State);
408 bool GenericTaintChecker::checkPre(
const CallExpr *CE,
409 CheckerContext &C)
const {
411 if (checkUncontrolledFormatString(CE, C))
418 StringRef Name = C.getCalleeName(FDecl);
422 if (checkSystemCall(CE, Name, C))
425 if (checkTaintedBufferSize(CE, FDecl, C))
431 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
435 if (AddrVal.isUnknownOrUndef())
451 ValTy = C.getASTContext().CharTy;
453 return State->getSVal(*AddrLoc, ValTy);
457 GenericTaintChecker::TaintPropagationRule::process(
const CallExpr *CE,
458 CheckerContext &C)
const {
462 bool IsTainted =
false;
463 for (ArgVector::const_iterator I = SrcArgs.begin(), E = SrcArgs.end(); I != E;
465 unsigned ArgNum = *I;
467 if (ArgNum == InvalidArgIndex) {
470 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
471 if (isDestinationArgument(i))
473 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(i),
State, C)))
481 if ((IsTainted = isTaintedOrPointsToTainted(CE->
getArg(ArgNum),
State, C)))
488 for (ArgVector::const_iterator I = DstArgs.begin(), E = DstArgs.end(); I != E;
490 unsigned ArgNum = *I;
493 if (ArgNum == InvalidArgIndex) {
498 for (
unsigned int i = 0; i < CE->
getNumArgs(); ++i) {
505 State = State->add<TaintArgsOnPostVisit>(i);
511 if (ArgNum == ReturnValueIndex) {
512 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
517 assert(ArgNum < CE->getNumArgs());
518 State = State->add<TaintArgsOnPostVisit>(ArgNum);
527 CheckerContext &C)
const {
532 if (State->isTainted(CE->
getArg(0), C.getLocationContext()) ||
533 isStdin(CE->
getArg(0), C)) {
535 for (
unsigned int i = 2; i < CE->
getNumArgs(); ++i)
536 State = State->add<TaintArgsOnPostVisit>(i);
545 CheckerContext &C)
const {
551 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
553 if (DomName.equals(
"AF_SYSTEM") || DomName.equals(
"AF_LOCAL") ||
554 DomName.equals(
"AF_UNIX") || DomName.equals(
"AF_RESERVED_36"))
556 State = State->addTaint(CE, C.getLocationContext());
561 CheckerContext &C)
const {
567 for (
unsigned int i = 1; i < CE->
getNumArgs(); ++i) {
573 State = State->addTaint(*V);
579 CheckerContext &C)
const {
580 return C.getState()->addTaint(CE, C.getLocationContext());
583 bool GenericTaintChecker::isStdin(
const Expr *E, CheckerContext &C) {
585 SVal Val = C.getSVal(E);
588 const MemRegion *MemReg = Val.getAsRegion();
591 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
596 const SymbolRegionValue *Sm =
597 dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
600 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
606 if (
const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
607 D = D->getCanonicalDecl();
608 if ((D->getName().find(
"stdin") != StringRef::npos) && D->isExternC())
610 dyn_cast<PointerType>(D->getType().getTypePtr()))
611 if (PtrTy->getPointeeType().getCanonicalType() ==
612 C.getASTContext().getFILEType().getCanonicalType())
619 const CheckerContext &C,
620 unsigned int &ArgNum) {
628 ArgNum = Format->getFormatIdx() - 1;
629 if ((Format->getType()->getName() ==
"printf") && CE->
getNumArgs() > ArgNum)
634 if (C.getCalleeName(CE).find(
"setproctitle") != StringRef::npos) {
642 bool GenericTaintChecker::generateReportIfTainted(
const Expr *E,
644 CheckerContext &C)
const {
651 if (PointedToSVal && State->isTainted(*PointedToSVal))
652 TaintedSVal = *PointedToSVal;
653 else if (State->isTainted(E, C.getLocationContext()))
654 TaintedSVal = C.getSVal(E);
659 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
661 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
663 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
664 C.emitReport(std::move(report));
670 bool GenericTaintChecker::checkUncontrolledFormatString(
671 const CallExpr *CE, CheckerContext &C)
const {
673 unsigned int ArgNum = 0;
679 return generateReportIfTainted(CE->
getArg(ArgNum),
680 MsgUncontrolledFormatString, C);
683 bool GenericTaintChecker::checkSystemCall(
const CallExpr *CE, StringRef Name,
684 CheckerContext &C)
const {
688 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
704 return generateReportIfTainted(CE->
getArg(ArgNum), MsgSanitizeSystemArgs, C);
709 bool GenericTaintChecker::checkTaintedBufferSize(
const CallExpr *CE,
711 CheckerContext &C)
const {
713 unsigned ArgNum = InvalidArgIndex;
717 case Builtin::BImemcpy:
718 case Builtin::BImemmove:
719 case Builtin::BIstrncpy:
722 case Builtin::BIstrndup:
729 if (ArgNum == InvalidArgIndex) {
730 if (C.isCLibraryFunction(FDecl,
"malloc") ||
731 C.isCLibraryFunction(FDecl,
"calloc") ||
732 C.isCLibraryFunction(FDecl,
"alloca"))
734 else if (C.isCLibraryFunction(FDecl,
"memccpy"))
736 else if (C.isCLibraryFunction(FDecl,
"realloc"))
738 else if (C.isCLibraryFunction(FDecl,
"bcopy"))
742 return ArgNum != InvalidArgIndex && CE->
getNumArgs() > ArgNum &&
743 generateReportIfTainted(CE->
getArg(ArgNum), MsgTaintedBufferSize, C);
746 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
747 mgr.registerChecker<GenericTaintChecker>();
Represents a function declaration or definition.
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
PointerType - C99 6.7.5.1 - Pointer Declarators.
A (possibly-)qualified type.
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
The base class of the type hierarchy.
constexpr XRayInstrMask Function
Represents a variable declaration or definition.
bool isReferenceType() const
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned int &ArgNum)
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
This represents one expression.
bool isNull() const
Return true if this QualType doesn't point to a type yet.
bool isConstQualified() const
Determine whether this type is const-qualified.
QualType getCanonicalType() const
Encodes a location in the source.
constexpr XRayInstrMask None
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
bool isPointerType() const
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
IgnoreParens - Ignore parentheses.