clang  10.0.0git
GenericTaintChecker.cpp
Go to the documentation of this file.
1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
27 #include "llvm/Support/YAMLTraits.h"
28 #include <algorithm>
29 #include <limits>
30 #include <unordered_map>
31 #include <utility>
32 
33 using namespace clang;
34 using namespace ento;
35 using namespace taint;
36 
37 namespace {
38 class GenericTaintChecker
39  : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
40 public:
41  static void *getTag() {
42  static int Tag;
43  return &Tag;
44  }
45 
46  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
47 
48  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
49 
50  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
51  const char *Sep) const override;
52 
53  using ArgVector = SmallVector<unsigned, 2>;
54  using SignedArgVector = SmallVector<int, 2>;
55 
56  enum class VariadicType { None, Src, Dst };
57 
58  /// Used to parse the configuration file.
59  struct TaintConfiguration {
60  using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
61 
62  struct Propagation {
63  std::string Name;
64  std::string Scope;
65  ArgVector SrcArgs;
66  SignedArgVector DstArgs;
67  VariadicType VarType;
68  unsigned VarIndex;
69  };
70 
71  std::vector<Propagation> Propagations;
72  std::vector<NameScopeArgs> Filters;
73  std::vector<NameScopeArgs> Sinks;
74 
75  TaintConfiguration() = default;
76  TaintConfiguration(const TaintConfiguration &) = default;
77  TaintConfiguration(TaintConfiguration &&) = default;
78  TaintConfiguration &operator=(const TaintConfiguration &) = default;
79  TaintConfiguration &operator=(TaintConfiguration &&) = default;
80  };
81 
82  /// Convert SignedArgVector to ArgVector.
83  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
84  SignedArgVector Args);
85 
86  /// Parse the config.
87  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
88  TaintConfiguration &&Config);
89 
90  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
91  /// Denotes the return vale.
92  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
93  1};
94 
95 private:
96  mutable std::unique_ptr<BugType> BT;
97  void initBugType() const {
98  if (!BT)
99  BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
100  }
101 
102  struct FunctionData {
103  FunctionData() = delete;
104  FunctionData(const FunctionData &) = default;
105  FunctionData(FunctionData &&) = default;
106  FunctionData &operator=(const FunctionData &) = delete;
107  FunctionData &operator=(FunctionData &&) = delete;
108 
109  static Optional<FunctionData> create(const CallExpr *CE,
110  const CheckerContext &C) {
111  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
112  if (!FDecl || (FDecl->getKind() != Decl::Function &&
113  FDecl->getKind() != Decl::CXXMethod))
114  return None;
115 
116  StringRef Name = C.getCalleeName(FDecl);
117  std::string FullName = FDecl->getQualifiedNameAsString();
118  if (Name.empty() || FullName.empty())
119  return None;
120 
121  return FunctionData{FDecl, Name, FullName};
122  }
123 
124  bool isInScope(StringRef Scope) const {
125  return StringRef(FullName).startswith(Scope);
126  }
127 
128  const FunctionDecl *const FDecl;
129  const StringRef Name;
130  const std::string FullName;
131  };
132 
133  /// Catch taint related bugs. Check if tainted data is passed to a
134  /// system call etc. Returns true on matching.
135  bool checkPre(const CallExpr *CE, const FunctionData &FData,
136  CheckerContext &C) const;
137 
138  /// Add taint sources on a pre-visit. Returns true on matching.
139  bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
140  CheckerContext &C) const;
141 
142  /// Mark filter's arguments not tainted on a pre-visit. Returns true on
143  /// matching.
144  bool addFiltersPre(const CallExpr *CE, const FunctionData &FData,
145  CheckerContext &C) const;
146 
147  /// Propagate taint generated at pre-visit. Returns true on matching.
148  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
149 
150  /// Check if the region the expression evaluates to is the standard input,
151  /// and thus, is tainted.
152  static bool isStdin(const Expr *E, CheckerContext &C);
153 
154  /// Given a pointer argument, return the value it points to.
155  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
156 
157  /// Check for CWE-134: Uncontrolled Format String.
158  static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
159  "Untrusted data is used as a format string "
160  "(CWE-134: Uncontrolled Format String)";
161  bool checkUncontrolledFormatString(const CallExpr *CE,
162  CheckerContext &C) const;
163 
164  /// Check for:
165  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
166  /// CWE-78, "Failure to Sanitize Data into an OS Command"
167  static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
168  "Untrusted data is passed to a system call "
169  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
170  bool checkSystemCall(const CallExpr *CE, StringRef Name,
171  CheckerContext &C) const;
172 
173  /// Check if tainted data is used as a buffer size ins strn.. functions,
174  /// and allocators.
175  static constexpr llvm::StringLiteral MsgTaintedBufferSize =
176  "Untrusted data is used to specify the buffer size "
177  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
178  "for character data and the null terminator)";
179  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
180  CheckerContext &C) const;
181 
182  /// Check if tainted data is used as a custom sink's parameter.
183  static constexpr llvm::StringLiteral MsgCustomSink =
184  "Untrusted data is passed to a user-defined sink";
185  bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData,
186  CheckerContext &C) const;
187 
188  /// Generate a report if the expression is tainted or points to tainted data.
189  bool generateReportIfTainted(const Expr *E, StringRef Msg,
190  CheckerContext &C) const;
191 
192  struct TaintPropagationRule;
193  template <typename T>
194  using ConfigDataMap =
195  std::unordered_multimap<std::string, std::pair<std::string, T>>;
196  using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
197  using NameArgMap = ConfigDataMap<ArgVector>;
198 
199  /// Find a function with the given name and scope. Returns the first match
200  /// or the end of the map.
201  template <typename T>
202  static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
203  const FunctionData &FData);
204 
205  /// A struct used to specify taint propagation rules for a function.
206  ///
207  /// If any of the possible taint source arguments is tainted, all of the
208  /// destination arguments should also be tainted. Use InvalidArgIndex in the
209  /// src list to specify that all of the arguments can introduce taint. Use
210  /// InvalidArgIndex in the dst arguments to signify that all the non-const
211  /// pointer and reference arguments might be tainted on return. If
212  /// ReturnValueIndex is added to the dst list, the return value will be
213  /// tainted.
214  struct TaintPropagationRule {
215  using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
216  CheckerContext &C);
217 
218  /// List of arguments which can be taint sources and should be checked.
219  ArgVector SrcArgs;
220  /// List of arguments which should be tainted on function return.
221  ArgVector DstArgs;
222  /// Index for the first variadic parameter if exist.
223  unsigned VariadicIndex;
224  /// Show when a function has variadic parameters. If it has, it marks all
225  /// of them as source or destination.
226  VariadicType VarType;
227  /// Special function for tainted source determination. If defined, it can
228  /// override the default behavior.
229  PropagationFuncType PropagationFunc;
230 
231  TaintPropagationRule()
232  : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
233  PropagationFunc(nullptr) {}
234 
235  TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
236  VariadicType Var = VariadicType::None,
237  unsigned VarIndex = InvalidArgIndex,
238  PropagationFuncType Func = nullptr)
239  : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
240  VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
241 
242  /// Get the propagation rule for a given function.
243  static TaintPropagationRule
244  getTaintPropagationRule(const NameRuleMap &CustomPropagations,
245  const FunctionData &FData, CheckerContext &C);
246 
247  void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
248  void addDstArg(unsigned A) { DstArgs.push_back(A); }
249 
250  bool isNull() const {
251  return SrcArgs.empty() && DstArgs.empty() &&
252  VariadicType::None == VarType;
253  }
254 
255  bool isDestinationArgument(unsigned ArgNum) const {
256  return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
257  }
258 
259  static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
260  CheckerContext &C) {
261  if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
262  return true;
263 
264  if (!E->getType().getTypePtr()->isPointerType())
265  return false;
266 
267  Optional<SVal> V = getPointedToSVal(C, E);
268  return (V && isTainted(State, *V));
269  }
270 
271  /// Pre-process a function which propagates taint according to the
272  /// taint rule.
273  ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
274 
275  // Functions for custom taintedness propagation.
276  static bool postSocket(bool IsTainted, const CallExpr *CE,
277  CheckerContext &C);
278  };
279 
280  /// Defines a map between the propagation function's name, scope
281  /// and TaintPropagationRule.
282  NameRuleMap CustomPropagations;
283 
284  /// Defines a map between the filter function's name, scope and filtering
285  /// args.
286  NameArgMap CustomFilters;
287 
288  /// Defines a map between the sink function's name, scope and sinking args.
289  NameArgMap CustomSinks;
290 };
291 
292 const unsigned GenericTaintChecker::ReturnValueIndex;
293 const unsigned GenericTaintChecker::InvalidArgIndex;
294 
295 // FIXME: these lines can be removed in C++17
296 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
297 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
298 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
299 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
300 } // end of anonymous namespace
301 
302 using TaintConfig = GenericTaintChecker::TaintConfiguration;
303 
304 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
305 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
306 
307 namespace llvm {
308 namespace yaml {
309 template <> struct MappingTraits<TaintConfig> {
310  static void mapping(IO &IO, TaintConfig &Config) {
311  IO.mapOptional("Propagations", Config.Propagations);
312  IO.mapOptional("Filters", Config.Filters);
313  IO.mapOptional("Sinks", Config.Sinks);
314  }
315 };
316 
317 template <> struct MappingTraits<TaintConfig::Propagation> {
318  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
319  IO.mapRequired("Name", Propagation.Name);
320  IO.mapOptional("Scope", Propagation.Scope);
321  IO.mapOptional("SrcArgs", Propagation.SrcArgs);
322  IO.mapOptional("DstArgs", Propagation.DstArgs);
323  IO.mapOptional("VariadicType", Propagation.VarType,
325  IO.mapOptional("VariadicIndex", Propagation.VarIndex,
326  GenericTaintChecker::InvalidArgIndex);
327  }
328 };
329 
330 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
331  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
332  IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
333  IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
334  IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
335  }
336 };
337 
338 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
339  static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
340  IO.mapRequired("Name", std::get<0>(NSA));
341  IO.mapOptional("Scope", std::get<1>(NSA));
342  IO.mapRequired("Args", std::get<2>(NSA));
343  }
344 };
345 } // namespace yaml
346 } // namespace llvm
347 
348 /// A set which is used to pass information from call pre-visit instruction
349 /// to the call post-visit. The values are unsigned integers, which are either
350 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
351 /// points to data, which should be tainted on return.
352 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
353 
354 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
355  CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
356  ArgVector Result;
357  for (int Arg : Args) {
358  if (Arg == -1)
359  Result.push_back(ReturnValueIndex);
360  else if (Arg < -1) {
361  Result.push_back(InvalidArgIndex);
362  Mgr.reportInvalidCheckerOptionValue(
363  this, Option,
364  "an argument number for propagation rules greater or equal to -1");
365  } else
366  Result.push_back(static_cast<unsigned>(Arg));
367  }
368  return Result;
369 }
370 
371 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
372  const std::string &Option,
373  TaintConfiguration &&Config) {
374  for (auto &P : Config.Propagations) {
375  GenericTaintChecker::CustomPropagations.emplace(
376  P.Name,
377  std::make_pair(P.Scope, TaintPropagationRule{
378  std::move(P.SrcArgs),
379  convertToArgVector(Mgr, Option, P.DstArgs),
380  P.VarType, P.VarIndex}));
381  }
382 
383  for (auto &F : Config.Filters) {
384  GenericTaintChecker::CustomFilters.emplace(
385  std::get<0>(F),
386  std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
387  }
388 
389  for (auto &S : Config.Sinks) {
390  GenericTaintChecker::CustomSinks.emplace(
391  std::get<0>(S),
392  std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
393  }
394 }
395 
396 template <typename T>
397 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
398  const FunctionData &FData) {
399  auto Range = Map.equal_range(FData.Name);
400  auto It =
401  std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
402  const auto &Value = Entry.second;
403  StringRef Scope = Value.first;
404  return Scope.empty() || FData.isInScope(Scope);
405  });
406  return It != Range.second ? It : Map.end();
407 }
408 
409 GenericTaintChecker::TaintPropagationRule
410 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
411  const NameRuleMap &CustomPropagations, const FunctionData &FData,
412  CheckerContext &C) {
413  // TODO: Currently, we might lose precision here: we always mark a return
414  // value as tainted even if it's just a pointer, pointing to tainted data.
415 
416  // Check for exact name match for functions without builtin substitutes.
417  // Use qualified name, because these are C functions without namespace.
418  TaintPropagationRule Rule =
419  llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
420  // Source functions
421  // TODO: Add support for vfscanf & family.
422  .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
423  .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
424  .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
425  .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
426  .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
427  .Case("getchar_unlocked",
428  TaintPropagationRule({}, {ReturnValueIndex}))
429  .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
430  .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
431  .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
432  .Case("socket",
433  TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
434  InvalidArgIndex,
435  &TaintPropagationRule::postSocket))
436  .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
437  // Propagating functions
438  .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
439  .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
440  .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
441  .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
442  .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
443  .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
444  .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
445  .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
446  .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
447  .Case("getdelim", TaintPropagationRule({3}, {0}))
448  .Case("getline", TaintPropagationRule({2}, {0}))
449  .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
450  .Case("pread",
451  TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
452  .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
453  .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
454  .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
455  .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
456  .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
457  .Default(TaintPropagationRule());
458 
459  if (!Rule.isNull())
460  return Rule;
461 
462  // Check if it's one of the memory setting/copying functions.
463  // This check is specialized but faster then calling isCLibraryFunction.
464  const FunctionDecl *FDecl = FData.FDecl;
465  unsigned BId = 0;
466  if ((BId = FDecl->getMemoryFunctionKind()))
467  switch (BId) {
468  case Builtin::BImemcpy:
469  case Builtin::BImemmove:
470  case Builtin::BIstrncpy:
471  case Builtin::BIstrncat:
472  return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
473  case Builtin::BIstrlcpy:
474  case Builtin::BIstrlcat:
475  return TaintPropagationRule({1, 2}, {0});
476  case Builtin::BIstrndup:
477  return TaintPropagationRule({0, 1}, {ReturnValueIndex});
478 
479  default:
480  break;
481  };
482 
483  // Process all other functions which could be defined as builtins.
484  if (Rule.isNull()) {
485  if (C.isCLibraryFunction(FDecl, "snprintf"))
486  return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
487  3);
488  else if (C.isCLibraryFunction(FDecl, "sprintf"))
489  return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
490  2);
491  else if (C.isCLibraryFunction(FDecl, "strcpy") ||
492  C.isCLibraryFunction(FDecl, "stpcpy") ||
493  C.isCLibraryFunction(FDecl, "strcat"))
494  return TaintPropagationRule({1}, {0, ReturnValueIndex});
495  else if (C.isCLibraryFunction(FDecl, "bcopy"))
496  return TaintPropagationRule({0, 2}, {1});
497  else if (C.isCLibraryFunction(FDecl, "strdup") ||
498  C.isCLibraryFunction(FDecl, "strdupa"))
499  return TaintPropagationRule({0}, {ReturnValueIndex});
500  else if (C.isCLibraryFunction(FDecl, "wcsdup"))
501  return TaintPropagationRule({0}, {ReturnValueIndex});
502  }
503 
504  // Skipping the following functions, since they might be used for cleansing
505  // or smart memory copy:
506  // - memccpy - copying until hitting a special character.
507 
508  auto It = findFunctionInConfig(CustomPropagations, FData);
509  if (It != CustomPropagations.end()) {
510  const auto &Value = It->second;
511  return Value.second;
512  }
513 
514  return TaintPropagationRule();
515 }
516 
517 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
518  CheckerContext &C) const {
520  if (!FData)
521  return;
522 
523  // Check for taintedness related errors first: system call, uncontrolled
524  // format string, tainted buffer size.
525  if (checkPre(CE, *FData, C))
526  return;
527 
528  // Marks the function's arguments and/or return value tainted if it present in
529  // the list.
530  if (addSourcesPre(CE, *FData, C))
531  return;
532 
533  addFiltersPre(CE, *FData, C);
534 }
535 
536 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
537  CheckerContext &C) const {
538  // Set the marked values as tainted. The return value only accessible from
539  // checkPostStmt.
540  propagateFromPre(CE, C);
541 }
542 
543 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
544  const char *NL, const char *Sep) const {
545  printTaint(State, Out, NL, Sep);
546 }
547 
548 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
549  const FunctionData &FData,
550  CheckerContext &C) const {
551  // First, try generating a propagation rule for this function.
552  TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
553  this->CustomPropagations, FData, C);
554  if (!Rule.isNull()) {
555  ProgramStateRef State = Rule.process(CE, C);
556  if (State) {
557  C.addTransition(State);
558  return true;
559  }
560  }
561  return false;
562 }
563 
564 bool GenericTaintChecker::addFiltersPre(const CallExpr *CE,
565  const FunctionData &FData,
566  CheckerContext &C) const {
567  auto It = findFunctionInConfig(CustomFilters, FData);
568  if (It == CustomFilters.end())
569  return false;
570 
571  ProgramStateRef State = C.getState();
572  const auto &Value = It->second;
573  const ArgVector &Args = Value.second;
574  for (unsigned ArgNum : Args) {
575  if (ArgNum >= CE->getNumArgs())
576  continue;
577 
578  const Expr *Arg = CE->getArg(ArgNum);
579  Optional<SVal> V = getPointedToSVal(C, Arg);
580  if (V)
581  State = removeTaint(State, *V);
582  }
583 
584  if (State != C.getState()) {
585  C.addTransition(State);
586  return true;
587  }
588  return false;
589 }
590 
591 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
592  CheckerContext &C) const {
593  ProgramStateRef State = C.getState();
594 
595  // Depending on what was tainted at pre-visit, we determined a set of
596  // arguments which should be tainted after the function returns. These are
597  // stored in the state as TaintArgsOnPostVisit set.
598  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
599  if (TaintArgs.isEmpty())
600  return false;
601 
602  for (unsigned ArgNum : TaintArgs) {
603  // Special handling for the tainted return value.
604  if (ArgNum == ReturnValueIndex) {
605  State = addTaint(State, CE, C.getLocationContext());
606  continue;
607  }
608 
609  // The arguments are pointer arguments. The data they are pointing at is
610  // tainted after the call.
611  if (CE->getNumArgs() < (ArgNum + 1))
612  return false;
613  const Expr *Arg = CE->getArg(ArgNum);
614  Optional<SVal> V = getPointedToSVal(C, Arg);
615  if (V)
616  State = addTaint(State, *V);
617  }
618 
619  // Clear up the taint info from the state.
620  State = State->remove<TaintArgsOnPostVisit>();
621 
622  if (State != C.getState()) {
623  C.addTransition(State);
624  return true;
625  }
626  return false;
627 }
628 
629 bool GenericTaintChecker::checkPre(const CallExpr *CE,
630  const FunctionData &FData,
631  CheckerContext &C) const {
632 
633  if (checkUncontrolledFormatString(CE, C))
634  return true;
635 
636  if (checkSystemCall(CE, FData.Name, C))
637  return true;
638 
639  if (checkTaintedBufferSize(CE, FData.FDecl, C))
640  return true;
641 
642  if (checkCustomSinks(CE, FData, C))
643  return true;
644 
645  return false;
646 }
647 
648 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
649  const Expr *Arg) {
650  ProgramStateRef State = C.getState();
651  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
652  if (AddrVal.isUnknownOrUndef())
653  return None;
654 
655  Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
656  if (!AddrLoc)
657  return None;
658 
659  QualType ArgTy = Arg->getType().getCanonicalType();
660  if (!ArgTy->isPointerType())
661  return State->getSVal(*AddrLoc);
662 
663  QualType ValTy = ArgTy->getPointeeType();
664 
665  // Do not dereference void pointers. Treat them as byte pointers instead.
666  // FIXME: we might want to consider more than just the first byte.
667  if (ValTy->isVoidType())
668  ValTy = C.getASTContext().CharTy;
669 
670  return State->getSVal(*AddrLoc, ValTy);
671 }
672 
674 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
675  CheckerContext &C) const {
676  ProgramStateRef State = C.getState();
677 
678  // Check for taint in arguments.
679  bool IsTainted = true;
680  for (unsigned ArgNum : SrcArgs) {
681  if (ArgNum >= CE->getNumArgs())
682  continue;
683 
684  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
685  break;
686  }
687 
688  // Check for taint in variadic arguments.
689  if (!IsTainted && VariadicType::Src == VarType) {
690  // Check if any of the arguments is tainted
691  for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
692  if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
693  break;
694  }
695  }
696 
697  if (PropagationFunc)
698  IsTainted = PropagationFunc(IsTainted, CE, C);
699 
700  if (!IsTainted)
701  return State;
702 
703  // Mark the arguments which should be tainted after the function returns.
704  for (unsigned ArgNum : DstArgs) {
705  // Should mark the return value?
706  if (ArgNum == ReturnValueIndex) {
707  State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
708  continue;
709  }
710 
711  if (ArgNum >= CE->getNumArgs())
712  continue;
713 
714  // Mark the given argument.
715  State = State->add<TaintArgsOnPostVisit>(ArgNum);
716  }
717 
718  // Mark all variadic arguments tainted if present.
719  if (VariadicType::Dst == VarType) {
720  // For all pointer and references that were passed in:
721  // If they are not pointing to const data, mark data as tainted.
722  // TODO: So far we are just going one level down; ideally we'd need to
723  // recurse here.
724  for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
725  const Expr *Arg = CE->getArg(i);
726  // Process pointer argument.
727  const Type *ArgTy = Arg->getType().getTypePtr();
728  QualType PType = ArgTy->getPointeeType();
729  if ((!PType.isNull() && !PType.isConstQualified()) ||
730  (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
731  State = State->add<TaintArgsOnPostVisit>(i);
732  }
733  }
734 
735  return State;
736 }
737 
738 // If argument 0(protocol domain) is network, the return value should get taint.
739 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
740  const CallExpr *CE,
741  CheckerContext &C) {
742  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
743  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
744  // White list the internal communication protocols.
745  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
746  DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
747  return false;
748 
749  return true;
750 }
751 
752 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
753  ProgramStateRef State = C.getState();
754  SVal Val = C.getSVal(E);
755 
756  // stdin is a pointer, so it would be a region.
757  const MemRegion *MemReg = Val.getAsRegion();
758 
759  // The region should be symbolic, we do not know it's value.
760  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
761  if (!SymReg)
762  return false;
763 
764  // Get it's symbol and find the declaration region it's pointing to.
765  const SymbolRegionValue *Sm =
766  dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
767  if (!Sm)
768  return false;
769  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
770  if (!DeclReg)
771  return false;
772 
773  // This region corresponds to a declaration, find out if it's a global/extern
774  // variable named stdin with the proper type.
775  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
776  D = D->getCanonicalDecl();
777  if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
778  const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
779  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
780  C.getASTContext().getFILEType().getCanonicalType())
781  return true;
782  }
783  }
784  return false;
785 }
786 
787 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
788  const CheckerContext &C,
789  unsigned &ArgNum) {
790  // Find if the function contains a format string argument.
791  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
792  // vsnprintf, syslog, custom annotated functions.
793  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
794  if (!FDecl)
795  return false;
796  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
797  ArgNum = Format->getFormatIdx() - 1;
798  if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
799  return true;
800  }
801 
802  // Or if a function is named setproctitle (this is a heuristic).
803  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
804  ArgNum = 0;
805  return true;
806  }
807 
808  return false;
809 }
810 
811 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
812  CheckerContext &C) const {
813  assert(E);
814 
815  // Check for taint.
816  ProgramStateRef State = C.getState();
817  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
818  SVal TaintedSVal;
819  if (PointedToSVal && isTainted(State, *PointedToSVal))
820  TaintedSVal = *PointedToSVal;
821  else if (isTainted(State, E, C.getLocationContext()))
822  TaintedSVal = C.getSVal(E);
823  else
824  return false;
825 
826  // Generate diagnostic.
827  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
828  initBugType();
829  auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
830  report->addRange(E->getSourceRange());
831  report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
832  C.emitReport(std::move(report));
833  return true;
834  }
835  return false;
836 }
837 
838 bool GenericTaintChecker::checkUncontrolledFormatString(
839  const CallExpr *CE, CheckerContext &C) const {
840  // Check if the function contains a format string argument.
841  unsigned ArgNum = 0;
842  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
843  return false;
844 
845  // If either the format string content or the pointer itself are tainted,
846  // warn.
847  return generateReportIfTainted(CE->getArg(ArgNum),
848  MsgUncontrolledFormatString, C);
849 }
850 
851 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
852  CheckerContext &C) const {
853  // TODO: It might make sense to run this check on demand. In some cases,
854  // we should check if the environment has been cleansed here. We also might
855  // need to know if the user was reset before these calls(seteuid).
856  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
857  .Case("system", 0)
858  .Case("popen", 0)
859  .Case("execl", 0)
860  .Case("execle", 0)
861  .Case("execlp", 0)
862  .Case("execv", 0)
863  .Case("execvp", 0)
864  .Case("execvP", 0)
865  .Case("execve", 0)
866  .Case("dlopen", 0)
867  .Default(InvalidArgIndex);
868 
869  if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
870  return false;
871 
872  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
873 }
874 
875 // TODO: Should this check be a part of the CString checker?
876 // If yes, should taint be a global setting?
877 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
878  const FunctionDecl *FDecl,
879  CheckerContext &C) const {
880  // If the function has a buffer size argument, set ArgNum.
881  unsigned ArgNum = InvalidArgIndex;
882  unsigned BId = 0;
883  if ((BId = FDecl->getMemoryFunctionKind()))
884  switch (BId) {
885  case Builtin::BImemcpy:
886  case Builtin::BImemmove:
887  case Builtin::BIstrncpy:
888  ArgNum = 2;
889  break;
890  case Builtin::BIstrndup:
891  ArgNum = 1;
892  break;
893  default:
894  break;
895  };
896 
897  if (ArgNum == InvalidArgIndex) {
898  if (C.isCLibraryFunction(FDecl, "malloc") ||
899  C.isCLibraryFunction(FDecl, "calloc") ||
900  C.isCLibraryFunction(FDecl, "alloca"))
901  ArgNum = 0;
902  else if (C.isCLibraryFunction(FDecl, "memccpy"))
903  ArgNum = 3;
904  else if (C.isCLibraryFunction(FDecl, "realloc"))
905  ArgNum = 1;
906  else if (C.isCLibraryFunction(FDecl, "bcopy"))
907  ArgNum = 2;
908  }
909 
910  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
911  generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
912 }
913 
914 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
915  const FunctionData &FData,
916  CheckerContext &C) const {
917  auto It = findFunctionInConfig(CustomSinks, FData);
918  if (It == CustomSinks.end())
919  return false;
920 
921  const auto &Value = It->second;
922  const GenericTaintChecker::ArgVector &Args = Value.second;
923  for (unsigned ArgNum : Args) {
924  if (ArgNum >= CE->getNumArgs())
925  continue;
926 
927  if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
928  return true;
929  }
930 
931  return false;
932 }
933 
934 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
935  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
936  std::string Option{"Config"};
937  StringRef ConfigFile =
938  Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
940  getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
941  if (Config)
942  Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
943 }
944 
945 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
946  return true;
947 }
Represents a function declaration or definition.
Definition: Decl.h:1783
static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value)
unsigned getMemoryFunctionKind() const
Identify a memory copying or setting function.
Definition: Decl.cpp:3872
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2614
A (possibly-)qualified type.
Definition: Type.h:654
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:2702
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2689
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:557
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
StringRef P
The base class of the type hierarchy.
Definition: Type.h:1450
constexpr XRayInstrMask Function
Definition: XRayInstr.h:38
LineState State
static void mapping(IO &IO, TaintConfig::Propagation &Propagation)
Definition: Format.h:2445
bool isReferenceType() const
Definition: Type.h:6516
__DEVICE__ int max(int __a, int __b)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:53
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:40
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6256
GenericTaintChecker::TaintConfiguration TaintConfig
void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl="\n", const char *sep="")
This represents one expression.
Definition: Expr.h:108
#define V(N, I)
Definition: ASTContext.h:2941
#define bool
Definition: stdbool.h:15
static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, unsigned &ArgNum)
QualType getType() const
Definition: Expr.h:137
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:719
static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA)
static void mapping(IO &IO, TaintConfig &Config)
bool isConstQualified() const
Determine whether this type is const-qualified.
Definition: Type.h:6315
QualType getCanonicalType() const
Definition: Type.h:6295
Encodes a location in the source.
bool isTainted(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Check if the statement has a tainted value in the given state.
LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, TaintTagType Kind=TaintTagGeneric)
Create a new state in which the value of the statement is marked as tainted.
constexpr XRayInstrMask None
Definition: XRayInstr.h:37
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:224
#define REGISTER_SET_WITH_PROGRAMSTATE(Name, Elem)
Declares an immutable set of type NameTy, suitable for placement into the ProgramState.
Dataflow Directional Tag Classes.
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
Kind getKind() const
Definition: DeclBase.h:432
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:524
bool isVoidType() const
Definition: Type.h:6777
LLVM_NODISCARD ProgramStateRef removeTaint(ProgramStateRef State, SVal V)
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:263
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2546
std::string getQualifiedNameAsString() const
Definition: Decl.cpp:1555
bool isPointerType() const
Definition: Type.h:6504
std::error_code parseConfiguration(StringRef Text, FormatStyle *Style)
Parse configuration from YAML-formatted text.
Definition: Format.cpp:1159
Defines enum values for all the target-independent builtin functions.
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2991