boga95 created this revision.
boga95 added reviewers: NoQ, Szelethus, xazax.hun, dkrupp.
boga95 added a project: clang.
Herald added subscribers: cfe-commits, Charusso, donat.nagy, mikhail.ramalho, 
a.sidorin, rnkovacs, szepet, baloghadamsoftware, whisperity.

One can pass a configuration file to the checker with the following argument: 
`-analyzer-config 
alpha.security.taint.TaintPropagation:Config=/path/to/the/file/taint-generic-config.yaml`.
 The config file can contain:

- Propagations: One can define functions which propagate or create the 
taintedness. It has five fields:
  - Name: The name of the function. Mandatory field.
  - SrcArgs: A list of arguments. If any of them is tainted, the destination 
arguments will be marked tainted. If it's not defined, the destination 
arguments will always be marked as tainted.
  - DstArgs: A list of arguments. The tainted flag is set for these arguments 
if they are marked. The return value's index is 4294967294, i.e. 
`UINT_MAX - 1` (this is temporary).
  - VarType: It's an enum with three possible values: `None`, `Src`, `Dst`. The 
default value is `None`, which does nothing.
  - VarIndex: The index of the first variadic argument of the function. If 
`VarType == Src` and any of the variadic arguments is tainted, the destination 
arguments will be marked as tainted. If `VarType == Dst` and they are marked, 
all arguments starting from VarIndex will be marked as tainted.
- Filters: One can define functions that remove the tainted flag from a value 
if it is passed to the proper argument.
  - Name: The name of the function. Mandatory field.
  - Args: A list of arguments. If a tainted value is passed to it, the tainted 
flag will be removed. Mandatory field.
- Sinks: A list of functions which will give a warning if they get a tainted 
value.
  - Name: The name of the function. Mandatory field.
  - Args: A list of arguments. If any of those arguments get a tainted value, 
it will give a warning. Mandatory field.

For the propagations, it uses the config to deduce the `TaintPropagationRules` 
from the function's name.
The filter functions can be understood as functions which mark their arguments 
as not tainted. I improved the information flow from pre-visit to post-visit 
so that the `TaintTagType` can be passed to the `setTaint` function. 
Currently, filtering only works if the argument is a pointer.


Repository:
  rC Clang

https://reviews.llvm.org/D59516

Files:
  include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
  include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Core/ProgramState.cpp
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,5 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -analyzer-config alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -Wno-format-security -verify %s
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
@@ -295,3 +295,49 @@
   if (i < rhs)
     *(volatile int *) 0; // no-warning
 }
+
+// Test configuration
+int mySource1();
+void mySource2(int*);
+void myScanf(const char*, ...);
+int myPropagator(int, int*);
+int mySnprintf(char*, size_t, const char*, ...);
+void myFilter(int*);
+void mySink(int, int, int);
+
+void testConfigurationSources1() {
+  int x = mySource1();
+  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testConfigurationSources2() {
+  int x;
+  mySource2(&x);
+  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testConfigurationSources3() {
+  int x, y;
+  myScanf("%d %d", &x, &y);
+  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testConfigurationPropagation() {
+  int x = mySource1();
+  int y;
+  myPropagator(x, &y);
+  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testConfigurationFilter() {
+  int x = mySource1();
+  myFilter(&x);
+  Buffer[x] = 1; // no-warning
+}
+
+void testConfigurationSinks() {
+  int x = mySource1();
+  mySink(x, 1, 2); // expected-warning {{Untrusted data is passed to a user defined sink}}
+  mySink(1, x, 2); // no-warning
+  mySink(1, 2, x); // expected-warning {{Untrusted data is passed to a user defined sink}}
+}
Index: test/Analysis/Inputs/taint-generic-config.yaml
===================================================================
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation functions
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name:     mySource1
+    DstArgs:  [4294967294] # Index for return value
+
+  # int x;
+  # mySource2(&x); // x is tainted
+  - Name:     mySource2
+    DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", &x, &y); // x and y are tainted
+  - Name:     myScanf
+    VarType:  Dst
+    VarIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, &y); // y is tainted
+  - Name:     myPropagator
+    SrcArgs:  [0]
+    DstArgs:  [1]
+
+  # const unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySnprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:     mySnprintf
+    SrcArgs:  [1]
+    DstArgs:  [0, 4294967294]
+    VarType:  Src
+    VarIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(&x); // x is not tainted anymore
+  - Name: myFilter
+    Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+    Args: [0, 2]
+
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===================================================================
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -658,20 +658,20 @@
   return true;
 }
 
-ProgramStateRef ProgramState::addTaint(const Stmt *S,
+ProgramStateRef ProgramState::setTaint(const Stmt *S,
                                            const LocationContext *LCtx,
                                            TaintTagType Kind) const {
   if (const Expr *E = dyn_cast_or_null<Expr>(S))
     S = E->IgnoreParens();
 
-  return addTaint(getSVal(S, LCtx), Kind);
+  return setTaint(getSVal(S, LCtx), Kind);
 }
 
-ProgramStateRef ProgramState::addTaint(SVal V,
+ProgramStateRef ProgramState::setTaint(SVal V,
                                        TaintTagType Kind) const {
   SymbolRef Sym = V.getAsSymbol();
   if (Sym)
-    return addTaint(Sym, Kind);
+    return setTaint(Sym, Kind);
 
   // If the SVal represents a structure, try to mass-taint all values within the
   // structure. For now it only works efficiently on lazy compound values that
@@ -685,22 +685,22 @@
   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
     if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
       if (SymbolRef Sym = binding->getAsSymbol())
-        return addPartialTaint(Sym, LCV->getRegion(), Kind);
+        return setPartialTaint(Sym, LCV->getRegion(), Kind);
     }
   }
 
   const MemRegion *R = V.getAsRegion();
-  return addTaint(R, Kind);
+  return setTaint(R, Kind);
 }
 
-ProgramStateRef ProgramState::addTaint(const MemRegion *R,
+ProgramStateRef ProgramState::setTaint(const MemRegion *R,
                                            TaintTagType Kind) const {
   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
-    return addTaint(SR->getSymbol(), Kind);
+    return setTaint(SR->getSymbol(), Kind);
   return this;
 }
 
-ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
+ProgramStateRef ProgramState::setTaint(SymbolRef Sym,
                                            TaintTagType Kind) const {
   // If this is a symbol cast, remove the cast before adding the taint. Taint
   // is cast agnostic.
@@ -712,7 +712,7 @@
   return NewState;
 }
 
-ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
+ProgramStateRef ProgramState::setPartialTaint(SymbolRef ParentSym,
                                               const SubRegion *SubRegion,
                                               TaintTagType Kind) const {
   // Ignore partial taint if the entire parent symbol is already tainted.
@@ -721,7 +721,7 @@
 
   // Partial taint applies if only a portion of the symbol is tainted.
   if (SubRegion == SubRegion->getBaseRegion())
-    return addTaint(ParentSym, Kind);
+    return setTaint(ParentSym, Kind);
 
   const TaintedSubRegions *SavedRegs = get<DerivedSymTaint>(ParentSym);
   TaintedSubRegions Regs =
@@ -779,7 +779,7 @@
       continue;
 
     if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
-      if (*Tag == Kind)
+      if (*Tag != TaintTagNotTainted && *Tag == Kind)
         return true;
     }
 
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -13,14 +13,16 @@
 // aggressively, even if the involved symbols are under constrained.
 //
 //===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
 #include <climits>
 #include <initializer_list>
 #include <utility>
@@ -29,6 +31,28 @@
 using namespace ento;
 
 namespace {
+/// A struct to store tainted argument and taint type as a pair in the program
+/// state.
+struct TaintArgTypePair {
+  unsigned Arg;
+  TaintTagType TagType;
+
+  bool operator==(const TaintArgTypePair &X) const {
+    return Arg == X.Arg && TagType == X.TagType;
+  }
+
+  bool operator<(const TaintArgTypePair &X) const {
+    if (Arg != X.Arg)
+      return Arg < X.Arg;
+    return TagType < X.TagType;
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.AddInteger(Arg);
+    ID.AddInteger(TagType);
+  }
+};
+
 class GenericTaintChecker
     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
 public:
@@ -41,11 +65,38 @@
 
   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
 
-private:
+  using ArgVector = SmallVector<unsigned, 2>;
+
+  enum class VariadicType { None, Src, Dst };
+
+  /// The ``TaintConfiguration`` is used to parse configuration file.
+  struct TaintConfiguration {
+    using NameArgsPair = std::pair<std::string, ArgVector>;
+
+    struct Propagation {
+      std::string Name;
+      ArgVector SrcArgs;
+      ArgVector DstArgs;
+      VariadicType VarType;
+      unsigned VarIndex;
+    };
+
+    std::vector<Propagation> Propagations;
+    std::vector<NameArgsPair> Filters;
+    std::vector<NameArgsPair> Sinks;
+  };
+
+  /// Get and read the config file.
+  static void getConfiguration(StringRef ConfigFile);
+
+  /// Parse the config.
+  static void parseConfiguration(TaintConfiguration &&Config);
+
   static const unsigned InvalidArgIndex = UINT_MAX;
   /// Denotes the return vale.
   static const unsigned ReturnValueIndex = UINT_MAX - 1;
 
+private:
   mutable std::unique_ptr<BugType> BT;
   void initBugType() const {
     if (!BT)
@@ -54,10 +105,16 @@
 
   /// Catch taint related bugs. Check if tainted data is passed to a
   /// system call etc.
-  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
+  bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name,
+                CheckerContext &C) const;
 
   /// Add taint sources on a pre-visit.
-  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
+  bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl,
+                     StringRef Name, CheckerContext &C) const;
+
+  /// Mark filter's arguments not tainted on a pre-visit.
+  bool addFiltersPre(const CallExpr *CE, StringRef Name,
+                     CheckerContext &C) const;
 
   /// Propagate taint generated at pre-visit.
   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
@@ -87,12 +144,15 @@
   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
                               CheckerContext &C) const;
 
+  /// Check if tainted data is used as a custom sink's parameter.
+  static const char MsgCustomSink[];
+  bool checkCustomSinks(const CallExpr *CE, StringRef Name,
+                        CheckerContext &C) const;
+
   /// Generate a report if the expression is tainted or points to tainted data.
   bool generateReportIfTainted(const Expr *E, const char Msg[],
                                CheckerContext &C) const;
 
-  using ArgVector = SmallVector<unsigned, 2>;
-
   /// A struct used to specify taint propagation rules for a function.
   ///
   /// If any of the possible taint source arguments is tainted, all of the
@@ -103,8 +163,6 @@
   /// ReturnValueIndex is added to the dst list, the return value will be
   /// tainted.
   struct TaintPropagationRule {
-    enum class VariadicType { None, Src, Dst };
-
     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
                                          CheckerContext &C);
 
@@ -125,8 +183,7 @@
         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
           PropagationFunc(nullptr) {}
 
-    TaintPropagationRule(std::initializer_list<unsigned> &&Src,
-                         std::initializer_list<unsigned> &&Dst,
+    TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
                          VariadicType Var = VariadicType::None,
                          unsigned VarIndex = InvalidArgIndex,
                          PropagationFuncType Func = nullptr)
@@ -170,6 +227,19 @@
     static bool postSocket(bool IsTainted, const CallExpr *CE,
                            CheckerContext &C);
   };
+
+  using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
+  using NameArgMap = llvm::StringMap<ArgVector>;
+
+  /// Defines a map between the propagation function's name and
+  /// TaintPropagationRule.
+  static NameRuleMap CustomPropagations;
+
+  /// Defines a map between the filter function's name and filtering args.
+  static NameArgMap CustomFilters;
+
+  /// Defines a map between the sink function's name and sinking args.
+  static NameArgMap CustomSinks;
 };
 
 const unsigned GenericTaintChecker::ReturnValueIndex;
@@ -188,13 +258,107 @@
     "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
     "for character data and the null terminator)";
 
+const char GenericTaintChecker::MsgCustomSink[] =
+    "Untrusted data is passed to a user defined sink";
+
+GenericTaintChecker::NameRuleMap GenericTaintChecker::CustomPropagations;
+
+GenericTaintChecker::NameArgMap GenericTaintChecker::CustomFilters;
+
+GenericTaintChecker::NameArgMap GenericTaintChecker::CustomSinks;
 } // end of anonymous namespace
 
+using TaintConfig = GenericTaintChecker::TaintConfiguration;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<TaintConfig> {
+  static void mapping(IO &IO, TaintConfig &Config) {
+    IO.mapOptional("Propagations", Config.Propagations);
+    IO.mapOptional("Filters", Config.Filters);
+    IO.mapOptional("Sinks", Config.Sinks);
+  }
+};
+
+template <> struct MappingTraits<TaintConfig::Propagation> {
+  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
+    IO.mapRequired("Name", Propagation.Name);
+    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
+    IO.mapOptional("DstArgs", Propagation.DstArgs);
+    IO.mapOptional("VarType", Propagation.VarType,
+                   GenericTaintChecker::VariadicType::None);
+    IO.mapOptional("VarIndex", Propagation.VarIndex,
+                   GenericTaintChecker::InvalidArgIndex);
+  }
+};
+
+template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
+  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
+    IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
+    IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
+    IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
+  }
+};
+
+template <> struct MappingTraits<TaintConfig::NameArgsPair> {
+  static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
+    IO.mapRequired("Name", NameArg.first);
+    IO.mapRequired("Args", NameArg.second);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
 /// A set which is used to pass information from call pre-visit instruction
 /// to the call post-visit. The values are unsigned integers, which are either
 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
 /// points to data, which should be tainted on return.
-REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
+REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, TaintArgTypePair)
+
+void GenericTaintChecker::getConfiguration(StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+    return;
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer =
+      FS->getBufferForFile(ConfigFile.str());
+
+  if (std::error_code ec = Buffer.getError()) {
+    llvm::errs() << "Error when getting TaintPropagation's config file '"
+                 << ConfigFile << "': " << ec.message() << '\n';
+    return;
+  }
+
+  llvm::yaml::Input Input(Buffer.get()->getBuffer());
+  TaintConfiguration Config;
+  Input >> Config;
+
+  if (std::error_code ec = Input.error()) {
+    return;
+  }
+
+  parseConfiguration(std::move(Config));
+}
+
+void GenericTaintChecker::parseConfiguration(TaintConfiguration &&Config) {
+  for (auto &P : Config.Propagations) {
+    GenericTaintChecker::CustomPropagations.try_emplace(
+        P.Name, std::move(P.SrcArgs), std::move(P.DstArgs), P.VarType,
+        P.VarIndex);
+  }
+
+  for (auto &F : Config.Filters) {
+    GenericTaintChecker::CustomFilters.try_emplace(F.first,
+                                                   std::move(F.second));
+  }
+
+  for (auto &S : Config.Sinks) {
+    GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
+  }
+}
 
 GenericTaintChecker::TaintPropagationRule
 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
@@ -212,7 +376,8 @@
           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
-          .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("getchar_unlocked",
+                TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
@@ -291,19 +456,34 @@
   // or smart memory copy:
   // - memccpy - copying until hitting a special character.
 
+  auto It = CustomPropagations.find(Name);
+  if (It != CustomPropagations.end())
+    return It->getValue();
+
   return TaintPropagationRule();
 }
 
 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
+  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+  if (!FDecl || FDecl->getKind() != Decl::Function)
+    return;
+
+  StringRef Name = C.getCalleeName(FDecl);
+  if (Name.empty())
+    return;
+
   // Check for taintedness related errors first: system call, uncontrolled
   // format string, tainted buffer size.
-  if (checkPre(CE, C))
+  if (checkPre(CE, FDecl, Name, C))
     return;
 
   // Marks the function's arguments and/or return value tainted if it present in
   // the list.
-  addSourcesPre(CE, C);
+  if (addSourcesPre(CE, FDecl, Name, C))
+    return;
+
+  addFiltersPre(CE, Name, C);
 }
 
 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
@@ -313,31 +493,45 @@
   propagateFromPre(CE, C);
 }
 
-void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+                                        const FunctionDecl *FDecl,
+                                        StringRef Name,
                                         CheckerContext &C) const {
-  ProgramStateRef State = nullptr;
-  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
-  if (!FDecl || FDecl->getKind() != Decl::Function)
-    return;
-
-  StringRef Name = C.getCalleeName(FDecl);
-  if (Name.empty())
-    return;
-
   // First, try generating a propagation rule for this function.
   TaintPropagationRule Rule =
       TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
   if (!Rule.isNull()) {
-    State = Rule.process(CE, C);
+    ProgramStateRef State = Rule.process(CE, C);
     if (!State)
-      return;
+      return false;
     C.addTransition(State);
-    return;
   }
+  return true;
+}
 
-  if (!State)
-    return;
-  C.addTransition(State);
+bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name,
+                                        CheckerContext &C) const {
+  auto It = CustomFilters.find(Name);
+  if (It == CustomFilters.end())
+    return false;
+
+  ProgramStateRef State = C.getState();
+  const GenericTaintChecker::ArgVector &Args = It->getValue();
+  for (unsigned ArgNum : Args) {
+    if (ArgNum >= CE->getNumArgs()) {
+      llvm::errs() << "Skip out of bound filter Arg: " << Name << ":" << ArgNum
+                   << '\n';
+      continue;
+    }
+
+    State = State->add<TaintArgsOnPostVisit>({ArgNum, TaintTagNotTainted});
+  }
+
+  if (State != C.getState()) {
+    C.addTransition(State);
+    return true;
+  }
+  return false;
 }
 
 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
@@ -351,10 +545,12 @@
   if (TaintArgs.isEmpty())
     return false;
 
-  for (unsigned ArgNum : TaintArgs) {
+  for (auto ArgTypePair : TaintArgs) {
+    unsigned ArgNum = ArgTypePair.Arg;
+    TaintTagType TagType = ArgTypePair.TagType;
     // Special handling for the tainted return value.
     if (ArgNum == ReturnValueIndex) {
-      State = State->addTaint(CE, C.getLocationContext());
+      State = State->setTaint(CE, C.getLocationContext(), TagType);
       continue;
     }
 
@@ -365,7 +561,7 @@
     const Expr *Arg = CE->getArg(ArgNum);
     Optional<SVal> V = getPointedToSVal(C, Arg);
     if (V)
-      State = State->addTaint(*V);
+      State = State->setTaint(*V, TagType);
   }
 
   // Clear up the taint info from the state.
@@ -379,25 +575,21 @@
 }
 
 bool GenericTaintChecker::checkPre(const CallExpr *CE,
+                                   const FunctionDecl *FDecl, StringRef Name,
                                    CheckerContext &C) const {
 
   if (checkUncontrolledFormatString(CE, C))
     return true;
 
-  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
-  if (!FDecl || FDecl->getKind() != Decl::Function)
-    return false;
-
-  StringRef Name = C.getCalleeName(FDecl);
-  if (Name.empty())
-    return false;
-
   if (checkSystemCall(CE, Name, C))
     return true;
 
   if (checkTaintedBufferSize(CE, FDecl, C))
     return true;
 
+  if (checkCustomSinks(CE, Name, C))
+    return true;
+
   return false;
 }
 
@@ -434,8 +626,12 @@
   // Check for taint in arguments.
   bool IsTainted = true;
   for (unsigned ArgNum : SrcArgs) {
-    if (ArgNum >= CE->getNumArgs())
-      return State;
+    if (ArgNum >= CE->getNumArgs()) {
+      StringRef Name = C.getCalleeName(C.getCalleeDecl(CE));
+      llvm::errs() << "Skip out of bound SrcArg: " << Name << ":" << ArgNum
+                   << '\n';
+      continue;
+    }
     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
       break;
   }
@@ -443,7 +639,7 @@
   // Check for taint in variadic arguments.
   if (!IsTainted && VariadicType::Src == VarType) {
     // Check if any of the arguments is tainted
-    for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
         break;
     }
@@ -459,13 +655,20 @@
   for (unsigned ArgNum : DstArgs) {
     // Should mark the return value?
     if (ArgNum == ReturnValueIndex) {
-      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
+      State =
+          State->add<TaintArgsOnPostVisit>({ReturnValueIndex, TaintTagGeneric});
+      continue;
+    }
+
+    if (ArgNum >= CE->getNumArgs()) {
+      StringRef Name = C.getCalleeName(C.getCalleeDecl(CE));
+      llvm::errs() << "Skip out of bound DstArg: " << Name << ":" << ArgNum
+                   << '\n';
       continue;
     }
 
     // Mark the given argument.
-    assert(ArgNum < CE->getNumArgs());
-    State = State->add<TaintArgsOnPostVisit>(ArgNum);
+    State = State->add<TaintArgsOnPostVisit>({ArgNum, TaintTagGeneric});
   }
 
   // Mark all variadic arguments tainted if present.
@@ -474,14 +677,14 @@
     //   If they are not pointing to const data, mark data as tainted.
     //   TODO: So far we are just going one level down; ideally we'd need to
     //         recurse here.
-    for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
       const Expr *Arg = CE->getArg(i);
       // Process pointer argument.
       const Type *ArgTy = Arg->getType().getTypePtr();
       QualType PType = ArgTy->getPointeeType();
       if ((!PType.isNull() && !PType.isConstQualified()) ||
           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
-        State = State->add<TaintArgsOnPostVisit>(i);
+        State = State->add<TaintArgsOnPostVisit>({i, TaintTagGeneric});
     }
   }
 
@@ -539,7 +742,7 @@
 
 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
                                        const CheckerContext &C,
-                                       unsigned int &ArgNum) {
+                                       unsigned &ArgNum) {
   // Find if the function contains a format string argument.
   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
   // vsnprintf, syslog, custom annotated functions.
@@ -592,7 +795,7 @@
 bool GenericTaintChecker::checkUncontrolledFormatString(
     const CallExpr *CE, CheckerContext &C) const {
   // Check if the function contains a format string argument.
-  unsigned int ArgNum = 0;
+  unsigned ArgNum = 0;
   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
     return false;
 
@@ -618,9 +821,9 @@
                         .Case("execvP", 0)
                         .Case("execve", 0)
                         .Case("dlopen", 0)
-                        .Default(UINT_MAX);
+                        .Default(InvalidArgIndex);
 
-  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
+  if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
     return false;
 
   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
@@ -665,8 +868,32 @@
          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
 }
 
+bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
+                                           CheckerContext &C) const {
+  auto It = CustomSinks.find(Name);
+  if (It == CustomSinks.end())
+    return false;
+
+  const GenericTaintChecker::ArgVector &Args = It->getValue();
+  for (unsigned ArgNum : Args) {
+    if (ArgNum >= CE->getNumArgs()) {
+      llvm::errs() << "Skip out of bound sink Arg: " << Name << ":" << ArgNum
+                   << '\n';
+      continue;
+    }
+
+    if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
+      return true;
+  }
+
+  return false;
+}
+
 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
-  mgr.registerChecker<GenericTaintChecker>();
+  const auto *Checker = mgr.registerChecker<GenericTaintChecker>();
+  StringRef ConfigFile =
+      mgr.getAnalyzerOptions().getCheckerStringOption(Checker, "Config", "");
+  GenericTaintChecker::getConfiguration(ConfigFile);
 }
 
 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
Index: include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h
===================================================================
--- include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h
+++ include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h
@@ -21,7 +21,8 @@
 /// taint.
 using TaintTagType = unsigned;
 
-static const TaintTagType TaintTagGeneric = 0;
+static const TaintTagType TaintTagNotTainted = 0;
+static const TaintTagType TaintTagGeneric = 1;
 
 } // namespace ento
 } // namespace clang
Index: include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
===================================================================
--- include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -369,27 +369,27 @@
 
   /// Create a new state in which the statement is marked as tainted.
   LLVM_NODISCARD ProgramStateRef
-  addTaint(const Stmt *S, const LocationContext *LCtx,
+  setTaint(const Stmt *S, const LocationContext *LCtx,
            TaintTagType Kind = TaintTagGeneric) const;
 
   /// Create a new state in which the value is marked as tainted.
   LLVM_NODISCARD ProgramStateRef
-  addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;
+  setTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;
 
   /// Create a new state in which the symbol is marked as tainted.
-  LLVM_NODISCARD ProgramStateRef addTaint(SymbolRef S,
+  LLVM_NODISCARD ProgramStateRef setTaint(SymbolRef S,
                                TaintTagType Kind = TaintTagGeneric) const;
 
   /// Create a new state in which the region symbol is marked as tainted.
   LLVM_NODISCARD ProgramStateRef
-  addTaint(const MemRegion *R, TaintTagType Kind = TaintTagGeneric) const;
+  setTaint(const MemRegion *R, TaintTagType Kind = TaintTagGeneric) const;
 
   /// Create a new state in a which a sub-region of a given symbol is tainted.
   /// This might be necessary when referring to regions that can not have an
   /// individual symbol, e.g. if they are represented by the default binding of
   /// a LazyCompoundVal.
   LLVM_NODISCARD ProgramStateRef
-  addPartialTaint(SymbolRef ParentSym, const SubRegion *SubRegion,
+  setPartialTaint(SymbolRef ParentSym, const SubRegion *SubRegion,
                   TaintTagType Kind = TaintTagGeneric) const;
 
   /// Check if the statement is tainted in the current state.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to