boga95 updated this revision to Diff 210260.
boga95 marked 2 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59555/new/

https://reviews.llvm.org/D59555

Files:
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Yaml.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -1,5 +1,27 @@
-// RUN: %clang_analyze_cc1  -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
-// RUN: %clang_analyze_cc1  -DFILE_IS_STRUCT -analyzer-checker=alpha.security.taint,core,alpha.security.ArrayBoundV2 -Wno-format-security -verify %s
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN:     alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: %clang_analyze_cc1 -Wno-format-security -verify %s \
+// RUN:   -DFILE_IS_STRUCT \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-config \
+// RUN:     alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
+
+// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-config \
+// RUN:     alpha.security.taint.TaintPropagation:Config=justguessit \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
+
+// CHECK-INVALID-FILE: (frontend): invalid input for checker option
+// CHECK-INVALID-FILE-SAME:        'alpha.security.taint.TaintPropagation:Config',
+// CHECK-INVALID-FILE-SAME:        that expects a valid filename
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
Index: test/Analysis/Inputs/taint-generic-config.yaml
===================================================================
--- /dev/null
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -0,0 +1,51 @@
+# A list of source/propagation function
+Propagations:
+  # int x = mySource1(); // x is tainted
+  - Name:     mySource1
+    DstArgs:  [-1] # Index for return value
+
+  # int x;
+  # mySource2(&x); // x is tainted
+  - Name:     mySource2
+    DstArgs:  [0]
+
+  # int x, y;
+  # myScanf("%d %d", &x, &y); // x and y are tainted
+  - Name:          myScanf
+    VariadicType:  Dst
+    VariadicIndex: 1
+
+  # int x; // x is tainted
+  # int y;
+  # myPropagator(x, &y); // y is tainted
+  - Name:     myPropagator
+    SrcArgs:  [0]
+    DstArgs:  [1]
+
+  # constexpr unsigned size = 100;
+  # char buf[size];
+  # int x, y;
+  # int n = mySprintf(buf, size, "%d %d", x, y); // If size, x or y is tainted
+  # // the return value and the buf will be tainted
+  - Name:          mySnprintf
+    SrcArgs:       [1]
+    DstArgs:       [0, -1]
+    VariadicType:  Src
+    VariadicIndex: 3
+
+# A list of filter functions
+Filters:
+  # int x; // x is tainted
+  # myFilter(&x); // x is not tainted anymore
+  - Name: myFilter
+    Args: [0]
+
+# A list of sink functions
+Sinks:
+  # int x, y; // x and y are tainted
+  # mySink(x, 0, 1); // It will warn
+  # mySink(0, 1, y); // It will warn
+  # mySink(0, x, 1); // It won't warn
+  - Name: mySink
+    Args: [0, 2]
+
Index: lib/StaticAnalyzer/Checkers/Yaml.h
===================================================================
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/Yaml.h
@@ -0,0 +1,48 @@
+//== Yaml.h ----------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a simple interface allow to open and parse yaml files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/YAMLTraits.h"
+
+/// Read the given file from the filesystem and parse it as a yaml file. The
+/// template parameter must have a yaml MappingTraits.
+template <class T, class Checker>
+llvm::Optional<T> getConfiguration(clang::ento::CheckerManager &Mgr,
+                                   Checker *Chk, llvm::StringRef Option,
+                                   llvm::StringRef ConfigFile) {
+  if (ConfigFile.trim().empty())
+    return {};
+
+  llvm::vfs::FileSystem *FS = llvm::vfs::getRealFileSystem().get();
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer =
+      FS->getBufferForFile(ConfigFile.str());
+
+  if (std::error_code ec = Buffer.getError()) {
+    Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+                                        "a valid filename instead of '" +
+                                            std::string(ConfigFile) + "'");
+    return {};
+  }
+
+  llvm::yaml::Input Input(Buffer.get()->getBuffer());
+  T Config;
+  Input >> Config;
+
+  if (std::error_code ec = Input.error()) {
+    Mgr.reportInvalidCheckerOptionValue(Chk, Option,
+                                        "a valid yaml file: " + ec.message());
+    return {};
+  }
+
+  return Config;
+}
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,16 +15,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "Yaml.h"
 #include "clang/AST/Attr.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include <climits>
-#include <initializer_list>
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <limits>
 #include <utility>
 
 using namespace clang;
@@ -44,14 +47,51 @@
 
   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
 
-  void printState(raw_ostream &Out, ProgramStateRef State,
-                  const char *NL, const char *Sep) const override;
+  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
+                  const char *Sep) const override;
 
-private:
-  static const unsigned InvalidArgIndex = UINT_MAX;
+  using ArgVector = SmallVector<unsigned, 2>;
+  using SignedArgVector = SmallVector<int, 2>;
+
+  enum class VariadicType { None, Src, Dst };
+
+  /// Used to parse the configuration file.
+  struct TaintConfiguration {
+    using NameArgsPair = std::pair<std::string, ArgVector>;
+
+    struct Propagation {
+      std::string Name;
+      SignedArgVector SrcArgs;
+      SignedArgVector DstArgs;
+      VariadicType VarType;
+      unsigned VarIndex;
+    };
+
+    std::vector<Propagation> Propagations;
+    std::vector<NameArgsPair> Filters;
+    std::vector<NameArgsPair> Sinks;
+
+    TaintConfiguration() = default;
+    TaintConfiguration(const TaintConfiguration &) = delete;
+    TaintConfiguration(TaintConfiguration &&) = default;
+    TaintConfiguration &operator=(const TaintConfiguration &) = delete;
+    TaintConfiguration &operator=(TaintConfiguration &&) = default;
+  };
+
+  /// Convert SignedArgVector to ArgVector.
+  ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
+                               SignedArgVector Args);
+
+  /// Parse the config.
+  void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
+                          TaintConfiguration &&Config);
+
+  static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
   /// Denotes the return vale.
-  static const unsigned ReturnValueIndex = UINT_MAX - 1;
+  static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
+                                         1};
 
+private:
   mutable std::unique_ptr<BugType> BT;
   void initBugType() const {
     if (!BT)
@@ -97,8 +137,6 @@
   bool generateReportIfTainted(const Expr *E, const char Msg[],
                                CheckerContext &C) const;
 
-  using ArgVector = SmallVector<unsigned, 2>;
-
   /// A struct used to specify taint propagation rules for a function.
   ///
   /// If any of the possible taint source arguments is tainted, all of the
@@ -109,8 +147,6 @@
   /// ReturnValueIndex is added to the dst list, the return value will be
   /// tainted.
   struct TaintPropagationRule {
-    enum class VariadicType { None, Src, Dst };
-
     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
                                          CheckerContext &C);
 
@@ -131,8 +167,7 @@
         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
           PropagationFunc(nullptr) {}
 
-    TaintPropagationRule(std::initializer_list<unsigned> &&Src,
-                         std::initializer_list<unsigned> &&Dst,
+    TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
                          VariadicType Var = VariadicType::None,
                          unsigned VarIndex = InvalidArgIndex,
                          PropagationFuncType Func = nullptr)
@@ -176,6 +211,19 @@
     static bool postSocket(bool IsTainted, const CallExpr *CE,
                            CheckerContext &C);
   };
+
+  using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
+  using NameArgMap = llvm::StringMap<ArgVector>;
+
+  /// Defines a map between the propagation function's name and
+  /// TaintPropagationRule.
+  NameRuleMap CustomPropagations;
+
+  /// Defines a map between the filter function's name and filtering args.
+  NameArgMap CustomFilters;
+
+  /// Defines a map between the sink function's name and sinking args.
+  NameArgMap CustomSinks;
 };
 
 const unsigned GenericTaintChecker::ReturnValueIndex;
@@ -193,15 +241,94 @@
     "Untrusted data is used to specify the buffer size "
     "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
     "for character data and the null terminator)";
-
 } // end of anonymous namespace
 
+using TaintConfig = GenericTaintChecker::TaintConfiguration;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
+
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<TaintConfig> {
+  static void mapping(IO &IO, TaintConfig &Config) {
+    IO.mapOptional("Propagations", Config.Propagations);
+    IO.mapOptional("Filters", Config.Filters);
+    IO.mapOptional("Sinks", Config.Sinks);
+  }
+};
+
+template <> struct MappingTraits<TaintConfig::Propagation> {
+  static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
+    IO.mapRequired("Name", Propagation.Name);
+    IO.mapOptional("SrcArgs", Propagation.SrcArgs);
+    IO.mapOptional("DstArgs", Propagation.DstArgs);
+    IO.mapOptional("VariadicType", Propagation.VarType,
+                   GenericTaintChecker::VariadicType::None);
+    IO.mapOptional("VariadicIndex", Propagation.VarIndex,
+                   GenericTaintChecker::InvalidArgIndex);
+  }
+};
+
+template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
+  static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
+    IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
+    IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
+    IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
+  }
+};
+
+template <> struct MappingTraits<TaintConfig::NameArgsPair> {
+  static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
+    IO.mapRequired("Name", NameArg.first);
+    IO.mapRequired("Args", NameArg.second);
+  }
+};
+} // namespace yaml
+} // namespace llvm
+
 /// A set which is used to pass information from call pre-visit instruction
 /// to the call post-visit. The values are unsigned integers, which are either
 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
 /// points to data, which should be tainted on return.
 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
 
+GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
+    CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
+  ArgVector Result;
+  for (int Arg : Args) {
+    if (Arg == -1)
+      Result.push_back(ReturnValueIndex);
+    else if (Arg < -1) {
+      Result.push_back(InvalidArgIndex);
+      Mgr.reportInvalidCheckerOptionValue(
+          this, Option,
+          "an argument number for propagation rules greater or equal to -1");
+    } else
+      Result.push_back(static_cast<unsigned>(Arg));
+  }
+  return Result;
+}
+
+void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
+                                             const std::string &Option,
+                                             TaintConfiguration &&Config) {
+  for (auto &P : Config.Propagations) {
+    GenericTaintChecker::CustomPropagations.try_emplace(
+        P.Name, convertToArgVector(Mgr, Option, P.SrcArgs),
+        convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
+  }
+
+  for (auto &F : Config.Filters) {
+    GenericTaintChecker::CustomFilters.try_emplace(F.first,
+                                                   std::move(F.second));
+  }
+
+  for (auto &S : Config.Sinks) {
+    GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
+  }
+}
+
 GenericTaintChecker::TaintPropagationRule
 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
     const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
@@ -218,7 +345,8 @@
           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
-          .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("getchar_unlocked",
+                TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
@@ -454,7 +582,7 @@
   // Check for taint in variadic arguments.
   if (!IsTainted && VariadicType::Src == VarType) {
     // Check if any of the arguments is tainted
-    for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
         break;
     }
@@ -485,7 +613,7 @@
     //   If they are not pointing to const data, mark data as tainted.
     //   TODO: So far we are just going one level down; ideally we'd need to
     //         recurse here.
-    for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
       const Expr *Arg = CE->getArg(i);
       // Process pointer argument.
       const Type *ArgTy = Arg->getType().getTypePtr();
@@ -550,7 +678,7 @@
 
 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
                                        const CheckerContext &C,
-                                       unsigned int &ArgNum) {
+                                       unsigned &ArgNum) {
   // Find if the function contains a format string argument.
   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
   // vsnprintf, syslog, custom annotated functions.
@@ -603,7 +731,7 @@
 bool GenericTaintChecker::checkUncontrolledFormatString(
     const CallExpr *CE, CheckerContext &C) const {
   // Check if the function contains a format string argument.
-  unsigned int ArgNum = 0;
+  unsigned ArgNum = 0;
   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
     return false;
 
@@ -676,8 +804,15 @@
          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
 }
 
-void ento::registerGenericTaintChecker(CheckerManager &mgr) {
-  mgr.registerChecker<GenericTaintChecker>();
+void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
+  auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
+  std::string Option{"Config"};
+  StringRef ConfigFile =
+      Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
+  llvm::Optional<TaintConfig> Config =
+      getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
+  if (Config)
+    Checker->parseConfiguration(Mgr, Option, std::move(Config).getValue());
 }
 
 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
Index: include/clang/StaticAnalyzer/Checkers/Checkers.td
===================================================================
--- include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -799,6 +799,13 @@
 
 def GenericTaintChecker : Checker<"TaintPropagation">,
   HelpText<"Generate taint information used by other checkers">,
+  CheckerOptions<[
+    CmdLineOption<String,
+                  "Config",
+                  "Specifies the name of the configuration file.",
+                  "",
+                  InAlpha>,
+  ]>,
   Documentation<HasAlphaDocumentation>;
 
 } // end "alpha.security.taint"
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to