[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-18 Thread Endre Fülöp via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG17f74240e6c3: [analyzer][NFC] Refactor GenericTaintChecker 
to use CallDescriptionMap (authored by gamesh411).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

Files:
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -22,15 +22,14 @@
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "llvm/Support/YAMLTraits.h"
 
-#include 
 #include 
 #include 
-#include 
 #include 
 
 using namespace clang;
@@ -38,577 +37,651 @@
 using namespace taint;
 
 namespace {
-class GenericTaintChecker : public Checker {
-public:
-  static void *getTag() {
-static int Tag;
-return 
+
+class GenericTaintChecker;
+
+/// Check for CWE-134: Uncontrolled Format String.
+constexpr llvm::StringLiteral MsgUncontrolledFormatString =
+"Untrusted data is used as a format string "
+"(CWE-134: Uncontrolled Format String)";
+
+/// Check for:
+/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
+/// CWE-78, "Failure to Sanitize Data into an OS Command"
+constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
+"Untrusted data is passed to a system call "
+"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+
+/// Check if tainted data is used as a buffer size in strn.. functions,
+/// and allocators.
+constexpr llvm::StringLiteral MsgTaintedBufferSize =
+"Untrusted data is used to specify the buffer size "
+"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
+"for character data and the null terminator)";
+
+/// Check if tainted data is used as a custom sink's parameter.
+constexpr llvm::StringLiteral MsgCustomSink =
+"Untrusted data is passed to a user-defined sink";
+
+using ArgIdxTy = int;
+using ArgVecTy = llvm::SmallVector;
+
+/// Denotes the return value.
+constexpr ArgIdxTy ReturnValueIndex{-1};
+
+static ArgIdxTy fromArgumentCount(unsigned Count) {
+  assert(Count <=
+ static_cast(std::numeric_limits::max()) &&
+ "ArgIdxTy is not large enough to represent the number of arguments.");
+  return Count;
+}
+
+/// Check if the region the expression evaluates to is the standard input,
+/// and thus, is tainted.
+/// FIXME: Move this to Taint.cpp.
+bool isStdin(SVal Val, const ASTContext ) {
+  // FIXME: What if Val is NonParamVarRegion?
+
+  // The region should be symbolic, we do not know it's value.
+  const auto *SymReg = dyn_cast_or_null(Val.getAsRegion());
+  if (!SymReg)
+return false;
+
+  // Get it's symbol and find the declaration region it's pointing to.
+  const auto *Sm = dyn_cast(SymReg->getSymbol());
+  if (!Sm)
+return false;
+  const auto *DeclReg = dyn_cast(Sm->getRegion());
+  if (!DeclReg)
+return false;
+
+  // This region corresponds to a declaration, find out if it's a global/extern
+  // variable named stdin with the proper type.
+  if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) {
+D = D->getCanonicalDecl();
+// FIXME: This should look for an exact match.
+if (D->getName().contains("stdin") && D->isExternC()) {
+  const QualType FILETy = ACtx.getFILEType().getCanonicalType();
+  const QualType Ty = D->getType().getCanonicalType();
+
+  if (Ty->isPointerType())
+return Ty->getPointeeType() == FILETy;
+}
   }
+  return false;
+}
 
-  void checkPreCall(const CallEvent , CheckerContext ) const;
-  void checkPostCall(const CallEvent , CheckerContext ) const;
+SVal getPointeeOf(const CheckerContext , Loc LValue) {
+  const QualType ArgTy = LValue.getType(C.getASTContext());
+  if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
+return C.getState()->getSVal(LValue);
 
-  void printState(raw_ostream , ProgramStateRef State, const char *NL,
-  const char *Sep) const override;
+  // Do not dereference void pointers. Treat them as byte pointers instead.
+  // FIXME: we might want to consider more than just the first byte.
+  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+}
+
+/// Given a pointer/reference argument, return the value it refers to.

[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-18 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 updated this revision to Diff 400790.
gamesh411 added a comment.

Remove explicit template keyword for MSVC compatibility


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

Files:
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -22,15 +22,14 @@
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "llvm/Support/YAMLTraits.h"
 
-#include 
 #include 
 #include 
-#include 
 #include 
 
 using namespace clang;
@@ -38,577 +37,651 @@
 using namespace taint;
 
 namespace {
-class GenericTaintChecker : public Checker {
-public:
-  static void *getTag() {
-static int Tag;
-return 
+
+class GenericTaintChecker;
+
+/// Check for CWE-134: Uncontrolled Format String.
+constexpr llvm::StringLiteral MsgUncontrolledFormatString =
+"Untrusted data is used as a format string "
+"(CWE-134: Uncontrolled Format String)";
+
+/// Check for:
+/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
+/// CWE-78, "Failure to Sanitize Data into an OS Command"
+constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
+"Untrusted data is passed to a system call "
+"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+
+/// Check if tainted data is used as a buffer size in strn.. functions,
+/// and allocators.
+constexpr llvm::StringLiteral MsgTaintedBufferSize =
+"Untrusted data is used to specify the buffer size "
+"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
+"for character data and the null terminator)";
+
+/// Check if tainted data is used as a custom sink's parameter.
+constexpr llvm::StringLiteral MsgCustomSink =
+"Untrusted data is passed to a user-defined sink";
+
+using ArgIdxTy = int;
+using ArgVecTy = llvm::SmallVector;
+
+/// Denotes the return value.
+constexpr ArgIdxTy ReturnValueIndex{-1};
+
+static ArgIdxTy fromArgumentCount(unsigned Count) {
+  assert(Count <=
+ static_cast(std::numeric_limits::max()) &&
+ "ArgIdxTy is not large enough to represent the number of arguments.");
+  return Count;
+}
+
+/// Check if the region the expression evaluates to is the standard input,
+/// and thus, is tainted.
+/// FIXME: Move this to Taint.cpp.
+bool isStdin(SVal Val, const ASTContext ) {
+  // FIXME: What if Val is NonParamVarRegion?
+
+  // The region should be symbolic, we do not know it's value.
+  const auto *SymReg = dyn_cast_or_null(Val.getAsRegion());
+  if (!SymReg)
+return false;
+
+  // Get it's symbol and find the declaration region it's pointing to.
+  const auto *Sm = dyn_cast(SymReg->getSymbol());
+  if (!Sm)
+return false;
+  const auto *DeclReg = dyn_cast(Sm->getRegion());
+  if (!DeclReg)
+return false;
+
+  // This region corresponds to a declaration, find out if it's a global/extern
+  // variable named stdin with the proper type.
+  if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) {
+D = D->getCanonicalDecl();
+// FIXME: This should look for an exact match.
+if (D->getName().contains("stdin") && D->isExternC()) {
+  const QualType FILETy = ACtx.getFILEType().getCanonicalType();
+  const QualType Ty = D->getType().getCanonicalType();
+
+  if (Ty->isPointerType())
+return Ty->getPointeeType() == FILETy;
+}
   }
+  return false;
+}
 
-  void checkPreCall(const CallEvent , CheckerContext ) const;
-  void checkPostCall(const CallEvent , CheckerContext ) const;
+SVal getPointeeOf(const CheckerContext , Loc LValue) {
+  const QualType ArgTy = LValue.getType(C.getASTContext());
+  if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
+return C.getState()->getSVal(LValue);
 
-  void printState(raw_ostream , ProgramStateRef State, const char *NL,
-  const char *Sep) const override;
+  // Do not dereference void pointers. Treat them as byte pointers instead.
+  // FIXME: we might want to consider more than just the first byte.
+  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+}
+
+/// Given a pointer/reference argument, return the value it refers to.
+Optional getPointeeOf(const CheckerContext , SVal Arg) {
+  if (auto LValue = Arg.getAs())
+return getPointeeOf(C, *LValue);
+  

[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-17 Thread Balázs Benics via Phabricator via cfe-commits
steakhal accepted this revision.
steakhal added a comment.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-17 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 updated this revision to Diff 400498.
gamesh411 added a comment.

All commits were exluded in the previous patch upload


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

Files:
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -22,15 +22,14 @@
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "llvm/Support/YAMLTraits.h"
 
-#include 
 #include 
 #include 
-#include 
 #include 
 
 using namespace clang;
@@ -38,577 +37,651 @@
 using namespace taint;
 
 namespace {
-class GenericTaintChecker : public Checker {
-public:
-  static void *getTag() {
-static int Tag;
-return 
+
+class GenericTaintChecker;
+
+/// Check for CWE-134: Uncontrolled Format String.
+constexpr llvm::StringLiteral MsgUncontrolledFormatString =
+"Untrusted data is used as a format string "
+"(CWE-134: Uncontrolled Format String)";
+
+/// Check for:
+/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
+/// CWE-78, "Failure to Sanitize Data into an OS Command"
+constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
+"Untrusted data is passed to a system call "
+"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+
+/// Check if tainted data is used as a buffer size in strn.. functions,
+/// and allocators.
+constexpr llvm::StringLiteral MsgTaintedBufferSize =
+"Untrusted data is used to specify the buffer size "
+"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
+"for character data and the null terminator)";
+
+/// Check if tainted data is used as a custom sink's parameter.
+constexpr llvm::StringLiteral MsgCustomSink =
+"Untrusted data is passed to a user-defined sink";
+
+using ArgIdxTy = int;
+using ArgVecTy = llvm::SmallVector;
+
+/// Denotes the return value.
+constexpr ArgIdxTy ReturnValueIndex{-1};
+
+static ArgIdxTy fromArgumentCount(unsigned Count) {
+  assert(Count <=
+ static_cast(std::numeric_limits::max()) &&
+ "ArgIdxTy is not large enough to represent the number of arguments.");
+  return Count;
+}
+
+/// Check if the region the expression evaluates to is the standard input,
+/// and thus, is tainted.
+/// FIXME: Move this to Taint.cpp.
+bool isStdin(SVal Val, const ASTContext ) {
+  // FIXME: What if Val is NonParamVarRegion?
+
+  // The region should be symbolic, we do not know it's value.
+  const auto *SymReg = dyn_cast_or_null(Val.getAsRegion());
+  if (!SymReg)
+return false;
+
+  // Get it's symbol and find the declaration region it's pointing to.
+  const auto *Sm = dyn_cast(SymReg->getSymbol());
+  if (!Sm)
+return false;
+  const auto *DeclReg = dyn_cast(Sm->getRegion());
+  if (!DeclReg)
+return false;
+
+  // This region corresponds to a declaration, find out if it's a global/extern
+  // variable named stdin with the proper type.
+  if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) {
+D = D->getCanonicalDecl();
+// FIXME: This should look for an exact match.
+if (D->getName().contains("stdin") && D->isExternC()) {
+  const QualType FILETy = ACtx.getFILEType().getCanonicalType();
+  const QualType Ty = D->getType().getCanonicalType();
+
+  if (Ty->isPointerType())
+return Ty->getPointeeType() == FILETy;
+}
   }
+  return false;
+}
 
-  void checkPreCall(const CallEvent , CheckerContext ) const;
-  void checkPostCall(const CallEvent , CheckerContext ) const;
+SVal getPointeeOf(const CheckerContext , Loc LValue) {
+  const QualType ArgTy = LValue.getType(C.getASTContext());
+  if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
+return C.getState()->getSVal(LValue);
 
-  void printState(raw_ostream , ProgramStateRef State, const char *NL,
-  const char *Sep) const override;
+  // Do not dereference void pointers. Treat them as byte pointers instead.
+  // FIXME: we might want to consider more than just the first byte.
+  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+}
+
+/// Given a pointer/reference argument, return the value it refers to.
+Optional getPointeeOf(const CheckerContext , SVal Arg) {
+  if (auto LValue = Arg.getAs())
+return getPointeeOf(C, *LValue);
+  

[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-17 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 added a comment.

Applied typo and naming fixes, introduced 2 move operations, and re-introduced 
short circuiting.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-17 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 updated this revision to Diff 400464.
gamesh411 marked 7 inline comments as done.
gamesh411 added a comment.

Fixes round two


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp


Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -52,7 +52,7 @@
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size ins strn.. functions,
+/// Check if tainted data is used as a buffer size in strn.. functions,
 /// and allocators.
 constexpr llvm::StringLiteral MsgTaintedBufferSize =
 "Untrusted data is used to specify the buffer size "
@@ -160,7 +160,8 @@
 public:
   ArgSet() = default;
   ArgSet(ArgVecTy &, Optional VariadicIndex = None)
-  : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {}
+  : DiscreteArgs(std::move(DiscreteArgs)),
+VariadicIndex(std::move(VariadicIndex)) {}
 
   bool contains(ArgIdxTy ArgIdx) const {
 if (llvm::is_contained(DiscreteArgs, ArgIdx))
@@ -308,13 +309,13 @@
  TaintConfiguration &) const;
 
 private:
-  using NamePartTy = llvm::SmallVector, 2>;
+  using NamePartsTy = llvm::SmallVector, 2>;
 
   /// Validate part of the configuration, which contains a list of argument
   /// indexes.
   void validateArgVector(const std::string , const ArgVecTy ) 
const;
 
-  template  static NamePartTy parseNameParts(const Config );
+  template  static NamePartsTy parseNameParts(const Config 
);
 
   // Takes the config and creates a CallDescription for it and associates a 
Rule
   // with that.
@@ -445,9 +446,9 @@
 }
 
 template 
-GenericTaintRuleParser::NamePartTy
+GenericTaintRuleParser::NamePartsTy
 GenericTaintRuleParser::parseNameParts(const Config ) {
-  NamePartTy NameParts;
+  NamePartsTy NameParts;
   if (!C.Scope.empty()) {
 // If the Scope argument contains multiple "::" parts, those are considered
 // namespace identifiers.
@@ -464,7 +465,7 @@
 void GenericTaintRuleParser::consumeRulesFromConfig(const Config ,
 GenericTaintRule &,
 RulesContTy ) {
-  NamePartTy NameParts = parseNameParts(C);
+  NamePartsTy NameParts = parseNameParts(C);
   llvm::SmallVector CallDescParts{NameParts.size()};
   llvm::transform(NameParts, CallDescParts.begin(),
   [](SmallString<32> ) { return S.c_str(); });
@@ -642,6 +643,7 @@
   llvm::Optional Config =
   getConfiguration(*Mgr, this, Option, ConfigFile);
   if (!Config) {
+// We don't have external taint config, no parsing required.
 DynamicTaintRules = RuleLookupTy{};
 return;
   }
@@ -746,8 +748,8 @@
   bool IsMatching = PropSrcArgs.isEmpty();
   ForEachCallArg(
   [this, , , ](ArgIdxTy I, const Expr *E, SVal) {
-IsMatching |=
-PropSrcArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C);
+IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
+isTaintedOrPointsToTainted(E, State, C));
   });
 
   if (!IsMatching)
@@ -860,7 +862,7 @@
 
   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
-  // White list the internal communication protocols.
+  // Allow internal communication protocols.
   bool SafeProtocol = DomName.equals("AF_SYSTEM") ||
   DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") 
||
   DomName.equals("AF_RESERVED_36");


Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -52,7 +52,7 @@
 "Untrusted data is passed to a system call "
 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
 
-/// Check if tainted data is used as a buffer size ins strn.. functions,
+/// Check if tainted data is used as a buffer size in strn.. functions,
 /// and allocators.
 constexpr llvm::StringLiteral MsgTaintedBufferSize =
 "Untrusted data is used to specify the buffer size "
@@ -160,7 +160,8 @@
 public:
   ArgSet() = default;
   ArgSet(ArgVecTy &, Optional VariadicIndex = None)
-  : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {}
+  : DiscreteArgs(std::move(DiscreteArgs)),
+VariadicIndex(std::move(VariadicIndex)) {}
 
   bool contains(ArgIdxTy ArgIdx) const {
 if (llvm::is_contained(DiscreteArgs, ArgIdx))
@@ -308,13 +309,13 @@
  

[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-13 Thread Balázs Benics via Phabricator via cfe-commits
steakhal added a comment.

Sweet stuff!




Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:55
 
 /// Check if tainted data is used as a buffer size ins strn.. functions,
 /// and allocators.

typo



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:163
+  ArgSet(ArgVecTy &, Optional VariadicIndex = None)
   : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {}
 





Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:311
 private:
   using NamePartTy = llvm::SmallVector, 2>;
 

I would expect this in plural form.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:317
 
+  template  static NamePartTy parseNameParts(const Config );
+

Definitely reads oddly. The return type is singular but the function //parses 
name part**s**//.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:645
   if (!Config) {
-TaintRules = RuleLookupTy{};
+DynamicTaintRules = RuleLookupTy{};
 return;

A comment like this would make it cleaner:
`// We don't have external taint config, no parsing required.`



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:749-750
   [this, , , ](ArgIdxTy I, const Expr *E, SVal) {
 IsMatching |=
 PropSrcArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C);
   });

It's unfortunate that we don't shortcircuit after the patch.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:863
   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
   // White list the internal communication protocols.
   bool SafeProtocol = DomName.equals("AF_SYSTEM") ||

Use inclusive terms.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-12 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 updated this revision to Diff 399246.
gamesh411 added a comment.

Tidy things up thanks to the recommendations of @steakhal


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

Files:
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -38,6 +38,8 @@
 
 namespace {
 
+class GenericTaintChecker;
+
 /// Check for CWE-134: Uncontrolled Format String.
 constexpr llvm::StringLiteral MsgUncontrolledFormatString =
 "Untrusted data is used as a format string "
@@ -62,23 +64,26 @@
 "Untrusted data is passed to a user-defined sink";
 
 using ArgIdxTy = int;
-using ArgVecTy = llvm::SmallVector;
+using ArgVecTy = llvm::SmallVector;
 
-constexpr int InvalidArgIndex{-2};
 /// Denotes the return value.
-constexpr int ReturnValueIndex{-1};
+constexpr ArgIdxTy ReturnValueIndex{-1};
+
+static ArgIdxTy fromArgumentCount(unsigned Count) {
+  assert(Count <=
+ static_cast(std::numeric_limits::max()) &&
+ "ArgIdxTy is not large enough to represent the number of arguments.");
+  return Count;
+}
 
 /// Check if the region the expression evaluates to is the standard input,
 /// and thus, is tainted.
-bool isStdin(const Expr *E, CheckerContext ) {
-  ProgramStateRef State = C.getState();
-  SVal Val = C.getSVal(E);
-
-  // stdin is a pointer, so it would be a region.
-  const MemRegion *MemReg = Val.getAsRegion();
+/// FIXME: Move this to Taint.cpp.
+bool isStdin(SVal Val, const ASTContext ) {
+  // FIXME: What if Val is NonParamVarRegion?
 
   // The region should be symbolic, we do not know it's value.
-  const auto *SymReg = dyn_cast_or_null(MemReg);
+  const auto *SymReg = dyn_cast_or_null(Val.getAsRegion());
   if (!SymReg)
 return false;
 
@@ -86,7 +91,7 @@
   const auto *Sm = dyn_cast(SymReg->getSymbol());
   if (!Sm)
 return false;
-  const auto *DeclReg = dyn_cast_or_null(Sm->getRegion());
+  const auto *DeclReg = dyn_cast(Sm->getRegion());
   if (!DeclReg)
 return false;
 
@@ -94,68 +99,58 @@
   // variable named stdin with the proper type.
   if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) {
 D = D->getCanonicalDecl();
+// FIXME: This should look for an exact match.
 if (D->getName().contains("stdin") && D->isExternC()) {
-  const auto *PtrTy = dyn_cast(D->getType().getTypePtr());
-  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
-   C.getASTContext().getFILEType().getCanonicalType())
-return true;
+  const QualType FILETy = ACtx.getFILEType().getCanonicalType();
+  const QualType Ty = D->getType().getCanonicalType();
+
+  if (Ty->isPointerType())
+return Ty->getPointeeType() == FILETy;
 }
   }
   return false;
 }
 
-/// Given a pointer argument, return the value it points to.
-Optional getPointeeOf(CheckerContext , const Expr *Arg) {
-  ProgramStateRef State = C.getState();
-  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
-  if (AddrVal.isUnknownOrUndef())
-return None;
-
-  Optional AddrLoc = AddrVal.getAs();
-  if (!AddrLoc)
-return None;
-
-  QualType ArgTy = Arg->getType().getCanonicalType();
-  if (!ArgTy->isPointerType())
-return State->getSVal(*AddrLoc);
-
-  QualType ValTy = ArgTy->getPointeeType();
+SVal getPointeeOf(const CheckerContext , Loc LValue) {
+  const QualType ArgTy = LValue.getType(C.getASTContext());
+  if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
+return C.getState()->getSVal(LValue);
 
   // Do not dereference void pointers. Treat them as byte pointers instead.
   // FIXME: we might want to consider more than just the first byte.
-  if (ValTy->isVoidType())
-ValTy = C.getASTContext().CharTy;
+  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+}
 
-  return State->getSVal(*AddrLoc, ValTy);
+/// Given a pointer/reference argument, return the value it refers to.
+Optional getPointeeOf(const CheckerContext , SVal Arg) {
+  if (auto LValue = Arg.getAs())
+return getPointeeOf(C, *LValue);
+  return None;
 }
 
 /// Given a pointer, return the SVal of its pointee or if it is tainted,
 /// otherwise return the pointer's SVal if tainted.
-Optional getTaintedPointeeOrPointer(CheckerContext , const Expr *Arg) {
-  assert(Arg);
-  // Check for taint.
-  ProgramStateRef State = C.getState();
-  Optional PointedToSVal = getPointeeOf(C, Arg);
+/// Also considers stdin as a taint source.
+Optional getTaintedPointeeOrPointer(const CheckerContext , SVal Arg) {
+  const ProgramStateRef State = C.getState();
 
-  if (PointedToSVal && isTainted(State, *PointedToSVal))
-return 

[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2022-01-05 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ accepted this revision.
NoQ added a comment.
This revision is now accepted and ready to land.

I haven't read this whole patch with full scrutiny but it sure looks lovely. I 
now also see what the problem is with non-static strings in call descriptions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116025/new/

https://reviews.llvm.org/D116025

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap

2021-12-20 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 created this revision.
gamesh411 added reviewers: steakhal, martong, NoQ.
Herald added subscribers: manas, ASDenysPetrov, dkrupp, donat.nagy, Szelethus, 
mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware, xazax.hun.
Herald added a reviewer: Szelethus.
gamesh411 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

GenericTaintChecker now uses CallDescriptionMap to describe the possible
operation in code which trigger the introduction (sources), the removal
(filters), the passing along (propagations) and detection (sinks) of
tainted values.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116025

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -22,15 +22,14 @@
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "llvm/Support/YAMLTraits.h"
 
-#include 
 #include 
 #include 
-#include 
 #include 
 
 using namespace clang;
@@ -38,577 +37,685 @@
 using namespace taint;
 
 namespace {
-class GenericTaintChecker : public Checker {
-public:
-  static void *getTag() {
-static int Tag;
-return 
+
+/// Check for CWE-134: Uncontrolled Format String.
+constexpr llvm::StringLiteral MsgUncontrolledFormatString =
+"Untrusted data is used as a format string "
+"(CWE-134: Uncontrolled Format String)";
+
+/// Check for:
+/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
+/// CWE-78, "Failure to Sanitize Data into an OS Command"
+constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
+"Untrusted data is passed to a system call "
+"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+
+/// Check if tainted data is used as a buffer size ins strn.. functions,
+/// and allocators.
+constexpr llvm::StringLiteral MsgTaintedBufferSize =
+"Untrusted data is used to specify the buffer size "
+"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
+"for character data and the null terminator)";
+
+/// Check if tainted data is used as a custom sink's parameter.
+constexpr llvm::StringLiteral MsgCustomSink =
+"Untrusted data is passed to a user-defined sink";
+
+using ArgIdxTy = int;
+using ArgVecTy = llvm::SmallVector;
+
+constexpr int InvalidArgIndex{-2};
+/// Denotes the return value.
+constexpr int ReturnValueIndex{-1};
+
+/// Check if the region the expression evaluates to is the standard input,
+/// and thus, is tainted.
+bool isStdin(const Expr *E, CheckerContext ) {
+  ProgramStateRef State = C.getState();
+  SVal Val = C.getSVal(E);
+
+  // stdin is a pointer, so it would be a region.
+  const MemRegion *MemReg = Val.getAsRegion();
+
+  // The region should be symbolic, we do not know it's value.
+  const auto *SymReg = dyn_cast_or_null(MemReg);
+  if (!SymReg)
+return false;
+
+  // Get it's symbol and find the declaration region it's pointing to.
+  const auto *Sm = dyn_cast(SymReg->getSymbol());
+  if (!Sm)
+return false;
+  const auto *DeclReg = dyn_cast_or_null(Sm->getRegion());
+  if (!DeclReg)
+return false;
+
+  // This region corresponds to a declaration, find out if it's a global/extern
+  // variable named stdin with the proper type.
+  if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) {
+D = D->getCanonicalDecl();
+if (D->getName().contains("stdin") && D->isExternC()) {
+  const auto *PtrTy = dyn_cast(D->getType().getTypePtr());
+  if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
+   C.getASTContext().getFILEType().getCanonicalType())
+return true;
+}
   }
+  return false;
+}
 
-  void checkPreCall(const CallEvent , CheckerContext ) const;
-  void checkPostCall(const CallEvent , CheckerContext ) const;
+/// Given a pointer argument, return the value it points to.
+Optional getPointeeOf(CheckerContext , const Expr *Arg) {
+  ProgramStateRef State = C.getState();
+  SVal AddrVal = C.getSVal(Arg->IgnoreParens());
+  if (AddrVal.isUnknownOrUndef())
+return None;
 
-  void printState(raw_ostream , ProgramStateRef State, const char *NL,
-  const char *Sep) const override;
+  Optional AddrLoc = AddrVal.getAs();
+  if (!AddrLoc)
+return None;
 
-  using ArgVector = SmallVector;
-  using SignedArgVector = SmallVector;
+  QualType ArgTy = Arg->getType().getCanonicalType();
+  if