[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG17f74240e6c3: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap (authored by gamesh411). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 Files: clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -22,15 +22,14 @@ #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/Support/YAMLTraits.h" -#include #include #include -#include #include using namespace clang; @@ -38,577 +37,651 @@ using namespace taint; namespace { -class GenericTaintChecker : public Checker { -public: - static void *getTag() { -static int Tag; -return + +class GenericTaintChecker; + +/// Check for CWE-134: Uncontrolled Format String. +constexpr llvm::StringLiteral MsgUncontrolledFormatString = +"Untrusted data is used as a format string " +"(CWE-134: Uncontrolled Format String)"; + +/// Check for: +/// CERT/STR02-C. "Sanitize data passed to complex subsystems" +/// CWE-78, "Failure to Sanitize Data into an OS Command" +constexpr llvm::StringLiteral MsgSanitizeSystemArgs = +"Untrusted data is passed to a system call " +"(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + +/// Check if tainted data is used as a buffer size in strn.. functions, +/// and allocators. +constexpr llvm::StringLiteral MsgTaintedBufferSize = +"Untrusted data is used to specify the buffer size " +"(CERT/STR31-C. Guarantee that storage for strings has sufficient space " +"for character data and the null terminator)"; + +/// Check if tainted data is used as a custom sink's parameter. +constexpr llvm::StringLiteral MsgCustomSink = +"Untrusted data is passed to a user-defined sink"; + +using ArgIdxTy = int; +using ArgVecTy = llvm::SmallVector; + +/// Denotes the return value. +constexpr ArgIdxTy ReturnValueIndex{-1}; + +static ArgIdxTy fromArgumentCount(unsigned Count) { + assert(Count <= + static_cast(std::numeric_limits::max()) && + "ArgIdxTy is not large enough to represent the number of arguments."); + return Count; +} + +/// Check if the region the expression evaluates to is the standard input, +/// and thus, is tainted. +/// FIXME: Move this to Taint.cpp. +bool isStdin(SVal Val, const ASTContext ) { + // FIXME: What if Val is NonParamVarRegion? + + // The region should be symbolic, we do not know it's value. + const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); + if (!SymReg) +return false; + + // Get it's symbol and find the declaration region it's pointing to. + const auto *Sm = dyn_cast(SymReg->getSymbol()); + if (!Sm) +return false; + const auto *DeclReg = dyn_cast(Sm->getRegion()); + if (!DeclReg) +return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { +D = D->getCanonicalDecl(); +// FIXME: This should look for an exact match. +if (D->getName().contains("stdin") && D->isExternC()) { + const QualType FILETy = ACtx.getFILEType().getCanonicalType(); + const QualType Ty = D->getType().getCanonicalType(); + + if (Ty->isPointerType()) +return Ty->getPointeeType() == FILETy; +} } + return false; +} - void checkPreCall(const CallEvent , CheckerContext ) const; - void checkPostCall(const CallEvent , CheckerContext ) const; +SVal getPointeeOf(const CheckerContext , Loc LValue) { + const QualType ArgTy = LValue.getType(C.getASTContext()); + if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) +return C.getState()->getSVal(LValue); - void printState(raw_ostream , ProgramStateRef State, const char *NL, - const char *Sep) const override; + // Do not dereference void pointers. Treat them as byte pointers instead. + // FIXME: we might want to consider more than just the first byte. + return C.getState()->getSVal(LValue, C.getASTContext().CharTy); +} + +/// Given a pointer/reference argument, return the value it refers to.
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
gamesh411 updated this revision to Diff 400790. gamesh411 added a comment. Remove explicit template keyword for MSVC compatibility Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 Files: clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -22,15 +22,14 @@ #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/Support/YAMLTraits.h" -#include #include #include -#include #include using namespace clang; @@ -38,577 +37,651 @@ using namespace taint; namespace { -class GenericTaintChecker : public Checker { -public: - static void *getTag() { -static int Tag; -return + +class GenericTaintChecker; + +/// Check for CWE-134: Uncontrolled Format String. +constexpr llvm::StringLiteral MsgUncontrolledFormatString = +"Untrusted data is used as a format string " +"(CWE-134: Uncontrolled Format String)"; + +/// Check for: +/// CERT/STR02-C. "Sanitize data passed to complex subsystems" +/// CWE-78, "Failure to Sanitize Data into an OS Command" +constexpr llvm::StringLiteral MsgSanitizeSystemArgs = +"Untrusted data is passed to a system call " +"(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + +/// Check if tainted data is used as a buffer size in strn.. functions, +/// and allocators. +constexpr llvm::StringLiteral MsgTaintedBufferSize = +"Untrusted data is used to specify the buffer size " +"(CERT/STR31-C. Guarantee that storage for strings has sufficient space " +"for character data and the null terminator)"; + +/// Check if tainted data is used as a custom sink's parameter. +constexpr llvm::StringLiteral MsgCustomSink = +"Untrusted data is passed to a user-defined sink"; + +using ArgIdxTy = int; +using ArgVecTy = llvm::SmallVector; + +/// Denotes the return value. +constexpr ArgIdxTy ReturnValueIndex{-1}; + +static ArgIdxTy fromArgumentCount(unsigned Count) { + assert(Count <= + static_cast(std::numeric_limits::max()) && + "ArgIdxTy is not large enough to represent the number of arguments."); + return Count; +} + +/// Check if the region the expression evaluates to is the standard input, +/// and thus, is tainted. +/// FIXME: Move this to Taint.cpp. +bool isStdin(SVal Val, const ASTContext ) { + // FIXME: What if Val is NonParamVarRegion? + + // The region should be symbolic, we do not know it's value. + const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); + if (!SymReg) +return false; + + // Get it's symbol and find the declaration region it's pointing to. + const auto *Sm = dyn_cast(SymReg->getSymbol()); + if (!Sm) +return false; + const auto *DeclReg = dyn_cast(Sm->getRegion()); + if (!DeclReg) +return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { +D = D->getCanonicalDecl(); +// FIXME: This should look for an exact match. +if (D->getName().contains("stdin") && D->isExternC()) { + const QualType FILETy = ACtx.getFILEType().getCanonicalType(); + const QualType Ty = D->getType().getCanonicalType(); + + if (Ty->isPointerType()) +return Ty->getPointeeType() == FILETy; +} } + return false; +} - void checkPreCall(const CallEvent , CheckerContext ) const; - void checkPostCall(const CallEvent , CheckerContext ) const; +SVal getPointeeOf(const CheckerContext , Loc LValue) { + const QualType ArgTy = LValue.getType(C.getASTContext()); + if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) +return C.getState()->getSVal(LValue); - void printState(raw_ostream , ProgramStateRef State, const char *NL, - const char *Sep) const override; + // Do not dereference void pointers. Treat them as byte pointers instead. + // FIXME: we might want to consider more than just the first byte. + return C.getState()->getSVal(LValue, C.getASTContext().CharTy); +} + +/// Given a pointer/reference argument, return the value it refers to. +Optional getPointeeOf(const CheckerContext , SVal Arg) { + if (auto LValue = Arg.getAs()) +return getPointeeOf(C, *LValue); +
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
steakhal accepted this revision. steakhal added a comment. LGTM Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
gamesh411 updated this revision to Diff 400498. gamesh411 added a comment. All commits were exluded in the previous patch upload Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 Files: clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -22,15 +22,14 @@ #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/Support/YAMLTraits.h" -#include #include #include -#include #include using namespace clang; @@ -38,577 +37,651 @@ using namespace taint; namespace { -class GenericTaintChecker : public Checker { -public: - static void *getTag() { -static int Tag; -return + +class GenericTaintChecker; + +/// Check for CWE-134: Uncontrolled Format String. +constexpr llvm::StringLiteral MsgUncontrolledFormatString = +"Untrusted data is used as a format string " +"(CWE-134: Uncontrolled Format String)"; + +/// Check for: +/// CERT/STR02-C. "Sanitize data passed to complex subsystems" +/// CWE-78, "Failure to Sanitize Data into an OS Command" +constexpr llvm::StringLiteral MsgSanitizeSystemArgs = +"Untrusted data is passed to a system call " +"(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + +/// Check if tainted data is used as a buffer size in strn.. functions, +/// and allocators. +constexpr llvm::StringLiteral MsgTaintedBufferSize = +"Untrusted data is used to specify the buffer size " +"(CERT/STR31-C. Guarantee that storage for strings has sufficient space " +"for character data and the null terminator)"; + +/// Check if tainted data is used as a custom sink's parameter. +constexpr llvm::StringLiteral MsgCustomSink = +"Untrusted data is passed to a user-defined sink"; + +using ArgIdxTy = int; +using ArgVecTy = llvm::SmallVector; + +/// Denotes the return value. +constexpr ArgIdxTy ReturnValueIndex{-1}; + +static ArgIdxTy fromArgumentCount(unsigned Count) { + assert(Count <= + static_cast(std::numeric_limits::max()) && + "ArgIdxTy is not large enough to represent the number of arguments."); + return Count; +} + +/// Check if the region the expression evaluates to is the standard input, +/// and thus, is tainted. +/// FIXME: Move this to Taint.cpp. +bool isStdin(SVal Val, const ASTContext ) { + // FIXME: What if Val is NonParamVarRegion? + + // The region should be symbolic, we do not know it's value. + const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); + if (!SymReg) +return false; + + // Get it's symbol and find the declaration region it's pointing to. + const auto *Sm = dyn_cast(SymReg->getSymbol()); + if (!Sm) +return false; + const auto *DeclReg = dyn_cast(Sm->getRegion()); + if (!DeclReg) +return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { +D = D->getCanonicalDecl(); +// FIXME: This should look for an exact match. +if (D->getName().contains("stdin") && D->isExternC()) { + const QualType FILETy = ACtx.getFILEType().getCanonicalType(); + const QualType Ty = D->getType().getCanonicalType(); + + if (Ty->isPointerType()) +return Ty->getPointeeType() == FILETy; +} } + return false; +} - void checkPreCall(const CallEvent , CheckerContext ) const; - void checkPostCall(const CallEvent , CheckerContext ) const; +SVal getPointeeOf(const CheckerContext , Loc LValue) { + const QualType ArgTy = LValue.getType(C.getASTContext()); + if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) +return C.getState()->getSVal(LValue); - void printState(raw_ostream , ProgramStateRef State, const char *NL, - const char *Sep) const override; + // Do not dereference void pointers. Treat them as byte pointers instead. + // FIXME: we might want to consider more than just the first byte. + return C.getState()->getSVal(LValue, C.getASTContext().CharTy); +} + +/// Given a pointer/reference argument, return the value it refers to. +Optional getPointeeOf(const CheckerContext , SVal Arg) { + if (auto LValue = Arg.getAs()) +return getPointeeOf(C, *LValue); +
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
gamesh411 added a comment. Applied typo and naming fixes, introduced 2 move operations, and re-introduced short circuiting. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
gamesh411 updated this revision to Diff 400464. gamesh411 marked 7 inline comments as done. gamesh411 added a comment. Fixes round two Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 Files: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -52,7 +52,7 @@ "Untrusted data is passed to a system call " "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; -/// Check if tainted data is used as a buffer size ins strn.. functions, +/// Check if tainted data is used as a buffer size in strn.. functions, /// and allocators. constexpr llvm::StringLiteral MsgTaintedBufferSize = "Untrusted data is used to specify the buffer size " @@ -160,7 +160,8 @@ public: ArgSet() = default; ArgSet(ArgVecTy &, Optional VariadicIndex = None) - : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {} + : DiscreteArgs(std::move(DiscreteArgs)), +VariadicIndex(std::move(VariadicIndex)) {} bool contains(ArgIdxTy ArgIdx) const { if (llvm::is_contained(DiscreteArgs, ArgIdx)) @@ -308,13 +309,13 @@ TaintConfiguration &) const; private: - using NamePartTy = llvm::SmallVector, 2>; + using NamePartsTy = llvm::SmallVector, 2>; /// Validate part of the configuration, which contains a list of argument /// indexes. void validateArgVector(const std::string , const ArgVecTy ) const; - template static NamePartTy parseNameParts(const Config ); + template static NamePartsTy parseNameParts(const Config ); // Takes the config and creates a CallDescription for it and associates a Rule // with that. @@ -445,9 +446,9 @@ } template -GenericTaintRuleParser::NamePartTy +GenericTaintRuleParser::NamePartsTy GenericTaintRuleParser::parseNameParts(const Config ) { - NamePartTy NameParts; + NamePartsTy NameParts; if (!C.Scope.empty()) { // If the Scope argument contains multiple "::" parts, those are considered // namespace identifiers. @@ -464,7 +465,7 @@ void GenericTaintRuleParser::consumeRulesFromConfig(const Config , GenericTaintRule &, RulesContTy ) { - NamePartTy NameParts = parseNameParts(C); + NamePartsTy NameParts = parseNameParts(C); llvm::SmallVector CallDescParts{NameParts.size()}; llvm::transform(NameParts, CallDescParts.begin(), [](SmallString<32> ) { return S.c_str(); }); @@ -642,6 +643,7 @@ llvm::Optional Config = getConfiguration(*Mgr, this, Option, ConfigFile); if (!Config) { +// We don't have external taint config, no parsing required. DynamicTaintRules = RuleLookupTy{}; return; } @@ -746,8 +748,8 @@ bool IsMatching = PropSrcArgs.isEmpty(); ForEachCallArg( [this, , , ](ArgIdxTy I, const Expr *E, SVal) { -IsMatching |= -PropSrcArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C); +IsMatching = IsMatching || (PropSrcArgs.contains(I) && +isTaintedOrPointsToTainted(E, State, C)); }); if (!IsMatching) @@ -860,7 +862,7 @@ SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); StringRef DomName = C.getMacroNameOrSpelling(DomLoc); - // White list the internal communication protocols. + // Allow internal communication protocols. bool SafeProtocol = DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"); Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -52,7 +52,7 @@ "Untrusted data is passed to a system call " "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; -/// Check if tainted data is used as a buffer size ins strn.. functions, +/// Check if tainted data is used as a buffer size in strn.. functions, /// and allocators. constexpr llvm::StringLiteral MsgTaintedBufferSize = "Untrusted data is used to specify the buffer size " @@ -160,7 +160,8 @@ public: ArgSet() = default; ArgSet(ArgVecTy &, Optional VariadicIndex = None) - : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {} + : DiscreteArgs(std::move(DiscreteArgs)), +VariadicIndex(std::move(VariadicIndex)) {} bool contains(ArgIdxTy ArgIdx) const { if (llvm::is_contained(DiscreteArgs, ArgIdx)) @@ -308,13 +309,13 @@
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
steakhal added a comment. Sweet stuff! Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:55 /// Check if tainted data is used as a buffer size ins strn.. functions, /// and allocators. typo Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:163 + ArgSet(ArgVecTy &, Optional VariadicIndex = None) : DiscreteArgs(DiscreteArgs), VariadicIndex(VariadicIndex) {} Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:311 private: using NamePartTy = llvm::SmallVector, 2>; I would expect this in plural form. Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:317 + template static NamePartTy parseNameParts(const Config ); + Definitely reads oddly. The return type is singular but the function //parses name part**s**//. Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:645 if (!Config) { -TaintRules = RuleLookupTy{}; +DynamicTaintRules = RuleLookupTy{}; return; A comment like this would make it cleaner: `// We don't have external taint config, no parsing required.` Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:749-750 [this, , , ](ArgIdxTy I, const Expr *E, SVal) { IsMatching |= PropSrcArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C); }); It's unfortunate that we don't shortcircuit after the patch. Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:863 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); // White list the internal communication protocols. bool SafeProtocol = DomName.equals("AF_SYSTEM") || Use inclusive terms. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
gamesh411 updated this revision to Diff 399246. gamesh411 added a comment. Tidy things up thanks to the recommendations of @steakhal Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 Files: clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -38,6 +38,8 @@ namespace { +class GenericTaintChecker; + /// Check for CWE-134: Uncontrolled Format String. constexpr llvm::StringLiteral MsgUncontrolledFormatString = "Untrusted data is used as a format string " @@ -62,23 +64,26 @@ "Untrusted data is passed to a user-defined sink"; using ArgIdxTy = int; -using ArgVecTy = llvm::SmallVector; +using ArgVecTy = llvm::SmallVector; -constexpr int InvalidArgIndex{-2}; /// Denotes the return value. -constexpr int ReturnValueIndex{-1}; +constexpr ArgIdxTy ReturnValueIndex{-1}; + +static ArgIdxTy fromArgumentCount(unsigned Count) { + assert(Count <= + static_cast(std::numeric_limits::max()) && + "ArgIdxTy is not large enough to represent the number of arguments."); + return Count; +} /// Check if the region the expression evaluates to is the standard input, /// and thus, is tainted. -bool isStdin(const Expr *E, CheckerContext ) { - ProgramStateRef State = C.getState(); - SVal Val = C.getSVal(E); - - // stdin is a pointer, so it would be a region. - const MemRegion *MemReg = Val.getAsRegion(); +/// FIXME: Move this to Taint.cpp. +bool isStdin(SVal Val, const ASTContext ) { + // FIXME: What if Val is NonParamVarRegion? // The region should be symbolic, we do not know it's value. - const auto *SymReg = dyn_cast_or_null(MemReg); + const auto *SymReg = dyn_cast_or_null(Val.getAsRegion()); if (!SymReg) return false; @@ -86,7 +91,7 @@ const auto *Sm = dyn_cast(SymReg->getSymbol()); if (!Sm) return false; - const auto *DeclReg = dyn_cast_or_null(Sm->getRegion()); + const auto *DeclReg = dyn_cast(Sm->getRegion()); if (!DeclReg) return false; @@ -94,68 +99,58 @@ // variable named stdin with the proper type. if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { D = D->getCanonicalDecl(); +// FIXME: This should look for an exact match. if (D->getName().contains("stdin") && D->isExternC()) { - const auto *PtrTy = dyn_cast(D->getType().getTypePtr()); - if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == - C.getASTContext().getFILEType().getCanonicalType()) -return true; + const QualType FILETy = ACtx.getFILEType().getCanonicalType(); + const QualType Ty = D->getType().getCanonicalType(); + + if (Ty->isPointerType()) +return Ty->getPointeeType() == FILETy; } } return false; } -/// Given a pointer argument, return the value it points to. -Optional getPointeeOf(CheckerContext , const Expr *Arg) { - ProgramStateRef State = C.getState(); - SVal AddrVal = C.getSVal(Arg->IgnoreParens()); - if (AddrVal.isUnknownOrUndef()) -return None; - - Optional AddrLoc = AddrVal.getAs(); - if (!AddrLoc) -return None; - - QualType ArgTy = Arg->getType().getCanonicalType(); - if (!ArgTy->isPointerType()) -return State->getSVal(*AddrLoc); - - QualType ValTy = ArgTy->getPointeeType(); +SVal getPointeeOf(const CheckerContext , Loc LValue) { + const QualType ArgTy = LValue.getType(C.getASTContext()); + if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) +return C.getState()->getSVal(LValue); // Do not dereference void pointers. Treat them as byte pointers instead. // FIXME: we might want to consider more than just the first byte. - if (ValTy->isVoidType()) -ValTy = C.getASTContext().CharTy; + return C.getState()->getSVal(LValue, C.getASTContext().CharTy); +} - return State->getSVal(*AddrLoc, ValTy); +/// Given a pointer/reference argument, return the value it refers to. +Optional getPointeeOf(const CheckerContext , SVal Arg) { + if (auto LValue = Arg.getAs()) +return getPointeeOf(C, *LValue); + return None; } /// Given a pointer, return the SVal of its pointee or if it is tainted, /// otherwise return the pointer's SVal if tainted. -Optional getTaintedPointeeOrPointer(CheckerContext , const Expr *Arg) { - assert(Arg); - // Check for taint. - ProgramStateRef State = C.getState(); - Optional PointedToSVal = getPointeeOf(C, Arg); +/// Also considers stdin as a taint source. +Optional getTaintedPointeeOrPointer(const CheckerContext , SVal Arg) { + const ProgramStateRef State = C.getState(); - if (PointedToSVal && isTainted(State, *PointedToSVal)) -return
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
NoQ accepted this revision. NoQ added a comment. This revision is now accepted and ready to land. I haven't read this whole patch with full scrutiny but it sure looks lovely. I now also see what the problem is with non-static strings in call descriptions. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D116025/new/ https://reviews.llvm.org/D116025 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D116025: [analyzer][NFC] Refactor GenericTaintChecker to use CallDescriptionMap
gamesh411 created this revision. gamesh411 added reviewers: steakhal, martong, NoQ. Herald added subscribers: manas, ASDenysPetrov, dkrupp, donat.nagy, Szelethus, mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware, xazax.hun. Herald added a reviewer: Szelethus. gamesh411 requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. GenericTaintChecker now uses CallDescriptionMap to describe the possible operation in code which trigger the introduction (sources), the removal (filters), the passing along (propagations) and detection (sinks) of tainted values. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D116025 Files: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp === --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -22,15 +22,14 @@ #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "llvm/Support/YAMLTraits.h" -#include #include #include -#include #include using namespace clang; @@ -38,577 +37,685 @@ using namespace taint; namespace { -class GenericTaintChecker : public Checker { -public: - static void *getTag() { -static int Tag; -return + +/// Check for CWE-134: Uncontrolled Format String. +constexpr llvm::StringLiteral MsgUncontrolledFormatString = +"Untrusted data is used as a format string " +"(CWE-134: Uncontrolled Format String)"; + +/// Check for: +/// CERT/STR02-C. "Sanitize data passed to complex subsystems" +/// CWE-78, "Failure to Sanitize Data into an OS Command" +constexpr llvm::StringLiteral MsgSanitizeSystemArgs = +"Untrusted data is passed to a system call " +"(CERT/STR02-C. Sanitize data passed to complex subsystems)"; + +/// Check if tainted data is used as a buffer size ins strn.. functions, +/// and allocators. +constexpr llvm::StringLiteral MsgTaintedBufferSize = +"Untrusted data is used to specify the buffer size " +"(CERT/STR31-C. Guarantee that storage for strings has sufficient space " +"for character data and the null terminator)"; + +/// Check if tainted data is used as a custom sink's parameter. +constexpr llvm::StringLiteral MsgCustomSink = +"Untrusted data is passed to a user-defined sink"; + +using ArgIdxTy = int; +using ArgVecTy = llvm::SmallVector; + +constexpr int InvalidArgIndex{-2}; +/// Denotes the return value. +constexpr int ReturnValueIndex{-1}; + +/// Check if the region the expression evaluates to is the standard input, +/// and thus, is tainted. +bool isStdin(const Expr *E, CheckerContext ) { + ProgramStateRef State = C.getState(); + SVal Val = C.getSVal(E); + + // stdin is a pointer, so it would be a region. + const MemRegion *MemReg = Val.getAsRegion(); + + // The region should be symbolic, we do not know it's value. + const auto *SymReg = dyn_cast_or_null(MemReg); + if (!SymReg) +return false; + + // Get it's symbol and find the declaration region it's pointing to. + const auto *Sm = dyn_cast(SymReg->getSymbol()); + if (!Sm) +return false; + const auto *DeclReg = dyn_cast_or_null(Sm->getRegion()); + if (!DeclReg) +return false; + + // This region corresponds to a declaration, find out if it's a global/extern + // variable named stdin with the proper type. + if (const auto *D = dyn_cast_or_null(DeclReg->getDecl())) { +D = D->getCanonicalDecl(); +if (D->getName().contains("stdin") && D->isExternC()) { + const auto *PtrTy = dyn_cast(D->getType().getTypePtr()); + if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == + C.getASTContext().getFILEType().getCanonicalType()) +return true; +} } + return false; +} - void checkPreCall(const CallEvent , CheckerContext ) const; - void checkPostCall(const CallEvent , CheckerContext ) const; +/// Given a pointer argument, return the value it points to. +Optional getPointeeOf(CheckerContext , const Expr *Arg) { + ProgramStateRef State = C.getState(); + SVal AddrVal = C.getSVal(Arg->IgnoreParens()); + if (AddrVal.isUnknownOrUndef()) +return None; - void printState(raw_ostream , ProgramStateRef State, const char *NL, - const char *Sep) const override; + Optional AddrLoc = AddrVal.getAs(); + if (!AddrLoc) +return None; - using ArgVector = SmallVector; - using SignedArgVector = SmallVector; + QualType ArgTy = Arg->getType().getCanonicalType(); + if