llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Daniel Krupp (dkrupp) <details> <summary>Changes</summary> --- Full diff: https://github.com/llvm/llvm-project/pull/112212.diff 8 Files Affected: - (modified) clang/include/clang/StaticAnalyzer/Checkers/Checkers.td (+8-1) - (modified) clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def (+10) - (modified) clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h (+3) - (modified) clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h (+8) - (modified) clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (+64) - (modified) clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp (+25-1) - (modified) clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp (+79) - (added) clang/lib/StaticAnalyzer/taint_focused/taintcalls.c (+25) ``````````diff diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 7da0d0a87e8c0c..309d1bab74d7f5 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -1048,7 +1048,6 @@ def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">, } // end "alpha.security" - //===----------------------------------------------------------------------===// // Mac OS X, Cocoa, and Core Foundation checkers. //===----------------------------------------------------------------------===// @@ -1686,7 +1685,15 @@ def TaintPropagationChecker : Checker<"TaintPropagation">, // Modelling checker "Config", "Specifies the name of the configuration file.", "", + Released>, + CmdLineOption<Boolean, + "AggressiveTaintPropagation", + "If set to true and if a called function cannot be inlined, " + "taintedness will be propagated to the return value and to non-const " + "reference parameters", + "false", Released> + ]>, Documentation<NotDocumented>, Hidden; diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 737bc8e86cfb6a..bab7491883396a 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -412,6 +412,16 @@ ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity", ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity", "[DEPRECATED] The maximum complexity of a symbol to carry taint", 9) +ANALYZER_OPTION(bool, AnalyzerInlineTaintOnly, "analyzer-inline-taint-only", + "Only inline on a call-path to a taint source or sink", false) + +ANALYZER_OPTION(bool, AnalyzerFocusedTaint, "analyzer-focused-taint", + "Only inline top-level functions on a call-path to a taint source or sink", false) + +ANALYZER_OPTION(bool, AnalyzerAlwaysInlineTainted, "analyzer-always-inline-tainted", + "Always inline taint related functions", false) + + ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large", "The maximum times a large function could be inlined.", 32) diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h index 3a3c1a13d67dd5..0b979ecd94c9ea 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h @@ -293,6 +293,9 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> { ShowConfigOptionsList(false), ShouldEmitErrorsOnInvalidConfigValue(false), AnalyzeAll(false), AnalyzerDisplayProgress(false), AnalyzerNoteAnalysisEntryPoints(false), + AnalyzerFocusedTaint(false), + AnalyzerInlineTaintOnly(false), + AnalyzerAlwaysInlineTainted(false), eagerlyAssumeBinOpBifurcation(false), TrimGraph(false), visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false), PrintStats(false), NoRetryExhausted(false), AnalyzerWerror(false) {} diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h index c76e9c0326afe7..27d23714c1ea68 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h @@ -42,6 +42,7 @@ class AnalysisManager : public BugReporterData { ConstraintManagerCreator CreateConstraintMgr; CheckerManager *CheckerMgr; + std::set<FunctionDecl*> TaintRelatedFunctions; public: AnalyzerOptions &options; @@ -59,6 +60,13 @@ class AnalysisManager : public BugReporterData { AnaCtxMgr.clear(); } + void setTaintRelatedFunctions(std::set<FunctionDecl*> TaintedFunctions){ + TaintRelatedFunctions = TaintedFunctions; + } + std::set<FunctionDecl*> getTaintRelatedFunctions(){ + return TaintRelatedFunctions; + } + AnalysisDeclContextManager& getAnalysisDeclContextManager() { return AnaCtxMgr; } diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index b89a6e2588c987..85b5796cc16701 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -16,6 +16,7 @@ #include "Yaml.h" #include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" #include "clang/Basic/Builtins.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/Taint.h" @@ -26,6 +27,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/YAMLTraits.h" @@ -77,6 +79,13 @@ static ArgIdxTy fromArgumentCount(unsigned Count) { return Count; } +/// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. +static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { + return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() + : Call.getArgExpr(ArgIdx); + }; + + /// Check if the region the expression evaluates to is the standard input, /// and thus, is tainted. /// FIXME: Move this to Taint.cpp. @@ -393,6 +402,7 @@ class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { CheckerContext &C) const; bool isTaintReporterCheckerEnabled = false; + bool AggressiveTaintPropagation = false; std::optional<BugType> BT; private: @@ -854,6 +864,8 @@ void GenericTaintChecker::checkPreCall(const CallEvent &Call, void GenericTaintChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { + + // Set the marked values as tainted. The return value only accessible from // checkPostStmt. ProgramStateRef State = C.getState(); @@ -864,7 +876,55 @@ void GenericTaintChecker::checkPostCall(const CallEvent &Call, // stored in the state as TaintArgsOnPostVisit set. TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); + + const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs()); + + const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame); + auto &F = State->getStateManager().get_context<ArgIdxFactory>(); + ImmutableSet<ArgIdxTy> ApproxTaintedArgs = F.add(F.getEmptySet(), ReturnValueIndex); + + if (!C.wasInlined && !TaintArgs) { + llvm::errs() << "PostCall<"; + Call.dump(llvm::errs()); + llvm::errs() << "Was not inlined.\n"; + /// Check for taint sinks. + ProgramStateRef State = C.getState(); + bool HasTaintedParam = false; + for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { + const Expr *E = GetArgExpr(I, Call); + if (!E) + continue; + HasTaintedParam = + HasTaintedParam || isTaintedOrPointsToTainted(State, C.getSVal(E)); + } + llvm::errs() << "\nHasTaintedParam:" << HasTaintedParam << "\n"; + if (HasTaintedParam && !TaintArgs && AggressiveTaintPropagation) { + llvm::errs() << "Making return value and writable params tainted.\n"; + llvm::errs() << "Number of params:" << CallNumArgs; + + for (ArgIdxTy I = ReturnValueIndex+1; I < CallNumArgs; ++I) { + const Expr *E = GetArgExpr(I, Call); + if (!E) + continue; + SVal ArgSVal = C.getSVal(E); + //checking if the argument is a non-const pointer + auto LValue = ArgSVal.getAs<Loc>(); + if (!LValue) + continue; + const QualType ArgTy = LValue->getType(State->getStateManager().getContext()); + if (ArgTy->isPointerType() && !ArgTy.isConstQualified()){ + llvm::errs() << "Param "<< I; + llvm::errs() << "is a non const qualified pointer, so tainting it\n"; + ArgTy->dump(); + llvm::errs()<<"\n"; + ApproxTaintedArgs = F.add(ApproxTaintedArgs, I); + } + } + TaintArgs = &ApproxTaintedArgs; + } + } + if (!TaintArgs) return; assert(!TaintArgs->isEmpty()); @@ -1129,6 +1189,10 @@ void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, /// Checker registration void ento::registerTaintPropagationChecker(CheckerManager &Mgr) { Mgr.registerChecker<GenericTaintChecker>(); + GenericTaintChecker *checker = Mgr.getChecker<GenericTaintChecker>(); + checker->AggressiveTaintPropagation = + Mgr.getAnalyzerOptions().getCheckerBooleanOption(checker, + "AggressiveTaintPropagation"); } bool ento::shouldRegisterTaintPropagationChecker(const CheckerManager &mgr) { diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index 2ca24d0c5ab22b..ce43548b2c66a2 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -1078,6 +1078,27 @@ bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D, AnalysisDeclContextManager &ADCMgr = AMgr.getAnalysisDeclContextManager(); AnalysisDeclContext *CalleeADC = ADCMgr.getContext(D); + bool TaintRelatedFun=false; + + if (Opts.AnalyzerInlineTaintOnly) { + std::set<FunctionDecl*> TaintedFunctions = AMgr.getTaintRelatedFunctions(); + // if the function is not taint related skip it. + auto *FD = dyn_cast<FunctionDecl>(const_cast<Decl*>(D)); + if (TaintedFunctions.find(FD) == TaintedFunctions.end()) { + llvm::errs() + << "Skipping inlining of not taint related function from the analysis:\n"; + llvm::errs() << FD->getNameInfo().getAsString() << "\n"; + return false; + } else { + TaintRelatedFun=true; + llvm::errs() + << "tyring to inline taint related function:\n"; + llvm::errs() << FD->getNameInfo().getAsString() << "\n"; + //leave the other budget limits to kick in + //otherwise the analysis may hang + //return true; + } + } // The auto-synthesized bodies are essential to inline as they are // usually small and commonly used. Note: we should do this check early on to // ensure we always inline these calls. @@ -1118,14 +1139,17 @@ bool ExprEngine::shouldInlineCall(const CallEvent &Call, const Decl *D, } // Do not inline if recursive or we've reached max stack frame count. + // We skip this bound if the function is taint related bool IsRecursive = false; unsigned StackDepth = 0; examineStackFrames(D, Pred->getLocationContext(), IsRecursive, StackDepth); - if ((StackDepth >= Opts.InlineMaxStackDepth) && + if (!(Opts.AnalyzerAlwaysInlineTainted && TaintRelatedFun && (StackDepth < 3*Opts.InlineMaxStackDepth)) + && (StackDepth >= Opts.InlineMaxStackDepth) && (!isSmall(CalleeADC) || IsRecursive)) return false; // Do not inline large functions too many times. + // We skip this bound if the function is taint related if ((Engine.FunctionSummaries->getNumTimesInlined(D) > Opts.MaxTimesInlineLarge) && isLarge(CalleeADC)) { diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index 03bc40804d7328..2af0654e0a5cb6 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -33,6 +33,8 @@ #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" +#include <llvm/ADT/DepthFirstIterator.h> +#include "clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" @@ -243,6 +245,11 @@ class AnalysisConsumer : public AnalysisASTConsumer, ExprEngine::InliningModes getInliningModeForFunction(const Decl *D, const SetOfConstDecls &Visited); + + std::set<FunctionDecl*> getDeclsForTaintAnalysis(CallGraph &CG); + bool isTaintRelatedFunction(const FunctionDecl* FD); + + /// Build the call graph for all the top level decls of this TU and /// use it to define the order in which the functions should be visited. void HandleDeclsCallGraph(const unsigned LocalTUDeclsSize); @@ -435,16 +442,76 @@ AnalysisConsumer::getInliningModeForFunction(const Decl *D, return ExprEngine::Inline_Regular; } +bool AnalysisConsumer::isTaintRelatedFunction(const FunctionDecl* FD){ + std::set<std::string> sources = {"scanf","gets","getch","read","fopen","fdopen","freopen","getchar", + "gets_s","scanf_s","getcwd","readlink","gethostname","getnameinfo","readlinkat","get_current_dir_name", + "getseuserbyname","getgroups","getlogin","getlogin_r","popen","getenv"}; + std::set<std::string> sinks = {"system", "execv","popen","malloc","calloc","memcpy","strcpy","strncpy"}; + if (!FD || !FD->getCanonicalDecl()) + return false; + std::string FN = FD->getCanonicalDecl()->getNameAsString(); + //llvm::errs()<<"Inspecting function if tainted " << FN << "\n"; + return (sources.count(FN) || sinks.count(FN)); +} + +// returns functions declarations required for taint analysis +std::set<FunctionDecl*> AnalysisConsumer::getDeclsForTaintAnalysis(CallGraph &CG) { + std::set<FunctionDecl*> TaintedFunctions; + for (auto N = llvm::df_begin(&CG), EI = llvm::df_end(&CG); N != EI; N++) { + Decl *D = N->getDecl(); + // Skip the abstract root node. + if (!D) + continue; + + auto *FD = dyn_cast<FunctionDecl>(D); + if (!FD) + continue; + if (FD->getDefinition()) { + llvm::errs() << "Visiting function: " << FD->getNameInfo().getAsString() << "\n"; + } + + for (auto Callee : N->callees()){ + FunctionDecl *CFD = dyn_cast<FunctionDecl>(Callee.Callee->getDecl()); + if (!CFD) + continue; + if (isTaintRelatedFunction(CFD)) + TaintedFunctions.insert(FD); + if (TaintedFunctions.find(CFD)!=TaintedFunctions.end())//if child is tainted, the parent is also tainted + TaintedFunctions.insert(FD); + } + + + if (isTaintRelatedFunction(FD)){ + llvm::errs()<<"Called Function "<<FD->getCanonicalDecl()->getNameAsString()<<" is tainted\n"; + TaintedFunctions.insert(FD); + } else + llvm::errs()<<"Called Function "<<FD->getCanonicalDecl()->getNameAsString()<<" is NOT tainted\n"; + + } + return TaintedFunctions; +} + void AnalysisConsumer::HandleDeclsCallGraph(const unsigned LocalTUDeclsSize) { // Build the Call Graph by adding all the top level declarations to the graph. // Note: CallGraph can trigger deserialization of more items from a pch // (though HandleInterestingDecl); triggering additions to LocalTUDecls. // We rely on random access to add the initially processed Decls to CG. + CallGraph CG; for (unsigned i = 0 ; i < LocalTUDeclsSize ; ++i) { CG.addToCallGraph(LocalTUDecls[i]); } + std::set<FunctionDecl*> TaintedFunctions; + if (Opts.AnalyzerFocusedTaint||Opts.AnalyzerInlineTaintOnly){ + TaintedFunctions = getDeclsForTaintAnalysis(CG); + Mgr->setTaintRelatedFunctions(TaintedFunctions); + llvm::errs()<<"Tainted functions:\n"; + for (FunctionDecl* FD:TaintedFunctions){ + llvm::errs()<<FD->getNameInfo().getAsString() << "\n"; + } + } + // Walk over all of the call graph nodes in topological order, so that we // analyze parents before the children. Skip the functions inlined into // the previously processed functions. Use external Visited set to identify @@ -463,6 +530,7 @@ void AnalysisConsumer::HandleDeclsCallGraph(const unsigned LocalTUDeclsSize) { if (!D) continue; + // Skip the functions which have been processed already or previously // inlined. if (shouldSkipFunction(D, Visited, VisitedAsTopLevel)) @@ -480,6 +548,17 @@ void AnalysisConsumer::HandleDeclsCallGraph(const unsigned LocalTUDeclsSize) { continue; } + if (Opts.AnalyzerFocusedTaint) { + // if the function is not taint related skip it. + auto *FD = dyn_cast<FunctionDecl>(D); + if (TaintedFunctions.find(FD) == TaintedFunctions.end()) { + llvm::errs() + << "Skipping not taint related function from the analysis:\n"; + llvm::errs() << FD->getNameInfo().getAsString() << "\n"; + continue; + } + } + // Analyze the function. SetOfConstDecls VisitedCallees; diff --git a/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c b/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c new file mode 100644 index 00000000000000..d0e1e980d62be5 --- /dev/null +++ b/clang/lib/StaticAnalyzer/taint_focused/taintcalls.c @@ -0,0 +1,25 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +char buf[1024]; + +int fetchTaintedString(char *txt){ + scanf("%s", txt); +} + +int exec(char* cmd){ + system(cmd);//warn here +} + +void topLevel(){ + char cmd[2048] = "/bin/cat "; + char filename[1024]; + fetchTaintedString (filename); + strcat(cmd, filename); + exec(cmd); +} + +void printNum(int data){ + printf("Data:%d\n",data); +} \ No newline at end of file `````````` </details> https://github.com/llvm/llvm-project/pull/112212 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits