https://github.com/dkrupp updated https://github.com/llvm/llvm-project/pull/178054
>From 9c96407be1716022d4599c50eb61f1eb8d1385f8 Mon Sep 17 00:00:00 2001 From: Daniel Krupp <[email protected]> Date: Mon, 26 Jan 2026 22:17:41 +0100 Subject: [PATCH] [clang][analyzer] Add taintedness to argv If the execution environment is untrusted, we assume that the argv of the main function is an attacker controlled value and set it as an taint analysis source. --- clang/docs/analyzer/checkers.rst | 7 +++ .../Checkers/GenericTaintChecker.cpp | 40 +++++++++++++- .../Analysis/taint-diagnostic-visitor-main.c | 54 +++++++++++++++++++ clang/test/Analysis/taint-generic.c | 13 +++++ 4 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 clang/test/Analysis/taint-diagnostic-visitor-main.c diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 31edf9e99dc7d..178156c9b2f07 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1374,6 +1374,13 @@ For a more detailed description of configuration options, please see the * `Config` Specifies the name of the YAML configuration file. The user can define their own taint sources and sinks. +* The if the analyzer option `assume-controlled-environment` is set to `false`, + it is assumed that the command line arguments and the environment + variables of the program are attacker controlled. + In particular, the `argv` argument of the `main` function and + the return value of the `getenv()` function are assumed to + hold tainted values. + **Related Guidelines** * `CWE Data Neutralization Issues diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index bea8f3f13ba21..6d7ff5c76ee35 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -63,6 +63,8 @@ constexpr llvm::StringLiteral MsgSanitizeSystemArgs = constexpr llvm::StringLiteral MsgCustomSink = "Untrusted data is passed to a user-defined sink"; +const std::string MsgTaintOrigin = "Taint originated here"; + using ArgIdxTy = int; using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>; @@ -159,7 +161,7 @@ const NoteTag *taintOriginTrackerTag(CheckerContext &C, return ""; } if (TaintedSymbols.empty()) - return "Taint originated here"; + return MsgTaintOrigin; for (auto Sym : TaintedSymbols) { BR.markInteresting(Sym); @@ -378,10 +380,12 @@ struct GenericTaintRuleParser { CheckerManager &Mgr; }; -class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { +class GenericTaintChecker + : public Checker<check::PreCall, check::PostCall, check::BeginFunction> { public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + void checkBeginFunction(CheckerContext &C) const; void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, const char *Sep) const override; @@ -827,8 +831,40 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { std::make_move_iterator(Rules.end())); } +// The incoming parameters of the main function get tainted +// if the program called in an untrusted environment. +void GenericTaintChecker::checkBeginFunction(CheckerContext &C) const { + if (!C.inTopFrame() || C.getAnalysisManager() + .getAnalyzerOptions() + .ShouldAssumeControlledEnvironment) + return; + + const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl()); + if (FD && (FD->param_size() < 2 || !FD->isMain())) + return; + + ProgramStateRef State = C.getState(); + const MemRegion *ArgvReg = + State->getRegion(FD->parameters()[1], C.getLocationContext()); + SVal ArgvSval = State->getSVal(ArgvReg); + // Add taintedness to argv** + State = addTaint(State, ArgvSval); + + const NoteTag *OriginatingTag = + C.getNoteTag([ArgvSval](PathSensitiveBugReport &BR) -> std::string { + // We give diagnostics only for taint related reports + if (!BR.isInteresting(ArgvSval) || + BR.getBugType().getCategory() != categories::TaintedData) + return ""; + + return MsgTaintOrigin; + }); + C.addTransition(State, OriginatingTag); +} + void GenericTaintChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + initTaintRules(C); // FIXME: this should be much simpler. diff --git a/clang/test/Analysis/taint-diagnostic-visitor-main.c b/clang/test/Analysis/taint-diagnostic-visitor-main.c new file mode 100644 index 0000000000000..315e352cb549f --- /dev/null +++ b/clang/test/Analysis/taint-diagnostic-visitor-main.c @@ -0,0 +1,54 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -DUNTRUSTED -analyzer-config assume-controlled-environment=false -analyzer-output=text -verify=expected,untrusted %s +// RUN: %clang_analyze_cc1 -analyzer-checker=optin.taint,core,security.ArrayBound -analyzer-config assume-controlled-environment=true -analyzer-output=text -verify=expected,trusted %s + +// This file is for testing enhanced diagnostics produced by the GenericTaintChecker + +typedef __typeof(sizeof(int)) size_t; +struct _IO_FILE; +typedef struct _IO_FILE FILE; + +int scanf(const char *restrict format, ...); +int system(const char *command); +char* getenv( const char* env_var ); +size_t strlen( const char* str ); +char *strcat( char *dest, const char *src ); +char * strncat ( char * destination, const char * source, size_t num ); +char* strcpy( char* dest, const char* src ); +char * strncpy ( char * destination, const char * source, size_t num ); +void *malloc(size_t size ); +void free( void *ptr ); +char *fgets(char *str, int n, FILE *stream); +extern FILE *stdin; + + +#ifdef UNTRUSTED +// In an untrusted environment the cmd line arguments +// are assumed to be tainted. +int main(int argc, char * argv[]) {// untrusted-note {{Taint originated here}} + if (argc < 1)// untrusted-note {{'argc' is >= 1}} + // untrusted-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + strncpy(filename, argv[1], sizeof(filename)-1); // untrusted-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// untrusted-note {{Taint propagated to the 1st argument}} + system(cmd);// untrusted-warning {{Untrusted data is passed to a system call}} + // untrusted-note@-1 {{Untrusted data is passed to a system call}} + return 0; + } +#else +int main(int argc, char * argv[]) { + if (argc < 1)// trusted-note {{'argc' is >= 1}} + // trusted-note@-1 {{Taking false branch}} + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + scanf("%s", filename);// trusted-note {{Taint originated here}} + // trusted-note@-1 {{Taint propagated to the 2nd argument}} + strncat(filename, argv[1], sizeof(filename)- - strlen(argv[1]) - 1);// trusted-note {{Taint propagated to the 1st argument}} + strncat(cmd, filename, sizeof(cmd) - strlen(cmd)-1);// trusted-note {{Taint propagated to the 1st argument}} + system(cmd);// trusted-warning {{Untrusted data is passed to a system call}} + // trusted-note@-1 {{Untrusted data is passed to a system call}} + return 0; + } + #endif diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index 6017483f06b6d..93a5b9b3a53cd 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -116,6 +116,7 @@ char *stpcpy(char *restrict s1, const char *restrict s2); char *strncpy( char * destination, const char * source, size_t num ); char *strndup(const char *s, size_t n); char *strncat(char *restrict s1, const char *restrict s2, size_t n); +char *strcat( char *dest, const char *src ); void *malloc(size_t); void *calloc(size_t nmemb, size_t size); @@ -1396,3 +1397,15 @@ void testAcceptPropagates() { int acceptSocket = accept(listenSocket, 0, 0); clang_analyzer_isTainted_int(acceptSocket); // expected-warning {{YES}} } + +int main(int argc, char * argv[]) { + if (argc < 1) + return 1; + char cmd[2048] = "/bin/cat "; + char filename[1024]; + clang_analyzer_isTainted_char(*argv[1]); // expected-warning{{YES}} + strncat(cmd, argv[1], sizeof(cmd) - strlen(cmd)-1); + system(cmd);// expected-warning {{Untrusted data is passed to a system call}} + return 0; + } + \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
