dkrupp updated this revision to Diff 511078.
dkrupp marked 21 inline comments as done.
dkrupp added a comment.

@steakhal thanks for your review. I tried to address all your concerns.
I added an extra test case too (multipleTaintSources(..)) which highlights the 
limitation of the current patch: If multiple tainted "variables" reach a sink, 
we only generate diagnostics for one of them. The main reason is that the 
isTainted() function returns a single tainted Symbolref instead of a vector of 
Symbolrefs if there are multiple instances. 
I highlighted this in the test and the implementation.

I think this could be still an acceptable limitation for now, because as the 
user sanitizes one of the tainted variables, he will get a new diagnostics for 
the remaining one(s).

So I suggest to address this limitation in  follow-up patche(s).
The implementation as is already greatly improves the understandability of the 
reports.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144269/new/

https://reviews.llvm.org/D144269

Files:
  clang/include/clang/StaticAnalyzer/Checkers/Taint.h
  clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
  clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
  clang/test/Analysis/taint-diagnostic-visitor.c
  clang/test/Analysis/taint-tester.c

Index: clang/test/Analysis/taint-tester.c
===================================================================
--- clang/test/Analysis/taint-tester.c
+++ clang/test/Analysis/taint-tester.c
@@ -122,7 +122,7 @@
   fscanf(pp, "%d", &ii);
   int jj = ii;// expected-warning + {{tainted}}
 
-  fscanf(p, "%d", &ii);
+  fscanf(p, "%d", &ii);// expected-warning + {{tainted}}
   int jj2 = ii;// expected-warning + {{tainted}}
 
   ii = 3;
Index: clang/test/Analysis/taint-diagnostic-visitor.c
===================================================================
--- clang/test/Analysis/taint-diagnostic-visitor.c
+++ clang/test/Analysis/taint-diagnostic-visitor.c
@@ -2,13 +2,24 @@
 
 // This file is for testing enhanced diagnostics produced by the GenericTaintChecker
 
+typedef unsigned long size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
 int scanf(const char *restrict format, ...);
 int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+void *malloc(size_t size );
+void free( void *ptr );
+char *fgets(char *str, int n, FILE *stream);
+FILE *stdin;
 
 void taintDiagnostic(void)
 {
   char buf[128];
   scanf("%s", buf); // expected-note {{Taint originated here}}
+                    // expected-note@-1 {{Taint propagated to argument 2}}
   system(buf); // expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}}
 }
 
@@ -16,6 +27,7 @@
   int index;
   int Array[] = {1, 2, 3, 4, 5};
   scanf("%d", &index); // expected-note {{Taint originated here}}
+                       // expected-note@-1 {{Taint propagated to argument 2}}
   return Array[index]; // expected-warning {{Out of bound memory access (index is tainted)}}
                        // expected-note@-1 {{Out of bound memory access (index is tainted)}}
 }
@@ -23,6 +35,7 @@
 int taintDiagnosticDivZero(int operand) {
   scanf("%d", &operand); // expected-note {{Value assigned to 'operand'}}
                          // expected-note@-1 {{Taint originated here}}
+                         // expected-note@-2 {{Taint propagated to argument 2}}
   return 10 / operand; // expected-warning {{Division by a tainted value, possibly zero}}
                        // expected-note@-1 {{Division by a tainted value, possibly zero}}
 }
@@ -31,6 +44,71 @@
   int x;
   scanf("%d", &x); // expected-note {{Value assigned to 'x'}}
                    // expected-note@-1 {{Taint originated here}}
+                   // expected-note@-2 {{Taint propagated to argument 2}}
   int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}}
               // expected-note@-1 {{Declared variable-length array (VLA) has tainted size}}
 }
+
+
+//Tests if the originated note is correctly placed even if the path is
+//propagating through variables and expressions
+char* taintDiagnosticPropagation(){
+  char *pathbuf;
+  char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+                                 // expected-note@-1 {{Taint propagated to the return value}}
+  if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+	               // expected-note@-1 {{Taking true branch}}
+    pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
+                                                // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+                                                // expected-note@-2 {{Taint propagated to the return value}}
+    return pathbuf;
+  }
+  return 0;
+}
+
+//Taint origin should be marked correctly even if there are multiple taint
+//sources in the function
+char* taintDiagnosticPropagation2(){
+  char *pathbuf;
+  char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
+  char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+                                 // expected-note@-1 {{Taint propagated to the return value}}
+  char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
+  if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+	               // expected-note@-1 {{Taking true branch}}
+    pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
+                                                // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+                                                // expected-note@-2 {{Taint propagated to the return value}}
+    return pathbuf;
+  }
+  return 0;
+}
+
+void testReadStdIn(){
+  char buf[1024];
+  fgets(buf, sizeof(buf), stdin);// expected-note {{Taint originated here}}
+                                 // expected-note@-1 {{Taint propagated to argument 1}}
+  system(buf);// expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}}
+
+}
+
+void multipleTaintSources(void) {
+  int x,y,z;
+  scanf("%d", &x); // expected-note {{Taint originated here}}
+                   // expected-note@-1 {{Taint propagated to argument 2}}
+  scanf("%d", &y); // FIXME: Would be nice to indicate taint originated and propagation here too
+  scanf("%d", &z);
+  int* ptr = (int*) malloc(y + x); // expected-warning {{Untrusted data is used to specify the buffer size}}
+                                   // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+  free (ptr);
+}
+
+void multipleTaintedArgs(void) {
+  int x,y;
+  scanf("%d %d", &x, &y); // expected-note {{Taint originated here}}
+                          // expected-note@-1 {{Taint propagated to argument 3}}
+                          // FIXME: Would be nice to indicate taint propagation argument 2
+  int* ptr = (int*) malloc(x + y); // expected-warning {{Untrusted data is used to specify the buffer size}}
+                                   // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+  free (ptr);
+}
Index: clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
+++ clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
@@ -23,6 +23,7 @@
 const char *const CXXMoveSemantics = "C++ move semantics";
 const char *const SecurityError = "Security error";
 const char *const UnusedCode = "Unused code";
+const char *const TaintedData = "Tainted data used";
 } // namespace categories
 } // namespace ento
 } // namespace clang
Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -55,8 +55,7 @@
                                     const Expr *SizeE) const;
 
   void reportBug(VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State,
-                 CheckerContext &C,
-                 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+                 CheckerContext &C, SymbolRef TaintedSymbol = nullptr) const;
 
 public:
   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
@@ -166,9 +165,8 @@
     return nullptr;
 
   // Check if the size is tainted.
-  if (isTainted(State, SizeV)) {
-    reportBug(VLA_Tainted, SizeE, nullptr, C,
-              std::make_unique<TaintBugVisitor>(SizeV));
+  if (SymbolRef TSR = isTainted(State, SizeV)) {
+    reportBug(VLA_Tainted, SizeE, nullptr, C, TSR);
     return nullptr;
   }
 
@@ -209,17 +207,24 @@
   return State;
 }
 
-void VLASizeChecker::reportBug(
-    VLASize_Kind Kind, const Expr *SizeE, ProgramStateRef State,
-    CheckerContext &C, std::unique_ptr<BugReporterVisitor> Visitor) const {
+void VLASizeChecker::reportBug(VLASize_Kind Kind, const Expr *SizeE,
+                               ProgramStateRef State, CheckerContext &C,
+                               SymbolRef TaintedSymbol) const {
   // Generate an error node.
   ExplodedNode *N = C.generateErrorNode(State);
   if (!N)
     return;
 
-  if (!BT)
-    BT.reset(new BuiltinBug(
-        this, "Dangerous variable-length array (VLA) declaration"));
+  if (!BT) {
+    if (Kind == VLA_Tainted)
+      BT.reset(new BugType(this,
+                           "Dangerous variable-length array (VLA) declaration",
+                           categories::TaintedData));
+    else
+      BT.reset(new BugType(this,
+                           "Dangerous variable-length array (VLA) declaration",
+                           categories::LogicError));
+  }
 
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
@@ -243,9 +248,11 @@
   }
 
   auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
-  report->addVisitor(std::move(Visitor));
   report->addRange(SizeE->getSourceRange());
   bugreporter::trackExpressionValue(N, SizeE, *report);
+  if (TaintedSymbol)
+    report->markInteresting(TaintedSymbol);
+
   C.emitReport(std::move(report));
 }
 
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -144,30 +144,33 @@
   return NewState;
 }
 
-bool taint::isTainted(ProgramStateRef State, const Stmt *S,
-                      const LocationContext *LCtx, TaintTagType Kind) {
+SymbolRef taint::isTainted(ProgramStateRef State, const Stmt *S,
+                           const LocationContext *LCtx, TaintTagType Kind) {
   SVal val = State->getSVal(S, LCtx);
   return isTainted(State, val, Kind);
 }
 
-bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
+SymbolRef taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
   if (SymbolRef Sym = V.getAsSymbol())
     return isTainted(State, Sym, Kind);
   if (const MemRegion *Reg = V.getAsRegion())
     return isTainted(State, Reg, Kind);
-  return false;
+  return nullptr;
 }
 
-bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
-                      TaintTagType K) {
+SymbolRef taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
+                           TaintTagType K) {
   if (!Reg)
-    return false;
+    return nullptr;
 
   // Element region (array element) is tainted if either the base or the offset
   // are tainted.
-  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
-    return isTainted(State, ER->getSuperRegion(), K) ||
-           isTainted(State, ER->getIndex(), K);
+  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
+    if (SymbolRef TaintedSym = isTainted(State, ER->getIndex(), K))
+      return TaintedSym;
+    if (SymbolRef TaintedSym = isTainted(State, ER->getSuperRegion(), K))
+      return TaintedSym;
+  }
 
   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
     return isTainted(State, SR->getSymbol(), K);
@@ -175,12 +178,13 @@
   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
     return isTainted(State, ER->getSuperRegion(), K);
 
-  return false;
+  return nullptr;
 }
 
-bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
+SymbolRef taint::isTainted(ProgramStateRef State, SymbolRef Sym,
+                           TaintTagType Kind) {
   if (!Sym)
-    return false;
+    return nullptr;
 
   // Traverse all the symbols this symbol depends on to see if any are tainted.
   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
@@ -190,14 +194,15 @@
       continue;
 
     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
-      if (*Tag == Kind)
-        return true;
+      if (*Tag == Kind) {
+        return *SI;
+      }
     }
 
     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
       // If this is a SymbolDerived with a tainted parent, it's also tainted.
-      if (isTainted(State, SD->getParentSymbol(), Kind))
-        return true;
+      if (SymbolRef TSR = isTainted(State, SD->getParentSymbol(), Kind))
+        return TSR;
 
       // If this is a SymbolDerived with the same parent symbol as another
       // tainted SymbolDerived and a region that's a sub-region of that tainted
@@ -210,46 +215,25 @@
           // complete. For example, this would not currently identify
           // overlapping fields in a union as tainted. To identify this we can
           // check for overlapping/nested byte offsets.
-          if (Kind == I.second && R->isSubRegionOf(I.first))
-            return true;
+          if (Kind == I.second && R->isSubRegionOf(I.first)) {
+            return SD->getParentSymbol();
+          }
         }
       }
     }
 
     // If memory region is tainted, data is also tainted.
     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
-      if (isTainted(State, SRV->getRegion(), Kind))
-        return true;
+      if (SymbolRef TaintedSym = isTainted(State, SRV->getRegion(), Kind))
+        return TaintedSym;
     }
 
     // If this is a SymbolCast from a tainted value, it's also tainted.
     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
-      if (isTainted(State, SC->getOperand(), Kind))
-        return true;
+      if (SymbolRef TaintedSym = isTainted(State, SC->getOperand(), Kind))
+        return TaintedSym;
     }
   }
 
-  return false;
-}
-
-PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
-                                                  BugReporterContext &BRC,
-                                                  PathSensitiveBugReport &BR) {
-
-  // Find the ExplodedNode where the taint was first introduced
-  if (!isTainted(N->getState(), V) ||
-      isTainted(N->getFirstPred()->getState(), V))
-    return nullptr;
-
-  const Stmt *S = N->getStmtForDiagnostics();
-  if (!S)
-    return nullptr;
-
-  const LocationContext *NCtx = N->getLocationContext();
-  PathDiagnosticLocation L =
-      PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
-  if (!L.isValid() || !L.asLocation().isValid())
-    return nullptr;
-
-  return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
+  return nullptr;
 }
Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -32,6 +32,7 @@
 #include <memory>
 #include <optional>
 #include <utility>
+#include <vector>
 
 #define DEBUG_TYPE "taint-checker"
 
@@ -114,47 +115,111 @@
   return false;
 }
 
-SVal getPointeeOf(const CheckerContext &C, Loc LValue) {
-  const QualType ArgTy = LValue.getType(C.getASTContext());
+SVal getPointeeOf(ASTContext &ASTCtx, const ProgramStateRef State, Loc LValue) {
+  const QualType ArgTy = LValue.getType(ASTCtx);
   if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
-    return C.getState()->getSVal(LValue);
+    return State->getSVal(LValue);
 
   // Do not dereference void pointers. Treat them as byte pointers instead.
   // FIXME: we might want to consider more than just the first byte.
-  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+  return State->getSVal(LValue, ASTCtx.CharTy);
 }
 
 /// Given a pointer/reference argument, return the value it refers to.
-std::optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) {
+std::optional<SVal> getPointeeOf(ASTContext &ASTCtx,
+                                 const ProgramStateRef State, SVal Arg) {
   if (auto LValue = Arg.getAs<Loc>())
-    return getPointeeOf(C, *LValue);
+    return getPointeeOf(ASTCtx, State, *LValue);
   return std::nullopt;
 }
 
 /// Given a pointer, return the SVal of its pointee or if it is tainted,
 /// otherwise return the pointer's SVal if tainted.
 /// Also considers stdin as a taint source.
-std::optional<SVal> getTaintedPointeeOrPointer(const CheckerContext &C,
+std::optional<SVal> getTaintedPointeeOrPointer(ASTContext &ASTCtx,
+                                               const ProgramStateRef State,
                                                SVal Arg) {
-  const ProgramStateRef State = C.getState();
-
-  if (auto Pointee = getPointeeOf(C, Arg))
+  if (auto Pointee = getPointeeOf(ASTCtx, State, Arg))
     if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
       return Pointee;
 
   if (isTainted(State, Arg))
     return Arg;
+  return std::nullopt;
+}
 
-  // FIXME: This should be done by the isTainted() API.
-  if (isStdin(Arg, C.getASTContext()))
-    return Arg;
+bool isTaintedOrPointsToTainted(ASTContext &ASTCtx,
+                                const ProgramStateRef &State, SVal ExprSVal) {
+  return getTaintedPointeeOrPointer(ASTCtx, State, ExprSVal).has_value();
+}
 
-  return std::nullopt;
+/// Helps in printing taint diagnostics.
+/// Marks the incoming parameters of a function interesting (to be printed)
+/// when the return value, or the outgoing parameters are tainted.
+const NoteTag *taintOriginTrackerTag(CheckerContext &C,
+                                     std::vector<SymbolRef> TaintedSymbols,
+                                     std::vector<ArgIdxTy> TaintedArgs,
+                                     const LocationContext *CallLocation) {
+  assert(TaintedSymbols.size() == TaintedArgs.size());
+  return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
+                       TaintedArgs = std::move(TaintedArgs), CallLocation](
+                          PathSensitiveBugReport &BR) -> std::string {
+    SmallString<256> Msg;
+    // We give diagnostics only for taint related reports
+    if (!BR.isInteresting(CallLocation) ||
+        BR.getBugType().getCategory() != categories::TaintedData) {
+      return "";
+    }
+    if (TaintedSymbols.empty()) {
+      return "Taint originated here";
+    }
+    for (auto Sym : llvm::enumerate(TaintedSymbols)) {
+      LLVM_DEBUG(llvm::dbgs() << "Taint Propagated from argument "
+                              << TaintedArgs.at(Sym.index()) + 1 << "\n");
+      BR.markInteresting(Sym.value());
+    }
+    return "";
+  });
 }
 
-bool isTaintedOrPointsToTainted(const Expr *E, const ProgramStateRef &State,
-                                CheckerContext &C) {
-  return getTaintedPointeeOrPointer(C, C.getSVal(E)).has_value();
+/// Helps in printing taint diagnostics.
+/// Marks the function interesting (to be printed)
+/// when the return value, or the outgoing parameters are tainted.
+const NoteTag *taintPropagationExplainerTag(
+    CheckerContext &C, std::vector<SymbolRef> TaintedSymbols,
+    std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) {
+  assert(TaintedSymbols.size() == TaintedArgs.size());
+  return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
+                       TaintedArgs = std::move(TaintedArgs), CallLocation](
+                          PathSensitiveBugReport &BR) -> std::string {
+    SmallString<256> Msg;
+    llvm::raw_svector_ostream Out(Msg);
+    // We give diagnostics only for taint related reports
+    if (TaintedSymbols.empty() ||
+        BR.getBugType().getCategory() != categories::TaintedData) {
+      return "";
+    }
+    int nofTaintedArgs = 0;
+    for (auto Sym : llvm::enumerate(TaintedSymbols)) {
+      if (BR.isInteresting(Sym.value())) {
+        BR.markInteresting(CallLocation);
+        if (TaintedArgs.at(Sym.index()) != ReturnValueIndex) {
+          LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument "
+                                  << TaintedArgs.at(Sym.index()) + 1 << "\n");
+          if (nofTaintedArgs == 0)
+            Out << "Taint propagated to argument "
+                << TaintedArgs.at(Sym.index()) + 1;
+          else
+            Out << ", " << TaintedArgs.at(Sym.index()) + 1;
+          nofTaintedArgs++;
+        } else {
+          LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n");
+          Out << "Taint propagated to the return value";
+        }
+      }
+    }
+    return std::string(Out.str());
+  });
 }
 
 /// ArgSet is used to describe arguments relevant for taint detection or
@@ -193,7 +258,7 @@
   ArgSet SinkArgs;
   /// Arguments which should be sanitized on function return.
   ArgSet FilterArgs;
-  /// Arguments which can participate in taint propagationa. If any of the
+  /// Arguments which can participate in taint propagation. If any of the
   /// arguments in PropSrcArgs is tainted, all arguments in  PropDstArgs should
   /// be tainted.
   ArgSet PropSrcArgs;
@@ -343,7 +408,7 @@
                                CheckerContext &C) const;
 
 private:
-  const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"};
+  const BugType BT{this, "Use of Untrusted Data", categories::TaintedData};
 
   bool checkUncontrolledFormatString(const CallEvent &Call,
                                      CheckerContext &C) const;
@@ -351,7 +416,7 @@
   void taintUnsafeSocketProtocol(const CallEvent &Call,
                                  CheckerContext &C) const;
 
-  /// Default taint rules are initilized with the help of a CheckerContext to
+  /// Default taint rules are initalized with the help of a CheckerContext to
   /// access the names of built-in functions like memcpy.
   void initTaintRules(CheckerContext &C) const;
 
@@ -788,22 +853,42 @@
     llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
   });
 
+  const NoteTag *InjectionTag = nullptr;
+  std::vector<SymbolRef> TaintedSymbols;
+  std::vector<ArgIdxTy> TaintedIndexes;
   for (ArgIdxTy ArgNum : *TaintArgs) {
     // Special handling for the tainted return value.
     if (ArgNum == ReturnValueIndex) {
       State = addTaint(State, Call.getReturnValue());
+      SymbolRef TaintedSym = isTainted(State, Call.getReturnValue());
+      if (TaintedSym) {
+        TaintedSymbols.push_back(TaintedSym);
+        TaintedIndexes.push_back(ArgNum);
+      }
       continue;
     }
-
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
-    if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum)))
+    if (auto V =
+            getPointeeOf(C.getASTContext(), State, Call.getArgSVal(ArgNum))) {
       State = addTaint(State, *V);
+      // FIXME: The call argument may be a complex expression
+      // referring to multiple tainted variables.
+      // Now we generate notes and track back only one of them.
+      SymbolRef TaintedSym = isTainted(State, *V);
+      if (TaintedSym) {
+        TaintedSymbols.push_back(TaintedSym);
+        TaintedIndexes.push_back(ArgNum);
+      }
+    }
   }
-
+  // Create a NoteTag callback, which prints to the user where the taintedness
+  // was propagated to.
+  InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes,
+                                              Call.getCalleeStackFrame(0));
   // Clear up the taint info from the state.
   State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
-  C.addTransition(State);
+  C.addTransition(State, InjectionTag);
 }
 
 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
@@ -826,7 +911,12 @@
 
   /// Check for taint sinks.
   ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
-    if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
+    // Add taintedness to stdin parameters
+    if (isStdin(C.getSVal(E), C.getASTContext())) {
+      State = addTaint(State, C.getSVal(E));
+    }
+    if (SinkArgs.contains(I) &&
+        isTaintedOrPointsToTainted(C.getASTContext(), State, C.getSVal(E)))
       Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
   });
 
@@ -834,7 +924,7 @@
   ForEachCallArg([this, &C, &State](ArgIdxTy I, const Expr *E, SVal S) {
     if (FilterArgs.contains(I)) {
       State = removeTaint(State, S);
-      if (auto P = getPointeeOf(C, S))
+      if (auto P = getPointeeOf(C.getASTContext(), State, S))
         State = removeTaint(State, *P);
     }
   });
@@ -843,11 +933,25 @@
   /// A rule is relevant if PropSrcArgs is empty, or if any of its signified
   /// args are tainted in context of the current CallEvent.
   bool IsMatching = PropSrcArgs.isEmpty();
-  ForEachCallArg(
-      [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) {
-        IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
-                                    isTaintedOrPointsToTainted(E, State, C));
-      });
+  std::vector<SymbolRef> TaintedSymbols;
+  std::vector<ArgIdxTy> TaintedIndexes;
+  ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols,
+                  &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) {
+    IsMatching =
+        IsMatching ||
+        (PropSrcArgs.contains(I) &&
+         isTaintedOrPointsToTainted(C.getASTContext(), State, C.getSVal(E)));
+    std::optional<SVal> TaintedSVal =
+        getTaintedPointeeOrPointer(C.getASTContext(), State, C.getSVal(E));
+
+    // We track back tainted arguments except for stdin
+    if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) {
+      if (SymbolRef SR = isTainted(State, *TaintedSVal)) {
+        TaintedSymbols.push_back(SR);
+        TaintedIndexes.push_back(I);
+      }
+    }
+  });
 
   if (!IsMatching)
     return;
@@ -890,7 +994,9 @@
 
   if (!Result.isEmpty())
     State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
-  C.addTransition(State);
+  const NoteTag *InjectionTag = taintOriginTrackerTag(
+      C, TaintedSymbols, TaintedIndexes, Call.getCalleeStackFrame(0));
+  C.addTransition(State, InjectionTag);
 }
 
 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
@@ -902,7 +1008,8 @@
 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
                                                   CheckerContext &C) const {
   assert(E);
-  std::optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))};
+  std::optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(
+      C.getASTContext(), C.getState(), C.getSVal(E))};
 
   if (!TaintedSVal)
     return false;
@@ -911,7 +1018,9 @@
   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
     auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
     report->addRange(E->getSourceRange());
-    report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal));
+    if (SymbolRef TaintedSym = isTainted(C.getState(), *TaintedSVal))
+      report->markInteresting(TaintedSym);
+
     C.emitReport(std::move(report));
     return true;
   }
Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -25,9 +25,9 @@
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > {
-  mutable std::unique_ptr<BuiltinBug> BT;
-  void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C,
-                 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+  mutable std::unique_ptr<BugType> BT;
+  void reportBug(StringRef Msg, StringRef Category, ProgramStateRef StateZero,
+                 CheckerContext &C, SymbolRef TaintedSymbol = nullptr) const;
 
 public:
   void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
@@ -41,16 +41,17 @@
   return nullptr;
 }
 
-void DivZeroChecker::reportBug(
-    const char *Msg, ProgramStateRef StateZero, CheckerContext &C,
-    std::unique_ptr<BugReporterVisitor> Visitor) const {
+void DivZeroChecker::reportBug(StringRef Msg, StringRef Category,
+                               ProgramStateRef StateZero, CheckerContext &C,
+                               SymbolRef TaintedSymbol) const {
   if (ExplodedNode *N = C.generateErrorNode(StateZero)) {
     if (!BT)
-      BT.reset(new BuiltinBug(this, "Division by zero"));
+      BT.reset(new BugType(this, "Division by zero", Category));
 
     auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
-    R->addVisitor(std::move(Visitor));
     bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
+    if (TaintedSymbol)
+      R->markInteresting(TaintedSymbol);
     C.emitReport(std::move(R));
   }
 }
@@ -82,15 +83,16 @@
 
   if (!stateNotZero) {
     assert(stateZero);
-    reportBug("Division by zero", stateZero, C);
+    reportBug("Division by zero", categories::LogicError, stateZero, C);
     return;
   }
 
-  bool TaintedD = isTainted(C.getState(), *DV);
-  if ((stateNotZero && stateZero && TaintedD)) {
-    reportBug("Division by a tainted value, possibly zero", stateZero, C,
-              std::make_unique<taint::TaintBugVisitor>(*DV));
-    return;
+  if ((stateNotZero && stateZero)) {
+    if (SymbolRef TaintedSym = isTainted(C.getState(), *DV)) {
+      reportBug("Division by a tainted value, possibly zero",
+                categories::TaintedData, stateZero, C, TaintedSym);
+      return;
+    }
   }
 
   // If we get here, then the denom should not be zero. We abandon the implicit
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -32,12 +32,12 @@
 namespace {
 class ArrayBoundCheckerV2 :
     public Checker<check::Location> {
-  mutable std::unique_ptr<BuiltinBug> BT;
+  mutable std::unique_ptr<BugType> BT;
 
   enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
 
   void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
-                 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+                 SymbolRef TaintedSymbol = nullptr) const;
 
 public:
   void checkLocation(SVal l, bool isLoad, const Stmt*S,
@@ -206,9 +206,8 @@
     // If we are under constrained and the index variables are tainted, report.
     if (state_exceedsUpperBound && state_withinUpperBound) {
       SVal ByteOffset = rawOffset.getByteOffset();
-      if (isTainted(state, ByteOffset)) {
-        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
-                  std::make_unique<TaintBugVisitor>(ByteOffset));
+      if (SymbolRef TSR = isTainted(state, ByteOffset)) {
+        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, TSR);
         return;
       }
     } else if (state_exceedsUpperBound) {
@@ -227,16 +226,16 @@
   checkerContext.addTransition(state);
 }
 
-void ArrayBoundCheckerV2::reportOOB(
-    CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
-    std::unique_ptr<BugReporterVisitor> Visitor) const {
+void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
+                                    ProgramStateRef errorState, OOB_Kind kind,
+                                    SymbolRef TaintedSymbol) const {
 
   ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
   if (!errorNode)
     return;
-
-  if (!BT)
-    BT.reset(new BuiltinBug(this, "Out-of-bound access"));
+  BT.reset(new BugType(this, "Out-of-bound access",
+                       kind == OOB_Tainted ? categories::TaintedData
+                                           : categories::LogicError));
 
   // FIXME: This diagnostics are preliminary.  We should get far better
   // diagnostics for explaining buffer overruns.
@@ -257,7 +256,9 @@
   }
 
   auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
-  BR->addVisitor(std::move(Visitor));
+  if (TaintedSymbol)
+    BR->markInteresting(TaintedSymbol);
+
   checkerContext.emitReport(std::move(BR));
 }
 
Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
+++ clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
@@ -22,6 +22,7 @@
 extern const char *const CXXMoveSemantics;
 extern const char *const SecurityError;
 extern const char *const UnusedCode;
+extern const char *const TaintedData;
 } // namespace categories
 } // namespace ento
 } // namespace clang
Index: clang/include/clang/StaticAnalyzer/Checkers/Taint.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/Taint.h
+++ clang/include/clang/StaticAnalyzer/Checkers/Taint.h
@@ -62,43 +62,27 @@
                 TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the statement has a tainted value in the given state.
-bool isTainted(ProgramStateRef State, const Stmt *S,
-               const LocationContext *LCtx,
-               TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, const Stmt *S,
+                    const LocationContext *LCtx,
+                    TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the value is tainted in the given state.
-bool isTainted(ProgramStateRef State, SVal V,
-               TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, SVal V,
+                    TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the symbol is tainted in the given state.
-bool isTainted(ProgramStateRef State, SymbolRef Sym,
-               TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, SymbolRef Sym,
+                    TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the pointer represented by the region is tainted in the given
 /// state.
-bool isTainted(ProgramStateRef State, const MemRegion *Reg,
-               TaintTagType Kind = TaintTagGeneric);
+SymbolRef isTainted(ProgramStateRef State, const MemRegion *Reg,
+                    TaintTagType Kind = TaintTagGeneric);
 
 void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n",
                 const char *sep = "");
 
 LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State);
-
-/// The bug visitor prints a diagnostic message at the location where a given
-/// variable was tainted.
-class TaintBugVisitor final : public BugReporterVisitor {
-private:
-  const SVal V;
-
-public:
-  TaintBugVisitor(const SVal V) : V(V) {}
-  void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
-
-  PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
-                                   BugReporterContext &BRC,
-                                   PathSensitiveBugReport &BR) override;
-};
-
 } // namespace taint
 } // namespace ento
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to