boga95 updated this revision to Diff 246120.
boga95 marked 5 inline comments as done.
Herald added a subscriber: martong.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71524/new/

https://reviews.llvm.org/D71524

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/test/Analysis/taint-generic.cpp

Index: clang/test/Analysis/taint-generic.cpp
===================================================================
--- clang/test/Analysis/taint-generic.cpp
+++ clang/test/Analysis/taint-generic.cpp
@@ -124,3 +124,126 @@
   foo.myMemberScanf("%d", &x);
   Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
 }
+
+
+// Test I/O
+namespace std {
+  template<class CharT> class char_traits {};
+
+  class ios_base {};
+
+  template<class CharT, class Traits = char_traits<CharT>>
+  class basic_ios : public ios_base {};
+
+  template<class CharT, class Traits = char_traits<CharT>>
+  class basic_istream : virtual public basic_ios<CharT, Traits> {};
+  template<class CharT, class Traits = char_traits<CharT>>
+  class basic_ostream : virtual public basic_ios<CharT, Traits> {};
+
+  using istream = basic_istream<char>;
+  using ostream = basic_ostream<char>;
+  using wistream = basic_istream<wchar_t>;
+
+  istream& operator>>(istream& is, int& val);
+  wistream& operator>>(wistream& is, int& val);
+
+  extern istream cin;
+  extern istream wcin;
+
+  template<class CharT, class Traits = char_traits<CharT>>
+  class basic_fstream :
+    public basic_istream<CharT, Traits>, public basic_ostream<CharT, Traits> {
+  public:
+      basic_fstream(const char*) {}
+  };
+  template<class CharT, class Traits = char_traits<CharT>>
+  class basic_ifstream : public basic_istream<CharT, Traits> {
+  public:
+    basic_ifstream(const char*) {}
+  };
+
+  using ifstream = basic_ifstream<char>;
+  using fstream = basic_ifstream<char>;
+
+  template<class CharT> class allocator {};
+
+namespace __cxx11 {
+    template<
+      class CharT,
+      class Traits = std::char_traits<CharT>,
+      class Allocator = std::allocator<CharT>>
+    class basic_string {
+    public:
+      const char* c_str();
+      basic_string operator=(const basic_string&);
+    private:
+      unsigned size;
+      char* ptr;
+    };
+  }
+
+  using string = __cxx11::basic_string<char>;
+
+  istream& operator>>(istream& is, string& val);
+  istream& getline(istream& is, string& str);
+}
+
+void testCin() {
+  int x, y;
+  std::cin >> x >> y;
+  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testWcin() {
+  int x, y;
+  std::wcin >> x >> y;
+  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void mySink(const std::string&, int, const char*);
+
+void testFstream() {
+  std::string str;
+  std::ifstream file("example.txt");
+  file >> str;
+  mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testIfstream() {
+  std::string str;
+  std::fstream file("example.txt");
+  file >> str;
+  mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testStdstring() {
+  std::string str1;
+  std::cin >> str1;
+
+  std::string& str2 = str1;
+  mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+
+  const std::string& str3 = str1;
+  mySink(str3, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testTaintedThis() {
+  std::string str;
+  mySink(std::string(), 0, str.c_str()); // no-warning
+
+  std::cin >> str;
+  mySink(std::string(), 0, str.c_str()); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testOverloadedAssignmentOp() {
+  std::string str1, str2;
+  std::cin >> str1;
+  str2 = str1;
+  mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testGetline() {
+  std::string str;
+  std::getline(std::cin, str);
+  mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -152,6 +152,14 @@
     return isTainted(State, Sym, Kind);
   if (const MemRegion *Reg = V.getAsRegion())
     return isTainted(State, Reg, Kind);
+  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+    if (Optional<SVal> binding =
+            State->getStateManager().getStoreManager().getDefaultBinding(
+                *LCV)) {
+      if (SymbolRef Sym = binding->getAsSymbol())
+        return isTainted(State, Sym, Kind);
+    }
+  }
   return false;
 }
 
Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -135,6 +135,9 @@
   bool checkPre(const CallExpr *CE, const FunctionData &FData,
                 CheckerContext &C) const;
 
+  /// Add taint sources for operator>> on pre-visit.
+  bool addOverloadedOpPre(const CallExpr *CE, CheckerContext &C) const;
+
   /// Add taint sources on a pre-visit. Returns true on matching.
   bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
                      CheckerContext &C) const;
@@ -151,6 +154,9 @@
   /// and thus, is tainted.
   static bool isStdin(const Expr *E, CheckerContext &C);
 
+  /// Check if the type of the variable is std::basic_istream
+  static bool isStdstream(const Expr *E, CheckerContext &C);
+
   /// Given a pointer argument, return the value it points to.
   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
 
@@ -258,12 +264,10 @@
 
     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
                                            CheckerContext &C) {
-      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
+      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C) ||
+          isStdstream(E, C))
         return true;
 
-      if (!E->getType().getTypePtr()->isPointerType())
-        return false;
-
       Optional<SVal> V = getPointedToSVal(C, E);
       return (V && isTainted(State, *V));
     }
@@ -279,7 +283,20 @@
 
   /// Defines a map between the propagation function's name, scope
   /// and TaintPropagationRule.
-  NameRuleMap CustomPropagations;
+  NameRuleMap CustomPropagations{
+      {"c_str",
+       {"std::__cxx11::basic_string",
+        TaintPropagationRule({0}, {ReturnValueIndex})}},
+      {"data",
+       {"std::__cxx11::basic_string",
+        TaintPropagationRule({0}, {ReturnValueIndex})}},
+      {"size",
+       {"std::__cxx11::basic_string",
+        TaintPropagationRule({0}, {ReturnValueIndex})}},
+      {"length",
+       {"std::__cxx11::basic_string",
+        TaintPropagationRule({0}, {ReturnValueIndex})}},
+      {"getline", {"std::", TaintPropagationRule({0}, {1, ReturnValueIndex})}}};
 
   /// Defines a map between the filter function's name, scope and filtering
   /// args.
@@ -351,6 +368,23 @@
 /// points to data, which should be tainted on return.
 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
 
+/// Treat implicit this parameter as the 0. argument.
+const Expr *getArgWithImplicitThis(const CallExpr *CE, unsigned ArgNum) {
+  if (const auto *MCE = dyn_cast<CXXMemberCallExpr>(CE)) {
+    if (ArgNum == 0)
+      return MCE->getImplicitObjectArgument();
+    return MCE->getArg(ArgNum - 1);
+  }
+  return CE->getArg(ArgNum);
+}
+
+/// Class member functions has an implicit this parameteter.
+unsigned getNumArgsWithImplicitThis(const CallExpr *CE) {
+  if (isa<CXXMemberCallExpr>(CE))
+    return CE->getNumArgs() + 1;
+  return CE->getNumArgs();
+}
+
 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
   ArgVector Result;
@@ -516,6 +550,9 @@
 
 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
+  if (addOverloadedOpPre(CE, C))
+    return;
+
   Optional<FunctionData> FData = FunctionData::create(CE, C);
   if (!FData)
     return;
@@ -545,6 +582,31 @@
   printTaint(State, Out, NL, Sep);
 }
 
+bool GenericTaintChecker::addOverloadedOpPre(const CallExpr *CE,
+                                             CheckerContext &C) const {
+  const auto *OCE = dyn_cast<CXXOperatorCallExpr>(CE);
+  if (OCE) {
+    TaintPropagationRule Rule;
+    switch (OCE->getOperator()) {
+    case OO_GreaterGreater:
+      Rule = TaintPropagationRule({0}, {1, ReturnValueIndex});
+      break;
+    case OO_Equal:
+      Rule = TaintPropagationRule({1}, {0});
+      break;
+    default:
+      return false;
+    };
+
+    ProgramStateRef State = Rule.process(CE, C);
+    if (State) {
+      C.addTransition(State);
+      return true;
+    }
+  }
+  return false;
+}
+
 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
                                         const FunctionData &FData,
                                         CheckerContext &C) const {
@@ -572,10 +634,10 @@
   const auto &Value = It->second;
   const ArgVector &Args = Value.second;
   for (unsigned ArgNum : Args) {
-    if (ArgNum >= CE->getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(CE))
       continue;
 
-    const Expr *Arg = CE->getArg(ArgNum);
+    const Expr *Arg = getArgWithImplicitThis(CE, ArgNum);
     Optional<SVal> V = getPointedToSVal(C, Arg);
     if (V)
       State = removeTaint(State, *V);
@@ -608,9 +670,9 @@
 
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
-    if (CE->getNumArgs() < (ArgNum + 1))
+    if (getNumArgsWithImplicitThis(CE) < (ArgNum + 1))
       return false;
-    const Expr *Arg = CE->getArg(ArgNum);
+    const Expr *Arg = getArgWithImplicitThis(CE, ArgNum);
     Optional<SVal> V = getPointedToSVal(C, Arg);
     if (V)
       State = addTaint(State, *V);
@@ -678,18 +740,20 @@
   // Check for taint in arguments.
   bool IsTainted = true;
   for (unsigned ArgNum : SrcArgs) {
-    if (ArgNum >= CE->getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(CE))
       continue;
 
-    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
+    if ((IsTainted = isTaintedOrPointsToTainted(
+             getArgWithImplicitThis(CE, ArgNum), State, C)))
       break;
   }
 
   // Check for taint in variadic arguments.
   if (!IsTainted && VariadicType::Src == VarType) {
     // Check if any of the arguments is tainted
-    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
-      if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
+    for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(CE); ++i) {
+      if ((IsTainted = isTaintedOrPointsToTainted(getArgWithImplicitThis(CE, i),
+                                                  State, C)))
         break;
     }
   }
@@ -708,7 +772,7 @@
       continue;
     }
 
-    if (ArgNum >= CE->getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(CE))
       continue;
 
     // Mark the given argument.
@@ -721,8 +785,8 @@
     //   If they are not pointing to const data, mark data as tainted.
     //   TODO: So far we are just going one level down; ideally we'd need to
     //         recurse here.
-    for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
-      const Expr *Arg = CE->getArg(i);
+    for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(CE); ++i) {
+      const Expr *Arg = getArgWithImplicitThis(CE, i);
       // Process pointer argument.
       const Type *ArgTy = Arg->getType().getTypePtr();
       QualType PType = ArgTy->getPointeeType();
@@ -739,7 +803,7 @@
 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
                                                            const CallExpr *CE,
                                                            CheckerContext &C) {
-  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
+  SourceLocation DomLoc = getArgWithImplicitThis(CE, 0)->getExprLoc();
   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
   // White list the internal communication protocols.
   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
@@ -749,6 +813,11 @@
   return true;
 }
 
+bool GenericTaintChecker::isStdstream(const Expr *E, CheckerContext &C) {
+  const std::string CT = E->getType().getCanonicalType().getAsString();
+  return StringRef(CT).find("std::basic_istream") != StringRef::npos;
+}
+
 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
   ProgramStateRef State = C.getState();
   SVal Val = C.getSVal(E);
@@ -795,7 +864,8 @@
     return false;
   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
     ArgNum = Format->getFormatIdx() - 1;
-    if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
+    if ((Format->getType()->getName() == "printf") &&
+        getNumArgsWithImplicitThis(CE) > ArgNum)
       return true;
   }
 
@@ -844,7 +914,7 @@
 
   // If either the format string content or the pointer itself are tainted,
   // warn.
-  return generateReportIfTainted(CE->getArg(ArgNum),
+  return generateReportIfTainted(getArgWithImplicitThis(CE, ArgNum),
                                  MsgUncontrolledFormatString, C);
 }
 
@@ -866,10 +936,12 @@
                         .Case("dlopen", 0)
                         .Default(InvalidArgIndex);
 
-  if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
+  if (ArgNum == InvalidArgIndex ||
+      getNumArgsWithImplicitThis(CE) < (ArgNum + 1))
     return false;
 
-  return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
+  return generateReportIfTainted(getArgWithImplicitThis(CE, ArgNum),
+                                 MsgSanitizeSystemArgs, C);
 }
 
 // TODO: Should this check be a part of the CString checker?
@@ -907,8 +979,9 @@
       ArgNum = 2;
   }
 
-  return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
-         generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
+  return ArgNum != InvalidArgIndex && getNumArgsWithImplicitThis(CE) > ArgNum &&
+         generateReportIfTainted(getArgWithImplicitThis(CE, ArgNum),
+                                 MsgTaintedBufferSize, C);
 }
 
 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
@@ -921,10 +994,11 @@
   const auto &Value = It->second;
   const GenericTaintChecker::ArgVector &Args = Value.second;
   for (unsigned ArgNum : Args) {
-    if (ArgNum >= CE->getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(CE))
       continue;
 
-    if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
+    if (generateReportIfTainted(getArgWithImplicitThis(CE, ArgNum),
+                                MsgCustomSink, C))
       return true;
   }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to