teemperor created this revision.
teemperor added reviewers: v.g.vassilev, zaks.anna, NoQ.
teemperor added a subscriber: cfe-commits.

So far the CloneDetector only respected the class of each statement when 
searching for clones. This means that nodes that differentiate in any other 
attribute are falsely considered to be clones of each other as long as they had 
the same statement class. As an example, the statements "a > b" and "a < b" are 
currently considered to be clones of each other, even though they are obviously 
different.

This patch refines the way the CloneDetector collects data from each statement 
by providing methods for each class that will read the class-specific 
attributes. These attributes are for example the operator kind for 
BinaryOperator statements which would prevent the false-positive result from 
the example above.

The code itself is contained in the StmtDataCollector class because it will be 
reused in a future patch that searches for false-positives.

https://reviews.llvm.org/D22514

Files:
  lib/Analysis/CloneDetection.cpp
  test/Analysis/copypaste/test-min-max.cpp

Index: test/Analysis/copypaste/test-min-max.cpp
===================================================================
--- test/Analysis/copypaste/test-min-max.cpp
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -18,14 +18,6 @@
 
 // False positives below. These clones should not be reported.
 
-// FIXME: Detect different binary operator kinds.
-int min1(int a, int b) { // expected-note{{Related code clone is here.}}
-  log();
-  if (a < b)
-    return a;
-  return b;
-}
-
 // FIXME: Detect different variable patterns.
 int min2(int a, int b) { // expected-note{{Related code clone is here.}}
   log();
@@ -36,6 +28,13 @@
 
 // Functions below are not clones and should not be reported.
 
+int min1(int a, int b) { // no-warning
+  log();
+  if (a < b)
+    return a;
+  return b;
+}
+
 int foo(int a, int b) { // no-warning
   return a + b;
 }
Index: lib/Analysis/CloneDetection.cpp
===================================================================
--- lib/Analysis/CloneDetection.cpp
+++ lib/Analysis/CloneDetection.cpp
@@ -78,6 +78,199 @@
 SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
 
 namespace {
+/// \brief Defines empty addData methods for each Stmt class.
+///
+/// StmtDataCollector inherits from this class so that each Stmt has an empty
+/// fallback addData method. This prevents compilation failures from missing
+/// methods if a new Stmt subclass is added to clang. Also, many subclasses
+/// don't have any special data attached to them, so we don't need to manually
+/// define empty addData methods in StmtDataCollector if they are already
+/// defined here.
+class StmtDataCollectorDefiner {
+protected:
+  #define STMT(CLASS, PARENT)                                                  \
+  void addData##CLASS(CLASS const *S) {                                        \
+  }
+  #include "clang/AST/StmtNodes.inc"
+};
+} // end anonymous namespace
+
+namespace {
+/// \brief Collects the data of a single Stmt.
+///
+/// The data of a statement that isn't collected by this class is considered to
+/// be unimportant when comparing two statements. This means that this class
+/// defines what a 'similar' clone is. For example, this class doesn't collect
+/// names of variables for example, which makes statements that only differ in
+/// the names of the referenced variables clones of each other.
+class StmtDataCollector : StmtDataCollectorDefiner {
+  ASTContext &Context;
+  llvm::FoldingSetNodeID &D;
+
+public:
+
+  /// \brief Collects data from the given Stmt and saves it into the given
+  ///        FoldingSetNodeID.
+  StmtDataCollector(Stmt const *S, ASTContext &Context,
+                    llvm::FoldingSetNodeID &D) : Context(Context), D(D) {
+    collectData(S);
+  }
+
+protected:
+  // Below are utility methods for adding generic data to the FoldingSetNodeID.
+
+  void addData(unsigned Data) {
+    D.AddInteger(Data);
+  }
+
+  void addData(llvm::StringRef const &String) {
+    D.AddString(String);
+  }
+
+  void addData(std::string const &String) {
+    D.AddString(String);
+  }
+
+  void addData(QualType QT) {
+    addData(QT.getAsString());
+  }
+
+  // Utility functions for calling the addData method for each parent class
+  // of a given Stmt class.
+  #define STMT(CLASS, PARENT)                                                  \
+  void callAddData##CLASS(CLASS const *S) {                                    \
+    callAddData##PARENT(S);                                                    \
+    addData##CLASS(S);                                                         \
+  }
+  #include "clang/AST/StmtNodes.inc"
+
+  // Above code won't define callAddDataStmt, so we have to manually define it.
+  void callAddDataStmt(Stmt const *S) {
+    addDataStmt(S);
+  }
+
+  /// \brief Calls the appropriate callAddData method for the actual class of
+  ///        the given Stmt.
+  void collectData(Stmt const *S) {
+    switch(S->getStmtClass()) {
+      case Stmt::NoStmtClass:
+        break;
+    #define ABSTRACT_STMT(STMT)
+    #define STMT(CLASS, PARENT)                                                \
+      case Stmt::CLASS##Class:                                                 \
+        callAddData##CLASS(cast<CLASS const>(S)); break;
+    #include "clang/AST/StmtNodes.inc"
+    }
+  }
+
+  // Below are functions that collect the specific data for each Stmt subclass.
+  #define DEF_ADD_DATA(CLASS, CODE)                                            \
+  void addData##CLASS(CLASS const *S) {                                        \
+    CODE;                                                                      \
+  }
+
+  DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); })
+  DEF_ADD_DATA(Expr, { addData(S->getType()); })
+
+  //--- Builtin functionality ------------------------------------------------//
+  DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(AtomicExpr, {
+    addData(S->isVolatile());
+    addData(S->getOp());
+  })
+  DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
+  DEF_ADD_DATA(TypeTraitExpr, { addData(S->getTrait()); })
+
+  //--- Calls ----------------------------------------------------------------//
+  DEF_ADD_DATA(CXXOperatorCallExpr, { addData(S->getOperator()); })
+  DEF_ADD_DATA(CallExpr, {
+    addData(S->getDirectCallee()->getQualifiedNameAsString());
+  })
+
+  //--- Exceptions -----------------------------------------------------------//
+  DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
+
+  //--- C++ OOP Stmts --------------------------------------------------------//
+  DEF_ADD_DATA(CXXDeleteExpr, {
+    addData(S->isArrayFormAsWritten());
+    addData(S->isGlobalDelete());
+  })
+
+  //--- Casts ----------------------------------------------------------------//
+  DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
+
+  //--- Miscellaneous Exprs --------------------------------------------------//
+  DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
+  DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
+
+  //--- Control flow ---------------------------------------------------------//
+  DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
+  DEF_ADD_DATA(IndirectGotoStmt, {
+                 if (S->getConstantTarget())
+                   addData(S->getConstantTarget()->getName());
+               })
+  DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
+  DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
+  DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
+
+  //--- Objective-C ----------------------------------------------------------//
+  DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
+  DEF_ADD_DATA(ObjCPropertyRefExpr, {
+    addData(S->isSuperReceiver());
+    addData(S->isImplicitProperty());
+  })
+  DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
+
+  //--- Miscellaneous Stmts --------------------------------------------------//
+  DEF_ADD_DATA(CXXFoldExpr, {
+    addData(S->isRightFold());
+    addData(S->getOperator());
+  })
+  DEF_ADD_DATA(GenericSelectionExpr, { addData(S->getNumAssocs()); })
+  DEF_ADD_DATA(LambdaExpr, {
+    for (const LambdaCapture &C : S->captures()) {
+      addData(C.isPackExpansion());
+      addData(C.getCaptureKind());
+      if (C.capturesVariable())
+        addData(C.getCapturedVar()->getType());
+    }
+    addData(S->isGenericLambda());
+    addData(S->isMutable());
+    addData(S->getCallOperator()->param_size());
+  })
+  DEF_ADD_DATA(DeclStmt, {
+    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
+    addData(static_cast<unsigned>(numDecls));
+    for (Decl *D : S->decls()) {
+      if (VarDecl* VD = dyn_cast<VarDecl>(D)) {
+        addData(VD->getType());
+      }
+    }
+  })
+  DEF_ADD_DATA(AsmStmt, {
+    addData(S->isSimple());
+    addData(S->isVolatile());
+    addData(S->generateAsmString(Context));
+    for (unsigned i = 0; i < S->getNumInputs(); ++i) {
+      addData(S->getInputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
+      addData(S->getOutputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
+      addData(S->getClobber(i));
+    }
+  })
+  DEF_ADD_DATA(AttributedStmt, {
+    for (Attr const * A : S->getAttrs()) {
+      addData(std::string(A->getSpelling()));
+    }
+  })
+};
+} // end anonymous namespace
+
+namespace {
 /// \brief A cache for temporarily storing a small amount of CloneSignatures.
 ///
 /// This cache should be manually cleaned from CloneSignatures that are
@@ -110,6 +303,8 @@
   /// \param Context The ASTContext that contains Parent.
   void removeChildren(Stmt const *Parent, ASTContext &Context) {
     for (Stmt const *Child : Parent->children()) {
+      if (!Child)
+        continue;
       StmtSequence ChildSequence(Child, Context);
       remove(ChildSequence);
     }
@@ -126,7 +321,13 @@
         return *I;
       }
     }
-    llvm_unreachable("Couldn't find CloneSignature for StmtSequence");
+    // We return an empty signature on a cache miss. This isn't a perfect
+    // solution, but it's better than crashing when RecursiveASTVisitor is
+    // skipping some generated statements when generating signatures (which
+    // leads to this situation where signatures are missing in the cache).
+    // Unless RecursiveASTVisitor starts skipping important nodes, this won't
+    // influence the false-positive rate.
+    return CloneDetector::CloneSignature();
   }
 };
 } // end anonymous namespace
@@ -249,10 +450,7 @@
   llvm::FoldingSetNodeID Hash;
   unsigned Complexity = 1;
 
-  // The only relevant data for now is the class of the statement, so we
-  // calculate the hash class into the current hash value.
-  // TODO: Handle statement class specific data.
-  Hash.AddInteger(S->getStmtClass());
+  StmtDataCollector(S, Context, Hash);
 
   // Incorporate the hash values of all child Stmts into the current hash value.
   HandleChildHashes(Hash, Complexity, S->children());
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to