johannes updated this revision to Diff 110688.
johannes added a comment.

newline in error message


https://reviews.llvm.org/D36185

Files:
  include/clang/Tooling/ASTDiff/ASTDiff.h
  lib/Tooling/ASTDiff/ASTDiff.cpp
  test/Tooling/clang-diff-bottomup.cpp
  test/Tooling/clang-diff-opt.cpp
  test/Tooling/clang-diff-topdown.cpp
  tools/clang-diff/ClangDiff.cpp

Index: tools/clang-diff/ClangDiff.cpp
===================================================================
--- tools/clang-diff/ClangDiff.cpp
+++ tools/clang-diff/ClangDiff.cpp
@@ -50,6 +50,11 @@
                                             cl::Optional,
                                             cl::cat(ClangDiffCategory));
 
+static cl::opt<std::string> StopAfter("stop-after",
+                                      cl::desc("<topdown|bottomup>"),
+                                      cl::Optional, cl::init(""),
+                                      cl::cat(ClangDiffCategory));
+
 static cl::opt<int> MaxSize("s", cl::desc("<maxsize>"), cl::Optional,
                             cl::init(-1), cl::cat(ClangDiffCategory));
 
@@ -424,6 +429,14 @@
   diff::ComparisonOptions Options;
   if (MaxSize != -1)
     Options.MaxSize = MaxSize;
+  if (!StopAfter.empty()) {
+    if (StopAfter == "topdown")
+      Options.StopAfterTopDown = true;
+    else if (StopAfter != "bottomup") {
+      llvm::errs() << "Error: Invalid argument for -stop-after\n";
+      return 1;
+    }
+  }
   diff::SyntaxTree SrcTree(Src->getASTContext());
   diff::SyntaxTree DstTree(Dst->getASTContext());
   diff::ASTDiff Diff(SrcTree, DstTree, Options);
Index: test/Tooling/clang-diff-topdown.cpp
===================================================================
--- /dev/null
+++ test/Tooling/clang-diff-topdown.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the top-down matching of identical subtrees only.
+
+#ifndef DEST
+
+void f1()
+{
+  // Match some subtree of height greater than 2.
+  // CHECK: Match CompoundStmt(3) to CompoundStmt(3)
+  // CHECK: Match CompoundStmt(4) to CompoundStmt(4)
+  // CHECK: Match NullStmt(5) to NullStmt(5)
+  {{;}}
+
+  // Don't match subtrees that are smaller.
+  // CHECK-NOT: Match CompoundStmt(6)
+  // CHECK-NOT: Match NullStmt(7)
+  {;}
+
+  // Greedy approach - use the first matching subtree when there are multiple
+  // identical subtrees.
+  // CHECK: Match CompoundStmt(8) to CompoundStmt(8)
+  // CHECK: Match CompoundStmt(9) to CompoundStmt(9)
+  // CHECK: Match NullStmt(10) to NullStmt(10)
+  {{;;}}
+}
+
+#else
+
+void f1() {
+
+  {{;}}
+
+  {;}
+
+  {{;;}}
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(11)
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(12)
+  // CHECK-NOT: Match {{.*}} to NullStmt(13)
+  {{;;}}
+
+  // CHECK-NOT: Match {{.*}} to NullStmt(14)
+  ;
+}
+
+#endif
Index: test/Tooling/clang-diff-opt.cpp
===================================================================
--- /dev/null
+++ test/Tooling/clang-diff-opt.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;;;;;}} }
+
+void f3() { {;} {{;;;;;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5
+// The optimal matching algorithm should move the ; into the outer block
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK-NOT: Match CompoundStmt(3)
+// CHECK-NEXT: Match NullStmt(4) to NullStmt(3)
+  ; {{;}}
+}
+ 
+void f2() {
+  // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5
+  // As none of the subtrees is bigger than 10 nodes, the optimal algorithm
+  // will be run.
+  // CHECK: Match NullStmt(11) to NullStmt(9)
+  ;; {{;;;;;}}
+}
+
+void f3() {
+  // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5
+  // As the subtrees are bigger than 10 nodes, the optimal algorithm will not
+  // be run.
+  // CHECK: Delete NullStmt(22)
+  ;; {{;;;;;;}}
+}
+#endif
Index: test/Tooling/clang-diff-bottomup.cpp
===================================================================
--- /dev/null
+++ test/Tooling/clang-diff-bottomup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void f1() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5
+// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1)
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK: Match CompoundStmt(4) to CompoundStmt(3)
+// CHECK: Match CompoundStmt(5) to CompoundStmt(4)
+// CHECK: Match NullStmt(6) to NullStmt(5)
+  {{;}} ;;
+}
+
+void f2() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5
+// CHECK-NOT: Match FunctionDecl(9)
+// CHECK-NOT: Match CompoundStmt(10)
+// CHECK: Match CompoundStmt(11) to CompoundStmt(10)
+// CHECK: Match CompoundStmt(12) to CompoundStmt(11)
+// CHECK: Match NullStmt(13) to NullStmt(12)
+// CHECK-NOT: Match NullStmt(13)
+  {{;}} ;;;
+}
+ 
+#endif
Index: lib/Tooling/ASTDiff/ASTDiff.cpp
===================================================================
--- lib/Tooling/ASTDiff/ASTDiff.cpp
+++ lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -92,7 +92,7 @@
 
   // Computes the ratio of common descendants between the two nodes.
   // Descendants are only considered to be equal when they are mapped in M.
-  double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
+  double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
 
   // Returns the node that has the highest degree of similarity.
   NodeId findCandidate(const Mapping &M, NodeId Id1) const;
@@ -714,8 +714,8 @@
 
 void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
                                       NodeId Id2) const {
-  if (std::max(T1.getNumberOfDescendants(Id1),
-               T2.getNumberOfDescendants(Id2)) >= Options.MaxSize)
+  if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) >
+      Options.MaxSize)
     return;
   ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2);
   std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();
@@ -727,16 +727,19 @@
   }
 }
 
-double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1,
-                                    NodeId Id2) const {
-  if (Id1.isInvalid() || Id2.isInvalid())
-    return 0.0;
+double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1,
+                                           NodeId Id2) const {
   int CommonDescendants = 0;
   const Node &N1 = T1.getNode(Id1);
-  for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id)
-    CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2));
-  return 2.0 * CommonDescendants /
-         (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
+  for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) {
+    NodeId Dst = M.getDst(Src);
+    CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2));
+  }
+  double Denominator = T1.getNumberOfDescendants(Id1) - 1 +
+                       T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants;
+  if (Denominator == 0)
+    return 0;
+  return CommonDescendants / Denominator;
 }
 
 NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
@@ -747,7 +750,7 @@
       continue;
     if (M.hasDst(Id2))
       continue;
-    double Similarity = getSimilarity(M, Id1, Id2);
+    double Similarity = getJaccardSimilarity(M, Id1, Id2);
     if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) {
       HighestSimilarity = Similarity;
       Candidate = Id2;
@@ -767,8 +770,8 @@
       }
       break;
     }
-    const Node &N1 = T1.getNode(Id1);
     bool Matched = M.hasSrc(Id1);
+    const Node &N1 = T1.getNode(Id1);
     bool MatchedChildren =
         std::any_of(N1.Children.begin(), N1.Children.end(),
                     [&](NodeId Child) { return M.hasSrc(Child); });
@@ -836,6 +839,8 @@
 
 void ASTDiff::Impl::computeMapping() {
   TheMapping = matchTopDown();
+  if (Options.StopAfterTopDown)
+    return;
   matchBottomUp(TheMapping);
 }
 
Index: include/clang/Tooling/ASTDiff/ASTDiff.h
===================================================================
--- include/clang/Tooling/ASTDiff/ASTDiff.h
+++ include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -105,12 +105,14 @@
 
   /// During bottom-up matching, match only nodes with at least this value as
   /// the ratio of their common descendants.
-  double MinSimilarity = 0.2;
+  double MinSimilarity = 0.5;
 
   /// Whenever two subtrees are matched in the bottom-up phase, the optimal
   /// mapping is computed, unless the size of either subtrees exceeds this.
   int MaxSize = 100;
 
+  bool StopAfterTopDown = false;
+
   /// Returns false if the nodes should never be matched.
   bool isMatchingAllowed(const Node &N1, const Node &N2) const {
     return N1.getType().isSame(N2.getType());
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to