[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-20 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL311284: [clang-diff] Fix similarity computation (authored by 
krobelus).

Changed prior to commit:
  https://reviews.llvm.org/D36185?vs=110951=111877#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D36185

Files:
  cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h
  cfe/trunk/lib/Tooling/ASTDiff/ASTDiff.cpp
  cfe/trunk/test/Tooling/clang-diff-bottomup.cpp
  cfe/trunk/test/Tooling/clang-diff-opt.cpp
  cfe/trunk/test/Tooling/clang-diff-topdown.cpp
  cfe/trunk/tools/clang-diff/ClangDiff.cpp

Index: cfe/trunk/tools/clang-diff/ClangDiff.cpp
===
--- cfe/trunk/tools/clang-diff/ClangDiff.cpp
+++ cfe/trunk/tools/clang-diff/ClangDiff.cpp
@@ -50,6 +50,11 @@
 cl::Optional,
 cl::cat(ClangDiffCategory));
 
+static cl::opt StopAfter("stop-after",
+  cl::desc(""),
+  cl::Optional, cl::init(""),
+  cl::cat(ClangDiffCategory));
+
 static cl::opt MaxSize("s", cl::desc(""), cl::Optional,
 cl::init(-1), cl::cat(ClangDiffCategory));
 
@@ -425,6 +430,14 @@
   diff::ComparisonOptions Options;
   if (MaxSize != -1)
 Options.MaxSize = MaxSize;
+  if (!StopAfter.empty()) {
+if (StopAfter == "topdown")
+  Options.StopAfterTopDown = true;
+else if (StopAfter != "bottomup") {
+  llvm::errs() << "Error: Invalid argument for -stop-after\n";
+  return 1;
+}
+  }
   diff::SyntaxTree SrcTree(Src->getASTContext());
   diff::SyntaxTree DstTree(Dst->getASTContext());
   diff::ASTDiff Diff(SrcTree, DstTree, Options);
Index: cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h
===
--- cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h
+++ cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -105,12 +105,14 @@
 
   /// During bottom-up matching, match only nodes with at least this value as
   /// the ratio of their common descendants.
-  double MinSimilarity = 0.2;
+  double MinSimilarity = 0.5;
 
   /// Whenever two subtrees are matched in the bottom-up phase, the optimal
   /// mapping is computed, unless the size of either subtrees exceeds this.
   int MaxSize = 100;
 
+  bool StopAfterTopDown = false;
+
   /// Returns false if the nodes should never be matched.
   bool isMatchingAllowed(const Node , const Node ) const {
 return N1.getType().isSame(N2.getType());
Index: cfe/trunk/test/Tooling/clang-diff-bottomup.cpp
===
--- cfe/trunk/test/Tooling/clang-diff-bottomup.cpp
+++ cfe/trunk/test/Tooling/clang-diff-bottomup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void f1() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5
+// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1)
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK: Match CompoundStmt(4) to CompoundStmt(3)
+// CHECK: Match CompoundStmt(5) to CompoundStmt(4)
+// CHECK: Match NullStmt(6) to NullStmt(5)
+  {{;}} ;;
+}
+
+void f2() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5
+// CHECK-NOT: Match FunctionDecl(9)
+// CHECK-NOT: Match CompoundStmt(10)
+// CHECK: Match CompoundStmt(11) to CompoundStmt(10)
+// CHECK: Match CompoundStmt(12) to CompoundStmt(11)
+// CHECK: Match NullStmt(13) to NullStmt(12)
+// CHECK-NOT: Match NullStmt(13)
+  {{;}} ;;;
+}
+
+#endif
Index: cfe/trunk/test/Tooling/clang-diff-opt.cpp
===
--- cfe/trunk/test/Tooling/clang-diff-opt.cpp
+++ cfe/trunk/test/Tooling/clang-diff-opt.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;}} }
+
+void f3() { {;} {{;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 

[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-14 Thread Johannes Altmanninger via Phabricator via cfe-commits
johannes updated this revision to Diff 110951.
johannes added a comment.

comment getJaccardSimilarity


https://reviews.llvm.org/D36185

Files:
  include/clang/Tooling/ASTDiff/ASTDiff.h
  lib/Tooling/ASTDiff/ASTDiff.cpp
  test/Tooling/clang-diff-bottomup.cpp
  test/Tooling/clang-diff-opt.cpp
  test/Tooling/clang-diff-topdown.cpp
  tools/clang-diff/ClangDiff.cpp

Index: tools/clang-diff/ClangDiff.cpp
===
--- tools/clang-diff/ClangDiff.cpp
+++ tools/clang-diff/ClangDiff.cpp
@@ -50,6 +50,11 @@
 cl::Optional,
 cl::cat(ClangDiffCategory));
 
+static cl::opt StopAfter("stop-after",
+  cl::desc(""),
+  cl::Optional, cl::init(""),
+  cl::cat(ClangDiffCategory));
+
 static cl::opt MaxSize("s", cl::desc(""), cl::Optional,
 cl::init(-1), cl::cat(ClangDiffCategory));
 
@@ -424,6 +429,14 @@
   diff::ComparisonOptions Options;
   if (MaxSize != -1)
 Options.MaxSize = MaxSize;
+  if (!StopAfter.empty()) {
+if (StopAfter == "topdown")
+  Options.StopAfterTopDown = true;
+else if (StopAfter != "bottomup") {
+  llvm::errs() << "Error: Invalid argument for -stop-after\n";
+  return 1;
+}
+  }
   diff::SyntaxTree SrcTree(Src->getASTContext());
   diff::SyntaxTree DstTree(Dst->getASTContext());
   diff::ASTDiff Diff(SrcTree, DstTree, Options);
Index: test/Tooling/clang-diff-topdown.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-topdown.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the top-down matching of identical subtrees only.
+
+#ifndef DEST
+
+void f1()
+{
+  // Match some subtree of height greater than 2.
+  // CHECK: Match CompoundStmt(3) to CompoundStmt(3)
+  // CHECK: Match CompoundStmt(4) to CompoundStmt(4)
+  // CHECK: Match NullStmt(5) to NullStmt(5)
+  {{;}}
+
+  // Don't match subtrees that are smaller.
+  // CHECK-NOT: Match CompoundStmt(6)
+  // CHECK-NOT: Match NullStmt(7)
+  {;}
+
+  // Greedy approach - use the first matching subtree when there are multiple
+  // identical subtrees.
+  // CHECK: Match CompoundStmt(8) to CompoundStmt(8)
+  // CHECK: Match CompoundStmt(9) to CompoundStmt(9)
+  // CHECK: Match NullStmt(10) to NullStmt(10)
+  {{;;}}
+}
+
+#else
+
+void f1() {
+
+  {{;}}
+
+  {;}
+
+  {{;;}}
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(11)
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(12)
+  // CHECK-NOT: Match {{.*}} to NullStmt(13)
+  {{;;}}
+
+  // CHECK-NOT: Match {{.*}} to NullStmt(14)
+  ;
+}
+
+#endif
Index: test/Tooling/clang-diff-opt.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-opt.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;}} }
+
+void f3() { {;} {{;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5
+// The optimal matching algorithm should move the ; into the outer block
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK-NOT: Match CompoundStmt(3)
+// CHECK-NEXT: Match NullStmt(4) to NullStmt(3)
+  ; {{;}}
+}
+ 
+void f2() {
+  // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5
+  // As none of the subtrees is bigger than 10 nodes, the optimal algorithm
+  // will be run.
+  // CHECK: Match NullStmt(11) to NullStmt(9)
+  ;; {{;}}
+}
+
+void f3() {
+  // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5
+  // As the subtrees are bigger than 10 nodes, the optimal algorithm will not
+  // be run.
+  // CHECK: Delete NullStmt(22)
+  ;; {{;;}}
+}
+#endif
Index: test/Tooling/clang-diff-bottomup.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-bottomup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void 

[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-14 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman accepted this revision.
arphaman added a comment.
This revision is now accepted and ready to land.

LGTM.




Comment at: lib/Tooling/ASTDiff/ASTDiff.cpp:738
+  }
+  double Denominator = T1.getNumberOfDescendants(Id1) - 1 +
+   T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants;

Can denominator even be negative here? If no, please assert correspondingly. 


https://reviews.llvm.org/D36185



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-11 Thread Johannes Altmanninger via Phabricator via cfe-commits
johannes updated this revision to Diff 110688.
johannes added a comment.

newline in error message


https://reviews.llvm.org/D36185

Files:
  include/clang/Tooling/ASTDiff/ASTDiff.h
  lib/Tooling/ASTDiff/ASTDiff.cpp
  test/Tooling/clang-diff-bottomup.cpp
  test/Tooling/clang-diff-opt.cpp
  test/Tooling/clang-diff-topdown.cpp
  tools/clang-diff/ClangDiff.cpp

Index: tools/clang-diff/ClangDiff.cpp
===
--- tools/clang-diff/ClangDiff.cpp
+++ tools/clang-diff/ClangDiff.cpp
@@ -50,6 +50,11 @@
 cl::Optional,
 cl::cat(ClangDiffCategory));
 
+static cl::opt StopAfter("stop-after",
+  cl::desc(""),
+  cl::Optional, cl::init(""),
+  cl::cat(ClangDiffCategory));
+
 static cl::opt MaxSize("s", cl::desc(""), cl::Optional,
 cl::init(-1), cl::cat(ClangDiffCategory));
 
@@ -424,6 +429,14 @@
   diff::ComparisonOptions Options;
   if (MaxSize != -1)
 Options.MaxSize = MaxSize;
+  if (!StopAfter.empty()) {
+if (StopAfter == "topdown")
+  Options.StopAfterTopDown = true;
+else if (StopAfter != "bottomup") {
+  llvm::errs() << "Error: Invalid argument for -stop-after\n";
+  return 1;
+}
+  }
   diff::SyntaxTree SrcTree(Src->getASTContext());
   diff::SyntaxTree DstTree(Dst->getASTContext());
   diff::ASTDiff Diff(SrcTree, DstTree, Options);
Index: test/Tooling/clang-diff-topdown.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-topdown.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the top-down matching of identical subtrees only.
+
+#ifndef DEST
+
+void f1()
+{
+  // Match some subtree of height greater than 2.
+  // CHECK: Match CompoundStmt(3) to CompoundStmt(3)
+  // CHECK: Match CompoundStmt(4) to CompoundStmt(4)
+  // CHECK: Match NullStmt(5) to NullStmt(5)
+  {{;}}
+
+  // Don't match subtrees that are smaller.
+  // CHECK-NOT: Match CompoundStmt(6)
+  // CHECK-NOT: Match NullStmt(7)
+  {;}
+
+  // Greedy approach - use the first matching subtree when there are multiple
+  // identical subtrees.
+  // CHECK: Match CompoundStmt(8) to CompoundStmt(8)
+  // CHECK: Match CompoundStmt(9) to CompoundStmt(9)
+  // CHECK: Match NullStmt(10) to NullStmt(10)
+  {{;;}}
+}
+
+#else
+
+void f1() {
+
+  {{;}}
+
+  {;}
+
+  {{;;}}
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(11)
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(12)
+  // CHECK-NOT: Match {{.*}} to NullStmt(13)
+  {{;;}}
+
+  // CHECK-NOT: Match {{.*}} to NullStmt(14)
+  ;
+}
+
+#endif
Index: test/Tooling/clang-diff-opt.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-opt.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;}} }
+
+void f3() { {;} {{;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5
+// The optimal matching algorithm should move the ; into the outer block
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK-NOT: Match CompoundStmt(3)
+// CHECK-NEXT: Match NullStmt(4) to NullStmt(3)
+  ; {{;}}
+}
+ 
+void f2() {
+  // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5
+  // As none of the subtrees is bigger than 10 nodes, the optimal algorithm
+  // will be run.
+  // CHECK: Match NullStmt(11) to NullStmt(9)
+  ;; {{;}}
+}
+
+void f3() {
+  // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5
+  // As the subtrees are bigger than 10 nodes, the optimal algorithm will not
+  // be run.
+  // CHECK: Delete NullStmt(22)
+  ;; {{;;}}
+}
+#endif
Index: test/Tooling/clang-diff-bottomup.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-bottomup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void 

[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-10 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added inline comments.



Comment at: tools/clang-diff/ClangDiff.cpp:436
+else if (StopAfter != "bottomup") {
+  llvm::errs() << "Error: Invalid argument for -stop-after";
+  return 1;

Add a newline to the string as well.


https://reviews.llvm.org/D36185



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-03 Thread Johannes Altmanninger via Phabricator via cfe-commits
johannes updated this revision to Diff 109505.
johannes added a comment.

merge parent changes


https://reviews.llvm.org/D36185

Files:
  include/clang/Tooling/ASTDiff/ASTDiff.h
  lib/Tooling/ASTDiff/ASTDiff.cpp
  test/Tooling/clang-diff-bottomup.cpp
  test/Tooling/clang-diff-opt.cpp
  test/Tooling/clang-diff-topdown.cpp
  tools/clang-diff/ClangDiff.cpp

Index: tools/clang-diff/ClangDiff.cpp
===
--- tools/clang-diff/ClangDiff.cpp
+++ tools/clang-diff/ClangDiff.cpp
@@ -50,6 +50,11 @@
 cl::Optional,
 cl::cat(ClangDiffCategory));
 
+static cl::opt StopAfter("stop-after",
+  cl::desc(""),
+  cl::Optional, cl::init(""),
+  cl::cat(ClangDiffCategory));
+
 static cl::opt MaxSize("s", cl::desc(""), cl::Optional,
 cl::init(-1), cl::cat(ClangDiffCategory));
 
@@ -424,6 +429,14 @@
   diff::ComparisonOptions Options;
   if (MaxSize != -1)
 Options.MaxSize = MaxSize;
+  if (!StopAfter.empty()) {
+if (StopAfter == "topdown")
+  Options.StopAfterTopDown = true;
+else if (StopAfter != "bottomup") {
+  llvm::errs() << "Error: Invalid argument for -stop-after";
+  return 1;
+}
+  }
   diff::SyntaxTree SrcTree(Src->getASTContext());
   diff::SyntaxTree DstTree(Dst->getASTContext());
   diff::ASTDiff Diff(SrcTree, DstTree, Options);
Index: test/Tooling/clang-diff-topdown.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-topdown.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the top-down matching of identical subtrees only.
+
+#ifndef DEST
+
+void f1()
+{
+  // Match some subtree of height greater than 2.
+  // CHECK: Match CompoundStmt(3) to CompoundStmt(3)
+  // CHECK: Match CompoundStmt(4) to CompoundStmt(4)
+  // CHECK: Match NullStmt(5) to NullStmt(5)
+  {{;}}
+
+  // Don't match subtrees that are smaller.
+  // CHECK-NOT: Match CompoundStmt(6)
+  // CHECK-NOT: Match NullStmt(7)
+  {;}
+
+  // Greedy approach - use the first matching subtree when there are multiple
+  // identical subtrees.
+  // CHECK: Match CompoundStmt(8) to CompoundStmt(8)
+  // CHECK: Match CompoundStmt(9) to CompoundStmt(9)
+  // CHECK: Match NullStmt(10) to NullStmt(10)
+  {{;;}}
+}
+
+#else
+
+void f1() {
+
+  {{;}}
+
+  {;}
+
+  {{;;}}
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(11)
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(12)
+  // CHECK-NOT: Match {{.*}} to NullStmt(13)
+  {{;;}}
+
+  // CHECK-NOT: Match {{.*}} to NullStmt(14)
+  ;
+}
+
+#endif
Index: test/Tooling/clang-diff-opt.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-opt.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;}} }
+
+void f3() { {;} {{;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5
+// The optimal matching algorithm should move the ; into the outer block
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK-NOT: Match CompoundStmt(3)
+// CHECK-NEXT: Match NullStmt(4) to NullStmt(3)
+  ; {{;}}
+}
+ 
+void f2() {
+  // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5
+  // As none of the subtrees is bigger than 10 nodes, the optimal algorithm
+  // will be run.
+  // CHECK: Match NullStmt(11) to NullStmt(9)
+  ;; {{;}}
+}
+
+void f3() {
+  // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5
+  // As the subtrees are bigger than 10 nodes, the optimal algorithm will not
+  // be run.
+  // CHECK: Delete NullStmt(22)
+  ;; {{;;}}
+}
+#endif
Index: test/Tooling/clang-diff-bottomup.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-bottomup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void f1() {

[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-02 Thread Johannes Altmanninger via Phabricator via cfe-commits
johannes added inline comments.



Comment at: test/Tooling/clang-diff-bottomup.cpp:3
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -m -no-compilation-database -s=0 %t.src.cpp %t.dst.cpp | 
FileCheck %s
+//

klimek wrote:
> Instead of using -no-compilation-database most tools support adding -- (args) 
> for using a fixed compilation database. Any specific reason to deviate from 
> that for clang-diff?
ah I actually wasn't aware of that, done


https://reviews.llvm.org/D36185



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36185: [clang-diff] Fix similarity computation

2017-08-02 Thread Johannes Altmanninger via Phabricator via cfe-commits
johannes updated this revision to Diff 109424.
johannes added a comment.

add test for Options.MaxSize


https://reviews.llvm.org/D36185

Files:
  include/clang/Tooling/ASTDiff/ASTDiff.h
  lib/Tooling/ASTDiff/ASTDiff.cpp
  test/Tooling/clang-diff-bottomup.cpp
  test/Tooling/clang-diff-opt.cpp
  test/Tooling/clang-diff-topdown.cpp

Index: test/Tooling/clang-diff-topdown.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-topdown.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the top-down matching of identical subtrees only.
+
+#ifndef DEST
+
+void f1()
+{
+  // Match some subtree of height greater than 2.
+  // CHECK: Match CompoundStmt(3) to CompoundStmt(3)
+  // CHECK: Match CompoundStmt(4) to CompoundStmt(4)
+  // CHECK: Match NullStmt(5) to NullStmt(5)
+  {{;}}
+
+  // Don't match subtrees that are smaller.
+  // CHECK-NOT: Match CompoundStmt(6)
+  // CHECK-NOT: Match NullStmt(7)
+  {;}
+
+  // Greedy approach - use the first matching subtree when there are multiple
+  // identical subtrees.
+  // CHECK: Match CompoundStmt(8) to CompoundStmt(8)
+  // CHECK: Match CompoundStmt(9) to CompoundStmt(9)
+  // CHECK: Match NullStmt(10) to NullStmt(10)
+  {{;;}}
+}
+
+#else
+
+void f1() {
+
+  {{;}}
+
+  {;}
+
+  {{;;}}
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(11)
+  // CHECK-NOT: Match {{.*}} to CompoundStmt(12)
+  // CHECK-NOT: Match {{.*}} to NullStmt(13)
+  {{;;}}
+
+  // CHECK-NOT: Match {{.*}} to NullStmt(14)
+  ;
+}
+
+#endif
Index: test/Tooling/clang-diff-opt.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-opt.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;}} }
+
+void f3() { {;} {{;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5
+// The optimal matching algorithm should move the ; into the outer block
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK-NOT: Match CompoundStmt(3)
+// CHECK-NEXT: Match NullStmt(4) to NullStmt(3)
+  ; {{;}}
+}
+ 
+void f2() {
+  // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5
+  // As none of the subtrees is bigger than 10 nodes, the optimal algorithm
+  // will be run.
+  // CHECK: Match NullStmt(11) to NullStmt(9)
+  ;; {{;}}
+}
+
+void f3() {
+  // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5
+  // As the subtrees are bigger than 10 nodes, the optimal algorithm will not
+  // be run.
+  // CHECK: Delete NullStmt(22)
+  ;; {{;;}}
+}
+#endif
Index: test/Tooling/clang-diff-bottomup.cpp
===
--- /dev/null
+++ test/Tooling/clang-diff-bottomup.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void f1() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5
+// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1)
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK: Match CompoundStmt(4) to CompoundStmt(3)
+// CHECK: Match CompoundStmt(5) to CompoundStmt(4)
+// CHECK: Match NullStmt(6) to NullStmt(5)
+  {{;}} ;;
+}
+
+void f2() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5
+// CHECK-NOT: Match FunctionDecl(9)
+// CHECK-NOT: Match CompoundStmt(10)
+// CHECK: Match CompoundStmt(11) to CompoundStmt(10)
+// CHECK: Match CompoundStmt(12) to CompoundStmt(11)
+// CHECK: Match NullStmt(13) to NullStmt(12)
+// CHECK-NOT: Match NullStmt(13)
+  {{;}} ;;;
+}
+ 
+#endif
Index: lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- lib/Tooling/ASTDiff/ASTDiff.cpp
+++ lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -68,7 +68,8 @@
   // Compute ChangeKind for each node based on similarity.
   void computeChangeKinds(Mapping );
 
-  NodeId getMapped(const std::unique_ptr , NodeId Id) const {
+  NodeId getMapped(const