[PATCH] D36185: [clang-diff] Fix similarity computation
This revision was automatically updated to reflect the committed changes. Closed by commit rL311284: [clang-diff] Fix similarity computation (authored by krobelus). Changed prior to commit: https://reviews.llvm.org/D36185?vs=110951=111877#toc Repository: rL LLVM https://reviews.llvm.org/D36185 Files: cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h cfe/trunk/lib/Tooling/ASTDiff/ASTDiff.cpp cfe/trunk/test/Tooling/clang-diff-bottomup.cpp cfe/trunk/test/Tooling/clang-diff-opt.cpp cfe/trunk/test/Tooling/clang-diff-topdown.cpp cfe/trunk/tools/clang-diff/ClangDiff.cpp Index: cfe/trunk/tools/clang-diff/ClangDiff.cpp === --- cfe/trunk/tools/clang-diff/ClangDiff.cpp +++ cfe/trunk/tools/clang-diff/ClangDiff.cpp @@ -50,6 +50,11 @@ cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt StopAfter("stop-after", + cl::desc(""), + cl::Optional, cl::init(""), + cl::cat(ClangDiffCategory)); + static cl::opt MaxSize("s", cl::desc(""), cl::Optional, cl::init(-1), cl::cat(ClangDiffCategory)); @@ -425,6 +430,14 @@ diff::ComparisonOptions Options; if (MaxSize != -1) Options.MaxSize = MaxSize; + if (!StopAfter.empty()) { +if (StopAfter == "topdown") + Options.StopAfterTopDown = true; +else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after\n"; + return 1; +} + } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); diff::ASTDiff Diff(SrcTree, DstTree, Options); Index: cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h === --- cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h +++ cfe/trunk/include/clang/Tooling/ASTDiff/ASTDiff.h @@ -105,12 +105,14 @@ /// During bottom-up matching, match only nodes with at least this value as /// the ratio of their common descendants. - double MinSimilarity = 0.2; + double MinSimilarity = 0.5; /// Whenever two subtrees are matched in the bottom-up phase, the optimal /// mapping is computed, unless the size of either subtrees exceeds this. int MaxSize = 100; + bool StopAfterTopDown = false; + /// Returns false if the nodes should never be matched. bool isMatchingAllowed(const Node , const Node ) const { return N1.getType().isSame(N2.getType()); Index: cfe/trunk/test/Tooling/clang-diff-bottomup.cpp === --- cfe/trunk/test/Tooling/clang-diff-bottomup.cpp +++ cfe/trunk/test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void f1() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5 +// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1) +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK: Match CompoundStmt(4) to CompoundStmt(3) +// CHECK: Match CompoundStmt(5) to CompoundStmt(4) +// CHECK: Match NullStmt(6) to NullStmt(5) + {{;}} ;; +} + +void f2() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5 +// CHECK-NOT: Match FunctionDecl(9) +// CHECK-NOT: Match CompoundStmt(10) +// CHECK: Match CompoundStmt(11) to CompoundStmt(10) +// CHECK: Match CompoundStmt(12) to CompoundStmt(11) +// CHECK: Match NullStmt(13) to NullStmt(12) +// CHECK-NOT: Match NullStmt(13) + {{;}} ;;; +} + +#endif Index: cfe/trunk/test/Tooling/clang-diff-opt.cpp === --- cfe/trunk/test/Tooling/clang-diff-opt.cpp +++ cfe/trunk/test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;}} } + +void f3() { {;} {{;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3
[PATCH] D36185: [clang-diff] Fix similarity computation
johannes updated this revision to Diff 110951. johannes added a comment. comment getJaccardSimilarity https://reviews.llvm.org/D36185 Files: include/clang/Tooling/ASTDiff/ASTDiff.h lib/Tooling/ASTDiff/ASTDiff.cpp test/Tooling/clang-diff-bottomup.cpp test/Tooling/clang-diff-opt.cpp test/Tooling/clang-diff-topdown.cpp tools/clang-diff/ClangDiff.cpp Index: tools/clang-diff/ClangDiff.cpp === --- tools/clang-diff/ClangDiff.cpp +++ tools/clang-diff/ClangDiff.cpp @@ -50,6 +50,11 @@ cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt StopAfter("stop-after", + cl::desc(""), + cl::Optional, cl::init(""), + cl::cat(ClangDiffCategory)); + static cl::opt MaxSize("s", cl::desc(""), cl::Optional, cl::init(-1), cl::cat(ClangDiffCategory)); @@ -424,6 +429,14 @@ diff::ComparisonOptions Options; if (MaxSize != -1) Options.MaxSize = MaxSize; + if (!StopAfter.empty()) { +if (StopAfter == "topdown") + Options.StopAfterTopDown = true; +else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after\n"; + return 1; +} + } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); diff::ASTDiff Diff(SrcTree, DstTree, Options); Index: test/Tooling/clang-diff-topdown.cpp === --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp === --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;}} } + +void f3() { {;} {{;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK-NEXT: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +void f2() { + // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5 + // As none of the subtrees is bigger than 10 nodes, the optimal algorithm + // will be run. + // CHECK: Match NullStmt(11) to NullStmt(9) + ;; {{;}} +} + +void f3() { + // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5 + // As the subtrees are bigger than 10 nodes, the optimal algorithm will not + // be run. + // CHECK: Delete NullStmt(22) + ;; {{;;}} +} +#endif Index: test/Tooling/clang-diff-bottomup.cpp === --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void
[PATCH] D36185: [clang-diff] Fix similarity computation
arphaman accepted this revision. arphaman added a comment. This revision is now accepted and ready to land. LGTM. Comment at: lib/Tooling/ASTDiff/ASTDiff.cpp:738 + } + double Denominator = T1.getNumberOfDescendants(Id1) - 1 + + T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; Can denominator even be negative here? If no, please assert correspondingly. https://reviews.llvm.org/D36185 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36185: [clang-diff] Fix similarity computation
johannes updated this revision to Diff 110688. johannes added a comment. newline in error message https://reviews.llvm.org/D36185 Files: include/clang/Tooling/ASTDiff/ASTDiff.h lib/Tooling/ASTDiff/ASTDiff.cpp test/Tooling/clang-diff-bottomup.cpp test/Tooling/clang-diff-opt.cpp test/Tooling/clang-diff-topdown.cpp tools/clang-diff/ClangDiff.cpp Index: tools/clang-diff/ClangDiff.cpp === --- tools/clang-diff/ClangDiff.cpp +++ tools/clang-diff/ClangDiff.cpp @@ -50,6 +50,11 @@ cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt StopAfter("stop-after", + cl::desc(""), + cl::Optional, cl::init(""), + cl::cat(ClangDiffCategory)); + static cl::opt MaxSize("s", cl::desc(""), cl::Optional, cl::init(-1), cl::cat(ClangDiffCategory)); @@ -424,6 +429,14 @@ diff::ComparisonOptions Options; if (MaxSize != -1) Options.MaxSize = MaxSize; + if (!StopAfter.empty()) { +if (StopAfter == "topdown") + Options.StopAfterTopDown = true; +else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after\n"; + return 1; +} + } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); diff::ASTDiff Diff(SrcTree, DstTree, Options); Index: test/Tooling/clang-diff-topdown.cpp === --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp === --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;}} } + +void f3() { {;} {{;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK-NEXT: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +void f2() { + // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5 + // As none of the subtrees is bigger than 10 nodes, the optimal algorithm + // will be run. + // CHECK: Match NullStmt(11) to NullStmt(9) + ;; {{;}} +} + +void f3() { + // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5 + // As the subtrees are bigger than 10 nodes, the optimal algorithm will not + // be run. + // CHECK: Delete NullStmt(22) + ;; {{;;}} +} +#endif Index: test/Tooling/clang-diff-bottomup.cpp === --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void
[PATCH] D36185: [clang-diff] Fix similarity computation
arphaman added inline comments. Comment at: tools/clang-diff/ClangDiff.cpp:436 +else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after"; + return 1; Add a newline to the string as well. https://reviews.llvm.org/D36185 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36185: [clang-diff] Fix similarity computation
johannes updated this revision to Diff 109505. johannes added a comment. merge parent changes https://reviews.llvm.org/D36185 Files: include/clang/Tooling/ASTDiff/ASTDiff.h lib/Tooling/ASTDiff/ASTDiff.cpp test/Tooling/clang-diff-bottomup.cpp test/Tooling/clang-diff-opt.cpp test/Tooling/clang-diff-topdown.cpp tools/clang-diff/ClangDiff.cpp Index: tools/clang-diff/ClangDiff.cpp === --- tools/clang-diff/ClangDiff.cpp +++ tools/clang-diff/ClangDiff.cpp @@ -50,6 +50,11 @@ cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt StopAfter("stop-after", + cl::desc(""), + cl::Optional, cl::init(""), + cl::cat(ClangDiffCategory)); + static cl::opt MaxSize("s", cl::desc(""), cl::Optional, cl::init(-1), cl::cat(ClangDiffCategory)); @@ -424,6 +429,14 @@ diff::ComparisonOptions Options; if (MaxSize != -1) Options.MaxSize = MaxSize; + if (!StopAfter.empty()) { +if (StopAfter == "topdown") + Options.StopAfterTopDown = true; +else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after"; + return 1; +} + } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); diff::ASTDiff Diff(SrcTree, DstTree, Options); Index: test/Tooling/clang-diff-topdown.cpp === --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp === --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;}} } + +void f3() { {;} {{;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK-NEXT: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +void f2() { + // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5 + // As none of the subtrees is bigger than 10 nodes, the optimal algorithm + // will be run. + // CHECK: Match NullStmt(11) to NullStmt(9) + ;; {{;}} +} + +void f3() { + // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5 + // As the subtrees are bigger than 10 nodes, the optimal algorithm will not + // be run. + // CHECK: Delete NullStmt(22) + ;; {{;;}} +} +#endif Index: test/Tooling/clang-diff-bottomup.cpp === --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void f1() {
[PATCH] D36185: [clang-diff] Fix similarity computation
johannes added inline comments. Comment at: test/Tooling/clang-diff-bottomup.cpp:3 +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -m -no-compilation-database -s=0 %t.src.cpp %t.dst.cpp | FileCheck %s +// klimek wrote: > Instead of using -no-compilation-database most tools support adding -- (args) > for using a fixed compilation database. Any specific reason to deviate from > that for clang-diff? ah I actually wasn't aware of that, done https://reviews.llvm.org/D36185 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D36185: [clang-diff] Fix similarity computation
johannes updated this revision to Diff 109424. johannes added a comment. add test for Options.MaxSize https://reviews.llvm.org/D36185 Files: include/clang/Tooling/ASTDiff/ASTDiff.h lib/Tooling/ASTDiff/ASTDiff.cpp test/Tooling/clang-diff-bottomup.cpp test/Tooling/clang-diff-opt.cpp test/Tooling/clang-diff-topdown.cpp Index: test/Tooling/clang-diff-topdown.cpp === --- /dev/null +++ test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif Index: test/Tooling/clang-diff-opt.cpp === --- /dev/null +++ test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;}} } + +void f3() { {;} {{;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK-NEXT: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +void f2() { + // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5 + // As none of the subtrees is bigger than 10 nodes, the optimal algorithm + // will be run. + // CHECK: Match NullStmt(11) to NullStmt(9) + ;; {{;}} +} + +void f3() { + // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5 + // As the subtrees are bigger than 10 nodes, the optimal algorithm will not + // be run. + // CHECK: Delete NullStmt(22) + ;; {{;;}} +} +#endif Index: test/Tooling/clang-diff-bottomup.cpp === --- /dev/null +++ test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void f1() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5 +// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1) +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK: Match CompoundStmt(4) to CompoundStmt(3) +// CHECK: Match CompoundStmt(5) to CompoundStmt(4) +// CHECK: Match NullStmt(6) to NullStmt(5) + {{;}} ;; +} + +void f2() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5 +// CHECK-NOT: Match FunctionDecl(9) +// CHECK-NOT: Match CompoundStmt(10) +// CHECK: Match CompoundStmt(11) to CompoundStmt(10) +// CHECK: Match CompoundStmt(12) to CompoundStmt(11) +// CHECK: Match NullStmt(13) to NullStmt(12) +// CHECK-NOT: Match NullStmt(13) + {{;}} ;;; +} + +#endif Index: lib/Tooling/ASTDiff/ASTDiff.cpp === --- lib/Tooling/ASTDiff/ASTDiff.cpp +++ lib/Tooling/ASTDiff/ASTDiff.cpp @@ -68,7 +68,8 @@ // Compute ChangeKind for each node based on similarity. void computeChangeKinds(Mapping ); - NodeId getMapped(const std::unique_ptr , NodeId Id) const { + NodeId getMapped(const