Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
This revision was automatically updated to reflect the committed changes. Closed by commit rL277757: [analyzer] Make CloneDetector recognize different variable patterns. (authored by dergachev). Changed prior to commit: https://reviews.llvm.org/D22982?vs=66839=66842#toc Repository: rL LLVM https://reviews.llvm.org/D22982 Files: cfe/trunk/lib/Analysis/CloneDetection.cpp cfe/trunk/test/Analysis/copypaste/false-positives.cpp cfe/trunk/test/Analysis/copypaste/functions.cpp Index: cfe/trunk/test/Analysis/copypaste/functions.cpp === --- cfe/trunk/test/Analysis/copypaste/functions.cpp +++ cfe/trunk/test/Analysis/copypaste/functions.cpp @@ -37,14 +37,22 @@ return b; } - +// No clone because of the different comparison operator. int min1(int a, int b) { // no-warning log(); if (a < b) return a; return b; } +// No clone because of the different pattern in which the variables are used. +int min2(int a, int b) { // no-warning + log(); + if (a > b) +return b; + return a; +} + int foo(int a, int b) { // no-warning return a + b; } Index: cfe/trunk/test/Analysis/copypaste/false-positives.cpp === --- cfe/trunk/test/Analysis/copypaste/false-positives.cpp +++ cfe/trunk/test/Analysis/copypaste/false-positives.cpp @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s - -// This test contains false-positive reports from the CloneChecker that need to -// be fixed. - -void log(); - -int max(int a, int b) { // expected-warning{{Detected code clone.}} - log(); - if (a > b) -return a; - return b; -} - -// FIXME: Detect different variable patterns. -int min2(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (b > a) -return a; - return b; -} Index: cfe/trunk/lib/Analysis/CloneDetection.cpp === --- cfe/trunk/lib/Analysis/CloneDetection.cpp +++ cfe/trunk/lib/Analysis/CloneDetection.cpp @@ -80,6 +80,102 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { + +/// \brief Analyzes the pattern of the referenced variables in a statement. +class VariablePattern { + + /// \brief Describes an occurence of a variable reference in a statement. + struct VariableOccurence { +/// The index of the associated VarDecl in the Variables vector. +size_t KindID; + +VariableOccurence(size_t KindID) : KindID(KindID) {} + }; + + /// All occurences of referenced variables in the order of appearance. + std::vector Occurences; + /// List of referenced variables in the order of appearance. + /// Every item in this list is unique. + std::vector Variables; + + /// \brief Adds a new variable referenced to this pattern. + /// \param VarDecl The declaration of the variable that is referenced. + void addVariableOccurence(const VarDecl *VarDecl) { +// First check if we already reference this variable +for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { + if (Variables[KindIndex] == VarDecl) { +// If yes, add a new occurence that points to the existing entry in +// the Variables vector. +Occurences.emplace_back(KindIndex); +return; + } +} +// If this variable wasn't already referenced, add it to the list of +// referenced variables and add a occurence that points to this new entry. +Occurences.emplace_back(Variables.size()); +Variables.push_back(VarDecl); + } + + /// \brief Adds each referenced variable from the given statement. + void addVariables(const Stmt *S) { +// Sometimes we get a nullptr (such as from IfStmts which often have nullptr +// children). We skip such statements as they don't reference any +// variables. +if (!S) + return; + +// Check if S is a reference to a variable. If yes, add it to the pattern. +if (auto D = dyn_cast(S)) { + if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl())) +addVariableOccurence(VD); +} + +// Recursively check all children of the given statement. +for (const Stmt *Child : S->children()) { + addVariables(Child); +} + } + +public: + /// \brief Creates an VariablePattern object with information about the given + ///StmtSequence. + VariablePattern(const StmtSequence ) { +for (const Stmt *S : Sequence) + addVariables(S); + } + + /// \brief Compares this pattern with the given one. + /// \param Other The given VariablePattern to compare with. + /// \return Returns true if and only if the references variables in this + /// object follow the same pattern than the ones in the given + /// VariablePattern. + /// + /// For example, the following statements all have the same pattern: + /// + /// if (a < b) return a; return b; + ///
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
teemperor updated this revision to Diff 66839. teemperor marked 3 inline comments as done. teemperor added a comment. - Patch should no longer cause merge conflicts. - Improved comments and tests in functions.cpp. https://reviews.llvm.org/D22982 Files: lib/Analysis/CloneDetection.cpp test/Analysis/copypaste/false-positives.cpp test/Analysis/copypaste/functions.cpp Index: test/Analysis/copypaste/functions.cpp === --- test/Analysis/copypaste/functions.cpp +++ test/Analysis/copypaste/functions.cpp @@ -37,14 +37,22 @@ return b; } - +// No clone because of the different comparison operator. int min1(int a, int b) { // no-warning log(); if (a < b) return a; return b; } +// No clone because of the different pattern in which the variables are used. +int min2(int a, int b) { // no-warning + log(); + if (a > b) +return b; + return a; +} + int foo(int a, int b) { // no-warning return a + b; } Index: test/Analysis/copypaste/false-positives.cpp === --- test/Analysis/copypaste/false-positives.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s - -// This test contains false-positive reports from the CloneChecker that need to -// be fixed. - -void log(); - -int max(int a, int b) { // expected-warning{{Detected code clone.}} - log(); - if (a > b) -return a; - return b; -} - -// FIXME: Detect different variable patterns. -int min2(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (b > a) -return a; - return b; -} Index: lib/Analysis/CloneDetection.cpp === --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -80,6 +80,102 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { + +/// \brief Analyzes the pattern of the referenced variables in a statement. +class VariablePattern { + + /// \brief Describes an occurence of a variable reference in a statement. + struct VariableOccurence { +/// The index of the associated VarDecl in the Variables vector. +size_t KindID; + +VariableOccurence(size_t KindID) : KindID(KindID) {} + }; + + /// All occurences of referenced variables in the order of appearance. + std::vector Occurences; + /// List of referenced variables in the order of appearance. + /// Every item in this list is unique. + std::vector Variables; + + /// \brief Adds a new variable referenced to this pattern. + /// \param VarDecl The declaration of the variable that is referenced. + void addVariableOccurence(const VarDecl *VarDecl) { +// First check if we already reference this variable +for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { + if (Variables[KindIndex] == VarDecl) { +// If yes, add a new occurence that points to the existing entry in +// the Variables vector. +Occurences.emplace_back(KindIndex); +return; + } +} +// If this variable wasn't already referenced, add it to the list of +// referenced variables and add a occurence that points to this new entry. +Occurences.emplace_back(Variables.size()); +Variables.push_back(VarDecl); + } + + /// \brief Adds each referenced variable from the given statement. + void addVariables(const Stmt *S) { +// Sometimes we get a nullptr (such as from IfStmts which often have nullptr +// children). We skip such statements as they don't reference any +// variables. +if (!S) + return; + +// Check if S is a reference to a variable. If yes, add it to the pattern. +if (auto D = dyn_cast(S)) { + if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl())) +addVariableOccurence(VD); +} + +// Recursively check all children of the given statement. +for (const Stmt *Child : S->children()) { + addVariables(Child); +} + } + +public: + /// \brief Creates an VariablePattern object with information about the given + ///StmtSequence. + VariablePattern(const StmtSequence ) { +for (const Stmt *S : Sequence) + addVariables(S); + } + + /// \brief Compares this pattern with the given one. + /// \param Other The given VariablePattern to compare with. + /// \return Returns true if and only if the references variables in this + /// object follow the same pattern than the ones in the given + /// VariablePattern. + /// + /// For example, the following statements all have the same pattern: + /// + /// if (a < b) return a; return b; + /// if (x < y) return x; return y; + /// if (u2 < u1) return u2; return u1; + /// + /// but the following statement has a different pattern (note the changed + /// variables in the return statements). + /// + /// if (a < b)
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
NoQ requested changes to this revision. NoQ added a comment. This revision now requires changes to proceed. Whoops, because of a merge conflict while applying the patch, i noticed a few problems in the tests, they seem obvious, but could you have a quick look? Comment at: test/Analysis/copypaste/functions.cpp:21 @@ -20,3 +20,3 @@ -// Functions below are not clones and should not be reported. +// Different variable patterns, therefore different clones. Not sure, do "different clones" mean "different code, should not be reported" or "different clones of the same code, should be reported"? Comment at: test/Analysis/copypaste/functions.cpp:30 @@ +29,3 @@ + +int minClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); This code is exactly identical, did you mean 'x > y' in any of those places? https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
v.g.vassilev accepted this revision. v.g.vassilev added a comment. LGTM. My comments were clarified in a Skype chat. https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
NoQ accepted this revision. This revision is now accepted and ready to land. Comment at: lib/Analysis/CloneDetection.cpp:99 @@ +98,3 @@ + /// Every item in this list is unique. + std::vector Variables; + NoQ wrote: > I've a feeling this is essentially a `map`, because > that harmonizes with our access patterns (lookup number by variable, insert > the new value `Variables.size()` if not found). So i think a map would > express the intention more clearly. Discussed that we're going to have lookups in both directions in the next patch. Comment at: lib/Analysis/CloneDetection.cpp:168 @@ +167,3 @@ +assert(Other.Occurences.size() == Occurences.size()); +for (unsigned i = 0; i < Occurences.size(); ++i) { + if (Occurences[i].KindID != Other.Occurences[i].KindID) { NoQ wrote: > Because your code uses a lot of fancy C++11, i'd probably suggest to make it > even prettier with `std::all_of()`. Uhm, never mind, iterating two arrays here. https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
NoQ added a comment. Looks good, minor comments :) Comment at: lib/Analysis/CloneDetection.cpp:99 @@ +98,3 @@ + /// Every item in this list is unique. + std::vector Variables; + I've a feeling this is essentially a `map`, because that harmonizes with our access patterns (lookup number by variable, insert the new value `Variables.size()` if not found). So i think a map would express the intention more clearly. Comment at: lib/Analysis/CloneDetection.cpp:168 @@ +167,3 @@ +assert(Other.Occurences.size() == Occurences.size()); +for (unsigned i = 0; i < Occurences.size(); ++i) { + if (Occurences[i].KindID != Other.Occurences[i].KindID) { Because your code uses a lot of fancy C++11, i'd probably suggest to make it even prettier with `std::all_of()`. https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
NoQ added a comment. Uhm. I've a feeling that now i realize how it should have been written from the start. That's a pretty bad feeling to have. I wish we had a //series of passes//. Before the first pass, all statements are considered equivalent. After the first pass, they're split into smaller clone groups. The second pass would add further distinction between items in each clone group, producing more clone groups. Etc. Every subsequent pass would compute more performance-intensive properties of statement sequences (the first pass being very simple, eg. only statement kinds). Once all passes end, only true positives remain. Probably wrap up each pass into a class that conforms to some common interface, and we're done with a nicely structured and easy-to-extend copy-paste error detection framework. We could probably even allow the user to turn separate passes on and off through some kind of dynamic registry. Never mind though, will have a look at what we have :) https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
teemperor added a comment. I've deleted false-positives.cpp because all false-positives in there are now resolved and the test did no longer serve a purpose. We could leave the test file in there, but I think new false-positives either belong to an existing test category or deserve their own named test file (e.g. macros.cpp would contain tests for macros and related false-positives with a FIXME remark). Opinions? https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
teemperor marked 2 inline comments as done. teemperor added a comment. https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
teemperor updated this revision to Diff 66262. teemperor added a comment. - Fixed a few typos in the comments. - Removed unnecessary `std::`. - Added the new tests to functions.cpp instead of their own file. https://reviews.llvm.org/D22982 Files: lib/Analysis/CloneDetection.cpp test/Analysis/copypaste/false-positives.cpp test/Analysis/copypaste/functions.cpp Index: test/Analysis/copypaste/functions.cpp === --- test/Analysis/copypaste/functions.cpp +++ test/Analysis/copypaste/functions.cpp @@ -18,15 +18,24 @@ return y; } -// Functions below are not clones and should not be reported. +// Different variable patterns, therefore different clones. -int min1(int a, int b) { // no-warning +int min(int x, int y) { // expected-warning{{Detected code clone.}} log(); - if (a < b) -return a; - return b; + if (y > x) +return x; + return y; +} + +int minClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (y > x) +return x; + return y; } +// Functions below are not clones and should not be reported. + int foo(int a, int b) { // no-warning return a + b; } Index: test/Analysis/copypaste/false-positives.cpp === --- test/Analysis/copypaste/false-positives.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s - -// This test contains false-positive reports from the CloneChecker that need to -// be fixed. - -void log(); - -int max(int a, int b) { // expected-warning{{Detected code clone.}} - log(); - if (a > b) -return a; - return b; -} - -// FIXME: Detect different variable patterns. -int min2(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (b > a) -return a; - return b; -} Index: lib/Analysis/CloneDetection.cpp === --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -80,6 +80,102 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { + +/// \brief Analyzes the pattern of the referenced variables in a statement. +class VariablePattern { + + /// \brief Describes an occurence of a variable reference in a statement. + struct VariableOccurence { +/// The index of the associated VarDecl in the Variables vector. +size_t KindID; + +VariableOccurence(size_t KindID) : KindID(KindID) {} + }; + + /// All occurences of referenced variables in the order of appearance. + std::vector Occurences; + /// List of referenced variables in the order of appearance. + /// Every item in this list is unique. + std::vector Variables; + + /// \brief Adds a new variable referenced to this pattern. + /// \param VarDecl The declaration of the variable that is referenced. + void addVariableOccurence(const VarDecl *VarDecl) { +// First check if we already reference this variable +for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { + if (Variables[KindIndex] == VarDecl) { +// If yes, add a new occurence that points to the existing entry in +// the Variables vector. +Occurences.emplace_back(KindIndex); +return; + } +} +// If this variable wasn't already referenced, add it to the list of +// referenced variables and add a occurence that points to this new entry. +Occurences.emplace_back(Variables.size()); +Variables.push_back(VarDecl); + } + + /// \brief Adds each referenced variable from the given statement. + void addVariables(const Stmt *S) { +// Sometimes we get a nullptr (such as from IfStmts which often have nullptr +// children). We skip such statements as they don't reference any +// variables. +if (!S) + return; + +// Check if S is a reference to a variable. If yes, add it to the pattern. +if (auto D = dyn_cast(S)) { + if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl())) +addVariableOccurence(VD); +} + +// Recursively check all children of the given statement. +for (const Stmt *Child : S->children()) { + addVariables(Child); +} + } + +public: + /// \brief Creates an VariablePattern object with information about the given + ///StmtSequence. + VariablePattern(const StmtSequence ) { +for (const Stmt *S : Sequence) + addVariables(S); + } + + /// \brief Compares this pattern with the given one. + /// \param Other The given VariablePattern to compare with. + /// \return Returns true if and only if the references variables in this + /// object follow the same pattern than the ones in the given + /// VariablePattern. + /// + /// For example, the following statements all have the same pattern: + /// + /// if (a < b) return a; return b; + /// if (x < y) return x; return y; + ///
Re: [PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
vsk added a subscriber: vsk. vsk added a comment. This is neat! I may be missing something, but could you explain why 'false-positives.cpp' was deleted? I'd expect the test to be incorporated into 'functions.cpp' with the 'expected-*' directives removed. Comment at: lib/Analysis/CloneDetection.cpp:92 @@ +91,3 @@ + +VariableOccurence(std::size_t KindID) : KindID(KindID) {} + }; Extra 'std::' here. Comment at: test/Analysis/copypaste/var-patterns.cpp:7 @@ +6,3 @@ + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); This looks duplicated from 'functions.cpp'. Perhaps you could modify that test case directly? https://reviews.llvm.org/D22982 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D22982: [CloneDetector] No longer reporting clones that don't have a common referenced variable pattern.
teemperor created this revision. teemperor added reviewers: v.g.vassilev, NoQ, zaks.anna. teemperor added a subscriber: cfe-commits. One of the current false-positives the CloneDetector produces is that the statements `a < b ? b` and `b < a ? b` are considered clones of each other, even though they represent two different kinds of algorithms. The reason for this is that the StmtDataCollector ignores variable names when collecting data, so that two clones that only differ in the names of the referenced variables are considered clones. The pattern in which they are referenced however is not taken into aspect so far and causes the mentioned false-positive. This patch introduces a filter that removes clones which don't have the same variable pattern which prevents these kinds of false-positives. It should be noted that this is intentionally done after the clones are grouped together by their collected data vector because another planned feature will find clones that explicitly violate the variable patterns of each other. https://reviews.llvm.org/D22982 Files: lib/Analysis/CloneDetection.cpp test/Analysis/copypaste/false-positives.cpp test/Analysis/copypaste/var-patterns.cpp Index: test/Analysis/copypaste/var-patterns.cpp === --- /dev/null +++ test/Analysis/copypaste/var-patterns.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests that clones with different variable patterns are not reported. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int max2(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) +return x; + return y; +} + +// Different variable patterns, therefore different clones. + +int min(int x, int y) { // expected-warning{{Detected code clone.}} + log(); + if (y > x) +return x; + return y; +} + +int min2(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (y > x) +return x; + return y; +} Index: test/Analysis/copypaste/false-positives.cpp === --- test/Analysis/copypaste/false-positives.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s - -// This test contains false-positive reports from the CloneChecker that need to -// be fixed. - -void log(); - -int max(int a, int b) { // expected-warning{{Detected code clone.}} - log(); - if (a > b) -return a; - return b; -} - -// FIXME: Detect different variable patterns. -int min2(int a, int b) { // expected-note{{Related code clone is here.}} - log(); - if (b > a) -return a; - return b; -} Index: lib/Analysis/CloneDetection.cpp === --- lib/Analysis/CloneDetection.cpp +++ lib/Analysis/CloneDetection.cpp @@ -80,6 +80,102 @@ SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); } namespace { + +/// \brief Analyzses the pattern of the referenced variables in a statement. +class VariablePattern { + + /// \brief Describes an occurence of a variable reference in a statement. + struct VariableOccurence { +/// The index of the associated VarDecl in the Variables vector. +size_t KindID; + +VariableOccurence(std::size_t KindID) : KindID(KindID) {} + }; + + /// All occurences of referenced variables in the order of appearance. + std::vector Occurences; + /// List of referenced variables in the order of appearance. + /// Every item in this list is unique. + std::vector Variables; + + /// \brief Adds a new variable reference to this pattern. + /// \param VarDecl The declaration of the variable that is referenced. + void addVariableOccurence(const VarDecl *VarDecl) { +// First check if we already reference this variable +for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { + if (Variables[KindIndex] == VarDecl) { +// If yes, add a new occurence that points to the existing entry in +// the Variables vector. +Occurences.emplace_back(KindIndex); +return; + } +} +// If this variable wasn't already referenced, add it to the list of +// referenced variables and add a occurence that points to this new entry. +Occurences.emplace_back(Variables.size()); +Variables.push_back(VarDecl); + } + + /// \brief Adds each referenced variable from the given statement. + void addVariables(const Stmt *S) { +// Sometimes we get a nullptr (such as from IfStmts which often have nullptr +// children). We skip such statements as they don't reference any +// variables. +if (!S) + return; + +// Check if S is a reference to a variable. If yes, add it to the pattern. +if (auto D =