Artem,
I think you broke the build. Could you take a look please?
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fuzzer/builds/11051
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fuzzer/builds/11051/steps/build%20clang/logs/stdio
[80/140] Building CXX object
tools/clang/lib/StaticAnalyzer/Checkers/CMakeFiles/clangStaticAnalyzerCheckers.dir/CloneChecker.cpp.o
FAILED:
tools/clang/lib/StaticAnalyzer/Checkers/CMakeFiles/clangStaticAnalyzerCheckers.dir/CloneChecker.cpp.o
/usr/bin/c++ -DCLANG_ENABLE_ARCMT -DCLANG_ENABLE_OBJC_REWRITER
-DCLANG_ENABLE_STATIC_ANALYZER -DGTEST_HAS_RTTI=0 -D_GNU_SOURCE
-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
-Itools/clang/lib/StaticAnalyzer/Checkers
-I/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/tools/clang/lib/StaticAnalyzer/Checkers
-I/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/tools/clang/include
-Itools/clang/include -Iinclude
-I/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/include
-fPIC -fvisibility-inlines-hidden -Wall -W -Wno-unused-parameter
-Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic
-Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor
-Wno-comment -Werror=date-time -std=c++11 -ffunction-sections
-fdata-sections -fno-common -Woverloaded-virtual -fno-strict-aliasing
-O3 -DNDEBUG -fno-exceptions -fno-rtti -MMD -MT
tools/clang/lib/StaticAnalyzer/Checkers/CMakeFiles/clangStaticAnalyzerCheckers.dir/CloneChecker.cpp.o
-MF
tools/clang/lib/StaticAnalyzer/Checkers/CMakeFiles/clangStaticAnalyzerCheckers.dir/CloneChecker.cpp.o.d
-o
tools/clang/lib/StaticAnalyzer/Checkers/CMakeFiles/clangStaticAnalyzerCheckers.dir/CloneChecker.cpp.o
-c
/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp:29:25:
error: declaration of ‘clang::CloneDetector
{anonymous}::CloneChecker::CloneDetector’ [-fpermissive]
   mutable CloneDetector CloneDetector;
                         ^
In file included from
/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp:17:0:
/mnt/b/sanitizer-buildbot5/sanitizer-x86_64-linux-fuzzer/build/llvm/tools/clang/include/clang/Analysis/CloneDetection.h:158:7:
error: changes meaning of ‘CloneDetector’ from ‘class
clang::CloneDetector’ [-fpermissive]
 class CloneDetector {
       ^
On Tue, Jul 26, 2016 at 11:28 AM Artem Dergachev via cfe-commits
<cfe-commits@lists.llvm.org <mailto:cfe-commits@lists.llvm.org>> wrote:
Author: dergachev
Date: Tue Jul 26 13:13:12 2016
New Revision: 276782
URL: http://llvm.org/viewvc/llvm-project?rev=276782&view=rev
Log:
[analyzer] Add basic capabilities to detect source code clones.
This patch adds the CloneDetector class which allows searching
source code
for clones.
For every statement or group of statements within a compound
statement,
CloneDetector computes a hash value, and finds clones by detecting
identical hash values.
This initial patch only provides a simple hashing mechanism
that hashes the kind of each sub-statement.
This patch also adds CloneChecker - a simple static analyzer checker
that uses CloneDetector to report copy-pasted code.
Patch by Raphael Isemann!
Differential Revision: https://reviews.llvm.org/D20795
Added:
cfe/trunk/include/clang/Analysis/CloneDetection.h
cfe/trunk/lib/Analysis/CloneDetection.cpp
cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
cfe/trunk/test/Analysis/copypaste/
cfe/trunk/test/Analysis/copypaste/blocks.cpp
cfe/trunk/test/Analysis/copypaste/false-positives.cpp
cfe/trunk/test/Analysis/copypaste/functions.cpp
cfe/trunk/test/Analysis/copypaste/objc-methods.m
cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp
Modified:
cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
cfe/trunk/lib/Analysis/CMakeLists.txt
cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt
Added: cfe/trunk/include/clang/Analysis/CloneDetection.h
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/CloneDetection.h?rev=276782&view=auto
==============================================================================
--- cfe/trunk/include/clang/Analysis/CloneDetection.h (added)
+++ cfe/trunk/include/clang/Analysis/CloneDetection.h Tue Jul 26
13:13:12 2016
@@ -0,0 +1,235 @@
+//===--- CloneDetection.h - Finds code clones in an AST
---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines classes for searching and analyzing source
code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
+#define LLVM_CLANG_AST_CLONEDETECTION_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringMap.h"
+
+#include <vector>
+
+namespace clang {
+
+class Stmt;
+class Decl;
+class ASTContext;
+class CompoundStmt;
+
+/// \brief Identifies a list of statements.
+///
+/// Can either identify a single arbitrary Stmt object, a
continuous sequence of
+/// child statements inside a CompoundStmt or no statements at all.
+class StmtSequence {
+ /// If this object identifies a sequence of statements inside a
CompoundStmt,
+ /// S points to this CompoundStmt. If this object only
identifies a single
+ /// Stmt, then S is a pointer to this Stmt.
+ const Stmt *S;
+
+ /// The related ASTContext for S.
+ ASTContext *Context;
+
+ /// If EndIndex is non-zero, then S is a CompoundStmt and this
StmtSequence
+ /// instance is representing the CompoundStmt children inside
the array
+ /// [StartIndex, EndIndex).
+ unsigned StartIndex;
+ unsigned EndIndex;
+
+public:
+ /// \brief Constructs a StmtSequence holding multiple statements.
+ ///
+ /// The resulting StmtSequence identifies a continuous sequence
of statements
+ /// in the body of the given CompoundStmt. Which statements of
the body should
+ /// be identified needs to be specified by providing a start
and end index
+ /// that describe a non-empty sub-array in the body of the
given CompoundStmt.
+ ///
+ /// \param Stmt A CompoundStmt that contains all statements in
its body.
+ /// \param Context The ASTContext for the given CompoundStmt.
+ /// \param StartIndex The inclusive start index in the children
array of
+ /// \p Stmt
+ /// \param EndIndex The exclusive end index in the children
array of \p Stmt.
+ StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
+ unsigned StartIndex, unsigned EndIndex);
+
+ /// \brief Constructs a StmtSequence holding a single statement.
+ ///
+ /// \param Stmt An arbitrary Stmt.
+ /// \param Context The ASTContext for the given Stmt.
+ StmtSequence(const Stmt *Stmt, ASTContext &Context);
+
+ /// \brief Constructs an empty StmtSequence.
+ StmtSequence();
+
+ typedef const Stmt *const *iterator;
+
+ /// Returns an iterator pointing to the first statement in this
sequence.
+ iterator begin() const;
+
+ /// Returns an iterator pointing behind the last statement in
this sequence.
+ iterator end() const;
+
+ /// Returns the first statement in this sequence.
+ ///
+ /// This method should only be called on a non-empty
StmtSequence object.
+ const Stmt *front() const {
+ assert(!empty());
+ return begin()[0];
+ }
+
+ /// Returns the last statement in this sequence.
+ ///
+ /// This method should only be called on a non-empty
StmtSequence object.
+ const Stmt *back() const {
+ assert(!empty());
+ return begin()[size() - 1];
+ }
+
+ /// Returns the number of statements this object holds.
+ unsigned size() const {
+ if (holdsSequence())
+ return EndIndex - StartIndex;
+ if (S == nullptr)
+ return 0;
+ return 1;
+ }
+
+ /// Returns true if and only if this StmtSequence contains no
statements.
+ bool empty() const { return size() == 0; }
+
+ /// Returns the related ASTContext for the stored Stmts.
+ ASTContext &getASTContext() const {
+ assert(Context);
+ return *Context;
+ }
+
+ /// Returns true if this object holds a list of statements.
+ bool holdsSequence() const { return EndIndex != 0; }
+
+ /// Returns the start sourcelocation of the first statement in
this sequence.
+ ///
+ /// This method should only be called on a non-empty
StmtSequence object.
+ SourceLocation getStartLoc() const;
+
+ /// Returns the end sourcelocation of the last statement in
this sequence.
+ ///
+ /// This method should only be called on a non-empty
StmtSequence object.
+ SourceLocation getEndLoc() const;
+
+ bool operator==(const StmtSequence &Other) const {
+ return std::tie(S, StartIndex, EndIndex) ==
+ std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+ }
+
+ bool operator!=(const StmtSequence &Other) const {
+ return std::tie(S, StartIndex, EndIndex) !=
+ std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+ }
+
+ /// Returns true if and only if this sequence covers a source
range that
+ /// contains the source range of the given sequence \p Other.
+ ///
+ /// This method should only be called on a non-empty
StmtSequence object
+ /// and passed a non-empty StmtSequence object.
+ bool contains(const StmtSequence &Other) const;
+};
+
+/// \brief Searches for clones in source code.
+///
+/// First, this class needs the declarations to search, each of which is
+/// passed via \p analyzeCodeBody . It will then generate and store
search data
+/// for all statements inside the body of each given declaration.
+/// Afterwards the generated data can be used to find code clones
by calling
+/// \p findClones .
+///
+/// This class only searches for clones in executable source code
+/// (e.g. function bodies). Other clones (e.g. cloned comments or
declarations)
+/// are not supported.
+class CloneDetector {
+public:
+ /// Holds the data about a StmtSequence that is needed during
the search for
+ /// code clones.
+ struct CloneSignature {
+ /// \brief Holds all relevant data of a StmtSequence.
+ ///
+ /// If this variable is equal for two different
StmtSequences, then they can
+ /// be considered clones of each other.
+ std::vector<unsigned> Data;
+
+ /// \brief The complexity of the StmtSequence.
+ ///
+ /// This scalar value serves as a simple way of filtering
clones that are
+ /// too small to be reported. A greater value indicates that
the related
+ /// StmtSequence is probably more interesting to the user.
+ unsigned Complexity;
+
+ /// \brief Creates an empty CloneSignature without any data.
+ CloneSignature() : Complexity(1) {}
+
+ CloneSignature(const std::vector<unsigned> &Data, unsigned
Complexity)
+ : Data(Data), Complexity(Complexity) {}
+
+ /// \brief Adds the data from the given CloneSignature to
this one.
+ void add(const CloneSignature &Other) {
+ Data.insert(Data.end(), Other.Data.begin(), Other.Data.end());
+ Complexity += Other.Complexity;
+ }
+ };
+
+ /// Holds a group of StmtSequences that are clones of each other
and the
+ /// complexity value (see CloneSignature::Complexity) that all
stored
+ /// StmtSequences have in common.
+ struct CloneGroup {
+ std::vector<StmtSequence> Sequences;
+ unsigned Complexity;
+
+ CloneGroup(const StmtSequence &Seq, unsigned Complexity)
+ : Complexity(Complexity) {
+ Sequences.push_back(Seq);
+ }
+
+ /// \brief Returns false if and only if this group should be
skipped when
+ /// searching for clones.
+ bool isValid() const {
+ // A clone group with only one member makes no sense, so we
skip them.
+ return Sequences.size() > 1;
+ }
+ };
+
+ /// \brief Generates and stores search data for all statements
in the body of
+ /// the given Decl.
+ void analyzeCodeBody(const Decl *D);
+
+ /// \brief Stores the CloneSignature to allow future querying.
+ void add(const StmtSequence &S, const CloneSignature &Signature);
+
+ /// \brief Searches the provided statements for clones.
+ ///
+ /// \param Result Output parameter that is filled with a list
of found
+ /// clone groups. Each group contains multiple
StmtSequences
+ /// that were identified to be clones of each other.
+ /// \param MinGroupComplexity Only return clones which have at
least this
+ /// complexity value.
+ void findClones(std::vector<CloneGroup> &Result, unsigned
MinGroupComplexity);
+
+private:
+ /// Stores all found clone groups including invalid groups with
only a single
+ /// statement.
+ std::vector<CloneGroup> CloneGroups;
+ /// Maps search data to its related index in the \p CloneGroups
vector.
+ llvm::StringMap<std::size_t> CloneGroupIndexes;
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_AST_CLONEDETECTION_H
Modified: cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td?rev=276782&r1=276781&r2=276782&view=diff
==============================================================================
--- cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
(original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
Tue Jul 26 13:13:12 2016
@@ -77,6 +77,8 @@ def MPI : Package<"mpi">, InPackage<OptI
def LLVM : Package<"llvm">;
def Debug : Package<"debug">;
+def CloneDetectionAlpha : Package<"clone">, InPackage<Alpha>, Hidden;
+
//===----------------------------------------------------------------------===//
// Core Checkers.
//===----------------------------------------------------------------------===//
@@ -661,3 +663,17 @@ def BugHashDumper : Checker<"DumpBugHash
DescFile<"DebugCheckers.cpp">;
} // end "debug"
+
+
+//===----------------------------------------------------------------------===//
+// Clone Detection
+//===----------------------------------------------------------------------===//
+
+let ParentPackage = CloneDetectionAlpha in {
+
+def CloneChecker : Checker<"CloneChecker">,
+ HelpText<"Reports similar pieces of code.">,
+ DescFile<"CloneChecker.cpp">;
+
+} // end "clone"
+
Modified: cfe/trunk/lib/Analysis/CMakeLists.txt
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CMakeLists.txt?rev=276782&r1=276781&r2=276782&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/CMakeLists.txt (original)
+++ cfe/trunk/lib/Analysis/CMakeLists.txt Tue Jul 26 13:13:12 2016
@@ -9,6 +9,7 @@ add_clang_library(clangAnalysis
CFGReachabilityAnalysis.cpp
CFGStmtMap.cpp
CallGraph.cpp
+ CloneDetection.cpp
CocoaConventions.cpp
Consumed.cpp
CodeInjector.cpp
Added: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (added)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,277 @@
+//===--- CloneDetection.cpp - Finds code clones in an AST
-------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements classes for searching and analyzing
source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/CloneDetection.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace clang;
+
+StmtSequence::StmtSequence(const CompoundStmt *Stmt, ASTContext
&Context,
+ unsigned StartIndex, unsigned EndIndex)
+ : S(Stmt), Context(&Context), StartIndex(StartIndex),
EndIndex(EndIndex) {
+ assert(Stmt && "Stmt must not be a nullptr");
+ assert(StartIndex < EndIndex && "Given array should not be empty");
+ assert(EndIndex <= Stmt->size() && "Given array too big for
this Stmt");
+}
+
+StmtSequence::StmtSequence(const Stmt *Stmt, ASTContext &Context)
+ : S(Stmt), Context(&Context), StartIndex(0), EndIndex(0) {}
+
+StmtSequence::StmtSequence()
+ : S(nullptr), Context(nullptr), StartIndex(0), EndIndex(0) {}
+
+bool StmtSequence::contains(const StmtSequence &Other) const {
+ // If both sequences reside in different translation units,
they can never
+ // contain each other.
+ if (Context != Other.Context)
+ return false;
+
+ const SourceManager &SM = Context->getSourceManager();
+
+ // Otherwise check if the start and end locations of the
current sequence
+ // surround the other sequence.
+ bool StartIsInBounds =
+ SM.isBeforeInTranslationUnit(getStartLoc(),
Other.getStartLoc()) ||
+ getStartLoc() == Other.getStartLoc();
+ if (!StartIsInBounds)
+ return false;
+
+ bool EndIsInBounds =
+ SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) ||
+ Other.getEndLoc() == getEndLoc();
+ return EndIsInBounds;
+}
+
+StmtSequence::iterator StmtSequence::begin() const {
+ if (!holdsSequence()) {
+ return &S;
+ }
+ auto CS = cast<CompoundStmt>(S);
+ return CS->body_begin() + StartIndex;
+}
+
+StmtSequence::iterator StmtSequence::end() const {
+ if (!holdsSequence()) {
+ return &S + 1;
+ }
+ auto CS = cast<CompoundStmt>(S);
+ return CS->body_begin() + EndIndex;
+}
+
+SourceLocation StmtSequence::getStartLoc() const {
+ return front()->getLocStart();
+}
+
+SourceLocation StmtSequence::getEndLoc() const { return
back()->getLocEnd(); }
+
+namespace {
+/// Generates CloneSignatures for a set of statements and stores
the results in
+/// a CloneDetector object.
+class CloneSignatureGenerator {
+
+ CloneDetector &CD;
+ ASTContext &Context;
+
+ /// \brief Generates CloneSignatures for all statements in the
given statement
+ /// tree and stores them in the CloneDetector.
+ ///
+ /// \param S The root of the given statement tree.
+ /// \return The CloneSignature of the root statement.
+ CloneDetector::CloneSignature generateSignatures(const Stmt *S) {
+ // Create an empty signature that will be filled in this method.
+ CloneDetector::CloneSignature Signature;
+
+ // The only relevant data for now is the class of the statement.
+ // TODO: Collect statement class specific data.
+ Signature.Data.push_back(S->getStmtClass());
+
+ // Storage for the signatures of the direct child statements.
This is only
+ // needed if the current statement is a CompoundStmt.
+ std::vector<CloneDetector::CloneSignature> ChildSignatures;
+ const CompoundStmt *CS = dyn_cast<const CompoundStmt>(S);
+
+ // The signature of a statement includes the signatures of
its children.
+ // Therefore we create the signatures for every child and add
them to the
+ // current signature.
+ for (const Stmt *Child : S->children()) {
+ // Some statements like 'if' can have nullptr children that
we will skip.
+ if (!Child)
+ continue;
+
+ // Recursive call to create the signature of the child
statement. This
+ // will also create and store all clone groups in this
child statement.
+ auto ChildSignature = generateSignatures(Child);
+
+ // Add the collected data to the signature of the current
statement.
+ Signature.add(ChildSignature);
+
+ // If the current statement is a CompoundStatement, we need
to store the
+ // signature for the generation of the sub-sequences.
+ if (CS)
+ ChildSignatures.push_back(ChildSignature);
+ }
+
+ // If the current statement is a CompoundStmt, we also need
to create the
+ // clone groups from the sub-sequences inside the children.
+ if (CS)
+ handleSubSequences(CS, ChildSignatures);
+
+ // Save the signature for the current statement in the
CloneDetector object.
+ CD.add(StmtSequence(S, Context), Signature);
+
+ return Signature;
+ }
+
+ /// \brief Adds all possible sub-sequences in the child array
of the given
+ /// CompoundStmt to the CloneDetector.
+ /// \param CS The given CompoundStmt.
+ /// \param ChildSignatures A list of calculated signatures for
each child in
+ /// the given CompoundStmt.
+ void handleSubSequences(
+ const CompoundStmt *CS,
+ const std::vector<CloneDetector::CloneSignature>
&ChildSignatures) {
+
+ // FIXME: This function has quadratic runtime right now.
Check if skipping
+ // this function for too long CompoundStmts is an option.
+
+ // The length of the sub-sequence. We don't need to handle
sequences with
+ // the length 1 as they are already handled in generateSignatures().
+ for (unsigned Length = 2; Length <= CS->size(); ++Length) {
+ // The start index in the body of the CompoundStmt. We
increase the
+ // position until the end of the sub-sequence reaches the
end of the
+ // CompoundStmt body.
+ for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
+ // Create an empty signature and add the signatures of
all selected
+ // child statements to it.
+ CloneDetector::CloneSignature SubSignature;
+
+ for (unsigned i = Pos; i < Pos + Length; ++i) {
+ SubSignature.add(ChildSignatures[i]);
+ }
+
+ // Save the signature together with the information about
what children
+ // sequence we selected.
+ CD.add(StmtSequence(CS, Context, Pos, Pos + Length),
SubSignature);
+ }
+ }
+ }
+
+public:
+ explicit CloneSignatureGenerator(CloneDetector &CD, ASTContext
&Context)
+ : CD(CD), Context(Context) {}
+
+ /// \brief Generates signatures for all statements in the given
function body.
+ void consumeCodeBody(const Stmt *S) { generateSignatures(S); }
+};
+} // end anonymous namespace
+
+void CloneDetector::analyzeCodeBody(const Decl *D) {
+ assert(D);
+ assert(D->hasBody());
+ CloneSignatureGenerator Generator(*this, D->getASTContext());
+ Generator.consumeCodeBody(D->getBody());
+}
+
+void CloneDetector::add(const StmtSequence &S,
+ const CloneSignature &Signature) {
+ // StringMap only works with StringRefs, so we create one for
our data vector.
+ auto &Data = Signature.Data;
+ StringRef DataRef = StringRef(reinterpret_cast<const char
*>(Data.data()),
+ Data.size() * sizeof(unsigned));
+
+ // Search with the help of the signature if we already have
encountered a
+ // clone of the given StmtSequence.
+ auto I = CloneGroupIndexes.find(DataRef);
+ if (I == CloneGroupIndexes.end()) {
+ // We haven't found an existing clone group, so we create a
new clone group
+ // for this StmtSequence and store the index of it in our
search map.
+ CloneGroupIndexes[DataRef] = CloneGroups.size();
+ CloneGroups.emplace_back(S, Signature.Complexity);
+ return;
+ }
+
+ // We have found an existing clone group and can expand it with
the given
+ // StmtSequence.
+ CloneGroups[I->getValue()].Sequences.push_back(S);
+}
+
+namespace {
+/// \brief Returns true if and only if \p Stmt contains at least
one other
+/// sequence in the \p Group.
+bool containsAnyInGroup(StmtSequence &Stmt,
+ CloneDetector::CloneGroup &Group) {
+ for (StmtSequence &GroupStmt : Group.Sequences) {
+ if (Stmt.contains(GroupStmt))
+ return true;
+ }
+ return false;
+}
+
+/// \brief Returns true if and only if all sequences in \p
OtherGroup are
+/// contained by a sequence in \p Group.
+bool containsGroup(CloneDetector::CloneGroup &Group,
+ CloneDetector::CloneGroup &OtherGroup) {
+ // We have fewer sequences in the current group than we have in
the other,
+ // so we will never fulfill the requirement for returning true.
This is only
+ // possible because we know that a sequence in Group can
contain at most
+ // one sequence in OtherGroup.
+ if (Group.Sequences.size() < OtherGroup.Sequences.size())
+ return false;
+
+ for (StmtSequence &Stmt : Group.Sequences) {
+ if (!containsAnyInGroup(Stmt, OtherGroup))
+ return false;
+ }
+ return true;
+}
+} // end anonymous namespace
+
+void CloneDetector::findClones(std::vector<CloneGroup> &Result,
+ unsigned MinGroupComplexity) {
+ // Add every valid clone group that fulfills the complexity
requirement.
+ for (const CloneGroup &Group : CloneGroups) {
+ if (Group.isValid() && Group.Complexity >= MinGroupComplexity) {
+ Result.push_back(Group);
+ }
+ }
+
+ std::vector<unsigned> IndexesToRemove;
+
+ // Compare every group in the result with the rest. If one
group contains
+ // another group, we only need to return the bigger group.
+ // Note: This doesn't scale well, so if possible avoid calling
any heavy
+ // function from this loop to minimize the performance impact.
+ for (unsigned i = 0; i < Result.size(); ++i) {
+ for (unsigned j = 0; j < Result.size(); ++j) {
+ // Don't compare a group with itself.
+ if (i == j)
+ continue;
+
+ if (containsGroup(Result[j], Result[i])) {
+ IndexesToRemove.push_back(i);
+ break;
+ }
+ }
+ }
+
+ // Erasing a list of indexes from the vector should be done
with decreasing
+ // indexes. As IndexesToRemove is constructed with increasing
values, we just
+ // reverse iterate over it to get the desired order.
+ for (auto I = IndexesToRemove.rbegin(); I !=
IndexesToRemove.rend(); ++I) {
+ Result.erase(Result.begin() + *I);
+ }
+}
Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt?rev=276782&r1=276781&r2=276782&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt Tue Jul
26 13:13:12 2016
@@ -22,6 +22,7 @@ add_clang_library(clangStaticAnalyzerChe
CheckerDocumentation.cpp
ChrootChecker.cpp
ClangCheckers.cpp
+ CloneChecker.cpp
CXXSelfAssignmentChecker.cpp
DeadStoresChecker.cpp
DebugCheckers.cpp
Added: cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp (added)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp Tue Jul
26 13:13:12 2016
@@ -0,0 +1,96 @@
+//===--- CloneChecker.cpp - Clone detection checker
-------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current
translation
+/// unit.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/Analysis/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker
+ : public Checker<check::ASTCodeBody,
check::EndOfTranslationUnit> {
+ mutable CloneDetector CloneDetector;
+
+public:
+ void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
+ BugReporter &BR) const;
+
+ void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkASTCodeBody(const Decl *D,
AnalysisManager &Mgr,
+ BugReporter &BR) const {
+ // Every statement that should be included in the search for
clones needs to
+ // be passed to the CloneDetector.
+ CloneDetector.analyzeCodeBody(D);
+}
+
+void CloneChecker::checkEndOfTranslationUnit(const
TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+ // At this point, every statement in the translation unit has
been analyzed by
+ // the CloneDetector. The only thing left to do is to report
the found clones.
+
+ int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger(
+ "MinimumCloneComplexity", 10, this);
+
+ assert(MinComplexity >= 0);
+
+ SourceManager &SM = BR.getSourceManager();
+
+ std::vector<CloneDetector::CloneGroup> CloneGroups;
+ CloneDetector.findClones(CloneGroups, MinComplexity);
+
+ DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+ unsigned WarnID =
DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+ "Detected code
clone.");
+
+ unsigned NoteID =
DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+ "Related code
clone is here.");
+
+ for (CloneDetector::CloneGroup &Group : CloneGroups) {
+ // For readability reasons we sort the clones by line numbers.
+ std::sort(Group.Sequences.begin(), Group.Sequences.end(),
+ [&SM](const StmtSequence &LHS, const StmtSequence
&RHS) {
+ return
SM.isBeforeInTranslationUnit(LHS.getStartLoc(),
+ RHS.getStartLoc()) &&
+ SM.isBeforeInTranslationUnit(LHS.getEndLoc(),
+ RHS.getEndLoc());
+ });
+
+ // We group the clones by printing the first as a warning and
all others
+ // as a note.
+ DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
+ for (unsigned i = 1; i < Group.Sequences.size(); ++i) {
+ DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register CloneChecker
+//===----------------------------------------------------------------------===//
+
+void ento::registerCloneChecker(CheckerManager &Mgr) {
+ Mgr.registerChecker<CloneChecker>();
+}
Added: cfe/trunk/test/Analysis/copypaste/blocks.cpp
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/blocks.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/blocks.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/blocks.cpp Tue Jul 26
13:13:12 2016
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -analyze -fblocks -std=c++11
-analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in blocks.
+
+void log();
+
+auto BlockA = ^(int a, int b){ // expected-warning{{Detected code
clone.}}
+ log();
+ if (a > b)
+ return a;
+ return b;
+};
+
+auto BlockB = ^(int a, int b){ // expected-note{{Related code
clone is here.}}
+ log();
+ if (a > b)
+ return a;
+ return b;
+};
Added: cfe/trunk/test/Analysis/copypaste/false-positives.cpp
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/false-positives.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/false-positives.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/false-positives.cpp Tue Jul
26 13:13:12 2016
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -std=c++11
-analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This test contains false-positive reports from the
CloneChecker that need to
+// be fixed.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+ log();
+ if (a > b)
+ return a;
+ return b;
+}
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is
here.}}
+ log();
+ if (a < b)
+ return a;
+ return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is
here.}}
+ log();
+ if (b > a)
+ return a;
+ return b;
+}
Added: cfe/trunk/test/Analysis/copypaste/functions.cpp
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/functions.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/functions.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/functions.cpp Tue Jul 26
13:13:12 2016
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -analyze -std=c++11
-analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in functions.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+ log();
+ if (a > b)
+ return a;
+ return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone
is here.}}
+ log();
+ if (x > y)
+ return x;
+ return y;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+ return a + b;
+}
Added: cfe/trunk/test/Analysis/copypaste/objc-methods.m
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/objc-methods.m?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/objc-methods.m (added)
+++ cfe/trunk/test/Analysis/copypaste/objc-methods.m Tue Jul 26
13:13:12 2016
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -Wno-objc-root-class
-analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in Objective-C methods.
+
+@interface A
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation A
+- (int) setOk : (int) a : (int) b { //
expected-warning{{Detected code clone.}}
+ if (a > b)
+ return a;
+ return b;
+}
+@end
+
+@interface B
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation B
+- (int) setOk : (int) a : (int) b { // expected-note{{Related
code clone is here.}}
+ if (a > b)
+ return a;
+ return b;
+}
+@end
Added: cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp
URL:
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp Tue Jul 26
13:13:12 2016
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -std=c++11
-analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if sub-sequences can match with normal sequences.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+ log2(a);
+ log(); // expected-warning{{Detected code clone.}}
+ if (a > b)
+ return a;
+ return b;
+}
+
+int maxClone(int a, int b) {
+ log(); // expected-note{{Related code clone is here.}}
+ if (a > b)
+ return a;
+ return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+ return a + b;
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org <mailto:cfe-commits@lists.llvm.org>
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
--
Mike
Sent from phone