Hi chandlerc,
http://llvm-reviews.chandlerc.com/D30
CHANGE SINCE LAST DIFF
http://llvm-reviews.chandlerc.com/D30?vs=75&id=113#toc
Files:
include/clang/Tooling/CompilationDatabase.h
include/clang/Tooling/JSONCompilationDatabase.h
lib/Tooling/CompilationDatabase.cpp
lib/Tooling/JSONCompilationDatabase.cpp
unittests/Tooling/CompilationDatabaseTest.cpp
Index: include/clang/Tooling/CompilationDatabase.h
===================================================================
--- include/clang/Tooling/CompilationDatabase.h
+++ include/clang/Tooling/CompilationDatabase.h
@@ -31,8 +31,12 @@
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/PathV2.h"
+
#include <string>
#include <vector>
@@ -186,6 +190,105 @@
std::vector<CompileCommand> CompileCommands;
};
+struct PathComparator {
+ virtual bool equivalent(const Twine &FileA, const Twine &FileB) const {
+ return FileA.str() == FileB.str() ||
+ llvm::sys::fs::equivalent(FileA, FileB);
+ }
+};
+
+
+/// \brief A trie to efficiently match against the entries of the compilation
+/// database in order of matching suffix length.
+///
+/// When a clang tool is supposed to operate on a specific file, we have to
+/// find the corresponding file in the compilation database. Although entries
+/// in the compilation database are keyed by filename, a simple string match
+/// is insufficient because of symlinks. Commonly, a project hierarchy looks
+/// like this:
+/// /<project-root>/src/<path>/<somefile>.cc (used as input for the tool)
+/// /<project-root>/build/<symlink-to-src>/<path>/<somefile>.cc (stored in DB)
+///
+/// Furthermore, there might be symlinks inside the source folder or inside the
+/// database, so that the same source file is translated with different build
+/// options.
+///
+/// For a given input file, the \c FileNameMatchTrie finds its entries in order
+/// of matching suffix length. For each suffix length, there might be one or
+/// more entries in the database. For each of those entries, it calls
+/// \c llvm::sys::fs::equivalent() (injected as \c PathComparator). There might
+/// be zero or more entries with the same matching suffix length that are
+/// equivalent to the input file. Three cases are distinguished:
+/// 0 equivalent files: Continue with the next suffix length.
+/// 1 equivalent file: Best match found, return it.
+/// >1 equivalent files: Match is ambiguous, return error.
+///
+/// Each node has storage for up to one path and a map mapping a path segment to
+/// child nodes. The trie starts with an empty root node. An insert of a path
+/// 'p'starts at the root node and does the following:
+/// - If the node is empty, insert 'p' into its storage and abort.
+/// - If the node has a path 'p2' but no children, take the last path segment
+/// 's' of 'p2', put a new child into the map at 's' an insert the rest of
+/// 'p2' there.
+/// - Insert a new child for the last segment of 'p' and insert the rest of 'p'
+/// there.
+///
+/// To find the best matching node for a given path 'p', the \c findEquivalent()
+/// function is called recursively for each path segment (back to fron) of 'p'
+/// until a node 'n' is reached that does not ..
+/// - .. have children. In this case it is checked
+/// whether the stored path is equivalent to 'p'. If yes, the best match is
+/// found. Otherwise continue with the parent node as if this node did not
+/// exist.
+/// - .. a child matching the next path segment. In this case, all children of
+/// 'n' are an equally good match for 'p'. All children are of 'n' are found
+/// recursively and their equivalence to 'p' is determined. If none are
+/// equivalent, continue with the parent node as if 'n' didn't exist. If one
+/// is equivalent, the best match is found. Otherwise, report and ambigiuity
+/// error.
+///
+/// An insert operation is linear in the number of a path's segments. The
+/// \c findEquivalent() operation can will in worst case (when the Trie does not
+/// contain the input path) compare each store path against the input path.
+class FileNameMatchTrie {
+public:
+ /// \brief Inserts 'NewPath' into this trie.
+ void insert(StringRef NewPath, unsigned ConsumedLength = 0);
+
+ /// \brief Finds the corresponding file in this trie.
+ ///
+ /// Returns file name stored in this trie that is equivalent to 'FileName'
+ /// according to 'Comparator', if it can be uniquely identified. If there
+ /// are no matches an empty StringRef is return. If there are ambigious
+ /// matches, an empty StringRef is return and a corresponding message written
+ /// to 'Error'.
+ StringRef findEquivalent(StringRef FileName,
+ llvm::raw_ostream &Error) const;
+
+private:
+ friend class FileNameMatchTrieTest;
+
+ /// Internal version of \c findEquivalent(). If multiple paths fit 'FileName'
+ /// equally well, \c IsAmbiguous is set to \c true. \c ConsumedLength denotes
+ /// the number of \c Filename's trailing characters already consumed during
+ /// recursion.
+ StringRef findEquivalent(const PathComparator &Comparator,
+ StringRef FileName,
+ bool &IsAmbiguous,
+ unsigned ConsumedLength) const;
+
+ /// \brief Gets all paths in this FileNameMatchTrie.
+ void getAll(std::vector<StringRef> &Results,
+ llvm::StringMap<FileNameMatchTrie>::const_iterator Except) const;
+
+ // The stored absolute path in this node. Only valid for leaf nodes, i.e.
+ // nodes where Children.empty().
+ std::string Path;
+
+ // The children of this node stored in a map based on the next path segment.
+ llvm::StringMap<FileNameMatchTrie> Children;
+};
+
} // end namespace tooling
} // end namespace clang
Index: include/clang/Tooling/JSONCompilationDatabase.h
===================================================================
--- include/clang/Tooling/JSONCompilationDatabase.h
+++ include/clang/Tooling/JSONCompilationDatabase.h
@@ -92,6 +92,8 @@
// Maps file paths to the compile command lines for that file.
llvm::StringMap< std::vector<CompileCommandRef> > IndexByFile;
+
+ FileNameMatchTrie MatchTrie;
llvm::OwningPtr<llvm::MemoryBuffer> Database;
llvm::SourceMgr SM;
Index: lib/Tooling/CompilationDatabase.cpp
===================================================================
--- lib/Tooling/CompilationDatabase.cpp
+++ lib/Tooling/CompilationDatabase.cpp
@@ -132,5 +132,94 @@
extern volatile int JSONAnchorSource;
static int JSONAnchorDest = JSONAnchorSource;
+void FileNameMatchTrie::insert(StringRef NewPath, unsigned ConsumedLength) {
+ if (llvm::sys::path::is_relative(NewPath))
+ return;
+ if (Path.empty()) {
+ // This is an empty leaf. Store NewPath and return.
+ Path = NewPath;
+ return;
+ }
+ if (Children.empty()) {
+ // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
+ if (NewPath == Path)
+ return;
+ // Make this a node and create a child-leaf with 'Path'.
+ StringRef Element(llvm::sys::path::filename(
+ StringRef(Path).drop_back(ConsumedLength)));
+ Children[Element].Path = Path;
+ }
+ StringRef Element(llvm::sys::path::filename(
+ StringRef(NewPath).drop_back(ConsumedLength)));
+ Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
+}
+
+StringRef FileNameMatchTrie::findEquivalent(StringRef FileName,
+ llvm::raw_ostream &Error) const {
+ if (llvm::sys::path::is_relative(FileName)) {
+ Error << "Cannot resolve relative paths";
+ return StringRef();
+ }
+ bool IsAmbiguous = false;
+ PathComparator PathComp;
+ StringRef Result = findEquivalent(PathComp, FileName, IsAmbiguous, 0);
+ if (IsAmbiguous)
+ Error << "Path is ambiguous";
+ return Result;
+}
+
+StringRef FileNameMatchTrie::findEquivalent(const PathComparator &Comparator,
+ StringRef FileName,
+ bool &IsAmbiguous,
+ unsigned ConsumedLength) const {
+ if (Children.empty()) {
+ if (Comparator.equivalent(StringRef(Path), FileName))
+ return StringRef(Path);
+ return StringRef();
+ }
+ StringRef Element(llvm::sys::path::filename(FileName.drop_back(
+ ConsumedLength)));
+ llvm::StringMap<FileNameMatchTrie>::const_iterator MatchingChild =
+ Children.find(Element);
+ if (MatchingChild != Children.end()) {
+ StringRef Result = MatchingChild->getValue().findEquivalent(
+ Comparator, FileName, IsAmbiguous, ConsumedLength + Element.size() + 1);
+ if (!Result.empty() || IsAmbiguous)
+ return Result;
+ }
+ std::vector<StringRef> AllChildren;
+ getAll(AllChildren, MatchingChild);
+ StringRef Result;
+ for (unsigned i = 0; i < AllChildren.size(); i++) {
+ if (Comparator.equivalent(AllChildren[i], FileName)) {
+ if (Result.empty()) {
+ Result = AllChildren[i];
+ } else {
+ IsAmbiguous = true;
+ return StringRef();
+ }
+ }
+ }
+ return Result;
+}
+
+void FileNameMatchTrie::getAll(
+ std::vector<StringRef> &Results,
+ llvm::StringMap<FileNameMatchTrie>::const_iterator Except) const {
+ if (Path.empty())
+ return;
+ if (Children.empty()) {
+ Results.push_back(StringRef(Path));
+ return;
+ }
+ for (llvm::StringMap<FileNameMatchTrie>::const_iterator
+ It = Children.begin(), E = Children.end();
+ It != E; ++It) {
+ if (It == Except)
+ continue;
+ It->getValue().getAll(Results, Children.end());
+ }
+}
+
} // end namespace tooling
} // end namespace clang
Index: lib/Tooling/JSONCompilationDatabase.cpp
===================================================================
--- lib/Tooling/JSONCompilationDatabase.cpp
+++ lib/Tooling/JSONCompilationDatabase.cpp
@@ -164,8 +164,18 @@
JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
llvm::SmallString<128> NativeFilePath;
llvm::sys::path::native(FilePath, NativeFilePath);
+ std::vector<StringRef> PossibleMatches;
+ std::string Error;
+ llvm::raw_string_ostream ES(Error);
+ StringRef Match = MatchTrie.findEquivalent(NativeFilePath.str(), ES);
+ if (Match.empty()) {
+ if (Error.empty())
+ Error = "No match found.";
+ llvm::outs() << Error << "\n";
+ return std::vector<CompileCommand>();
+ }
llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
- CommandsRefI = IndexByFile.find(NativeFilePath);
+ CommandsRefI = IndexByFile.find(Match);
if (CommandsRefI == IndexByFile.end())
return std::vector<CompileCommand>();
const std::vector<CompileCommandRef> &CommandsRef = CommandsRefI->getValue();
@@ -271,10 +281,20 @@
return false;
}
llvm::SmallString<8> FileStorage;
+ StringRef FileName = File->getValue(FileStorage);
llvm::SmallString<128> NativeFilePath;
- llvm::sys::path::native(File->getValue(FileStorage), NativeFilePath);
+ if (llvm::sys::path::is_relative(FileName)) {
+ llvm::SmallString<8> DirectoryStorage;
+ llvm::SmallString<128> AbsolutePath(
+ Directory->getValue(DirectoryStorage));
+ llvm::sys::path::append(AbsolutePath, FileName);
+ llvm::sys::path::native(AbsolutePath.str(), NativeFilePath);
+ } else {
+ llvm::sys::path::native(FileName, NativeFilePath);
+ }
IndexByFile[NativeFilePath].push_back(
- CompileCommandRef(Directory, Command));
+ CompileCommandRef(Directory, Command));
+ MatchTrie.insert(NativeFilePath.str());
}
return true;
}
Index: unittests/Tooling/CompilationDatabaseTest.cpp
===================================================================
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -55,13 +55,13 @@
getAllFiles("[]", ErrorMessage)) << ErrorMessage;
std::vector<std::string> expected_files;
- expected_files.push_back("file1");
- expected_files.push_back("file2");
+ expected_files.push_back("/dir/file1");
+ expected_files.push_back("/dir/file2");
EXPECT_EQ(expected_files, getAllFiles(
- "[{\"directory\":\"dir\","
+ "[{\"directory\":\"/dir\","
"\"command\":\"command\","
"\"file\":\"file1\"},"
- " {\"directory\":\"dir\","
+ " {\"directory\":\"/dir\","
"\"command\":\"command\","
"\"file\":\"file2\"}]",
ErrorMessage)) << ErrorMessage;
@@ -81,6 +81,83 @@
return Commands[0];
}
+struct FakeComparator : public PathComparator {
+ virtual bool equivalent(const Twine &FileA, const Twine &FileB) const {
+ return StringRef(FileA.str()).equals_lower(FileB.str());
+ }
+};
+
+class FileNameMatchTrieTest : public ::testing::Test {
+protected:
+ StringRef find(StringRef Path) {
+ IsAmbiguous = false;
+ return Trie.findEquivalent(Comparator, Path, IsAmbiguous, 0);
+ }
+
+ FileNameMatchTrie Trie;
+ FakeComparator Comparator;
+ bool IsAmbiguous;
+};
+
+TEST_F(FileNameMatchTrieTest, InsertingRelativePath) {
+ Trie.insert("/path/file.cc");
+ Trie.insert("file.cc");
+ EXPECT_EQ("/path/file.cc", find("/path/file.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, MatchingRelativePath) {
+ EXPECT_EQ("", find("file.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, ReturnsBestResults) {
+ Trie.insert("/d/c/b.cc");
+ Trie.insert("/d/b/b.cc");
+ EXPECT_EQ("/d/b/b.cc", find("/d/b/b.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, HandlesSymlinks) {
+ Trie.insert("/AA/file.cc");
+ EXPECT_EQ("/AA/file.cc", find("/aa/file.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, ReportsSymlinkAmbiguity) {
+ Trie.insert("/Aa/file.cc");
+ Trie.insert("/aA/file.cc");
+ EXPECT_TRUE(find("/aa/file.cc").empty());
+ EXPECT_TRUE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, LongerMatchingSuffixPreferred) {
+ Trie.insert("/src/Aa/file.cc");
+ Trie.insert("/src/aA/file.cc");
+ Trie.insert("/SRC/aa/file.cc");
+ EXPECT_EQ("/SRC/aa/file.cc", find("/src/aa/file.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, EmptyTrie) {
+ EXPECT_TRUE(find("/some/path").empty());
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, NoResult) {
+ Trie.insert("/somepath/otherfile.cc");
+ Trie.insert("/otherpath/somefile.cc");
+ EXPECT_EQ("", find("/somepath/somefile.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
+TEST_F(FileNameMatchTrieTest, RootElementDifferent) {
+ Trie.insert("/path/file.cc");
+ Trie.insert("/otherpath/file.cc");
+ EXPECT_EQ("/path/file.cc", find("/path/file.cc"));
+ EXPECT_FALSE(IsAmbiguous);
+}
+
TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
std::string ErrorMessage;
CompileCommand NotFound = findCompileArgsInJsonDatabase(
@@ -148,7 +225,7 @@
}
TEST(findCompileArgsInJsonDatabase, FindsEntry) {
- StringRef Directory("directory");
+ StringRef Directory("/directory");
StringRef FileName("file");
StringRef Command("command");
std::string JsonDatabase = "[";
@@ -162,19 +239,19 @@
JsonDatabase += "]";
std::string ErrorMessage;
CompileCommand FoundCommand = findCompileArgsInJsonDatabase(
- "file4", JsonDatabase, ErrorMessage);
- EXPECT_EQ("directory4", FoundCommand.Directory) << ErrorMessage;
+ "/directory4/file4", JsonDatabase, ErrorMessage);
+ EXPECT_EQ("/directory4", FoundCommand.Directory) << ErrorMessage;
ASSERT_EQ(1u, FoundCommand.CommandLine.size()) << ErrorMessage;
EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage;
}
static std::vector<std::string> unescapeJsonCommandLine(StringRef Command) {
std::string JsonDatabase =
- ("[{\"directory\":\"\", \"file\":\"test\", \"command\": \"" +
+ ("[{\"directory\":\"/root\", \"file\":\"test\", \"command\": \"" +
Command + "\"}]").str();
std::string ErrorMessage;
CompileCommand FoundCommand = findCompileArgsInJsonDatabase(
- "test", JsonDatabase, ErrorMessage);
+ "/root/test", JsonDatabase, ErrorMessage);
EXPECT_TRUE(ErrorMessage.empty()) << ErrorMessage;
return FoundCommand.CommandLine;
}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits