Hi chandlerc,

This is a first version of my patch to properly support symlinks in the 
compilation database design, hopefully following the algorithm we discussed on 
IRC (see class comment of FileNameMatchTrie).

This is not polished yet (see FIXMEs), but I would like to get feedback before 
going (further) into the wrong direction.

http://llvm-reviews.chandlerc.com/D30

Files:
  include/clang/Tooling/CompilationDatabase.h
  include/clang/Tooling/JSONCompilationDatabase.h
  lib/Tooling/CompilationDatabase.cpp
  lib/Tooling/JSONCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp
Index: include/clang/Tooling/CompilationDatabase.h
===================================================================
--- include/clang/Tooling/CompilationDatabase.h
+++ include/clang/Tooling/CompilationDatabase.h
@@ -31,8 +31,12 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/PathV2.h"
+
 #include <string>
 #include <vector>
 
@@ -186,6 +190,66 @@
   std::vector<CompileCommand> CompileCommands;
 };
 
+struct PathComparator {
+  virtual bool equivalent(const Twine &FileA, const Twine &FileB) const {
+    return FileA.str() == FileB.str() ||
+        llvm::sys::fs::equivalent(FileA, FileB);
+  }
+};
+
+/// \brief A trie to efficiently match against the entries of the compilation
+/// database in order of matching suffix length.
+///
+/// When a clang tool is supposed to operate on a specific file, we have to
+/// find the corresponding file in the compilation database. Although entries
+/// in the compilation database are keyed by filename, a simple string match
+/// is insufficient because of symlinks. Commonly, a project hierarchy looks
+/// like this:
+///   /<project-root/src/<path>/<somefile>.cc      (used as input for the tool)
+///   /<project-root/build/<symlink-to-src>/<path>/<somefile>.cc (stored in DB)
+///
+/// Furthermore, there might be symlinks inside the source folder or inside the
+/// database, so that the same source file is translated with different build
+/// options.
+///
+/// For a given input file, the \c FileNameMatchTrie finds its entries in order
+/// of matching suffix length. For each suffix length, there might be one or
+/// more entries in the database. For each of those entries, it calls
+/// \c llvm::sys::fs::equivalent() (injected as \c PathComparator). There might
+/// be zero or more entries with the same matching suffix length that are
+/// equivalent to the input file. Three cases are distinguished:
+/// 0  equivalent files: Continue with the next suffix length.
+/// 1  equivalent file:  Best match found, return it.
+/// >1 equivalent files: Match is ambiguous, return error.
+class FileNameMatchTrie {
+public:
+  /// \brief Inserts 'NewPath' into this trie.
+  void insert(StringRef NewPath, unsigned ConsumedLength = 0);
+
+  /// \brief Finds the corresponding file in this trie.
+  ///
+  /// Returns file name stored in this trie that is equivalent to 'FileName'
+  /// according to 'Comparator', if it can be uniquely identified. If there
+  /// are no matches an empty StringRef is return. If there are ambigious
+  /// matches, an empty StringRef is return and a corresponding message written
+  /// to 'Error'.
+  StringRef findEquivalent(const PathComparator &Comparator,
+                           StringRef FileName,
+                           std::string *Error,
+                           unsigned ConsumedLength = 0) const;
+
+  /// \brief Gets all paths in this FileNameMatchTrie.
+  void getAll(std::vector<StringRef> &Results) const;
+
+private:
+  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
+  // nodes where Children.empty().
+  std::string Path;
+
+  // The children of this node stored in a map based on the next path segment.
+  llvm::StringMap<FileNameMatchTrie> Children;
+};
+
 } // end namespace tooling
 } // end namespace clang
 
Index: include/clang/Tooling/JSONCompilationDatabase.h
===================================================================
--- include/clang/Tooling/JSONCompilationDatabase.h
+++ include/clang/Tooling/JSONCompilationDatabase.h
@@ -92,6 +92,8 @@
 
   // Maps file paths to the compile command lines for that file.
   llvm::StringMap< std::vector<CompileCommandRef> > IndexByFile;
+  
+  FileNameMatchTrie MatchTrie;
 
   llvm::OwningPtr<llvm::MemoryBuffer> Database;
   llvm::SourceMgr SM;
Index: lib/Tooling/CompilationDatabase.cpp
===================================================================
--- lib/Tooling/CompilationDatabase.cpp
+++ lib/Tooling/CompilationDatabase.cpp
@@ -132,5 +132,83 @@
 extern volatile int JSONAnchorSource;
 static int JSONAnchorDest = JSONAnchorSource;
 
+void FileNameMatchTrie::insert(StringRef NewPath, unsigned ConsumedLength) {
+  if (llvm::sys::path::is_relative(NewPath))
+    return;
+  if (Path.empty()) {
+    // This is an empty leaf. Store NewPath and return.
+    Path = NewPath;
+    return;
+  }
+  if (Children.empty()) {
+    // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. 
+    if (NewPath == Path)
+        return;
+    // Make this a node and create a child-leaf with 'Path'.
+    StringRef Element(llvm::sys::path::filename(
+        StringRef(Path).drop_back(ConsumedLength)));
+    Children[Element].Path = Path;
+  }
+  StringRef Element(llvm::sys::path::filename(
+        StringRef(NewPath).drop_back(ConsumedLength)));
+  Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
+}
+
+StringRef FileNameMatchTrie::findEquivalent(const PathComparator &Comparator,
+                                            StringRef FileName,
+                                            std::string *Error,
+                                            unsigned ConsumedLength) const {
+  if (llvm::sys::path::is_relative(FileName)) {
+    *Error = "Cannot resolve relative paths";
+    return StringRef();
+  }
+  if (Children.empty()) {
+    if (Comparator.equivalent(StringRef(Path), FileName))
+      return StringRef(Path);
+    return StringRef();
+  }
+  StringRef Element(llvm::sys::path::filename(FileName.drop_back(
+      ConsumedLength)));
+  llvm::StringMap<FileNameMatchTrie>::const_iterator MatchingChild =
+      Children.find(Element);
+  if (MatchingChild != Children.end()) {
+    StringRef Result = MatchingChild->getValue().findEquivalent(
+        Comparator, FileName, Error, ConsumedLength + Element.size() + 1);
+    if (!Result.empty() || !Error->empty())
+      return Result;
+  }
+  // FIXME: This will re-examine 'MatchingChild' which does not have any
+  // matching children. Thus, the result is still correct, but performance
+  // can be improved.
+  std::vector<StringRef> AllChildren;
+  getAll(AllChildren);
+  StringRef Result;
+  for (unsigned i = 0; i < AllChildren.size(); i++) {
+    if (Comparator.equivalent(AllChildren[i], FileName)) {
+      if (Result.empty()) {
+        Result = AllChildren[i];
+      } else {
+        *Error = "Path is ambiguous";
+        return StringRef();
+      }
+    }
+  }
+  return Result;
+}
+
+void FileNameMatchTrie::getAll(std::vector<StringRef> &Results) const {
+  if (Path.empty())
+    return;
+  if (Children.empty()) {
+    Results.push_back(StringRef(Path));
+    return;
+  }
+  for (llvm::StringMap<FileNameMatchTrie>::const_iterator
+       It = Children.begin(), E = Children.end();
+       It != E; ++It) {
+    It->getValue().getAll(Results);
+  }
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Tooling/JSONCompilationDatabase.cpp
===================================================================
--- lib/Tooling/JSONCompilationDatabase.cpp
+++ lib/Tooling/JSONCompilationDatabase.cpp
@@ -164,8 +164,19 @@
 JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
   llvm::SmallString<128> NativeFilePath;
   llvm::sys::path::native(FilePath, NativeFilePath);
+  std::vector<StringRef> PossibleMatches;
+  PathComparator Comparator;
+  std::string Error;
+  StringRef Match = MatchTrie.findEquivalent(Comparator, NativeFilePath.str(),
+                                             &Error);
+  if (Match.empty()) {
+    if (Error.empty())
+      Error = "No match found.";
+    llvm::outs() << Error << "\n";
+    return std::vector<CompileCommand>();
+  }
   llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
-    CommandsRefI = IndexByFile.find(NativeFilePath);
+    CommandsRefI = IndexByFile.find(Match);
   if (CommandsRefI == IndexByFile.end())
     return std::vector<CompileCommand>();
   const std::vector<CompileCommandRef> &CommandsRef = CommandsRefI->getValue();
@@ -271,10 +282,19 @@
       return false;
     }
     llvm::SmallString<8> FileStorage;
+    StringRef FileName = File->getValue(FileStorage);
     llvm::SmallString<128> NativeFilePath;
-    llvm::sys::path::native(File->getValue(FileStorage), NativeFilePath);
+    if (llvm::sys::path::is_relative(FileName)) {
+      llvm::SmallString<8> DirectoryStorage;
+      llvm::SmallString<128> AbsolutePath(Directory->getValue(DirectoryStorage));
+      llvm::sys::path::append(AbsolutePath, FileName);
+      llvm::sys::path::native(AbsolutePath.str(), NativeFilePath);
+    } else {
+      llvm::sys::path::native(FileName, NativeFilePath);
+    }
     IndexByFile[NativeFilePath].push_back(
-      CompileCommandRef(Directory, Command));
+        CompileCommandRef(Directory, Command));
+    MatchTrie.insert(NativeFilePath.str());
   }
   return true;
 }
Index: unittests/Tooling/CompilationDatabaseTest.cpp
===================================================================
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -55,13 +55,13 @@
             getAllFiles("[]", ErrorMessage)) << ErrorMessage;
 
   std::vector<std::string> expected_files;
-  expected_files.push_back("file1");
-  expected_files.push_back("file2");
+  expected_files.push_back("/dir/file1");
+  expected_files.push_back("/dir/file2");
   EXPECT_EQ(expected_files, getAllFiles(
-    "[{\"directory\":\"dir\","
+    "[{\"directory\":\"/dir\","
       "\"command\":\"command\","
       "\"file\":\"file1\"},"
-    " {\"directory\":\"dir\","
+    " {\"directory\":\"/dir\","
       "\"command\":\"command\","
       "\"file\":\"file2\"}]",
     ErrorMessage)) << ErrorMessage;
@@ -81,6 +81,82 @@
   return Commands[0];
 }
 
+struct FakeComparator : public PathComparator {
+  virtual bool equivalent(const Twine &FileA, const Twine &FileB) const {
+    return StringRef(FileA.str()).equals_lower(FileB.str());
+  }
+};
+
+class FileNameMatchTrieTest : public ::testing::Test {
+protected:
+  StringRef find(StringRef Path) {
+    return Trie.findEquivalent(Comparator, Path, &Error);
+  }
+
+  FileNameMatchTrie Trie;
+  FakeComparator Comparator;
+  std::string Error;
+};
+
+TEST_F(FileNameMatchTrieTest, InsertingRelativePath) {
+  Trie.insert("/path/file.cc");
+  Trie.insert("file.cc");
+  EXPECT_EQ("/path/file.cc", find("/path/file.cc"));
+  EXPECT_EQ("", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, MatchingRelativePath) {
+  EXPECT_EQ("", find("file.cc"));
+  EXPECT_EQ("Cannot resolve relative paths", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, ReturnsBestResults) {
+  Trie.insert("/d/c/b.cc");
+  Trie.insert("/d/b/b.cc");
+  EXPECT_EQ("/d/b/b.cc", find("/d/b/b.cc"));
+  EXPECT_EQ("", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, HandlesSymlinks) {
+  Trie.insert("/AA/file.cc");
+  EXPECT_EQ("/AA/file.cc", find("/aa/file.cc"));
+  EXPECT_EQ("", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, ReportsSymlinkAmbiguity) {
+  Trie.insert("/Aa/file.cc");
+  Trie.insert("/aA/file.cc");
+  EXPECT_TRUE(find("/aa/file.cc").empty());
+  EXPECT_EQ("Path is ambiguous", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, LongerMatchingSuffixPreferred) {
+  Trie.insert("/src/Aa/file.cc");
+  Trie.insert("/src/aA/file.cc");
+  Trie.insert("/SRC/aa/file.cc");
+  EXPECT_EQ("/SRC/aa/file.cc", find("/src/aa/file.cc"));
+  EXPECT_EQ("", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, EmptyTrie) {
+  EXPECT_TRUE(find("/some/path").empty());
+  EXPECT_EQ("", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, NoResult) {
+  Trie.insert("/somepath/otherfile.cc");
+  Trie.insert("/otherpath/somefile.cc");
+  EXPECT_EQ("", find("/somepath/somefile.cc"));
+  EXPECT_EQ("", Error);
+}
+
+TEST_F(FileNameMatchTrieTest, RootElementDifferent) {
+  Trie.insert("/path/file.cc");
+  Trie.insert("/otherpath/file.cc");
+  EXPECT_EQ("/path/file.cc", find("/path/file.cc"));
+  EXPECT_EQ("", Error);
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
@@ -148,7 +224,7 @@
 }
 
 TEST(findCompileArgsInJsonDatabase, FindsEntry) {
-  StringRef Directory("directory");
+  StringRef Directory("/directory");
   StringRef FileName("file");
   StringRef Command("command");
   std::string JsonDatabase = "[";
@@ -162,19 +238,19 @@
   JsonDatabase += "]";
   std::string ErrorMessage;
   CompileCommand FoundCommand = findCompileArgsInJsonDatabase(
-    "file4", JsonDatabase, ErrorMessage);
-  EXPECT_EQ("directory4", FoundCommand.Directory) << ErrorMessage;
+    "/directory4/file4", JsonDatabase, ErrorMessage);
+  EXPECT_EQ("/directory4", FoundCommand.Directory) << ErrorMessage;
   ASSERT_EQ(1u, FoundCommand.CommandLine.size()) << ErrorMessage;
   EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage;
 }
 
 static std::vector<std::string> unescapeJsonCommandLine(StringRef Command) {
   std::string JsonDatabase =
-    ("[{\"directory\":\"\", \"file\":\"test\", \"command\": \"" +
+    ("[{\"directory\":\"/root\", \"file\":\"test\", \"command\": \"" +
      Command + "\"}]").str();
   std::string ErrorMessage;
   CompileCommand FoundCommand = findCompileArgsInJsonDatabase(
-    "test", JsonDatabase, ErrorMessage);
+    "/root/test", JsonDatabase, ErrorMessage);
   EXPECT_TRUE(ErrorMessage.empty()) << ErrorMessage;
   return FoundCommand.CommandLine;
 }
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to