[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-09 Thread Sam McCall via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
sammccall marked an inline comment as done.
Closed by commit rC329580: [Tooling] A CompilationDatabase wrapper that infers 
header commands. (authored by sammccall, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D45006?vs=141172&id=141658#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,115 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, path(File), {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or full, case insensitive)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"),
+"clang -D dir/bar.cpp -x c++-header");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp", "-std=c++17");
+  add("dir/baz.cee", "-x c");
+
+  // .h is ambiguous, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"),
+"clang -D dir/foo.cpp -x c++-header -std c++17");
+  // and don't add -x if the inferred language is correct.
+  EXPECT_EQ(getCommand("foo.hpp"), "clang -D dir/foo.cpp -std c++17");
+  // respect -x if it's already there.
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c-header");
+  // prefer a worse match with the right language
+  EXPECT_EQ(getCommand("foo.c"), "clang -D dir/baz.cee");
+  Entries.erase(path(StringRef("dir/baz.cee")));
+  // Now we transfer across languages, so drop -std too.
+  EXPECT_EQ(getCommand("foo.c"), "clang -D dir/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
+TEST_F(InterpolateTest, Case) {
+  add("FOO/BAR/BAZ/SHOUT.cc");
+  add("foo/bar/baz/quiet.cc");
+  // Case mismatches are completely ignored, so we choos

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-09 Thread Sam McCall via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL329580: [Tooling] A CompilationDatabase wrapper that infers 
header commands. (authored by sammccall, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D45006?vs=141172&id=141659#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D45006

Files:
  cfe/trunk/include/clang/Tooling/CompilationDatabase.h
  cfe/trunk/lib/Tooling/CMakeLists.txt
  cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp
  cfe/trunk/unittests/Tooling/CompilationDatabaseTest.cpp

Index: cfe/trunk/include/clang/Tooling/CompilationDatabase.h
===
--- cfe/trunk/include/clang/Tooling/CompilationDatabase.h
+++ cfe/trunk/include/clang/Tooling/CompilationDatabase.h
@@ -213,6 +213,13 @@
   std::vector CompileCommands;
 };
 
+/// Returns a wrapped CompilationDatabase that defers to the provided one,
+/// but getCompileCommands() will infer commands for unknown files.
+/// The return value of getAllFiles() or getAllCompileCommands() is unchanged.
+/// See InterpolatingCompilationDatabase.cpp for details on heuristics.
+std::unique_ptr
+inferMissingCompileCommands(std::unique_ptr);
+
 } // namespace tooling
 } // namespace clang
 
Index: cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp
+++ cfe/trunk/lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,458 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// InterpolatingCompilationDatabase wraps another CompilationDatabase and
+// attempts to heuristically determine appropriate compile commands for files
+// that are not included, such as headers or newly created files.
+//
+// Motivating cases include:
+//   Header files that live next to their implementation files. These typically
+// share a base filename. (libclang/CXString.h, libclang/CXString.cpp).
+//   Some projects separate headers from implementation files. Filenames still typically
+// match, maybe other path segments too. (include/llvm/IR/Use.h, lib/IR/Use.cc).
+//   Matches are sometimes only approximate (Sema.h, SemaDecl.cpp). This goes
+// for directories too (Support/Unix/Process.inc, lib/Support/Process.cpp).
+//   Even if we can't find a "right" compile command, even a random one from
+// the project will tend to get important flags like -I and -x right.
+//
+// We "borrow" the compile command for the closest available file:
+//   - points are awarded if the filename matches (ignoring extension)
+//   - points are awarded if the directory structure matches
+//   - ties are broken by length of path prefix match
+//
+// The compile command is adjusted, replacing the filename and removing output
+// file arguments. The -x and -std flags may be affected too.
+//
+// Source language is a tricky issue: is it OK to use a .c file's command
+// for building a .cc file? What language is a .h file in?
+//   - We only consider compile commands for c-family languages as candidates.
+//   - For files whose language is implied by the filename (e.g. .m, .hpp)
+// we prefer candidates from the same language.
+// If we must cross languages, we drop any -x and -std flags.
+//   - For .h files, candidates from any c-family language are acceptable.
+// We use the candidate's language, inserting  e.g. -x c++-header.
+//
+// This class is only useful when wrapping databases that can enumerate all
+// their compile commands. If getAllFiles() is empty, no inference occurs.
+//
+//===--===//
+
+#include "clang/Driver/Options.h"
+#include "clang/Driver/Types.h"
+#include "clang/Frontend/LangStandard.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/raw_ostream.h"
+#include 
+
+namespace clang {
+namespace tooling {
+namespace {
+using namespace llvm;
+namespace types = clang::driver::types;
+namespace path = llvm::sys::path;
+
+// The length of the prefix these two strings have in common.
+size_t matchingPrefix(StringRef L, StringRef R) {
+  size_t Limit = std::min(L.size(), R.size());
+  for (size_t I = 0; I < Limit; ++I)
+if (L[I] != R[I])
+  return I;
+  return Limit;
+}
+
+// A comparator for searching Subst

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-09 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov accepted this revision.
ilya-biryukov added a comment.
This revision is now accepted and ready to land.

LGTM with a small nit.
Really excited about this landing!




Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:353
+  }
+  Best = Candidate.first;
+  BestPreferred = Preferred;

Maybe put these fields into `struct Candidate {}`?
The code would, arguably, be easier to read. Up to you.


Repository:
  rC Clang

https://reviews.llvm.org/D45006



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-05 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 141172.
sammccall added a comment.

Address cross-language issues in a more comprehensive way.
Prefers (potentially) same-language over cross-language, handles cross-language
flag transfers correctly using -x and handling (dropping) -std.


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,115 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, path(File), {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or full, case insensitive)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"),
+"clang -D dir/bar.cpp -x c++-header");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp", "-std=c++17");
+  add("dir/baz.cee", "-x c");
+
+  // .h is ambiguous, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"),
+"clang -D dir/foo.cpp -x c++-header -std c++17");
+  // and don't add -x if the inferred language is correct.
+  EXPECT_EQ(getCommand("foo.hpp"), "clang -D dir/foo.cpp -std c++17");
+  // respect -x if it's already there.
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c-header");
+  // prefer a worse match with the right language
+  EXPECT_EQ(getCommand("foo.c"), "clang -D dir/baz.cee");
+  Entries.erase(path(StringRef("dir/baz.cee")));
+  // Now we transfer across languages, so drop -std too.
+  EXPECT_EQ(getCommand("foo.c"), "clang -D dir/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
+TEST_F(InterpolateTest, Case) {
+  add("FOO/BAR/BAZ/SHOUT.cc");
+  add("foo/bar/baz/quiet.cc");
+  // Case mismatches are completely ignored, so we choose the name match.
+  EXPECT_EQ(getCommand("foo/bar/baz/sho

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-04 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:308
+   llvm::sys::path::extension(Filename).substr(1));
+  if (OldLang && NewLang != OldLang) {
+Base.CommandLine.push_back("-x");

ilya-biryukov wrote:
> It feels like this heuristic only works for headers and files without 
> extension (i.e. probably also headers).
> E.g., if we have a .cpp file and .c file, then trying to infer args for .c 
> file from .cpp file is probably the wrong thing to do. And using Fortran 
> flags for C or C++ is certainly the wrong thing to do.
> 
> It seems transferring flags between different languages is never fine, except 
> for C/C++ headers. WDYT?
Urgh, you're right, this is dubious. But I think your suggestion is too narrow:
 - transferring flags between *.m, *.mm, and *.h seems fine (mixed m and mm 
isn't that uncommon I think). -x should be set on the headers but not on the m 
or mm files.
 - transferring between *.c and *.cc doesn't seem always wrong. Many -W, -I, 
and -D flags are shared (aren't these the most important ones?). Clearly adding 
-x is bad in this case.
 - yeah, fortran... we should drop those, but I'd wait for a report. 
compile_commands is a clang "standard" afaik, so putting fortran there doesn't 
make sense unless the build system doesn't know about languages.
 - also if we hard-ban some candidates, we no longer have the guarantee that we 
can pick a best candidate, which adds complexity

So I'd suggest:
 - in addition to the "implied language changed" guard, only add -x to certain 
languages
 - maybe we should reward same-language somehow. This is tricky, because if 
there's a compile command for one header, it might be quite unusual. Also 
there'll be *lots* of matches. Not sure how to best do this.



Repository:
  rC Clang

https://reviews.llvm.org/D45006



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-04 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 140941.
sammccall marked 3 inline comments as done.
sammccall added a comment.

Address comments, except cross-language flag transfer which needs more thought.


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,108 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, File, {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or full, case insensitive)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"), "clang -x c++ -D dir/bar.cpp");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp");
+  add("dir/baz.cee", "-x c");
+
+  // extension changed, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"), "clang -x c++ -D dir/foo.cpp");
+  // but we don't add -x if it's already there
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c");
+  // and don't add -x if the inferred language didn't change
+  EXPECT_EQ(getCommand("foo.cc"), "clang -D dir/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
+TEST_F(InterpolateTest, Case) {
+  add("FOO/BAR/BAZ/SHOUT.cc");
+  add("foo/bar/baz/quiet.cc");
+  // Case mismatches are completely ignored, so we choose the name match.
+  EXPECT_EQ(getCommand("foo/bar/baz/shout.C"), "clang -D FOO/BAR/BAZ/SHOUT.cc");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,333 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compi

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-04 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:96
+// Sort input for determinism (index is used as a tiebreaker).
+llvm::sort(OriginalPaths.begin(), OriginalPaths.end());
+for (size_t I = 0; I < Filenames.size(); ++I) {

`OriginalPaths` is empty at this point, did we intend to sort `Filenames` 
instead?



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:99
+  OriginalPaths.emplace_back(Strings.save(Filenames[I]));
+  StringRef Path = Strings.save(OriginalPaths.back().lower());
+  Paths.push_back({Path, I});

Given that we do an extra allocation when using `lower()` anyway, an extra copy 
into `Strings` is redundant.
Do we really need the arena at this point? It adds extra copies that we might 
not want.
It might give better memory locality, though, so it may be faster to use it 
overall, so up to you.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:224
+if (It == Paths.end())
+  return *--It;
+// Have to choose between It and It-1

Maybe add an assertion that `Idx` is non-empty?



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:308
+   llvm::sys::path::extension(Filename).substr(1));
+  if (OldLang && NewLang != OldLang) {
+Base.CommandLine.push_back("-x");

It feels like this heuristic only works for headers and files without 
extension (i.e. probably also headers).
E.g., if we have a .cpp file and .c file, then trying to infer args for .c file 
from .cpp file is probably the wrong thing to do. And using Fortran flags for C 
or C++ is certainly the wrong thing to do.

It seems transferring flags between different languages is never fine, except 
for C/C++ headers. WDYT?


Repository:
  rC Clang

https://reviews.llvm.org/D45006



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-03 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 140802.
sammccall added a comment.

Add more tests, and only add -x when actually needed.


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,108 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, File, {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or full, case insensitive)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"), "clang -x c++ -D dir/bar.cpp");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp");
+  add("dir/baz.cee", "-x c");
+
+  // extension changed, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"), "clang -x c++ -D dir/foo.cpp");
+  // but we don't add -x if it's already there
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c");
+  // and don't add -x if the inferred language didn't change
+  EXPECT_EQ(getCommand("foo.cc"), "clang -D dir/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
+TEST_F(InterpolateTest, Case) {
+  add("FOO/BAR/BAZ/SHOUT.cc");
+  add("foo/bar/baz/quiet.cc");
+  // Case mismatches are completely ignored, so we choose the name match.
+  EXPECT_EQ(getCommand("foo/bar/baz/shout.C"), "clang -D FOO/BAR/BAZ/SHOUT.cc");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,332 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the Universi

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-03 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 140792.
sammccall added a comment.

clang-format


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,99 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, File, {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or full, case insensitive)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"), "clang -x c++ -D dir/bar.cpp");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp");
+  add("dir/baz.cee", "-x c");
+
+  // extension changed, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"), "clang -x c++ -D dir/foo.cpp");
+  // but we don't add -x if it's already there
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,329 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// InterpolatingCompilationDatabase wraps another CompilationDatabase and
+// attempts to heuristically determine appropriate compile commands for files
+// that are not included, such as headers or newly created files.
+//
+// Motivating cases include:
+//   Hea

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-03 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 140791.
sammccall marked 6 inline comments as done.
sammccall added a comment.

Address review comments.


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,99 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, File, {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or full, case insensitive)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/FOO.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"), "clang -x c++ -D dir/bar.cpp");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp");
+  add("dir/baz.cee", "-x c");
+
+  // extension changed, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"), "clang -x c++ -D dir/foo.cpp");
+  // but we don't add -x if it's already there
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,329 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// InterpolatingCompilationDatabase wraps another CompilationDatabase and
+// attempts to heuristically determine appropriate compile commands for files
+// that are not included, such as headers or newly cr

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-03 Thread Sam McCall via Phabricator via cfe-commits
sammccall added inline comments.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:115
+for (auto F : getAllFiles())
+  Paths.emplace_back(Strings.save(F), 0);
+finalizeIndex();

ilya-biryukov wrote:
> This class seems to do two somewhat orthogonal things: 
>   - build and query the index structure for the paths,
>   - handle queries to inner CDB and patch the compile commands for other 
> files accordingly.
> 
> Maybe we could extract the code that handles the index into a separate class?
I split out FilenameIndex, which just does the filename->filename mapping. The 
CompilationDatabase implementation now only contains the interface methods and 
adjust().


Repository:
  rC Clang

https://reviews.llvm.org/D45006



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-04-03 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:26
+// their compile commands. If getAllFilenames() is empty, no inference occurs.
+//
+//===--===//

Maybe add a comment describing the use-cases we had in mind while designing 
these heuristics? 
  - .cpp and .h files usually have the same (or slightly modified) name, 
and usually their prefixes match,
  - LLVM (and other) codebases that put .h and .cpp files into different 
directories,
  - even random matches are better than arbitrary defaults,
  - ...



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:83
+template  int prefixCompare(StringRef S, StringRef Prefix) {
+  if (S.size() >= Prefix.size())
+return Reverse ? rMemCompare(S.end(), Prefix.end(), Prefix.size())

Summarizing the offline discussion, we could exclude suffix matches from the 
initial version. This would make the code much simpler, and it seems most C++ 
projects we know of would actually work with prefix-only matches.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:115
+for (auto F : getAllFiles())
+  Paths.emplace_back(Strings.save(F), 0);
+finalizeIndex();

This class seems to do two somewhat orthogonal things: 
  - build and query the index structure for the paths,
  - handle queries to inner CDB and patch the compile commands for other files 
accordingly.

Maybe we could extract the code that handles the index into a separate class?



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:141
+  void finalizeIndex() {
+llvm::sort(Paths.begin(), Paths.end());
+for (size_t I = 0; I < Paths.size(); ++I) {

Maybe store lower-cased paths in the index and compare case-insensitively when 
querying?
Having slight case mismatches is not uncommon and case-sensitivity shouldn't 
ever be the defining factor for this kind of heuristic.



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:147
+  auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path);
+  // Index up to 4 path components.
+  for (int J = 0; J < 4 && Dir != DirEnd; ++J, ++Dir)

Same as prev. comment, maybe comment on why 4 was chosen here? Maybe use a 
named constant?



Comment at: lib/Tooling/InterpolatingCompilationDatabase.cpp:187
+StringRef Stem = sys::path::stem(Filename);
+llvm::SmallVector Dirs; // Only look up the last 2.
+llvm::StringRef Prefix;

Maybe add a comment why 2 is chosen here? Also, maybe use a named constant?


Repository:
  rC Clang

https://reviews.llvm.org/D45006



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-03-29 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 140349.
sammccall added a comment.

Add tests, fix minor bugs, add lots of comments and structure.


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp
  unittests/Tooling/CompilationDatabaseTest.cpp

Index: unittests/Tooling/CompilationDatabaseTest.cpp
===
--- unittests/Tooling/CompilationDatabaseTest.cpp
+++ unittests/Tooling/CompilationDatabaseTest.cpp
@@ -626,5 +626,99 @@
   EXPECT_EQ(2, Argc);
 }
 
+struct MemCDB : public CompilationDatabase {
+  using EntryMap = llvm::StringMap>;
+  EntryMap Entries;
+  MemCDB(const EntryMap &E) : Entries(E) {}
+
+  std::vector getCompileCommands(StringRef F) const override {
+auto Ret = Entries.lookup(F);
+return {Ret.begin(), Ret.end()};
+  }
+
+  std::vector getAllFiles() const override {
+std::vector Result;
+for (const auto &Entry : Entries)
+  Result.push_back(Entry.first());
+return Result;
+  }
+};
+
+class InterpolateTest : public ::testing::Test {
+protected:
+  // Adds an entry to the underlying compilation database.
+  // A flag is injected: -D <File>, so the command used can be identified.
+  void add(llvm::StringRef File, llvm::StringRef Flags = "") {
+llvm::SmallVector Argv = {"clang", File, "-D", File};
+llvm::SplitString(Flags, Argv);
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+Entries[path(File)].push_back(
+{Dir, File, {Argv.begin(), Argv.end()}, "foo.o"});
+  }
+
+  // Turn a unix path fragment (foo/bar.h) into a native path (C:\tmp\foo\bar.h)
+  std::string path(llvm::SmallString<32> File) {
+llvm::SmallString<32> Dir;
+llvm::sys::path::system_temp_directory(false, Dir);
+llvm::sys::path::native(File);
+llvm::SmallString<64> Result;
+llvm::sys::path::append(Result, Dir, File);
+return Result.str();
+  }
+
+  // Look up the command from a relative path, and return it in string form.
+  // The input file is not included in the returned command.
+  std::string getCommand(llvm::StringRef F) {
+auto Results =
+inferMissingCompileCommands(llvm::make_unique(Entries))
+->getCompileCommands(path(F));
+if (Results.empty())
+  return "none";
+// drop the input file argument, so tests don't have to deal with path().
+EXPECT_EQ(Results[0].CommandLine.back(), path(F))
+<< "Last arg should be the file";
+Results[0].CommandLine.pop_back();
+return llvm::join(Results[0].CommandLine, " ");
+  }
+
+  MemCDB::EntryMap Entries;
+};
+
+TEST_F(InterpolateTest, Nearby) {
+  add("dir/foo.cpp");
+  add("dir/bar.cpp");
+  add("an/other/foo.cpp");
+
+  // great: dir and name both match (prefix or suffix)
+  EXPECT_EQ(getCommand("dir/f.cpp"), "clang -D dir/foo.cpp");
+  EXPECT_EQ(getCommand("dir/o.cpp"), "clang -D dir/foo.cpp");
+  // no name match. prefer matching dir, break ties by alpha
+  EXPECT_EQ(getCommand("dir/a.cpp"), "clang -D dir/bar.cpp");
+  // an exact name match beats one segment of directory match
+  EXPECT_EQ(getCommand("some/other/bar.h"), "clang -x c++ -D dir/bar.cpp");
+  // two segments of directory match beat a prefix name match
+  EXPECT_EQ(getCommand("an/other/b.cpp"), "clang -D an/other/foo.cpp");
+  // if nothing matches at all, we still get the closest alpha match
+  EXPECT_EQ(getCommand("below/some/obscure/path.cpp"),
+"clang -D an/other/foo.cpp");
+}
+
+TEST_F(InterpolateTest, Language) {
+  add("dir/foo.cpp");
+  add("dir/baz.cee", "-x c");
+
+  // extension changed, so we add explicit language flags
+  EXPECT_EQ(getCommand("foo.h"), "clang -x c++ -D dir/foo.cpp");
+  // but we don't add -x if it's already there
+  EXPECT_EQ(getCommand("baz.h"), "clang -D dir/baz.cee -x c");
+}
+
+TEST_F(InterpolateTest, Strip) {
+  add("dir/foo.cpp", "-o foo.o -Wall");
+  // the -o option and the input file are removed, but -Wall is preserved.
+  EXPECT_EQ(getCommand("dir/bar.cpp"), "clang -D dir/foo.cpp -Wall");
+}
+
 } // end namespace tooling
 } // end namespace clang
Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,340 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// InterpolatingCompilationDatabase wraps another CompilationDatabase and
+// attempts to heuristically determine appropriate compile commands for files
+// that are not included, such as headers or newly created files.
+//
+// We 

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-03-28 Thread Sam McCall via Phabricator via cfe-commits
sammccall updated this revision to Diff 140160.
sammccall added a comment.

Handle the case where no points are awarded (e.g. system headers) and document 
awards better.


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp

Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,293 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// InterpolatingCompilationDatabase wraps another CompilationDatabase and
+// attempts to heuristically determine appropriate compile commands for files
+// that are not included, such as headers or newly created files.
+//
+// We "borrow" the compile command for the closest available file:
+//   - points are awarded if the filename matches (ignoring extension)
+//   - points are awarded if the directory structure matches
+//   - ties are broken by length of path prefix match
+//
+// The compile command is adjusted:
+//   - the input filename is replaced
+//   - if the extension differs, an "-x" flag is added to preserve the language
+//   - output file arguments are removed
+//
+// This class is only useful when wrapping databases that can enumerate all
+// their compile commands. If getAllFiles() is empty, no inference occurs.
+//
+//===--===//
+
+#include "clang/Driver/Options.h"
+#include "clang/Driver/Types.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include 
+
+namespace clang {
+namespace tooling {
+namespace {
+using namespace llvm;
+
+size_t matchingPrefix(StringRef L, StringRef R) {
+  size_t Limit = std::min(L.size(), R.size());
+  for (size_t I = 0; I < Limit; ++I)
+if (L[I] != R[I])
+  return I;
+  return Limit;
+}
+
+// Like memcmp(), but traverses in reverse order. L and R are one-past-end.
+int rMemCompare(const char *L, const char *R, size_t N) {
+  for (const char *LStop = L - N; L > LStop;)
+if (*--L != *--R)
+  return *L < *R ? -1 : 1;
+  return 0;
+}
+
+// This is like L.compare(R), but maybe with the order of characters reversed.
+template 
+int compare(StringRef L, StringRef R) {
+  if (!Reverse)
+return L.compare(R);
+  // Traverse the common region backwards, first differing byte is decisive.
+  if (int Cmp = rMemCompare(L.end(), R.end(), std::min(L.size(), R.size(
+return Cmp;
+  // No byte differed, so the shorter string is smaller.
+  return L.size() == R.size() ? 0 : L.size() < R.size() ? -1 : 1;
+}
+
+// Returns 0 if S starts with prefix, else -1 for S < Prefix, 1 for S > Prefix.
+template  int prefixCompare(StringRef S, StringRef Prefix) {
+  if (S.size() >= Prefix.size())
+return Reverse ? rMemCompare(S.end(), Prefix.end(), Prefix.size())
+   : memcmp(S.begin(), Prefix.begin(), Prefix.size());
+  return compare(S, Prefix);
+}
+
+template  struct Less {
+  bool operator()(StringRef Key, std::pair Value) const {
+return Prefix ? prefixCompare(Value.first, Key) > 0
+  : compare(Key, Value.first) < 0;
+  }
+  bool operator()(std::pair Value, StringRef Key) const {
+return Prefix ? prefixCompare(Value.first, Key) < 0
+  : compare(Value.first, Key) < 0;
+  }
+};
+
+class InterpolatingCompilationDatabase : public CompilationDatabase {
+public:
+  InterpolatingCompilationDatabase(std::unique_ptr Inner)
+  : Inner(std::move(Inner)), Strings(Arena) {
+for (auto F : getAllFiles())
+  Paths.emplace_back(Strings.save(F), 0);
+finalizeIndex();
+  }
+
+  std::vector
+  getCompileCommands(StringRef FilePath) const override {
+auto Known = Inner->getCompileCommands(FilePath);
+if (Paths.empty() || !Known.empty())
+  return Known;
+return {inferCommand(FilePath)};
+  }
+
+  std::vector getAllFiles() const override {
+return Inner->getAllFiles();
+  }
+
+  std::vector getAllCompileCommands() const override {
+return Inner->getAllCompileCommands();
+  }
+
+private:
+  using SubstringAndIndex = std::pair;
+
+  // Sort the paths list, and populate other index fields from it.
+  // We identify files by the index into (sorted) Paths.
+  void finalizeIndex() {
+llvm::sort(Paths.begin(), Paths.end());
+for (size_t I = 0; I < Paths.size(); ++I) {
+  Paths[I].second = I

[PATCH] D45006: [Tooling] A CompilationDatabase wrapper that infers header commands.

2018-03-28 Thread Sam McCall via Phabricator via cfe-commits
sammccall created this revision.
Herald added subscribers: cfe-commits, mgorny, klimek.

The wrapper finds the closest matching compile command using filename heuristics
and makes minimal tweaks so it can be used with the header.

(This is WIP and needs tests)


Repository:
  rC Clang

https://reviews.llvm.org/D45006

Files:
  include/clang/Tooling/CompilationDatabase.h
  lib/Tooling/CMakeLists.txt
  lib/Tooling/InterpolatingCompilationDatabase.cpp

Index: lib/Tooling/InterpolatingCompilationDatabase.cpp
===
--- /dev/null
+++ lib/Tooling/InterpolatingCompilationDatabase.cpp
@@ -0,0 +1,271 @@
+//===- InterpolatingCompilationDatabase.cpp -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+// InterpolatingCompilationDatabase wraps another CompilationDatabase and
+// attempts to heuristically determine appropriate compile commands for files
+// that are not included, such as headers or newly created files.
+//
+// We "borrow" the compile command for the closest available file:
+//   - points are awarded if the filename matches (ignoring extension)
+//   - points are awarded if the directory structure matches
+//   - ties are broken by length of path prefix match
+//
+// The compile command is adjusted:
+//   - the input filename is replaced
+//   - if the extension differs, an "-x" flag is added to preserve the language
+//   - output file arguments are removed
+//
+// This class is only useful when wrapping databases that can enumerate all
+// their compile commands. If getAllFiles() is empty, no inference occurs.
+//
+//===--===//
+
+#include "clang/Driver/Options.h"
+#include "clang/Driver/Types.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include 
+
+namespace clang {
+namespace tooling {
+namespace {
+using namespace llvm;
+
+size_t matchingPrefix(StringRef L, StringRef R) {
+  size_t Limit = std::min(L.size(), R.size());
+  for (size_t I = 0; I < Limit; ++I)
+if (L[I] != R[I])
+  return I;
+  return Limit;
+}
+
+// Like memcmp(), but traverses in reverse order. L and R are one-past-end.
+int rMemCompare(const char *L, const char *R, size_t N) {
+  for (const char *LStop = L - N; L > LStop;)
+if (*--L != *--R)
+  return *L < *R ? -1 : 1;
+  return 0;
+}
+
+// This is like L.compare(R), but maybe with the order of characters reversed.
+template 
+int compare(StringRef L, StringRef R) {
+  if (!Reverse)
+return L.compare(R);
+  // Traverse the common region backwards, first differing byte is decisive.
+  if (int Cmp = rMemCompare(L.end(), R.end(), std::min(L.size(), R.size(
+return Cmp;
+  // No byte differed, so the shorter string is smaller.
+  return L.size() == R.size() ? 0 : L.size() < R.size() ? -1 : 1;
+}
+
+// Returns 0 if S starts with prefix, else -1 for S < Prefix, 1 for S > Prefix.
+template  int prefixCompare(StringRef S, StringRef Prefix) {
+  if (S.size() >= Prefix.size())
+return Reverse ? rMemCompare(S.end(), Prefix.end(), Prefix.size())
+   : memcmp(S.begin(), Prefix.begin(), Prefix.size());
+  return compare(S, Prefix);
+}
+
+template  struct Less {
+  bool operator()(StringRef Key, std::pair Value) const {
+return Prefix ? prefixCompare(Value.first, Key) > 0
+  : compare(Key, Value.first) < 0;
+  }
+  bool operator()(std::pair Value, StringRef Key) const {
+return Prefix ? prefixCompare(Value.first, Key) < 0
+  : compare(Value.first, Key) < 0;
+  }
+};
+
+class InterpolatingCompilationDatabase : public CompilationDatabase {
+public:
+  InterpolatingCompilationDatabase(std::unique_ptr Inner)
+  : Inner(std::move(Inner)), Strings(Arena) {
+for (auto F : getAllFiles())
+  Paths.emplace_back(Strings.save(F), 0);
+finalizeIndex();
+  }
+
+  std::vector
+  getCompileCommands(StringRef FilePath) const override {
+auto Known = Inner->getCompileCommands(FilePath);
+if (Paths.empty() || !Known.empty())
+  return Known;
+return {inferCommand(FilePath)};
+  }
+
+  std::vector getAllFiles() const override {
+return Inner->getAllFiles();
+  }
+
+  std::vector getAllCompileCommands() const override {
+return Inner->getAllCompileCommands();
+  }
+
+private:
+  using SubstringAndIndex = std::pair;
+
+  // Sort the paths list, and populate other index fields from it.
+  // We identify files by the index into (sorted) Paths.
+  void finalizeIndex() {
+llvm::sort(Paths.begin(),