sammccall created this revision.
sammccall added reviewers: adamcz, hokein.
Herald added subscribers: cfe-commits, usaxena95, kadircet, arphaman, jkorous, 
MaskRay, ilya-biryukov.
Herald added a project: clang.

This is designed for tweaking compile commands by specifying flags to add/remove
in a config file. Something like:

  CompileFlags: { Remove: -fcolor-diagnostics }

Having users tweak raw argv (e.g. with a regex) is going to end in tears: bugs
around clang-cl, -Xclang, aliases, joined-vs-separate args etc. are inevitable.

This isn't in tooling because of its performance trade-off: it builds a big
table up-front to make subsequent actions fast. Maybe it should be, though.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81958

Files:
  clang-tools-extra/clangd/CompileCommands.cpp
  clang-tools-extra/clangd/CompileCommands.h
  clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp

Index: clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
+++ clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp
@@ -185,6 +185,108 @@
 }
 #endif
 
+static std::string strip(llvm::StringRef Arg, llvm::StringRef Argv) {
+  llvm::SmallVector<llvm::StringRef, 8> Words;
+  llvm::SplitString(Argv, Words);
+  std::vector<std::string> CommandLine(Words.begin(), Words.end());
+  ArgStripper Stripper;
+  Stripper.strip(Arg);
+  Stripper.process(CommandLine);
+  return llvm::join(CommandLine, " ");
+}
+
+TEST(ArgStripperTest, Spellings) {
+  // Target and arg may use different prefixes (- vs --).
+  EXPECT_EQ(strip("-pedantic", "clang -pedantic foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-pedantic", "clang --pedantic foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--pedantic", "clang -pedantic foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--pedantic", "clang --pedantic foo.cc"), "clang foo.cc");
+  // Target and arg may use different aliases (-x vs --language=).
+  EXPECT_EQ(strip("-x", "clang -x c++ foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-x", "clang --language=c++ foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--language=", "clang -x c++ foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("--language=", "clang --language=c++ foo.cc"),
+            "clang foo.cc");
+}
+
+TEST(ArgStripperTest, UnknownFlag) {
+  EXPECT_EQ(strip("-xyzzy", "clang -xyzzy foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-xyz*", "clang -xyzzy foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-xyzzy", "clang -Xclang -xyzzy foo.cc"), "clang foo.cc");
+}
+
+TEST(ArgStripperTest, Xclang) {
+  // Flags may be -Xclang escaped.
+  EXPECT_EQ(strip("-ast-dump", "clang -Xclang -ast-dump foo.cc"),
+            "clang foo.cc");
+  // Args may be -Xclang escaped.
+  EXPECT_EQ(strip("-add-plugin", "clang -Xclang -add-plugin -Xclang z foo.cc"),
+            "clang foo.cc");
+}
+
+TEST(ArgStripperTest, ClangCL) {
+  // /I is a synonym for -I in clang-cl mode only.
+  // Not stripped by default: /Interesting/file.cc is left in place.
+  EXPECT_EQ(strip("-I", "clang -I /usr/inc /Interesting/file.cc"),
+            "clang /Interesting/file.cc");
+  // Stripped when invoked as clang-cl.
+  EXPECT_EQ(strip("-I", "clang-cl -I /usr/inc /Interesting/file.cc"),
+            "clang-cl");
+  // Stripped when invoked as CL.EXE
+  EXPECT_EQ(strip("-I", "CL.EXE -I /usr/inc /Interesting/file.cc"), "CL.EXE");
+  // Stripped when passed --driver-mode=cl.
+  EXPECT_EQ(strip("-I", "cc -I /usr/inc /Interesting/file.cc --driver-mode=cl"),
+            "cc --driver-mode=cl");
+}
+
+TEST(ArgStripperTest, ArgStyles) {
+  // Flag
+  EXPECT_EQ(strip("-Qn", "clang -Qn foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-Qn", "clang -QnZ foo.cc"), "clang -QnZ foo.cc");
+  // Joined
+  EXPECT_EQ(strip("-std=", "clang -std= foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-std=", "clang -std=c++11 foo.cc"), "clang foo.cc");
+  // Separate
+  EXPECT_EQ(strip("-mllvm", "clang -mllvm X foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-mllvm", "clang -mllvmX foo.cc"), "clang -mllvmX foo.cc");
+  // RemainingArgsJoined
+  EXPECT_EQ(strip("/link", "clang-cl /link b c d foo.cc"), "clang-cl");
+  EXPECT_EQ(strip("/link", "clang-cl /linka b c d foo.cc"), "clang-cl");
+  // CommaJoined
+  EXPECT_EQ(strip("-Wl,", "clang -Wl,x,y foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-Wl,", "clang -Wl, foo.cc"), "clang foo.cc");
+  // MultiArg
+  EXPECT_EQ(strip("-segaddr", "clang -segaddr a b foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-segaddr", "clang -segaddra b foo.cc"),
+            "clang -segaddra b foo.cc");
+  // JoinedOrSeparate
+  EXPECT_EQ(strip("-G", "clang -GX foo.cc"), "clang foo.cc");
+  EXPECT_EQ(strip("-G", "clang -G X foo.cc"), "clang foo.cc");
+  // JoinedAndSeparate
+  EXPECT_EQ(strip("-plugin-arg-", "clang -cc1 -plugin-arg-X Y foo.cc"),
+            "clang -cc1 foo.cc");
+  EXPECT_EQ(strip("-plugin-arg-", "clang -cc1 -plugin-arg- Y foo.cc"),
+            "clang -cc1 foo.cc");
+}
+
+TEST(ArgStripperTest, EndOfList) {
+  // When we hit the end-of-args prematurely, we don't crash.
+  // We consume the incomplete args if we've matched the target option.
+  EXPECT_EQ(strip("-I", "clang -Xclang"), "clang -Xclang");
+  EXPECT_EQ(strip("-I", "clang -Xclang -I"), "clang");
+  EXPECT_EQ(strip("-I", "clang -I -Xclang"), "clang");
+  EXPECT_EQ(strip("-I", "clang -I"), "clang");
+}
+
+TEST(ArgStripperTest, Multiple) {
+  ArgStripper S;
+  S.strip("-o");
+  S.strip("-c");
+  std::vector<std::string> Args = {"clang", "-o", "foo.o", "foo.cc", "-c"};
+  S.process(Args);
+  EXPECT_THAT(Args, ElementsAre("clang", "foo.cc"));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/CompileCommands.h
===================================================================
--- clang-tools-extra/clangd/CompileCommands.h
+++ clang-tools-extra/clangd/CompileCommands.h
@@ -50,6 +50,40 @@
   Memoize<llvm::StringMap<std::string>> ResolvedDriversNoFollow;
 };
 
+// Removes args from a command-line in a semantically-aware way.
+// For instance, when "-I" is stripped:
+//  - so is its argument (either as -Ifoo or -I foo)
+//  - aliases like --include-directory=foo are also removed
+//  - CL-style /Ifoo will be removed if the args indicate MS-compatible mode
+//  - the -Xclang prefix will be dropped if present
+// Args that are not recognized as flags are still removed as literal strings,
+// and strip("ABC*") will remove any arg with an ABC prefix.
+//
+// Internally this builds a large (0.5MB) table of clang options on first use.
+// Both strip() and process() are fairly cheap after that.
+// NOTE(review): literal rules' Text views Storage; confirm growth can't dangle.
+// FIXME: this reimplements much of OptTable, it might be nice to expose more.
+// The table-building strategy may not make sense outside clangd.
+class ArgStripper {
+public:
+  // Adds the arg to the set which should be removed.
+  void strip(llvm::StringRef Arg);
+  // Remove the targets from Args, in-place.
+  void process(std::vector<std::string> &Args) const;
+
+private:
+  // Deletion rules, to be checked for each arg.
+  struct Rule {
+    llvm::StringRef Text;    // Rule applies only if arg begins with Text.
+    unsigned char Modes = 0; // Rule applies only in specified driver modes.
+    uint16_t ExactArgs = 0;  // Num args consumed when Arg == Text.
+    uint16_t PrefixArgs = 0; // Num args consumed when Arg starts with Text.
+  };
+  static llvm::ArrayRef<Rule> rulesFor(llvm::StringRef Arg);
+  llvm::SmallVector<Rule, 4> Rules;
+  std::vector<std::string> Storage; // Store strings not found in option table.
+};
+
 } // namespace clangd
 } // namespace clang
 
Index: clang-tools-extra/clangd/CompileCommands.cpp
===================================================================
--- clang-tools-extra/clangd/CompileCommands.cpp
+++ clang-tools-extra/clangd/CompileCommands.cpp
@@ -8,8 +8,12 @@
 
 #include "CompileCommands.h"
 #include "support/Logger.h"
+#include "clang/Driver/Options.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Tooling/ArgumentsAdjusters.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -224,5 +228,236 @@
   };
 }
 
+// Determine total number of args consumed by this option.
+// Return answers for {Exact, Prefix} match. 0 means not allowed.
+static std::pair<unsigned, unsigned> getArgCount(const llvm::opt::Option &Opt) {
+  // Reference is llvm::opt::Option::acceptInternal()
+  using llvm::opt::Option;
+  switch (Opt.getKind()) {
+  case Option::FlagClass:
+  case Option::GroupClass:
+  case Option::InputClass:
+  case Option::UnknownClass:
+  case Option::ValuesClass:
+    return {1, 0}; // The arg itself on an exact match; no prefix match.
+  case Option::JoinedClass:
+  case Option::CommaJoinedClass:
+    return {1, 1}; // The arg itself, whether or not a value is joined on.
+  case Option::JoinedAndSeparateClass:
+    return {2, 2};
+  case Option::SeparateClass:
+    return {2, 0};
+  case Option::MultiArgClass:
+    return {1 + Opt.getNumArgs(), 0};
+  case Option::JoinedOrSeparateClass:
+    return {2, 1};
+  case Option::RemainingArgsClass:
+    return {10000, 0}; // Effectively "all remaining args".
+  case Option::RemainingArgsJoinedClass:
+    return {10000, 10000};
+  }
+  llvm_unreachable("Unhandled option kind"); // All kinds return above.
+}
+
+// Flag-parsing mode, which affects which flags are available. Values are bits.
+enum DriverMode : unsigned char {
+  DM_None = 0,
+  DM_GCC = 1, // Default mode e.g. when invoked as 'clang'
+  DM_CL = 2,  // MS CL.exe compatible mode e.g. when invoked as 'clang-cl'
+  DM_CC1 = 4, // When invoked as 'clang -cc1' or after '-Xclang'
+  DM_All = 7  // Union of DM_GCC, DM_CL and DM_CC1.
+};
+
+// Returns the set of DriverModes (a DM_* bitmask) where an option may be used.
+static unsigned char getModes(const llvm::opt::Option &Opt) {
+  // Why is this so complicated?!
+  // Reference is clang::driver::Driver::getIncludeExcludeOptionFlagMasks()
+  unsigned char Result = DM_None;
+  if (Opt.hasFlag(driver::options::CC1Option))
+    Result |= DM_CC1;
+  // CLOption => CL only; otherwise GCC, and also CL if marked CoreOption.
+  if (!Opt.hasFlag(driver::options::NoDriverOption)) {
+    if (Opt.hasFlag(driver::options::CLOption)) {
+      Result |= DM_CL;
+    } else {
+      Result |= DM_GCC;
+      if (Opt.hasFlag(driver::options::CoreOption))
+        Result |= DM_CL;
+    }
+  }
+  return Result;
+}
+
+llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
+  // All the hard work is done once in a static initializer.
+  // We compute a table containing strings to look for and #args to skip.
+  // e.g. "-x" => {-x 2 args, -x* 1 arg, --language 2 args, --language=* 1 arg}
+  using TableTy =
+      llvm::StringMap<llvm::SmallVector<Rule, 4>, llvm::BumpPtrAllocator>;
+  static TableTy *Table = [] {
+    auto &DriverTable = driver::getDriverOptTable();
+    using DriverID = clang::driver::options::ID;
+
+    // Collect sets of aliases, so we can treat -foo and -foo= as synonyms.
+    // Conceptually a double-linked list: PrevAlias[I] -> I -> NextAlias[I].
+    // If PrevAlias[I] is INVALID, then I is canonical.
+    DriverID PrevAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
+    DriverID NextAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
+    auto AddAlias = [&](DriverID Self, DriverID T) {
+      if (NextAlias[T]) {
+        PrevAlias[NextAlias[T]] = Self;
+        NextAlias[Self] = NextAlias[T];
+      }
+      PrevAlias[Self] = T;
+      NextAlias[T] = Self;
+    };
+    // Also grab prefixes for each option, these are not fully exposed.
+    const char *const *Prefixes[DriverID::LastOption] = {nullptr};
+#define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELP, METAVAR, VALUES)                                          \
+  if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr)  \
+    AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS);                       \
+  Prefixes[DriverID::OPT_##ID] = PREFIX;
+#include "clang/Driver/Options.inc"
+#undef OPTION
+#undef PREFIX
+
+    auto Result = std::make_unique<TableTy>();
+    // Iterate over distinct options (represented by the canonical alias).
+    // Every spelling of this option will get the same set of rules.
+    for (unsigned ID = 1; ID < DriverID::LastOption; ++ID) {
+      if (PrevAlias[ID] || ID == DriverID::OPT_Xclang)
+        continue; // Not canonical, or specially handled.
+      llvm::SmallVector<Rule, 8> Rules;
+      // Iterate over each alias, to add rules for parsing it.
+      for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) {
+        if (Prefixes[A] == nullptr) // option groups.
+          continue;
+        auto Opt = DriverTable.getOption(A);
+        // Exclude - and -foo pseudo-options.
+        if (Opt.getName().empty())
+          continue;
+        auto Modes = getModes(Opt);
+        std::pair<unsigned, unsigned> ArgCount = getArgCount(Opt);
+        // Iterate over each spelling of the alias, e.g. -foo vs --foo.
+        for (auto *Prefix = Prefixes[A]; *Prefix != nullptr; ++Prefix) {
+          llvm::SmallString<64> Buf(*Prefix);
+          Buf.append(Opt.getName());
+          llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey();
+          Rules.emplace_back();
+          Rules.back().Text = Spelling;
+          Rules.back().Modes = Modes;
+          Rules.back().ExactArgs = ArgCount.first;
+          Rules.back().PrefixArgs = ArgCount.second;
+        }
+      }
+      // Register the set of rules under each possible name.
+      for (const auto &R : Rules)
+        Result->find(R.Text)->second.append(Rules.begin(), Rules.end());
+    }
+#ifndef NDEBUG
+    // Dump the table and various measures of its size.
+    unsigned RuleCount = 0;
+    dlog("ArgStripper Option spelling table");
+    for (const auto &Entry : *Result) {
+      dlog("{0}", Entry.first());
+      RuleCount += Entry.second.size();
+      for (const auto &R : Entry.second)
+        dlog("  {0} #={1} *={2} Mode={3}", R.Text, R.ExactArgs, R.PrefixArgs,
+             int(R.Modes));
+    }
+    dlog("Table spellings={0} rules={1} string-bytes={2}", Result->size(),
+         RuleCount, Result->getAllocator().getBytesAllocated());
+#endif
+    // The static table will never be destroyed.
+    return Result.release();
+  }();
+
+  auto It = Table->find(Arg);
+  return (It == Table->end()) ? llvm::ArrayRef<Rule>() : It->second;
+}
+
+void ArgStripper::strip(llvm::StringRef Arg) {
+  auto Known = rulesFor(Arg);
+  if (!Known.empty()) {
+    Rules.append(Known.begin(), Known.end()); // Recognized flag.
+    return;
+  }
+  // Not a recognized flag: strip it literally, or by prefix if it ends in "*".
+  Storage.emplace_back(Arg);
+  Rule Literal;
+  Literal.Text = Storage.back();
+  Literal.ExactArgs = 1;
+  Literal.PrefixArgs = Literal.Text.consume_back("*") ? 1 : 0;
+  Literal.Modes = DM_All;
+  Rules.push_back(Literal);
+}
+
+void ArgStripper::process(std::vector<std::string> &Args) const {
+  if (Args.empty())
+    return;
+
+  // Examine args list to determine if we're in GCC, CL-compatible, or cc1 mode.
+  DriverMode MainMode = DM_GCC;
+  llvm::StringRef Argv0 = Args.front();
+  if (Argv0.endswith_lower(".exe"))
+    Argv0 = Argv0.drop_back(strlen(".exe"));
+  if (Argv0.endswith_lower("cl"))
+    MainMode = DM_CL;
+  for (const llvm::StringRef Arg : Args) {
+    if (Arg == "--driver-mode=cl") {
+      MainMode = DM_CL;
+      break;
+    }
+    if (Arg == "-cc1") {
+      MainMode = DM_CC1;
+      break;
+    }
+  }
+  DriverMode CurrentMode = MainMode; // Mode for the arg under test.
+
+  // Read and write heads for in-place deletion.
+  unsigned Read = 0, Write = 0;
+  bool WasXclang = false; // True if the last kept arg was -Xclang.
+  while (Read < Args.size()) {
+    llvm::StringRef Arg = Args[Read];
+    for (const Rule &R : Rules) {
+      // Rule can fail to match if...
+      if (!(R.Modes & CurrentMode))
+        continue; // not applicable to current driver mode
+      if (!Arg.startswith(R.Text))
+        continue; // current arg doesn't match the prefix string
+      bool PrefixMatch = Arg.size() > R.Text.size();
+      unsigned ArgCount = PrefixMatch ? R.PrefixArgs : R.ExactArgs;
+      if (ArgCount == 0)
+        continue; // rule can't apply as an exact/prefix match
+
+      // OK, rule matched, delete it and its args.
+      if (WasXclang) {
+        --Write; // Drop previous -Xclang arg
+        CurrentMode = MainMode;
+        WasXclang = false;
+      }
+      // Advance to last arg. An arg may be foo or -Xclang foo.
+      for (unsigned I = 1; Read < Args.size() && I < ArgCount; ++I) {
+        ++Read;
+        if (Read < Args.size() && Args[Read] == "-Xclang")
+          ++Read;
+      }
+      goto Matched;
+    }
+    // No match, just copy the arg through.
+    WasXclang = Arg == "-Xclang";
+    CurrentMode = WasXclang ? DM_CC1 : MainMode;
+    if (Write != Read)
+      Args[Write] = std::move(Args[Read]);
+    ++Write;
+  Matched:
+    ++Read;
+  }
+  Args.resize(Write); // Drop the stale tail left by compaction.
+}
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to