qchateau created this revision.
qchateau added a reviewer: sammccall.
Herald added subscribers: usaxena95, kadircet, arphaman, javed.absar.
qchateau requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, ilya-biryukov.
Herald added a project: clang.

Keep a store of the preambles of all opened filed, plus up to
5 previously used preambles. When building the AST for a TU,
if the preamble for this TU is not yet built, look through
the stored premables to find the best compatible preamble
and use this preamble to speed-up the AST build.

---------

This patch is starting to look like something acceptable, so
I'm sending it for review. At this point I'd like to get some 
feedback on this before going further.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97417

Files:
  clang-tools-extra/clangd/ParsedAST.cpp
  clang-tools-extra/clangd/TUScheduler.cpp
  clang-tools-extra/clangd/TUScheduler.h

Index: clang-tools-extra/clangd/TUScheduler.h
===================================================================
--- clang-tools-extra/clangd/TUScheduler.h
+++ clang-tools-extra/clangd/TUScheduler.h
@@ -310,6 +310,8 @@
   /// Responsible for retaining and rebuilding idle ASTs. An implementation is
   /// an LRU cache.
   class ASTCache;
+  /// Store all known preambles
+  class PreambleStore;
 
   // The file being built/processed in the current thread. This is a hack in
   // order to get the file name into the index implementations. Do not depend on
@@ -332,6 +334,7 @@
   Semaphore QuickRunBarrier;
   llvm::StringMap<std::unique_ptr<FileData>> Files;
   std::unique_ptr<ASTCache> IdleASTs;
+  std::unique_ptr<PreambleStore> Preambles;
   // None when running tasks synchronously and non-None when running tasks
   // asynchronously.
   llvm::Optional<AsyncTaskRunner> PreambleTasks;
Index: clang-tools-extra/clangd/TUScheduler.cpp
===================================================================
--- clang-tools-extra/clangd/TUScheduler.cpp
+++ clang-tools-extra/clangd/TUScheduler.cpp
@@ -94,6 +94,21 @@
 
 namespace {
 class ASTWorker;
+
+bool compileCommandsAreSimilar(const tooling::CompileCommand &LHS,
+                               const tooling::CompileCommand &RHS) {
+  std::vector<std::string> LHSCL(LHS.CommandLine);
+  auto LHSBasename = llvm::sys::path::filename(LHS.Filename).str();
+  auto RHSBasename = llvm::sys::path::filename(RHS.Filename).str();
+  for (auto &Arg : LHSCL) {
+    for (auto Pos = Arg.find(LHSBasename); Pos != std::string::npos;
+         Pos = Arg.find(LHSBasename, Pos + 1)) {
+      Arg.replace(Pos, LHSBasename.size(), RHSBasename);
+    }
+  }
+  return llvm::makeArrayRef(LHSCL).equals(RHS.CommandLine);
+}
+
 } // namespace
 
 static clang::clangd::Key<std::string> kFileBeingProcessed;
@@ -179,6 +194,59 @@
   std::vector<KVPair> LRU; /* GUARDED_BY(Mut) */
 };
 
+class TUScheduler::PreambleStore {
+public:
+  struct Entry {
+    std::shared_ptr<const PreambleData> Preamble;
+    size_t Score;
+    Path FileName;
+
+    bool operator>(const Entry &Other) const { return Score > Other.Score; }
+  };
+
+  auto getAll() {
+    std::unique_lock<std::mutex> Lock(Mut);
+    return Store;
+  }
+
+  void push(PathRef FileName, std::shared_ptr<const PreambleData> Preamble) {
+    std::unique_lock<std::mutex> Lock(Mut);
+    auto It = llvm::find_if(
+        Store, [&](const auto &Item) { return Item.FileName == FileName; });
+    if (It == Store.end()) {
+      Store.push_back(Entry{std::move(Preamble), 0, FileName.str()});
+      popWorstPreambles();
+    }
+    vlog("Store contains {0} preambles", Store.size());
+  }
+
+  void hit(PathRef FileName) {
+    std::unique_lock<std::mutex> Lock(Mut);
+    auto It = llvm::find_if(
+        Store, [&](const auto &Item) { return Item.FileName == FileName; });
+    if (It == Store.end()) {
+      return;
+    }
+    It->Score++;
+  }
+
+private:
+  void popWorstPreambles() {
+    constexpr int ClosedPreamblesToKeep = 5;
+    auto Begin = llvm::partition(
+        Store, [](const auto &Item) { return Item.Preamble.use_count() > 1; });
+    if (std::distance(Begin, Store.end()) <= ClosedPreamblesToKeep) {
+      return;
+    }
+    auto Nth = Begin + ClosedPreamblesToKeep;
+    std::nth_element(Begin, Nth, Store.end(), std::greater<Entry>());
+    Store.erase(Nth, Store.end());
+  }
+
+  std::mutex Mut;
+  std::vector<Entry> Store;
+};
+
 namespace {
 /// Threadsafe manager for updating a TUStatus and emitting it after each
 /// update.
@@ -368,8 +436,10 @@
 class ASTWorker {
   friend class ASTWorkerHandle;
   ASTWorker(PathRef FileName, const GlobalCompilationDatabase &CDB,
-            TUScheduler::ASTCache &LRUCache, Semaphore &Barrier, bool RunSync,
-            const TUScheduler::Options &Opts, ParsingCallbacks &Callbacks);
+            TUScheduler::ASTCache &LRUCache,
+            TUScheduler::PreambleStore &Preambles, Semaphore &Barrier,
+            bool RunSync, const TUScheduler::Options &Opts,
+            ParsingCallbacks &Callbacks);
 
 public:
   /// Create a new ASTWorker and return a handle to it.
@@ -377,12 +447,11 @@
   /// is null, all requests will be processed on the calling thread
   /// synchronously instead. \p Barrier is acquired when processing each
   /// request, it is used to limit the number of actively running threads.
-  static ASTWorkerHandle create(PathRef FileName,
-                                const GlobalCompilationDatabase &CDB,
-                                TUScheduler::ASTCache &IdleASTs,
-                                AsyncTaskRunner *Tasks, Semaphore &Barrier,
-                                const TUScheduler::Options &Opts,
-                                ParsingCallbacks &Callbacks);
+  static ASTWorkerHandle
+  create(PathRef FileName, const GlobalCompilationDatabase &CDB,
+         TUScheduler::ASTCache &IdleASTs, TUScheduler::PreambleStore &Preambles,
+         AsyncTaskRunner *Tasks, Semaphore &Barrier,
+         const TUScheduler::Options &Opts, ParsingCallbacks &Callbacks);
   ~ASTWorker();
 
   void update(ParseInputs Inputs, WantDiagnostics, bool ContentChanged);
@@ -394,6 +463,8 @@
 
   std::shared_ptr<const PreambleData> getPossiblyStalePreamble(
       std::shared_ptr<const ASTSignals> *ASTSignals = nullptr) const;
+  std::shared_ptr<const PreambleData> getBestPreamble(
+      std::shared_ptr<const ASTSignals> *ASTSignals = nullptr) const;
 
   /// Used to inform ASTWorker about a new preamble build by PreambleThread.
   /// Diagnostics are only published through this callback. This ensures they
@@ -460,6 +531,9 @@
   /// Should the first task in the queue be skipped instead of run?
   bool shouldSkipHeadLocked() const;
 
+  /// Get all preambles that may be used to accelerate the AST build
+  std::vector<TUScheduler::PreambleStore::Entry> getCompatiblePreambles() const;
+
   struct Request {
     llvm::unique_function<void()> Action;
     std::string Name;
@@ -473,6 +547,7 @@
 
   /// Handles retention of ASTs.
   TUScheduler::ASTCache &IdleASTs;
+  TUScheduler::PreambleStore &Preambles;
   const bool RunSync;
   /// Time to wait after an update to see whether another update obsoletes it.
   const DebouncePolicy UpdateDebounce;
@@ -574,11 +649,13 @@
 ASTWorkerHandle ASTWorker::create(PathRef FileName,
                                   const GlobalCompilationDatabase &CDB,
                                   TUScheduler::ASTCache &IdleASTs,
+                                  TUScheduler::PreambleStore &Preambles,
                                   AsyncTaskRunner *Tasks, Semaphore &Barrier,
                                   const TUScheduler::Options &Opts,
                                   ParsingCallbacks &Callbacks) {
-  std::shared_ptr<ASTWorker> Worker(new ASTWorker(
-      FileName, CDB, IdleASTs, Barrier, /*RunSync=*/!Tasks, Opts, Callbacks));
+  std::shared_ptr<ASTWorker> Worker(
+      new ASTWorker(FileName, CDB, IdleASTs, Preambles, Barrier,
+                    /*RunSync=*/!Tasks, Opts, Callbacks));
   if (Tasks) {
     Tasks->runAsync("ASTWorker:" + llvm::sys::path::filename(FileName),
                     [Worker]() { Worker->run(); });
@@ -590,13 +667,14 @@
 }
 
 ASTWorker::ASTWorker(PathRef FileName, const GlobalCompilationDatabase &CDB,
-                     TUScheduler::ASTCache &LRUCache, Semaphore &Barrier,
+                     TUScheduler::ASTCache &LRUCache,
+                     TUScheduler::PreambleStore &Preambles, Semaphore &Barrier,
                      bool RunSync, const TUScheduler::Options &Opts,
                      ParsingCallbacks &Callbacks)
-    : IdleASTs(LRUCache), RunSync(RunSync), UpdateDebounce(Opts.UpdateDebounce),
-      FileName(FileName), ContextProvider(Opts.ContextProvider), CDB(CDB),
-      Callbacks(Callbacks), Barrier(Barrier), Done(false),
-      Status(FileName, Callbacks),
+    : IdleASTs(LRUCache), Preambles(Preambles), RunSync(RunSync),
+      UpdateDebounce(Opts.UpdateDebounce), FileName(FileName),
+      ContextProvider(Opts.ContextProvider), CDB(CDB), Callbacks(Callbacks),
+      Barrier(Barrier), Done(false), Status(FileName, Callbacks),
       PreamblePeer(FileName, Callbacks, Opts.StorePreamblesInMemory, RunSync,
                    Status, *this) {
   // Set a fallback command because compile command can be accessed before
@@ -687,14 +765,18 @@
 
     PreamblePeer.update(std::move(Invocation), std::move(Inputs),
                         std::move(CompilerInvocationDiags), WantDiags);
+
     std::unique_lock<std::mutex> Lock(Mutex);
     PreambleCV.wait(Lock, [this] {
       // Block until we reiceve a preamble request, unless a preamble already
       // exists, as patching an empty preamble would imply rebuilding it from
       // scratch.
+      // It is also acceptable to unblock is we have compatible preambles
+      // that may be used to accelerate the AST build.
       // We block here instead of the consumer to prevent any deadlocks. Since
       // LatestPreamble is only populated by ASTWorker thread.
-      return LatestPreamble || !PreambleRequests.empty() || Done;
+      return LatestPreamble || !PreambleRequests.empty() ||
+             !getCompatiblePreambles().empty() || Done;
     });
     return;
   };
@@ -722,13 +804,13 @@
       vlog("ASTWorker rebuilding evicted AST to run {0}: {1} version {2}", Name,
            FileName, FileInputs.Version);
       // FIXME: We might need to build a patched ast once preamble thread starts
-      // running async. Currently getPossiblyStalePreamble below will always
+      // running async. Currently getBestPreamble below will always
       // return a compatible preamble as ASTWorker::update blocks.
       llvm::Optional<ParsedAST> NewAST;
       if (Invocation) {
         NewAST = ParsedAST::build(FileName, FileInputs, std::move(Invocation),
                                   CompilerInvocationDiagConsumer.take(),
-                                  getPossiblyStalePreamble());
+                                  getBestPreamble());
         ++ASTBuildCount;
       }
       AST = NewAST ? std::make_unique<ParsedAST>(std::move(*NewAST)) : nullptr;
@@ -808,6 +890,7 @@
       else
         LatestPreamble = std::move(Preamble);
     }
+    Preambles.push(FileName, *LatestPreamble);
     // Notify anyone waiting for a preamble.
     PreambleCV.notify_all();
     // Give up our ownership to old preamble before starting expensive AST
@@ -950,6 +1033,47 @@
   return LatestPreamble ? *LatestPreamble : nullptr;
 }
 
+std::vector<TUScheduler::PreambleStore::Entry>
+ASTWorker::getCompatiblePreambles() const {
+  auto AllPreamblesEntries = Preambles.getAll();
+  auto End = llvm::remove_if(AllPreamblesEntries, [&](const auto &Item) {
+    return !compileCommandsAreSimilar(FileInputs.CompileCommand,
+                                      Item.Preamble->CompileCommand);
+  });
+  AllPreamblesEntries.erase(End, AllPreamblesEntries.end());
+  return AllPreamblesEntries;
+}
+
+std::shared_ptr<const PreambleData> ASTWorker::getBestPreamble(
+    std::shared_ptr<const ASTSignals> *ASTSignals) const {
+  if (auto Preamble = getPossiblyStalePreamble(ASTSignals)) {
+    vlog("Best premable is the real one");
+    return Preamble;
+  }
+
+  auto CompatiblePreambles = getCompatiblePreambles();
+  vlog("Looking for the best of {0} preambles ...", CompatiblePreambles.size());
+  if (CompatiblePreambles.empty()) {
+    vlog("No compatible preanble");
+    return nullptr;
+  }
+
+  auto Best = CompatiblePreambles.begin();
+  auto BestPatch = PreamblePatch::create(FileName, FileInputs, *Best->Preamble);
+
+  for (auto It = Best + 1; It != CompatiblePreambles.end(); ++It) {
+    auto Patch = PreamblePatch::create(FileName, FileInputs, *It->Preamble);
+    if (Patch.preambleIncludes().size() > BestPatch.preambleIncludes().size()) {
+      BestPatch = std::move(Patch);
+      Best = It;
+    }
+  }
+
+  vlog("Using preamble from: {0}", Best->FileName);
+  Preambles.hit(Best->FileName);
+  return Best->Preamble;
+}
+
 void ASTWorker::waitForFirstPreamble() const {
   std::unique_lock<std::mutex> Lock(Mutex);
   PreambleCV.wait(Lock, [this] { return LatestPreamble.hasValue() || Done; });
@@ -1297,7 +1421,8 @@
                           : std::make_unique<ParsingCallbacks>()),
       Barrier(Opts.AsyncThreadsCount), QuickRunBarrier(Opts.AsyncThreadsCount),
       IdleASTs(
-          std::make_unique<ASTCache>(Opts.RetentionPolicy.MaxRetainedASTs)) {
+          std::make_unique<ASTCache>(Opts.RetentionPolicy.MaxRetainedASTs)),
+      Preambles(std::make_unique<PreambleStore>()) {
   // Avoid null checks everywhere.
   if (!Opts.ContextProvider) {
     this->Opts.ContextProvider = [](llvm::StringRef) {
@@ -1339,7 +1464,7 @@
   if (!FD) {
     // Create a new worker to process the AST-related tasks.
     ASTWorkerHandle Worker =
-        ASTWorker::create(File, CDB, *IdleASTs,
+        ASTWorker::create(File, CDB, *IdleASTs, *Preambles,
                           WorkerThreads ? WorkerThreads.getPointer() : nullptr,
                           Barrier, Opts, *Callbacks);
     FD = std::unique_ptr<FileData>(
@@ -1426,7 +1551,7 @@
     SPAN_ATTACH(Tracer, "file", File);
     std::shared_ptr<const ASTSignals> Signals;
     std::shared_ptr<const PreambleData> Preamble =
-        It->second->Worker->getPossiblyStalePreamble(&Signals);
+        It->second->Worker->getBestPreamble(&Signals);
     WithContext WithProvidedContext(Opts.ContextProvider(File));
     Action(InputsAndPreamble{It->second->Contents,
                              It->second->Worker->getCurrentCompileCommand(),
@@ -1449,7 +1574,7 @@
       Worker->waitForFirstPreamble();
     }
     std::shared_ptr<const ASTSignals> Signals;
-    Preamble = Worker->getPossiblyStalePreamble(&Signals);
+    Preamble = Worker->getBestPreamble(&Signals);
 
     std::lock_guard<Semaphore> BarrierLock(Barrier);
     WithContext Guard(std::move(Ctx));
Index: clang-tools-extra/clangd/ParsedAST.cpp
===================================================================
--- clang-tools-extra/clangd/ParsedAST.cpp
+++ clang-tools-extra/clangd/ParsedAST.cpp
@@ -236,6 +236,58 @@
   std::vector<syntax::Token> MainFileTokens;
 };
 
+llvm::Expected<MainFileMacros>
+scanMainFileMacros(llvm::StringRef Contents,
+                   const tooling::CompileCommand &Cmd) {
+  class EmptyFS : public ThreadsafeFS {
+  private:
+    llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> viewImpl() const override {
+      return new llvm::vfs::InMemoryFileSystem;
+    }
+  };
+  EmptyFS FS;
+  // Build and run Preprocessor over the preamble.
+  ParseInputs PI;
+  PI.Contents = Contents.str();
+  PI.TFS = &FS;
+  PI.CompileCommand = Cmd;
+  IgnoringDiagConsumer IgnoreDiags;
+  auto CI = buildCompilerInvocation(PI, IgnoreDiags);
+  if (!CI)
+    return error("failed to create compiler invocation");
+  CI->getDiagnosticOpts().IgnoreWarnings = true;
+  auto ContentsBuffer = llvm::MemoryBuffer::getMemBuffer(Contents);
+  // This means we're scanning (though not preprocessing) the preamble section
+  // twice. However, it's important to precisely follow the preamble bounds used
+  // elsewhere.
+  auto Bounds = ComputePreambleBounds(*CI->getLangOpts(), *ContentsBuffer, 0);
+  auto PreambleContents =
+      llvm::MemoryBuffer::getMemBufferCopy(Contents.substr(0, Bounds.Size));
+  auto Clang = prepareCompilerInstance(
+      std::move(CI), nullptr, std::move(PreambleContents),
+      // Provide an empty FS to prevent preprocessor from performing IO. This
+      // also implies missing resolved paths for includes.
+      FS.view(llvm::None), IgnoreDiags);
+  if (Clang->getFrontendOpts().Inputs.empty())
+    return error("compiler instance had no inputs");
+  // We are only interested in main file includes.
+  Clang->getPreprocessorOpts().SingleFileParseMode = true;
+  PreprocessOnlyAction Action;
+  if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
+    return error("failed BeginSourceFile");
+
+  MainFileMacros Macros;
+  Clang->getPreprocessor().addPPCallbacks(
+      std::make_unique<CollectMainFileMacros>(Clang->getSourceManager(),
+                                              Macros));
+
+  if (llvm::Error Err = Action.Execute())
+    return std::move(Err);
+  Action.EndSourceFile();
+
+  return Macros;
+}
+
 } // namespace
 
 llvm::Optional<ParsedAST>
@@ -393,8 +445,17 @@
   // Copy over the macros in the preamble region of the main file, and combine
   // with non-preamble macros below.
   MainFileMacros Macros;
-  if (Preamble)
+  if (Preamble && Preamble->CompileCommand.Filename == Filename) {
     Macros = Preamble->Macros;
+  }
+  else if (Preamble) {
+    auto EM = scanMainFileMacros(Inputs.Contents, Inputs.CompileCommand);
+    if (!EM) {
+      elog("Failed to scan macros of {0}: {1}", Filename, EM.takeError());
+    } else {
+      Macros = std::move(*EM);
+    }
+  }
   Clang->getPreprocessor().addPPCallbacks(
       std::make_unique<CollectMainFileMacros>(Clang->getSourceManager(),
                                               Macros));
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to