arphaman updated this revision to Diff 213664.
arphaman marked 6 inline comments as done.
arphaman added a comment.

Address review comments.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===================================================================
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations     The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase &Compilations,
+  DependencyScanningTool(DependencyScanningService &Service,
+                         const tooling::CompilationDatabase &Compilations,
                          SharedStream &OS, SharedStream &Errs)
-      : Compilations(Compilations), OS(OS), Errs(Errs) {}
+      : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt<ScanningMode> ScanMode(
+    "mode",
+    llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+    llvm::cl::values(
+        clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+                   "preprocess-minimized-sources",
+                   "The set of dependencies is computed by preprocessing the "
+                   "source files that were minimized to only include the "
+                   "contents that might affect the dependencies"),
+        clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+                   "The set of dependencies is computed by preprocessing the "
+                   "unmodified source files")),
+    llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt<unsigned>
     NumThreads("j", llvm::cl::Optional,
                llvm::cl::desc("Number of worker threads to use (default: use "
@@ -136,12 +152,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
       NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector<std::unique_ptr<DependencyScanningTool>> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
     WorkerTools.push_back(llvm::make_unique<DependencyScanningTool>(
-        *AdjustingCompilations, DependencyOS, Errs));
+        Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector<std::thread> WorkerThreads;
   std::atomic<bool> HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===================================================================
--- clang/test/ClangScanDeps/regular_cdb.cpp
+++ clang/test/ClangScanDeps/regular_cdb.cpp
@@ -8,7 +8,9 @@
 // RUN: cp %S/Inputs/header2.h %t.dir/Inputs/header2.h
 // RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/regular_cdb.json > %t.cdb
 //
-// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \
 // RUN:   FileCheck --check-prefixes=CHECK1,CHECK2,CHECK2NO %s
 //
 // Make sure we didn't produce any dependency files!
@@ -20,9 +22,13 @@
 // as it might fail if the results for `regular_cdb.cpp` are reported before
 // `regular_cdb2.cpp`.
 //
-// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 | \
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK1 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK1 %s
-// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 | \
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck --check-prefix=CHECK2 %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 2 -mode preprocess | \
 // RUN:   FileCheck --check-prefix=CHECK2 %s
 
 #include "header.h"
Index: clang/test/ClangScanDeps/header_stat_before_open.m
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/header_stat_before_open.m
@@ -0,0 +1,18 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/header_stat_before_open.m
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp -R %S/Inputs/frameworks %t.dir/Inputs/frameworks
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/header_stat_before_open_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "Framework/Framework.h"
+#include "Framework/PrivateHeader.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: header_stat_before_open.m
+// CHECK-NEXT: Inputs{{/|\\}}frameworks{{/|\\}}Framework.framework{{/|\\}}Headers{{/|\\}}Framework.h
+// CHECK-NEXT: Inputs{{/|\\}}frameworks{{/|\\}}Framework.framework{{/|\\}}PrivateHeaders{{/|\\}}PrivateHeader.h
Index: clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
@@ -0,0 +1,7 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/vfsoverlay.cpp -IInputs -ivfsoverlay DIR/vfsoverlay.yaml",
+  "file": "DIR/vfsoverlay.cpp"
+}
+]
Index: clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
@@ -0,0 +1,12 @@
+{
+  'version': 0,
+  'roots': [
+    { 'name': 'DIR', 'type': 'directory',
+      'contents': [
+        { 'name': 'not_real.h', 'type': 'file',
+          'external-contents': 'DIR/Inputs/header.h'
+        }
+      ]
+    }
+  ]
+}
Index: clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
@@ -0,0 +1,7 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/header_stat_before_open.m -iframework Inputs/frameworks",
+  "file": "DIR/header_stat_before_open.m"
+}
+]
Index: clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
@@ -0,0 +1,2 @@
+// This comment is stripped when file is opened, so size will change
+#define PRIV 0
Index: clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
===================================================================
--- /dev/null
+++ clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
@@ -0,0 +1,2 @@
+// This comment is stripped, so size is changed when file is opened
+#define FRAMEWORK 0
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -8,9 +8,11 @@
 
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/Frontend/TextDiagnosticPrinter.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/Tooling.h"
 
 using namespace clang;
@@ -62,10 +64,12 @@
 /// dependency scanning for the given compiler invocation.
 class DependencyScanningAction : public tooling::ToolAction {
 public:
-  DependencyScanningAction(StringRef WorkingDirectory,
-                           std::string &DependencyFileContents)
+  DependencyScanningAction(
+      StringRef WorkingDirectory, std::string &DependencyFileContents,
+      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS)
       : WorkingDirectory(WorkingDirectory),
-        DependencyFileContents(DependencyFileContents) {}
+        DependencyFileContents(DependencyFileContents),
+        DepFS(std::move(DepFS)) {}
 
   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
                      FileManager *FileMgr,
@@ -74,8 +78,6 @@
     // Create a compiler instance to handle the actual work.
     CompilerInstance Compiler(std::move(PCHContainerOps));
     Compiler.setInvocation(std::move(Invocation));
-    FileMgr->getFileSystemOpts().WorkingDir = WorkingDirectory;
-    Compiler.setFileManager(FileMgr);
 
     // Don't print 'X warnings and Y errors generated'.
     Compiler.getDiagnosticOpts().ShowCarets = false;
@@ -84,6 +86,27 @@
     if (!Compiler.hasDiagnostics())
       return false;
 
+    // Use the dependency scanning optimized file system if we can.
+    if (DepFS) {
+      // FIXME: Purge the symlink entries from the stat cache in the FM.
+      const CompilerInvocation &CI = Compiler.getInvocation();
+      // Add any filenames that were explicitly passed in the build settings and
+      // that might be opened, as we want to ensure we don't run source
+      // minimization on them.
+      DepFS->IgnoredFiles.clear();
+      for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries)
+        DepFS->IgnoredFiles.insert(Entry.Path);
+      for (const auto &Entry : CI.getHeaderSearchOpts().VFSOverlayFiles)
+        DepFS->IgnoredFiles.insert(Entry);
+
+      // Support for virtual file system overlays on top of the caching
+      // filesystem.
+      FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
+          CI, Compiler.getDiagnostics(), DepFS));
+    }
+
+    FileMgr->getFileSystemOpts().WorkingDir = WorkingDirectory;
+    Compiler.setFileManager(FileMgr);
     Compiler.createSourceManager(*FileMgr);
 
     // Create the dependency collector that will collect the produced
@@ -103,7 +126,8 @@
 
     auto Action = llvm::make_unique<PreprocessOnlyAction>();
     const bool Result = Compiler.ExecuteAction(*Action);
-    FileMgr->clearStatCache();
+    if (!DepFS)
+      FileMgr->clearStatCache();
     return Result;
   }
 
@@ -111,16 +135,19 @@
   StringRef WorkingDirectory;
   /// The dependency file will be written to this string.
   std::string &DependencyFileContents;
+  llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
 };
 
 } // end anonymous namespace
 
-DependencyScanningWorker::DependencyScanningWorker() {
+DependencyScanningWorker::DependencyScanningWorker(
+    DependencyScanningService &Service) {
   DiagOpts = new DiagnosticOptions();
   PCHContainerOps = std::make_shared<PCHContainerOperations>();
-  /// FIXME: Use the shared file system from the service for fast scanning
-  /// mode.
-  WorkerFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+    DepFS = new DependencyScanningWorkerFilesystem(Service.getSharedCache(),
+                                                   RealFS);
 }
 
 llvm::Expected<std::string>
@@ -133,14 +160,17 @@
   llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
   TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts.get());
 
-  WorkerFS->setCurrentWorkingDirectory(WorkingDirectory);
-  tooling::ClangTool Tool(CDB, Input, PCHContainerOps, WorkerFS);
+  RealFS->setCurrentWorkingDirectory(WorkingDirectory);
+  /// Create the tool that uses the underlying file system to ensure that any
+  /// file system requests that are made by the driver do not go through the
+  /// dependency scanning filesystem.
+  tooling::ClangTool Tool(CDB, Input, PCHContainerOps, RealFS);
   Tool.clearArgumentsAdjusters();
   Tool.setRestoreWorkingDir(false);
   Tool.setPrintErrorMessage(false);
   Tool.setDiagnosticConsumer(&DiagPrinter);
   std::string Output;
-  DependencyScanningAction Action(WorkingDirectory, Output);
+  DependencyScanningAction Action(WorkingDirectory, Output, DepFS);
   if (Tool.run(&Action)) {
     return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
                                                llvm::inconvertibleErrorCode());
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
@@ -0,0 +1,16 @@
+//===- DependencyScanningService.cpp - clang-scan-deps service ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
+
+using namespace clang;
+using namespace tooling;
+using namespace dependencies;
+
+DependencyScanningService::DependencyScanningService(ScanningMode Mode)
+    : Mode(Mode) {}
Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -0,0 +1,218 @@
+//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Threading.h"
+
+using namespace clang;
+using namespace tooling;
+using namespace dependencies;
+
+CachedFileSystemEntry CachedFileSystemEntry::createFileEntry(
+    StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize) {
+  // Load the file and its content from the file system.
+  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile =
+      FS.openFileForRead(Filename);
+  if (!MaybeFile)
+    return MaybeFile.getError();
+  llvm::ErrorOr<llvm::vfs::Status> Stat = (*MaybeFile)->status();
+  if (!Stat)
+    return Stat.getError();
+
+  llvm::vfs::File &F = **MaybeFile;
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeBuffer =
+      F.getBuffer(Stat->getName());
+  if (!MaybeBuffer)
+    return MaybeBuffer.getError();
+
+  llvm::SmallString<1024> MinimizedFileContents;
+  // Minimize the file down to directives that might affect the dependencies.
+  const auto &Buffer = *MaybeBuffer;
+  SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
+  if (!Minimize || minimizeSourceToDependencyDirectives(
+                       Buffer->getBuffer(), MinimizedFileContents, Tokens)) {
+    // Use the original file unless requested otherwise, or
+    // if the minimization failed.
+    // FIXME: Propagate the diagnostic if desired by the client.
+    CachedFileSystemEntry Result;
+    Result.MaybeStat = std::move(*Stat);
+    Result.Contents.reserve(Buffer->getBufferSize() + 1);
+    Result.Contents.append(Buffer->getBufferStart(), Buffer->getBufferEnd());
+    // Implicitly null terminate the contents for Clang's lexer.
+    Result.Contents.push_back('\0');
+    Result.Contents.pop_back();
+    return Result;
+  }
+
+  CachedFileSystemEntry Result;
+  size_t Size = MinimizedFileContents.size();
+  Result.MaybeStat = llvm::vfs::Status(Stat->getName(), Stat->getUniqueID(),
+                                       Stat->getLastModificationTime(),
+                                       Stat->getUser(), Stat->getGroup(), Size,
+                                       Stat->getType(), Stat->getPermissions());
+  // The contents produced by the minimizer must be null terminated.
+  assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
+         "not null terminated contents");
+  // Even though there's an implicit null terminator in the minimized contents,
+  // we want to temporarily make it explicit. This will ensure that the
+  // std::move will preserve it even if it needs to do a copy if the
+  // SmallString still has the small capacity.
+  MinimizedFileContents.push_back('\0');
+  Result.Contents = std::move(MinimizedFileContents);
+  // Now make the null terminator implicit again, so that Clang's lexer can find
+  // it right where the buffer ends.
+  Result.Contents.pop_back();
+  return Result;
+}
+
+CachedFileSystemEntry
+CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status Stat) {
+  assert(Stat.isDirectory() && "not a directory!");
+  auto Result = CachedFileSystemEntry();
+  Result.MaybeStat = std::move(Stat);
+  return Result;
+}
+
+DependencyScanningFilesystemSharedCache::
+    DependencyScanningFilesystemSharedCache() {
+  // This heuristic was chosen using empirical testing on a machine with a
+  // reasonably high core count (iMacPro 18 cores / 36 threads). The cache
+  // sharding gives a performance edge by reducing the lock contention.
+  // FIXME: A better heuristic might also consider the OS to account for
+  // the different cost of lock contention on different OSes.
+  NumShards = std::max(2u, llvm::hardware_concurrency() / 4);
+  CacheShards = llvm::make_unique<CacheShard[]>(NumShards);
+}
+
+/// Returns a cache entry for the corresponding key.
+///
+/// A new cache entry is created if the key is not in the cache. This is a
+/// thread safe call.
+DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::get(StringRef Key) {
+  CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
+  std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
+  auto It = Shard.Cache.try_emplace(Key);
+  return It.first->getValue();
+}
+
+llvm::ErrorOr<llvm::vfs::Status>
+DependencyScanningWorkerFilesystem::status(const Twine &Path) {
+  SmallString<256> OwnedFilename;
+  StringRef Filename = Path.toStringRef(OwnedFilename);
+
+  // Check the local cache first.
+  if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename))
+    return Entry->getStatus();
+
+  // FIXME: Handle PCM/PCH files.
+  // FIXME: Handle module map files.
+
+  bool KeepOriginalSource = IgnoredFiles.count(Filename);
+  DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
+      &SharedCacheEntry = SharedCache.get(Filename);
+  const CachedFileSystemEntry *Result;
+  {
+    std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
+    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
+
+    if (!CacheEntry.isValid()) {
+      llvm::vfs::FileSystem &FS = getUnderlyingFS();
+      auto MaybeStatus = FS.status(Filename);
+      if (!MaybeStatus)
+        CacheEntry = CachedFileSystemEntry(MaybeStatus.getError());
+      else if (MaybeStatus->isDirectory())
+        CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
+            std::move(*MaybeStatus));
+      else
+        CacheEntry = CachedFileSystemEntry::createFileEntry(
+            Filename, FS, !KeepOriginalSource);
+    }
+
+    Result = &CacheEntry;
+  }
+
+  // Store the result in the local cache.
+  setCachedEntry(Filename, Result);
+  return Result->getStatus();
+}
+
+namespace {
+
+/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
+/// this subclass.
+class MinimizedVFSFile final : public llvm::vfs::File {
+public:
+  MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
+                   llvm::vfs::Status Stat)
+      : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
+
+  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
+
+  const llvm::MemoryBuffer *getBufferPtr() const { return Buffer.get(); }
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
+  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
+            bool IsVolatile) override {
+    return std::move(Buffer);
+  }
+
+  std::error_code close() override { return {}; }
+
+private:
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+  llvm::vfs::Status Stat;
+};
+
+llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+createFile(const CachedFileSystemEntry *Entry) {
+  llvm::ErrorOr<StringRef> Contents = Entry->getContents();
+  if (!Contents)
+    return Contents.getError();
+  return llvm::make_unique<MinimizedVFSFile>(
+      llvm::MemoryBuffer::getMemBuffer(*Contents, Entry->getName(),
+                                       /*RequiresNullTerminator=*/false),
+      *Entry->getStatus());
+}
+
+} // end anonymous namespace
+
+llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
+  SmallString<256> OwnedFilename;
+  StringRef Filename = Path.toStringRef(OwnedFilename);
+
+  // Check the local cache first.
+  if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename))
+    return createFile(Entry);
+
+  // FIXME: Handle PCM/PCH files.
+  // FIXME: Handle module map files.
+
+  bool KeepOriginalSource = IgnoredFiles.count(Filename);
+  DependencyScanningFilesystemSharedCache::SharedFileSystemEntry
+      &SharedCacheEntry = SharedCache.get(Filename);
+  const CachedFileSystemEntry *Result;
+  {
+    std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
+    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
+
+    if (!CacheEntry.isValid()) {
+      CacheEntry = CachedFileSystemEntry::createFileEntry(
+          Filename, getUnderlyingFS(), !KeepOriginalSource);
+    }
+
+    Result = &CacheEntry;
+  }
+
+  // Store the result in the local cache.
+  setCachedEntry(Filename, Result);
+  return createFile(Result);
+}
Index: clang/lib/Tooling/DependencyScanning/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/DependencyScanning/CMakeLists.txt
+++ clang/lib/Tooling/DependencyScanning/CMakeLists.txt
@@ -4,6 +4,8 @@
   )
 
 add_clang_library(clangDependencyScanning
+  DependencyScanningFilesystem.cpp
+  DependencyScanningService.cpp
   DependencyScanningWorker.cpp
 
   DEPENDS
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H
 
 #include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Tooling/CompilationDatabase.h"
@@ -21,6 +22,9 @@
 namespace tooling {
 namespace dependencies {
 
+class DependencyScanningService;
+class DependencyScanningWorkerFilesystem;
+
 /// An individual dependency scanning worker that is able to run on its own
 /// thread.
 ///
@@ -29,7 +33,7 @@
 /// using the regular processing run.
 class DependencyScanningWorker {
 public:
-  DependencyScanningWorker();
+  DependencyScanningWorker(DependencyScanningService &Service);
 
   /// Print out the dependency information into a string using the dependency
   /// file format that is specified in the options (-MD is the default) and
@@ -45,10 +49,11 @@
   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
   std::shared_ptr<PCHContainerOperations> PCHContainerOps;
 
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> RealFS;
   /// The file system that is used by each worker when scanning for
   /// dependencies. This filesystem persists accross multiple compiler
   /// invocations.
-  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> WorkerFS;
+  llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
 };
 
 } // end namespace dependencies
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -0,0 +1,55 @@
+//===- DependencyScanningService.h - clang-scan-deps service ===-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
+#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
+
+#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
+
+namespace clang {
+namespace tooling {
+namespace dependencies {
+
+/// The mode in which the dependency scanner will operate to find the
+/// dependencies.
+enum class ScanningMode {
+  /// This mode is used to compute the dependencies by running the preprocessor
+  /// over
+  /// the unmodified source files.
+  CanonicalPreprocessing,
+
+  /// This mode is used to compute the dependencies by running the preprocessor
+  /// over
+  /// the source files that have been minimized to contents that might affect
+  /// the dependencies.
+  MinimizedSourcePreprocessing
+};
+
+/// The dependency scanning service contains the shared state that is used by
+/// the individual dependency scanning workers.
+class DependencyScanningService {
+public:
+  DependencyScanningService(ScanningMode Mode);
+
+  ScanningMode getMode() const { return Mode; }
+
+  DependencyScanningFilesystemSharedCache &getSharedCache() {
+    return SharedCache;
+  }
+
+private:
+  const ScanningMode Mode;
+  /// The global file system cache.
+  DependencyScanningFilesystemSharedCache SharedCache;
+};
+
+} // end namespace dependencies
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_SERVICE_H
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -0,0 +1,168 @@
+//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
+#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include <mutex>
+
+namespace clang {
+namespace tooling {
+namespace dependencies {
+
+/// An in-memory representation of a file system entity that is of interest to
+/// the dependency scanning filesystem.
+///
+/// It represents one of the following:
+/// - an opened source file with minimized contents and a stat value.
+/// - an opened source file with original contents and a stat value.
+/// - a directory entry with its stat value.
+/// - an error value to represent a file system error.
+/// - a placeholder with an invalid stat indicating a not yet initialized entry.
+class CachedFileSystemEntry {
+public:
+  /// Default constructor creates a placeholder entry with an invalid stat.
+  CachedFileSystemEntry() : MaybeStat(llvm::vfs::Status()) {}
+
+  CachedFileSystemEntry(std::error_code Error) : MaybeStat(std::move(Error)) {}
+
+  /// Create an entry that represents an opened source file with minimized or
+  /// original contents.
+  ///
+  /// The filesystem opens the file even for `stat` calls to avoid the issues
+  /// with stat + open of minimized files that might lead to a mismatching
+  /// size of the file. If the file is not minimized, the full file is read
+  /// and copied into memory to ensure that it's not memory mapped, which
+  /// avoids running out of file descriptors.
+  static CachedFileSystemEntry createFileEntry(StringRef Filename,
+                                               llvm::vfs::FileSystem &FS,
+                                               bool Minimize = true);
+
+  /// Create an entry that represents a directory on the filesystem.
+  static CachedFileSystemEntry createDirectoryEntry(llvm::vfs::Status Stat);
+
+  /// \returns True if the entry is initialized (an error or a known status).
+  bool isValid() const { return !MaybeStat || MaybeStat->isStatusKnown(); }
+
+  /// \returns The stored error, or the file's contents if there is no error.
+  llvm::ErrorOr<StringRef> getContents() const {
+    if (!MaybeStat)
+      return MaybeStat.getError();
+    assert(!MaybeStat->isDirectory() && "not a file");
+    assert(isValid() && "not initialized");
+    return StringRef(Contents);
+  }
+
+  /// \returns The stored error, or the status of the entry.
+  llvm::ErrorOr<llvm::vfs::Status> getStatus() const {
+    assert(isValid() && "not initialized");
+    return MaybeStat;
+  }
+
+  /// \returns The name recorded in the entry's status.
+  StringRef getName() const {
+    assert(isValid() && "not initialized");
+    return MaybeStat->getName();
+  }
+
+  CachedFileSystemEntry(CachedFileSystemEntry &&) = default;
+  CachedFileSystemEntry &operator=(CachedFileSystemEntry &&) = default;
+
+  CachedFileSystemEntry(const CachedFileSystemEntry &) = delete;
+  CachedFileSystemEntry &operator=(const CachedFileSystemEntry &) = delete;
+
+private:
+  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
+  // Store the contents in a small string to allow a
+  // move from the small string for the minimized contents.
+  // Note: small size of 1 allows us to store an empty string with an implicit
+  // null terminator without any allocations.
+  llvm::SmallString<1> Contents;
+};
+
+/// This class is a shared cache that caches the 'stat' and 'open' calls to the
+/// underlying real file system.
+///
+/// It is sharded based on the hash of the key to reduce the lock contention for
+/// the worker threads.
+class DependencyScanningFilesystemSharedCache {
+public:
+  struct SharedFileSystemEntry {
+    std::mutex ValueLock;
+    CachedFileSystemEntry Value;
+  };
+
+  DependencyScanningFilesystemSharedCache();
+
+  /// Returns the cache entry for the corresponding key.
+  ///
+  /// A new cache entry is created if the key is not in the cache yet. This is
+  /// a thread safe call.
+  SharedFileSystemEntry &get(StringRef Key);
+
+private:
+  struct CacheShard {
+    std::mutex CacheLock;
+    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
+  };
+  std::unique_ptr<CacheShard[]> CacheShards;
+  unsigned NumShards;
+};
+
+/// A virtual file system optimized for the dependency discovery.
+///
+/// It is primarily designed to work with source files whose contents were
+/// preprocessed to remove any tokens that are unlikely to affect the dependency
+/// computation.
+///
+/// This is not a thread safe VFS. A single instance is meant to be used only in
+/// one thread. Multiple instances are allowed to service multiple threads
+/// running in parallel.
+class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
+public:
+  DependencyScanningWorkerFilesystem(
+      DependencyScanningFilesystemSharedCache &SharedCache,
+      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
+      : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {}
+
+  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
+  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+  openFileForRead(const Twine &Path) override;
+
+  /// The set of files that should not be minimized.
+  llvm::StringSet<> IgnoredFiles;
+
+private:
+  void setCachedEntry(StringRef Filename, const CachedFileSystemEntry *Entry) {
+    bool IsInserted = Cache.try_emplace(Filename, Entry).second;
+    (void)IsInserted;
+    assert(IsInserted && "local cache is updated more than once");
+  }
+
+  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
+
+  DependencyScanningFilesystemSharedCache &SharedCache;
+  /// The local cache is used by the worker thread to cache file system queries
+  /// locally instead of querying the global cache every time.
+  llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
+};
+
+} // end namespace dependencies
+} // end namespace tooling
+} // end namespace clang
+
+#endif // LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
Index: clang/include/clang/Basic/FileManager.h
===================================================================
--- clang/include/clang/Basic/FileManager.h
+++ clang/include/clang/Basic/FileManager.h
@@ -231,6 +231,10 @@
 
   llvm::vfs::FileSystem &getVirtualFileSystem() const { return *FS; }
 
+  void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
+    this->FS = std::move(FS); // getVirtualFileSystem() now returns the new FS.
+  }
+
   /// Retrieve a file entry for a "virtual" file that acts as
   /// if there were a file with the given name on disk.
   ///
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to