jansvoboda11 created this revision.
jansvoboda11 added reviewers: Bigcheese, benlangmuir.
Herald added a subscriber: ributzka.
Herald added a project: All.
jansvoboda11 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

In the scanner's VFS, we cache all files by default and only avoid caching stat 
failures for certain files. This tanks the performance of scanning with 
pre-populated module cache. When there is a stale PCM file, it gets cached by 
the scanner at the start and the rebuilt version never makes it through the VFS 
again. The TU invocation that rebuilds the PCM only sees the copy in its 
InMemoryModuleCache, which is invisible to other invocations. This means the 
PCM gets rebuilt for every TU given to the scanner.

This patch fixes the situation by flipping the default, only caching files that 
are known to be important, and letting everything else fall through to the 
underlying VFS.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D146328

Files:
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Index: clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
===================================================================
--- clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -42,9 +42,8 @@
 }
 
 EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
-    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
-  if (Entry.isError() || Entry.isDirectory() || Disable ||
-      !shouldScanForDirectives(Filename))
+    const CachedFileSystemEntry &Entry, StringRef Filename, PathPolicy Policy) {
+  if (Entry.isError() || Entry.isDirectory() || !(Policy & PP_ScanFile))
     return EntryRef(Filename, Entry);
 
   CachedFileContents *Contents = Entry.getCachedContents();
@@ -159,39 +158,29 @@
   return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
 }
 
-/// Whitelist file extensions that should be minimized, treating no extension as
-/// a source file that should be minimized.
+/// Whitelist file extensions that should be cached/scanned.
 ///
 /// This is kinda hacky, it would be better if we knew what kind of file Clang
 /// was expecting instead.
-static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
+PathPolicy clang::tooling::dependencies::getPolicy(StringRef Filename) {
   StringRef Ext = llvm::sys::path::extension(Filename);
   if (Ext.empty())
-    return true; // C++ standard library
-  return llvm::StringSwitch<bool>(Ext)
-      .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
-      .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
-      .CasesLower(".m", ".mm", true)
-      .CasesLower(".i", ".ii", ".mi", ".mmi", true)
-      .CasesLower(".def", ".inc", true)
-      .Default(false);
-}
-
-static bool shouldCacheStatFailures(StringRef Filename) {
-  StringRef Ext = llvm::sys::path::extension(Filename);
-  if (Ext.empty())
-    return false; // This may be the module cache directory.
-  // Only cache stat failures on files that are not expected to change during
-  // the build.
-  StringRef FName = llvm::sys::path::filename(Filename);
-  if (FName == "module.modulemap" || FName == "module.map")
-    return true;
-  return shouldScanForDirectivesBasedOnExtension(Filename);
-}
-
-bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
-    StringRef Filename) {
-  return shouldScanForDirectivesBasedOnExtension(Filename);
+    return (PathPolicy)(PP_CacheSuccess | PP_ScanFile);
+  return (PathPolicy)llvm::StringSwitch<int>(Ext)
+      // Sources:
+      .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", PP_Cache | PP_ScanFile)
+      .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", PP_Cache | PP_ScanFile)
+      .CasesLower(".m", ".mm", PP_Cache | PP_ScanFile)
+      .CasesLower(".i", ".ii", ".mi", ".mmi", PP_Cache | PP_ScanFile)
+      .CasesLower(".def", ".inc", PP_Cache | PP_ScanFile)
+      // Module maps:
+      .CasesLower(".modulemap", ".map", PP_Cache)
+      // Frameworks:
+      .CaseLower(".framework", PP_Cache)
+      // VFS overlays, header maps:
+      .CasesLower(".yaml", ".json", ".hmap", PP_Cache)
+      // Everything else (PCM, binary files, ".noindex" module cache, ...):
+      .Default(PP_FallThrough);
 }
 
 const CachedFileSystemEntry &
@@ -215,10 +204,11 @@
 }
 
 llvm::ErrorOr<const CachedFileSystemEntry &>
-DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
+DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename,
+                                                          PathPolicy Policy) {
   llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
   if (!Stat) {
-    if (!shouldCacheStatFailures(Filename))
+    if (!(Policy & PP_CacheFailure))
       return Stat.getError();
     const auto &Entry =
         getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
@@ -244,16 +234,13 @@
 
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
-    StringRef Filename, bool DisableDirectivesScanning) {
+    StringRef Filename, PathPolicy Policy) {
   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
-    return scanForDirectivesIfNecessary(*Entry, Filename,
-                                        DisableDirectivesScanning)
-        .unwrapError();
-  auto MaybeEntry = computeAndStoreResult(Filename);
+    return scanForDirectivesIfNecessary(*Entry, Filename, Policy).unwrapError();
+  auto MaybeEntry = computeAndStoreResult(Filename, Policy);
   if (!MaybeEntry)
     return MaybeEntry.getError();
-  return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
-                                      DisableDirectivesScanning)
+  return scanForDirectivesIfNecessary(*MaybeEntry, Filename, Policy)
       .unwrapError();
 }
 
@@ -261,8 +248,11 @@
 DependencyScanningWorkerFilesystem::status(const Twine &Path) {
   SmallString<256> OwnedFilename;
   StringRef Filename = Path.toStringRef(OwnedFilename);
+  PathPolicy Policy = getPolicy(Filename);
+  if (Policy == PP_FallThrough)
+    return getUnderlyingFS().status(Path);
 
-  llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
+  llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename, Policy);
   if (!Result)
     return Result.getError();
   return Result->getStatus();
@@ -318,8 +308,11 @@
 DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
   SmallString<256> OwnedFilename;
   StringRef Filename = Path.toStringRef(OwnedFilename);
+  PathPolicy Policy = getPolicy(Filename);
+  if (Policy == PP_FallThrough)
+    return getUnderlyingFS().openFileForRead(Path);
 
-  llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename);
+  llvm::ErrorOr<EntryRef> Result = getOrCreateFileSystemEntry(Filename, Policy);
   if (!Result)
     return Result.getError();
   return DepScanFile::create(Result.get());
Index: clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===================================================================
--- clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -269,6 +269,17 @@
   }
 };
 
+enum PathPolicy {
+  PP_FallThrough = 0,  // Skip caching/scanning this file and fall through.
+  PP_CacheSuccess = 1, // Cache only successful stats/reads.
+  PP_CacheFailure = 2, // Cache even unsuccessful stats/reads.
+  PP_ScanFile = 4,     // Scan dependency directives for a file.
+                       // Only allowed when PP_CacheSuccess is enabled.
+  PP_Cache = PP_CacheSuccess | PP_CacheFailure, // Cache all stats/reads.
+};
+
+PathPolicy getPolicy(StringRef Filename);
+
 /// A virtual file system optimized for the dependency discovery.
 ///
 /// It is primarily designed to work with source files whose contents was
@@ -293,24 +304,25 @@
   ///
   /// Attempts to use the local and shared caches first, then falls back to
   /// using the underlying filesystem.
-  llvm::ErrorOr<EntryRef>
-  getOrCreateFileSystemEntry(StringRef Filename,
-                             bool DisableDirectivesScanning = false);
+  llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename) {
+    return getOrCreateFileSystemEntry(Filename, getPolicy(Filename));
+  }
 
 private:
-  /// Check whether the file should be scanned for preprocessor directives.
-  bool shouldScanForDirectives(StringRef Filename);
+  /// Same as the public version, but with explicit PathPolicy parameter.
+  llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename,
+                                                     PathPolicy Policy);
 
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
   /// shared cache indexed by unique ID, or creates new entry from scratch.
   llvm::ErrorOr<const CachedFileSystemEntry &>
-  computeAndStoreResult(StringRef Filename);
+  computeAndStoreResult(StringRef Filename, PathPolicy Policy);
 
   /// Scan for preprocessor directives for the given entry if necessary and
   /// returns a wrapper object with reference semantics.
   EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry,
-                                        StringRef Filename, bool Disable);
+                                        StringRef Filename, PathPolicy Policy);
 
   /// Represents a filesystem entry that has been stat-ed (and potentially read)
   /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to