https://github.com/Bigcheese created https://github.com/llvm/llvm-project/pull/73734
`-ivfsoverlay` files are unused when building most modules. Enable removing them by, * adding a way to visit the filesystem tree with extensible RTTI to access each `RedirectingFileSystem`. * Adding tracking to `RedirectingFileSystem` to record when it actually redirects a file access. * Storing this information in each PCM. Usage tracking is disabled during implicit modulemap search as this ends up touching a lot of files that aren't actually used. The used files are later touched by other parts of Clang so relevant VFS overlays get marked as used. >From 25f3b9202654aa007b5a7fd2b82a93ef8521003c Mon Sep 17 00:00:00 2001 From: Michael Spencer <michael_spen...@apple.com> Date: Fri, 24 Feb 2023 17:18:51 -0800 Subject: [PATCH 1/2] Remove unused -ivfsoverlay files `-ivfsoverlay` files are unused when building most modules. Enable removing them by, * adding a way to visit the filesystem tree with extensible RTTI to access each `RedirectingFileSystem`. * Adding tracking to `RedirectingFileSystem` to record when it actually redirects a file access. * Storing this information in each PCM. Usage tracking is disabled during implicit modulemap search as this ends up touching a lot of files that aren't actually used. The used files are later touched by other parts of Clang so relevant VFS overlays get marked as used. --- clang/include/clang/Basic/FileManager.h | 4 + clang/include/clang/Lex/HeaderSearch.h | 6 + .../include/clang/Serialization/ASTBitCodes.h | 3 + .../include/clang/Serialization/ModuleFile.h | 3 + .../DependencyScanningFilesystem.h | 6 +- .../DependencyScanningService.h | 5 +- clang/lib/Basic/FileManager.cpp | 7 + clang/lib/Lex/HeaderSearch.cpp | 20 ++ clang/lib/Serialization/ASTReader.cpp | 15 +- clang/lib/Serialization/ASTWriter.cpp | 24 ++- .../DependencyScanningFilesystem.cpp | 6 +- .../DependencyScanning/ModuleDepCollector.cpp | 65 +++++-- clang/test/ClangScanDeps/optimize-vfs.m | 181 ++++++++++++++++++ clang/tools/clang-scan-deps/ClangScanDeps.cpp | 1 + llvm/include/llvm/Support/VirtualFileSystem.h | 57 +++++- llvm/lib/Support/VirtualFileSystem.cpp | 26 +++ .../Support/VirtualFileSystemTest.cpp | 84 ++++++++ 17 files changed, 474 insertions(+), 39 deletions(-) create mode 100644 clang/test/ClangScanDeps/optimize-vfs.m diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 56cb093dd8c376f..997c17a0ffcfcce 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -248,6 +248,10 @@ class FileManager : public RefCountedBase<FileManager> { return FS; } + /// Enable or disable tracking of VFS usage. Used to not track full header + /// search and implicit modulemap lookup. + void trackVFSUsage(bool Active); + void setVirtualFileSystem(IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) { this->FS = std::move(FS); } diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index a100598c80155fa..7c21796b0460238 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -579,6 +579,12 @@ class HeaderSearch { /// Note: implicit module maps don't contribute to entry usage. std::vector<bool> computeUserEntryUsage() const; + /// Determine which HeaderSearchOptions::VFSOverlayFiles have been + /// successfully used so far and mark their index with 'true' in the resulting + /// bit vector. + /// Note: implicit module maps don't contribute to entry usage. + std::vector<bool> computeVFSUsage() const; + /// This method returns a HeaderMap for the specified /// FileEntry, uniquing them through the 'HeaderMaps' datastructure. const HeaderMap *CreateHeaderMap(FileEntryRef FE); diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index fdd64f2abbe9375..f4abfe6f560664f 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -405,6 +405,9 @@ enum UnhashedControlBlockRecordTypes { /// Record code for the indices of used header search entries. HEADER_SEARCH_ENTRY_USAGE, + + /// Record code for the indices of used VFSs. + VFS_USAGE, }; /// Record code for extension blocks. diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 48be8676cc26a4c..a2d49507a579427 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -189,6 +189,9 @@ class ModuleFile { /// The bit vector denoting usage of each header search entry (true = used). llvm::BitVector SearchPathUsage; + /// The bit vector denoting usage of each VFS entry (true = used). + llvm::BitVector VFSUsage; + /// Whether this module has been directly imported by the /// user. bool DirectlyImported = false; diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 9a2aea5d6efa170..846fdc7253977f9 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -280,8 +280,12 @@ class EntryRef { /// This is not a thread safe VFS. A single instance is meant to be used only in /// one thread. Multiple instances are allowed to service multiple threads /// running in parallel. -class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { +class DependencyScanningWorkerFilesystem + : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem, + llvm::vfs::ProxyFileSystem> { public: + static const char ID; + DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS); diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h index dcdf1c171f6d731..e953e2dee48fc0a 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -54,7 +54,10 @@ enum class ScanningOptimizations { /// Remove warnings from system modules. SystemWarnings = 2, - LLVM_MARK_AS_BITMASK_ENUM(SystemWarnings), + /// Remove unused -ivfsoverlay arguments. + VFS = 4, + + LLVM_MARK_AS_BITMASK_ENUM(VFS), All = HeaderSearch | SystemWarnings, Default = All }; diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index d16626b10652136..86f90b26145e9dc 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -387,6 +387,13 @@ llvm::Expected<FileEntryRef> FileManager::getSTDIN() { return *STDIN; } +void FileManager::trackVFSUsage(bool Active) { + FS->visit([Active](llvm::vfs::FileSystem &FileSys) { + if (auto *RFS = dyn_cast<llvm::vfs::RedirectingFileSystem>(&FileSys)) + RFS->setUsageTrackingActive(Active); + }); +} + const FileEntry *FileManager::getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime) { return &getVirtualFileRef(Filename, Size, ModificationTime).getFileEntry(); diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index cf1c0cc5284f316..78f9f014dc06fe1 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -142,6 +142,21 @@ std::vector<bool> HeaderSearch::computeUserEntryUsage() const { return UserEntryUsage; } +std::vector<bool> HeaderSearch::computeVFSUsage() const { + std::vector<bool> VFSUsage; + llvm::vfs::FileSystem &RootFS = FileMgr.getVirtualFileSystem(); + // TODO: This only works if the `RedirectingFileSystem`s were all created by + // `createVFSFromOverlayFiles`. + RootFS.visit([&](const llvm::vfs::FileSystem &FS) { + if (auto *RFS = dyn_cast<const llvm::vfs::RedirectingFileSystem>(&FS)) { + VFSUsage.push_back(RFS->hasBeenUsed()); + } + }); + // VFS visit order is the opposite of VFSOverlayFiles order. + std::reverse(VFSUsage.begin(), VFSUsage.end()); + return VFSUsage; +} + /// CreateHeaderMap - This method returns a HeaderMap for the specified /// FileEntry, uniquing them through the 'HeaderMaps' datastructure. const HeaderMap *HeaderSearch::CreateHeaderMap(FileEntryRef FE) { @@ -303,6 +318,10 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, bool AllowExtraModuleMapSearch) { Module *Module = nullptr; + // Modulemap search can touch lots of files that aren't actually relavant to + // if a VFS is used or not. + FileMgr.trackVFSUsage(false); + // Look through the various header search paths to load any available module // maps, searching for a module map that describes this module. for (DirectoryLookup &Dir : search_dir_range()) { @@ -371,6 +390,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName, break; } + FileMgr.trackVFSUsage(true); return Module; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f22da838424b415..4cee84ca00e5062 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4985,7 +4985,7 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl( F->PragmaDiagMappings.insert(F->PragmaDiagMappings.end(), Record.begin(), Record.end()); break; - case HEADER_SEARCH_ENTRY_USAGE: + case HEADER_SEARCH_ENTRY_USAGE: { if (!F) break; unsigned Count = Record[0]; @@ -4997,6 +4997,19 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl( F->SearchPathUsage[I] = true; break; } + case VFS_USAGE: { + if (!F) + break; + unsigned Count = Record[0]; + const char *Byte = Blob.data(); + F->VFSUsage = llvm::BitVector(Count, false); + for (unsigned I = 0; I < Count; ++Byte) + for (unsigned Bit = 0; Bit < 8 && I < Count; ++Bit, ++I) + if (*Byte & (1 << Bit)) + F->VFSUsage[I] = true; + break; + } + } } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 6df815234e235fb..bc26e7c68720a40 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1265,18 +1265,30 @@ void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule); // Header search entry usage. - auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage(); - auto Abbrev = std::make_shared<BitCodeAbbrev>(); - Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE)); - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits. - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector. - unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); { + auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage(); + auto Abbrev = std::make_shared<BitCodeAbbrev>(); + Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits. + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector. + unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); RecordData::value_type Record[] = {HEADER_SEARCH_ENTRY_USAGE, HSEntryUsage.size()}; Stream.EmitRecordWithBlob(HSUsageAbbrevCode, Record, bytes(HSEntryUsage)); } + // VFS usage. + { + auto VFSUsage = PP.getHeaderSearchInfo().computeVFSUsage(); + auto Abbrev = std::make_shared<BitCodeAbbrev>(); + Abbrev->Add(BitCodeAbbrevOp(VFS_USAGE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits. + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector. + unsigned VFSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + RecordData::value_type Record[] = {VFS_USAGE, VFSUsage.size()}; + Stream.EmitRecordWithBlob(VFSUsageAbbrevCode, Record, bytes(VFSUsage)); + } + // Leave the options block. Stream.ExitBlock(); UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 3e53c8fc5740875..bea52c906cab856 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -194,7 +194,9 @@ static bool shouldCacheStatFailures(StringRef Filename) { DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( DependencyScanningFilesystemSharedCache &SharedCache, IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) - : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache), + : llvm::RTTIExtends<DependencyScanningWorkerFilesystem, + llvm::vfs::ProxyFileSystem>(std::move(FS)), + SharedCache(SharedCache), WorkingDirForCacheLookup(llvm::errc::invalid_argument) { updateWorkingDirForCacheLookup(); } @@ -379,3 +381,5 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() { assert(!WorkingDirForCacheLookup || llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup)); } + +const char DependencyScanningWorkerFilesystem::ID = 0; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 9099c18391e4d29..d23d6cd3ad946af 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -31,25 +31,46 @@ const std::vector<std::string> &ModuleDeps::getBuildArguments() { static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, ASTReader &Reader, - const serialization::ModuleFile &MF) { - // Only preserve search paths that were used during the dependency scan. - std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; - Opts.UserEntries.clear(); - - llvm::BitVector SearchPathUsage(Entries.size()); - llvm::DenseSet<const serialization::ModuleFile *> Visited; - std::function<void(const serialization::ModuleFile *)> VisitMF = - [&](const serialization::ModuleFile *MF) { - SearchPathUsage |= MF->SearchPathUsage; - Visited.insert(MF); - for (const serialization::ModuleFile *Import : MF->Imports) - if (!Visited.contains(Import)) - VisitMF(Import); - }; - VisitMF(&MF); - - for (auto Idx : SearchPathUsage.set_bits()) - Opts.UserEntries.push_back(Entries[Idx]); + const serialization::ModuleFile &MF, + ScanningOptimizations OptimizeArgs) { + if (any(OptimizeArgs & ScanningOptimizations::HeaderSearch)) { + // Only preserve search paths that were used during the dependency scan. + std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; + Opts.UserEntries.clear(); + + llvm::BitVector SearchPathUsage(Entries.size()); + llvm::DenseSet<const serialization::ModuleFile *> Visited; + std::function<void(const serialization::ModuleFile *)> VisitMF = + [&](const serialization::ModuleFile *MF) { + SearchPathUsage |= MF->SearchPathUsage; + Visited.insert(MF); + for (const serialization::ModuleFile *Import : MF->Imports) + if (!Visited.contains(Import)) + VisitMF(Import); + }; + VisitMF(&MF); + + for (auto Idx : SearchPathUsage.set_bits()) + Opts.UserEntries.push_back(Entries[Idx]); + } + if (any(OptimizeArgs & ScanningOptimizations::VFS)) { + std::vector<std::string> VFSOverlayFiles = Opts.VFSOverlayFiles; + Opts.VFSOverlayFiles.clear(); + llvm::BitVector VFSUsage(VFSOverlayFiles.size()); + llvm::DenseSet<const serialization::ModuleFile *> Visited; + std::function<void(const serialization::ModuleFile *)> VisitMF = + [&](const serialization::ModuleFile *MF) { + VFSUsage |= MF->VFSUsage; + Visited.insert(MF); + for (const serialization::ModuleFile *Import : MF->Imports) + if (!Visited.contains(Import)) + VisitMF(Import); + }; + VisitMF(&MF); + + for (auto Idx : VFSUsage.set_bits()) + Opts.VFSOverlayFiles.push_back(VFSOverlayFiles[Idx]); + } } static void optimizeDiagnosticOpts(DiagnosticOptions &Opts, @@ -551,9 +572,11 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { CowCompilerInvocation CI = MDC.getInvocationAdjustedForModuleBuildWithoutOutputs( MD, [&](CowCompilerInvocation &BuildInvocation) { - if (any(MDC.OptimizeArgs & ScanningOptimizations::HeaderSearch)) + if (any(MDC.OptimizeArgs & (ScanningOptimizations::HeaderSearch | + ScanningOptimizations::VFS))) optimizeHeaderSearchOpts(BuildInvocation.getMutHeaderSearchOpts(), - *MDC.ScanInstance.getASTReader(), *MF); + *MDC.ScanInstance.getASTReader(), *MF, + MDC.OptimizeArgs); if (any(MDC.OptimizeArgs & ScanningOptimizations::SystemWarnings)) optimizeDiagnosticOpts( BuildInvocation.getMutDiagnosticOpts(), diff --git a/clang/test/ClangScanDeps/optimize-vfs.m b/clang/test/ClangScanDeps/optimize-vfs.m new file mode 100644 index 000000000000000..ce051f5d820e2c9 --- /dev/null +++ b/clang/test/ClangScanDeps/optimize-vfs.m @@ -0,0 +1,181 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json +// RUN: sed -e "s|DIR|%/t|g" %t/build/vfs.yaml.in > %t/build/vfs.yaml +// RUN: sed -e "s|DIR|%/t|g" %t/build/unused-vfs.yaml.in > %t/build/unused-vfs.yaml +// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \ +// RUN: -j 1 -format experimental-full --optimize-args=vfs,header-search > %t/deps.db +// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t + +// Check that unused -ivfsoverlay arguments are removed, and that used ones are +// not. + +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/modules/A/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NOT: "build/unused-vfs.yaml" +// CHECK: "-ivfsoverlay" +// CHECK-NEXT: "build/vfs.yaml" +// CHECK-NOT: "build/unused-vfs.yaml" +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/build/A.h", +// CHECK-NEXT: "[[PREFIX]]/build/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "A" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/modules/B/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-ivfsoverlay" +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/modules/B/B.h", +// CHECK-NEXT: "[[PREFIX]]/modules/B/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "B" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "module-name": "B" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/modules/C/module.modulemap", +// CHECK-NEXT: "command-line": [ +// CHECK-NOT: "-ivfsoverlay" +// CHECK: ], +// CHECK-NEXT: "context-hash": "{{.*}}", +// CHECK-NEXT: "file-deps": [ +// CHECK-NEXT: "[[PREFIX]]/modules/B/module.modulemap", +// CHECK-NEXT: "[[PREFIX]]/modules/C/C.h", +// CHECK-NEXT: "[[PREFIX]]/modules/C/module.modulemap" +// CHECK-NEXT: ], +// CHECK-NEXT: "name": "C" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "translation-units": [ +// CHECK: ] +// CHECK: } + +//--- build/compile-commands.json.in + +[ +{ + "directory": "DIR", + "command": "clang -c DIR/A.m -Imodules/A -Imodules/B -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps -ivfsoverlay build/vfs.yaml -ivfsoverlay build/unused-vfs.yaml", + "file": "DIR/A.m" +}, +{ + "directory": "DIR", + "command": "clang -c DIR/B.m -Imodules/B -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps -ivfsoverlay build/unused-vfs.yaml -ivfsoverlay build/vfs.yaml", + "file": "DIR/B.m" +}, +{ + "directory": "DIR", + "command": "clang -c DIR/C.m -Imodules/A -Imodules/B -Imodules/C -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps -ivfsoverlay build/vfs.yaml -ivfsoverlay build/unused-vfs.yaml", + "file": "DIR/C.m" +} +] + +//--- build/vfs.yaml.in + +{ + "version":0, + "case-sensitive":"false", + "roots":[ + { + "contents":[ + { + "external-contents":"DIR/build/module.modulemap", + "name":"module.modulemap", + "type":"file" + }, + { + "external-contents":"DIR/build/A.h", + "name":"A.h", + "type":"file" + } + ], + "name":"DIR/modules/A", + "type":"directory" + } + ] +} + +//--- build/unused-vfs.yaml.in + +{ + "version":0, + "case-sensitive":"false", + "roots":[ + { + "contents":[ + { + "external-contents":"DIR/build/module.modulemap", + "name":"module.modulemap", + "type":"file" + } + ], + "name":"DIR/modules/D", + "type":"directory" + } + ] +} + +//--- build/module.modulemap + +module A { + umbrella header "A.h" +} + +//--- build/A.h + +typedef int A_t; + +//--- modules/B/module.modulemap + +module B { + umbrella header "B.h" +} + +//--- modules/B/B.h + +typedef int B_t; + +//--- modules/C/module.modulemap + +module C { + umbrella header "C.h" +} + +//--- modules/C/C.h + +@import B; + +typedef B_t C_t; + +//--- A.m + +#include <A.h> + +A_t a = 0; + +//--- B.m + +#include <B.h> + +B_t b = 0; + +//--- C.m + +#include <C.h> + +C_t b = 0; diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index f11c933d9576565..9768daaa51288ae 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -157,6 +157,7 @@ static void ParseArgs(int argc, char **argv) { .Case("none", ScanningOptimizations::None) .Case("header-search", ScanningOptimizations::HeaderSearch) .Case("system-warnings", ScanningOptimizations::SystemWarnings) + .Case("vfs", ScanningOptimizations::VFS) .Case("all", ScanningOptimizations::All) .Default(std::nullopt); if (!Optimization) { diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 44a56e54a0a09c5..c1a2824083c8574 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -15,12 +15,14 @@ #define LLVM_SUPPORT_VIRTUALFILESYSTEM_H #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Chrono.h" -#include "llvm/Support/ErrorOr.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/ExtensibleRTTI.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" @@ -264,8 +266,10 @@ class recursive_directory_iterator { }; /// The virtual file system interface. -class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> { +class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem>, + public RTTIExtends<FileSystem, RTTIRoot> { public: + static const char ID; virtual ~FileSystem(); /// Get the status of the entry at \p Path, if one exists. @@ -324,6 +328,13 @@ class FileSystem : public llvm::ThreadSafeRefCountedBase<FileSystem> { printImpl(OS, Type, IndentLevel); } + using VisitCallbackTy = llvm::function_ref<void(FileSystem &)>; + virtual void visitChildFileSystems(VisitCallbackTy Callback) {} + void visit(VisitCallbackTy Callback) { + Callback(*this); + visitChildFileSystems(Callback); + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; #endif @@ -363,7 +374,7 @@ std::unique_ptr<FileSystem> createPhysicalFileSystem(); /// top-most (most recently added) directory are used. When there is a file /// that exists in more than one file system, the file in the top-most file /// system overrides the other(s). -class OverlayFileSystem : public FileSystem { +class OverlayFileSystem : public RTTIExtends<OverlayFileSystem, FileSystem> { using FileSystemList = SmallVector<IntrusiveRefCntPtr<FileSystem>, 1>; /// The stack of file systems, implemented as a list in order of @@ -371,6 +382,7 @@ class OverlayFileSystem : public FileSystem { FileSystemList FSList; public: + static const char ID; OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> Base); /// Pushes a file system on top of the stack. @@ -415,13 +427,15 @@ class OverlayFileSystem : public FileSystem { protected: void printImpl(raw_ostream &OS, PrintType Type, unsigned IndentLevel) const override; + void visitChildFileSystems(VisitCallbackTy Callback) override; }; /// By default, this delegates all calls to the underlying file system. This /// is useful when derived file systems want to override some calls and still /// proxy other calls. -class ProxyFileSystem : public FileSystem { +class ProxyFileSystem : public RTTIExtends<ProxyFileSystem, FileSystem> { public: + static const char ID; explicit ProxyFileSystem(IntrusiveRefCntPtr<FileSystem> FS) : FS(std::move(FS)) {} @@ -451,11 +465,17 @@ class ProxyFileSystem : public FileSystem { protected: FileSystem &getUnderlyingFS() const { return *FS; } + void visitChildFileSystems(VisitCallbackTy Callback) override { + if (FS) { + Callback(*FS); + FS->visitChildFileSystems(Callback); + } + } private: IntrusiveRefCntPtr<FileSystem> FS; - virtual void anchor(); + virtual void anchor() override; }; namespace detail { @@ -498,11 +518,18 @@ class NamedNodeOrError { } // namespace detail /// An in-memory file system. -class InMemoryFileSystem : public FileSystem { +class InMemoryFileSystem : public RTTIExtends<InMemoryFileSystem, FileSystem> { std::unique_ptr<detail::InMemoryDirectory> Root; std::string WorkingDirectory; bool UseNormalizedPaths = true; +public: + static const char ID; + using GetFileContentsCallback = + std::function<llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>( + StringRef)>; + +private: using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>( detail::NewInMemoryNodeInfo)>; @@ -739,8 +766,10 @@ class RedirectingFileSystemParser; /// FIXME: 'use-external-name' causes behaviour that's inconsistent with how /// "real" filesystems behave. Maybe there should be a separate channel for /// this information. -class RedirectingFileSystem : public vfs::FileSystem { +class RedirectingFileSystem + : public RTTIExtends<RedirectingFileSystem, vfs::FileSystem> { public: + static const char ID; enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File }; enum NameKind { NK_NotSet, NK_External, NK_Virtual }; @@ -976,6 +1005,13 @@ class RedirectingFileSystem : public vfs::FileSystem { /// names of files. This global value is overridable on a per-file basis. bool UseExternalNames = true; + /// True if this FS has redirected a lookup. This does not include + /// fallthrough. + mutable bool HasBeenUsed = false; + + /// Used to enable or disable updating `HasBeenUsed`. + bool UsageTrackingActive = true; + /// Determines the lookups to perform, as well as their order. See /// \c RedirectKind for details. RedirectKind Redirection = RedirectKind::Fallthrough; @@ -1046,11 +1082,16 @@ class RedirectingFileSystem : public vfs::FileSystem { std::vector<llvm::StringRef> getRoots() const; + bool hasBeenUsed() const { return HasBeenUsed; }; + + void setUsageTrackingActive(bool Active) { UsageTrackingActive = Active; } + void printEntry(raw_ostream &OS, Entry *E, unsigned IndentLevel = 0) const; protected: void printImpl(raw_ostream &OS, PrintType Type, unsigned IndentLevel) const override; + void visitChildFileSystems(VisitCallbackTy Callback) override; }; /// Collect all pairs of <virtual path, real path> entries from the diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 367e794d38f63ac..ea150a5563bcc5b 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -480,6 +480,13 @@ OverlayFileSystem::getRealPath(const Twine &Path, return errc::no_such_file_or_directory; } +void OverlayFileSystem::visitChildFileSystems(VisitCallbackTy Callback) { + for (IntrusiveRefCntPtr<FileSystem> FS : overlays_range()) { + Callback(*FS); + FS->visitChildFileSystems(Callback); + } +} + void OverlayFileSystem::printImpl(raw_ostream &OS, PrintType Type, unsigned IndentLevel) const { printIndent(OS, IndentLevel); @@ -1581,6 +1588,13 @@ void RedirectingFileSystem::printEntry(raw_ostream &OS, } } +void RedirectingFileSystem::visitChildFileSystems(VisitCallbackTy Callback) { + if (ExternalFS) { + Callback(*ExternalFS); + ExternalFS->visitChildFileSystems(Callback); + } +} + /// A helper class to hold the common YAML parsing state. class llvm::vfs::RedirectingFileSystemParser { yaml::Stream &Stream; @@ -2263,12 +2277,18 @@ RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const { ErrorOr<RedirectingFileSystem::LookupResult> RedirectingFileSystem::lookupPath(StringRef Path) const { + // RedirectOnly means the VFS is always used. + if (UsageTrackingActive && Redirection == RedirectKind::RedirectOnly) + HasBeenUsed = true; + sys::path::const_iterator Start = sys::path::begin(Path); sys::path::const_iterator End = sys::path::end(Path); llvm::SmallVector<Entry *, 32> Entries; for (const auto &Root : Roots) { ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPathImpl(Start, End, Root.get(), Entries); + if (UsageTrackingActive && Result && isa<RemapEntry>(Result->E)) + HasBeenUsed = true; if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) { Result->Parents = std::move(Entries); return Result; @@ -2864,3 +2884,9 @@ recursive_directory_iterator::increment(std::error_code &EC) { return *this; } + +const char FileSystem::ID = 0; +const char OverlayFileSystem::ID = 0; +const char ProxyFileSystem::ID = 0; +const char InMemoryFileSystem::ID = 0; +const char RedirectingFileSystem::ID = 0; diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp index bd4048f025c0de0..11a74f298e3e9e5 100644 --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -895,6 +895,47 @@ TEST(VirtualFileSystemTest, HiddenInIteration) { } } +TEST(VirtualFileSystemTest, Visit) { + IntrusiveRefCntPtr<DummyFileSystem> Base(new DummyFileSystem()); + IntrusiveRefCntPtr<DummyFileSystem> Middle(new DummyFileSystem()); + IntrusiveRefCntPtr<DummyFileSystem> Top(new DummyFileSystem()); + IntrusiveRefCntPtr<vfs::OverlayFileSystem> O( + new vfs::OverlayFileSystem(Base)); + O->pushOverlay(Middle); + O->pushOverlay(Top); + + auto YAML = + MemoryBuffer::getMemBuffer("{\n" + " 'version': 0,\n" + " 'redirecting-with': 'redirect-only',\n" + " 'roots': [\n" + " {\n" + " 'type': 'file',\n" + " 'name': '/vfile',\n" + " 'external-contents': '/a',\n" + " }," + " ]\n" + "}"); + + IntrusiveRefCntPtr<vfs::RedirectingFileSystem> Redirecting = + vfs::RedirectingFileSystem::create(std::move(YAML), nullptr, "", nullptr, + O) + .release(); + + vfs::ProxyFileSystem PFS(Redirecting); + + std::vector<const vfs::FileSystem *> FSs; + PFS.visit([&](const vfs::FileSystem &FS) { FSs.push_back(&FS); }); + + ASSERT_EQ(size_t(6), FSs.size()); + EXPECT_TRUE(isa<vfs::ProxyFileSystem>(FSs[0])); + EXPECT_TRUE(isa<vfs::RedirectingFileSystem>(FSs[1])); + EXPECT_TRUE(isa<vfs::OverlayFileSystem>(FSs[2])); + EXPECT_TRUE(isa<vfs::FileSystem>(FSs[3])); + EXPECT_TRUE(isa<vfs::FileSystem>(FSs[4])); + EXPECT_TRUE(isa<vfs::FileSystem>(FSs[5])); +} + TEST(OverlayFileSystemTest, PrintOutput) { auto Dummy = makeIntrusiveRefCnt<DummyFileSystem>(); auto Overlay1 = makeIntrusiveRefCnt<vfs::OverlayFileSystem>(Dummy); @@ -3262,3 +3303,46 @@ TEST(RedirectingFileSystemTest, PrintOutput) { " DummyFileSystem (RecursiveContents)\n", Output); } + +TEST(RedirectingFileSystemTest, Used) { + IntrusiveRefCntPtr<DummyFileSystem> Dummy(new NoStatusDummyFileSystem()); + auto YAML1 = + MemoryBuffer::getMemBuffer("{\n" + " 'version': 0,\n" + " 'redirecting-with': 'fallthrough',\n" + " 'roots': [\n" + " {\n" + " 'type': 'file',\n" + " 'name': '/vfile1',\n" + " 'external-contents': '/a',\n" + " }," + " ]\n" + "}"); + auto YAML2 = + MemoryBuffer::getMemBuffer("{\n" + " 'version': 0,\n" + " 'redirecting-with': 'fallthrough',\n" + " 'roots': [\n" + " {\n" + " 'type': 'file',\n" + " 'name': '/vfile2',\n" + " 'external-contents': '/b',\n" + " }," + " ]\n" + "}"); + + Dummy->addRegularFile("/a"); + Dummy->addRegularFile("/b"); + + IntrusiveRefCntPtr<vfs::RedirectingFileSystem> Redirecting1 = + vfs::RedirectingFileSystem::create(std::move(YAML1), nullptr, "", nullptr, + Dummy) + .release(); + auto Redirecting2 = vfs::RedirectingFileSystem::create( + std::move(YAML2), nullptr, "", nullptr, Redirecting1); + + EXPECT_TRUE(Redirecting2->exists("/vfile1")); + EXPECT_TRUE(Redirecting2->exists("/b")); + EXPECT_TRUE(Redirecting1->hasBeenUsed()); + EXPECT_FALSE(Redirecting2->hasBeenUsed()); +} >From c97550e95c624ef902f5894809aac2a870e8083d Mon Sep 17 00:00:00 2001 From: Michael Spencer <michael_spen...@apple.com> Date: Wed, 8 Mar 2023 17:58:10 -0800 Subject: [PATCH 2/2] Include `-ivfsoverlay` in the strict context hash When `-ivfsoverlay` is not included in the strict context hash we run into the following situation when optimizing `-ivfsoverlay` flags in the dependency scanner. If you scanned a TU with 2 `-ivfsoverlay`s, with the last being used, then scanned one with 1, we would reuse the 2 `-ivfsoverlay` module and try to copy the non-existent 2nd `-ivfsoverlay` path. This patch fixes this issue, and potential non-deterministic results, by adding all `-ivfsoverlay` flags to the strict context hash. This also does a drive by change of not copying a vector of string and adding error checking for invalid indices. --- clang/lib/Frontend/CompilerInvocation.cpp | 1 + .../DependencyScanning/ModuleDepCollector.cpp | 21 +++++++++++++------ clang/test/ClangScanDeps/optimize-vfs.m | 12 +++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 5a8e4cf9843de2b..f1787e152fd056a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4739,6 +4739,7 @@ std::string CompilerInvocation::getModuleHash() const { if (hsOpts.ModulesStrictContextHash) { HBuilder.addRange(hsOpts.SystemHeaderPrefixes); HBuilder.addRange(hsOpts.UserEntries); + HBuilder.addRange(hsOpts.VFSOverlayFiles); const DiagnosticOptions &diagOpts = getDiagnosticOpts(); #define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index d23d6cd3ad946af..ac981a092d62dbe 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -35,8 +35,8 @@ static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, ScanningOptimizations OptimizeArgs) { if (any(OptimizeArgs & ScanningOptimizations::HeaderSearch)) { // Only preserve search paths that were used during the dependency scan. - std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries; - Opts.UserEntries.clear(); + std::vector<HeaderSearchOptions::Entry> Entries; + std::swap(Opts.UserEntries, Entries); llvm::BitVector SearchPathUsage(Entries.size()); llvm::DenseSet<const serialization::ModuleFile *> Visited; @@ -50,12 +50,17 @@ static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, }; VisitMF(&MF); + if (SearchPathUsage.size() != Entries.size()) + llvm::report_fatal_error( + "Inconsistent search path options between modules detected"); + for (auto Idx : SearchPathUsage.set_bits()) - Opts.UserEntries.push_back(Entries[Idx]); + Opts.UserEntries.push_back(std::move(Entries[Idx])); } if (any(OptimizeArgs & ScanningOptimizations::VFS)) { - std::vector<std::string> VFSOverlayFiles = Opts.VFSOverlayFiles; - Opts.VFSOverlayFiles.clear(); + std::vector<std::string> VFSOverlayFiles; + std::swap(Opts.VFSOverlayFiles, VFSOverlayFiles); + llvm::BitVector VFSUsage(VFSOverlayFiles.size()); llvm::DenseSet<const serialization::ModuleFile *> Visited; std::function<void(const serialization::ModuleFile *)> VisitMF = @@ -68,8 +73,12 @@ static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts, }; VisitMF(&MF); + if (VFSUsage.size() != VFSOverlayFiles.size()) + llvm::report_fatal_error( + "Inconsistent -ivfsoverlay options between modules detected"); + for (auto Idx : VFSUsage.set_bits()) - Opts.VFSOverlayFiles.push_back(VFSOverlayFiles[Idx]); + Opts.VFSOverlayFiles.push_back(std::move(VFSOverlayFiles[Idx])); } } diff --git a/clang/test/ClangScanDeps/optimize-vfs.m b/clang/test/ClangScanDeps/optimize-vfs.m index ce051f5d820e2c9..20c97956087d2d8 100644 --- a/clang/test/ClangScanDeps/optimize-vfs.m +++ b/clang/test/ClangScanDeps/optimize-vfs.m @@ -3,6 +3,7 @@ // RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json // RUN: sed -e "s|DIR|%/t|g" %t/build/vfs.yaml.in > %t/build/vfs.yaml // RUN: sed -e "s|DIR|%/t|g" %t/build/unused-vfs.yaml.in > %t/build/unused-vfs.yaml +// RUN: sed -e "s|DIR|%/t|g" %t/build/unused-vfs.yaml.in > %t/build/unused2-vfs.yaml // RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \ // RUN: -j 1 -format experimental-full --optimize-args=vfs,header-search > %t/deps.db // RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t @@ -68,6 +69,11 @@ //--- build/compile-commands.json.in [ +{ + "directory": "DIR", + "command": "clang -c DIR/0.m -Imodules/A -Imodules/B -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps -ivfsoverlay build/unused-vfs.yaml -ivfsoverlay build/unused2-vfs.yaml -ivfsoverlay build/vfs.yaml", + "file": "DIR/0.m" +}, { "directory": "DIR", "command": "clang -c DIR/A.m -Imodules/A -Imodules/B -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps -ivfsoverlay build/vfs.yaml -ivfsoverlay build/unused-vfs.yaml", @@ -162,6 +168,12 @@ typedef B_t C_t; +//--- 0.m + +#include <A.h> + +A_t a = 0; + //--- A.m #include <A.h> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits