https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/188877
>From 196117b66a6bf6cf282394673c63d5ebd8ef2c82 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Fri, 20 Mar 2026 20:54:01 -0700 Subject: [PATCH 1/6] [clang] Introduce `ModuleCache::write()` --- .../clang/Basic/DiagnosticCommonKinds.td | 1 + .../include/clang/Frontend/CompilerInstance.h | 10 +-- .../include/clang/Frontend/FrontendActions.h | 12 ++++ .../include/clang/Serialization/ModuleCache.h | 10 ++- .../InProcessModuleCache.cpp | 6 ++ clang/lib/Frontend/CompilerInstance.cpp | 61 ++++++++++++++----- clang/lib/Frontend/FrontendActions.cpp | 3 +- clang/lib/Serialization/ModuleCache.cpp | 40 ++++++++++++ 8 files changed, 121 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index cb267e3ee05c1..b5f99606789fe 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -103,6 +103,7 @@ def err_deleted_non_function : Error< "only functions can have deleted definitions">; def err_module_not_found : Error<"module '%0' not found">, DefaultFatal; def err_module_not_built : Error<"could not build module '%0'">, DefaultFatal; +def err_module_not_written : Error<"could not write module file for '%0' to '%1': %2">, DefaultFatal; def err_module_build_disabled: Error< "module '%0' is needed but has not been provided, and implicit use of module " "files is disabled">, DefaultFatal; diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index f206d012eacc9..be44817aa5a1b 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -934,12 +934,14 @@ class CompilerInstance : public ModuleLoader { std::optional<ThreadSafeCloneConfig> ThreadSafeConfig = std::nullopt); /// Compile a module file for the given module, using the options - /// provided by the importing compiler instance. 
Returns true if the module - /// was built without errors. + /// provided by the importing compiler instance. Returns the PCM file in + /// a buffer. // FIXME: This should be private, but it's called from static non-member // functions in the implementation file. - bool compileModule(SourceLocation ImportLoc, StringRef ModuleName, - StringRef ModuleFileName, CompilerInstance &Instance); + std::unique_ptr<llvm::MemoryBuffer> compileModule(SourceLocation ImportLoc, + StringRef ModuleName, + StringRef ModuleFileName, + CompilerInstance &Instance); ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path, Module::NameVisibilityKind Visibility, diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h index 87a9f0d4cb06c..c5aff7ae1a713 100644 --- a/clang/include/clang/Frontend/FrontendActions.h +++ b/clang/include/clang/Frontend/FrontendActions.h @@ -114,6 +114,15 @@ class GeneratePCHAction : public ASTFrontendAction { }; class GenerateModuleAction : public ASTFrontendAction { +public: + /// When \c OS is non-null, uses it for outputting the PCM file instead of + /// automatically creating an output file. 
+ explicit GenerateModuleAction(std::unique_ptr<raw_pwrite_stream> OS = nullptr) + : OS(std::move(OS)) {} + +private: + std::unique_ptr<raw_pwrite_stream> OS; + virtual std::unique_ptr<raw_pwrite_stream> CreateOutputFile(CompilerInstance &CI, StringRef InFile) = 0; @@ -145,6 +154,9 @@ class GenerateInterfaceStubsAction : public ASTFrontendAction { }; class GenerateModuleFromModuleMapAction : public GenerateModuleAction { +public: + using GenerateModuleAction::GenerateModuleAction; + private: bool BeginSourceFileAction(CompilerInstance &CI) override; diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h index c6795c5dc358a..4fced900bbdcb 100644 --- a/clang/include/clang/Serialization/ModuleCache.h +++ b/clang/include/clang/Serialization/ModuleCache.h @@ -14,6 +14,7 @@ #include <ctime> namespace llvm { +class MemoryBufferRef; class AdvisoryLock; } // namespace llvm @@ -52,7 +53,11 @@ class ModuleCache { virtual InMemoryModuleCache &getInMemoryModuleCache() = 0; virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0; - // TODO: Virtualize writing/reading PCM files, etc. + /// Write the PCM contents to the given path in the module cache. + virtual std::error_code write(StringRef Path, + llvm::MemoryBufferRef Buffer) = 0; + + // TODO: Virtualize reading PCM files, etc. virtual ~ModuleCache() = default; }; @@ -65,6 +70,9 @@ std::shared_ptr<ModuleCache> createCrossProcessModuleCache(); /// Shared implementation of `ModuleCache::maybePrune()`. void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter); + +/// Shared implementation of `ModuleCache::write()`. 
+std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer); } // namespace clang #endif diff --git a/clang/lib/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/DependencyScanning/InProcessModuleCache.cpp index cd7385c8f38c2..7bdfae8f3e567 100644 --- a/clang/lib/DependencyScanning/InProcessModuleCache.cpp +++ b/clang/lib/DependencyScanning/InProcessModuleCache.cpp @@ -127,6 +127,12 @@ class InProcessModuleCache : public ModuleCache { maybePruneImpl(Path, PruneInterval, PruneAfter); } + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + // FIXME: This could use an in-memory cache to avoid IO, and only write to + // disk at the end of the scan. + return writeImpl(Path, Buffer); + } + InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; } const InMemoryModuleCache &getInMemoryModuleCache() const override { return InMemory; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 1f1b6701c38df..262bf3484e6a0 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/VirtualFileSystem.h" @@ -1238,10 +1239,10 @@ class PrettyStackTraceBuildModule : public llvm::PrettyStackTraceEntry { }; } // namespace -bool CompilerInstance::compileModule(SourceLocation ImportLoc, - StringRef ModuleName, - StringRef ModuleFileName, - CompilerInstance &Instance) { +std::unique_ptr<llvm::MemoryBuffer> +CompilerInstance::compileModule(SourceLocation ImportLoc, StringRef ModuleName, + StringRef ModuleFileName, + CompilerInstance &Instance) { PrettyStackTraceBuildModule CrashInfo(ModuleName, ModuleFileName); llvm::TimeTraceScope TimeScope("Module Compile", ModuleName); @@ 
-1250,18 +1251,22 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc, if (getModuleCache().getInMemoryModuleCache().isPCMFinal(ModuleFileName)) { getDiagnostics().Report(ImportLoc, diag::err_module_rebuild_finalized) << ModuleName; - return false; + return nullptr; } getDiagnostics().Report(ImportLoc, diag::remark_module_build) << ModuleName << ModuleFileName; + SmallString<0> Buffer; + // Execute the action to actually build the module in-place. Use a separate // thread so that we get a stack large enough. bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack( [&]() { + auto OS = std::make_unique<llvm::raw_svector_ostream>(Buffer); + std::unique_ptr<FrontendAction> Action = - std::make_unique<GenerateModuleFromModuleMapAction>(); + std::make_unique<GenerateModuleFromModuleMapAction>(std::move(OS)); if (auto WrapGenModuleAction = Instance.getGenModuleActionWrapper()) Action = WrapGenModuleAction(Instance.getFrontendOpts(), @@ -1297,10 +1302,17 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc, setBuildGlobalModuleIndex(true); } - // If \p AllowPCMWithCompilerErrors is set return 'success' even if errors + if (Crashed) + return nullptr; + + // Unless \p AllowPCMWithCompilerErrors is set, return 'failure' if errors // occurred. 
- return !Instance.getDiagnostics().hasErrorOccurred() || - Instance.getFrontendOpts().AllowPCMWithCompilerErrors; + if (Instance.getDiagnostics().hasErrorOccurred() && + !Instance.getFrontendOpts().AllowPCMWithCompilerErrors) + return nullptr; + + return std::make_unique<llvm::SmallVectorMemoryBuffer>( + std::move(Buffer), Instance.getFrontendOpts().OutputFile); } static OptionalFileEntryRef getPublicModuleMap(FileEntryRef File, @@ -1442,13 +1454,17 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, SourceLocation ModuleNameLoc, Module *Module, ModuleFileName ModuleFileName) { + std::unique_ptr<llvm::MemoryBuffer> Buffer; + { auto Instance = ImportingInstance.cloneForModuleCompile( ModuleNameLoc, Module, ModuleFileName); - if (!ImportingInstance.compileModule(ModuleNameLoc, - Module->getTopLevelModuleName(), - ModuleFileName, *Instance)) { + Buffer = ImportingInstance.compileModule(ModuleNameLoc, + Module->getTopLevelModuleName(), + ModuleFileName, *Instance); + + if (!Buffer) { ImportingInstance.getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_built) << Module->Name << SourceRange(ImportLoc, ModuleNameLoc); @@ -1456,6 +1472,16 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, } } + std::error_code EC = + ImportingInstance.getModuleCache().write(ModuleFileName, *Buffer); + if (EC) { + ImportingInstance.getDiagnostics().Report(ModuleNameLoc, + diag::err_module_not_written) + << Module->Name << ModuleFileName << EC.message() + << SourceRange(ImportLoc, ModuleNameLoc); + return false; + } + // The module is built successfully, we can update its timestamp now. if (ImportingInstance.getPreprocessor() .getHeaderSearchInfo() @@ -2196,8 +2222,9 @@ void CompilerInstance::createModuleFromSource(SourceLocation ImportLoc, // output is nondeterministic (as .pcm files refer to each other by name). // Can this affect the output in any way? 
SmallString<128> ModuleFileName; + int FD; if (std::error_code EC = llvm::sys::fs::createTemporaryFile( - CleanModuleName, "pcm", ModuleFileName)) { + CleanModuleName, "pcm", FD, ModuleFileName)) { getDiagnostics().Report(ImportLoc, diag::err_fe_unable_to_open_output) << ModuleFileName << EC.message(); return; @@ -2225,12 +2252,14 @@ void CompilerInstance::createModuleFromSource(SourceLocation ImportLoc, Other->DeleteBuiltModules = false; // Build the module, inheriting any modules that we've built locally. - bool Success = compileModule(ImportLoc, ModuleName, ModuleFileName, *Other); - + std::unique_ptr<llvm::MemoryBuffer> Buffer = + compileModule(ImportLoc, ModuleName, ModuleFileName, *Other); BuiltModules = std::move(Other->BuiltModules); - if (Success) { + if (Buffer) { + llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true); BuiltModules[std::string(ModuleName)] = std::string(ModuleFileName); + OS << Buffer->getBuffer(); llvm::sys::RemoveFileOnSignal(ModuleFileName); } } diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index e5eaab0da7adb..42f1ae3d83ed3 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -188,7 +188,8 @@ bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) { std::vector<std::unique_ptr<ASTConsumer>> GenerateModuleAction::CreateMultiplexConsumer(CompilerInstance &CI, StringRef InFile) { - std::unique_ptr<raw_pwrite_stream> OS = CreateOutputFile(CI, InFile); + if (!OS) + OS = CreateOutputFile(CI, InFile); if (!OS) return {}; diff --git a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp index 658da6e3b7145..6a1fe5e635cd8 100644 --- a/clang/lib/Serialization/ModuleCache.cpp +++ b/clang/lib/Serialization/ModuleCache.cpp @@ -101,6 +101,39 @@ void clang::maybePruneImpl(StringRef Path, time_t PruneInterval, } } +std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) { + StringRef Extension = 
llvm::sys::path::extension(Path); + SmallString<128> ModelPath = StringRef(Path).drop_back(Extension.size()); + ModelPath += "-%%%%%%%%"; + ModelPath += Extension; + ModelPath += ".tmp"; + + std::error_code EC; + int FD; + SmallString<128> TmpPath; + if ((EC = llvm::sys::fs::createUniqueFile(ModelPath, FD, TmpPath))) { + if (EC != std::errc::no_such_file_or_directory) + return EC; + + StringRef Dir = llvm::sys::path::parent_path(Path); + if (std::error_code InnerEC = llvm::sys::fs::create_directories(Dir)) + return InnerEC; + + if ((EC = llvm::sys::fs::createUniqueFile(ModelPath, FD, TmpPath))) + return EC; + } + + { + llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true); + OS << Buffer.getBuffer(); + } + + if ((EC = llvm::sys::fs::rename(TmpPath, Path))) + return EC; + + return {}; +} + namespace { class CrossProcessModuleCache : public ModuleCache { InMemoryModuleCache InMemory; @@ -157,6 +190,13 @@ class CrossProcessModuleCache : public ModuleCache { maybePruneImpl(Path, PruneInterval, PruneAfter); } + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + // This is a compiler-internal input/output, let's bypass the sandbox. 
+ auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); + + return writeImpl(Path, Buffer); + } + InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; } const InMemoryModuleCache &getInMemoryModuleCache() const override { return InMemory; >From eaa8b5e9260c7e4303500865935e7604f03751b0 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Fri, 27 Mar 2026 09:48:29 -0700 Subject: [PATCH 2/6] Add missing include --- clang/include/clang/Serialization/ModuleCache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h index 4fced900bbdcb..9ea4d84380660 100644 --- a/clang/include/clang/Serialization/ModuleCache.h +++ b/clang/include/clang/Serialization/ModuleCache.h @@ -12,6 +12,7 @@ #include "clang/Basic/LLVM.h" #include <ctime> +#include <system_error> namespace llvm { class MemoryBufferRef; >From 0db177cdd9feebe3d74603055c4b79a332aceb6d Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Mon, 30 Mar 2026 15:15:00 -0700 Subject: [PATCH 3/6] [clang] Add ExternalSubmoduleSource interface to ModuleMap Introduce an abstract ExternalSubmoduleSource class that allows an external consumer (e.g. ASTReader) to demand-load individual submodule Module objects on first lookup rather than materialising the entire submodule tree upfront. The interface exposes a single hook: virtual void loadSubmodule(Module *Parent, StringRef Name) = 0; which is called by ModuleMap just before the two internal qualified- lookup chokepoints attempt findSubmodule(): * lookupModuleQualified() -- reached by every dotted import path component (e.g. "NSString" when resolving Foundation.NSString) via lookupModuleUnqualified(), resolveModuleId(), and CompilerInstance::loadModule(). * findOrInferSubmodule() -- the other path that searches for an existing child before potentially inferring a new one. 
Both sites are already the canonical funnels for external callers; no other code directly walks Module::findSubmodule() for cross-file purposes. The external source is registered via ModuleMap::setExternalSubmoduleSource(). No functional change in this commit: ExternalSource is null by default so all code paths are identical to before. A subsequent commit will implement the interface in ASTReader and perform the lazy scan-and- index pass during ReadSubmoduleBlock(). Co-Authored-By: Claude Sonnet 4.6 <[email protected]> --- clang/include/clang/Lex/ModuleMap.h | 24 ++++++++++++++++++++++++ clang/lib/Lex/ModuleMap.cpp | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 570a68c37fac4..1d77a1087904b 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -44,6 +44,22 @@ class FileManager; class HeaderSearch; class SourceManager; +/// Abstract interface for demand-loading submodule data from external storage +/// (e.g., a precompiled module file). Registered with \c ModuleMap via +/// \c setExternalSubmoduleSource() and called from qualified lookup paths +/// before falling back to the in-memory module tree, enabling lazy +/// construction of \c Module objects for submodules that may never be needed. +class ExternalSubmoduleSource { +public: + virtual ~ExternalSubmoduleSource() = default; + + /// Called when a lookup for \p Name as an immediate child of \p Parent is + /// about to be performed but has not yet found a result in the in-memory + /// module tree. If external storage knows of such a child, it should create + /// the \c Module object and register it with \p Parent before returning. + virtual void loadSubmodule(Module *Parent, StringRef Name) = 0; +}; + /// A mechanism to observe the actions of the module map loader as it /// reads module map files. 
class ModuleMapCallbacks { @@ -109,6 +125,9 @@ class ModuleMap { /// The number of modules we have created in total. unsigned NumCreatedModules = 0; + /// Optional external source for on-demand submodule loading. + ExternalSubmoduleSource *ExternalSource = nullptr; + /// In case a module has a export_as entry, it might have a pending link /// name to be determined if that module is imported. llvm::StringMap<llvm::StringSet<>> PendingLinkAsModule; @@ -122,6 +141,11 @@ class ModuleMap { /// information is available or add it to a pending list otherwise. void addLinkAsDependency(Module *Mod); + /// Set the external source used for on-demand submodule loading. + void setExternalSubmoduleSource(ExternalSubmoduleSource *Source) { + ExternalSource = Source; + } + /// Flags describing the role of a module header. enum ModuleHeaderRole { /// This header is normally included in the module. diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 6c991430cb08b..bd12b796d6eb4 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -829,6 +829,9 @@ Module *ModuleMap::findModule(StringRef Name) const { } Module *ModuleMap::findOrInferSubmodule(Module *Parent, StringRef Name) { + if (ExternalSource) + ExternalSource->loadSubmodule(Parent, Name); + if (Module *SubM = Parent->findSubmodule(Name)) return SubM; if (!Parent->InferSubmodules) @@ -858,6 +861,9 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ if (!Context) return findModule(Name); + if (ExternalSource) + ExternalSource->loadSubmodule(Context, Name); + return Context->findSubmodule(Name); } >From 708a9ddfdcc71c3b0555f0f2ab836cbcd279047a Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Mon, 30 Mar 2026 15:48:57 -0700 Subject: [PATCH 4/6] [clang] Record per-submodule scan index in ModuleFile during PCM load Add three parallel arrays and a saved bitstream cursor to ModuleFile that together form a lightweight index over the 
SUBMODULE_BLOCK for use by future on-demand submodule loading: SubmoduleCursor - BitstreamCursor saved immediately after reading SUBMODULE_METADATA, positioned for JumpToBit(). SubmoduleOffsets - Bit position of each SUBMODULE_DEFINITION record, indexed by 0-based file-local submodule index (FileIdx = GlobalID - NUM_PREDEF - BaseSubmoduleID). SubmoduleParentIDs - File-local index of each submodule's parent; uint32_t(-1) means "no parent" (top-level module). SubmoduleNames - StringRef name of each submodule, pointing into the PCM's memory-mapped buffer (no heap allocation). These are populated in ReadSubmoduleBlock() alongside the existing eager Module-object creation. A RecordStartBit variable is captured before each advanceSkippingSubblocks() call so that SUBMODULE_DEFINITION handlers know the exact bit offset of their record header. The local ID stored in Record[0] of a SUBMODULE_DEFINITION record is not 1-based; it starts at LocalBaseSubmoduleID (recorded in SUBMODULE_METADATA as Record[1]). The correct file-local 0-based index is therefore derived from the global ID: FileIdx = (GlobalID - NUM_PREDEF_SUBMODULE_IDS) - F.BaseSubmoduleID No functional change in this commit: Module objects are still created eagerly for every submodule. The index data collected here will be consumed by ASTReader::loadSubmodule() in the next commit to enable demand-loading individual submodules without reading the entire block. Co-Authored-By: Claude Sonnet 4.6 <[email protected]> --- .../include/clang/Serialization/ModuleFile.h | 19 ++++++++++ clang/lib/Serialization/ASTReader.cpp | 35 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 303bd65a8aad0..decc1f279b0e4 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -434,6 +434,25 @@ class ModuleFile { /// Remapping table for submodule IDs in this module. 
ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; + /// Cursor into the submodule block, saved immediately after reading + /// SUBMODULE_METADATA. Used to seek to individual submodule definition + /// records for on-demand loading via JumpToBit(SubmoduleOffsets[i]). + llvm::BitstreamCursor SubmoduleCursor; + + /// Bit position of each submodule's SUBMODULE_DEFINITION record within + /// the submodule block bitstream, indexed by 0-based file-local index. + SmallVector<uint64_t, 0> SubmoduleOffsets; + + /// File-local 0-based index of each submodule's parent within this module + /// file, indexed by 0-based file-local index. uint32_t(-1) means the + /// submodule has no parent (it is a top-level module). + SmallVector<uint32_t, 0> SubmoduleParentIDs; + + /// Name of each submodule as written in the module map, indexed by 0-based + /// file-local index. These StringRefs point into the PCM's memory-mapped + /// buffer and carry no heap allocation cost. + SmallVector<StringRef, 0> SubmoduleNames; + // === Selectors === /// The number of selectors new to this file. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 7c3a6fceb3623..9923daa43a18c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6259,7 +6259,12 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, bool First = true; Module *CurrentModule = nullptr; RecordData Record; + // RecordStartBit tracks the bitstream position just before each call to + // advanceSkippingSubblocks(), so that SUBMODULE_DEFINITION handlers can + // record the seek-to offset for later on-demand re-reading. 
+ uint64_t RecordStartBit = 0; while (true) { + RecordStartBit = F.Stream.GetCurrentBitNo(); Expected<llvm::BitstreamEntry> MaybeEntry = F.Stream.advanceSkippingSubblocks(); if (!MaybeEntry) @@ -6311,6 +6316,24 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned Idx = 0; SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]); SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]); + + // Record scan-index data for this definition. FileIdx is the 0-based + // offset of this submodule within F's local range, derived from the + // global ID. ParentFileIdx uses uint32_t(-1) as a sentinel for "no + // parent" (top-level module of this file). + if (!F.SubmoduleOffsets.empty()) { + uint32_t FileIdx = + (GlobalID - NUM_PREDEF_SUBMODULE_IDS) - F.BaseSubmoduleID; + assert(FileIdx < F.SubmoduleOffsets.size() && + "submodule file index out of range"); + uint32_t ParentFileIdx = + (Parent < NUM_PREDEF_SUBMODULE_IDS) + ? uint32_t(-1) + : (Parent - NUM_PREDEF_SUBMODULE_IDS) - F.BaseSubmoduleID; + F.SubmoduleOffsets[FileIdx] = RecordStartBit; + F.SubmoduleParentIDs[FileIdx] = ParentFileIdx; + F.SubmoduleNames[FileIdx] = Name; + } Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++]; SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]); FileID InferredAllowedBy = ReadFileID(F, Record, Idx); @@ -6484,7 +6507,19 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, F.BaseSubmoduleID - LocalBaseSubmoduleID)); SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules); + + // Pre-allocate per-submodule scan data. These are populated as + // SUBMODULE_DEFINITION records are encountered below, and used by + // the on-demand loader to seek to individual submodule records. + F.SubmoduleOffsets.resize(F.LocalNumSubmodules); + F.SubmoduleParentIDs.resize(F.LocalNumSubmodules); + F.SubmoduleNames.resize(F.LocalNumSubmodules); } + + // Save a copy of the cursor positioned just after SUBMODULE_METADATA. 
+ // On-demand loading uses this to JumpToBit(SubmoduleOffsets[i]) and + // re-read any individual submodule's definition and attribute records. + F.SubmoduleCursor = F.Stream; break; } >From b3167a7a252462f4165b5bc42321b98b90a7e6f9 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Mon, 30 Mar 2026 16:30:39 -0700 Subject: [PATCH 5/6] [clang][modules] Add on-demand submodule loader to ASTReader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement `ASTReader` as an `ExternalSubmoduleSource` so that future demand-loading of individual submodules is possible without changing the existing eager `ReadSubmoduleBlock()` path. What this commit does --------------------- * Inherits `ASTReader` from the new `ExternalSubmoduleSource` interface. * Adds `SubmoduleGlobalIDs` (reverse map Module* → global SubmoduleID) populated during `ReadSubmoduleBlock()`. * Adds `loadSubmodule(Module *Parent, StringRef Name)` which looks up the parent's owning `ModuleFile`, scans the per-file index arrays (`SubmoduleParentIDs` / `SubmoduleNames`) recorded in the previous commit, and calls `loadSingleSubmodule()` when the child is absent. * Adds `loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx)` which seeks a saved `BitstreamCursor` to the DEFINITION record, re-reads it and all following attribute records (imports, exports, headers, …) for exactly one submodule, and immediately resolves any new `UnresolvedModuleRefs`. * Registers `this` as the `ExternalSubmoduleSource` at the end of each `ReadSubmoduleBlock()` call so that `ModuleMap::lookupModuleQualified()` and `findOrInferSubmodule()` can trigger demand-loading. Re-entrancy guard ----------------- While `ReadSubmoduleBlock()` is actively building the scan index and creating `Module` objects, `findOrCreateModuleFirst()` calls back into `ModuleMap::lookupModuleQualified()`, which would invoke `loadSubmodule()` re-entrantly. 
Without protection this causes `loadSingleSubmodule()` to read and inline-resolve a submodule's attribute records (imports, exports, …) before `ReadSubmoduleBlock()` also reads them, resulting in duplicate exports and dropped imports. The fix is a `ReadingSubmoduleBlock` boolean guard: `loadSubmodule()` is a no-op whenever `ReadSubmoduleBlock()` is on the call stack. This preserves the existing eager-loading semantics for the current commit while keeping the infrastructure in place for a future commit that will make non-top-level submodule creation truly lazy. No functional change in this commit: all submodules are still eagerly created during PCM loading, exactly as before. Co-Authored-By: Claude Sonnet 4.6 <[email protected]> --- clang/include/clang/Serialization/ASTReader.h | 23 +- clang/lib/Serialization/ASTReader.cpp | 372 ++++++++++++++++++ 2 files changed, 394 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index d6f75e5973c45..dac5eb0d41bf4 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -423,7 +423,8 @@ class ASTReader public ExternalHeaderFileInfoSource, public ExternalSemaSource, public IdentifierInfoLookup, - public ExternalSLocEntrySource + public ExternalSLocEntrySource, + public ExternalSubmoduleSource { public: /// Types of AST files. @@ -803,6 +804,16 @@ class ASTReader /// indicate that the particular submodule ID has not yet been loaded. SmallVector<Module *, 2> SubmodulesLoaded; + /// Reverse map from Module objects to their global SubmoduleID, populated + /// as modules are loaded. Used by loadSubmodule() to locate the owning + /// ModuleFile and file-local index when demand-loading a named child. + llvm::DenseMap<const Module *, serialization::SubmoduleID> SubmoduleGlobalIDs; + + /// True while ReadSubmoduleBlock() is actively processing records. 
Guards + /// loadSubmodule() from firing re-entrantly and double-processing attribute + /// records (imports, exports, …) that ReadSubmoduleBlock() will also read. + bool ReadingSubmoduleBlock = false; + using GlobalSubmoduleMapType = ContinuousRangeMap<serialization::SubmoduleID, ModuleFile *, 4>; @@ -1607,6 +1618,12 @@ class ASTReader unsigned ClientLoadCapabilities); llvm::Error ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities); + + /// Demand-load the submodule at the given file-local index from \p F, + /// reading its SUBMODULE_DEFINITION and all following attribute records. + /// The parent module must already be present in SubmodulesLoaded. + /// Immediately resolves any UnresolvedModuleRefs added for the new module. + void loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx); static bool ParseLanguageOptions(const RecordData &Record, StringRef ModuleFilename, bool Complain, ASTReaderListener &Listener, @@ -1893,6 +1910,10 @@ class ASTReader /// Make the entities in the given module and any of its (non-explicit) /// submodules visible to name lookup. /// + /// ExternalSubmoduleSource implementation: demand-load a named child of + /// \p Parent by consulting the scan index built during ReadSubmoduleBlock(). + void loadSubmodule(Module *Parent, StringRef Name) override; + /// \param Mod The module whose names should be made visible. /// /// \param NameVisibility The level of visibility to give the names in the diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9923daa43a18c..27f30b45c33bb 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6247,6 +6247,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID)) return Err; + // Suppress demand-loading via loadSubmodule() while we are actively building + // the scan index and creating Module objects. 
Without this guard, + // findOrCreateModuleFirst() → lookupModuleQualified() → loadSubmodule() would + // fire re-entrantly and double-process attribute records (imports, exports, + // …) that ReadSubmoduleBlock() is about to read anyway. + llvm::SaveAndRestore<bool> GuardReading(ReadingSubmoduleBlock, true); + ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr; // If we don't know the top-level module, there's no point in doing qualified @@ -6277,6 +6284,10 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, return llvm::createStringError(std::errc::illegal_byte_sequence, "malformed block record in AST file"); case llvm::BitstreamEntry::EndBlock: + // Register this reader as the external submodule source so that future + // qualified lookups through ModuleMap can demand-load individual + // submodules from this file's scan index. + ModMap.setExternalSubmoduleSource(this); return llvm::Error::success(); case llvm::BitstreamEntry::Record: // The interesting case. @@ -6423,6 +6434,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, DeserializationListener->ModuleRead(GlobalID, CurrentModule); SubmodulesLoaded[GlobalIndex] = CurrentModule; + SubmoduleGlobalIDs[CurrentModule] = GlobalID; // Clear out data that will be replaced by what is in the module file. CurrentModule->LinkLibraries.clear(); @@ -6611,6 +6623,366 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, } } + +void ASTReader::loadSubmodule(Module *Parent, StringRef Name) { + // Don't fire while ReadSubmoduleBlock() is building the scan index — the + // eager loader already creates every module we need, and we would otherwise + // double-process attribute records (imports, exports, …). + if (ReadingSubmoduleBlock) + return; + + // Look up the global ID of the parent module. 
+ auto It = SubmoduleGlobalIDs.find(Parent); + if (It == SubmoduleGlobalIDs.end()) + return; // Parent not from this reader; nothing to demand-load. + + serialization::SubmoduleID ParentGlobalID = It->second; + ModuleFile *F = GlobalSubmoduleMap.find(ParentGlobalID)->second; + + // Convert the parent's global ID to its file-local 0-based index. + uint32_t ParentFileIdx = + (ParentGlobalID - NUM_PREDEF_SUBMODULE_IDS) - F->BaseSubmoduleID; + + // Scan the index for a child of ParentFileIdx whose name matches. + for (uint32_t ChildFileIdx = 0; ChildFileIdx < F->LocalNumSubmodules; + ++ChildFileIdx) { + if (F->SubmoduleParentIDs[ChildFileIdx] != ParentFileIdx || + F->SubmoduleNames[ChildFileIdx] != Name) + continue; + + // Compute the global index in SubmodulesLoaded. + serialization::SubmoduleID ChildGlobalID = + F->BaseSubmoduleID + ChildFileIdx + NUM_PREDEF_SUBMODULE_IDS; + uint32_t ChildGlobalIndex = ChildGlobalID - NUM_PREDEF_SUBMODULE_IDS; + + // Only load if not already present. + if (!SubmodulesLoaded[ChildGlobalIndex]) + loadSingleSubmodule(*F, ChildFileIdx); + return; + } +} + +void ASTReader::loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx) { + assert(FileIdx < F.LocalNumSubmodules && "FileIdx out of range"); + assert(F.SubmoduleOffsets[FileIdx] != 0 || FileIdx == 0); + + ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + SmallString<0> PathBuf; + PathBuf.reserve(256); + + // Ensure the parent is loaded before we load the child. 
+ uint32_t ParentFileIdx = F.SubmoduleParentIDs[FileIdx]; + Module *ParentModule = nullptr; + if (ParentFileIdx != uint32_t(-1)) { + serialization::SubmoduleID ParentGlobalID = + F.BaseSubmoduleID + ParentFileIdx + NUM_PREDEF_SUBMODULE_IDS; + uint32_t ParentGlobalIndex = ParentGlobalID - NUM_PREDEF_SUBMODULE_IDS; + if (!SubmodulesLoaded[ParentGlobalIndex]) + loadSingleSubmodule(F, ParentFileIdx); + ParentModule = SubmodulesLoaded[ParentGlobalIndex]; + } + + // Seek the saved cursor to the SUBMODULE_DEFINITION record for FileIdx. + llvm::BitstreamCursor Cursor = F.SubmoduleCursor; + if (llvm::Error Err = Cursor.JumpToBit(F.SubmoduleOffsets[FileIdx])) { + Error(std::move(Err)); + return; + } + + // Track how many UnresolvedModuleRefs were pending before this load so we + // can inline-resolve the newly added entries at the end. + size_t OldUnresolvedSize = UnresolvedModuleRefs.size(); + + RecordData Record; + Module *CurrentModule = nullptr; + + while (true) { + Expected<llvm::BitstreamEntry> MaybeEntry = + Cursor.advanceSkippingSubblocks(); + if (!MaybeEntry) { + Error(MaybeEntry.takeError()); + return; + } + llvm::BitstreamEntry Entry = MaybeEntry.get(); + + if (Entry.Kind == llvm::BitstreamEntry::EndBlock) + break; + if (Entry.Kind != llvm::BitstreamEntry::Record) { + Error("malformed submodule block record in AST file"); + return; + } + + StringRef Blob; + Record.clear(); + Expected<unsigned> MaybeKind = Cursor.readRecord(Entry.ID, Record, &Blob); + if (!MaybeKind) { + Error(MaybeKind.takeError()); + return; + } + unsigned Kind = MaybeKind.get(); + + // If we've finished reading this submodule's definition and attributes and + // hit the next submodule's definition record, we're done. 
+ if (Kind == SUBMODULE_DEFINITION && CurrentModule) + break; + + switch (Kind) { + default: + break; + + case SUBMODULE_DEFINITION: { + if (Record.size() < 13) { + Error("malformed module definition"); + return; + } + unsigned Idx = 0; + serialization::SubmoduleID GlobalID = + getGlobalSubmoduleID(F, Record[Idx++]); + serialization::SubmoduleID Parent = + getGlobalSubmoduleID(F, Record[Idx++]); + Module::ModuleKind MKind = (Module::ModuleKind)Record[Idx++]; + SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]); + FileID InferredAllowedBy = ReadFileID(F, Record, Idx); + bool IsFramework = Record[Idx++]; + bool IsExplicit = Record[Idx++]; + bool IsSystem = Record[Idx++]; + bool IsExternC = Record[Idx++]; + bool InferSubmodules = Record[Idx++]; + bool InferExplicitSubmodules = Record[Idx++]; + bool InferExportWildcard = Record[Idx++]; + bool ConfigMacrosExhaustive = Record[Idx++]; + bool ModuleMapIsPrivate = Record[Idx++]; + bool NamedModuleHasInit = Record[Idx++]; + + // Bypass lookupModuleQualified here to avoid re-entrant calls back into + // loadSubmodule(). Check directly in the parent's SubModules list, then + // fall back to createModule() if not found. + Module *ExistingMod = ParentModule ? ParentModule->findSubmodule(Blob) + : ModMap.findModule(Blob); + CurrentModule = ExistingMod + ? ExistingMod + : ModMap.createModule(Blob, ParentModule, IsFramework, + IsExplicit); + + serialization::SubmoduleID GlobalIndex = + GlobalID - NUM_PREDEF_SUBMODULE_IDS; + if (GlobalIndex >= SubmodulesLoaded.size() || + SubmodulesLoaded[GlobalIndex]) { + // Already loaded (race or error); bail out. 
+ CurrentModule = nullptr; + return; + } + + CurrentModule->Kind = MKind; + CurrentModule->DefinitionLoc = DefinitionLoc; + CurrentModule->Signature = F.Signature; + CurrentModule->IsFromModuleFile = true; + if (InferredAllowedBy.isValid()) + ModMap.setInferredModuleAllowedBy(CurrentModule, InferredAllowedBy); + CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem; + CurrentModule->IsExternC = IsExternC; + CurrentModule->InferSubmodules = InferSubmodules; + CurrentModule->InferExplicitSubmodules = InferExplicitSubmodules; + CurrentModule->InferExportWildcard = InferExportWildcard; + CurrentModule->ConfigMacrosExhaustive = ConfigMacrosExhaustive; + CurrentModule->ModuleMapIsPrivate = ModuleMapIsPrivate; + CurrentModule->NamedModuleHasInit = NamedModuleHasInit; + + if (ParentModule && ParentModule->Directory) + CurrentModule->Directory = ParentModule->Directory; + + if (DeserializationListener) + DeserializationListener->ModuleRead(GlobalID, CurrentModule); + + SubmodulesLoaded[GlobalIndex] = CurrentModule; + SubmoduleGlobalIDs[CurrentModule] = GlobalID; + + CurrentModule->LinkLibraries.clear(); + CurrentModule->ConfigMacros.clear(); + CurrentModule->UnresolvedConflicts.clear(); + CurrentModule->Conflicts.clear(); + CurrentModule->Requirements.clear(); + CurrentModule->MissingHeaders.clear(); + CurrentModule->IsUnimportable = + ParentModule && ParentModule->IsUnimportable; + CurrentModule->IsAvailable = !CurrentModule->IsUnimportable; + break; + } + + case SUBMODULE_UMBRELLA_HEADER: { + if (!CurrentModule) + break; + SmallString<128> RelativePathName; + if (auto Umbrella = ModMap.findUmbrellaHeaderForModule( + CurrentModule, Blob.str(), RelativePathName)) + if (!CurrentModule->getUmbrellaHeaderAsWritten()) + ModMap.setUmbrellaHeaderAsWritten(CurrentModule, *Umbrella, Blob, + RelativePathName); + break; + } + + case SUBMODULE_HEADER: + case SUBMODULE_EXCLUDED_HEADER: + case SUBMODULE_PRIVATE_HEADER: + case SUBMODULE_TEXTUAL_HEADER: + case 
SUBMODULE_PRIVATE_TEXTUAL_HEADER: + break; + + case SUBMODULE_TOPHEADER: { + if (!CurrentModule) + break; + auto HeaderName = ResolveImportedPath(PathBuf, Blob, F); + CurrentModule->addTopHeaderFilename(*HeaderName); + break; + } + + case SUBMODULE_UMBRELLA_DIR: { + if (!CurrentModule) + break; + auto Dirname = ResolveImportedPath(PathBuf, Blob, F); + if (auto Umbrella = + PP.getFileManager().getOptionalDirectoryRef(*Dirname)) + if (!CurrentModule->getUmbrellaDirAsWritten()) + ModMap.setUmbrellaDirAsWritten(CurrentModule, *Umbrella, Blob, ""); + break; + } + + case SUBMODULE_IMPORTS: + if (!CurrentModule) + break; + for (unsigned I = 0; I != Record.size(); ++I) { + UnresolvedModuleRef Unresolved; + Unresolved.File = &F; + Unresolved.Mod = CurrentModule; + Unresolved.ID = Record[I]; + Unresolved.Kind = UnresolvedModuleRef::Import; + Unresolved.IsWildcard = false; + UnresolvedModuleRefs.push_back(Unresolved); + } + break; + + case SUBMODULE_AFFECTING_MODULES: + if (!CurrentModule) + break; + for (unsigned I = 0; I != Record.size(); ++I) { + UnresolvedModuleRef Unresolved; + Unresolved.File = &F; + Unresolved.Mod = CurrentModule; + Unresolved.ID = Record[I]; + Unresolved.Kind = UnresolvedModuleRef::Affecting; + Unresolved.IsWildcard = false; + UnresolvedModuleRefs.push_back(Unresolved); + } + break; + + case SUBMODULE_EXPORTS: + if (!CurrentModule) + break; + for (unsigned I = 0; I + 1 < Record.size(); I += 2) { + UnresolvedModuleRef Unresolved; + Unresolved.File = &F; + Unresolved.Mod = CurrentModule; + Unresolved.ID = Record[I]; + Unresolved.Kind = UnresolvedModuleRef::Export; + Unresolved.IsWildcard = Record[I + 1]; + UnresolvedModuleRefs.push_back(Unresolved); + } + if (CurrentModule) + CurrentModule->UnresolvedExports.clear(); + break; + + case SUBMODULE_REQUIRES: + if (!CurrentModule) + break; + CurrentModule->addRequirement(Blob, Record[0], PP.getLangOpts(), + PP.getTargetInfo()); + break; + + case SUBMODULE_LINK_LIBRARY: + if (!CurrentModule) + break; + 
ModMap.resolveLinkAsDependencies(CurrentModule); + CurrentModule->LinkLibraries.push_back( + Module::LinkLibrary(std::string(Blob), Record[0])); + break; + + case SUBMODULE_CONFIG_MACRO: + if (!CurrentModule) + break; + CurrentModule->ConfigMacros.push_back(Blob.str()); + break; + + case SUBMODULE_CONFLICT: { + if (!CurrentModule) + break; + UnresolvedModuleRef Unresolved; + Unresolved.File = &F; + Unresolved.Mod = CurrentModule; + Unresolved.ID = Record[0]; + Unresolved.Kind = UnresolvedModuleRef::Conflict; + Unresolved.IsWildcard = false; + Unresolved.String = Blob; + UnresolvedModuleRefs.push_back(Unresolved); + break; + } + + case SUBMODULE_INITIALIZERS: + if (!CurrentModule || !ContextObj) + break; + if (!F.StandardCXXModule || F.Kind == MK_MainFile) { + SmallVector<GlobalDeclID, 16> Inits; + for (unsigned I = 0; I < Record.size(); /*in loop*/) + Inits.push_back(ReadDeclID(F, Record, I)); + ContextObj->addLazyModuleInitializers(CurrentModule, Inits); + } + break; + + case SUBMODULE_EXPORT_AS: + if (!CurrentModule) + break; + CurrentModule->ExportAsModule = Blob.str(); + ModMap.addLinkAsDependency(CurrentModule); + break; + } + } + + // Inline-resolve any UnresolvedModuleRefs that were added during this load. + // This handles the case where loadSingleSubmodule is called outside an + // active deserialization chain (NumCurrentElementsDeserializing == 0), + // where FinishedDeserializing() would not otherwise run. 
+ for (size_t I = OldUnresolvedSize; I < UnresolvedModuleRefs.size(); ++I) { + UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I]; + serialization::SubmoduleID GlobalID = + getGlobalSubmoduleID(*Unresolved.File, Unresolved.ID); + Module *ResolvedMod = getSubmodule(GlobalID); + + switch (Unresolved.Kind) { + case UnresolvedModuleRef::Conflict: + if (ResolvedMod) { + Module::Conflict Conflict; + Conflict.Other = ResolvedMod; + Conflict.Message = Unresolved.String.str(); + Unresolved.Mod->Conflicts.push_back(Conflict); + } + break; + case UnresolvedModuleRef::Import: + if (ResolvedMod) + Unresolved.Mod->Imports.insert(ResolvedMod); + break; + case UnresolvedModuleRef::Affecting: + if (ResolvedMod) + Unresolved.Mod->AffectingClangModules.insert(ResolvedMod); + break; + case UnresolvedModuleRef::Export: + if (ResolvedMod || Unresolved.IsWildcard) + Unresolved.Mod->Exports.push_back( + Module::ExportDecl(ResolvedMod, Unresolved.IsWildcard)); + break; + } + } + UnresolvedModuleRefs.resize(OldUnresolvedSize); +} + /// Parse the record that corresponds to a LangOptions data /// structure. /// >From b75b28fd98445d976057673fbfffc390f504d949 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Tue, 31 Mar 2026 08:20:23 -0700 Subject: [PATCH 6/6] WIP --- clang/include/clang/Serialization/ASTReader.h | 4 ++ clang/lib/Serialization/ASTReader.cpp | 53 ++++++++++++++++--- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index dac5eb0d41bf4..78f15dfcbda3a 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2457,6 +2457,10 @@ class ASTReader /// Module *getSubmodule(serialization::SubmoduleID GlobalID); + /// Like getSubmodule(), but demand-loads the module if it has not been + /// materialised yet (i.e., it was deferred during lazy ReadSubmoduleBlock). 
+ Module *getOrLoadSubmodule(serialization::SubmoduleID GlobalID); + /// Retrieve the module that corresponds to the given module ID. /// /// Note: overrides method in ExternalASTSource diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 27f30b45c33bb..55912b6bbb46b 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2406,7 +2406,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, // This header is part of a module. Associate it with the module to enable // implicit module import. SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID); - Module *Mod = Reader.getSubmodule(GlobalSMID); + Module *Mod = Reader.getOrLoadSubmodule(GlobalSMID); ModuleMap &ModMap = Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap(); @@ -2670,14 +2670,14 @@ void ASTReader::resolvePendingMacro(IdentifierInfo *II, for (auto &MMR : ModuleMacros) { Overrides.clear(); for (unsigned ModID : MMR.Overrides) { - Module *Mod = getSubmodule(ModID); + Module *Mod = getOrLoadSubmodule(ModID); auto *Macro = PP.getModuleMacro(Mod, II); assert(Macro && "missing definition for overridden macro"); Overrides.push_back(Macro); } bool Inserted = false; - Module *Owner = getSubmodule(MMR.SubModID); + Module *Owner = getOrLoadSubmodule(MMR.SubModID); PP.addModuleMacro(Owner, II, MMR.MI, Overrides, Inserted); } } @@ -5075,7 +5075,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(ModuleFileName FileName, for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I]; SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID); - Module *ResolvedMod = getSubmodule(GlobalID); + Module *ResolvedMod = getOrLoadSubmodule(GlobalID); switch (Unresolved.Kind) { case UnresolvedModuleRef::Conflict: @@ -5707,7 +5707,7 @@ void ASTReader::InitializeContext() { // Re-export any modules that were imported by a 
non-module AST file. // FIXME: This does not make macro-only imports visible again. for (auto &Import : PendingImportedModules) { - if (Module *Imported = getSubmodule(Import.ID)) { + if (Module *Imported = getOrLoadSubmodule(Import.ID)) { makeModuleVisible(Imported, Module::AllVisible, /*ImportLoc=*/Import.ImportLoc); if (Import.ImportLoc.isValid()) @@ -6264,6 +6264,10 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, : &ModuleMap::findOrCreateModuleFirst; bool First = true; + // SeenFirstDef: true after the root module of this PCM file has been eagerly + // loaded. All subsequent SUBMODULE_DEFINITION records (descendants of the + // root) are demand-loaded lazily via loadSingleSubmodule(). + bool SeenFirstDef = false; Module *CurrentModule = nullptr; RecordData Record; // RecordStartBit tracks the bitstream position just before each call to @@ -6345,6 +6349,16 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, F.SubmoduleParentIDs[FileIdx] = ParentFileIdx; F.SubmoduleNames[FileIdx] = Name; } + + // After the root module of this PCM file has been eagerly loaded, skip + // creating Module objects for all descendant submodules. Their scan-index + // entries (above) are enough for loadSingleSubmodule() to materialise + // them on demand when they are first referenced (e.g. via a header-file + // lookup or an explicit import). + if (SeenFirstDef) { + CurrentModule = nullptr; + continue; + } Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++]; SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]); FileID InferredAllowedBy = ReadFileID(F, Record, Idx); @@ -6452,6 +6466,9 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, CurrentModule->IsUnimportable = ParentModule && ParentModule->IsUnimportable; CurrentModule->IsAvailable = !CurrentModule->IsUnimportable; + // Mark that the root module of this PCM file has been loaded; subsequent + // SUBMODULE_DEFINITION records will be deferred to demand-loading. 
+ SeenFirstDef = true; break; } @@ -6954,7 +6971,7 @@ void ASTReader::loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx) { UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I]; serialization::SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File, Unresolved.ID); - Module *ResolvedMod = getSubmodule(GlobalID); + Module *ResolvedMod = getOrLoadSubmodule(GlobalID); switch (Unresolved.Kind) { case UnresolvedModuleRef::Conflict: @@ -9688,7 +9705,7 @@ void ASTReader::UpdateSema() { for (auto &Import : PendingImportedModulesSema) { if (Import.ImportLoc.isInvalid()) continue; - if (Module *Imported = getSubmodule(Import.ID)) { + if (Module *Imported = getOrLoadSubmodule(Import.ID)) { SemaObj->makeModuleVisible(Imported, Import.ImportLoc); } } @@ -10397,6 +10414,28 @@ Module *ASTReader::getSubmodule(SubmoduleID GlobalID) { return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS]; } +Module *ASTReader::getOrLoadSubmodule(SubmoduleID GlobalID) { + if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) + return nullptr; + if (GlobalID > SubmodulesLoaded.size()) { + Error("submodule ID out of range in AST file"); + return nullptr; + } + uint32_t GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; + if (!SubmodulesLoaded[GlobalIndex] && !ReadingSubmoduleBlock) { + auto It = GlobalSubmoduleMap.find(GlobalID); + if (It != GlobalSubmoduleMap.end()) { + ModuleFile *F = It->second; + uint32_t FileIdx = + (GlobalID - NUM_PREDEF_SUBMODULE_IDS) - F->BaseSubmoduleID; + if (FileIdx < F->LocalNumSubmodules && !F->SubmoduleOffsets.empty() && + F->SubmoduleOffsets[FileIdx] != 0) + loadSingleSubmodule(*F, FileIdx); + } + } + return SubmodulesLoaded[GlobalIndex]; +} + Module *ASTReader::getModule(unsigned ID) { return getSubmodule(ID); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
