https://github.com/jansvoboda11 updated 
https://github.com/llvm/llvm-project/pull/188877

>From 196117b66a6bf6cf282394673c63d5ebd8ef2c82 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Fri, 20 Mar 2026 20:54:01 -0700
Subject: [PATCH 1/6] [clang] Introduce `ModuleCache::write()`

---
 .../clang/Basic/DiagnosticCommonKinds.td      |  1 +
 .../include/clang/Frontend/CompilerInstance.h | 10 +--
 .../include/clang/Frontend/FrontendActions.h  | 12 ++++
 .../include/clang/Serialization/ModuleCache.h | 10 ++-
 .../InProcessModuleCache.cpp                  |  6 ++
 clang/lib/Frontend/CompilerInstance.cpp       | 61 ++++++++++++++-----
 clang/lib/Frontend/FrontendActions.cpp        |  3 +-
 clang/lib/Serialization/ModuleCache.cpp       | 40 ++++++++++++
 8 files changed, 121 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td 
b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index cb267e3ee05c1..b5f99606789fe 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -103,6 +103,7 @@ def err_deleted_non_function : Error<
   "only functions can have deleted definitions">;
 def err_module_not_found : Error<"module '%0' not found">, DefaultFatal;
 def err_module_not_built : Error<"could not build module '%0'">, DefaultFatal;
+def err_module_not_written : Error<"could not write module file for '%0' to '%1': %2">, DefaultFatal;
 def err_module_build_disabled: Error<
   "module '%0' is needed but has not been provided, and implicit use of module "
   "files is disabled">, DefaultFatal;
diff --git a/clang/include/clang/Frontend/CompilerInstance.h 
b/clang/include/clang/Frontend/CompilerInstance.h
index f206d012eacc9..be44817aa5a1b 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -934,12 +934,14 @@ class CompilerInstance : public ModuleLoader {
       std::optional<ThreadSafeCloneConfig> ThreadSafeConfig = std::nullopt);
 
   /// Compile a module file for the given module, using the options
-  /// provided by the importing compiler instance. Returns true if the module
-  /// was built without errors.
+  /// provided by the importing compiler instance. Returns the PCM contents in
+  /// a buffer, or nullptr if the module could not be built.
   // FIXME: This should be private, but it's called from static non-member
   // functions in the implementation file.
-  bool compileModule(SourceLocation ImportLoc, StringRef ModuleName,
-                     StringRef ModuleFileName, CompilerInstance &Instance);
+  std::unique_ptr<llvm::MemoryBuffer> compileModule(SourceLocation ImportLoc,
+                                                    StringRef ModuleName,
+                                                    StringRef ModuleFileName,
+                                                    CompilerInstance &Instance);
 
   ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path,
                               Module::NameVisibilityKind Visibility,
diff --git a/clang/include/clang/Frontend/FrontendActions.h 
b/clang/include/clang/Frontend/FrontendActions.h
index 87a9f0d4cb06c..c5aff7ae1a713 100644
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -114,6 +114,15 @@ class GeneratePCHAction : public ASTFrontendAction {
 };
 
 class GenerateModuleAction : public ASTFrontendAction {
+public:
+  /// When \c OS is non-null, uses it for outputting the PCM file instead of
+  /// automatically creating an output file.
+  explicit GenerateModuleAction(std::unique_ptr<raw_pwrite_stream> OS = nullptr)
+      : OS(std::move(OS)) {}
+
+private:
+  std::unique_ptr<raw_pwrite_stream> OS;
+
   virtual std::unique_ptr<raw_pwrite_stream>
   CreateOutputFile(CompilerInstance &CI, StringRef InFile) = 0;
 
@@ -145,6 +154,9 @@ class GenerateInterfaceStubsAction : public 
ASTFrontendAction {
 };
 
 class GenerateModuleFromModuleMapAction : public GenerateModuleAction {
+public:
+  using GenerateModuleAction::GenerateModuleAction;
+
 private:
   bool BeginSourceFileAction(CompilerInstance &CI) override;
 
diff --git a/clang/include/clang/Serialization/ModuleCache.h 
b/clang/include/clang/Serialization/ModuleCache.h
index c6795c5dc358a..4fced900bbdcb 100644
--- a/clang/include/clang/Serialization/ModuleCache.h
+++ b/clang/include/clang/Serialization/ModuleCache.h
@@ -14,6 +14,7 @@
 #include <ctime>
 
 namespace llvm {
+class MemoryBufferRef;
 class AdvisoryLock;
 } // namespace llvm
 
@@ -52,7 +53,11 @@ class ModuleCache {
   virtual InMemoryModuleCache &getInMemoryModuleCache() = 0;
   virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0;
 
-  // TODO: Virtualize writing/reading PCM files, etc.
+  /// Write the PCM contents to the given path in the module cache.
+  virtual std::error_code write(StringRef Path,
+                                llvm::MemoryBufferRef Buffer) = 0;
+
+  // TODO: Virtualize reading PCM files, etc.
 
   virtual ~ModuleCache() = default;
 };
@@ -65,6 +70,9 @@ std::shared_ptr<ModuleCache> createCrossProcessModuleCache();
 
 /// Shared implementation of `ModuleCache::maybePrune()`.
 void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter);
+
+/// Shared implementation of `ModuleCache::write()`.
+std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer);
 } // namespace clang
 
 #endif
diff --git a/clang/lib/DependencyScanning/InProcessModuleCache.cpp 
b/clang/lib/DependencyScanning/InProcessModuleCache.cpp
index cd7385c8f38c2..7bdfae8f3e567 100644
--- a/clang/lib/DependencyScanning/InProcessModuleCache.cpp
+++ b/clang/lib/DependencyScanning/InProcessModuleCache.cpp
@@ -127,6 +127,12 @@ class InProcessModuleCache : public ModuleCache {
     maybePruneImpl(Path, PruneInterval, PruneAfter);
   }
 
+  std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override {
+    // FIXME: This could use an in-memory cache to avoid IO, and only write to
+    // disk at the end of the scan.
+    return writeImpl(Path, Buffer);
+  }
+
   InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
   const InMemoryModuleCache &getInMemoryModuleCache() const override {
     return InMemory;
diff --git a/clang/lib/Frontend/CompilerInstance.cpp 
b/clang/lib/Frontend/CompilerInstance.cpp
index 1f1b6701c38df..262bf3484e6a0 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -56,6 +56,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/VirtualFileSystem.h"
@@ -1238,10 +1239,10 @@ class PrettyStackTraceBuildModule : public 
llvm::PrettyStackTraceEntry {
 };
 } // namespace
 
-bool CompilerInstance::compileModule(SourceLocation ImportLoc,
-                                     StringRef ModuleName,
-                                     StringRef ModuleFileName,
-                                     CompilerInstance &Instance) {
+std::unique_ptr<llvm::MemoryBuffer>
+CompilerInstance::compileModule(SourceLocation ImportLoc, StringRef ModuleName,
+                                StringRef ModuleFileName,
+                                CompilerInstance &Instance) {
   PrettyStackTraceBuildModule CrashInfo(ModuleName, ModuleFileName);
   llvm::TimeTraceScope TimeScope("Module Compile", ModuleName);
 
@@ -1250,18 +1251,22 @@ bool CompilerInstance::compileModule(SourceLocation 
ImportLoc,
   if (getModuleCache().getInMemoryModuleCache().isPCMFinal(ModuleFileName)) {
     getDiagnostics().Report(ImportLoc, diag::err_module_rebuild_finalized)
         << ModuleName;
-    return false;
+    return nullptr;
   }
 
   getDiagnostics().Report(ImportLoc, diag::remark_module_build)
       << ModuleName << ModuleFileName;
 
+  SmallString<0> Buffer;
+
   // Execute the action to actually build the module in-place. Use a separate
   // thread so that we get a stack large enough.
   bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack(
       [&]() {
+        auto OS = std::make_unique<llvm::raw_svector_ostream>(Buffer);
+
         std::unique_ptr<FrontendAction> Action =
-            std::make_unique<GenerateModuleFromModuleMapAction>();
+            std::make_unique<GenerateModuleFromModuleMapAction>(std::move(OS));
 
         if (auto WrapGenModuleAction = Instance.getGenModuleActionWrapper())
           Action = WrapGenModuleAction(Instance.getFrontendOpts(),
@@ -1297,10 +1302,17 @@ bool CompilerInstance::compileModule(SourceLocation 
ImportLoc,
     setBuildGlobalModuleIndex(true);
   }
 
-  // If \p AllowPCMWithCompilerErrors is set return 'success' even if errors
+  if (Crashed)
+    return nullptr;
+
+  // Unless \p AllowPCMWithCompilerErrors is set, return 'failure' if errors
   // occurred.
-  return !Instance.getDiagnostics().hasErrorOccurred() ||
-         Instance.getFrontendOpts().AllowPCMWithCompilerErrors;
+  if (Instance.getDiagnostics().hasErrorOccurred() &&
+      !Instance.getFrontendOpts().AllowPCMWithCompilerErrors)
+    return nullptr;
+
+  return std::make_unique<llvm::SmallVectorMemoryBuffer>(
+      std::move(Buffer), Instance.getFrontendOpts().OutputFile);
 }
 
 static OptionalFileEntryRef getPublicModuleMap(FileEntryRef File,
@@ -1442,13 +1454,17 @@ static bool compileModuleImpl(CompilerInstance 
&ImportingInstance,
                               SourceLocation ImportLoc,
                               SourceLocation ModuleNameLoc, Module *Module,
                               ModuleFileName ModuleFileName) {
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+
   {
     auto Instance = ImportingInstance.cloneForModuleCompile(
         ModuleNameLoc, Module, ModuleFileName);
 
-    if (!ImportingInstance.compileModule(ModuleNameLoc,
-                                         Module->getTopLevelModuleName(),
-                                         ModuleFileName, *Instance)) {
+    Buffer = ImportingInstance.compileModule(ModuleNameLoc,
+                                             Module->getTopLevelModuleName(),
+                                             ModuleFileName, *Instance);
+
+    if (!Buffer) {
       ImportingInstance.getDiagnostics().Report(ModuleNameLoc,
                                                 diag::err_module_not_built)
           << Module->Name << SourceRange(ImportLoc, ModuleNameLoc);
@@ -1456,6 +1472,16 @@ static bool compileModuleImpl(CompilerInstance 
&ImportingInstance,
     }
   }
 
+  std::error_code EC =
+      ImportingInstance.getModuleCache().write(ModuleFileName, *Buffer);
+  if (EC) {
+    ImportingInstance.getDiagnostics().Report(ModuleNameLoc,
+                                              diag::err_module_not_written)
+        << Module->Name << ModuleFileName << EC.message()
+        << SourceRange(ImportLoc, ModuleNameLoc);
+    return false;
+  }
+
   // The module is built successfully, we can update its timestamp now.
   if (ImportingInstance.getPreprocessor()
           .getHeaderSearchInfo()
@@ -2196,8 +2222,9 @@ void 
CompilerInstance::createModuleFromSource(SourceLocation ImportLoc,
   // output is nondeterministic (as .pcm files refer to each other by name).
   // Can this affect the output in any way?
   SmallString<128> ModuleFileName;
+  int FD;
   if (std::error_code EC = llvm::sys::fs::createTemporaryFile(
-          CleanModuleName, "pcm", ModuleFileName)) {
+          CleanModuleName, "pcm", FD, ModuleFileName)) {
     getDiagnostics().Report(ImportLoc, diag::err_fe_unable_to_open_output)
         << ModuleFileName << EC.message();
     return;
@@ -2225,12 +2252,14 @@ void 
CompilerInstance::createModuleFromSource(SourceLocation ImportLoc,
   Other->DeleteBuiltModules = false;
 
   // Build the module, inheriting any modules that we've built locally.
-  bool Success = compileModule(ImportLoc, ModuleName, ModuleFileName, *Other);
-
+  std::unique_ptr<llvm::MemoryBuffer> Buffer =
+      compileModule(ImportLoc, ModuleName, ModuleFileName, *Other);
   BuiltModules = std::move(Other->BuiltModules);
 
-  if (Success) {
+  if (Buffer) {
+    llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true);
     BuiltModules[std::string(ModuleName)] = std::string(ModuleFileName);
+    OS << Buffer->getBuffer();
     llvm::sys::RemoveFileOnSignal(ModuleFileName);
   }
 }
diff --git a/clang/lib/Frontend/FrontendActions.cpp 
b/clang/lib/Frontend/FrontendActions.cpp
index e5eaab0da7adb..42f1ae3d83ed3 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -188,7 +188,8 @@ bool 
GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) {
 std::vector<std::unique_ptr<ASTConsumer>>
 GenerateModuleAction::CreateMultiplexConsumer(CompilerInstance &CI,
                                               StringRef InFile) {
-  std::unique_ptr<raw_pwrite_stream> OS = CreateOutputFile(CI, InFile);
+  if (!OS)
+    OS = CreateOutputFile(CI, InFile);
   if (!OS)
     return {};
 
diff --git a/clang/lib/Serialization/ModuleCache.cpp 
b/clang/lib/Serialization/ModuleCache.cpp
index 658da6e3b7145..6a1fe5e635cd8 100644
--- a/clang/lib/Serialization/ModuleCache.cpp
+++ b/clang/lib/Serialization/ModuleCache.cpp
@@ -101,6 +101,39 @@ void clang::maybePruneImpl(StringRef Path, time_t 
PruneInterval,
   }
 }
 
+std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) {
+  StringRef Extension = llvm::sys::path::extension(Path);
+  SmallString<128> ModelPath = StringRef(Path).drop_back(Extension.size());
+  ModelPath += "-%%%%%%%%";
+  ModelPath += Extension;
+  ModelPath += ".tmp";
+
+  std::error_code EC;
+  int FD;
+  SmallString<128> TmpPath;
+  if ((EC = llvm::sys::fs::createUniqueFile(ModelPath, FD, TmpPath))) {
+    if (EC != std::errc::no_such_file_or_directory)
+      return EC;
+
+    StringRef Dir = llvm::sys::path::parent_path(Path);
+    if (std::error_code InnerEC = llvm::sys::fs::create_directories(Dir))
+      return InnerEC;
+
+    if ((EC = llvm::sys::fs::createUniqueFile(ModelPath, FD, TmpPath)))
+      return EC;
+  }
+
+  {
+    llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true);
+    OS << Buffer.getBuffer();
+  }
+
+  if ((EC = llvm::sys::fs::rename(TmpPath, Path)))
+    return EC;
+
+  return {};
+}
+
 namespace {
 class CrossProcessModuleCache : public ModuleCache {
   InMemoryModuleCache InMemory;
@@ -157,6 +190,13 @@ class CrossProcessModuleCache : public ModuleCache {
     maybePruneImpl(Path, PruneInterval, PruneAfter);
   }
 
+  std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override {
+    // This is a compiler-internal input/output, let's bypass the sandbox.
+    auto BypassSandbox = llvm::sys::sandbox::scopedDisable();
+
+    return writeImpl(Path, Buffer);
+  }
+
   InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
   const InMemoryModuleCache &getInMemoryModuleCache() const override {
     return InMemory;

>From eaa8b5e9260c7e4303500865935e7604f03751b0 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Fri, 27 Mar 2026 09:48:29 -0700
Subject: [PATCH 2/6] Add missing include

---
 clang/include/clang/Serialization/ModuleCache.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/include/clang/Serialization/ModuleCache.h 
b/clang/include/clang/Serialization/ModuleCache.h
index 4fced900bbdcb..9ea4d84380660 100644
--- a/clang/include/clang/Serialization/ModuleCache.h
+++ b/clang/include/clang/Serialization/ModuleCache.h
@@ -12,6 +12,7 @@
 #include "clang/Basic/LLVM.h"
 
 #include <ctime>
+#include <system_error>
 
 namespace llvm {
 class MemoryBufferRef;

>From 0db177cdd9feebe3d74603055c4b79a332aceb6d Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Mon, 30 Mar 2026 15:15:00 -0700
Subject: [PATCH 3/6] [clang] Add ExternalSubmoduleSource interface to
 ModuleMap

Introduce an abstract ExternalSubmoduleSource class that allows an
external consumer (e.g. ASTReader) to demand-load individual submodule
Module objects on first lookup rather than materialising the entire
submodule tree upfront.

The interface exposes a single hook:

  virtual void loadSubmodule(Module *Parent, StringRef Name) = 0;

which is called by ModuleMap just before the two internal qualified-
lookup chokepoints attempt findSubmodule():

* lookupModuleQualified() -- reached by every dotted import path
  component (e.g. "NSString" when resolving Foundation.NSString) via
  lookupModuleUnqualified(), resolveModuleId(), and
  CompilerInstance::loadModule().

* findOrInferSubmodule() -- the other path that searches for an
  existing child before potentially inferring a new one.

Both sites are already the canonical funnels for external callers; no
other code directly walks Module::findSubmodule() for cross-file
purposes.  The external source is registered via
ModuleMap::setExternalSubmoduleSource().

No functional change in this commit: ExternalSource is null by default
so all code paths are identical to before.  A subsequent commit will
implement the interface in ASTReader and perform the lazy scan-and-
index pass during ReadSubmoduleBlock().

Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
---
 clang/include/clang/Lex/ModuleMap.h | 24 ++++++++++++++++++++++++
 clang/lib/Lex/ModuleMap.cpp         |  6 ++++++
 2 files changed, 30 insertions(+)

diff --git a/clang/include/clang/Lex/ModuleMap.h 
b/clang/include/clang/Lex/ModuleMap.h
index 570a68c37fac4..1d77a1087904b 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -44,6 +44,22 @@ class FileManager;
 class HeaderSearch;
 class SourceManager;
 
+/// Abstract interface for demand-loading submodule data from external storage
+/// (e.g., a precompiled module file). Registered with \c ModuleMap via
+/// \c setExternalSubmoduleSource() and called from qualified lookup paths
+/// before falling back to the in-memory module tree, enabling lazy
+/// construction of \c Module objects for submodules that may never be needed.
+class ExternalSubmoduleSource {
+public:
+  virtual ~ExternalSubmoduleSource() = default;
+
+  /// Called just before \p Name is looked up as an immediate child of
+  /// \p Parent in the in-memory module tree; the child may already exist.
+  /// If external storage knows of such a child that is not loaded yet, it
+  /// should create the \c Module object and register it with \p Parent.
+  virtual void loadSubmodule(Module *Parent, StringRef Name) = 0;
+};
+
 /// A mechanism to observe the actions of the module map loader as it
 /// reads module map files.
 class ModuleMapCallbacks {
@@ -109,6 +125,9 @@ class ModuleMap {
   /// The number of modules we have created in total.
   unsigned NumCreatedModules = 0;
 
+  /// Optional external source for on-demand submodule loading.
+  ExternalSubmoduleSource *ExternalSource = nullptr;
+
   /// In case a module has a export_as entry, it might have a pending link
   /// name to be determined if that module is imported.
   llvm::StringMap<llvm::StringSet<>> PendingLinkAsModule;
@@ -122,6 +141,11 @@ class ModuleMap {
   /// information is available or add it to a pending list otherwise.
   void addLinkAsDependency(Module *Mod);
 
+  /// Set the external source used for on-demand submodule loading.
+  void setExternalSubmoduleSource(ExternalSubmoduleSource *Source) {
+    ExternalSource = Source;
+  }
+
   /// Flags describing the role of a module header.
   enum ModuleHeaderRole {
     /// This header is normally included in the module.
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 6c991430cb08b..bd12b796d6eb4 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -829,6 +829,9 @@ Module *ModuleMap::findModule(StringRef Name) const {
 }
 
 Module *ModuleMap::findOrInferSubmodule(Module *Parent, StringRef Name) {
+  if (ExternalSource)
+    ExternalSource->loadSubmodule(Parent, Name);
+
   if (Module *SubM = Parent->findSubmodule(Name))
     return SubM;
   if (!Parent->InferSubmodules)
@@ -858,6 +861,9 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, 
Module *Context) const{
   if (!Context)
     return findModule(Name);
 
+  if (ExternalSource)
+    ExternalSource->loadSubmodule(Context, Name);
+
   return Context->findSubmodule(Name);
 }
 

>From 708a9ddfdcc71c3b0555f0f2ab836cbcd279047a Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Mon, 30 Mar 2026 15:48:57 -0700
Subject: [PATCH 4/6] [clang] Record per-submodule scan index in ModuleFile
 during PCM load

Add three parallel arrays and a saved bitstream cursor to ModuleFile
that together form a lightweight index over the SUBMODULE_BLOCK for
use by future on-demand submodule loading:

  SubmoduleCursor   - BitstreamCursor saved immediately after reading
                      SUBMODULE_METADATA, positioned for JumpToBit().
  SubmoduleOffsets  - Bit position of each SUBMODULE_DEFINITION record,
                      indexed by 0-based file-local submodule index
                      (FileIdx = GlobalID - NUM_PREDEF_SUBMODULE_IDS -
                      BaseSubmoduleID).
  SubmoduleParentIDs - File-local index of each submodule's parent;
                      uint32_t(-1) means "no parent" (top-level module).
  SubmoduleNames    - StringRef name of each submodule, pointing into the
                      PCM's memory-mapped buffer (no heap allocation).

These are populated in ReadSubmoduleBlock() alongside the existing eager
Module-object creation.  A RecordStartBit variable is captured before
each advanceSkippingSubblocks() call so that SUBMODULE_DEFINITION
handlers know the exact bit offset of their record header.

The local ID stored in Record[0] of a SUBMODULE_DEFINITION record is
not 1-based; it starts at LocalBaseSubmoduleID (recorded in
SUBMODULE_METADATA as Record[1]).  The correct file-local 0-based
index is therefore derived from the global ID:

  FileIdx = (GlobalID - NUM_PREDEF_SUBMODULE_IDS) - F.BaseSubmoduleID

No functional change in this commit: Module objects are still created
eagerly for every submodule.  The index data collected here will be
consumed by ASTReader::loadSubmodule() in the next commit to enable
demand-loading individual submodules without reading the entire block.

Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
---
 .../include/clang/Serialization/ModuleFile.h  | 19 ++++++++++
 clang/lib/Serialization/ASTReader.cpp         | 35 +++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/clang/include/clang/Serialization/ModuleFile.h 
b/clang/include/clang/Serialization/ModuleFile.h
index 303bd65a8aad0..decc1f279b0e4 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -434,6 +434,25 @@ class ModuleFile {
   /// Remapping table for submodule IDs in this module.
   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
 
+  /// Cursor into the submodule block, saved immediately after reading
+  /// SUBMODULE_METADATA. Used to seek to individual submodule definition
+  /// records for on-demand loading via JumpToBit(SubmoduleOffsets[i]).
+  llvm::BitstreamCursor SubmoduleCursor;
+
+  /// Bit position of each submodule's SUBMODULE_DEFINITION record within the
+  /// submodule block bitstream, indexed by 0-based file-local submodule index.
+  SmallVector<uint64_t, 0> SubmoduleOffsets;
+
+  /// 0-based file-local index of each submodule's parent, indexed by the
+  /// submodule's own file-local index. uint32_t(-1) means the submodule is
+  /// the top-level module of this file (no parent).
+  SmallVector<uint32_t, 0> SubmoduleParentIDs;
+
+  /// Name of each submodule as written in the module map, indexed by
+  /// 0-based file-local index. These StringRefs point into the PCM's
+  /// memory-mapped buffer and carry no heap allocation cost.
+  SmallVector<StringRef, 0> SubmoduleNames;
+
   // === Selectors ===
 
   /// The number of selectors new to this file.
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 7c3a6fceb3623..9923daa43a18c 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -6259,7 +6259,12 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
   bool First = true;
   Module *CurrentModule = nullptr;
   RecordData Record;
+  // RecordStartBit tracks the bitstream position just before each call to
+  // advanceSkippingSubblocks(), so that SUBMODULE_DEFINITION handlers can
+  // record the seek-to offset for later on-demand re-reading.
+  uint64_t RecordStartBit = 0;
   while (true) {
+    RecordStartBit = F.Stream.GetCurrentBitNo();
     Expected<llvm::BitstreamEntry> MaybeEntry =
         F.Stream.advanceSkippingSubblocks();
     if (!MaybeEntry)
@@ -6311,6 +6316,24 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       unsigned Idx = 0;
       SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
       SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
+
+      // Record scan-index data for this definition.  FileIdx is the 0-based
+      // offset of this submodule within F's local range, derived from the
+      // global ID.  ParentFileIdx uses uint32_t(-1) as a sentinel for "no
+      // parent" (top-level module of this file).
+      if (!F.SubmoduleOffsets.empty()) {
+        uint32_t FileIdx =
+            (GlobalID - NUM_PREDEF_SUBMODULE_IDS) - F.BaseSubmoduleID;
+        assert(FileIdx < F.SubmoduleOffsets.size() &&
+               "submodule file index out of range");
+        uint32_t ParentFileIdx =
+            (Parent < NUM_PREDEF_SUBMODULE_IDS)
+                ? uint32_t(-1)
+                : (Parent - NUM_PREDEF_SUBMODULE_IDS) - F.BaseSubmoduleID;
+        F.SubmoduleOffsets[FileIdx] = RecordStartBit;
+        F.SubmoduleParentIDs[FileIdx] = ParentFileIdx;
+        F.SubmoduleNames[FileIdx] = Name;
+      }
       Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
       SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
       FileID InferredAllowedBy = ReadFileID(F, Record, Idx);
@@ -6484,7 +6507,19 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
                          F.BaseSubmoduleID - LocalBaseSubmoduleID));
 
         SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
+
+        // Pre-allocate per-submodule scan data. These are populated as
+        // SUBMODULE_DEFINITION records are encountered below, and used by
+        // the on-demand loader to seek to individual submodule records.
+        F.SubmoduleOffsets.resize(F.LocalNumSubmodules);
+        F.SubmoduleParentIDs.resize(F.LocalNumSubmodules);
+        F.SubmoduleNames.resize(F.LocalNumSubmodules);
       }
+
+      // Save a copy of the cursor positioned just after SUBMODULE_METADATA.
+      // On-demand loading uses this to JumpToBit(SubmoduleOffsets[i]) and
+      // re-read any individual submodule's definition and attribute records.
+      F.SubmoduleCursor = F.Stream;
       break;
     }
 

>From b3167a7a252462f4165b5bc42321b98b90a7e6f9 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Mon, 30 Mar 2026 16:30:39 -0700
Subject: [PATCH 5/6] [clang][modules] Add on-demand submodule loader to
 ASTReader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement `ASTReader` as an `ExternalSubmoduleSource` so that future
demand-loading of individual submodules is possible without changing the
existing eager `ReadSubmoduleBlock()` path.

What this commit does
---------------------
* Inherits `ASTReader` from the new `ExternalSubmoduleSource` interface.
* Adds `SubmoduleGlobalIDs` (reverse map Module* → global SubmoduleID)
  populated during `ReadSubmoduleBlock()`.
* Adds `loadSubmodule(Module *Parent, StringRef Name)` which looks up the
  parent's owning `ModuleFile`, scans the per-file index arrays
  (`SubmoduleParentIDs` / `SubmoduleNames`) recorded in the previous
  commit, and calls `loadSingleSubmodule()` when the child is absent.
* Adds `loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx)` which seeks
  a saved `BitstreamCursor` to the DEFINITION record, re-reads it and all
  following attribute records (imports, exports, headers, …) for exactly
  one submodule, and immediately resolves any new `UnresolvedModuleRefs`.
* Registers `this` as the `ExternalSubmoduleSource` at the end of each
  `ReadSubmoduleBlock()` call so that `ModuleMap::lookupModuleQualified()`
  and `findOrInferSubmodule()` can trigger demand-loading.

Re-entrancy guard
-----------------
While `ReadSubmoduleBlock()` is actively building the scan index and
creating `Module` objects, `findOrCreateModuleFirst()` calls back into
`ModuleMap::lookupModuleQualified()`, which would invoke `loadSubmodule()`
re-entrantly.  Without protection this causes `loadSingleSubmodule()` to
read and inline-resolve a submodule's attribute records (imports, exports,
…) before `ReadSubmoduleBlock()` also reads them, resulting in duplicate
exports and dropped imports.

The fix is a `ReadingSubmoduleBlock` boolean guard: `loadSubmodule()` is a
no-op whenever `ReadSubmoduleBlock()` is on the call stack.  This preserves
the existing eager-loading semantics for the current commit while keeping
the infrastructure in place for a future commit that will make non-top-level
submodule creation truly lazy.

No functional change in this commit: all submodules are still eagerly
created during PCM loading, exactly as before.

Co-Authored-By: Claude Sonnet 4.6 <[email protected]>
---
 clang/include/clang/Serialization/ASTReader.h |  23 +-
 clang/lib/Serialization/ASTReader.cpp         | 372 ++++++++++++++++++
 2 files changed, 394 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index d6f75e5973c45..dac5eb0d41bf4 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -423,7 +423,8 @@ class ASTReader
     public ExternalHeaderFileInfoSource,
     public ExternalSemaSource,
     public IdentifierInfoLookup,
-    public ExternalSLocEntrySource
+    public ExternalSLocEntrySource,
+    public ExternalSubmoduleSource
 {
 public:
   /// Types of AST files.
@@ -803,6 +804,16 @@ class ASTReader
   /// indicate that the particular submodule ID has not yet been loaded.
   SmallVector<Module *, 2> SubmodulesLoaded;
 
+  /// Reverse map from Module objects to their global SubmoduleID, populated
+  /// as modules are loaded.  Used by loadSubmodule() to locate the owning
+  /// ModuleFile and file-local index when demand-loading a named child.
+  llvm::DenseMap<const Module *, serialization::SubmoduleID> SubmoduleGlobalIDs;
+
+  /// True while ReadSubmoduleBlock() is actively processing records.  Guards
+  /// loadSubmodule() from firing re-entrantly and double-processing attribute
+  /// records (imports, exports, …) that ReadSubmoduleBlock() will also read.
+  bool ReadingSubmoduleBlock = false;
+
   using GlobalSubmoduleMapType =
       ContinuousRangeMap<serialization::SubmoduleID, ModuleFile *, 4>;
 
@@ -1607,6 +1618,12 @@ class ASTReader
                                        unsigned ClientLoadCapabilities);
   llvm::Error ReadSubmoduleBlock(ModuleFile &F,
                                  unsigned ClientLoadCapabilities);
+
+  /// Demand-load the submodule at the given file-local index from \p F,
+  /// reading its SUBMODULE_DEFINITION and all following attribute records.
+  /// The parent module must already be present in SubmodulesLoaded.
+  /// Immediately resolves any UnresolvedModuleRefs added for the new module.
+  void loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx);
   static bool ParseLanguageOptions(const RecordData &Record,
                                    StringRef ModuleFilename, bool Complain,
                                    ASTReaderListener &Listener,
@@ -1893,6 +1910,10 @@ class ASTReader
   /// Make the entities in the given module and any of its (non-explicit)
   /// submodules visible to name lookup.
   ///
+  /// ExternalSubmoduleSource implementation: demand-load a named child of
+  /// \p Parent by consulting the scan index built during ReadSubmoduleBlock().
+  void loadSubmodule(Module *Parent, StringRef Name) override;
+
   /// \param Mod The module whose names should be made visible.
   ///
   /// \param NameVisibility The level of visibility to give the names in the
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 9923daa43a18c..27f30b45c33bb 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -6247,6 +6247,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
   if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID))
     return Err;
 
+  // Suppress demand-loading via loadSubmodule() while we are actively building
+  // the scan index and creating Module objects.  Without this guard,
+  // findOrCreateModuleFirst() → lookupModuleQualified() → loadSubmodule() would
+  // fire re-entrantly and double-process attribute records (imports, exports,
+  // …) that ReadSubmoduleBlock() is about to read anyway.
+  llvm::SaveAndRestore<bool> GuardReading(ReadingSubmoduleBlock, true);
+
   ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
   bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
   // If we don't know the top-level module, there's no point in doing qualified
@@ -6277,6 +6284,10 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       return llvm::createStringError(std::errc::illegal_byte_sequence,
                                      "malformed block record in AST file");
     case llvm::BitstreamEntry::EndBlock:
+      // Register this reader as the external submodule source so that future
+      // qualified lookups through ModuleMap can demand-load individual
+      // submodules from this file's scan index.
+      ModMap.setExternalSubmoduleSource(this);
       return llvm::Error::success();
     case llvm::BitstreamEntry::Record:
       // The interesting case.
@@ -6423,6 +6434,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
         DeserializationListener->ModuleRead(GlobalID, CurrentModule);
 
       SubmodulesLoaded[GlobalIndex] = CurrentModule;
+      SubmoduleGlobalIDs[CurrentModule] = GlobalID;
 
       // Clear out data that will be replaced by what is in the module file.
       CurrentModule->LinkLibraries.clear();
@@ -6611,6 +6623,366 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
   }
 }
 
+
+/// ExternalSubmoduleSource hook: materialise the direct child of \p Parent
+/// named \p Name, provided \p Parent was loaded by this reader and its module
+/// file's scan index records such a child.  Does nothing while
+/// ReadSubmoduleBlock() is running, when \p Parent is unknown to this reader,
+/// when no matching child exists, or when the child is already loaded.
+void ASTReader::loadSubmodule(Module *Parent, StringRef Name) {
+  // Don't fire while ReadSubmoduleBlock() is building the scan index — the
+  // eager loader already creates every module we need, and we would otherwise
+  // double-process attribute records (imports, exports, …).
+  if (ReadingSubmoduleBlock)
+    return;
+
+  // Look up the global ID of the parent module.
+  auto It = SubmoduleGlobalIDs.find(Parent);
+  if (It == SubmoduleGlobalIDs.end())
+    return; // Parent not from this reader; nothing to demand-load.
+
+  // Find the module file that owns the parent.  Check the lookup result
+  // instead of dereferencing it blindly, matching getOrLoadSubmodule().
+  serialization::SubmoduleID ParentGlobalID = It->second;
+  auto FileIt = GlobalSubmoduleMap.find(ParentGlobalID);
+  if (FileIt == GlobalSubmoduleMap.end())
+    return; // No module file owns this ID; nothing to demand-load.
+  ModuleFile *F = FileIt->second;
+
+  // Convert the parent's global ID to its file-local 0-based index.
+  uint32_t ParentFileIdx =
+      (ParentGlobalID - NUM_PREDEF_SUBMODULE_IDS) - F->BaseSubmoduleID;
+
+  // Scan the index for a child of ParentFileIdx whose name matches.
+  for (uint32_t ChildFileIdx = 0; ChildFileIdx < F->LocalNumSubmodules;
+       ++ChildFileIdx) {
+    if (F->SubmoduleParentIDs[ChildFileIdx] != ParentFileIdx ||
+        F->SubmoduleNames[ChildFileIdx] != Name)
+      continue;
+
+    // Compute the global index in SubmodulesLoaded.
+    serialization::SubmoduleID ChildGlobalID =
+        F->BaseSubmoduleID + ChildFileIdx + NUM_PREDEF_SUBMODULE_IDS;
+    uint32_t ChildGlobalIndex = ChildGlobalID - NUM_PREDEF_SUBMODULE_IDS;
+
+    // Only load if not already present.  Only the first matching index entry
+    // is considered.
+    if (!SubmodulesLoaded[ChildGlobalIndex])
+      loadSingleSubmodule(*F, ChildFileIdx);
+    return;
+  }
+}
+
+/// Materialise the submodule stored at file-local index \p FileIdx of \p F.
+///
+/// A parent that is not loaded yet is loaded first (recursively).  The saved
+/// submodule cursor is then positioned on this submodule's
+/// SUBMODULE_DEFINITION record and records are consumed until the next
+/// SUBMODULE_DEFINITION or the end of the block.  Module references collected
+/// on the way (imports, exports, conflicts, affecting modules) are resolved
+/// before returning.  Read failures are reported through Error() and abort
+/// the load.
+void ASTReader::loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx) {
+  assert(FileIdx < F.LocalNumSubmodules && "FileIdx out of range");
+  assert(F.SubmoduleOffsets[FileIdx] != 0 || FileIdx == 0);
+
+  ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+  SmallString<0> PathBuf;
+  PathBuf.reserve(256);
+
+  // Ensure the parent is loaded before we load the child.
+  uint32_t ParentFileIdx = F.SubmoduleParentIDs[FileIdx];
+  Module *ParentModule = nullptr;
+  if (ParentFileIdx != uint32_t(-1)) {
+    serialization::SubmoduleID ParentGlobalID =
+        F.BaseSubmoduleID + ParentFileIdx + NUM_PREDEF_SUBMODULE_IDS;
+    uint32_t ParentGlobalIndex = ParentGlobalID - NUM_PREDEF_SUBMODULE_IDS;
+    if (!SubmodulesLoaded[ParentGlobalIndex])
+      loadSingleSubmodule(F, ParentFileIdx);
+    ParentModule = SubmodulesLoaded[ParentGlobalIndex];
+  }
+
+  // Seek the saved cursor to the SUBMODULE_DEFINITION record for FileIdx.
+  // Work on a copy so the cursor stored in the ModuleFile is left untouched.
+  llvm::BitstreamCursor Cursor = F.SubmoduleCursor;
+  if (llvm::Error Err = Cursor.JumpToBit(F.SubmoduleOffsets[FileIdx])) {
+    Error(std::move(Err));
+    return;
+  }
+
+  // Track how many UnresolvedModuleRefs were pending before this load so we
+  // can inline-resolve the newly added entries at the end.
+  size_t OldUnresolvedSize = UnresolvedModuleRefs.size();
+
+  RecordData Record;
+  Module *CurrentModule = nullptr;
+
+  while (true) {
+    Expected<llvm::BitstreamEntry> MaybeEntry =
+        Cursor.advanceSkippingSubblocks();
+    if (!MaybeEntry) {
+      Error(MaybeEntry.takeError());
+      return;
+    }
+    llvm::BitstreamEntry Entry = MaybeEntry.get();
+
+    if (Entry.Kind == llvm::BitstreamEntry::EndBlock)
+      break;
+    if (Entry.Kind != llvm::BitstreamEntry::Record) {
+      Error("malformed submodule block record in AST file");
+      return;
+    }
+
+    StringRef Blob;
+    Record.clear();
+    Expected<unsigned> MaybeKind = Cursor.readRecord(Entry.ID, Record, &Blob);
+    if (!MaybeKind) {
+      Error(MaybeKind.takeError());
+      return;
+    }
+    unsigned Kind = MaybeKind.get();
+
+    // If we've finished reading this submodule's definition and attributes and
+    // hit the next submodule's definition record, we're done.
+    if (Kind == SUBMODULE_DEFINITION && CurrentModule)
+      break;
+
+    switch (Kind) {
+    default:
+      break;
+
+    case SUBMODULE_DEFINITION: {
+      if (Record.size() < 13) {
+        Error("malformed module definition");
+        return;
+      }
+      unsigned Idx = 0;
+      serialization::SubmoduleID GlobalID =
+          getGlobalSubmoduleID(F, Record[Idx++]);
+      // The parent is taken from the scan index (ParentModule above); the
+      // recorded parent ID is decoded only to keep Idx in step with the
+      // record layout.
+      [[maybe_unused]] serialization::SubmoduleID Parent =
+          getGlobalSubmoduleID(F, Record[Idx++]);
+      Module::ModuleKind MKind = (Module::ModuleKind)Record[Idx++];
+      SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
+      FileID InferredAllowedBy = ReadFileID(F, Record, Idx);
+      bool IsFramework = Record[Idx++];
+      bool IsExplicit = Record[Idx++];
+      bool IsSystem = Record[Idx++];
+      bool IsExternC = Record[Idx++];
+      bool InferSubmodules = Record[Idx++];
+      bool InferExplicitSubmodules = Record[Idx++];
+      bool InferExportWildcard = Record[Idx++];
+      bool ConfigMacrosExhaustive = Record[Idx++];
+      bool ModuleMapIsPrivate = Record[Idx++];
+      bool NamedModuleHasInit = Record[Idx++];
+
+      // Validate the destination slot before touching the ModuleMap, so that
+      // an out-of-range or already-populated ID does not leave a freshly
+      // created Module behind.
+      serialization::SubmoduleID GlobalIndex =
+          GlobalID - NUM_PREDEF_SUBMODULE_IDS;
+      if (GlobalIndex >= SubmodulesLoaded.size() ||
+          SubmodulesLoaded[GlobalIndex]) {
+        // Already loaded (race or error); bail out.
+        return;
+      }
+
+      // Bypass lookupModuleQualified here to avoid re-entrant calls back into
+      // loadSubmodule().  Check directly in the parent's SubModules list, then
+      // fall back to createModule() if not found.
+      Module *ExistingMod = ParentModule ? ParentModule->findSubmodule(Blob)
+                                         : ModMap.findModule(Blob);
+      CurrentModule = ExistingMod
+                          ? ExistingMod
+                          : ModMap.createModule(Blob, ParentModule, IsFramework,
+                                                IsExplicit);
+
+      CurrentModule->Kind = MKind;
+      CurrentModule->DefinitionLoc = DefinitionLoc;
+      CurrentModule->Signature = F.Signature;
+      CurrentModule->IsFromModuleFile = true;
+      if (InferredAllowedBy.isValid())
+        ModMap.setInferredModuleAllowedBy(CurrentModule, InferredAllowedBy);
+      CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem;
+      CurrentModule->IsExternC = IsExternC;
+      CurrentModule->InferSubmodules = InferSubmodules;
+      CurrentModule->InferExplicitSubmodules = InferExplicitSubmodules;
+      CurrentModule->InferExportWildcard = InferExportWildcard;
+      CurrentModule->ConfigMacrosExhaustive = ConfigMacrosExhaustive;
+      CurrentModule->ModuleMapIsPrivate = ModuleMapIsPrivate;
+      CurrentModule->NamedModuleHasInit = NamedModuleHasInit;
+
+      if (ParentModule && ParentModule->Directory)
+        CurrentModule->Directory = ParentModule->Directory;
+
+      if (DeserializationListener)
+        DeserializationListener->ModuleRead(GlobalID, CurrentModule);
+
+      SubmodulesLoaded[GlobalIndex] = CurrentModule;
+      SubmoduleGlobalIDs[CurrentModule] = GlobalID;
+
+      // Clear out data that will be replaced by what is in the module file.
+      CurrentModule->LinkLibraries.clear();
+      CurrentModule->ConfigMacros.clear();
+      CurrentModule->UnresolvedConflicts.clear();
+      CurrentModule->Conflicts.clear();
+      CurrentModule->Requirements.clear();
+      CurrentModule->MissingHeaders.clear();
+      CurrentModule->IsUnimportable =
+          ParentModule && ParentModule->IsUnimportable;
+      CurrentModule->IsAvailable = !CurrentModule->IsUnimportable;
+      break;
+    }
+
+    case SUBMODULE_UMBRELLA_HEADER: {
+      if (!CurrentModule)
+        break;
+      SmallString<128> RelativePathName;
+      if (auto Umbrella = ModMap.findUmbrellaHeaderForModule(
+              CurrentModule, Blob.str(), RelativePathName))
+        if (!CurrentModule->getUmbrellaHeaderAsWritten())
+          ModMap.setUmbrellaHeaderAsWritten(CurrentModule, *Umbrella, Blob,
+                                            RelativePathName);
+      break;
+    }
+
+    // Header records are deliberately ignored here.
+    case SUBMODULE_HEADER:
+    case SUBMODULE_EXCLUDED_HEADER:
+    case SUBMODULE_PRIVATE_HEADER:
+    case SUBMODULE_TEXTUAL_HEADER:
+    case SUBMODULE_PRIVATE_TEXTUAL_HEADER:
+      break;
+
+    case SUBMODULE_TOPHEADER: {
+      if (!CurrentModule)
+        break;
+      auto HeaderName = ResolveImportedPath(PathBuf, Blob, F);
+      CurrentModule->addTopHeaderFilename(*HeaderName);
+      break;
+    }
+
+    case SUBMODULE_UMBRELLA_DIR: {
+      if (!CurrentModule)
+        break;
+      auto Dirname = ResolveImportedPath(PathBuf, Blob, F);
+      if (auto Umbrella =
+              PP.getFileManager().getOptionalDirectoryRef(*Dirname))
+        if (!CurrentModule->getUmbrellaDirAsWritten())
+          ModMap.setUmbrellaDirAsWritten(CurrentModule, *Umbrella, Blob, "");
+      break;
+    }
+
+    case SUBMODULE_IMPORTS:
+      if (!CurrentModule)
+        break;
+      for (unsigned I = 0; I != Record.size(); ++I) {
+        UnresolvedModuleRef Unresolved;
+        Unresolved.File = &F;
+        Unresolved.Mod = CurrentModule;
+        Unresolved.ID = Record[I];
+        Unresolved.Kind = UnresolvedModuleRef::Import;
+        Unresolved.IsWildcard = false;
+        UnresolvedModuleRefs.push_back(Unresolved);
+      }
+      break;
+
+    case SUBMODULE_AFFECTING_MODULES:
+      if (!CurrentModule)
+        break;
+      for (unsigned I = 0; I != Record.size(); ++I) {
+        UnresolvedModuleRef Unresolved;
+        Unresolved.File = &F;
+        Unresolved.Mod = CurrentModule;
+        Unresolved.ID = Record[I];
+        Unresolved.Kind = UnresolvedModuleRef::Affecting;
+        Unresolved.IsWildcard = false;
+        UnresolvedModuleRefs.push_back(Unresolved);
+      }
+      break;
+
+    case SUBMODULE_EXPORTS:
+      if (!CurrentModule)
+        break;
+      // The record is a flat list of (module ID, is-wildcard) pairs.
+      for (unsigned I = 0; I + 1 < Record.size(); I += 2) {
+        UnresolvedModuleRef Unresolved;
+        Unresolved.File = &F;
+        Unresolved.Mod = CurrentModule;
+        Unresolved.ID = Record[I];
+        Unresolved.Kind = UnresolvedModuleRef::Export;
+        Unresolved.IsWildcard = Record[I + 1];
+        UnresolvedModuleRefs.push_back(Unresolved);
+      }
+      CurrentModule->UnresolvedExports.clear();
+      break;
+
+    case SUBMODULE_REQUIRES:
+      if (!CurrentModule)
+        break;
+      CurrentModule->addRequirement(Blob, Record[0], PP.getLangOpts(),
+                                    PP.getTargetInfo());
+      break;
+
+    case SUBMODULE_LINK_LIBRARY:
+      if (!CurrentModule)
+        break;
+      ModMap.resolveLinkAsDependencies(CurrentModule);
+      CurrentModule->LinkLibraries.push_back(
+          Module::LinkLibrary(std::string(Blob), Record[0]));
+      break;
+
+    case SUBMODULE_CONFIG_MACRO:
+      if (!CurrentModule)
+        break;
+      CurrentModule->ConfigMacros.push_back(Blob.str());
+      break;
+
+    case SUBMODULE_CONFLICT: {
+      if (!CurrentModule)
+        break;
+      UnresolvedModuleRef Unresolved;
+      Unresolved.File = &F;
+      Unresolved.Mod = CurrentModule;
+      Unresolved.ID = Record[0];
+      Unresolved.Kind = UnresolvedModuleRef::Conflict;
+      Unresolved.IsWildcard = false;
+      Unresolved.String = Blob;
+      UnresolvedModuleRefs.push_back(Unresolved);
+      break;
+    }
+
+    case SUBMODULE_INITIALIZERS:
+      if (!CurrentModule || !ContextObj)
+        break;
+      if (!F.StandardCXXModule || F.Kind == MK_MainFile) {
+        SmallVector<GlobalDeclID, 16> Inits;
+        for (unsigned I = 0; I < Record.size(); /*in loop*/)
+          Inits.push_back(ReadDeclID(F, Record, I));
+        ContextObj->addLazyModuleInitializers(CurrentModule, Inits);
+      }
+      break;
+
+    case SUBMODULE_EXPORT_AS:
+      if (!CurrentModule)
+        break;
+      CurrentModule->ExportAsModule = Blob.str();
+      ModMap.addLinkAsDependency(CurrentModule);
+      break;
+    }
+  }
+
+  // Inline-resolve any UnresolvedModuleRefs that were added during this load.
+  // This handles the case where loadSingleSubmodule is called outside an
+  // active deserialization chain (NumCurrentElementsDeserializing == 0),
+  // where FinishedDeserializing() would not otherwise run.
+  for (size_t I = OldUnresolvedSize; I < UnresolvedModuleRefs.size(); ++I) {
+    UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
+    serialization::SubmoduleID GlobalID =
+        getGlobalSubmoduleID(*Unresolved.File, Unresolved.ID);
+    Module *ResolvedMod = getSubmodule(GlobalID);
+
+    switch (Unresolved.Kind) {
+    case UnresolvedModuleRef::Conflict:
+      if (ResolvedMod) {
+        Module::Conflict Conflict;
+        Conflict.Other = ResolvedMod;
+        Conflict.Message = Unresolved.String.str();
+        Unresolved.Mod->Conflicts.push_back(Conflict);
+      }
+      break;
+    case UnresolvedModuleRef::Import:
+      if (ResolvedMod)
+        Unresolved.Mod->Imports.insert(ResolvedMod);
+      break;
+    case UnresolvedModuleRef::Affecting:
+      if (ResolvedMod)
+        Unresolved.Mod->AffectingClangModules.insert(ResolvedMod);
+      break;
+    case UnresolvedModuleRef::Export:
+      if (ResolvedMod || Unresolved.IsWildcard)
+        Unresolved.Mod->Exports.push_back(
+            Module::ExportDecl(ResolvedMod, Unresolved.IsWildcard));
+      break;
+    }
+  }
+  // Drop the entries handled above; older pending entries stay queued.
+  UnresolvedModuleRefs.resize(OldUnresolvedSize);
+}
+
 /// Parse the record that corresponds to a LangOptions data
 /// structure.
 ///

>From b75b28fd98445d976057673fbfffc390f504d949 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Tue, 31 Mar 2026 08:20:23 -0700
Subject: [PATCH 6/6] WIP

---
 clang/include/clang/Serialization/ASTReader.h |  4 ++
 clang/lib/Serialization/ASTReader.cpp         | 53 ++++++++++++++++---
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index dac5eb0d41bf4..78f15dfcbda3a 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2457,6 +2457,10 @@ class ASTReader
   ///
   Module *getSubmodule(serialization::SubmoduleID GlobalID);
 
+  /// Like getSubmodule(), but demand-loads the module if it has not been
+  /// materialised yet (i.e., it was deferred during lazy ReadSubmoduleBlock).
+  Module *getOrLoadSubmodule(serialization::SubmoduleID GlobalID);
+
   /// Retrieve the module that corresponds to the given module ID.
   ///
   /// Note: overrides method in ExternalASTSource
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 27f30b45c33bb..55912b6bbb46b 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2406,7 +2406,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
     // This header is part of a module. Associate it with the module to enable
     // implicit module import.
     SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
-    Module *Mod = Reader.getSubmodule(GlobalSMID);
+    Module *Mod = Reader.getOrLoadSubmodule(GlobalSMID);
     ModuleMap &ModMap =
         Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
 
@@ -2670,14 +2670,14 @@ void ASTReader::resolvePendingMacro(IdentifierInfo *II,
     for (auto &MMR : ModuleMacros) {
       Overrides.clear();
       for (unsigned ModID : MMR.Overrides) {
-        Module *Mod = getSubmodule(ModID);
+        Module *Mod = getOrLoadSubmodule(ModID);
         auto *Macro = PP.getModuleMacro(Mod, II);
         assert(Macro && "missing definition for overridden macro");
         Overrides.push_back(Macro);
       }
 
       bool Inserted = false;
-      Module *Owner = getSubmodule(MMR.SubModID);
+      Module *Owner = getOrLoadSubmodule(MMR.SubModID);
       PP.addModuleMacro(Owner, II, MMR.MI, Overrides, Inserted);
     }
   }
@@ -5075,7 +5075,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(ModuleFileName FileName,
   for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
     UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
     SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID);
-    Module *ResolvedMod = getSubmodule(GlobalID);
+    Module *ResolvedMod = getOrLoadSubmodule(GlobalID);
 
     switch (Unresolved.Kind) {
     case UnresolvedModuleRef::Conflict:
@@ -5707,7 +5707,7 @@ void ASTReader::InitializeContext() {
   // Re-export any modules that were imported by a non-module AST file.
   // FIXME: This does not make macro-only imports visible again.
   for (auto &Import : PendingImportedModules) {
-    if (Module *Imported = getSubmodule(Import.ID)) {
+    if (Module *Imported = getOrLoadSubmodule(Import.ID)) {
       makeModuleVisible(Imported, Module::AllVisible,
                         /*ImportLoc=*/Import.ImportLoc);
       if (Import.ImportLoc.isValid())
@@ -6264,6 +6264,10 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
                           : &ModuleMap::findOrCreateModuleFirst;
 
   bool First = true;
+  // SeenFirstDef: true after the root module of this PCM file has been eagerly
+  // loaded.  All subsequent SUBMODULE_DEFINITION records (descendants of the
+  // root) are demand-loaded lazily via loadSingleSubmodule().
+  bool SeenFirstDef = false;
   Module *CurrentModule = nullptr;
   RecordData Record;
   // RecordStartBit tracks the bitstream position just before each call to
@@ -6345,6 +6349,16 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
         F.SubmoduleParentIDs[FileIdx] = ParentFileIdx;
         F.SubmoduleNames[FileIdx] = Name;
       }
+
+      // After the root module of this PCM file has been eagerly loaded, skip
+      // creating Module objects for all descendant submodules.  Their scan-index
+      // entries (above) are enough for loadSingleSubmodule() to materialise
+      // them on demand when they are first referenced (e.g. via a header-file
+      // lookup or an explicit import).
+      if (SeenFirstDef) {
+        CurrentModule = nullptr;
+        continue;
+      }
       Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
       SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
       FileID InferredAllowedBy = ReadFileID(F, Record, Idx);
@@ -6452,6 +6466,9 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       CurrentModule->IsUnimportable =
           ParentModule && ParentModule->IsUnimportable;
       CurrentModule->IsAvailable = !CurrentModule->IsUnimportable;
+      // Mark that the root module of this PCM file has been loaded; subsequent
+      // SUBMODULE_DEFINITION records will be deferred to demand-loading.
+      SeenFirstDef = true;
       break;
     }
 
@@ -6954,7 +6971,7 @@ void ASTReader::loadSingleSubmodule(ModuleFile &F, uint32_t FileIdx) {
     UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
     serialization::SubmoduleID GlobalID =
         getGlobalSubmoduleID(*Unresolved.File, Unresolved.ID);
-    Module *ResolvedMod = getSubmodule(GlobalID);
+    Module *ResolvedMod = getOrLoadSubmodule(GlobalID);
 
     switch (Unresolved.Kind) {
     case UnresolvedModuleRef::Conflict:
@@ -9688,7 +9705,7 @@ void ASTReader::UpdateSema() {
   for (auto &Import : PendingImportedModulesSema) {
     if (Import.ImportLoc.isInvalid())
       continue;
-    if (Module *Imported = getSubmodule(Import.ID)) {
+    if (Module *Imported = getOrLoadSubmodule(Import.ID)) {
       SemaObj->makeModuleVisible(Imported, Import.ImportLoc);
     }
   }
@@ -10397,6 +10414,28 @@ Module *ASTReader::getSubmodule(SubmoduleID GlobalID) {
   return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS];
 }
 
+/// Return the module registered for \p GlobalID, demand-loading it through
+/// loadSingleSubmodule() when its slot in SubmodulesLoaded is still empty.
+///
+/// Returns null for predefined IDs, for out-of-range IDs (after reporting an
+/// error), and when the module could not be materialised.  No demand-load is
+/// attempted while ReadSubmoduleBlock() is running or when the owning module
+/// file has no recorded offset for the submodule.
+Module *ASTReader::getOrLoadSubmodule(SubmoduleID GlobalID) {
+  if (GlobalID < NUM_PREDEF_SUBMODULE_IDS)
+    return nullptr;
+  // Range-check the index that is actually used for the access below, rather
+  // than the raw ID, so the test does not depend on how many predefined IDs
+  // there are.
+  uint32_t GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
+  if (GlobalIndex >= SubmodulesLoaded.size()) {
+    Error("submodule ID out of range in AST file");
+    return nullptr;
+  }
+  if (!SubmodulesLoaded[GlobalIndex] && !ReadingSubmoduleBlock) {
+    auto It = GlobalSubmoduleMap.find(GlobalID);
+    if (It != GlobalSubmoduleMap.end()) {
+      ModuleFile *F = It->second;
+      // Convert to the file-local 0-based index used by the scan index.
+      uint32_t FileIdx = GlobalIndex - F->BaseSubmoduleID;
+      // A zero offset means no position was recorded for this submodule.
+      if (FileIdx < F->LocalNumSubmodules && !F->SubmoduleOffsets.empty() &&
+          F->SubmoduleOffsets[FileIdx] != 0)
+        loadSingleSubmodule(*F, FileIdx);
+    }
+  }
+  // Still null if the demand-load was skipped or failed.
+  return SubmodulesLoaded[GlobalIndex];
+}
+
 Module *ASTReader::getModule(unsigned ID) {
   return getSubmodule(ID);
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to