Author: Jan Svoboda
Date: 2026-05-08T08:43:58-07:00
New Revision: f68d44dce63f1a94e60410e8817e0eed70fc0578

URL: 
https://github.com/llvm/llvm-project/commit/f68d44dce63f1a94e60410e8817e0eed70fc0578
DIFF: 
https://github.com/llvm/llvm-project/commit/f68d44dce63f1a94e60410e8817e0eed70fc0578.diff

LOG: [clang][modules] Deserialize submodules lazily (#194968)

This PR implements on-demand deserialization of `Module` objects from
PCM files. This is motivated by dependency scanning, where eager
deserialization of submodules turns out to be very expensive and
typically unnecessary. The core of this patch is the introduction of
`ModuleRef`, which holds a pointer to an already-materialized `Module`,
a reference to a serialized submodule (a pointer to the new
`ExternalSubmoduleSource` interface, implemented by `ASTReader`, plus a
`serialization::SubmoduleID`), both, or neither (null). Dereferencing a
`ModuleRef` deserializes the `Module` on demand if possible.

This replaces `ASTReader::UnresolvedModuleRefs` and changes the
structure of a PCM file a bit, most importantly introducing a new
`SUBMODULE_CHILD` record that enables hooking up the laziness into the
qualified by-name lookup that uses `Module::SubModuleIndex`.

This speeds up dependency scanning by ~5.5%.

Added: 
    

Modified: 
    clang/include/clang/Basic/Module.h
    clang/include/clang/Lex/ModuleMap.h
    clang/include/clang/Lex/Preprocessor.h
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/include/clang/Serialization/ASTReader.h
    clang/include/clang/Serialization/ModuleFile.h
    clang/lib/Basic/Module.cpp
    clang/lib/Lex/ModuleMap.cpp
    clang/lib/Lex/Preprocessor.cpp
    clang/lib/Sema/SemaLookup.cpp
    clang/lib/Sema/SemaModule.cpp
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index f83319db082d7..3fd6bfa063af4 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -48,9 +48,17 @@ namespace clang {
 
 class FileManager;
 class LangOptions;
+class Module;
 class ModuleMap;
 class TargetInfo;
 
+/// Interface for on-demand deserialization of submodules stored in a PCM file.
+class ExternalSubmoduleSource {
+public:
+  virtual Module *getSubmodule(uint32_t GlobalID) = 0;
+  virtual ~ExternalSubmoduleSource() = default;
+};
+
 /// Describes the name of a module.
 using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>;
 
@@ -222,6 +230,62 @@ struct ModuleAttributes {
         NoUndeclaredIncludes(false) {}
 };
 
+/// Reference to a module that consists of either an existing/materialized
+/// Module object, reference to a serialized submodule record, both, or
+/// neither (null).
+class ModuleRef {
+  /// The existing/materialized Module object.
+  mutable Module *Existing = nullptr;
+
+  /// The external submodule source (i.e. \c ASTReader), and a boolean
+  /// signifying whether it's already been used to deserialize \c SubmoduleID.
+  mutable llvm::PointerIntPair<ExternalSubmoduleSource *, 1, bool>
+      ExternalSource = {nullptr, false};
+
+  /// Identifier of the external submodule in \c ExternalSource.
+  mutable uint64_t SubmoduleID = 0;
+
+public:
+  /// Create an empty reference.
+  ModuleRef() = default;
+
+  /// Create reference to a materialized module.
+  ModuleRef(Module *M) : Existing(M) {}
+
+  /// Create reference to a serialized submodule record.
+  ModuleRef(ExternalSubmoduleSource *ExtSrc, uint64_t SubmoduleID)
+      : ExternalSource(ExtSrc, false), SubmoduleID(SubmoduleID) {}
+
+  /// Get the existing/materialized module, if there's any.
+  Module *getExisting() const { return Existing; }
+  /// Add the existing/materialized module.
+  void setExisting(Module *E) { Existing = E; }
+
+  /// Add the serialized submodule record reference.
+  void setExternal(ExternalSubmoduleSource *ExtSrc, uint64_t ID) {
+    ExternalSource = {ExtSrc, false};
+    SubmoduleID = ID;
+  }
+
+  /// Check whether this is a non-empty reference.
+  operator bool() const {
+    return Existing || (ExternalSource.getPointer() && SubmoduleID);
+  }
+
+  /// Get the existing/materialized module. Try materializing it on-demand from
+  /// the serialized submodule record if possible.
+  operator Module *() const {
+    if (!ExternalSource.getInt() && ExternalSource.getPointer() &&
+        SubmoduleID) {
+      Existing = ExternalSource.getPointer()->getSubmodule(SubmoduleID);
+      ExternalSource.setInt(true);
+    }
+    return Existing;
+  }
+
+  Module *operator->() const { return *this; }
+};
+
 /// Required to construct a Module.
 ///
 /// This tag type is only constructible by ModuleMap, guaranteeing it ownership
@@ -348,7 +412,7 @@ class alignas(8) Module {
 
 private:
   /// The submodules of this module, indexed by name.
-  std::vector<Module *> SubModules;
+  std::vector<ModuleRef> SubModules;
 
   /// A mapping from the submodule name to the index into the
   /// \c SubModules vector at which that submodule resides.
@@ -552,17 +616,17 @@ class alignas(8) Module {
 
   /// The set of modules imported by this module, and on which this
   /// module depends.
-  llvm::SmallSetVector<Module *, 2> Imports;
+  llvm::SmallVector<ModuleRef, 2> Imports;
 
   /// The set of top-level modules that affected the compilation of this module,
   /// but were not imported.
-  llvm::SmallSetVector<Module *, 2> AffectingClangModules;
+  llvm::SmallVector<ModuleRef, 2> AffectingClangModules;
 
   /// Describes an exported module.
   ///
   /// The pointer is the module being re-exported, while the bit will be true
   /// to indicate that this is a wildcard export.
-  using ExportDecl = std::pair<Module *, bool>;
+  using ExportDecl = std::pair<ModuleRef, bool>;
 
   /// The set of export declarations.
   SmallVector<ExportDecl, 2> Exports;
@@ -640,7 +704,7 @@ class alignas(8) Module {
   /// A conflict between two modules.
   struct Conflict {
     /// The module that this module conflicts with.
-    Module *Other;
+    ModuleRef Other;
 
     /// The message provided to the user when there is a conflict.
     std::string Message;
@@ -742,6 +806,23 @@ class alignas(8) Module {
     Parent->SubModules.push_back(this);
   }
 
+  /// Add a child submodule.
+  void addSubmodule(StringRef Name, Module *Submodule) {
+    auto [It, New] = SubModuleIndex.insert({Name, SubModules.size()});
+    if (New)
+      SubModules.emplace_back();
+    SubModules[It->second].setExisting(Submodule);
+  }
+
+  /// Add the external part of a submodule ModuleRef.
+  void addSubmodule(StringRef Name, ExternalSubmoduleSource *ExternalSource,
+                    uint64_t SubmoduleID) {
+    auto [It, New] = SubModuleIndex.insert({Name, SubModules.size()});
+    if (New)
+      SubModules.emplace_back();
+    SubModules[It->second].setExternal(ExternalSource, SubmoduleID);
+  }
+
   /// Is this module have similar semantics as headers.
   bool isHeaderLikeModule() const {
     return isModuleMapModule() || isHeaderUnit();
@@ -913,7 +994,7 @@ class alignas(8) Module {
   /// Find the submodule with the given name.
   ///
   /// \returns The submodule if found, or NULL otherwise.
-  Module *findSubmodule(StringRef Name) const;
+  ModuleRef findSubmodule(StringRef Name) const;
 
   /// Get the Global Module Fragment (sub-module) for this module, it there is
   /// one.
@@ -941,8 +1022,8 @@ class alignas(8) Module {
 
   unsigned getVisibilityID() const { return VisibilityID; }
 
-  using submodule_iterator = std::vector<Module *>::iterator;
-  using submodule_const_iterator = std::vector<Module *>::const_iterator;
+  using submodule_iterator = std::vector<ModuleRef>::iterator;
+  using submodule_const_iterator = std::vector<ModuleRef>::const_iterator;
 
   llvm::iterator_range<submodule_iterator> submodules() {
     return llvm::make_range(SubModules.begin(), SubModules.end());

diff  --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index ed326a7fd545b..12f8dbb0b6090 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -548,7 +548,7 @@ class ModuleMap {
   /// null, we will look for a top-level module.
   ///
   /// \returns The named submodule, if known; otherwose, returns null.
-  Module *lookupModuleQualified(StringRef Name, Module *Context) const;
+  ModuleRef lookupModuleQualified(StringRef Name, Module *Context) const;
 
   /// Find a new module or submodule, or create it if it does not already
   /// exist.

diff  --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 8830294ea1658..8cba21539e48a 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1535,7 +1535,7 @@ class Preprocessor {
     assert(M->isModuleMapModule());
     if (!BuildingSubmoduleStack.empty()) {
       if (M != BuildingSubmoduleStack.back().M)
-        BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M);
+        BuildingSubmoduleStack.back().M->AffectingClangModules.push_back(M);
     } else {
       AffectingClangModules.insert(M);
     }

diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 9a41f9e89df98..3c8f3ba59a07e 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -44,7 +44,7 @@ namespace serialization {
 /// Version 4 of AST files also requires that the version control branch and
 /// revision match exactly, since there is no backward compatibility of
 /// AST files at this time.
-const unsigned VERSION_MAJOR = 37;
+const unsigned VERSION_MAJOR = 38;
 
 /// AST file minor version number supported by this version of
 /// Clang.
@@ -751,6 +751,10 @@ enum ASTRecordTypes {
 
   /// Record code for extname-redefined undeclared identifiers.
   EXTNAME_UNDECLARED_IDENTIFIERS = 79,
+
+  /// Record that encodes the number of submodules, their base ID in the AST
+  /// file, and for each module the relative bit offset into the stream.
+  SUBMODULE_METADATA = 80,
 };
 
 /// Record types used within a source manager block.
@@ -819,8 +823,8 @@ enum PreprocessorDetailRecordTypes {
 
 /// Record types used within a submodule description block.
 enum SubmoduleRecordTypes {
-  /// Metadata for submodules as a whole.
-  SUBMODULE_METADATA = 0,
+  /// Defines the end of a single submodule. Sentinel record without any data.
+  SUBMODULE_END = 0,
 
   /// Defines the major attributes of a submodule, including its
   /// name and parent.
@@ -884,6 +888,10 @@ enum SubmoduleRecordTypes {
 
   /// Specifies affecting modules that were not imported.
   SUBMODULE_AFFECTING_MODULES = 18,
+
+  /// Specifies a direct submodule by name and ID, enabling on-demand
+  /// deserialization of children without loading the entire submodule block.
+  SUBMODULE_CHILD = 19,
 };
 
 /// Record types used within a comments block.

diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 8394647885bd3..bedac9f8a540a 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -418,14 +418,13 @@ struct LookupBlockOffsets : VisibleLookupBlockOffsets {
 /// The AST reader provides lazy de-serialization of declarations, as
 /// required when traversing the AST. Only those AST nodes that are
 /// actually required will be de-serialized.
-class ASTReader
-  : public ExternalPreprocessorSource,
-    public ExternalPreprocessingRecordSource,
-    public ExternalHeaderFileInfoSource,
-    public ExternalSemaSource,
-    public IdentifierInfoLookup,
-    public ExternalSLocEntrySource
-{
+class ASTReader : public ExternalPreprocessorSource,
+                  public ExternalPreprocessingRecordSource,
+                  public ExternalHeaderFileInfoSource,
+                  public ExternalSemaSource,
+                  public IdentifierInfoLookup,
+                  public ExternalSLocEntrySource,
+                  public ExternalSubmoduleSource {
 public:
   /// Types of AST files.
   friend class ASTDeclMerger;
@@ -820,32 +819,6 @@ class ASTReader
   /// declarations in that submodule that could be made visible.
   HiddenNamesMapType HiddenNamesMap;
 
-  /// A module import, export, or conflict that hasn't yet been resolved.
-  struct UnresolvedModuleRef {
-    /// The file in which this module resides.
-    ModuleFile *File;
-
-    /// The module that is importing or exporting.
-    Module *Mod;
-
-    /// The kind of module reference.
-    enum { Import, Export, Conflict, Affecting } Kind;
-
-    /// The local ID of the module that is being exported.
-    unsigned ID;
-
-    /// Whether this is a wildcard export.
-    LLVM_PREFERRED_TYPE(bool)
-    unsigned IsWildcard : 1;
-
-    /// String data.
-    StringRef String;
-  };
-
-  /// The set of module imports and exports that still need to be
-  /// resolved.
-  SmallVector<UnresolvedModuleRef, 2> UnresolvedModuleRefs;
-
   /// A vector containing selectors that have already been loaded.
   ///
   /// This vector is indexed by the Selector ID (-1). NULL selector
@@ -1612,8 +1585,6 @@ class ASTReader
   ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
                                        const ModuleFile *ImportedBy,
                                        unsigned ClientLoadCapabilities);
-  llvm::Error ReadSubmoduleBlock(ModuleFile &F,
-                                 unsigned ClientLoadCapabilities);
   static bool ParseLanguageOptions(const RecordData &Record,
                                    StringRef ModuleFilename, bool Complain,
                                    ASTReaderListener &Listener,
@@ -2444,8 +2415,7 @@ class ASTReader
                                                   unsigned LocalID) const;
 
   /// Retrieve the submodule that corresponds to a global submodule ID.
-  ///
-  Module *getSubmodule(serialization::SubmoduleID GlobalID);
+  Module *getSubmodule(uint32_t GlobalID) override;
 
   /// Retrieve the module that corresponds to the given module ID.
   ///

diff  --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index 58f2fcba01e67..6c47040fde093 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -447,9 +447,25 @@ class ModuleFile {
   /// Base submodule ID for submodules local to this module.
   serialization::SubmoduleID BaseSubmoduleID = 0;
 
+  /// Base submodule ID for submodules local to this module within its own
+  /// address space.
+  unsigned LocalBaseSubmoduleID = 0;
+
+  /// Local submodule ID of the top-level module.
+  unsigned LocalTopLevelSubmoduleID = 0;
+
   /// Remapping table for submodule IDs in this module.
   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
 
+  /// The cursor to the start of the submodules block.
+  llvm::BitstreamCursor SubmodulesCursor;
+
+  /// Absolute offset of the start of the submodules block.
+  uint64_t SubmodulesOffsetBase = 0;
+
+  /// Relative offsets for all submodule entries in the AST file.
+  const llvm::support::unaligned_uint64_t *SubmoduleOffsets = nullptr;
+
   // === Selectors ===
 
   /// The number of selectors new to this file.

diff  --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index 66629baa6240b..d27abb1153c72 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -53,8 +53,7 @@ Module::Module(ModuleConstructorTag, StringRef Name,
     NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
     ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
 
-    Parent->SubModuleIndex[Name] = Parent->SubModules.size();
-    Parent->SubModules.push_back(this);
+    Parent->addSubmodule(Name, this);
   }
 }
 
@@ -348,7 +347,7 @@ void Module::markUnavailable(bool Unimportable) {
   }
 }
 
-Module *Module::findSubmodule(StringRef Name) const {
+ModuleRef Module::findSubmodule(StringRef Name) const {
   if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
     return SubModules[It->second];
 

diff  --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index aaeea01bf775e..436b8e5620765 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -970,7 +970,8 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name,
   return findModule(Name);
 }
 
-Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{
+ModuleRef ModuleMap::lookupModuleQualified(StringRef Name,
+                                           Module *Context) const {
   if (!Context)
     return findModule(Name);
 
@@ -982,8 +983,8 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
                                                         bool IsFramework,
                                                         bool IsExplicit) {
   // Try to find an existing module with this name.
-  if (Module *Sub = lookupModuleQualified(Name, Parent))
-    return std::make_pair(Sub, false);
+  if (ModuleRef Sub = lookupModuleQualified(Name, Parent); Sub.getExisting())
+    return std::make_pair(Sub.getExisting(), false);
 
   // Create a new module with this name.
   Module *M = createModule(Name, Parent, IsFramework, IsExplicit);
@@ -992,7 +993,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
 
 Module *ModuleMap::createModule(StringRef Name, Module *Parent,
                                 bool IsFramework, bool IsExplicit) {
-  assert(lookupModuleQualified(Name, Parent) == nullptr &&
+  assert(!lookupModuleQualified(Name, Parent).getExisting() &&
          "Creating duplicate submodule");
 
   Module *Result = new (ModulesAlloc.Allocate())

diff  --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index b08459632aacb..761bf8e9af56b 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1450,7 +1450,7 @@ void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc,
 
   // Add this module to the imports list of the currently-built submodule.
   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
-    BuildingSubmoduleStack.back().M->Imports.insert(M);
+    BuildingSubmoduleStack.back().M->Imports.push_back(M);
 }
 
 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,

diff  --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index b96065f8619d2..e4e55bb7d0ac7 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -2032,7 +2032,8 @@ bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
   // Directly imported module are necessarily reachable.
   // Since we can't export import a module implementation partition unit, we
   // don't need to count for Exports here.
-  if (CurrentM && CurrentM->getTopLevelModule()->Imports.count(DeclTopModule))
+  if (CurrentM &&
+      llvm::is_contained(CurrentM->getTopLevelModule()->Imports, DeclTopModule))
     return true;
 
   // Then we treat all module implementation partition unit as unreachable.

diff  --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index 67f46b64cf047..caa61a99a6914 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -483,7 +483,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
     // Sequence initialization of the imported module before that of the current
     // module, if any.
     Context.addModuleInitializer(ModuleScopes.back().Module, Import);
-    Mod->Imports.insert(Interface); // As if we imported it.
+    Mod->Imports.push_back(Interface); // As if we imported it.
     // Also save this as a shortcut to checking for decls in the interface
     ThePrimaryInterface = Interface;
     // If we made an implicit import of the module interface, then return the
@@ -710,7 +710,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
     if (ExportLoc.isValid() || getEnclosingExportDecl(Import))
       getCurrentModule()->Exports.emplace_back(Mod, false);
     else
-      getCurrentModule()->Imports.insert(Mod);
+      getCurrentModule()->Imports.push_back(Mod);
   }
 
   HadImportedNamedModules = true;

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7e8bb6509e84b..6b242f553c59d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3762,8 +3762,13 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         break;
 
       case SUBMODULE_BLOCK_ID:
-        if (llvm::Error Err = ReadSubmoduleBlock(F, ClientLoadCapabilities))
+        F.SubmodulesCursor = Stream;
+        if (llvm::Error Err = Stream.SkipBlock())
+          return Err;
+        if (llvm::Error Err =
+                ReadBlockAbbrevs(F.SubmodulesCursor, SUBMODULE_BLOCK_ID))
           return Err;
+        F.SubmodulesOffsetBase = F.SubmodulesCursor.GetCurrentBitNo();
         break;
 
       case COMMENTS_BLOCK_ID: {
@@ -3815,6 +3820,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       case HEADER_SEARCH_TABLE:
       case IMPORTED_MODULES:
       case MACRO_OFFSET:
+      case SUBMODULE_METADATA:
         break;
       default:
         continue;
@@ -3825,6 +3831,50 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
     default:  // Default behavior: ignore.
       break;
 
+    case SUBMODULE_METADATA: {
+      F.BaseSubmoduleID = getTotalNumSubmodules();
+      F.LocalNumSubmodules = Record[0];
+      F.LocalBaseSubmoduleID = Record[1];
+      F.LocalTopLevelSubmoduleID = Record[2];
+      F.SubmoduleOffsets =
+          (const llvm::support::unaligned_uint64_t *)Blob.data();
+      if (F.LocalNumSubmodules > 0) {
+        // Introduce the global -> local mapping for submodules within this
+        // module.
+        GlobalSubmoduleMap.insert(
+            std::make_pair(getTotalNumSubmodules() + 1, &F));
+
+        // Introduce the local -> global mapping for submodules within this
+        // module.
+        F.SubmoduleRemap.insertOrReplace(
+            std::make_pair(F.LocalBaseSubmoduleID,
+                           F.BaseSubmoduleID - F.LocalBaseSubmoduleID));
+
+        SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
+      }
+
+      auto ReadSubmodule = [&](unsigned LocalID) -> Module * {
+        return getSubmodule(getGlobalSubmoduleID(F, LocalID));
+      };
+
+      if (PP.getHeaderSearchInfo().getModuleMap().findModule(F.ModuleName)) {
+        // If we already knew about this module, make sure to bring all
+        // submodules up to date.
+        for (unsigned Index = 0; Index != F.LocalNumSubmodules; ++Index) {
+          unsigned LocalID =
+              Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS;
+          ReadSubmodule(LocalID);
+        }
+      } else {
+        // If we didn't know this module, we loaded it transitively. Deserialize
+        // just the top-level module to register it with ModuleMap, but load the
+        // rest lazily.
+        ReadSubmodule(F.LocalTopLevelSubmoduleID);
+      }
+
+      break;
+    }
+
     case TYPE_OFFSET: {
       if (F.LocalNumTypes != 0)
         return llvm::createStringError(
@@ -5103,41 +5153,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(ModuleFileName FileName,
       F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc);
   }
 
-  // Resolve any unresolved module exports.
-  for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
-    UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I];
-    SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID);
-    Module *ResolvedMod = getSubmodule(GlobalID);
-
-    switch (Unresolved.Kind) {
-    case UnresolvedModuleRef::Conflict:
-      if (ResolvedMod) {
-        Module::Conflict Conflict;
-        Conflict.Other = ResolvedMod;
-        Conflict.Message = Unresolved.String.str();
-        Unresolved.Mod->Conflicts.push_back(Conflict);
-      }
-      continue;
-
-    case UnresolvedModuleRef::Import:
-      if (ResolvedMod)
-        Unresolved.Mod->Imports.insert(ResolvedMod);
-      continue;
-
-    case UnresolvedModuleRef::Affecting:
-      if (ResolvedMod)
-        Unresolved.Mod->AffectingClangModules.insert(ResolvedMod);
-      continue;
-
-    case UnresolvedModuleRef::Export:
-      if (ResolvedMod || Unresolved.IsWildcard)
-        Unresolved.Mod->Exports.push_back(Module::ExportDecl(
-            ResolvedMod, static_cast<bool>(Unresolved.IsWildcard)));
-      continue;
-    }
-  }
-  UnresolvedModuleRefs.clear();
-
   // FIXME: How do we load the 'use'd modules? They may not be submodules.
   // Might be unnecessary as use declarations are only used to build the
   // module itself.
@@ -6293,11 +6308,34 @@ bool ASTReader::isAcceptableASTFile(
                                   /*ValidateDiagnosticOptions=*/true);
 }
 
-llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
-                                          unsigned ClientLoadCapabilities) {
-  // Enter the submodule block.
-  if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID))
-    return Err;
+Module *ASTReader::getSubmodule(uint32_t GlobalID) {
+  if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) {
+    assert(GlobalID == 0 && "Unhandled global submodule ID");
+    return nullptr;
+  }
+
+  SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
+  if (GlobalIndex >= SubmodulesLoaded.size()) {
+    Error("submodule ID out of range in AST file");
+    return nullptr;
+  }
+
+  if (SubmodulesLoaded[GlobalIndex])
+    return SubmodulesLoaded[GlobalIndex];
+
+  GlobalSubmoduleMapType::iterator It = GlobalSubmoduleMap.find(GlobalID);
+  assert(It != GlobalSubmoduleMap.end());
+  ModuleFile &F = *It->second;
+  unsigned Index = GlobalID - F.BaseSubmoduleID - NUM_PREDEF_SUBMODULE_IDS;
+  unsigned LocalID = Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS;
+
+  BitstreamCursor &Cursor = F.SubmodulesCursor;
+  SavedStreamPosition SavedPosition(Cursor);
+  unsigned Offset = F.SubmoduleOffsets[Index];
+  if (llvm::Error Err = Cursor.JumpToBit(F.SubmodulesOffsetBase + Offset)) {
+    Error(std::move(Err));
+    return nullptr;
+  }
 
   ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
   bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
@@ -6308,23 +6346,24 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
                           ? &ModuleMap::createModule
                           : &ModuleMap::findOrCreateModuleFirst;
 
-  bool First = true;
   Module *CurrentModule = nullptr;
   RecordData Record;
   while (true) {
-    Expected<llvm::BitstreamEntry> MaybeEntry =
-        F.Stream.advanceSkippingSubblocks();
-    if (!MaybeEntry)
-      return MaybeEntry.takeError();
+    Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+    if (!MaybeEntry) {
+      Error(MaybeEntry.takeError());
+      return nullptr;
+    }
     llvm::BitstreamEntry Entry = MaybeEntry.get();
 
     switch (Entry.Kind) {
-    case llvm::BitstreamEntry::SubBlock: // Handled for us already.
+    case llvm::BitstreamEntry::SubBlock:
     case llvm::BitstreamEntry::Error:
-      return llvm::createStringError(std::errc::illegal_byte_sequence,
-                                     "malformed block record in AST file");
-    case llvm::BitstreamEntry::EndBlock:
-      return llvm::Error::success();
+    case llvm::BitstreamEntry::EndBlock: {
+      Error(llvm::createStringError(std::errc::illegal_byte_sequence,
+                                    "malformed block record in AST file"));
+      return nullptr;
+    }
     case llvm::BitstreamEntry::Record:
       // The interesting case.
       break;
@@ -6333,35 +6372,35 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
     // Read a record.
     StringRef Blob;
     Record.clear();
-    Expected<unsigned> MaybeKind = F.Stream.readRecord(Entry.ID, Record, &Blob);
-    if (!MaybeKind)
-      return MaybeKind.takeError();
-    unsigned Kind = MaybeKind.get();
-
-    if ((Kind == SUBMODULE_METADATA) != First)
-      return llvm::createStringError(
-          std::errc::illegal_byte_sequence,
-          "submodule metadata record should be at beginning of block");
-    First = false;
-
-    // Submodule information is only valid if we have a current module.
-    // FIXME: Should we error on these cases?
-    if (!CurrentModule && Kind != SUBMODULE_METADATA &&
-        Kind != SUBMODULE_DEFINITION)
-      continue;
+    Expected<unsigned> MaybeKind = Cursor.readRecord(Entry.ID, Record, &Blob);
+    if (!MaybeKind) {
+      Error(MaybeKind.takeError());
+      return nullptr;
+    }
+    auto Kind = static_cast<SubmoduleRecordTypes>(MaybeKind.get());
 
     switch (Kind) {
-    default:  // Default behavior: ignore.
-      break;
+    case SUBMODULE_END:
+      if (!CurrentModule) {
+        Error(llvm::createStringError(std::errc::illegal_byte_sequence,
+                                      "malformed module definition"));
+        return nullptr;
+      }
+      return CurrentModule;
 
     case SUBMODULE_DEFINITION: {
-      if (Record.size() < 13)
-        return llvm::createStringError(std::errc::illegal_byte_sequence,
-                                       "malformed module definition");
+      if (Record.size() < 13) {
+        Error(llvm::createStringError(std::errc::illegal_byte_sequence,
+                                      "malformed module definition"));
+        return nullptr;
+      }
 
       StringRef Name = Blob;
       unsigned Idx = 0;
-      SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]);
+      unsigned ReadLocalID = Record[Idx++];
+      assert(LocalID == ReadLocalID);
+      SubmoduleID ReadGlobalID = getGlobalSubmoduleID(F, ReadLocalID);
+      assert(GlobalID == ReadGlobalID);
       SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
       Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
       SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
@@ -6378,18 +6417,15 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       bool NamedModuleHasInit = Record[Idx++];
 
       Module *ParentModule = nullptr;
-      if (Parent)
+      if (Parent) {
         ParentModule = getSubmodule(Parent);
+        if (!ParentModule)
+          return nullptr;
+      }
 
       CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
                                   IsFramework, IsExplicit);
 
-      SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
-      if (GlobalIndex >= SubmodulesLoaded.size() ||
-          SubmodulesLoaded[GlobalIndex])
-        return llvm::createStringError(std::errc::invalid_argument,
-                                       "too many submodules");
-
       if (!ParentModule) {
         if ([[maybe_unused]] const ModuleFileKey *CurFileKey =
                 CurrentModule->getASTFileKey()) {
@@ -6410,7 +6446,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
               Diag(diag::note_module_file_conflict)
                   << CurModMapFile->getName() << ModMapFile->getName();
 
-            return llvm::make_error<AlreadyReportedDiagnosticError>();
+            return nullptr;
           }
         }
 
@@ -6520,59 +6556,29 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       break;
     }
 
-    case SUBMODULE_METADATA: {
-      F.BaseSubmoduleID = getTotalNumSubmodules();
-      F.LocalNumSubmodules = Record[0];
-      unsigned LocalBaseSubmoduleID = Record[1];
-      if (F.LocalNumSubmodules > 0) {
-        // Introduce the global -> local mapping for submodules within this
-        // module.
-        GlobalSubmoduleMap.insert(std::make_pair(getTotalNumSubmodules()+1,&F));
-
-        // Introduce the local -> global mapping for submodules within this
-        // module.
-        F.SubmoduleRemap.insertOrReplace(
-          std::make_pair(LocalBaseSubmoduleID,
-                         F.BaseSubmoduleID - LocalBaseSubmoduleID));
-
-        SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules);
-      }
-      break;
-    }
-
     case SUBMODULE_IMPORTS:
       for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
-        UnresolvedModuleRef Unresolved;
-        Unresolved.File = &F;
-        Unresolved.Mod = CurrentModule;
-        Unresolved.ID = Record[Idx];
-        Unresolved.Kind = UnresolvedModuleRef::Import;
-        Unresolved.IsWildcard = false;
-        UnresolvedModuleRefs.push_back(Unresolved);
+        SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]);
+        CurrentModule->Imports.push_back(ModuleRef(this, GlobalID));
       }
       break;
 
     case SUBMODULE_AFFECTING_MODULES:
       for (unsigned Idx = 0; Idx != Record.size(); ++Idx) {
-        UnresolvedModuleRef Unresolved;
-        Unresolved.File = &F;
-        Unresolved.Mod = CurrentModule;
-        Unresolved.ID = Record[Idx];
-        Unresolved.Kind = UnresolvedModuleRef::Affecting;
-        Unresolved.IsWildcard = false;
-        UnresolvedModuleRefs.push_back(Unresolved);
+        SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]);
+        CurrentModule->AffectingClangModules.push_back(
+            ModuleRef(this, GlobalID));
       }
       break;
 
     case SUBMODULE_EXPORTS:
       for (unsigned Idx = 0; Idx + 1 < Record.size(); Idx += 2) {
-        UnresolvedModuleRef Unresolved;
-        Unresolved.File = &F;
-        Unresolved.Mod = CurrentModule;
-        Unresolved.ID = Record[Idx];
-        Unresolved.Kind = UnresolvedModuleRef::Export;
-        Unresolved.IsWildcard = Record[Idx + 1];
-        UnresolvedModuleRefs.push_back(Unresolved);
+        SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]);
+        bool IsWildcard = Record[Idx + 1];
+        ModuleRef ExportedMod =
+            GlobalID ? ModuleRef(this, GlobalID) : ModuleRef();
+        if (ExportedMod || IsWildcard)
+          CurrentModule->Exports.push_back({ExportedMod, IsWildcard});
       }
 
       // Once we've loaded the set of exports, there's no reason to keep
@@ -6596,14 +6602,11 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       break;
 
     case SUBMODULE_CONFLICT: {
-      UnresolvedModuleRef Unresolved;
-      Unresolved.File = &F;
-      Unresolved.Mod = CurrentModule;
-      Unresolved.ID = Record[0];
-      Unresolved.Kind = UnresolvedModuleRef::Conflict;
-      Unresolved.IsWildcard = false;
-      Unresolved.String = Blob;
-      UnresolvedModuleRefs.push_back(Unresolved);
+      SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[0]);
+      Module::Conflict Conflict;
+      Conflict.Other = ModuleRef(this, GlobalID);
+      Conflict.Message = Blob.str();
+      CurrentModule->Conflicts.push_back(Conflict);
       break;
     }
 
@@ -6624,6 +6627,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       CurrentModule->ExportAsModule = Blob.str();
       ModMap.addLinkAsDependency(CurrentModule);
       break;
+
+    case SUBMODULE_CHILD: {
+      // Record a not-yet-loaded direct child for on-demand deserialization.
+      SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[0]);
+      CurrentModule->addSubmodule(Blob, this, GlobalID);
+      break;
+    }
     }
   }
 }
@@ -10061,20 +10071,6 @@ ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const {
   return LocalID + I->second;
 }
 
-Module *ASTReader::getSubmodule(SubmoduleID GlobalID) {
-  if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) {
-    assert(GlobalID == 0 && "Unhandled global submodule ID");
-    return nullptr;
-  }
-
-  if (GlobalID > SubmodulesLoaded.size()) {
-    Error("submodule ID out of range in AST file");
-    return nullptr;
-  }
-
-  return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS];
-}
-
 Module *ASTReader::getModule(unsigned ID) {
   return getSubmodule(ID);
 }

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ba644fefc109a..1970ed86589b5 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -919,6 +919,7 @@ void ASTWriter::WriteBlockInfoBlock() {
 
   // AST Top-Level Block.
   BLOCK(AST_BLOCK);
+  RECORD(SUBMODULE_METADATA);
   RECORD(TYPE_OFFSET);
   RECORD(DECL_OFFSET);
   RECORD(IDENTIFIER_OFFSET);
@@ -997,7 +998,7 @@ void ASTWriter::WriteBlockInfoBlock() {
 
   // Submodule Block.
   BLOCK(SUBMODULE_BLOCK);
-  RECORD(SUBMODULE_METADATA);
+  RECORD(SUBMODULE_END);
   RECORD(SUBMODULE_DEFINITION);
   RECORD(SUBMODULE_UMBRELLA_HEADER);
   RECORD(SUBMODULE_HEADER);
@@ -1016,6 +1017,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(SUBMODULE_PRIVATE_TEXTUAL_HEADER);
   RECORD(SUBMODULE_INITIALIZERS);
   RECORD(SUBMODULE_EXPORT_AS);
+  RECORD(SUBMODULE_CHILD);
 
   // Comments Block.
   BLOCK(COMMENTS_BLOCK);
@@ -2983,16 +2985,6 @@ unsigned ASTWriter::getSubmoduleID(Module *Mod) {
   return ID;
 }
 
-/// Compute the number of modules within the given tree (including the
-/// given module).
-static unsigned getNumberOfModules(Module *Mod) {
-  unsigned ChildModules = 0;
-  for (Module *Submodule : Mod->submodules())
-    ChildModules += getNumberOfModules(Submodule);
-
-  return ChildModules + 1;
-}
-
 void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
   // Enter the submodule description block.
   Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
@@ -3088,11 +3080,16 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));    // Macro name
   unsigned ExportAsAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
 
-  // Write the submodule metadata block.
-  RecordData::value_type Record[] = {
-      getNumberOfModules(WritingModule),
-      FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS};
-  Stream.EmitRecord(SUBMODULE_METADATA, Record);
+  Abbrev = std::make_shared<BitCodeAbbrev>();
+  Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CHILD));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Child submodule ID
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));   // Child name
+  unsigned ChildAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+  SmallVector<uint64_t> SubmoduleOffsets;
+  uint64_t SubmoduleOffsetBase = Stream.GetCurrentBitNo();
+
+  unsigned TopLevelID = getSubmoduleID(WritingModule);
 
   // Write all of the submodules.
   std::queue<Module *> Q;
@@ -3101,6 +3098,19 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
     Module *Mod = Q.front();
     Q.pop();
     unsigned ID = getSubmoduleID(Mod);
+    if (ID < FirstSubmoduleID) {
+      assert(0 && "Loaded submodule entered WritingModule ?");
+      continue;
+    }
+
+    // Record the local offset of this submodule.
+    unsigned Index = ID - FirstSubmoduleID;
+    if (Index >= SubmoduleOffsets.size())
+      SubmoduleOffsets.resize(Index + 1);
+
+    uint64_t Offset = Stream.GetCurrentBitNo() - SubmoduleOffsetBase;
+    assert((Offset >> 32) == 0 && "Submodule offset too large");
+    SubmoduleOffsets[Index] = Offset;
 
     uint64_t ParentID = 0;
     if (Mod->Parent) {
@@ -3259,6 +3269,20 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
       Stream.EmitRecordWithBlob(ExportAsAbbrev, Record, Mod->ExportAsModule);
     }
 
+    // Emit one SUBMODULE_CHILD record per direct child so the reader can
+    // populate PendingSubmodules and demand-load children by name.
+    for (Module *Child : Mod->submodules()) {
+      RecordData::value_type Record[] = {SUBMODULE_CHILD,
+                                         getSubmoduleID(Child)};
+      Stream.EmitRecordWithBlob(ChildAbbrev, Record, Child->Name);
+    }
+
+    // Emit the sentinel signifying the end of this submodule.
+    {
+      RecordData Record;
+      Stream.EmitRecord(SUBMODULE_END, Record);
+    }
+
     // Queue up the submodules of this module.
     for (Module *M : Mod->submodules())
       Q.push(M);
@@ -3266,10 +3290,23 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) {
 
   Stream.ExitBlock();
 
-  assert((NextSubmoduleID - FirstSubmoduleID ==
-          getNumberOfModules(WritingModule)) &&
+  assert((NextSubmoduleID - FirstSubmoduleID == SubmoduleOffsets.size()) &&
          "Wrong # of submodules; found a reference to a non-local, "
          "non-imported submodule?");
+
+  Abbrev = std::make_shared<BitCodeAbbrev>();
+  Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_METADATA));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Submodule count
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Base submodule ID
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Top-level submod ID
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));   // Submodule offsets
+  unsigned SubmoduleMetadataAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+  RecordData::value_type Record[] = {
+      SUBMODULE_METADATA, SubmoduleOffsets.size(),
+      FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS, TopLevelID};
+  Stream.EmitRecordWithBlob(SubmoduleMetadataAbbrev, Record,
+                            bytes(SubmoduleOffsets));
 }
 
 void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to