https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/194968
>From 48df0dd820e47bb08f20a95dd106af0f3ef47936 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Wed, 29 Apr 2026 15:58:48 -0700 Subject: [PATCH 1/4] [clang][modules] Deserialize submodule lazily --- clang/include/clang/Basic/Module.h | 76 ++++- clang/include/clang/Lex/ModuleMap.h | 2 +- clang/include/clang/Lex/Preprocessor.h | 2 +- .../include/clang/Serialization/ASTBitCodes.h | 14 +- clang/include/clang/Serialization/ASTReader.h | 46 +-- .../include/clang/Serialization/ModuleFile.h | 16 + clang/lib/Basic/Module.cpp | 5 +- clang/lib/Lex/ModuleMap.cpp | 9 +- clang/lib/Lex/Preprocessor.cpp | 2 +- clang/lib/Sema/SemaLookup.cpp | 3 +- clang/lib/Sema/SemaModule.cpp | 4 +- clang/lib/Serialization/ASTReader.cpp | 284 +++++++++--------- clang/lib/Serialization/ASTWriter.cpp | 73 +++-- 13 files changed, 312 insertions(+), 224 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index f83319db082d7..14200b50569c9 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -48,9 +48,17 @@ namespace clang { class FileManager; class LangOptions; +class Module; class ModuleMap; class TargetInfo; +/// Interface for on-demand deserialization of submodules stored in a PCM file. +class ExternalSubmoduleSource { +public: + virtual Module *getSubmodule(uint32_t GlobalID) = 0; + virtual ~ExternalSubmoduleSource() = default; +}; + /// Describes the name of a module. using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>; @@ -222,6 +230,41 @@ struct ModuleAttributes { NoUndeclaredIncludes(false) {} }; +/// A reference to either a fully materialized Module object, or +/// a yet-to-be-deserialized submodule in an AST file. +class ModuleRef { + mutable Module *Existing = nullptr; + mutable ExternalSubmoduleSource *ExternalSource = nullptr; + mutable uint64_t SubmoduleID = 0; + +public: + ModuleRef() = default; + ModuleRef(Module *M) : Existing(M) {} + ModuleRef(ExternalSubmoduleSource *ExtSrc, uint64_t SubmoduleID) + : ExternalSource(ExtSrc), SubmoduleID(SubmoduleID) {} + + Module *getExisting() const { return Existing; } + void setExisting(Module *E) { Existing = E; } + + void setExternal(ExternalSubmoduleSource *ExtSrc, uint64_t ID) { + ExternalSource = ExtSrc; + SubmoduleID = ID; + } + + operator bool() const { return Existing || (ExternalSource && SubmoduleID); } + + operator Module *() const { + if (ExternalSource) { + Existing = ExternalSource->getSubmodule(SubmoduleID); + ExternalSource = nullptr; + SubmoduleID = 0; + } + return Existing; + } + + Module *operator->() const { return *this; } +}; + /// Required to construct a Module. /// /// This tag type is only constructible by ModuleMap, guaranteeing it ownership @@ -348,7 +391,7 @@ class alignas(8) Module { private: /// The submodules of this module, indexed by name. - std::vector<Module *> SubModules; + std::vector<ModuleRef> SubModules; /// A mapping from the submodule name to the index into the /// \c SubModules vector at which that submodule resides. @@ -552,17 +595,17 @@ class alignas(8) Module { /// The set of modules imported by this module, and on which this /// module depends. - llvm::SmallSetVector<Module *, 2> Imports; + llvm::SmallVector<ModuleRef, 2> Imports; /// The set of top-level modules that affected the compilation of this module, /// but were not imported. - llvm::SmallSetVector<Module *, 2> AffectingClangModules; + llvm::SmallVector<ModuleRef, 2> AffectingClangModules; /// Describes an exported module. /// /// The pointer is the module being re-exported, while the bit will be true /// to indicate that this is a wildcard export. - using ExportDecl = std::pair<Module *, bool>; + using ExportDecl = std::pair<ModuleRef, bool>; /// The set of export declarations. SmallVector<ExportDecl, 2> Exports; @@ -640,7 +683,7 @@ class alignas(8) Module { /// A conflict between two modules. struct Conflict { /// The module that this module conflicts with. - Module *Other; + ModuleRef Other; /// The message provided to the user when there is a conflict. std::string Message; @@ -742,6 +785,23 @@ class alignas(8) Module { Parent->SubModules.push_back(this); } + /// Add a child submodule. + void addSubmodule(StringRef Name, Module *Submodule) { + auto [It, New] = SubModuleIndex.insert({Name, SubModules.size()}); + if (New) + SubModules.emplace_back(); + SubModules[It->second].setExisting(Submodule); + } + + /// Add the external part of a submodule ModuleRef. + void addSubmodule(StringRef Name, ExternalSubmoduleSource *ExternalSource, + uint64_t SubmoduleID) { + auto [It, New] = SubModuleIndex.insert({Name, SubModules.size()}); + if (New) + SubModules.emplace_back(); + SubModules[It->second].setExternal(ExternalSource, SubmoduleID); + } + /// Is this module have similar semantics as headers. bool isHeaderLikeModule() const { return isModuleMapModule() || isHeaderUnit(); @@ -913,7 +973,7 @@ class alignas(8) Module { /// Find the submodule with the given name. /// /// \returns The submodule if found, or NULL otherwise. - Module *findSubmodule(StringRef Name) const; + ModuleRef findSubmodule(StringRef Name) const; /// Get the Global Module Fragment (sub-module) for this module, it there is /// one. @@ -941,8 +1001,8 @@ class alignas(8) Module { unsigned getVisibilityID() const { return VisibilityID; } - using submodule_iterator = std::vector<Module *>::iterator; - using submodule_const_iterator = std::vector<Module *>::const_iterator; + using submodule_iterator = std::vector<ModuleRef>::iterator; + using submodule_const_iterator = std::vector<ModuleRef>::const_iterator; llvm::iterator_range<submodule_iterator> submodules() { return llvm::make_range(SubModules.begin(), SubModules.end()); diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index ed326a7fd545b..12f8dbb0b6090 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -548,7 +548,7 @@ class ModuleMap { /// null, we will look for a top-level module. /// /// \returns The named submodule, if known; otherwose, returns null. - Module *lookupModuleQualified(StringRef Name, Module *Context) const; + ModuleRef lookupModuleQualified(StringRef Name, Module *Context) const; /// Find a new module or submodule, or create it if it does not already /// exist. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 8830294ea1658..8cba21539e48a 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1535,7 +1535,7 @@ class Preprocessor { assert(M->isModuleMapModule()); if (!BuildingSubmoduleStack.empty()) { if (M != BuildingSubmoduleStack.back().M) - BuildingSubmoduleStack.back().M->AffectingClangModules.insert(M); + BuildingSubmoduleStack.back().M->AffectingClangModules.push_back(M); } else { AffectingClangModules.insert(M); } diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 9a41f9e89df98..3c8f3ba59a07e 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -44,7 +44,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 37; +const unsigned VERSION_MAJOR = 38; /// AST file minor version number supported by this version of /// Clang. @@ -751,6 +751,10 @@ enum ASTRecordTypes { /// Record code for extname-redefined undeclared identifiers. EXTNAME_UNDECLARED_IDENTIFIERS = 79, + + /// Record that encodes the number of submodules, their base ID in the AST + /// file, and for each module the relative bit offset into the stream. + SUBMODULE_METADATA = 80, }; /// Record types used within a source manager block. @@ -819,8 +823,8 @@ enum PreprocessorDetailRecordTypes { /// Record types used within a submodule description block. enum SubmoduleRecordTypes { - /// Metadata for submodules as a whole. - SUBMODULE_METADATA = 0, + /// Defines the end of a single submodule. Sentinel record without any data. + SUBMODULE_END = 0, /// Defines the major attributes of a submodule, including its /// name and parent. @@ -884,6 +888,10 @@ enum SubmoduleRecordTypes { /// Specifies affecting modules that were not imported. SUBMODULE_AFFECTING_MODULES = 18, + + /// Specifies a direct submodule by name and ID, enabling on-demand + /// deserialization of children without loading the entire submodule block. + SUBMODULE_CHILD = 19, }; /// Record types used within a comments block. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 8394647885bd3..bedac9f8a540a 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -418,14 +418,13 @@ struct LookupBlockOffsets : VisibleLookupBlockOffsets { /// The AST reader provides lazy de-serialization of declarations, as /// required when traversing the AST. Only those AST nodes that are /// actually required will be de-serialized. -class ASTReader - : public ExternalPreprocessorSource, - public ExternalPreprocessingRecordSource, - public ExternalHeaderFileInfoSource, - public ExternalSemaSource, - public IdentifierInfoLookup, - public ExternalSLocEntrySource -{ +class ASTReader : public ExternalPreprocessorSource, + public ExternalPreprocessingRecordSource, + public ExternalHeaderFileInfoSource, + public ExternalSemaSource, + public IdentifierInfoLookup, + public ExternalSLocEntrySource, + public ExternalSubmoduleSource { public: /// Types of AST files. friend class ASTDeclMerger; @@ -820,32 +819,6 @@ class ASTReader /// declarations in that submodule that could be made visible. HiddenNamesMapType HiddenNamesMap; - /// A module import, export, or conflict that hasn't yet been resolved. - struct UnresolvedModuleRef { - /// The file in which this module resides. - ModuleFile *File; - - /// The module that is importing or exporting. - Module *Mod; - - /// The kind of module reference. - enum { Import, Export, Conflict, Affecting } Kind; - - /// The local ID of the module that is being exported. - unsigned ID; - - /// Whether this is a wildcard export. - LLVM_PREFERRED_TYPE(bool) - unsigned IsWildcard : 1; - - /// String data. - StringRef String; - }; - - /// The set of module imports and exports that still need to be - /// resolved. - SmallVector<UnresolvedModuleRef, 2> UnresolvedModuleRefs; - /// A vector containing selectors that have already been loaded. /// /// This vector is indexed by the Selector ID (-1). NULL selector @@ -1612,8 +1585,6 @@ class ASTReader ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, const ModuleFile *ImportedBy, unsigned ClientLoadCapabilities); - llvm::Error ReadSubmoduleBlock(ModuleFile &F, - unsigned ClientLoadCapabilities); static bool ParseLanguageOptions(const RecordData &Record, StringRef ModuleFilename, bool Complain, ASTReaderListener &Listener, @@ -2444,8 +2415,7 @@ class ASTReader unsigned LocalID) const; /// Retrieve the submodule that corresponds to a global submodule ID. - /// - Module *getSubmodule(serialization::SubmoduleID GlobalID); + Module *getSubmodule(uint32_t GlobalID) override; /// Retrieve the module that corresponds to the given module ID. /// diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 58f2fcba01e67..6c47040fde093 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -447,9 +447,25 @@ class ModuleFile { /// Base submodule ID for submodules local to this module. serialization::SubmoduleID BaseSubmoduleID = 0; + /// Base submodule ID for submodules local to this module within its own + /// address space. + unsigned LocalBaseSubmoduleID = 0; + + /// Local submodule ID of the top-level module. + unsigned LocalTopLevelSubmoduleID = 0; + /// Remapping table for submodule IDs in this module. ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; + /// The cursor to the start of the submodules block. + llvm::BitstreamCursor SubmodulesCursor; + + /// Absolute offset of the start of the submodules block. + uint64_t SubmodulesOffsetBase = 0; + + /// Relative offsets for all submodule entries in the AST file. + const llvm::support::unaligned_uint64_t *SubmoduleOffsets = nullptr; + // === Selectors === /// The number of selectors new to this file. diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index 66629baa6240b..d27abb1153c72 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -53,8 +53,7 @@ Module::Module(ModuleConstructorTag, StringRef Name, NoUndeclaredIncludes = Parent->NoUndeclaredIncludes; ModuleMapIsPrivate = Parent->ModuleMapIsPrivate; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); - Parent->SubModules.push_back(this); + Parent->addSubmodule(Name, this); } } @@ -348,7 +347,7 @@ void Module::markUnavailable(bool Unimportable) { } } -Module *Module::findSubmodule(StringRef Name) const { +ModuleRef Module::findSubmodule(StringRef Name) const { if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end()) return SubModules[It->second]; diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 71d8bef278179..e771a4b50111d 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -938,7 +938,8 @@ Module *ModuleMap::lookupModuleUnqualified(StringRef Name, return findModule(Name); } -Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ +ModuleRef ModuleMap::lookupModuleQualified(StringRef Name, + Module *Context) const { if (!Context) return findModule(Name); @@ -950,8 +951,8 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, bool IsFramework, bool IsExplicit) { // Try to find an existing module with this name. - if (Module *Sub = lookupModuleQualified(Name, Parent)) - return std::make_pair(Sub, false); + if (ModuleRef Sub = lookupModuleQualified(Name, Parent); Sub.getExisting()) + return std::make_pair(Sub.getExisting(), false); // Create a new module with this name. Module *M = createModule(Name, Parent, IsFramework, IsExplicit); @@ -960,7 +961,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name, Module *ModuleMap::createModule(StringRef Name, Module *Parent, bool IsFramework, bool IsExplicit) { - assert(lookupModuleQualified(Name, Parent) == nullptr && + assert(!lookupModuleQualified(Name, Parent).getExisting() && "Creating duplicate submodule"); Module *Result = new (ModulesAlloc.Allocate()) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index b08459632aacb..761bf8e9af56b 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1450,7 +1450,7 @@ void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc, // Add this module to the imports list of the currently-built submodule. if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M) - BuildingSubmoduleStack.back().M->Imports.insert(M); + BuildingSubmoduleStack.back().M->Imports.push_back(M); } bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index b96065f8619d2..e4e55bb7d0ac7 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -2032,7 +2032,8 @@ bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) { // Directly imported module are necessarily reachable. // Since we can't export import a module implementation partition unit, we // don't need to count for Exports here. - if (CurrentM && CurrentM->getTopLevelModule()->Imports.count(DeclTopModule)) + if (CurrentM && + llvm::is_contained(CurrentM->getTopLevelModule()->Imports, DeclTopModule)) return true; // Then we treat all module implementation partition unit as unreachable. diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 67f46b64cf047..caa61a99a6914 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -483,7 +483,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // Sequence initialization of the imported module before that of the current // module, if any. Context.addModuleInitializer(ModuleScopes.back().Module, Import); - Mod->Imports.insert(Interface); // As if we imported it. + Mod->Imports.push_back(Interface); // As if we imported it. // Also save this as a shortcut to checking for decls in the interface ThePrimaryInterface = Interface; // If we made an implicit import of the module interface, then return the @@ -710,7 +710,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, if (ExportLoc.isValid() || getEnclosingExportDecl(Import)) getCurrentModule()->Exports.emplace_back(Mod, false); else - getCurrentModule()->Imports.insert(Mod); + getCurrentModule()->Imports.push_back(Mod); } HadImportedNamedModules = true; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2c0b908314fa5..e44cee65ce517 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3746,8 +3746,13 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; case SUBMODULE_BLOCK_ID: - if (llvm::Error Err = ReadSubmoduleBlock(F, ClientLoadCapabilities)) + F.SubmodulesCursor = Stream; + if (llvm::Error Err = Stream.SkipBlock()) + return Err; + if (llvm::Error Err = + ReadBlockAbbrevs(F.SubmodulesCursor, SUBMODULE_BLOCK_ID)) return Err; + F.SubmodulesOffsetBase = F.SubmodulesCursor.GetCurrentBitNo(); break; case COMMENTS_BLOCK_ID: { @@ -3799,6 +3804,7 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, case HEADER_SEARCH_TABLE: case IMPORTED_MODULES: case MACRO_OFFSET: + case SUBMODULE_METADATA: break; default: continue; @@ -3809,6 +3815,50 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, default: // Default behavior: ignore. break; + case SUBMODULE_METADATA: { + F.BaseSubmoduleID = getTotalNumSubmodules(); + F.LocalNumSubmodules = Record[0]; + F.LocalBaseSubmoduleID = Record[1]; + F.LocalTopLevelSubmoduleID = Record[2]; + F.SubmoduleOffsets = + (const llvm::support::unaligned_uint64_t *)Blob.data(); + if (F.LocalNumSubmodules > 0) { + // Introduce the global -> local mapping for submodules within this + // module. + GlobalSubmoduleMap.insert( + std::make_pair(getTotalNumSubmodules() + 1, &F)); + + // Introduce the local -> global mapping for submodules within this + // module. + F.SubmoduleRemap.insertOrReplace( + std::make_pair(F.LocalBaseSubmoduleID, + F.BaseSubmoduleID - F.LocalBaseSubmoduleID)); + + SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules); + } + + auto ReadSubmodule = [&](unsigned LocalID) -> Module * { + return getSubmodule(getGlobalSubmoduleID(F, LocalID)); + }; + + if (PP.getHeaderSearchInfo().getModuleMap().findModule(F.ModuleName)) { + // If we already knew about this module, make sure to bring all + // submodules up to date. + for (unsigned Index = 0; Index != F.LocalNumSubmodules; ++Index) { + unsigned LocalID = + Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS; + ReadSubmodule(LocalID); + } + } else { + // If we didn't know this module, we loaded it transitively. Deserialize + // just the top-level module to register it with ModuleMap, but load the + // rest lazily. + ReadSubmodule(F.LocalTopLevelSubmoduleID); + } + + break; + } + case TYPE_OFFSET: { if (F.LocalNumTypes != 0) return llvm::createStringError( @@ -5087,41 +5137,6 @@ ASTReader::ASTReadResult ASTReader::ReadAST(ModuleFileName FileName, F.ImportLoc = TranslateSourceLocation(*M.ImportedBy, M.ImportLoc); } - // Resolve any unresolved module exports. - for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) { - UnresolvedModuleRef &Unresolved = UnresolvedModuleRefs[I]; - SubmoduleID GlobalID = getGlobalSubmoduleID(*Unresolved.File,Unresolved.ID); - Module *ResolvedMod = getSubmodule(GlobalID); - - switch (Unresolved.Kind) { - case UnresolvedModuleRef::Conflict: - if (ResolvedMod) { - Module::Conflict Conflict; - Conflict.Other = ResolvedMod; - Conflict.Message = Unresolved.String.str(); - Unresolved.Mod->Conflicts.push_back(Conflict); - } - continue; - - case UnresolvedModuleRef::Import: - if (ResolvedMod) - Unresolved.Mod->Imports.insert(ResolvedMod); - continue; - - case UnresolvedModuleRef::Affecting: - if (ResolvedMod) - Unresolved.Mod->AffectingClangModules.insert(ResolvedMod); - continue; - - case UnresolvedModuleRef::Export: - if (ResolvedMod || Unresolved.IsWildcard) - Unresolved.Mod->Exports.push_back(Module::ExportDecl( - ResolvedMod, static_cast<bool>(Unresolved.IsWildcard))); - continue; - } - } - UnresolvedModuleRefs.clear(); - // FIXME: How do we load the 'use'd modules? They may not be submodules. // Might be unnecessary as use declarations are only used to build the // module itself. @@ -6277,11 +6292,34 @@ bool ASTReader::isAcceptableASTFile( /*ValidateDiagnosticOptions=*/true); } -llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, - unsigned ClientLoadCapabilities) { - // Enter the submodule block. - if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID)) - return Err; +Module *ASTReader::getSubmodule(uint32_t GlobalID) { + if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) { + assert(GlobalID == 0 && "Unhandled global submodule ID"); + return nullptr; + } + + if (GlobalID > SubmodulesLoaded.size()) { + Error("submodule ID out of range in AST file"); + return nullptr; + } + + SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; + if (GlobalIndex < SubmodulesLoaded.size() && SubmodulesLoaded[GlobalIndex]) + return SubmodulesLoaded[GlobalIndex]; + + GlobalSubmoduleMapType::iterator It = GlobalSubmoduleMap.find(GlobalID); + assert(It != GlobalSubmoduleMap.end()); + ModuleFile &F = *It->second; + unsigned Index = GlobalID - F.BaseSubmoduleID - NUM_PREDEF_SELECTOR_IDS; + unsigned LocalID = Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS; + + BitstreamCursor &Cursor = F.SubmodulesCursor; + SavedStreamPosition SavedPosition(Cursor); + unsigned Offset = F.SubmoduleOffsets[Index]; + if (llvm::Error Err = Cursor.JumpToBit(F.SubmodulesOffsetBase + Offset)) { + Error(std::move(Err)); + return nullptr; + } ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr; @@ -6292,23 +6330,24 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, ? &ModuleMap::createModule : &ModuleMap::findOrCreateModuleFirst; - bool First = true; Module *CurrentModule = nullptr; RecordData Record; while (true) { - Expected<llvm::BitstreamEntry> MaybeEntry = - F.Stream.advanceSkippingSubblocks(); - if (!MaybeEntry) - return MaybeEntry.takeError(); + Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance(); + if (!MaybeEntry) { + Error(MaybeEntry.takeError()); + return nullptr; + } llvm::BitstreamEntry Entry = MaybeEntry.get(); switch (Entry.Kind) { - case llvm::BitstreamEntry::SubBlock: // Handled for us already. + case llvm::BitstreamEntry::SubBlock: case llvm::BitstreamEntry::Error: - return llvm::createStringError(std::errc::illegal_byte_sequence, - "malformed block record in AST file"); - case llvm::BitstreamEntry::EndBlock: - return llvm::Error::success(); + case llvm::BitstreamEntry::EndBlock: { + Error(llvm::createStringError(std::errc::illegal_byte_sequence, + "malformed block record in AST file")); + return nullptr; + } case llvm::BitstreamEntry::Record: // The interesting case. break; @@ -6317,35 +6356,35 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, // Read a record. StringRef Blob; Record.clear(); - Expected<unsigned> MaybeKind = F.Stream.readRecord(Entry.ID, Record, &Blob); - if (!MaybeKind) - return MaybeKind.takeError(); - unsigned Kind = MaybeKind.get(); - - if ((Kind == SUBMODULE_METADATA) != First) - return llvm::createStringError( - std::errc::illegal_byte_sequence, - "submodule metadata record should be at beginning of block"); - First = false; - - // Submodule information is only valid if we have a current module. - // FIXME: Should we error on these cases? - if (!CurrentModule && Kind != SUBMODULE_METADATA && - Kind != SUBMODULE_DEFINITION) - continue; + Expected<unsigned> MaybeKind = Cursor.readRecord(Entry.ID, Record, &Blob); + if (!MaybeKind) { + Error(MaybeKind.takeError()); + return nullptr; + } + auto Kind = static_cast<SubmoduleRecordTypes>(MaybeKind.get()); switch (Kind) { - default: // Default behavior: ignore. - break; + case SUBMODULE_END: + if (!CurrentModule) { + Error(llvm::createStringError(std::errc::illegal_byte_sequence, + "malformed module definition")); + return nullptr; + } + return CurrentModule; case SUBMODULE_DEFINITION: { - if (Record.size() < 13) - return llvm::createStringError(std::errc::illegal_byte_sequence, - "malformed module definition"); + if (Record.size() < 13) { + Error(llvm::createStringError(std::errc::illegal_byte_sequence, + "malformed module definition")); + return nullptr; + } StringRef Name = Blob; unsigned Idx = 0; - SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx++]); + unsigned ReadLocalID = Record[Idx++]; + assert(LocalID == ReadLocalID); + SubmoduleID ReadGlobalID = getGlobalSubmoduleID(F, ReadLocalID); + assert(GlobalID == ReadGlobalID); SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]); Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++]; SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]); @@ -6362,18 +6401,15 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, bool NamedModuleHasInit = Record[Idx++]; Module *ParentModule = nullptr; - if (Parent) + if (Parent) { ParentModule = getSubmodule(Parent); + if (!ParentModule) + return nullptr; + } CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule, IsFramework, IsExplicit); - SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; - if (GlobalIndex >= SubmodulesLoaded.size() || - SubmodulesLoaded[GlobalIndex]) - return llvm::createStringError(std::errc::invalid_argument, - "too many submodules"); - if (!ParentModule) { if ([[maybe_unused]] const ModuleFileKey *CurFileKey = CurrentModule->getASTFileKey()) { @@ -6394,7 +6430,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, Diag(diag::note_module_file_conflict) << CurModMapFile->getName() << ModMapFile->getName(); - return llvm::make_error<AlreadyReportedDiagnosticError>(); + return nullptr; } } @@ -6504,59 +6540,29 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, break; } - case SUBMODULE_METADATA: { - F.BaseSubmoduleID = getTotalNumSubmodules(); - F.LocalNumSubmodules = Record[0]; - unsigned LocalBaseSubmoduleID = Record[1]; - if (F.LocalNumSubmodules > 0) { - // Introduce the global -> local mapping for submodules within this - // module. - GlobalSubmoduleMap.insert(std::make_pair(getTotalNumSubmodules()+1,&F)); - - // Introduce the local -> global mapping for submodules within this - // module. - F.SubmoduleRemap.insertOrReplace( - std::make_pair(LocalBaseSubmoduleID, - F.BaseSubmoduleID - LocalBaseSubmoduleID)); - - SubmodulesLoaded.resize(SubmodulesLoaded.size() + F.LocalNumSubmodules); - } - break; - } - case SUBMODULE_IMPORTS: for (unsigned Idx = 0; Idx != Record.size(); ++Idx) { - UnresolvedModuleRef Unresolved; - Unresolved.File = &F; - Unresolved.Mod = CurrentModule; - Unresolved.ID = Record[Idx]; - Unresolved.Kind = UnresolvedModuleRef::Import; - Unresolved.IsWildcard = false; - UnresolvedModuleRefs.push_back(Unresolved); + SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]); + CurrentModule->Imports.push_back(ModuleRef(this, GlobalID)); } break; case SUBMODULE_AFFECTING_MODULES: for (unsigned Idx = 0; Idx != Record.size(); ++Idx) { - UnresolvedModuleRef Unresolved; - Unresolved.File = &F; - Unresolved.Mod = CurrentModule; - Unresolved.ID = Record[Idx]; - Unresolved.Kind = UnresolvedModuleRef::Affecting; - Unresolved.IsWildcard = false; - UnresolvedModuleRefs.push_back(Unresolved); + SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]); + CurrentModule->AffectingClangModules.push_back( + ModuleRef(this, GlobalID)); } break; case SUBMODULE_EXPORTS: for (unsigned Idx = 0; Idx + 1 < Record.size(); Idx += 2) { - UnresolvedModuleRef Unresolved; - Unresolved.File = &F; - Unresolved.Mod = CurrentModule; - Unresolved.ID = Record[Idx]; - Unresolved.Kind = UnresolvedModuleRef::Export; - Unresolved.IsWildcard = Record[Idx + 1]; - UnresolvedModuleRefs.push_back(Unresolved); + SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[Idx]); + bool IsWildcard = Record[Idx + 1]; + ModuleRef ExportedMod = + GlobalID ? ModuleRef(this, GlobalID) : ModuleRef(); + if (ExportedMod || IsWildcard) + CurrentModule->Exports.push_back({ExportedMod, IsWildcard}); } // Once we've loaded the set of exports, there's no reason to keep @@ -6580,14 +6586,11 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, break; case SUBMODULE_CONFLICT: { - UnresolvedModuleRef Unresolved; - Unresolved.File = &F; - Unresolved.Mod = CurrentModule; - Unresolved.ID = Record[0]; - Unresolved.Kind = UnresolvedModuleRef::Conflict; - Unresolved.IsWildcard = false; - Unresolved.String = Blob; - UnresolvedModuleRefs.push_back(Unresolved); + SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[0]); + Module::Conflict Conflict; + Conflict.Other = ModuleRef(this, GlobalID); + Conflict.Message = Blob.str(); + CurrentModule->Conflicts.push_back(Conflict); break; } @@ -6608,6 +6611,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, CurrentModule->ExportAsModule = Blob.str(); ModMap.addLinkAsDependency(CurrentModule); break; + + case SUBMODULE_CHILD: { + // Record a not-yet-loaded direct child for on-demand deserialization. + SubmoduleID GlobalID = getGlobalSubmoduleID(F, Record[0]); + CurrentModule->addSubmodule(Blob, this, GlobalID); + break; + } } } } @@ -10045,20 +10055,6 @@ ASTReader::getGlobalSubmoduleID(ModuleFile &M, unsigned LocalID) const { return LocalID + I->second; } -Module *ASTReader::getSubmodule(SubmoduleID GlobalID) { - if (GlobalID < NUM_PREDEF_SUBMODULE_IDS) { - assert(GlobalID == 0 && "Unhandled global submodule ID"); - return nullptr; - } - - if (GlobalID > SubmodulesLoaded.size()) { - Error("submodule ID out of range in AST file"); - return nullptr; - } - - return SubmodulesLoaded[GlobalID - NUM_PREDEF_SUBMODULE_IDS]; -} - Module *ASTReader::getModule(unsigned ID) { return getSubmodule(ID); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ba644fefc109a..1970ed86589b5 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -919,6 +919,7 @@ void ASTWriter::WriteBlockInfoBlock() { // AST Top-Level Block. BLOCK(AST_BLOCK); + RECORD(SUBMODULE_METADATA); RECORD(TYPE_OFFSET); RECORD(DECL_OFFSET); RECORD(IDENTIFIER_OFFSET); @@ -997,7 +998,7 @@ void ASTWriter::WriteBlockInfoBlock() { // Submodule Block. BLOCK(SUBMODULE_BLOCK); - RECORD(SUBMODULE_METADATA); + RECORD(SUBMODULE_END); RECORD(SUBMODULE_DEFINITION); RECORD(SUBMODULE_UMBRELLA_HEADER); RECORD(SUBMODULE_HEADER); @@ -1016,6 +1017,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(SUBMODULE_PRIVATE_TEXTUAL_HEADER); RECORD(SUBMODULE_INITIALIZERS); RECORD(SUBMODULE_EXPORT_AS); + RECORD(SUBMODULE_CHILD); // Comments Block. BLOCK(COMMENTS_BLOCK); @@ -2983,16 +2985,6 @@ unsigned ASTWriter::getSubmoduleID(Module *Mod) { return ID; } -/// Compute the number of modules within the given tree (including the -/// given module). -static unsigned getNumberOfModules(Module *Mod) { - unsigned ChildModules = 0; - for (Module *Submodule : Mod->submodules()) - ChildModules += getNumberOfModules(Submodule); - - return ChildModules + 1; -} - void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { // Enter the submodule description block. Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5); @@ -3088,11 +3080,16 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Macro name unsigned ExportAsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); - // Write the submodule metadata block. - RecordData::value_type Record[] = { - getNumberOfModules(WritingModule), - FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS}; - Stream.EmitRecord(SUBMODULE_METADATA, Record); + Abbrev = std::make_shared<BitCodeAbbrev>(); + Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_CHILD)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Child submodule ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Child name + unsigned ChildAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); + + SmallVector<uint64_t> SubmoduleOffsets; + uint64_t SubmoduleOffsetBase = Stream.GetCurrentBitNo(); + + unsigned TopLevelID = getSubmoduleID(WritingModule); // Write all of the submodules. std::queue<Module *> Q; @@ -3101,6 +3098,19 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { Module *Mod = Q.front(); Q.pop(); unsigned ID = getSubmoduleID(Mod); + if (ID < FirstSubmoduleID) { + assert(0 && "Loaded submodule entered WritingModule ?"); + continue; + } + + // Record the local offset of this submodule. + unsigned Index = ID - FirstSubmoduleID; + if (Index >= SubmoduleOffsets.size()) + SubmoduleOffsets.resize(Index + 1); + + uint64_t Offset = Stream.GetCurrentBitNo() - SubmoduleOffsetBase; + assert((Offset >> 32) == 0 && "Submodule offset too large"); + SubmoduleOffsets[Index] = Offset; uint64_t ParentID = 0; if (Mod->Parent) { @@ -3259,6 +3269,20 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { Stream.EmitRecordWithBlob(ExportAsAbbrev, Record, Mod->ExportAsModule); } + // Emit one SUBMODULE_CHILD record per direct child so the reader can + // populate PendingSubmodules and demand-load children by name. + for (Module *Child : Mod->submodules()) { + RecordData::value_type Record[] = {SUBMODULE_CHILD, + getSubmoduleID(Child)}; + Stream.EmitRecordWithBlob(ChildAbbrev, Record, Child->Name); + } + + // Emit the sentinel signifying the end of this submodule. + { + RecordData Record; + Stream.EmitRecord(SUBMODULE_END, Record); + } + // Queue up the submodules of this module. for (Module *M : Mod->submodules()) Q.push(M); @@ -3266,10 +3290,23 @@ void ASTWriter::WriteSubmodules(Module *WritingModule, ASTContext *Context) { Stream.ExitBlock(); - assert((NextSubmoduleID - FirstSubmoduleID == - getNumberOfModules(WritingModule)) && + assert((NextSubmoduleID - FirstSubmoduleID == SubmoduleOffsets.size()) && "Wrong # of submodules; found a reference to a non-local, " "non-imported submodule?"); + + Abbrev = std::make_shared<BitCodeAbbrev>(); + Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_METADATA)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Submodule count + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Base submodule ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Top-level submod ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Submodule offsets + unsigned SubmoduleMetadataAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData::value_type Record[] = { + SUBMODULE_METADATA, SubmoduleOffsets.size(), + FirstSubmoduleID - NUM_PREDEF_SUBMODULE_IDS, TopLevelID}; + Stream.EmitRecordWithBlob(SubmoduleMetadataAbbrev, Record, + bytes(SubmoduleOffsets)); } void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, >From a3113a8cf5388aa8ff87c08fe9a9366ba4c4e828 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Wed, 6 May 2026 14:17:18 -0700 Subject: [PATCH 2/4] NUM_PREDEF_SELECTOR_IDS -> NUM_PREDEF_SUBMODULE_IDS --- clang/lib/Serialization/ASTReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index e44cee65ce517..2b7acfd54a357 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6310,7 +6310,7 @@ Module *ASTReader::getSubmodule(uint32_t GlobalID) { GlobalSubmoduleMapType::iterator It = GlobalSubmoduleMap.find(GlobalID); assert(It != GlobalSubmoduleMap.end()); ModuleFile &F = *It->second; - unsigned Index = GlobalID - F.BaseSubmoduleID - NUM_PREDEF_SELECTOR_IDS; + unsigned Index = GlobalID - F.BaseSubmoduleID - NUM_PREDEF_SUBMODULE_IDS; unsigned LocalID = Index + F.LocalBaseSubmoduleID + NUM_PREDEF_SUBMODULE_IDS; BitstreamCursor &Cursor = F.SubmodulesCursor; >From 3abdf06f2f0b3b1f2783431c38b9223d0450f2ec Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Wed, 6 May 2026 14:17:38 -0700 Subject: [PATCH 3/4] GlobalID -> GlobalIndex --- clang/lib/Serialization/ASTReader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2b7acfd54a357..98d36282e43be 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6298,13 +6298,13 @@ Module *ASTReader::getSubmodule(uint32_t GlobalID) { return nullptr; } - if (GlobalID > SubmodulesLoaded.size()) { + SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; + if (GlobalIndex >= SubmodulesLoaded.size()) { Error("submodule ID out of range in AST file"); return nullptr; } - SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; - if (GlobalIndex < SubmodulesLoaded.size() && SubmodulesLoaded[GlobalIndex]) + if (SubmodulesLoaded[GlobalIndex]) return SubmodulesLoaded[GlobalIndex]; GlobalSubmoduleMapType::iterator It = GlobalSubmoduleMap.find(GlobalID); >From 0054b3a346f6f8e5a97d2ad237ae789730d3e3f8 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Wed, 6 May 2026 14:48:22 -0700 Subject: [PATCH 4/4] ModuleRef documentation, don't clear ExternalSource --- clang/include/clang/Basic/Module.h | 41 ++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 14200b50569c9..3fd6bfa063af4 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -230,34 +230,55 @@ struct ModuleAttributes { NoUndeclaredIncludes(false) {} }; -/// A reference to either a fully materialized Module object, or -/// a yet-to-be-deserialized submodule in an AST file. +/// Reference to a module that consists of either an existing/materialized +/// Module object, reference to a serialized submodule record, both, or +/// neither (null). class ModuleRef { + /// The existing/materialized Module object. mutable Module *Existing = nullptr; - mutable ExternalSubmoduleSource *ExternalSource = nullptr; + + /// The external submodule source (i.e. \c ASTReader), and a boolean + /// signifying whether it's already been used to deserialize \c SubmoduleID. + mutable llvm::PointerIntPair<ExternalSubmoduleSource *, 1, bool> + ExternalSource = {nullptr, false}; + + /// Identifier of the external submodule in \c ExternalSource. mutable uint64_t SubmoduleID = 0; public: + /// Create an empty reference. ModuleRef() = default; + + /// Create reference to a materialized module. ModuleRef(Module *M) : Existing(M) {} + + /// Create reference to a serialized submodule record. ModuleRef(ExternalSubmoduleSource *ExtSrc, uint64_t SubmoduleID) - : ExternalSource(ExtSrc), SubmoduleID(SubmoduleID) {} + : ExternalSource(ExtSrc, false), SubmoduleID(SubmoduleID) {} + /// Get the existing/materialized module, if there's any. Module *getExisting() const { return Existing; } + /// Add the existing/materialized module. void setExisting(Module *E) { Existing = E; } + /// Add the serialized submodule record reference. void setExternal(ExternalSubmoduleSource *ExtSrc, uint64_t ID) { - ExternalSource = ExtSrc; + ExternalSource = {ExtSrc, false}; SubmoduleID = ID; } - operator bool() const { return Existing || (ExternalSource && SubmoduleID); } + /// Check whether this is a non-empty reference. + operator bool() const { + return Existing || (ExternalSource.getPointer() && SubmoduleID); + } + /// Get the existing/materialized module. Try materializing it on-demand from + /// the serialized submodule record if possible. operator Module *() const { - if (ExternalSource) { - Existing = ExternalSource->getSubmodule(SubmoduleID); - ExternalSource = nullptr; - SubmoduleID = 0; + if (!ExternalSource.getInt() && ExternalSource.getPointer() && + SubmoduleID) { + Existing = ExternalSource.getPointer()->getSubmodule(SubmoduleID); + ExternalSource.setInt(true); } return Existing; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
