Author: Jan Svoboda Date: 2026-05-18T08:34:19-07:00 New Revision: 847e1acb50c14f93a8c1ae7c663b66840bf06ffa
URL: https://github.com/llvm/llvm-project/commit/847e1acb50c14f93a8c1ae7c663b66840bf06ffa DIFF: https://github.com/llvm/llvm-project/commit/847e1acb50c14f93a8c1ae7c663b66840bf06ffa.diff LOG: [clang][modules] Remove `ModuleManager` in-memory buffers (#194753) The last remaining undesirable `FileManager` usage in `ModuleManager::addModule()` is the lookup in the in-memory buffers map. This PR merges that functionality with the `InMemoryModuleCache`, simplifying the code and removing unnecessary file system access via `ModuleManager::lookupBuffer()`. This is also useful downstream, where we already use the `InMemoryModuleCache` for PCMs loaded from CAS, but don't have an explicit way to represent the PCM location in `ModuleFile{Name,Key}`. Added: Modified: clang/include/clang/Basic/Module.h clang/include/clang/Serialization/ASTReader.h clang/include/clang/Serialization/ModuleManager.h clang/lib/Frontend/ASTUnit.cpp clang/lib/Frontend/ChainedIncludesSource.cpp clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp clang/lib/Serialization/ModuleManager.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 3fd6bfa063af4..96a22435bc567 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -74,6 +74,8 @@ using ModuleId = SmallVector<std::pair<std::string, SourceLocation>, 2>; /// This uses \c FileManager's inode-based canonicalization of the user-provided /// module file path. Because input explicitly-built modules do not change /// during the lifetime of the compiler, inode recycling is not of concern here. +/// +/// For in-memory modules, this is the \c MemoryBuffer in InMemoryModuleCache. class ModuleFileKey { /// The entity used for deduplication. const void *Ptr; @@ -105,17 +107,42 @@ class ModuleFileKey { /// path and the module file name with the (optional) context hash. For all /// other types of module files, this is just the file system path. class ModuleFileName { + enum Kind : unsigned { + InMemory = 0, + Explicit = 1, + ImplicitSuffixLength, + }; + std::string Path; - unsigned ImplicitModuleSuffixLength = 0; + /// The kind of the module file (\c Kind), or the length of the implicit + /// module file name suffix in \c Path (integer values 2+). + Kind KindOrSuffixLength; public: /// Creates an empty module file name. ModuleFileName() = default; + /// Creates a file name from the raw kind value. + static ModuleFileName makeFromRaw(StringRef Name, unsigned RawKind) { + ModuleFileName File; + File.Path = Name; + File.KindOrSuffixLength = static_cast<Kind>(RawKind); + return File; + } + + /// Creates a file name for an in-memory module. + static ModuleFileName makeInMemory(StringRef Name) { + ModuleFileName File; + File.Path = Name; + File.KindOrSuffixLength = InMemory; + return File; + } + /// Creates a file name for an explicit module. static ModuleFileName makeExplicit(std::string Name) { ModuleFileName File; File.Path = std::move(Name); + File.KindOrSuffixLength = Explicit; return File; } @@ -126,12 +153,12 @@ class ModuleFileName { /// Creates a file name for an implicit module. static ModuleFileName makeImplicit(std::string Name, unsigned SuffixLength) { - assert(SuffixLength != 0 && "Empty suffix for implicit module file name"); + assert(SuffixLength >= 2 && "Invalid suffix for implicit module file name"); assert(SuffixLength <= Name.size() && "Suffix for implicit module file name out-of-bounds"); ModuleFileName File; File.Path = std::move(Name); - File.ImplicitModuleSuffixLength = SuffixLength; + File.KindOrSuffixLength = static_cast<Kind>(SuffixLength); return File; } @@ -142,9 +169,21 @@ class ModuleFileName { /// Returns the suffix length for an implicit module name, zero otherwise. unsigned getImplicitModuleSuffixLength() const { - return ImplicitModuleSuffixLength; + switch (KindOrSuffixLength) { + case InMemory: + case Explicit: + return 0; + default: + return KindOrSuffixLength; + } } + /// Returns \c true iff this is an in-memory module file, \c false otherwise. + bool isInMemory() const { return KindOrSuffixLength == InMemory; } + + /// Returns the raw value representing the kind of the module file. + unsigned getRawKind() const { return KindOrSuffixLength; } + /// Returns the plain module file name. StringRef str() const { return Path; } diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index bedac9f8a540a..03b8fa74a81fe 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1966,12 +1966,6 @@ class ASTReader : public ExternalPreprocessorSource, /// Update the state of Sema after loading some additional modules. void UpdateSema(); - /// Add in-memory (virtual file) buffer. - void addInMemoryBuffer(StringRef &FileName, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { - ModuleMgr.addInMemoryBuffer(FileName, std::move(Buffer)); - } - /// Finalizes the AST reader's state before writing an AST file to /// disk. /// diff --git a/clang/include/clang/Serialization/ModuleManager.h b/clang/include/clang/Serialization/ModuleManager.h index 6f7f657dfc23f..63dc1c25b469a 100644 --- a/clang/include/clang/Serialization/ModuleManager.h +++ b/clang/include/clang/Serialization/ModuleManager.h @@ -129,11 +129,6 @@ class ModuleManager { /// Preprocessor's HeaderSearchInfo containing the module map. const HeaderSearch &HeaderSearchInfo; - /// A lookup of in-memory (virtual file) buffers. - // FIXME: No need to key this by `FileEntry`. - llvm::DenseMap<const FileEntry *, std::unique_ptr<llvm::MemoryBuffer>> - InMemoryBuffers; - /// The visitation order. SmallVector<ModuleFile *, 4> VisitOrder; @@ -245,10 +240,6 @@ class ModuleManager { /// Returns the module associated with the given module file key. ModuleFile *lookup(ModuleFileKey Key) const; - /// Returns the in-memory (virtual file) buffer with the given name - std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name, off_t &Size, - time_t &ModTime); - /// Number of modules loaded unsigned size() const { return Chain.size(); } @@ -292,10 +283,6 @@ class ModuleManager { /// Remove the modules starting from First (to the end). void removeModules(ModuleIterator First); - /// Add an in-memory buffer the list of known buffers - void addInMemoryBuffer(StringRef FileName, - std::unique_ptr<llvm::MemoryBuffer> Buffer); - /// Set the global module index. void setGlobalIndex(GlobalModuleIndex *Index); diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 83fe82365b008..898405b4e5809 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -60,6 +60,7 @@ #include "clang/Sema/SemaCodeCompletion.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" +#include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleCache.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/PCHContainerOperations.h" @@ -823,6 +824,8 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile( AST->LangOpts->CommentOpts); } + ModuleFileName ModuleFilename = ModuleFileName::makeExplicit(Filename); + // The temporary FileManager we used for ASTReader::readASTFileControlBlock() // might have already read stdin, and reading it again will fail. Let's // explicitly forward the buffer. @@ -831,16 +834,18 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile( if (auto BufRef = TmpFileMgr.getBufferForFile(*FE)) { auto Buf = llvm::MemoryBuffer::getMemBufferCopy( (*BufRef)->getBuffer(), (*BufRef)->getBufferIdentifier()); - AST->Reader->getModuleManager().addInMemoryBuffer("-", std::move(Buf)); + off_t BufSize = Buf->getBufferSize(); + AST->ModCache->getInMemoryModuleCache().addBuiltPCM( + "-", std::move(Buf), BufSize, /*ModTime=*/0); + ModuleFilename = ModuleFileName::makeInMemory(Filename); } // Reinstate the provided options that are relevant for reading AST files. AST->HSOpts->ForceCheckCXX20ModulesInputFiles = HSOpts.ForceCheckCXX20ModulesInputFiles; - switch (AST->Reader->ReadAST(ModuleFileName::makeExplicit(Filename), - serialization::MK_MainFile, SourceLocation(), - ASTReader::ARR_None)) { + switch (AST->Reader->ReadAST(ModuleFilename, serialization::MK_MainFile, + SourceLocation(), ASTReader::ARR_None)) { case ASTReader::Success: break; diff --git a/clang/lib/Frontend/ChainedIncludesSource.cpp b/clang/lib/Frontend/ChainedIncludesSource.cpp index aa93785322a5c..7a552a572ca78 100644 --- a/clang/lib/Frontend/ChainedIncludesSource.cpp +++ b/clang/lib/Frontend/ChainedIncludesSource.cpp @@ -22,6 +22,8 @@ #include "clang/Sema/MultiplexExternalSemaSource.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTWriter.h" +#include "clang/Serialization/InMemoryModuleCache.h" +#include "clang/Serialization/ModuleCache.h" #include "llvm/Support/MemoryBuffer.h" using namespace clang; @@ -65,11 +67,12 @@ createASTReader(CompilerInstance &CI, StringRef pchFile, /*Extensions=*/ArrayRef<std::shared_ptr<ModuleFileExtension>>(), /*isysroot=*/"", DisableValidationForModuleKind::PCH); for (unsigned ti = 0; ti < bufNames.size(); ++ti) { - StringRef sr(bufNames[ti]); - Reader->addInMemoryBuffer(sr, std::move(MemBufs[ti])); + off_t MemBufSize = MemBufs[ti]->getBufferSize(); + CI.getModuleCache().getInMemoryModuleCache().addBuiltPCM( + bufNames[ti], std::move(MemBufs[ti]), MemBufSize, /*ModTime=*/0); } Reader->setDeserializationListener(deserialListener); - switch (Reader->ReadAST(ModuleFileName::makeExplicit(pchFile), + switch (Reader->ReadAST(ModuleFileName::makeInMemory(pchFile), serialization::MK_PCH, SourceLocation(), ASTReader::ARR_None)) { case ASTReader::Success: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 5425c1da7419b..f13b70d695b0d 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3499,7 +3499,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, off_t StoredSize = 0; time_t StoredModTime = 0; - unsigned ImplicitModuleSuffixLength = 0; + unsigned FileNameKind = 0; ASTFileSignature StoredSignature; ModuleFileName ImportedFile; std::string StoredFile; @@ -3523,7 +3523,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, if (!IsImportingStdCXXModule) { StoredSize = (off_t)Record[Idx++]; StoredModTime = (time_t)Record[Idx++]; - ImplicitModuleSuffixLength = (unsigned)Record[Idx++]; + FileNameKind = (unsigned)Record[Idx++]; StringRef SignatureBytes = Blob.substr(0, ASTFileSignature::size); StoredSignature = ASTFileSignature::create(SignatureBytes.begin(), @@ -3532,12 +3532,7 @@ ASTReader::ReadControlBlock(ModuleFile &F, StoredFile = ReadPathBlob(BaseDirectoryAsWritten, Record, Idx, Blob); if (ImportedFile.empty()) { - ImportedFile = ImplicitModuleSuffixLength - ? ModuleFileName::makeImplicit( - StoredFile, ImplicitModuleSuffixLength) - : ModuleFileName::makeExplicit(StoredFile); - assert((ImportedKind == MK_ImplicitModule) == - (ImplicitModuleSuffixLength != 0)); + ImportedFile = ModuleFileName::makeFromRaw(StoredFile, FileNameKind); } else if (!getDiags().isIgnored( diag::warn_module_file_mapping_mismatch, CurrentImportLoc)) { @@ -4691,6 +4686,8 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { Kind == MK_ImplicitModule ? ModuleMgr.lookupByModuleName(Name) : ModuleMgr.lookupByFileName(ModuleFileName::makeExplicit(Name))); + if (!OM) + OM = ModuleMgr.lookupByFileName(ModuleFileName::makeInMemory(Name)); if (!OM) { std::string Msg = "refers to unknown module, cannot find "; Msg.append(std::string(Name)); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c0c4aa107e200..9dbbdc0249caf 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1570,7 +1570,7 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Standard C++ mod Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File size Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File timestamp - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Implicit suff len + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File name raw kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // File name len Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Strings unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); @@ -1604,9 +1604,10 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) { Record.push_back(M.Signature ? 0 : M.Size); Record.push_back(M.Signature ? 0 : getTimestampForOutput(M.ModTime)); + Record.push_back(M.FileName.getRawKind()); + llvm::append_range(Blob, M.Signature); - Record.push_back(M.FileName.getImplicitModuleSuffixLength()); AddPathBlob(M.FileName, Record, Blob); } diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index b1ec886704fe4..920cf07736407 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -48,6 +48,12 @@ ModuleManager::makeKey(const ModuleFileName &Name) const { StringRef ImplicitModuleSuffix = StringRef(Name).take_back(SuffixLen); if (auto *ModuleCacheDir = ModCache.getDirectoryPtr(ModuleCachePath)) return ModuleFileKey(ModuleCacheDir, ImplicitModuleSuffix); + } else if (Name.isInMemory()) { + off_t Size; + time_t ModTime; + if (auto *Buf = + ModCache.getInMemoryModuleCache().lookupPCM(Name, Size, ModTime)) + return ModuleFileKey(Buf); } else { if (auto ModuleFile = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/true, /*CacheFailure=*/false, @@ -75,18 +81,6 @@ ModuleFile *ModuleManager::lookup(ModuleFileKey Key) const { return Modules.lookup(Key); } -std::unique_ptr<llvm::MemoryBuffer> -ModuleManager::lookupBuffer(StringRef Name, off_t &Size, time_t &ModTime) { - auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, - /*CacheFailure=*/false, - /*IsText=*/false); - if (!Entry) - return nullptr; - Size = Entry->getSize(); - ModTime = Entry->getModificationTime(); - return std::move(InMemoryBuffers[*Entry]); -} - bool ModuleManager::isModuleFileOutOfDate(off_t Size, time_t ModTime, off_t ExpectedSize, time_t ExpectedModTime, @@ -185,14 +179,9 @@ AddModuleResult ModuleManager::addModule( time_t ModTime = ExpectedModTime; llvm::MemoryBuffer *ModuleBuffer = nullptr; std::unique_ptr<llvm::MemoryBuffer> NewFileBuffer = nullptr; - if (std::unique_ptr<llvm::MemoryBuffer> Buffer = - lookupBuffer(FileName, Size, ModTime)) { - // The buffer was already provided for us. - ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM( - FileName, std::move(Buffer), Size, ModTime); - } else if (llvm::MemoryBuffer *Buffer = - getModuleCache().getInMemoryModuleCache().lookupPCM( - FileName, Size, ModTime)) { + if (llvm::MemoryBuffer *Buffer = + getModuleCache().getInMemoryModuleCache().lookupPCM(FileName, Size, + ModTime)) { ModuleBuffer = Buffer; } else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM( FileName)) { @@ -205,6 +194,8 @@ AddModuleResult ModuleManager::addModule( return Result; } else { auto Buf = [&]() -> Expected<std::unique_ptr<llvm::MemoryBuffer>> { + assert(!FileName.isInMemory() && "In-memory module not found"); + // Implicit modules live in the module cache. if (FileName.getImplicitModuleSuffixLength()) return ModCache.read(FileName, Size, ModTime); @@ -326,14 +317,6 @@ void ModuleManager::removeModules(ModuleIterator First) { Chain.erase(Chain.begin() + (First - begin()), Chain.end()); } -void -ModuleManager::addInMemoryBuffer(StringRef FileName, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { - FileEntryRef Entry = - FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0); - InMemoryBuffers[Entry] = std::move(Buffer); -} - std::unique_ptr<ModuleManager::VisitState> ModuleManager::allocateVisitState() { // Fast path: if we have a cached state, use it. if (FirstVisitState) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
