https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/190207
>From 8e6d7ada504e38f138d28e5101d051fbcb3d0e9c Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Fri, 20 Mar 2026 21:09:27 -0700 Subject: [PATCH] [clang] Store size & mtime in in-memory module cache --- .../clang/Serialization/InMemoryModuleCache.h | 25 +++++++++---- .../include/clang/Serialization/ModuleCache.h | 7 ++-- .../clang/Serialization/ModuleManager.h | 6 ++-- .../InProcessModuleCache.cpp | 5 +-- clang/lib/Frontend/CompilerInstance.cpp | 8 +++-- clang/lib/Serialization/ASTReader.cpp | 4 ++- .../lib/Serialization/InMemoryModuleCache.cpp | 18 +++++++--- clang/lib/Serialization/ModuleCache.cpp | 16 +++++++-- clang/lib/Serialization/ModuleManager.cpp | 33 +++++------------ .../Serialization/InMemoryModuleCacheTest.cpp | 36 +++++++++++-------- 10 files changed, 95 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/Serialization/InMemoryModuleCache.h b/clang/include/clang/Serialization/InMemoryModuleCache.h index fc3ba334fc64d..5e3fc19c48ff0 100644 --- a/clang/include/clang/Serialization/InMemoryModuleCache.h +++ b/clang/include/clang/Serialization/InMemoryModuleCache.h @@ -28,16 +28,24 @@ namespace clang { /// each \a ModuleManager sees the same files. class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { struct PCM { + /// The contents of the PCM as produced by \c ASTWriter. std::unique_ptr<llvm::MemoryBuffer> Buffer; + /// The size of this PCM. This may be different from the size of \c Buffer + /// when it's wrapped in an object file. + off_t Size = 0; + + /// The modification time of this PCM. + time_t ModTime = 0; + /// Track whether this PCM is known to be good (either built or /// successfully imported by a CompilerInstance/ASTReader using this /// cache). bool IsFinal = false; PCM() = default; - PCM(std::unique_ptr<llvm::MemoryBuffer> Buffer) - : Buffer(std::move(Buffer)) {} + PCM(std::unique_ptr<llvm::MemoryBuffer> Buffer, off_t Size, time_t ModTime) + : Buffer(std::move(Buffer)), Size(Size), ModTime(ModTime) {} }; /// Cache of buffers. @@ -64,7 +72,8 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { /// \post state is Tentative /// \return a reference to the buffer as a convenience. llvm::MemoryBuffer &addPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer); + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime); /// Store a just-built PCM under the Filename. /// @@ -72,7 +81,8 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { /// \pre state is not Tentative. /// \return a reference to the buffer as a convenience. llvm::MemoryBuffer &addBuiltPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer); + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime); /// Try to remove a buffer from the cache. No effect if state is Final. /// @@ -87,8 +97,11 @@ class InMemoryModuleCache : public llvm::RefCountedBase<InMemoryModuleCache> { /// \post state is Final. void finalizePCM(llvm::StringRef Filename); - /// Get a pointer to the pCM if it exists; else nullptr. - llvm::MemoryBuffer *lookupPCM(llvm::StringRef Filename) const; + /// Get a pointer to the PCM if it exists and set \c Size and \c ModTime to + /// its on-disk size and modification time. Otherwise, return nullptr and + /// don't change \c Size and \c ModTime. + llvm::MemoryBuffer *lookupPCM(llvm::StringRef Filename, off_t &Size, + time_t &ModTime) const; /// Check whether the PCM is final and has been shown to work. /// diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h index 6683511b56a05..0be884133ace8 100644 --- a/clang/include/clang/Serialization/ModuleCache.h +++ b/clang/include/clang/Serialization/ModuleCache.h @@ -58,8 +58,8 @@ class ModuleCache { virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0; /// Write the PCM contents to the given path in the module cache. - virtual std::error_code write(StringRef Path, - llvm::MemoryBufferRef Buffer) = 0; + virtual std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) = 0; virtual Expected<std::unique_ptr<llvm::MemoryBuffer>> read(StringRef FileName, off_t &Size, time_t &ModTime) = 0; @@ -77,7 +77,8 @@ std::shared_ptr<ModuleCache> createCrossProcessModuleCache(); void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter); /// Shared implementation of `ModuleCache::write()`. -std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer); +std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime); /// Shared implementation of `ModuleCache::read()`. Expected<std::unique_ptr<llvm::MemoryBuffer>> diff --git a/clang/include/clang/Serialization/ModuleManager.h b/clang/include/clang/Serialization/ModuleManager.h index 1ef9aeee7e1fd..80f43ea922a17 100644 --- a/clang/include/clang/Serialization/ModuleManager.h +++ b/clang/include/clang/Serialization/ModuleManager.h @@ -73,7 +73,8 @@ class ModuleManager { /// Preprocessor's HeaderSearchInfo containing the module map. const HeaderSearch &HeaderSearchInfo; - /// A lookup of in-memory (virtual file) buffers + /// A lookup of in-memory (virtual file) buffers. + // FIXME: No need to key this by `FileEntry`. llvm::DenseMap<const FileEntry *, std::unique_ptr<llvm::MemoryBuffer>> InMemoryBuffers; @@ -182,7 +183,8 @@ class ModuleManager { ModuleFile *lookup(ModuleFileKey Key) const; /// Returns the in-memory (virtual file) buffer with the given name - std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name); + std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name, off_t &Size, + time_t &ModTime); /// Number of modules loaded unsigned size() const { return Chain.size(); } diff --git a/clang/lib/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/DependencyScanning/InProcessModuleCache.cpp index 0565f5eebfe04..6ef20a8806b8c 100644 --- a/clang/lib/DependencyScanning/InProcessModuleCache.cpp +++ b/clang/lib/DependencyScanning/InProcessModuleCache.cpp @@ -134,13 +134,14 @@ class InProcessModuleCache : public ModuleCache { return InMemory; } - std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) override { // This is a compiler-internal input/output, let's bypass the sandbox. auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); // FIXME: This could use an in-memory cache to avoid IO, and only write to // disk at the end of the scan. - return writeImpl(Path, Buffer); + return writeImpl(Path, Buffer, Size, ModTime); } Expected<std::unique_ptr<llvm::MemoryBuffer>> diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index a504cde306a35..19ee7a01d7974 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1470,8 +1470,10 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, } } - std::error_code EC = - ImportingInstance.getModuleCache().write(ModuleFileName, *Buffer); + off_t Size; + time_t ModTime; + std::error_code EC = ImportingInstance.getModuleCache().write( + ModuleFileName, *Buffer, Size, ModTime); if (EC) { ImportingInstance.getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_written) @@ -1498,7 +1500,7 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, Buffer = llvm::MemoryBuffer::getMemBufferCopy(ExtractedBuffer); ImportingInstance.getModuleCache().getInMemoryModuleCache().addBuiltPCM( - ModuleFileName, std::move(Buffer)); + ModuleFileName, std::move(Buffer), Size, ModTime); return true; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b211b0d32e1de..45a0feb99f54f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5934,9 +5934,11 @@ bool ASTReader::readASTFileControlBlock( ASTReaderListener &Listener, bool ValidateDiagnosticOptions, unsigned ClientLoadCapabilities) { // Open the AST file. + off_t Size; + time_t ModTime; std::unique_ptr<llvm::MemoryBuffer> OwnedBuffer; llvm::MemoryBuffer *Buffer = - ModCache.getInMemoryModuleCache().lookupPCM(Filename); + ModCache.getInMemoryModuleCache().lookupPCM(Filename, Size, ModTime); if (!Buffer) { // FIXME: We should add the pcm to the InMemoryModuleCache if it could be // read again later, but we do not have the context here to determine if it diff --git a/clang/lib/Serialization/InMemoryModuleCache.cpp b/clang/lib/Serialization/InMemoryModuleCache.cpp index d35fa2a807f4d..dcd6395434c16 100644 --- a/clang/lib/Serialization/InMemoryModuleCache.cpp +++ b/clang/lib/Serialization/InMemoryModuleCache.cpp @@ -23,28 +23,36 @@ InMemoryModuleCache::getPCMState(llvm::StringRef Filename) const { llvm::MemoryBuffer & InMemoryModuleCache::addPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { - auto Insertion = PCMs.insert(std::make_pair(Filename, std::move(Buffer))); + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime) { + auto Insertion = PCMs.insert( + std::make_pair(Filename, PCM(std::move(Buffer), Size, ModTime))); assert(Insertion.second && "Already has a PCM"); return *Insertion.first->second.Buffer; } llvm::MemoryBuffer & InMemoryModuleCache::addBuiltPCM(llvm::StringRef Filename, - std::unique_ptr<llvm::MemoryBuffer> Buffer) { + std::unique_ptr<llvm::MemoryBuffer> Buffer, + off_t Size, time_t ModTime) { auto &PCM = PCMs[Filename]; assert(!PCM.IsFinal && "Trying to override finalized PCM?"); assert(!PCM.Buffer && "Trying to override tentative PCM?"); PCM.Buffer = std::move(Buffer); + PCM.Size = Size; + PCM.ModTime = ModTime; PCM.IsFinal = true; return *PCM.Buffer; } -llvm::MemoryBuffer * -InMemoryModuleCache::lookupPCM(llvm::StringRef Filename) const { +llvm::MemoryBuffer *InMemoryModuleCache::lookupPCM(llvm::StringRef Filename, + off_t &Size, + time_t &ModTime) const { auto I = PCMs.find(Filename); if (I == PCMs.end()) return nullptr; + Size = I->second.Size; + ModTime = I->second.ModTime; return I->second.Buffer.get(); } diff --git a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp index e82875f6f2831..36ae19aff466b 100644 --- a/clang/lib/Serialization/ModuleCache.cpp +++ b/clang/lib/Serialization/ModuleCache.cpp @@ -112,7 +112,8 @@ void clang::maybePruneImpl(StringRef Path, time_t PruneInterval, } } -std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) { +std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) { StringRef Extension = llvm::sys::path::extension(Path); SmallString<128> ModelPath = StringRef(Path).drop_back(Extension.size()); ModelPath += "-%%%%%%%%"; @@ -134,11 +135,19 @@ std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) { return EC; } + llvm::sys::fs::file_status Status; { llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true); OS << Buffer.getBuffer(); + // Using the status from an open file descriptor ensures this is not racy. + if ((EC = llvm::sys::fs::status(FD, Status))) + return EC; } + Size = Status.getSize(); + ModTime = llvm::sys::toTimeT(Status.getLastModificationTime()); + + // This preserves both size and modification time. if ((EC = llvm::sys::fs::rename(TmpPath, Path))) return EC; @@ -225,11 +234,12 @@ class CrossProcessModuleCache : public ModuleCache { return InMemory; } - std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer, + off_t &Size, time_t &ModTime) override { // This is a compiler-internal input/output, let's bypass the sandbox. auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); - return writeImpl(Path, Buffer); + return writeImpl(Path, Buffer, Size, ModTime); } Expected<std::unique_ptr<llvm::MemoryBuffer>> diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 022e2ef42f635..b7d0ee85bc05e 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -59,11 +59,13 @@ ModuleFile *ModuleManager::lookup(ModuleFileKey Key) const { } std::unique_ptr<llvm::MemoryBuffer> -ModuleManager::lookupBuffer(StringRef Name) { +ModuleManager::lookupBuffer(StringRef Name, off_t &Size, time_t &ModTime) { auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, /*CacheFailure=*/false); if (!Entry) return nullptr; + Size = Entry->getSize(); + ModTime = Entry->getModificationTime(); return std::move(InMemoryBuffers[*Entry]); } @@ -157,32 +159,15 @@ ModuleManager::AddModuleResult ModuleManager::addModule( time_t ModTime = ExpectedModTime; llvm::MemoryBuffer *ModuleBuffer = nullptr; std::unique_ptr<llvm::MemoryBuffer> NewFileBuffer = nullptr; - if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) { + if (std::unique_ptr<llvm::MemoryBuffer> Buffer = + lookupBuffer(FileName, Size, ModTime)) { // The buffer was already provided for us. ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM( - FileName, std::move(Buffer)); + FileName, std::move(Buffer), Size, ModTime); } else if (llvm::MemoryBuffer *Buffer = getModuleCache().getInMemoryModuleCache().lookupPCM( - FileName)) { + FileName, Size, ModTime)) { ModuleBuffer = Buffer; - if (!FileName.getImplicitModuleSuffixLength()) { - // Explicitly-built PCM files maintain consistency via mtime/size - // expectations on their imports. Even if we've previously successfully - // loaded a PCM file and stored it in the in-memory module cache, that - // does not mean its mtime/size matches current importer's expectations. - // Get that information so that it can be checked below. - // FIXME: Even though this FileManager access is likely already cached, we - // should store this directly in the in-memory module cache. - OptionalFileEntryRef Entry = - FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true, - /*CacheFailure=*/false); - if (!Entry) { - ErrorStr = "module file not found"; - return Missing; - } - ModTime = Entry->getModificationTime(); - Size = Entry->getSize(); - } } else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM( FileName)) { // Report that the module is out of date, since we tried (and failed) to @@ -246,8 +231,8 @@ ModuleManager::AddModuleResult ModuleManager::addModule( return OutOfDate; if (NewFileBuffer) - getModuleCache().getInMemoryModuleCache().addPCM(FileName, - std::move(NewFileBuffer)); + getModuleCache().getInMemoryModuleCache().addPCM( + FileName, std::move(NewFileBuffer), Size, ModTime); // We're keeping this module. Store it in the map. Module = Modules[*FileKey] = NewModule.get(); diff --git a/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp b/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp index ed5e1538eba74..f0cfa2f8f0c3d 100644 --- a/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp +++ b/clang/unittests/Serialization/InMemoryModuleCacheTest.cpp @@ -39,15 +39,17 @@ TEST(InMemoryModuleCacheTest, addPCM) { auto *RawB = B.get(); InMemoryModuleCache Cache; - EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B))); + EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B), 0, 0)); EXPECT_EQ(InMemoryModuleCache::Tentative, Cache.getPCMState("B")); - EXPECT_EQ(RawB, Cache.lookupPCM("B")); + off_t Size; + time_t ModTime; + EXPECT_EQ(RawB, Cache.lookupPCM("B", Size, ModTime)); EXPECT_FALSE(Cache.isPCMFinal("B")); EXPECT_FALSE(Cache.shouldBuildPCM("B")); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(Cache.addPCM("B", getBuffer(2)), "Already has a PCM"); - EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2)), + EXPECT_DEATH(Cache.addPCM("B", getBuffer(2), 0, 0), "Already has a PCM"); + EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2), 0, 0), "Trying to override tentative PCM"); #endif } @@ -57,15 +59,17 @@ TEST(InMemoryModuleCacheTest, addBuiltPCM) { auto *RawB = B.get(); InMemoryModuleCache Cache; - EXPECT_EQ(RawB, &Cache.addBuiltPCM("B", std::move(B))); + EXPECT_EQ(RawB, &Cache.addBuiltPCM("B", std::move(B), 0, 0)); EXPECT_EQ(InMemoryModuleCache::Final, Cache.getPCMState("B")); - EXPECT_EQ(RawB, Cache.lookupPCM("B")); + off_t Size; + time_t ModTime; + EXPECT_EQ(RawB, Cache.lookupPCM("B", Size, ModTime)); EXPECT_TRUE(Cache.isPCMFinal("B")); EXPECT_FALSE(Cache.shouldBuildPCM("B")); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(Cache.addPCM("B", getBuffer(2)), "Already has a PCM"); - EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2)), + EXPECT_DEATH(Cache.addPCM("B", getBuffer(2), 0, 0), "Already has a PCM"); + EXPECT_DEATH(Cache.addBuiltPCM("B", getBuffer(2), 0, 0), "Trying to override finalized PCM"); #endif } @@ -79,27 +83,31 @@ TEST(InMemoryModuleCacheTest, tryToDropPCM) { InMemoryModuleCache Cache; EXPECT_EQ(InMemoryModuleCache::Unknown, Cache.getPCMState("B")); - EXPECT_EQ(RawB1, &Cache.addPCM("B", std::move(B1))); + EXPECT_EQ(RawB1, &Cache.addPCM("B", std::move(B1), 0, 0)); EXPECT_FALSE(Cache.tryToDropPCM("B")); - EXPECT_EQ(nullptr, Cache.lookupPCM("B")); + off_t Size1; + time_t ModTime1; + EXPECT_EQ(nullptr, Cache.lookupPCM("B", Size1, ModTime1)); EXPECT_EQ(InMemoryModuleCache::ToBuild, Cache.getPCMState("B")); EXPECT_FALSE(Cache.isPCMFinal("B")); EXPECT_TRUE(Cache.shouldBuildPCM("B")); #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(Cache.addPCM("B", getBuffer(2)), "Already has a PCM"); + EXPECT_DEATH(Cache.addPCM("B", getBuffer(2), 0, 0), "Already has a PCM"); EXPECT_DEATH(Cache.tryToDropPCM("B"), "PCM to remove is scheduled to be built"); EXPECT_DEATH(Cache.finalizePCM("B"), "Trying to finalize a dropped PCM"); #endif // Add a new one. - EXPECT_EQ(RawB2, &Cache.addBuiltPCM("B", std::move(B2))); + EXPECT_EQ(RawB2, &Cache.addBuiltPCM("B", std::move(B2), 0, 0)); EXPECT_TRUE(Cache.isPCMFinal("B")); // Can try to drop again, but this should error and do nothing. EXPECT_TRUE(Cache.tryToDropPCM("B")); - EXPECT_EQ(RawB2, Cache.lookupPCM("B")); + off_t Size2; + time_t ModTime2; + EXPECT_EQ(RawB2, Cache.lookupPCM("B", Size2, ModTime2)); } TEST(InMemoryModuleCacheTest, finalizePCM) { @@ -108,7 +116,7 @@ TEST(InMemoryModuleCacheTest, finalizePCM) { InMemoryModuleCache Cache; EXPECT_EQ(InMemoryModuleCache::Unknown, Cache.getPCMState("B")); - EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B))); + EXPECT_EQ(RawB, &Cache.addPCM("B", std::move(B), 0, 0)); // Call finalize. Cache.finalizePCM("B"); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
