https://github.com/artemcm updated https://github.com/llvm/llvm-project/pull/199680
>From 73e486c8e67a6272cbbe7100608c55baafe6f9a4 Mon Sep 17 00:00:00 2001 From: Artem Chikin <[email protected]> Date: Thu, 28 May 2026 11:28:31 +0100 Subject: [PATCH 1/2] [Support][VFS] Make `TracingFileSystem` a template on its counter type Templating `TracingFileSystem` on the counter type lets the same proxy serve single-threaded use (with `std::size_t` counters) and concurrent use (with `std::atomic<std::size_t>`) without duplicating the class. Introduce `TracingFileSystemImpl<CounterT>` as the underlying class template and provide two aliases: using TracingFileSystem = TracingFileSystemImpl<std::size_t>; using AtomicTracingFileSystem = TracingFileSystemImpl<std::atomic<std::size_t>>; Existing callers continue to use `TracingFileSystem` unchanged. A new `TracingFileSystemTest.AtomicCountersUnderConcurrency` test exercises the atomic instantiation with eight threads. --- llvm/include/llvm/Support/VirtualFileSystem.h | 69 +++++++++++++++---- llvm/lib/Support/VirtualFileSystem.cpp | 26 ------- .../Support/VirtualFileSystemTest.cpp | 30 ++++++++ 3 files changed, 86 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 5b8871b8f3db5..d22c534228331 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -27,6 +27,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" +#include <atomic> #include <cassert> #include <cstdint> #include <ctime> @@ -1156,20 +1157,28 @@ class YAMLVFSWriter { /// File system that tracks the number of calls to the underlying file system. /// This is particularly useful when wrapped around \c RealFileSystem to add /// lightweight tracking of expensive syscalls. -class LLVM_ABI TracingFileSystem - : public llvm::RTTIExtends<TracingFileSystem, ProxyFileSystem> { +/// +/// Templated on the counter type so callers can choose between non-atomic +/// counters (suitable for single-threaded tracing) and atomic counters +/// (suitable for tracing under concurrent access). Use the +/// \c TracingFileSystem and \c AtomicTracingFileSystem aliases below. +template <typename CounterT> +class TracingFileSystemImpl + : public llvm::RTTIExtends<TracingFileSystemImpl<CounterT>, + ProxyFileSystem> { public: - static const char ID; + inline static const char ID = 0; - std::size_t NumStatusCalls = 0; - std::size_t NumOpenFileForReadCalls = 0; - std::size_t NumDirBeginCalls = 0; - std::size_t NumGetRealPathCalls = 0; - std::size_t NumExistsCalls = 0; - std::size_t NumIsLocalCalls = 0; + CounterT NumStatusCalls = 0; + CounterT NumOpenFileForReadCalls = 0; + CounterT NumDirBeginCalls = 0; + CounterT NumGetRealPathCalls = 0; + CounterT NumExistsCalls = 0; + CounterT NumIsLocalCalls = 0; - TracingFileSystem(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) - : RTTIExtends(std::move(FS)) {} + TracingFileSystemImpl(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) + : llvm::RTTIExtends<TracingFileSystemImpl<CounterT>, ProxyFileSystem>( + std::move(FS)) {} ErrorOr<Status> status(const Twine &Path) override { ++NumStatusCalls; @@ -1203,10 +1212,44 @@ class LLVM_ABI TracingFileSystem } protected: - void printImpl(raw_ostream &OS, PrintType Type, - unsigned IndentLevel) const override; + void printImpl(raw_ostream &OS, FileSystem::PrintType Type, + unsigned IndentLevel) const override { + FileSystem::printIndent(OS, IndentLevel); + OS << "TracingFileSystem\n"; + if (Type == FileSystem::PrintType::Summary) + return; + + FileSystem::printIndent(OS, IndentLevel); + OS << "NumStatusCalls=" << static_cast<std::size_t>(NumStatusCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumOpenFileForReadCalls=" + << static_cast<std::size_t>(NumOpenFileForReadCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumDirBeginCalls=" << static_cast<std::size_t>(NumDirBeginCalls) + << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumGetRealPathCalls=" + << static_cast<std::size_t>(NumGetRealPathCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumExistsCalls=" << static_cast<std::size_t>(NumExistsCalls) << "\n"; + FileSystem::printIndent(OS, IndentLevel); + OS << "NumIsLocalCalls=" << static_cast<std::size_t>(NumIsLocalCalls) + << "\n"; + + if (Type == FileSystem::PrintType::Contents) + Type = FileSystem::PrintType::Summary; + this->getUnderlyingFS().print(OS, Type, IndentLevel + 1); + } }; +/// Single-threaded tracing filesystem. Counters are plain \c std::size_t and +/// must not be incremented concurrently. +using TracingFileSystem = TracingFileSystemImpl<std::size_t>; + +/// Concurrent-safe tracing filesystem. Counters are \c std::atomic<std::size_t> +/// so the proxy can be shared across threads. +using AtomicTracingFileSystem = TracingFileSystemImpl<std::atomic<std::size_t>>; + } // namespace vfs } // namespace llvm diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 69f3bb8582b87..42e8bb4f9958e 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -2991,34 +2991,8 @@ recursive_directory_iterator::increment(std::error_code &EC) { return *this; } -void TracingFileSystem::printImpl(raw_ostream &OS, PrintType Type, - unsigned IndentLevel) const { - printIndent(OS, IndentLevel); - OS << "TracingFileSystem\n"; - if (Type == PrintType::Summary) - return; - - printIndent(OS, IndentLevel); - OS << "NumStatusCalls=" << NumStatusCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumOpenFileForReadCalls=" << NumOpenFileForReadCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumDirBeginCalls=" << NumDirBeginCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumGetRealPathCalls=" << NumGetRealPathCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumExistsCalls=" << NumExistsCalls << "\n"; - printIndent(OS, IndentLevel); - OS << "NumIsLocalCalls=" << NumIsLocalCalls << "\n"; - - if (Type == PrintType::Contents) - Type = PrintType::Summary; - getUnderlyingFS().print(OS, Type, IndentLevel + 1); -} - const char FileSystem::ID = 0; const char OverlayFileSystem::ID = 0; const char ProxyFileSystem::ID = 0; const char InMemoryFileSystem::ID = 0; const char RedirectingFileSystem::ID = 0; -const char TracingFileSystem::ID = 0; diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp index bb0172b28e373..507a0e4108a79 100644 --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -22,6 +22,8 @@ #include "gtest/gtest.h" #include <map> #include <string> +#include <thread> +#include <vector> using namespace llvm; using llvm::sys::fs::UniqueID; @@ -3732,3 +3734,31 @@ TEST(TracingFileSystemTest, PrintOutput) { " InMemoryFileSystem\n", Output); } + +TEST(TracingFileSystemTest, AtomicCountersUnderConcurrency) { + auto InMemoryFS = makeIntrusiveRefCnt<vfs::InMemoryFileSystem>(); + InMemoryFS->setCurrentWorkingDirectory("/"); + InMemoryFS->addFile("/foo", 0, MemoryBuffer::getMemBuffer("hello")); + + auto TracingFS = + makeIntrusiveRefCnt<vfs::AtomicTracingFileSystem>(std::move(InMemoryFS)); + + constexpr unsigned NumThreads = 8; + constexpr unsigned CallsPerThread = 100; + std::vector<std::thread> Threads; + Threads.reserve(NumThreads); + for (unsigned I = 0; I < NumThreads; ++I) { + Threads.emplace_back([&] { + for (unsigned J = 0; J < CallsPerThread; ++J) { + (void)TracingFS->status("/foo"); + (void)TracingFS->openFileForRead("/foo"); + } + }); + } + for (auto &T : Threads) + T.join(); + + EXPECT_EQ(TracingFS->NumStatusCalls.load(), NumThreads * CallsPerThread); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), + NumThreads * CallsPerThread); +} >From e01edca43529f2ee50d24c89adb317d8a1a4c3d2 Mon Sep 17 00:00:00 2001 From: Artem Chikin <[email protected]> Date: Thu, 28 May 2026 11:29:31 +0100 Subject: [PATCH 2/2] [clang][deps] Add in-flight query caching to `DependencyScanningFilesystemSharedCache` The shared cache deduplicates results only after the underlying filesystem trip completes, so concurrent workers querying the same filename or UID each pay their own `stat` and `open`. Track in-flight queries per key: the first arrival produces the result, late arrivals wait on a condition variable and adopt it. `CacheByFilename`'s value gains a `std::shared_ptr<InProgressEntry>` field alongside the resolved entry and real path; `EntriesByUID` does the same. The in-progress entry is populated for the duration of a producer's filesystem trip and reset on publish. Four new shard methods manage the slot lifecycle: - `acquireFilenameSlot` / `acquireUIDSlot` collapse three outcomes (resolved hit, in-progress wait, fresh producer) into one critical section. Waiters block on the slot's condition variable, which atomically releases the shard lock for the duration of the wait. - `fulfilFilenameSlot` / `fulfilUIDSlot` publish the produced entry, set `Done`, clear the slot, and `notify_all` waiters outside the lock. --- .../DependencyScanningFilesystem.h | 224 ++++++----- .../DependencyScanningFilesystem.cpp | 355 ++++++++++++------ .../DependencyScanning/CMakeLists.txt | 2 + .../DependencyScanningFilesystemTest.cpp | 239 ++++++++++++ 4 files changed, 587 insertions(+), 233 deletions(-) diff --git a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h index c9ffc202ad694..8225a2fe72ddb 100644 --- a/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningFilesystem.h @@ -16,6 +16,8 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/VirtualFileSystem.h" +#include <condition_variable> +#include <memory> #include <mutex> #include <optional> #include <variant> @@ -154,19 +156,63 @@ using CachedRealPath = llvm::ErrorOr<std::string>; /// the worker threads. class DependencyScanningFilesystemSharedCache { public: + /// Tracks a cache entry whose value is currently being computed by one + /// worker so that other workers arriving at the same key can wait for the + /// result rather than producing it in parallel. The producer publishes the + /// produced outcome into \c Result, sets \c Done, and notifies waiters via + /// \c CondVar. \c Done and \c Result are guarded by this entry's own + /// \c Mutex, which also backs the wait, decoupled from the owning + /// shard's \c CacheLock so waking waiters do not contend on the shard lock. + /// + /// \c Result holds either a resolved entry or a bare error. The error case + /// represents an uncached negative stat: it is shared with the concurrent + /// waiters that overlapped this query in time, but is deliberately not + /// persisted in the shard, so a later, separate query re-runs it. + struct InProgressEntry { + std::mutex Mutex; + std::condition_variable CondVar; + bool Done = false; + llvm::ErrorOr<const CachedFileSystemEntry *> Result = std::error_code{}; + }; + + /// Outcome of attempting to claim a slot for a given key: + /// - \c Produce non-null: the caller has the right to produce the entry + /// and must call the matching \c fulfil*Slot once finished. \c Resolved + /// is unused. + /// - \c Produce null: the cache (or a concurrent producer) has already + /// resolved the key; the caller adopts \c Resolved, which is either the + /// resolved entry or an uncached-negative error. + struct SlotAcquisitionResult { + llvm::ErrorOr<const CachedFileSystemEntry *> Resolved; + std::shared_ptr<InProgressEntry> Produce; + }; + struct CacheShard { /// The mutex that needs to be locked before mutation of any member. mutable std::mutex CacheLock; - /// Map from filenames to cached entries and real paths. - llvm::StringMap< - std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, - llvm::BumpPtrAllocator> - CacheByFilename; + /// Cache state stored per filename: the resolved entry, the + /// resolved real path, and the in-flight slot for an outstanding + /// query (if any). \c InProgress is reset to null once the producing + /// worker publishes its result. + struct FilenameCacheState { + const CachedFileSystemEntry *Entry = nullptr; + const CachedRealPath *RealPath = nullptr; + std::shared_ptr<InProgressEntry> InProgress; + }; - /// Map from unique IDs to cached entries. - llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *> - EntriesByUID; + /// Cache state stored per unique ID; similar to + /// \c FilenameCacheState. + struct UIDCacheState { + const CachedFileSystemEntry *Entry = nullptr; + std::shared_ptr<InProgressEntry> InProgress; + }; + + /// Map from filenames to their cached state. + llvm::StringMap<FilenameCacheState, llvm::BumpPtrAllocator> CacheByFilename; + + /// Map from unique IDs to their cached state. + llvm::DenseMap<llvm::sys::fs::UniqueID, UIDCacheState> EntriesByUID; /// The backing storage for cached entries. llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage; @@ -177,13 +223,6 @@ class DependencyScanningFilesystemSharedCache { /// The backing storage for cached real paths. llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage; - /// Returns entry associated with the filename or nullptr if none is found. - const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; - - /// Returns entry associated with the unique ID or nullptr if none is found. - const CachedFileSystemEntry * - findEntryByUID(llvm::sys::fs::UniqueID UID) const; - /// Returns entry associated with the filename if there is some. Otherwise, /// constructs new one with the given status, associates it with the /// filename and returns the result. @@ -204,6 +243,38 @@ class DependencyScanningFilesystemSharedCache { getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry); + /// Claims a slot for \p Filename. If a resolved entry already exists, + /// returns it. If another worker is currently producing a result for this + /// filename, blocks on its \c InProgressEntry until done and adopts the + /// produced outcome (a resolved entry, or an uncached-negative error). + /// Otherwise installs a fresh \c InProgressEntry and returns it as a + /// producer slot which the caller must complete via \c fulfillFilenameSlot. + SlotAcquisitionResult acquireFilenameSlot(StringRef); + + /// Claims a slot for \p UID. Same semantics as \c acquireFilenameSlot but + /// keyed by unique ID; used after \c stat() has identified a file so that + /// concurrent \c readFile() calls for the same on-disk file (reached via + /// different filenames) collapse onto a single open. + SlotAcquisitionResult acquireUIDSlot(llvm::sys::fs::UniqueID); + + /// Completes a producer slot acquired via \c acquireFilenameSlot. + /// Publishes \p Result to waiters and, when it is a resolved entry, + /// records it under \p Filename in \c CacheByFilename. An error result + /// (an uncached negative stat) is shared with current waiters but not + /// recorded. Marks the slot done, removes the in-progress entry, and + /// notifies waiters. + void fulfillFilenameSlot(StringRef, + const std::shared_ptr<InProgressEntry> &, + llvm::ErrorOr<const CachedFileSystemEntry *>); + + /// Completes a producer slot acquired via \c acquireUIDSlot. + /// Publishes \p Result under \p UID in \c EntriesByUID, records + /// it in the slot, marks the slot done, removes the in-progress + /// entry, and notifies waiters. + void fulfillUIDSlot(llvm::sys::fs::UniqueID, + const std::shared_ptr<InProgressEntry> &, + const CachedFileSystemEntry *); + /// Returns the real path associated with the filename or nullptr if none is /// found. const CachedRealPath *findRealPathByFilename(StringRef Filename) const; @@ -256,65 +327,6 @@ class DependencyScanningFilesystemSharedCache { unsigned NumShards; }; -/// This class is a local cache, that caches the 'stat' and 'open' calls to the -/// underlying real file system. -class DependencyScanningFilesystemLocalCache { - llvm::StringMap< - std::pair<const CachedFileSystemEntry *, const CachedRealPath *>, - llvm::BumpPtrAllocator> - Cache; - -public: - /// Returns entry associated with the filename or nullptr if none is found. - const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { - assert(llvm::sys::path::is_absolute_gnu(Filename)); - auto It = Cache.find(Filename); - return It == Cache.end() ? nullptr : It->getValue().first; - } - - /// Associates the given entry with the filename and returns the given entry - /// pointer (for convenience). - const CachedFileSystemEntry & - insertEntryForFilename(StringRef Filename, - const CachedFileSystemEntry &Entry) { - assert(llvm::sys::path::is_absolute_gnu(Filename)); - auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}}); - auto &[CachedEntry, CachedRealPath] = It->getValue(); - if (!Inserted) { - // The file is already present in the local cache. If we got here, it only - // contains the real path. Let's make sure the entry is populated too. - assert((!CachedEntry && CachedRealPath) && "entry already present"); - CachedEntry = &Entry; - } - return *CachedEntry; - } - - /// Returns real path associated with the filename or nullptr if none is - /// found. - const CachedRealPath *findRealPathByFilename(StringRef Filename) const { - assert(llvm::sys::path::is_absolute_gnu(Filename)); - auto It = Cache.find(Filename); - return It == Cache.end() ? nullptr : It->getValue().second; - } - - /// Associates the given real path with the filename and returns the given - /// entry pointer (for convenience). - const CachedRealPath & - insertRealPathForFilename(StringRef Filename, - const CachedRealPath &RealPath) { - assert(llvm::sys::path::is_absolute_gnu(Filename)); - auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}}); - auto &[CachedEntry, CachedRealPath] = It->getValue(); - if (!Inserted) { - // The file is already present in the local cache. If we got here, it only - // contains the entry. Let's make sure the real path is populated too. - assert((!CachedRealPath && CachedEntry) && "real path already present"); - CachedRealPath = &RealPath; - } - return *CachedRealPath; - } -}; - /// Reference to a CachedFileSystemEntry. /// If the underlying entry is an opened file, this wrapper returns the file /// contents and the scanned preprocessor directives. @@ -411,14 +423,16 @@ class DependencyScanningWorkerFilesystem bool exists(const Twine &Path) override; private: - /// For a filename that's not yet associated with any entry in the caches, - /// uses the underlying filesystem to either look up the entry based in the - /// shared cache indexed by unique ID, or creates new entry from scratch. - /// \p FilenameForLookup will always be an absolute path, and different than - /// \p OriginalFilename if \p OriginalFilename is relative. + /// Resolves the cache entry for \p FilenameForLookup through the shared + /// cache: returns an entry already produced by another worker (a cache hit + /// or the result of an in-flight wait), or claims the producer slot and + /// computes the entry via the underlying + /// filesystem. The result is written through to the local cache. + /// \p FilenameForLookup is always absolute, and may differ from + /// \p OriginalFilename if the latter is relative. llvm::ErrorOr<const CachedFileSystemEntry &> - computeAndStoreResult(StringRef OriginalFilename, - StringRef FilenameForLookup); + resolveFilenameThroughSharedCache(StringRef OriginalFilename, + StringRef FilenameForLookup); /// Represents a filesystem entry that has been stat-ed (and potentially read) /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. @@ -435,46 +449,16 @@ class DependencyScanningWorkerFilesystem /// in status and size of read contents. llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename); - /// Returns entry associated with the unique ID of the given tentative entry - /// if there is some in the shared cache. Otherwise, constructs new one, - /// associates it with the unique ID and returns the result. - const CachedFileSystemEntry & - getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); - - /// Returns entry associated with the filename or nullptr if none is found. - /// - /// Returns entry from local cache if there is some. Otherwise, if the entry - /// is found in the shared cache, writes it through the local cache and - /// returns it. Otherwise returns nullptr. - const CachedFileSystemEntry * - findEntryByFilenameWithWriteThrough(StringRef Filename); - - /// Returns entry associated with the unique ID in the shared cache or nullptr - /// if none is found. - const CachedFileSystemEntry * - findSharedEntryByUID(llvm::vfs::Status Stat) const; - /// Associates the given entry with the filename in the local cache and /// returns it. const CachedFileSystemEntry & insertLocalEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry) { - return LocalCache.insertEntryForFilename(Filename, Entry); + assert(llvm::sys::path::is_absolute_gnu(Filename)); + LocalCache[Filename].File = &Entry; + return Entry; } - /// Returns entry associated with the filename in the shared cache if there is - /// some. Otherwise, constructs new one with the given error code, associates - /// it with the filename and returns the result. - const CachedFileSystemEntry & - getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC); - - /// Returns entry associated with the filename in the shared cache if there is - /// some. Otherwise, associates the given entry with the filename and returns - /// it. - const CachedFileSystemEntry & - getOrInsertSharedEntryForFilename(StringRef Filename, - const CachedFileSystemEntry &Entry); - void printImpl(raw_ostream &OS, PrintType Type, unsigned IndentLevel) const override { printIndent(OS, IndentLevel); @@ -484,9 +468,19 @@ class DependencyScanningWorkerFilesystem /// The service associated with this VFS. DependencyScanningService &Service; + + /// Per-filename state cached locally by this worker thread, so repeated + /// queries can be served without touching the shared cache. The entry and + /// real path are arena-owned by the shared cache and outlive this worker, so + /// borrowing raw pointers here is safe. + struct LocalEntry { + const CachedFileSystemEntry *File = nullptr; + const CachedRealPath *RealPath = nullptr; + }; + /// The local cache is used by the worker thread to cache file system queries /// locally instead of querying the global cache every time. - DependencyScanningFilesystemLocalCache LocalCache; + llvm::StringMap<LocalEntry, llvm::BumpPtrAllocator> LocalCache; /// The working directory to use for making relative paths absolute before /// using them for cache lookups. diff --git a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp index 6ff6cd9040665..3a845113d933e 100644 --- a/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/DependencyScanning/DependencyScanningFilesystem.cpp @@ -115,8 +115,13 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries( for (unsigned i = 0; i < NumShards; i++) { const CacheShard &Shard = CacheShards[i]; std::lock_guard<std::mutex> LockGuard(Shard.CacheLock); - for (const auto &[Path, CachedPair] : Shard.CacheByFilename) { - const CachedFileSystemEntry *Entry = CachedPair.first; + for (const auto &[Path, State] : Shard.CacheByFilename) { + const CachedFileSystemEntry *Entry = State.Entry; + // Skip filenames with no resolved entry: either a real-path-only entry + // (from getRealPath), or an uncached negative stat that left the slot + // behind without recording one. Runs post-scan, so no in-progress slots. + if (!Entry) + continue; llvm::ErrorOr<llvm::vfs::Status> Status = UnderlyingFS.status(Path); if (Status) { if (Entry->getError()) { @@ -151,38 +156,145 @@ DependencyScanningFilesystemSharedCache::getOutOfDateEntries( return InvalidDiagInfo; } -const CachedFileSystemEntry * -DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( - StringRef Filename) const { - assert(llvm::sys::path::is_absolute_gnu(Filename)); - std::lock_guard<std::mutex> LockGuard(CacheLock); - auto It = CacheByFilename.find(Filename); - return It == CacheByFilename.end() ? nullptr : It->getValue().first; -} - -const CachedFileSystemEntry * -DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( - llvm::sys::fs::UniqueID UID) const { - std::lock_guard<std::mutex> LockGuard(CacheLock); - auto It = EntriesByUID.find(UID); - return It == EntriesByUID.end() ? nullptr : It->getSecond(); -} - const CachedFileSystemEntry & DependencyScanningFilesystemSharedCache::CacheShard:: getOrEmplaceEntryForFilename(StringRef Filename, llvm::ErrorOr<llvm::vfs::Status> Stat) { std::lock_guard<std::mutex> LockGuard(CacheLock); - auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}}); - auto &[CachedEntry, CachedRealPath] = It->getValue(); - if (!CachedEntry) { - // The entry is not present in the shared cache. Either the cache doesn't - // know about the file at all, or it only knows about its real path. - assert((Inserted || CachedRealPath) && "existing file with empty pair"); - CachedEntry = + auto [It, Inserted] = CacheByFilename.try_emplace(Filename); + auto &State = It->getValue(); + if (!State.Entry) { + // The entry is not present in the shared cache. This method runs only + // from inside a producer slot held by the caller, so either the cache + // state was just freshly inserted or it already carries an in-flight + // slot. + assert((Inserted || State.InProgress) && + "cache state should be fresh or carry an in-flight slot held by " + "the caller"); + State.Entry = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); } - return *CachedEntry; + return *State.Entry; +} + +DependencyScanningFilesystemSharedCache::SlotAcquisitionResult +DependencyScanningFilesystemSharedCache::CacheShard::acquireFilenameSlot( + StringRef Filename) { + assert(llvm::sys::path::is_absolute_gnu(Filename)); + std::shared_ptr<InProgressEntry> Pending; + { + std::lock_guard<std::mutex> ShardLock(CacheLock); + auto &State = CacheByFilename[Filename]; + + // Cache hit. + if (State.Entry) + return SlotAcquisitionResult{State.Entry, nullptr}; + + // No outstanding query: install an in-progress entry and become the + // producer. + if (!State.InProgress) { + State.InProgress = std::make_shared<InProgressEntry>(); + return SlotAcquisitionResult{std::error_code{}, State.InProgress}; + } + + // Another worker is producing for this filename. Capture the shared_ptr + // by copy so the slot stays alive for us to wait on after we release the + // shard lock (the producer resets State.InProgress on publish). + Pending = State.InProgress; + } + + // Wait for the producer on the slot's own mutex/CV, with the shard lock + // released so unrelated keys in this shard are not blocked by our wait, then + // adopt its outcome. This holds even for a negative result the producer + // chose not to cache: our query overlapped the producer's in time, so we can + // share its answer without that constituting caching across separate queries. + std::unique_lock<std::mutex> EntryLock(Pending->Mutex); + Pending->CondVar.wait(EntryLock, [&] { return Pending->Done; }); + return SlotAcquisitionResult{Pending->Result, nullptr}; +} + +DependencyScanningFilesystemSharedCache::SlotAcquisitionResult +DependencyScanningFilesystemSharedCache::CacheShard::acquireUIDSlot( + llvm::sys::fs::UniqueID UID) { + std::shared_ptr<InProgressEntry> Pending; + { + std::lock_guard<std::mutex> ShardLock(CacheLock); + auto &State = EntriesByUID[UID]; + + // Cache hit. + if (State.Entry) + return SlotAcquisitionResult{State.Entry, nullptr}; + + // No outstanding query: install an in-progress entry and become the + // producer. + if (!State.InProgress) { + State.InProgress = std::make_shared<InProgressEntry>(); + return SlotAcquisitionResult{std::error_code{}, State.InProgress}; + } + + // Another worker is producing for this UID. Capture the shared_ptr by copy + // so the slot stays alive for us to wait on after we release the shard + // lock. + Pending = State.InProgress; + } + + // Wait for the producer on the slot's own mutex/CV, with the shard lock + // released. Unlike the filename slot, a UID producer always publishes a + // non-null entry, so the adopted result is never an error. + std::unique_lock<std::mutex> EntryLock(Pending->Mutex); + Pending->CondVar.wait(EntryLock, [&] { return Pending->Done; }); + assert(Pending->Result && *Pending->Result && + "in-progress UID slot fulfilled without an entry"); + return SlotAcquisitionResult{Pending->Result, nullptr}; +} + +void DependencyScanningFilesystemSharedCache::CacheShard::fulfillFilenameSlot( + StringRef Filename, + const std::shared_ptr< + DependencyScanningFilesystemSharedCache::InProgressEntry> &IPE, + llvm::ErrorOr<const CachedFileSystemEntry *> Result) { + { + std::lock_guard<std::mutex> ShardLock(CacheLock); + auto &State = CacheByFilename[Filename]; + // Only a resolved entry is recorded for later queries; an uncached + // negative stat (an error) is shared with current waiters but not + // persisted in the shard. + if (Result && !State.Entry) + State.Entry = *Result; + State.InProgress.reset(); + } + // Publish the result under the slot's own mutex, then notify waiters. Waiters + // read Done/Result under this same mutex, so the predicate wait cannot miss + // the wakeup. The producer's shared_ptr keeps the slot (and CondVar) alive + // across notify_all. + { + std::lock_guard<std::mutex> EntryLock(IPE->Mutex); + IPE->Result = Result; + IPE->Done = true; + } + IPE->CondVar.notify_all(); +} + +void DependencyScanningFilesystemSharedCache::CacheShard::fulfillUIDSlot( + llvm::sys::fs::UniqueID UID, + const std::shared_ptr< + DependencyScanningFilesystemSharedCache::InProgressEntry> &IPE, + const CachedFileSystemEntry *Result) { + { + std::lock_guard<std::mutex> ShardLock(CacheLock); + auto &State = EntriesByUID[UID]; + if (Result && !State.Entry) + State.Entry = Result; + State.InProgress.reset(); + } + // Publish the result under the slot's own mutex, then notify waiters. See + // fulfillFilenameSlot for the synchronization rationale. + { + std::lock_guard<std::mutex> EntryLock(IPE->Mutex); + IPE->Result = Result; + IPE->Done = true; + } + IPE->CondVar.notify_all(); } const CachedFileSystemEntry & @@ -190,17 +302,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, std::unique_ptr<llvm::MemoryBuffer> Contents) { std::lock_guard<std::mutex> LockGuard(CacheLock); - auto [It, Inserted] = EntriesByUID.try_emplace(UID); - auto &CachedEntry = It->getSecond(); - if (Inserted) { + auto &State = EntriesByUID[UID]; + if (!State.Entry) { CachedFileContents *StoredContents = nullptr; if (Contents) StoredContents = new (ContentsStorage.Allocate()) CachedFileContents(std::move(Contents)); - CachedEntry = new (EntryStorage.Allocate()) + State.Entry = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat), StoredContents); } - return *CachedEntry; + return *State.Entry; } const CachedFileSystemEntry & @@ -208,11 +319,10 @@ DependencyScanningFilesystemSharedCache::CacheShard:: getOrInsertEntryForFilename(StringRef Filename, const CachedFileSystemEntry &Entry) { std::lock_guard<std::mutex> LockGuard(CacheLock); - auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}}); - auto &[CachedEntry, CachedRealPath] = It->getValue(); - if (!Inserted || !CachedEntry) - CachedEntry = &Entry; - return *CachedEntry; + auto &State = CacheByFilename[Filename]; + if (!State.Entry) + State.Entry = &Entry; + return *State.Entry; } const CachedRealPath * @@ -221,7 +331,7 @@ DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename( assert(llvm::sys::path::is_absolute_gnu(Filename)); std::lock_guard<std::mutex> LockGuard(CacheLock); auto It = CacheByFilename.find(Filename); - return It == CacheByFilename.end() ? nullptr : It->getValue().second; + return It == CacheByFilename.end() ? nullptr : It->getValue().RealPath; } const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: @@ -229,7 +339,7 @@ const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard:: llvm::ErrorOr<llvm::StringRef> RealPath) { std::lock_guard<std::mutex> LockGuard(CacheLock); - const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second; + const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].RealPath; if (!StoredRealPath) { auto OwnedRealPath = [&]() -> CachedRealPath { if (!RealPath) @@ -253,82 +363,91 @@ DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem( updateWorkingDirForCacheLookup(); } -const CachedFileSystemEntry & -DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( - TentativeEntry TEntry) { - auto &Shard = - Service.getSharedCache().getShardForUID(TEntry.Status.getUniqueID()); - return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), - std::move(TEntry.Status), - std::move(TEntry.Contents)); -} - -const CachedFileSystemEntry * -DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( - StringRef Filename) { - if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) - return Entry; - auto &Shard = Service.getSharedCache().getShardForFilename(Filename); - if (const auto *Entry = Shard.findEntryByFilename(Filename)) - return &LocalCache.insertEntryForFilename(Filename, *Entry); - return nullptr; -} - -const CachedFileSystemEntry * -DependencyScanningWorkerFilesystem::findSharedEntryByUID( - llvm::vfs::Status Stat) const { - return Service.getSharedCache() - .getShardForUID(Stat.getUniqueID()) - .findEntryByUID(Stat.getUniqueID()); -} - -const CachedFileSystemEntry & -DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForFilename( - StringRef Filename, std::error_code EC) { - return Service.getSharedCache() - .getShardForFilename(Filename) - .getOrEmplaceEntryForFilename(Filename, EC); -} - -const CachedFileSystemEntry & -DependencyScanningWorkerFilesystem::getOrInsertSharedEntryForFilename( - StringRef Filename, const CachedFileSystemEntry &Entry) { - return Service.getSharedCache() - .getShardForFilename(Filename) - .getOrInsertEntryForFilename(Filename, Entry); -} - llvm::ErrorOr<const CachedFileSystemEntry &> -DependencyScanningWorkerFilesystem::computeAndStoreResult( +DependencyScanningWorkerFilesystem::resolveFilenameThroughSharedCache( StringRef OriginalFilename, StringRef FilenameForLookup) { - llvm::ErrorOr<llvm::vfs::Status> Stat = - getUnderlyingFS().status(OriginalFilename); - if (!Stat) { - if (!Service.getOpts().CacheNegativeStats || - !shouldCacheNegativeStatsForPath(OriginalFilename)) - return Stat.getError(); - - const auto &Entry = - getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError()); - return insertLocalEntryForFilename(FilenameForLookup, Entry); + auto &FilenameShard = + Service.getSharedCache().getShardForFilename(FilenameForLookup); + + // Acquire a per-filename in-progress entry. If another worker has already + // produced an entry under this filename, or is currently producing one, + // adopt its result instead of duplicating the underlying filesystem. + auto FilenameSlot = FilenameShard.acquireFilenameSlot(FilenameForLookup); + if (!FilenameSlot.Produce) { + // Resolved by a cache hit, or adopted from a concurrent producer. An error + // here is an uncached negative stat shared with us by a query that + // overlapped ours in time. + if (!FilenameSlot.Resolved) + return FilenameSlot.Resolved.getError(); + return insertLocalEntryForFilename(FilenameForLookup, + **FilenameSlot.Resolved); } - if (const auto *Entry = findSharedEntryByUID(*Stat)) - return insertLocalEntryForFilename(FilenameForLookup, *Entry); - - auto TEntry = - Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename); + // Compute the result, then publish it through the producer slot so any + // workers waiting on the same filename adopt it. + std::shared_ptr<DependencyScanningFilesystemSharedCache::InProgressEntry> + ProducerSlot = std::move(FilenameSlot.Produce); + auto Result = [&]() -> llvm::ErrorOr<const CachedFileSystemEntry &> { + llvm::ErrorOr<llvm::vfs::Status> Stat = + getUnderlyingFS().status(OriginalFilename); + if (!Stat) { + // Honor the negative-stat caching policy. When caching is disabled for + // this path, return the error without recording it in the shard. The + // producer slot publishes the error to concurrent waiters so they adopt + // it (a valid shared answer for overlapping queries), but because it is + // not persisted, a later separate query re-runs it; this lets a file + // created mid-scan become visible. + if (!Service.getOpts().CacheNegativeStats || + !shouldCacheNegativeStatsForPath(OriginalFilename)) + return Stat.getError(); + return insertLocalEntryForFilename( + FilenameForLookup, FilenameShard.getOrEmplaceEntryForFilename( + FilenameForLookup, Stat.getError())); + } - const CachedFileSystemEntry *SharedEntry = [&]() { - if (TEntry) { - const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); - return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry); + // Acquire a per-UID producer slot to dedup concurrent readFile() + // calls across workers that arrived under different filenames + // pointing at the same on-disk file. + auto &UIDShard = + Service.getSharedCache().getShardForUID(Stat->getUniqueID()); + auto UIDSlot = UIDShard.acquireUIDSlot(Stat->getUniqueID()); + const CachedFileSystemEntry *SharedEntry = nullptr; + if (!UIDSlot.Produce) { + // Adopt the entry produced by a concurrent worker for the same UID. + SharedEntry = *UIDSlot.Resolved; + } else { + auto TEntry = Stat->isDirectory() ? TentativeEntry(*Stat) + : readFile(OriginalFilename); + if (TEntry) { + SharedEntry = &UIDShard.getOrEmplaceEntryForUID( + Stat->getUniqueID(), std::move(TEntry->Status), + std::move(TEntry->Contents)); + } else { + // `readFile` failed despite `stat` succeeding. Cache the failure under + // the filename, and publish that same entry under the UID so awaiting + // workers surface the error rather than racing to retry the open. + SharedEntry = &FilenameShard.getOrEmplaceEntryForFilename( + FilenameForLookup, TEntry.getError()); + } + UIDShard.fulfillUIDSlot(Stat->getUniqueID(), UIDSlot.Produce, + SharedEntry); } - return &getOrEmplaceSharedEntryForFilename(FilenameForLookup, - TEntry.getError()); + + // Bind the resolved/produced entry to this filename in the shared cache + // (idempotent if already there) and the local cache. + return insertLocalEntryForFilename( + FilenameForLookup, FilenameShard.getOrInsertEntryForFilename( + FilenameForLookup, *SharedEntry)); }(); - return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry); + // Publish the produced outcome: a resolved entry, or the error of an + // uncached negative stat, which concurrent waiters adopt without it being + // recorded in the shard. + llvm::ErrorOr<const CachedFileSystemEntry *> Produced = + Result ? llvm::ErrorOr<const CachedFileSystemEntry *>(&*Result) + : llvm::ErrorOr<const CachedFileSystemEntry *>(Result.getError()); + FilenameShard.fulfillFilenameSlot(FilenameForLookup, ProducerSlot, Produced); + return Result; } llvm::ErrorOr<EntryRef> @@ -339,10 +458,11 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( if (!FilenameForLookup) return FilenameForLookup.getError(); - if (const auto *Entry = - findEntryByFilenameWithWriteThrough(*FilenameForLookup)) + if (const auto *Entry = LocalCache[*FilenameForLookup].File) return EntryRef(OriginalFilename, *Entry).unwrapError(); - auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup); + + auto MaybeEntry = + resolveFilenameThroughSharedCache(OriginalFilename, *FilenameForLookup); if (!MaybeEntry) return MaybeEntry.getError(); return EntryRef(OriginalFilename, *MaybeEntry).unwrapError(); @@ -448,18 +568,17 @@ DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, }; // If we already have the result in local cache, no work required. - if (const auto *RealPath = - LocalCache.findRealPathByFilename(*FilenameForLookup)) - return HandleCachedRealPath(*RealPath); + auto &Local = LocalCache[*FilenameForLookup]; + if (Local.RealPath) + return HandleCachedRealPath(*Local.RealPath); // If we have the result in the shared cache, cache it locally. auto &Shard = Service.getSharedCache().getShardForFilename(*FilenameForLookup); if (const auto *ShardRealPath = Shard.findRealPathByFilename(*FilenameForLookup)) { - const auto &RealPath = LocalCache.insertRealPathForFilename( - *FilenameForLookup, *ShardRealPath); - return HandleCachedRealPath(RealPath); + Local.RealPath = ShardRealPath; + return HandleCachedRealPath(*Local.RealPath); } // If we don't know the real path, compute it... @@ -473,8 +592,8 @@ DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path, // whatever is in the shared cache into the local one. const auto &RealPath = Shard.getOrEmplaceRealPathForFilename( *FilenameForLookup, ComputedRealPath); - return HandleCachedRealPath( - LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath)); + Local.RealPath = &RealPath; + return HandleCachedRealPath(*Local.RealPath); } std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory( diff --git a/clang/unittests/DependencyScanning/CMakeLists.txt b/clang/unittests/DependencyScanning/CMakeLists.txt index 4f2a36067209c..53d257a76784d 100644 --- a/clang/unittests/DependencyScanning/CMakeLists.txt +++ b/clang/unittests/DependencyScanning/CMakeLists.txt @@ -4,6 +4,8 @@ add_clang_unittest(ClangDependencyScanningTests CLANG_LIBS clangDependencyScanning clangFrontend # For TextDiagnosticPrinter. + LINK_LIBS + LLVMTestingSupport LLVM_COMPONENTS ${LLVM_TARGETS_TO_BUILD} Option diff --git a/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp index 85525ba5324ca..8108200d0b21c 100644 --- a/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp +++ b/clang/unittests/DependencyScanning/DependencyScanningFilesystemTest.cpp @@ -10,10 +10,39 @@ #include "clang/DependencyScanning/DependencyScanningService.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Testing/Support/SupportHelpers.h" #include "gtest/gtest.h" +#include <condition_variable> +#include <mutex> +#include <thread> using namespace clang::dependencies; +namespace { + +/// Releases all waiting threads simultaneously so that the worker logic can +/// be observed under maximal concurrency rather than a thread-spawn cascade. +struct StartBarrier { + std::mutex M; + std::condition_variable CV; + bool Released = false; + + void wait() { + std::unique_lock<std::mutex> Lock(M); + CV.wait(Lock, [&] { return Released; }); + } + + void release() { + { + std::lock_guard<std::mutex> Lock(M); + Released = true; + } + CV.notify_all(); + } +}; + +} // namespace + TEST(DependencyScanningFilesystem, OpenFileAndGetBufferRepeatedly) { auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>(); InMemoryFS->setCurrentWorkingDirectory("/"); @@ -334,3 +363,213 @@ TEST(DependencyScanningFilesystem, DoNotDiagnoseDirSizeChange) { auto InvalidEntries = Service.getSharedCache().getOutOfDateEntries(*FS); EXPECT_EQ(InvalidEntries.size(), 0u); } + +TEST(DependencyScanningWorkerFilesystem, ConcurrentSameFilenameDeduplicates) { + llvm::unittest::TempDir TmpDir("dswf-same-filename", /*Unique=*/true); + llvm::unittest::TempFile TmpFile(TmpDir.path("foo.c"), /*Suffix=*/"", + /*Contents=*/"hello"); + + auto TracingFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>( + llvm::vfs::getRealFileSystem()); + DependencyScanningService Service({}); + + constexpr unsigned NumWorkers = 16; + std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>( + Service, TracingFS)); + + StartBarrier Barrier; + std::vector<std::thread> Threads; + std::vector<llvm::ErrorOr<EntryRef>> Results( + NumWorkers, llvm::ErrorOr<EntryRef>(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + Threads.emplace_back([&, I] { + Barrier.wait(); + Results[I] = Workers[I]->getOrCreateFileSystemEntry(TmpFile.path()); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + EXPECT_EQ(TracingFS->NumStatusCalls.load(), 1u); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 1u); + + // All workers must have observed the same underlying entry. + ASSERT_TRUE(Results[0]); + llvm::sys::fs::UniqueID FirstUID = Results[0]->getStatus().getUniqueID(); + const char *FirstContents = Results[0]->getContents().data(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_TRUE(Results[I]) << "worker " << I << " failed"; + EXPECT_EQ(Results[I]->getStatus().getUniqueID(), FirstUID); + EXPECT_EQ(Results[I]->getContents().data(), FirstContents); + } +} + +TEST(DependencyScanningWorkerFilesystem, + ConcurrentSameUIDDifferentFilenamesDeduplicatesOpen) { + // Use a real on-disk file plus a hard link so the two filenames share a + // UniqueID, exercising the per-UID slot. + llvm::unittest::TempDir TmpDir("dswf-same-uid", /*Unique=*/true); + llvm::SmallString<128> RealPath = TmpDir.path("real.c"); + llvm::SmallString<128> AliasPath = TmpDir.path("alias.c"); + llvm::unittest::TempFile TmpFile(RealPath, /*Suffix=*/"", /*Contents=*/"hi"); + llvm::unittest::TempLink TmpLink(RealPath, AliasPath); + + auto TracingFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>( + llvm::vfs::getRealFileSystem()); + DependencyScanningService Service({}); + + constexpr unsigned NumWorkers = 16; + std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>( + Service, TracingFS)); + + StartBarrier Barrier; + std::vector<std::thread> Threads; + std::vector<llvm::ErrorOr<EntryRef>> Results( + NumWorkers, llvm::ErrorOr<EntryRef>(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + llvm::StringRef Path = + (I % 2 == 0) ? llvm::StringRef(RealPath) : llvm::StringRef(AliasPath); + Threads.emplace_back([&, I, Path] { + Barrier.wait(); + Results[I] = Workers[I]->getOrCreateFileSystemEntry(Path); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + // Each filename's slot dedupes its own stats; with two filenames we expect + // at most two stats. The UID slot then collapses the actual reads. + EXPECT_LE(TracingFS->NumStatusCalls.load(), 2u); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 1u); + + ASSERT_TRUE(Results[0]); + llvm::sys::fs::UniqueID FirstUID = Results[0]->getStatus().getUniqueID(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_TRUE(Results[I]) << "worker " << I << " failed"; + EXPECT_EQ(Results[I]->getStatus().getUniqueID(), FirstUID); + } +} + +TEST(DependencyScanningWorkerFilesystem, ConcurrentNegativeStatDeduplicates) { + // Construct a path inside a temporary directory but never create the + // file, so concurrent stat() calls land on the negative-stat path through + // the real filesystem. + llvm::unittest::TempDir TmpDir("dswf-negative", /*Unique=*/true); + llvm::SmallString<128> MissingPath = TmpDir.path("missing.h"); + + auto TracingFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>( + llvm::vfs::getRealFileSystem()); + // The dedup under test only applies when negative stats are cached; enable + // caching and use a path whose extension is eligible for it. + DependencyScanningServiceOptions Opts; + Opts.CacheNegativeStats = true; + DependencyScanningService Service(std::move(Opts)); + + constexpr unsigned NumWorkers = 16; + std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>( + Service, TracingFS)); + + StartBarrier Barrier; + std::vector<std::thread> Threads; + std::vector<llvm::ErrorOr<llvm::vfs::Status>> Results( + NumWorkers, llvm::ErrorOr<llvm::vfs::Status>(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + Threads.emplace_back([&, I] { + Barrier.wait(); + Results[I] = Workers[I]->status(MissingPath); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + EXPECT_EQ(TracingFS->NumStatusCalls.load(), 1u); + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 0u); + + // Every worker observed the same negative result. + ASSERT_FALSE(Results[0]); + std::error_code FirstError = Results[0].getError(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_FALSE(Results[I]) << "worker " << I << " unexpectedly succeeded"; + EXPECT_EQ(Results[I].getError(), FirstError); + } +} + +TEST(DependencyScanningWorkerFilesystem, + ConcurrentUncachedNegativeStatIsSharedButNotPersisted) { + // Same as above, but with negative-stat caching disabled. Concurrent queries + // that overlap the producer's in-flight stat still adopt its negative + // result (sharing an answer for overlapping queries is not "caching"), so no + // worker opens the missing file. Because the result is not persisted, a + // later, separate query must re-run the stat. + llvm::unittest::TempDir TmpDir("dswf-uncached-negative", /*Unique=*/true); + llvm::SmallString<128> MissingPath = TmpDir.path("missing.h"); + + auto TracingFS = + llvm::makeIntrusiveRefCnt<llvm::vfs::AtomicTracingFileSystem>( + llvm::vfs::getRealFileSystem()); + DependencyScanningServiceOptions Opts; + Opts.CacheNegativeStats = false; + DependencyScanningService Service(std::move(Opts)); + + constexpr unsigned NumWorkers = 16; + std::vector<std::unique_ptr<DependencyScanningWorkerFilesystem>> Workers; + Workers.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) + Workers.push_back(std::make_unique<DependencyScanningWorkerFilesystem>( + Service, TracingFS)); + + StartBarrier Barrier; + std::vector<std::thread> Threads; + std::vector<llvm::ErrorOr<llvm::vfs::Status>> Results( + NumWorkers, llvm::ErrorOr<llvm::vfs::Status>(std::error_code{})); + Threads.reserve(NumWorkers); + for (unsigned I = 0; I < NumWorkers; ++I) { + Threads.emplace_back([&, I] { + Barrier.wait(); + Results[I] = Workers[I]->status(MissingPath); + }); + } + Barrier.release(); + for (auto &T : Threads) + T.join(); + + // A negative stat never opens the file, regardless of how many workers + // produced versus adopted. Producers are bounded by the worker count; + // adopters add none. With caching off the result is not persisted, so the + // count is not pinned to one, but it can never exceed one stat per worker. + EXPECT_EQ(TracingFS->NumOpenFileForReadCalls.load(), 0u); + unsigned StatsAfterBurst = TracingFS->NumStatusCalls.load(); + EXPECT_GE(StatsAfterBurst, 1u); + EXPECT_LE(StatsAfterBurst, NumWorkers); + + // Every worker observed the same negative result. + ASSERT_FALSE(Results[0]); + std::error_code FirstError = Results[0].getError(); + for (unsigned I = 0; I < NumWorkers; ++I) { + ASSERT_FALSE(Results[I]) << "worker " << I << " unexpectedly succeeded"; + EXPECT_EQ(Results[I].getError(), FirstError); + } + + // The uncached negative was shared, not cached: a later, separate query + // re-runs the stat rather than adopting a persisted miss. + EXPECT_FALSE(Workers[0]->status(MissingPath)); + EXPECT_EQ(TracingFS->NumStatusCalls.load(), StatsAfterBurst + 1); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
