https://github.com/jansvoboda11 updated https://github.com/llvm/llvm-project/pull/205632
>From bc6447f27fdf093191b8fa1f9db57d62abbb01b7 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Wed, 24 Jun 2026 11:03:05 -0700 Subject: [PATCH 1/2] [clang][deps] Avoid `CompilerInvocation` copies When constructing the dependency graph for compilation caching, the dependency scanner needs to do some extra operations on the compiler invocations. Historically, these have not utilized the copy-on-write variant well. This patch takes care to minimize `CompilerInvocation` copies, which improves incremental scans with populated up-to-date scanning module cache by 16-18%. Together with https://github.com/llvm/llvm-project/pull/203350 which operates in the same space, wall-times are improved by 1.54x and instruction counts by 1.66x. --- .../DependencyActionController.h | 2 +- .../clang/Frontend/CompilerInvocation.h | 80 ++++++++++++++++++- .../DependencyScannerImpl.cpp | 12 ++- clang/lib/Frontend/CompilerInvocation.cpp | 23 ++++++ clang/lib/Tooling/DependencyScanningTool.cpp | 6 +- 5 files changed, 118 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/DependencyScanning/DependencyActionController.h b/clang/include/clang/DependencyScanning/DependencyActionController.h index 024b0de9048ec..023f080b767cc 100644 --- a/clang/include/clang/DependencyScanning/DependencyActionController.h +++ b/clang/include/clang/DependencyScanning/DependencyActionController.h @@ -61,7 +61,7 @@ class DependencyActionController { /// Finalizes the scan instance and modifies the resulting TU invocation. /// Returns true on success, false on failure. 
virtual bool finalize(CompilerInstance &ScanInstance, - CompilerInvocation &NewInvocation) { + CowCompilerInvocation &NewInvocation) { return true; } diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index 03097aefacf50..a3bd41a70a4ec 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -21,8 +21,10 @@ #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/ScopeExit.h" + #include <memory> #include <string> @@ -127,6 +129,9 @@ class CompilerInvocationBase { /// prevent creation of the reference-counted option objects. struct EmptyConstructor {}; + /// Tag for the shallow-copy constructor below. + struct ShallowConstructor {}; + CompilerInvocationBase(); CompilerInvocationBase(EmptyConstructor) {} CompilerInvocationBase(const CompilerInvocationBase &X) = delete; @@ -251,6 +256,15 @@ class CompilerInvocation : public CompilerInvocationBase { explicit CompilerInvocation(const CowCompilerInvocation &X); CompilerInvocation &operator=(const CowCompilerInvocation &X); + /// Move-construct/move-assign from a \c CowCompilerInvocation. Steals the + /// (potentially copy-on-written) option group pointers without deep-copying; + /// \p X is left empty. Useful to receive results of mutating a temporary + /// Cow alias back into a \c CompilerInvocation. + /// @{ + explicit CompilerInvocation(CowCompilerInvocation &&X); + CompilerInvocation &operator=(CowCompilerInvocation &&X); + /// @} + /// Const getters. /// @{ // Note: These need to be pulled in manually. 
Otherwise, they get hidden by @@ -293,6 +307,22 @@ class CompilerInvocation : public CompilerInvocationBase { ssaf::SSAFOptions &getSSAFOpts() { return *SSAFOpts; } /// @} + /// Invokes the \a Fn with CowCompilerInvocation representing \c this. + /// The \a Fn must not directly modify \c this. + /// The provided \c CowCompilerInvocation must not escape \a Fn. + template <class R> + R withCowRef(llvm::function_ref<R(CowCompilerInvocation &)> Fn); + template <class R> + R withCowRef(llvm::function_ref<R(const CowCompilerInvocation &)> Fn) const; + + /// Visitation. + /// @{ + /// Visits paths stored in the invocation. The callback may return true to + /// short-circuit the visitation, or return false to continue visiting. This + /// is allowed to mutate the visited paths. + void visitPaths(llvm::function_ref<bool(std::string &)> Callback); + /// @} + /// Create a compiler invocation from a list of input options. /// \returns true on success. /// @@ -385,6 +415,16 @@ class CowCompilerInvocation : public CompilerInvocationBase { CowCompilerInvocation(CompilerInvocation &&X) : CompilerInvocationBase(std::move(X)) {} + /// Construct a CowCompilerInvocation that aliases the option storage of \p + /// X without deep-copying. Subsequent mutations through getMut*Opts() will + /// copy-on-write per group as usual, leaving \p X unaffected. The caller + /// must guarantee that \p X is not mutated for the lifetime of the + /// constructed invocation. + CowCompilerInvocation(ShallowConstructor, const CompilerInvocation &X) + : CompilerInvocationBase(EmptyConstructor{}) { + shallow_copy_assign(X); + } + // Const getters are inherited from the base class. /// Mutable getters. @@ -404,8 +444,46 @@ class CowCompilerInvocation : public CompilerInvocationBase { PreprocessorOutputOptions &getMutPreprocessorOutputOpts(); ssaf::SSAFOptions &getMutSSAFOpts(); /// @} + + /// Visits paths stored in the invocation, allowing the callback to mutate + /// them. 
To preserve the copy-on-write invariant for groups whose paths the + /// caller might modify, this ensures unique ownership of every option group + /// up front; if the callback only inspects (and does not mutate) the paths, + /// the const \c visitPaths overload should be used instead to avoid those + /// per-group copies. + void visitMutPaths(llvm::function_ref<bool(std::string &)> Callback); }; +template <class R> +R CompilerInvocation::withCowRef( + llvm::function_ref<R(CowCompilerInvocation &)> Fn) { + // We use moves to avoid bumping the ref-count of the shared_ptr that holds + // individual options. Since we expect \a Fn to actually modify \c CowRef, + // this prevents temporary copies. + CowCompilerInvocation CowRef = std::move(*this); + llvm::scope_exit Mutate([&]() { *this = std::move(CowRef); }); + return Fn(CowRef); +} + +template <class R> +R CompilerInvocation::withCowRef( + llvm::function_ref<R(const CowCompilerInvocation &)> Fn) const { + // We use the shallow constructor. Since \a Fn cannot modify \c CowRef, no + // copies will be created, despite the bump to the ref-count of the shared_ptr + // that holds individual options. 
+ CowCompilerInvocation CowRef(ShallowConstructor{}, *this); + return Fn(CowRef); +} + +inline CompilerInvocation::CompilerInvocation(CowCompilerInvocation &&X) + : CompilerInvocationBase(std::move(X)) {} + +inline CompilerInvocation & +CompilerInvocation::operator=(CowCompilerInvocation &&X) { + CompilerInvocationBase::operator=(std::move(X)); + return *this; +} + IntrusiveRefCntPtr<llvm::vfs::FileSystem> createVFSFromCompilerInvocation(const CompilerInvocation &CI, DiagnosticsEngine &Diags); diff --git a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp index dc3dbe3603c01..68fda9227dfcb 100644 --- a/clang/lib/DependencyScanning/DependencyScannerImpl.cpp +++ b/clang/lib/DependencyScanning/DependencyScannerImpl.cpp @@ -713,7 +713,11 @@ bool DependencyScanningAction::runInvocation( if (MDC) MDC->applyDiscoveredDependencies(*OriginalInvocation); - if (!Controller.finalize(ScanInstance, *OriginalInvocation)) + bool Success = OriginalInvocation->withCowRef<bool>( + [&](CowCompilerInvocation &CowOriginalInvocation) { + return Controller.finalize(ScanInstance, CowOriginalInvocation); + }); + if (!Success) return false; Consumer.handleBuildCommand( @@ -791,7 +795,11 @@ bool DependencyScanningAction::runInvocation( MDC->applyDiscoveredDependencies(*OriginalInvocation); } - if (!Controller.finalize(ScanInstance, *OriginalInvocation)) + bool Success = OriginalInvocation->withCowRef<bool>( + [&](CowCompilerInvocation &CowOriginalInvocation) { + return Controller.finalize(ScanInstance, CowOriginalInvocation); + }); + if (!Success) return false; Consumer.handleBuildCommand( diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index dfde7b756dbff..e2260eb0d078a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -5435,6 +5435,29 @@ void CompilerInvocationBase::visitPaths( [&Callback](std::string &Path) { return 
Callback(StringRef(Path)); }); } +void CowCompilerInvocation::visitMutPaths( + llvm::function_ref<bool(std::string &)> Callback) { + // Ensure exclusive ownership of every option group, so that visitPathsImpl() + // doesn't affect any other invocations. + // FIXME: Do this only if \c Callback does decide to modify any strings in an + // option group. + (void)ensureOwned(LangOpts); + (void)ensureOwned(TargetOpts); + (void)ensureOwned(DiagnosticOpts); + (void)ensureOwned(HSOpts); + (void)ensureOwned(PPOpts); + (void)ensureOwned(AnalyzerOpts); + (void)ensureOwned(MigratorOpts); + (void)ensureOwned(APINotesOpts); + (void)ensureOwned(CodeGenOpts); + (void)ensureOwned(FSOpts); + (void)ensureOwned(FrontendOpts); + (void)ensureOwned(DependencyOutputOpts); + (void)ensureOwned(PreprocessorOutputOpts); + (void)ensureOwned(SSAFOpts); + visitPathsImpl(Callback); +} + void CompilerInvocationBase::generateCC1CommandLine( ArgumentConsumer Consumer) const { llvm::Triple T(getTargetOpts().Triple); diff --git a/clang/lib/Tooling/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp index d55367107862d..11b225830c2fc 100644 --- a/clang/lib/Tooling/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanningTool.cpp @@ -587,7 +587,11 @@ bool CompilerInstanceWithContext::computeDependencies( MDC->run(Consumer); MDC->applyDiscoveredDependencies(ModuleInvocation); - if (!Controller.finalize(CI, ModuleInvocation)) + bool Success = ModuleInvocation.withCowRef<bool>( + [&](CowCompilerInvocation &CowModuleInvocation) { + return Controller.finalize(CI, CowModuleInvocation); + }); + if (!Success) return false; Consumer.handleBuildCommand( >From ce82c7d9780e917f7b2b30a56cca0921ffbab4e4 Mon Sep 17 00:00:00 2001 From: Jan Svoboda <[email protected]> Date: Fri, 26 Jun 2026 09:00:09 -0700 Subject: [PATCH 2/2] Unit tests --- .../Frontend/CompilerInvocationTest.cpp | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git 
a/clang/unittests/Frontend/CompilerInvocationTest.cpp b/clang/unittests/Frontend/CompilerInvocationTest.cpp index 887fbc5938333..2c0c6972cef3f 100644 --- a/clang/unittests/Frontend/CompilerInvocationTest.cpp +++ b/clang/unittests/Frontend/CompilerInvocationTest.cpp @@ -165,6 +165,73 @@ TEST(CompilerInvocationTest, CopyOnWriteAssignment) { EXPECT_EQ(B.getFrontendOpts().OutputFile, "x.o"); } +TEST(CompilerInvocationTest, WithConstCowRef) { + CompilerInvocation CI; + CI.getHeaderSearchOpts().ModuleCachePath = "mcp"; + + HeaderSearchOptions *HSOpts = &CI.getHeaderSearchOpts(); + + std::as_const(CI).withCowRef<void>([](const CowCompilerInvocation &CowCI) { + // Values stored in the original invocation are reflected in cow. + EXPECT_EQ(CowCI.getHeaderSearchOpts().ModuleCachePath, "mcp"); + }); + + // The const overload (forced via as_const) does not make a copy. + EXPECT_EQ(HSOpts, &CI.getHeaderSearchOpts()); +} + +TEST(CompilerInvocationTest, WithMutCowRef) { + CompilerInvocation CI; + CI.getHeaderSearchOpts().ModuleCachePath = "mcp"; + CI.getLangOpts().Modules = true; + + HeaderSearchOptions *HSOpts = &CI.getHeaderSearchOpts(); + LangOptions *LangOpts = &CI.getLangOpts(); + + CI.withCowRef<void>([](CowCompilerInvocation &CowCI) { + // Values stored in the original invocation are reflected in cow. + EXPECT_EQ(CowCI.getHeaderSearchOpts().ModuleCachePath, "mcp"); + // Values can be mutated. + CowCI.getMutLangOpts().Modules = false; + }); + + // Reading options class on a non-const cow reference does not make a copy. + EXPECT_EQ(HSOpts, &CI.getHeaderSearchOpts()); + // Writing options class on a non-const cow reference does not make a copy. + EXPECT_EQ(LangOpts, &CI.getLangOpts()); + // Writing options class on a non-const cow reference modifies the original.
+ EXPECT_EQ(CI.getLangOpts().Modules, false); +} + +TEST(CompilerInvocationTest, CopyOnWriteVisitPaths) { + CowCompilerInvocation A; + A.getMutHeaderSearchOpts().ModuleCachePath = "mcp"; + A.getMutLangOpts().Modules = true; + + CowCompilerInvocation B(A); + + const HeaderSearchOptions *HSOpts = &B.getHeaderSearchOpts(); + const LangOptions *LangOpts = &B.getLangOpts(); + B.visitMutPaths([](std::string &Path) { + if (Path == "mcp") { + Path = "pcm"; + return true; + } + return false; + }); + + // Modifying a path copies and modifies only one instance of the invocation. + EXPECT_NE(HSOpts, &B.getHeaderSearchOpts()); + EXPECT_EQ(B.getHeaderSearchOpts().ModuleCachePath, "pcm"); + // And the other instance remains unmodified. + EXPECT_EQ(HSOpts, &A.getHeaderSearchOpts()); + EXPECT_EQ(A.getHeaderSearchOpts().ModuleCachePath, "mcp"); + + // FIXME: Make this work: Unmodified options are not copied. + // EXPECT_EQ(LangOpts, &B.getLangOpts()); + (void)LangOpts; +} + // Boolean option with a keypath that defaults to true. // The only flag with a negative spelling can set the keypath to false. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
