https://github.com/zyn0217 updated https://github.com/llvm/llvm-project/pull/188421
>From 58b0a6978f604aeab04e0fb027ea3b2c896e38a6 Mon Sep 17 00:00:00 2001 From: Younan Zhang <[email protected]> Date: Tue, 24 Mar 2026 19:09:11 +0800 Subject: [PATCH 1/5] [Clang] Improve concept performance 1/N The concept parameter mapping patch significantly impacted performance in scenarios where concepts are heavily used, even with the addition of atomic-expression-level caching. After normalization, we often end up with large atomic expressions containing numerous duplicate and complex template parameter mappings. Previously, we were substituting and checking these repeatedly, which was highly inefficient. We now cache these substitution results within TemplateInstantiator. This provides some performance improvement, as shown in these regression cases: Regressions clang-21 clang-22 This patch usb_ids_gen.cpp 1.41s 3.90s 2.45s inspector_style_resolver.cpp 18.21s 22.43s 19.01s While performance is not yet as good as with clang-21, I think there is still room for future improvements. E.g., we can cache invalid results for SFINAE diagnostics and avoid redundant pack unpacking, etc. 
--- clang/include/clang/Sema/Sema.h | 3 +++ clang/lib/Sema/SemaConcept.cpp | 7 +++++++ clang/lib/Sema/SemaTemplateInstantiate.cpp | 23 +++++++++++++++++++++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a214a7aa9147b..3f18c97fbc4d4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -15091,6 +15091,9 @@ class Sema final : public SemaBase { UnsubstitutedConstraintSatisfactionCacheResult> UnsubstitutedConstraintSatisfactionCache; + llvm::DenseMap<llvm::FoldingSetNodeID, TemplateArgumentLoc> + *CurrentCachedTemplateArgs = nullptr; + private: /// Caches pairs of template-like decls whose associated constraints were /// checked for subsumption and whether or not the first's constraints did in diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 9c4f52dd7150c..6ae678fe23700 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -487,6 +487,10 @@ class ConstraintSatisfactionChecker { // right context. ConceptDecl *ParentConcept = nullptr; +public: + llvm::DenseMap<llvm::FoldingSetNodeID, TemplateArgumentLoc> + CachedTemplateArgs; + private: ExprResult EvaluateAtomicConstraint(const Expr *AtomicExpr, @@ -658,6 +662,9 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments( ? 
Constraint.getPackSubstitutionIndex() : PackSubstitutionIndex); + llvm::SaveAndRestore PushTemplateArgsCache(S.CurrentCachedTemplateArgs, + &CachedTemplateArgs); + if (S.SubstTemplateArgumentsInParameterMapping( Constraint.getParameterMapping(), Constraint.getBeginLoc(), MLTAL, SubstArgs)) { diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 34ed5dffa11b4..194d5ef0ba06a 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1329,6 +1329,8 @@ namespace { // Whether an incomplete substituion should be treated as an error. bool BailOutOnIncomplete; + std::optional<llvm::FoldingSetNodeID> TemplateArgsHashValue; + // CWG2770: Function parameters should be instantiated when they are // needed by a satisfaction check of an atomic constraint or // (recursively) by another function parameter. @@ -1358,7 +1360,12 @@ namespace { SourceLocation Loc, const MultiLevelTemplateArgumentList &TemplateArgs) : inherited(SemaRef), TemplateArgs(TemplateArgs), Loc(Loc), - BailOutOnIncomplete(false) {} + BailOutOnIncomplete(false) { + auto &V = TemplateArgsHashValue.emplace(); + for (auto &Level : TemplateArgs) + for (auto &Arg : Level.Args) + Arg.Profile(V, SemaRef.Context); + } /// Determine whether the given type \p T has already been /// transformed. @@ -1611,6 +1618,7 @@ namespace { } return Type; } + // Override the default version to handle a rewrite-template-arg-pack case // for building a deduction guide. 
bool TransformTemplateArgument(const TemplateArgumentLoc &Input, @@ -1618,6 +1626,19 @@ namespace { bool Uneval = false) { const TemplateArgument &Arg = Input.getArgument(); std::vector<TemplateArgument> TArgs; + if (auto *Cache = SemaRef.CurrentCachedTemplateArgs; + TemplateArgsHashValue && Cache) { + llvm::FoldingSetNodeID ID = *TemplateArgsHashValue; + Input.getArgument().Profile(ID, SemaRef.Context); + if (auto Iter = Cache->find(ID); Iter != Cache->end()) { + Output = Iter->second; + return false; + } + bool Ret = inherited::TransformTemplateArgument(Input, Output, Uneval); + if (!Ret) + Cache->insert({ID, Output}); + return Ret; + } switch (Arg.getKind()) { case TemplateArgument::Pack: assert(SemaRef.CodeSynthesisContexts.empty() || >From 3d6e5b5651e9bb9d6f044567e148d85add587962 Mon Sep 17 00:00:00 2001 From: Younan Zhang <[email protected]> Date: Wed, 25 Mar 2026 16:01:43 +0800 Subject: [PATCH 2/5] Fix libc++ test --- clang/lib/Sema/SemaTemplateInstantiate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 194d5ef0ba06a..d6c199f12ac4f 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1629,6 +1629,7 @@ namespace { if (auto *Cache = SemaRef.CurrentCachedTemplateArgs; TemplateArgsHashValue && Cache) { llvm::FoldingSetNodeID ID = *TemplateArgsHashValue; + ID.AddInteger(SemaRef.ArgPackSubstIndex.toInternalRepresentation()); Input.getArgument().Profile(ID, SemaRef.Context); if (auto Iter = Cache->find(ID); Iter != Cache->end()) { Output = Iter->second; >From 5e563c8acd195a045119d3e0382f1b9154a172af Mon Sep 17 00:00:00 2001 From: Younan Zhang <[email protected]> Date: Thu, 2 Apr 2026 16:59:14 +0800 Subject: [PATCH 3/5] Documentation --- clang/docs/InternalsManual.rst | 3 ++- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Sema/Sema.h | 12 ++++++++++++ clang/lib/Sema/SemaConcept.cpp | 3 ++- 
clang/lib/Sema/SemaTemplateInstantiate.cpp | 13 ++++++++----- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index 0694bf02b4996..764e7d8dddbcc 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -2910,7 +2910,8 @@ After substitution in the mapping, we substitute in the constraint expression using that copy of the ``MultiLevelTemplateArgumentList``, and then evaluate it. Because this is expensive, it is cached in -``UnsubstitutedConstraintSatisfactionCache``. +``UnsubstitutedConstraintSatisfactionCache``. Also we will cache the instantiation +result of parameter mappings to avoid unnecessary semantic checking. Any error during satisfaction is recorded in ``ConstraintSatisfaction``. for nested requirements, ``ConstraintSatisfaction`` is stored (including diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6a2632543d337..b2288c7379ed0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -570,6 +570,7 @@ OpenMP Support Improvements ^^^^^^^^^^^^ +- Improved substitution performance in concept checking. (#GH172266) Additional Information ====================== diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 3f18c97fbc4d4..f1a02701cc038 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -15091,6 +15091,18 @@ class Sema final : public SemaBase { UnsubstitutedConstraintSatisfactionCacheResult> UnsubstitutedConstraintSatisfactionCache; + /// Cache the instantiation results of template parameter mappings within + /// concepts. Substituting into normalized concepts can be extremely expensive + /// due to the redundancy of template parameters. This cache is intended for + /// use by TemplateInstantiator to avoid redundant semantic checking. + /// + /// NOTE: Cached results may lose TypeLoc fidelity, such as SourceLocations. 
+ /// As such, this is only applied to concepts and valid template arguments, + /// where non-type information from TypeLoc is less critical for subsequent + /// checking. + /// + /// FIXME: Clang should learn to avoid duplicate instantiations more broadly + /// for performance. llvm::DenseMap<llvm::FoldingSetNodeID, TemplateArgumentLoc> *CurrentCachedTemplateArgs = nullptr; diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 6ae678fe23700..dbb3625f19bb1 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -487,7 +487,8 @@ class ConstraintSatisfactionChecker { // right context. ConceptDecl *ParentConcept = nullptr; -public: + // This is for TemplateInstantiator to not instantiate the same template + // parameter mapping many times, in order to improve substitution performance. llvm::DenseMap<llvm::FoldingSetNodeID, TemplateArgumentLoc> CachedTemplateArgs; diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index d6c199f12ac4f..8cc7a1f5574fd 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1620,17 +1620,18 @@ namespace { } // Override the default version to handle a rewrite-template-arg-pack case - // for building a deduction guide. + // for building a deduction guide, and to cache substitution results in + // concepts checking. bool TransformTemplateArgument(const TemplateArgumentLoc &Input, TemplateArgumentLoc &Output, bool Uneval = false) { const TemplateArgument &Arg = Input.getArgument(); - std::vector<TemplateArgument> TArgs; if (auto *Cache = SemaRef.CurrentCachedTemplateArgs; - TemplateArgsHashValue && Cache) { + Cache && TemplateArgsHashValue) { llvm::FoldingSetNodeID ID = *TemplateArgsHashValue; ID.AddInteger(SemaRef.ArgPackSubstIndex.toInternalRepresentation()); - Input.getArgument().Profile(ID, SemaRef.Context); + // FIXME: We can lose sugars when profiling Arg. 
+ Arg.Profile(ID, SemaRef.Context); if (auto Iter = Cache->find(ID); Iter != Cache->end()) { Output = Iter->second; return false; @@ -1641,7 +1642,8 @@ namespace { return Ret; } switch (Arg.getKind()) { - case TemplateArgument::Pack: + case TemplateArgument::Pack: { + std::vector<TemplateArgument> TArgs; assert(SemaRef.CodeSynthesisContexts.empty() || SemaRef.CodeSynthesisContexts.back().Kind == Sema::CodeSynthesisContext::BuildingDeductionGuides); @@ -1659,6 +1661,7 @@ namespace { TemplateArgument(llvm::ArrayRef(TArgs).copy(SemaRef.Context)), QualType(), SourceLocation{}); return false; + } default: break; } >From b69bfabbba29620b377d4826c5a65f60d4050aa3 Mon Sep 17 00:00:00 2001 From: Younan Zhang <[email protected]> Date: Tue, 7 Apr 2026 17:32:56 +0800 Subject: [PATCH 4/5] Address feedback --- clang/include/clang/Sema/Sema.h | 8 -------- clang/lib/Sema/SemaTemplateInstantiate.cpp | 9 ++++++++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f1a02701cc038..5a2a2e71a0e7a 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -15095,14 +15095,6 @@ class Sema final : public SemaBase { /// concepts. Substituting into normalized concepts can be extremely expensive /// due to the redundancy of template parameters. This cache is intended for /// use by TemplateInstantiator to avoid redundant semantic checking. - /// - /// NOTE: Cached results may lose TypeLoc fidelity, such as SourceLocations. - /// As such, this is only applied to concepts and valid template arguments, - /// where non-type information from TypeLoc is less critical for subsequent - /// checking. - /// - /// FIXME: Clang should learn to avoid duplicate instantiations more broadly - /// for performance. 
llvm::DenseMap<llvm::FoldingSetNodeID, TemplateArgumentLoc> *CurrentCachedTemplateArgs = nullptr; diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 8cc7a1f5574fd..78896a3953975 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1630,8 +1630,15 @@ namespace { Cache && TemplateArgsHashValue) { llvm::FoldingSetNodeID ID = *TemplateArgsHashValue; ID.AddInteger(SemaRef.ArgPackSubstIndex.toInternalRepresentation()); - // FIXME: We can lose sugars when profiling Arg. Arg.Profile(ID, SemaRef.Context); + // FIXME: We should ideally only cache and restore TemplateArgument and + // rebuild the uncached TypeLoc separately in place. However this is + // nearly impossible given the current architecture of TreeTransform so + // that we have to lose TypeLoc fidelity in cases where TypeLocs are + // less critical, otherwise this might result in diagnostics pointing to + // arbitrary locations. + // We now only applied to concepts substitutions and their valid + // template arguments for performance reasons. if (auto Iter = Cache->find(ID); Iter != Cache->end()) { Output = Iter->second; return false; >From 4d05a21692284a4348cc25cd78d7a8259134180b Mon Sep 17 00:00:00 2001 From: Younan Zhang <[email protected]> Date: Tue, 7 Apr 2026 18:45:06 +0800 Subject: [PATCH 5/5] Fix tense --- clang/lib/Sema/SemaTemplateInstantiate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 78896a3953975..354da75200c28 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1637,7 +1637,7 @@ namespace { // that we have to lose TypeLoc fidelity in cases where TypeLocs are // less critical, otherwise this might result in diagnostics pointing to // arbitrary locations. 
- // We now only applied to concepts substitutions and their valid + // We now only apply to concepts substitutions and their valid // template arguments for performance reasons. if (auto Iter = Cache->find(ID); Iter != Cache->end()) { Output = Iter->second; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
