https://github.com/dzbarsky created https://github.com/llvm/llvm-project/pull/202618
SVE and SME code generation only reads BuiltinID, LLVMIntrinsic, and TypeModifier from its generated intrinsic maps. Store those fields in a 16-byte ARMScalableVectorIntrinsicInfo instead of the 32-byte ARMVectorIntrinsicInfo used by NEON and SISD code generation. Update both the classic and CIR consumers. On an LLVM 22.1.7 arm64 release build, stripped standalone clang shrinks from 115,097,192 to 114,816,456 bytes, saving 280,736 bytes (0.244%). The stripped all-tools multicall shrinks from 139,804,464 to 139,523,736 bytes, saving 280,728 bytes (0.201%). All 244 SVE, SME, and NEON/SVE bridge CodeGen tests pass. Large SVE and SME tests produce byte-identical LLVM IR. Alternating CPU-time batches were performance-neutral: SVE improved 0.7%, and a 4,096-call SME lookup benchmark improved 2.3%; end-to-end SME batches differed by 1.1% with overlapping distributions under host load. Work towards #202616 >From c3c154d456c0d81090cce785af62e20e8f5650f6 Mon Sep 17 00:00:00 2001 From: David Zbarsky <[email protected]> Date: Mon, 8 Jun 2026 19:01:25 -0400 Subject: [PATCH] [AArch64] Compact scalable-vector intrinsic maps SVE and SME code generation only reads BuiltinID, LLVMIntrinsic, and TypeModifier from its generated intrinsic maps. Store those fields in a 16-byte ARMScalableVectorIntrinsicInfo instead of the 32-byte ARMVectorIntrinsicInfo used by NEON and SISD code generation. Update both the classic and CIR consumers. On an LLVM 22.1.7 arm64 release build, stripped standalone clang shrinks from 115,097,192 to 114,816,456 bytes, saving 280,736 bytes (0.244%). The stripped all-tools multicall shrinks from 139,804,464 to 139,523,736 bytes, saving 280,728 bytes (0.201%). All 244 SVE, SME, and NEON/SVE bridge CodeGen tests pass. Large SVE and SME tests produce byte-identical LLVM IR. 
Alternating CPU-time batches were performance-neutral: SVE improved 0.7%, and a 4,096-call SME lookup benchmark improved 2.3%; end-to-end SME batches differed by 1.1% with overlapping distributions under host load. --- .../include/clang/Basic/AArch64CodeGenUtils.h | 15 ++++++++ .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 23 ++++++++----- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 34 ++++++++++--------- 3 files changed, 48 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Basic/AArch64CodeGenUtils.h b/clang/include/clang/Basic/AArch64CodeGenUtils.h index f64a41df63cf8..681ee35dc2cf4 100644 --- a/clang/include/clang/Basic/AArch64CodeGenUtils.h +++ b/clang/include/clang/Basic/AArch64CodeGenUtils.h @@ -64,6 +64,21 @@ struct ARMVectorIntrinsicInfo { } }; +struct ARMScalableVectorIntrinsicInfo { + unsigned BuiltinID; + unsigned LLVMIntrinsic; + uint64_t TypeModifier; + + bool operator<(unsigned RHSBuiltinID) const { + return BuiltinID < RHSBuiltinID; + } + bool operator<(const ARMScalableVectorIntrinsicInfo &TE) const { + return BuiltinID < TE.BuiltinID; + } +}; + +static_assert(sizeof(ARMScalableVectorIntrinsicInfo) == 16); + #define NEONMAP0(NameBase) \ {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0} diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index be906d0671e3a..b9b2a797c299b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -45,12 +45,11 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc, } #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ - {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ - TypeModifier} + {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier} #define SVEMAP2(NameBase, TypeModifier) \ - {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier} -static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = { + 
{SVE::BI__builtin_sve_##NameBase, 0, TypeModifier} +static const ARMScalableVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = { #define GET_SVE_LLVM_INTRINSIC_MAP #include "clang/Basic/arm_sve_builtin_cg.inc" #undef GET_SVE_LLVM_INTRINSIC_MAP @@ -62,8 +61,9 @@ static bool aarch64SVEIntrinsicsProvenSorted = false; // Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns // the corresponding info struct. -static const ARMVectorIntrinsicInfo * -findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap, +template <typename IntrinsicInfo> +static const IntrinsicInfo * +findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo> intrinsicMap, unsigned builtinID, bool &mapProvenSorted) { #ifndef NDEBUG @@ -73,8 +73,7 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap, } #endif - const ARMVectorIntrinsicInfo *info = - llvm::lower_bound(intrinsicMap, builtinID); + const IntrinsicInfo *info = llvm::lower_bound(intrinsicMap, builtinID); if (info != intrinsicMap.end() && info->BuiltinID == builtinID) return info; @@ -82,6 +81,14 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap, return nullptr; } +template <typename IntrinsicInfo, size_t numIntrinsics> +static const IntrinsicInfo * +findARMVectorIntrinsicInMap(const IntrinsicInfo (&intrinsicMap)[numIntrinsics], + unsigned builtinID, bool &mapProvenSorted) { + return findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo>(intrinsicMap), + builtinID, mapProvenSorted); +} + //===----------------------------------------------------------------------===// // Generic helpers //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index ece8ff21561cf..df9678774b8ec 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -1010,14 +1010,11 @@ static const std::pair<unsigned, unsigned> 
NEONEquivalentIntrinsicMap[] = { #undef NEONMAP2 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ - { \ - #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ - TypeModifier \ - } + {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier} #define SVEMAP2(NameBase, TypeModifier) \ - { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier } -static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { + {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier} +static const ARMScalableVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { #define GET_SVE_LLVM_INTRINSIC_MAP #include "clang/Basic/arm_sve_builtin_cg.inc" #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def" @@ -1028,14 +1025,11 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { #undef SVEMAP2 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ - { \ - #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ - TypeModifier \ - } + {SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier} #define SMEMAP2(NameBase, TypeModifier) \ - { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier } -static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { + {SME::BI__builtin_sme_##NameBase, 0, TypeModifier} +static const ARMScalableVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { #define GET_SME_LLVM_INTRINSIC_MAP #include "clang/Basic/arm_sme_builtin_cg.inc" #undef GET_SME_LLVM_INTRINSIC_MAP @@ -1053,8 +1047,9 @@ static bool AArch64SMEIntrinsicsProvenSorted = false; // Check if Builtin `BuiltinId` is present in `IntrinsicMap`. If yes, returns // the corresponding info struct. 
-static const ARMVectorIntrinsicInfo * -findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, +template <typename IntrinsicInfo> +static const IntrinsicInfo * +findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo> IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted) { #ifndef NDEBUG @@ -1064,8 +1059,7 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, } #endif - const ARMVectorIntrinsicInfo *Builtin = - llvm::lower_bound(IntrinsicMap, BuiltinID); + const IntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID); if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) return Builtin; @@ -1073,6 +1067,14 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, return nullptr; } +template <typename IntrinsicInfo, size_t NumIntrinsics> +static const IntrinsicInfo * +findARMVectorIntrinsicInMap(const IntrinsicInfo (&IntrinsicMap)[NumIntrinsics], + unsigned BuiltinID, bool &MapProvenSorted) { + return findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo>(IntrinsicMap), + BuiltinID, MapProvenSorted); +} + Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgType, _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
