https://github.com/dzbarsky created 
https://github.com/llvm/llvm-project/pull/202618

SVE and SME code generation only reads BuiltinID, LLVMIntrinsic, and 
TypeModifier from its generated intrinsic maps. Store those fields in a 16-byte 
ARMScalableVectorIntrinsicInfo instead of the 32-byte ARMVectorIntrinsicInfo 
used by NEON and SISD code generation. Update both the classic and CIR 
consumers.

On an LLVM 22.1.7 arm64 release build, stripped standalone clang shrinks from 
115,097,192 to 114,816,456 bytes, saving 280,736 bytes (0.244%). The stripped 
all-tools multicall shrinks from 139,804,464 to 139,523,736 bytes, saving 
280,728 bytes (0.201%).

All 244 SVE, SME, and NEON/SVE bridge CodeGen tests pass. Large SVE and SME 
tests produce byte-identical LLVM IR. Alternating CPU-time batches were 
performance-neutral: SVE improved 0.7%, and a 4,096-call SME lookup benchmark 
improved 2.3%; end-to-end SME batches differed by 1.1% with overlapping 
distributions under host load.

Work towards #202616

From c3c154d456c0d81090cce785af62e20e8f5650f6 Mon Sep 17 00:00:00 2001
From: David Zbarsky <[email protected]>
Date: Mon, 8 Jun 2026 19:01:25 -0400
Subject: [PATCH] [AArch64] Compact scalable-vector intrinsic maps

SVE and SME code generation only reads BuiltinID, LLVMIntrinsic, and 
TypeModifier from its generated intrinsic maps. Store those fields in a 16-byte 
ARMScalableVectorIntrinsicInfo instead of the 32-byte ARMVectorIntrinsicInfo 
used by NEON and SISD code generation. Update both the classic and CIR 
consumers.

On an LLVM 22.1.7 arm64 release build, stripped standalone clang shrinks from 
115,097,192 to 114,816,456 bytes, saving 280,736 bytes (0.244%). The stripped 
all-tools multicall shrinks from 139,804,464 to 139,523,736 bytes, saving 
280,728 bytes (0.201%).

All 244 SVE, SME, and NEON/SVE bridge CodeGen tests pass. Large SVE and SME 
tests produce byte-identical LLVM IR. Alternating CPU-time batches were 
performance-neutral: SVE improved 0.7%, and a 4,096-call SME lookup benchmark 
improved 2.3%; end-to-end SME batches differed by 1.1% with overlapping 
distributions under host load.
---
 .../include/clang/Basic/AArch64CodeGenUtils.h | 15 ++++++++
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  | 23 ++++++++-----
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      | 34 ++++++++++---------
 3 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/AArch64CodeGenUtils.h b/clang/include/clang/Basic/AArch64CodeGenUtils.h
index f64a41df63cf8..681ee35dc2cf4 100644
--- a/clang/include/clang/Basic/AArch64CodeGenUtils.h
+++ b/clang/include/clang/Basic/AArch64CodeGenUtils.h
@@ -64,6 +64,21 @@ struct ARMVectorIntrinsicInfo {
   }
 };
 
+struct ARMScalableVectorIntrinsicInfo {
+  unsigned BuiltinID;
+  unsigned LLVMIntrinsic;
+  uint64_t TypeModifier;
+
+  bool operator<(unsigned RHSBuiltinID) const {
+    return BuiltinID < RHSBuiltinID;
+  }
+  bool operator<(const ARMScalableVectorIntrinsicInfo &TE) const {
+    return BuiltinID < TE.BuiltinID;
+  }
+};
+
+static_assert(sizeof(ARMScalableVectorIntrinsicInfo) == 16);
+
 #define NEONMAP0(NameBase)                                                     \
   {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0}
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index be906d0671e3a..b9b2a797c299b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -45,12 +45,11 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
 }
 
 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
-  {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,    \
-   TypeModifier}
+  {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
 
 #define SVEMAP2(NameBase, TypeModifier)                                        \
-  {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
-static const ARMVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
+  {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
+static const ARMScalableVectorIntrinsicInfo aarch64SVEIntrinsicMap[] = {
 #define GET_SVE_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sve_builtin_cg.inc"
 #undef GET_SVE_LLVM_INTRINSIC_MAP
@@ -62,8 +61,9 @@ static bool aarch64SVEIntrinsicsProvenSorted = false;
 
 // Check if Builtin `builtinId` is present in `intrinsicMap`. If yes, returns
 // the corresponding info struct.
-static const ARMVectorIntrinsicInfo *
-findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap,
+template <typename IntrinsicInfo>
+static const IntrinsicInfo *
+findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo> intrinsicMap,
                             unsigned builtinID, bool &mapProvenSorted) {
 
 #ifndef NDEBUG
@@ -73,8 +73,7 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap,
   }
 #endif
 
-  const ARMVectorIntrinsicInfo *info =
-      llvm::lower_bound(intrinsicMap, builtinID);
+  const IntrinsicInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
 
   if (info != intrinsicMap.end() && info->BuiltinID == builtinID)
     return info;
@@ -82,6 +81,14 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> intrinsicMap,
   return nullptr;
 }
 
+template <typename IntrinsicInfo, size_t numIntrinsics>
+static const IntrinsicInfo *
+findARMVectorIntrinsicInMap(const IntrinsicInfo (&intrinsicMap)[numIntrinsics],
+                            unsigned builtinID, bool &mapProvenSorted) {
+  return findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo>(intrinsicMap),
+                                     builtinID, mapProvenSorted);
+}
+
 //===----------------------------------------------------------------------===//
 //  Generic helpers
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index ece8ff21561cf..df9678774b8ec 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -1010,14 +1010,11 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
 #undef NEONMAP2
 
 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
-  {                                                                            \
-    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
-        TypeModifier                                                           \
-  }
+  {SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
 
 #define SVEMAP2(NameBase, TypeModifier)                                        \
-  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
-static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
+  {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
+static const ARMScalableVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #define GET_SVE_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sve_builtin_cg.inc"
 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
@@ -1028,14 +1025,11 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #undef SVEMAP2
 
 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
-  {                                                                            \
-    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
-        TypeModifier                                                           \
-  }
+  {SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, TypeModifier}
 
 #define SMEMAP2(NameBase, TypeModifier)                                        \
-  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
-static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
+  {SME::BI__builtin_sme_##NameBase, 0, TypeModifier}
+static const ARMScalableVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
 #define GET_SME_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sme_builtin_cg.inc"
 #undef GET_SME_LLVM_INTRINSIC_MAP
@@ -1053,8 +1047,9 @@ static bool AArch64SMEIntrinsicsProvenSorted = false;
 
 // Check if Builtin `BuiltinId` is present in `IntrinsicMap`. If yes, returns
 // the corresponding info struct.
-static const ARMVectorIntrinsicInfo *
-findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
+template <typename IntrinsicInfo>
+static const IntrinsicInfo *
+findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo> IntrinsicMap,
                             unsigned BuiltinID, bool &MapProvenSorted) {
 
 #ifndef NDEBUG
@@ -1064,8 +1059,7 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
   }
 #endif
 
-  const ARMVectorIntrinsicInfo *Builtin =
-      llvm::lower_bound(IntrinsicMap, BuiltinID);
+  const IntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID);
 
   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
     return Builtin;
@@ -1073,6 +1067,14 @@ findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
   return nullptr;
 }
 
+template <typename IntrinsicInfo, size_t NumIntrinsics>
+static const IntrinsicInfo *
+findARMVectorIntrinsicInMap(const IntrinsicInfo (&IntrinsicMap)[NumIntrinsics],
+                            unsigned BuiltinID, bool &MapProvenSorted) {
+  return findARMVectorIntrinsicInMap(ArrayRef<IntrinsicInfo>(IntrinsicMap),
+                                     BuiltinID, MapProvenSorted);
+}
+
 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                    unsigned Modifier,
                                                    llvm::Type *ArgType,

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to