[clang] [CIR][AArch64] Add lowering + tests for predicated SVE svdup_x builtins (PR #182542)

Andrzej Warzyński via cfe-commits Fri, 20 Feb 2026 09:10:46 -0800

https://github.com/banach-space created 
https://github.com/llvm/llvm-project/pull/182542


This PR adds CIR lowering, plus tests, for the predicated SVE `svdup` builtins
on AArch64. The corresponding ACLE intrinsics are documented at:
https://developer.arm.com/architectures/instruction-sets/intrinsics

This PR covers the "don't-care" predicated variants with suffix `_x`, e.g.
`svdup_n_f32_x`. These are lowered to the merging LLVM intrinsics with an undef
vector as the first operand, which provides the result for lanes where the
predicate is false.


From 2f9556e87028b1b5d47836401c8ae0e63d38cc53 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Fri, 20 Feb 2026 17:07:06 +0000
Subject: [PATCH] [CIR][AArch64] Add lowering + tests for predicated SVE
 svdup_x builtins

This PR adds CIR lowering, plus tests, for the predicated SVE `svdup` builtins
on AArch64. The corresponding ACLE intrinsics are documented at:
https://developer.arm.com/architectures/instruction-sets/intrinsics

This PR covers the "don't-care" predicated variants with suffix `_x`, e.g.
`svdup_n_f32_x`. These are lowered to the merging LLVM intrinsics with an undef
vector as the first operand, which provides the result for lanes where the
predicate is false.
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |   7 +-
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 176 +++++++++++++++++-
 2 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 699fee5a3a358..c406e0613976d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -243,6 +243,8 @@ static unsigned 
getSVEMinEltCount(clang::SVETypeFlags::EltType sveType) {
 std::optional<mlir::Value>
 CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                                           const CallExpr *expr) {
+  mlir::Type ty = convertType(expr->getType());
+
   if (builtinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
       builtinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
     cgm.errorNYI(expr->getSourceRange(),
@@ -293,9 +295,8 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned 
builtinID,
     }
 
     if (typeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
-      cgm.errorNYI(expr->getSourceRange(),
-                   std::string("unimplemented AArch64 builtin call: ") +
-                       getContext().BuiltinInfo.getName(builtinID));
+      ops.insert(ops.begin(),
+                 builder.getConstant(loc, cir::UndefAttr::get(ty)));
 
     // Some ACLE builtins leave out the argument to specify the predicate
     // pattern, which is expected to be expanded to an SV_ALL pattern.
diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c 
b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
index 8697b2121fb3e..2bfee3d754f49 100644
--- a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -325,7 +325,7 @@ svfloat64_t test_svdup_n_f64_z(svbool_t pg, float64_t op) 
MODE_ATTR
 }
 
 //===------------------------------------------------------===//
-// 2. PREDICATED MERGING-ING SVDUP
+// 3. PREDICATED MERGING SVDUP (Op1)
 //===------------------------------------------------------===//
 // ALL-LABEL: @test_svdup_n_s8_m(
 svint8_t test_svdup_n_s8_m(svint8_t inactive, svbool_t pg, int8_t op) MODE_ATTR
@@ -485,3 +485,177 @@ svfloat64_t test_svdup_n_f64_m(svfloat64_t inactive, 
svbool_t pg, float64_t op)
 // LLVM_OGCG_CIR-NEXT:    ret <vscale x 2 x double> [[RES]]
   return SVE_ACLE_FUNC(svdup,_n,_f64_m,)(inactive, pg, op);
 }
+
+//===------------------------------------------------------===//
+// 4. PREDICATED DON'T-CARE SVDUP (MergeAnyExp)
+//===------------------------------------------------------===//
+// ALL-LABEL: @test_svdup_n_s8_x(
+svint8_t test_svdup_n_s8_x(svbool_t pg, int8_t op) MODE_ATTR
+{
+// CIR:           [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[16] x 
!s8i>
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%{{.*}}, %{{.*}} :
+// CIR-SAME:        (!cir.vector<[16] x !s8i>, !cir.vector<[16] x !cir.int<u, 
1>>, !s8i) -> !cir.vector<[16] x !s8i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i8{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> 
[[PG]], i8 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 16 x i8> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_s8_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_s16_x(
+svint16_t test_svdup_n_s16_x(svbool_t pg, int16_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[8] x !s16i>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:    (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[8] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:    (!cir.vector<[8] x !s16i>, !cir.vector<[8] x !cir.int<u, 1>>, 
!s16i) -> !cir.vector<[8] x !s16i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i16{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> 
[[PG_CONVERTED]], i16 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 8 x i16> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_s16_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_s32_x(
+svint32_t test_svdup_n_s32_x(svbool_t pg, int32_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[4] x !s32i>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:    (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[4] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:    (!cir.vector<[4] x !s32i>, !cir.vector<[4] x !cir.int<u, 1>>, 
!s32i) -> !cir.vector<[4] x !s32i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i32{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> 
[[PG_CONVERTED]], i32 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 4 x i32> [[RES]]
+//
+  return SVE_ACLE_FUNC(svdup,_n,_s32_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_s64_x(
+svint64_t test_svdup_n_s64_x(svbool_t pg, int64_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[2] x !s64i>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[2] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[2] x !s64i>, !cir.vector<[2] x !cir.int<u, 1>>, 
!s64i) -> !cir.vector<[2] x !s64i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i64{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> 
[[PG_CONVERTED]], i64 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 2 x i64> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_s64_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_u8_x(
+svuint8_t test_svdup_n_u8_x(svbool_t pg, uint8_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[16] x !u8i>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], %{{.*}}, 
%{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !u8i>, !cir.vector<[16] x !cir.int<u, 1>>, 
!u8i) -> !cir.vector<[16] x !u8i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i8{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> 
[[PG]], i8 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 16 x i8> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_u8_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_u16_x(
+svuint16_t test_svdup_n_u16_x(svbool_t pg, uint16_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[8] x !u16i>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[8] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[8] x !u16i>, !cir.vector<[8] x !cir.int<u, 1>>, 
!u16i) -> !cir.vector<[8] x !u16i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i16{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> 
[[PG_CONVERTED]], i16 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 8 x i16> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_u16_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_u32_x(
+svuint32_t test_svdup_n_u32_x(svbool_t pg, uint32_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[4] x !u32i>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[4] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[4] x !u32i>, !cir.vector<[4] x !cir.int<u, 1>>, 
!u32i) -> !cir.vector<[4] x !u32i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i32{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> 
[[PG_CONVERTED]], i32 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 4 x i32> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_u32_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_u64_x(
+svuint64_t test_svdup_n_u64_x(svbool_t pg, uint64_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[2] x !u64i>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[2] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[2] x !u64i>, !cir.vector<[2] x !cir.int<u, 1>>, 
!u64i) -> !cir.vector<[2] x !u64i>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], i64{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> 
[[PG_CONVERTED]], i64 [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 2 x i64> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_u64_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_f16_x(
+svfloat16_t test_svdup_n_f16_x(svbool_t pg, float16_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[8] x 
!cir.f16>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[8] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[8] x !cir.f16>, !cir.vector<[8] x !cir.int<u, 
1>>, !cir.f16) -> !cir.vector<[8] x !cir.f16>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], half{{.*}} [[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 8 x half> 
@llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> 
[[PG_CONVERTED]], half [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 8 x half> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_f16_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_f32_x(
+svfloat32_t test_svdup_n_f32_x(svbool_t pg, float32_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[4] x 
!cir.float>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[4] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[4] x !cir.float>, !cir.vector<[4] x !cir.int<u, 
1>>, !cir.float) -> !cir.vector<[4] x !cir.float>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], float{{.*}} 
[[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> 
[[PG_CONVERTED]], float [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 4 x float> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_f32_x,)(pg, op);
+}
+
+// ALL-LABEL: @test_svdup_n_f64_x(
+svfloat64_t test_svdup_n_f64_x(svbool_t pg, float64_t op) MODE_ATTR
+{
+// CIR:       [[UNDEF:%.*]] = cir.const #cir.undef : !cir.vector<[2] x 
!cir.double>
+// CIR:       %[[CONVERT_PG:.*]] = cir.call_llvm_intrinsic 
"aarch64.sve.convert.from.svbool" %{{.*}} :
+// CIR-SAME:   (!cir.vector<[16] x !cir.int<u, 1>>) -> !cir.vector<[2] x 
!cir.int<u, 1>>
+// CIR:       cir.call_llvm_intrinsic "aarch64.sve.dup" [[UNDEF]], 
%[[CONVERT_PG]], %{{.*}} :
+// CIR-SAME:   (!cir.vector<[2] x !cir.double>, !cir.vector<[2] x !cir.int<u, 
1>>, !cir.double) -> !cir.vector<[2] x !cir.double>
+
+// LLVM_OGCG_CIR-SAME:    <vscale x 16 x i1> [[PG:%.*]], double{{.*}} 
[[OP:%.*]])
+// LLVM_OGCG_CIR:         [[PG_CONVERTED:%.*]] = call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// LLVM_OGCG_CIR-NEXT:    [[RES:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> 
[[PG_CONVERTED]], double [[OP]])
+// LLVM_OGCG_CIR-NEXT:    ret <vscale x 2 x double> [[RES]]
+  return SVE_ACLE_FUNC(svdup,_n,_f64_x,)(pg, op);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][AArch64] Add lowering + tests for predicated SVE svdup_x builtins (PR #182542)

Reply via email to