[clang] [llvm] [AARCH64] Add intrinsic support for new s/udot intrinsics (PR #189424)

via cfe-commits Wed, 01 Apr 2026 09:04:16 -0700

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/189424


>From 4eef064266de835a8ff7079c4059db5cc5b38af1 Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Mon, 30 Mar 2026 16:23:08 +0000
Subject: [PATCH 1/2] [AARCH64] Add intrinsic support for new s/udot intrinsics

---
 clang/include/clang/Basic/arm_sve.td          |   8 +
 .../sve2p3-intrinsics/acle_sve2p3_dot.c       |  84 +++++++++
 ...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c |  58 +++++++
 ...sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c | 160 ++++++++++++++++++
 .../acle_sve2p3_imm.cpp                       |  14 ++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   8 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  12 ++
 .../CodeGen/AArch64/sve2p3-intrinsics-dots.ll |  46 +++++
 8 files changed, 386 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
 create mode 100644 
clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
 create mode 100644 
clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
 create mode 100644 
clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..336c83bfbcdf5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2476,3 +2476,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2p2" in {
   def FMUL_X2 : SInst<"svmul[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_fmul_x2", [IsStreaming], []>;
   def FMUL_X4 : SInst<"svmul[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_fmul_x4", [IsStreaming], []>;
 }
+
+let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
+  def SVDOT_X2_SH : SInst<"svdot[_{d}_{2}]", "ddhh", "s",  MergeNone, 
"aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>;
+  def SVDOT_X2_UH : SInst<"svdot[_{d}_{2}]", "ddhh", "Us", MergeNone, 
"aarch64_sve_udot_x2", [VerifyRuntimeMode], []>;
+
+  def SVDOT_LANE_X2_SH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "s",  
MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
+  def SVDOT_LANE_X2_UH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Us", 
MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
new file mode 100644
index 0000000000000..e32ec95f4b6c8
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
@@ -0,0 +1,84 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p3 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p3 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
+#define ATTR __arm_streaming
+#else
+#define ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: @test_svdot_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 
x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_s16_x2u11__SVInt16_tu10__SVInt8_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 
x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
+{
+  return SVE_ACLE_FUNC(svdot,_s16_s8,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svdot_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 
x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_u16_x2u12__SVUint16_tu11__SVUint8_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 
x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
+{
+  return SVE_ACLE_FUNC(svdot,_u16_u8,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svdot_lane_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale 
x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svdot_lane_s16_x2u11__SVInt16_tu10__SVInt8_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale 
x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) 
ATTR
+{
+  return SVE_ACLE_FUNC(svdot_lane,_s16_s8,)(op1, op2, op3, 7);
+}
+
+// CHECK-LABEL: @test_svdot_lane_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale 
x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: 
@_Z22test_svdot_lane_u16_x2u12__SVUint16_tu11__SVUint8_tS0_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale 
x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svdot_lane_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t 
op3) ATTR
+{
+  return SVE_ACLE_FUNC(svdot_lane,_u16_u8,)(op1, op2, op3, 7);
+}
diff --git 
a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
new file mode 100644
index 0000000000000..40750dbbb86c8
--- /dev/null
+++ 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
@@ -0,0 +1,58 @@
+// NOTE: File has been autogenerated by 
utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify
+// expected-no-diagnostics
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,(sve2p3|sme2p3)" 
streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
+
+void test(void) {
+  svint8_t svint8_t_val;
+  svint16_t svint16_t_val;
+  svuint8_t svuint8_t_val;
+  svuint16_t svuint16_t_val;
+
+  svdot(svint16_t_val, svint8_t_val, svint8_t_val);
+  svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+  svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+  svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+  svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+  svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+  svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val);
+  svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+}
+
+void test_streaming(void) __arm_streaming{
+  svint8_t svint8_t_val;
+  svint16_t svint16_t_val;
+  svuint8_t svuint8_t_val;
+  svuint16_t svuint16_t_val;
+
+  svdot(svint16_t_val, svint8_t_val, svint8_t_val);
+  svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+  svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+  svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+  svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+  svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+  svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val);
+  svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+  svint8_t svint8_t_val;
+  svint16_t svint16_t_val;
+  svuint8_t svuint8_t_val;
+  svuint16_t svuint16_t_val;
+
+  svdot(svint16_t_val, svint8_t_val, svint8_t_val);
+  svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+  svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+  svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+  svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2);
+  svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2);
+  svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val);
+  svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val);
+}
diff --git 
a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
new file mode 100644
index 0000000000000..9c31ebde4f7f8
--- /dev/null
+++ 
b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c
@@ -0,0 +1,160 @@
+// NOTE: File has been autogenerated by 
utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sve -target-feature +sve-aes2 
-verify=guard
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +ssve-aes -target-feature +sve 
-target-feature +sve-aes2 -verify
+// expected-no-diagnostics
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,sve-aes2" streaming_guard="sme,sve-aes2,ssve-aes" 
flags="feature-dependent"
+
+void test(void) {
+  svuint8_t svuint8_t_val;
+  svuint8x2_t svuint8x2_t_val;
+  svuint8x4_t svuint8x4_t_val;
+  svuint64_t svuint64_t_val;
+  svuint64x2_t svuint64x2_t_val;
+  uint64_t uint64_t_val;
+
+  svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  svaese_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  svaese_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+  svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+  svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+  svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+  svpmull_pair(svuint64_t_val, svuint64_t_val);
+  svpmull_pair(svuint64_t_val, uint64_t_val);
+  svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val);
+  svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val);
+}
+
+void test_streaming(void) __arm_streaming{
+  svuint8_t svuint8_t_val;
+  svuint8x2_t svuint8x2_t_val;
+  svuint8x4_t svuint8x4_t_val;
+  svuint64_t svuint64_t_val;
+  svuint64x2_t svuint64x2_t_val;
+  uint64_t uint64_t_val;
+
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair(svuint64_t_val, svuint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair(svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+  svuint8_t svuint8_t_val;
+  svuint8x2_t svuint8x2_t_val;
+  svuint8x4_t svuint8x4_t_val;
+  svuint64_t svuint64_t_val;
+  svuint64x2_t svuint64x2_t_val;
+  uint64_t uint64_t_val;
+
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair(svuint64_t_val, svuint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair(svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val);
+}
diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp 
b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
new file mode 100644
index 0000000000000..e0004effa48da
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
@@ -0,0 +1,14 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s
+
+#include <arm_sve.h>
+
+void test_svdot_lane_x2_imm_0_7(svint16_t s16, svuint16_t u16, svint8_t s8,
+                                svuint8_t u8) {
+  svdot_lane_s16_s8(s16, s8, s8, -1); // expected-error {{argument value 
18446744073709551615 is outside the valid range [0, 7]}}
+  svdot_lane_u16_u8(u16, u8, u8, -1); // expected-error {{argument value 
18446744073709551615 is outside the valid range [0, 7]}}
+
+  svdot_lane_s16_s8(s16, s8, s8, 8); // expected-error {{argument value 8 is 
outside the valid range [0, 7]}}
+  svdot_lane_u16_u8(u16, u8, u8, 8); // expected-error {{argument value 8 is 
outside the valid range [0, 7]}}
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c5a3bd504adf9..1255fbe73a5b7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4804,8 +4804,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
   defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
 
   // SVE2 integer dot product
-  def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;
-  def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>;
+  defm SDOT_ZZZ_BtoH : sve2p3_two_way_dot_vv<"sdot", 0b0, 
int_aarch64_sve_sdot_x2>;
+  defm UDOT_ZZZ_BtoH : sve2p3_two_way_dot_vv<"udot", 0b1, 
int_aarch64_sve_udot_x2>;
 
   def : Pat<(nxv8i16 (partial_reduce_umla nxv8i16:$Acc, nxv16i8:$MulLHS, 
nxv16i8:$MulRHS)),
             (UDOT_ZZZ_BtoH $Acc, $MulLHS, $MulRHS)>;
@@ -4813,8 +4813,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
             (SDOT_ZZZ_BtoH $Acc, $MulLHS, $MulRHS)>;
 
   // SVE2 integer indexed dot product
-  def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">;
-  def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">;
+  defm SDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"sdot", 0b0, 
int_aarch64_sve_sdot_lane_x2>;
+  defm UDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"udot", 0b1, 
int_aarch64_sve_udot_lane_x2>;
 
   // SVE2 fp convert, narrow and interleave to integer, rounding toward zero
   defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8a3f52090ab4c..e411c221fe7f5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3821,6 +3821,12 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, 
SDPatternOperator intri
   def : SVE_3_Op_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, 
!cast<Instruction>(NAME)>;
 }
 
+multiclass sve2p3_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator 
intrinsic> {
+  def NAME : sve_intx_dot<0b01, 0b00000, u, mnemonic, ZPR16, ZPR8>;
+
+  def : SVE_3_Op_Pat<nxv8i16, intrinsic, nxv8i16, nxv16i8, nxv16i8, 
!cast<Instruction>(NAME)>;
+}
+
 
//===----------------------------------------------------------------------===//
 // SVE Integer Dot Product Group - Indexed Group
 
//===----------------------------------------------------------------------===//
@@ -10015,6 +10021,12 @@ multiclass sve2p1_two_way_dot_vvi<string mnemonic, bit 
u, SDPatternOperator intr
   def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, 
VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
+multiclass sve2p3_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator 
intrinsic> {
+  def NAME : sve_intx_dot_by_indexed_elem_x<u, mnemonic>;
+
+  def : SVE_4_Op_Imm_Pat<nxv8i16, intrinsic, nxv8i16, nxv16i8, nxv16i8, i32, 
VectorIndexH32b_timm, !cast<Instruction>(NAME)>;
+}
+
 class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, 
SDPatternOperator op>
     : I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd",
         "", [(set pnrty:$PNd, (op))]>, Sched<[]> {
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll
new file mode 100644
index 0000000000000..4636ffb122d6b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | 
FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -force-streaming < 
%s | FileCheck %s
+
+define <vscale x 8 x i16> @sdot_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> 
%zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: sdot_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sdot z0.h, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 
x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @udot_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> 
%zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: udot_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    udot z0.h, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 
x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @sdot_lane_x2(<vscale x 8 x i16> %zda, <vscale x 16 
x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: sdot_lane_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sdot z0.h, z1.b, z2.b[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> 
@llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x 
i8> %zn, <vscale x 16 x i8> %zm, i32 7)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @udot_lane_x2(<vscale x 8 x i16> %zda, <vscale x 16 
x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: udot_lane_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    udot z0.h, z1.b, z2.b[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> 
@llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x 
i8> %zn, <vscale x 16 x i8> %zm, i32 7)
+  ret <vscale x 8 x i16> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x 
i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x 
i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 
x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 
x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32)

>From eb9d580c6b394c9d8895f35c791996d3064ad6da Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Wed, 1 Apr 2026 16:03:15 +0000
Subject: [PATCH 2/2] Fix tests

---
 .../AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c  | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
index e32ec95f4b6c8..8ad4fec2aae52 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c
@@ -8,9 +8,7 @@
 
 #include <arm_sve.h>
 
-#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
-#define ATTR __arm_streaming_compatible
-#elif defined(__ARM_FEATURE_SME)
+#if defined(__ARM_FEATURE_SME)
 #define ATTR __arm_streaming
 #else
 #define ATTR
@@ -18,9 +16,9 @@
 
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
-#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1
 #else
-#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
 #endif
 
 // CHECK-LABEL: @test_svdot_s16_x2(
@@ -35,7 +33,7 @@
 //
 svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR
 {
-  return SVE_ACLE_FUNC(svdot,_s16_s8,)(op1, op2, op3);
+  return SVE_ACLE_FUNC(svdot,_s16_s8)(op1, op2, op3);
 }
 
 // CHECK-LABEL: @test_svdot_u16_x2(
@@ -50,7 +48,7 @@ svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, 
svint8_t op3) ATTR
 //
 svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR
 {
-  return SVE_ACLE_FUNC(svdot,_u16_u8,)(op1, op2, op3);
+  return SVE_ACLE_FUNC(svdot,_u16_u8)(op1, op2, op3);
 }
 
 // CHECK-LABEL: @test_svdot_lane_s16_x2(
@@ -65,7 +63,7 @@ svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, 
svuint8_t op3) ATTR
 //
 svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) 
ATTR
 {
-  return SVE_ACLE_FUNC(svdot_lane,_s16_s8,)(op1, op2, op3, 7);
+  return SVE_ACLE_FUNC(svdot_lane,_s16_s8)(op1, op2, op3, 7);
 }
 
 // CHECK-LABEL: @test_svdot_lane_u16_x2(
@@ -80,5 +78,5 @@ svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, 
svint8_t op3) ATTR
 //
 svuint16_t test_svdot_lane_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t 
op3) ATTR
 {
-  return SVE_ACLE_FUNC(svdot_lane,_u16_u8,)(op1, op2, op3, 7);
+  return SVE_ACLE_FUNC(svdot_lane,_u16_u8)(op1, op2, op3, 7);
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AARCH64] Add intrinsic support for new s/udot intrinsics (PR #189424)

Reply via email to