https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/189424
>From 4eef064266de835a8ff7079c4059db5cc5b38af1 Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Mon, 30 Mar 2026 16:23:08 +0000 Subject: [PATCH 1/2] [AARCH64] Add intrinsic support for new fdot intrinsics --- clang/include/clang/Basic/arm_sve.td | 8 + .../sve2p3-intrinsics/acle_sve2p3_dot.c | 84 +++++++++ ...e2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c | 58 +++++++ ...sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c | 160 ++++++++++++++++++ .../acle_sve2p3_imm.cpp | 14 ++ .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 12 ++ .../CodeGen/AArch64/sve2p3-intrinsics-dots.ll | 46 +++++ 8 files changed, 386 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c create mode 100644 clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c create mode 100644 clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..336c83bfbcdf5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2476,3 +2476,11 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2p2" in { def FMUL_X2 : SInst<"svmul[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_fmul_x2", [IsStreaming], []>; def FMUL_X4 : SInst<"svmul[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_fmul_x4", [IsStreaming], []>; } + +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in { + def SVDOT_X2_SH : SInst<"svdot[_{d}_{2}]", "ddhh", "s", MergeNone, "aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>; + def SVDOT_X2_UH : SInst<"svdot[_{d}_{2}]", "ddhh", "Us", MergeNone, "aarch64_sve_udot_x2", [VerifyRuntimeMode], []>; + + def SVDOT_LANE_X2_SH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "s", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; + def SVDOT_LANE_X2_UH : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Us", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; +} \ No newline at end of file diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c new file mode 100644 index 0000000000000..e32ec95f4b6c8 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c @@ -0,0 +1,84 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2p3 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sve.h> + +#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) +#define ATTR __arm_streaming_compatible +#elif defined(__ARM_FEATURE_SME) +#define ATTR __arm_streaming +#else +#define ATTR +#endif + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svdot_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svdot_s16_x2u11__SVInt16_tu10__SVInt8_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR +{ + return SVE_ACLE_FUNC(svdot,_s16_s8,)(op1, op2, op3); +} + +// CHECK-LABEL: @test_svdot_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svdot_u16_x2u12__SVUint16_tu11__SVUint8_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR +{ + return SVE_ACLE_FUNC(svdot,_u16_u8,)(op1, op2, op3); +} + +// CHECK-LABEL: @test_svdot_lane_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svdot_lane_s16_x2u11__SVInt16_tu10__SVInt8_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR +{ + return SVE_ACLE_FUNC(svdot_lane,_s16_s8,)(op1, op2, op3, 7); +} + +// CHECK-LABEL: @test_svdot_lane_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svdot_lane_u16_x2u12__SVUint16_tu11__SVUint8_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 7) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svuint16_t test_svdot_lane_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR +{ + return SVE_ACLE_FUNC(svdot_lane,_u16_u8,)(op1, op2, op3, 7); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c new file mode 100644 index 0000000000000..40750dbbb86c8 --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c @@ -0,0 +1,58 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent" + +void test(void) { + svint8_t svint8_t_val; + svint16_t svint16_t_val; + svuint8_t svuint8_t_val; + svuint16_t svuint16_t_val; + + svdot(svint16_t_val, svint8_t_val, svint8_t_val); + svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); + svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2); + svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2); + svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2); + svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2); + svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val); + svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val); +} + +void test_streaming(void) __arm_streaming{ + svint8_t svint8_t_val; + svint16_t svint16_t_val; + svuint8_t svuint8_t_val; + svuint16_t svuint16_t_val; + + svdot(svint16_t_val, svint8_t_val, svint8_t_val); + svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); + svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2); + svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2); + svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2); + svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2); + svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val); + svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svint8_t svint8_t_val; + svint16_t svint16_t_val; + svuint8_t svuint8_t_val; + svuint16_t svuint16_t_val; + + svdot(svint16_t_val, svint8_t_val, svint8_t_val); + svdot(svuint16_t_val, svuint8_t_val, svuint8_t_val); + svdot_lane(svint16_t_val, svint8_t_val, svint8_t_val, 2); + svdot_lane(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2); + svdot_lane_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val, 2); + svdot_lane_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val, 2); + svdot_s16_s8(svint16_t_val, svint8_t_val, svint8_t_val); + svdot_u16_u8(svuint16_t_val, svuint8_t_val, svuint8_t_val); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c new file mode 100644 index 0000000000000..9c31ebde4f7f8 --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve-aes2___sme_AND_sve-aes2_AND_ssve-aes.c @@ -0,0 +1,160 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve-aes2 -verify=guard +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +ssve-aes -target-feature +sve -target-feature +sve-aes2 -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,sve-aes2" streaming_guard="sme,sve-aes2,ssve-aes" flags="feature-dependent" + +void test(void) { + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x4_t svuint8x4_t_val; + svuint64_t svuint64_t_val; + svuint64x2_t svuint64x2_t_val; + uint64_t uint64_t_val; + + svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svaese_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaese_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2); + svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2); + svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + svpmull_pair(svuint64_t_val, svuint64_t_val); + svpmull_pair(svuint64_t_val, uint64_t_val); + svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val); + svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val); +} + +void test_streaming(void) __arm_streaming{ + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x4_t svuint8x4_t_val; + svuint64_t svuint64_t_val; + svuint64x2_t svuint64x2_t_val; + uint64_t uint64_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x4_t svuint8x4_t_val; + svuint64_t svuint64_t_val; + svuint64x2_t svuint64x2_t_val; + uint64_t uint64_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesd_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesdimc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaese_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x2(svuint8x2_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svaesemc_lane_u8_x4(svuint8x4_t_val, svuint8_t_val, 2); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_n_u64_x2(svuint64x2_t_val, svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmlal_pair_u64_x2(svuint64x2_t_val, svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, svuint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_n_u64_x2(svuint64_t_val, uint64_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svpmull_pair_u64_x2(svuint64_t_val, svuint64_t_val); +} diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp new file mode 100644 index 0000000000000..e0004effa48da --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp @@ -0,0 +1,14 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s + +#include <arm_sve.h> + +void test_svdot_lane_x2_imm_0_7(svint16_t s16, svuint16_t u16, svint8_t s8, + svuint8_t u8) { + svdot_lane_s16_s8(s16, s8, s8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svdot_lane_u16_u8(u16, u8, u8, -1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + + svdot_lane_s16_s8(s16, s8, s8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + svdot_lane_u16_u8(u16, u8, u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} +} diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c5a3bd504adf9..1255fbe73a5b7 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4804,8 +4804,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">; // SVE2 integer dot product - def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>; - def UDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b1, "udot", ZPR16, ZPR8>; + defm SDOT_ZZZ_BtoH : sve2p3_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>; + defm UDOT_ZZZ_BtoH : sve2p3_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>; def : Pat<(nxv8i16 (partial_reduce_umla nxv8i16:$Acc, nxv16i8:$MulLHS, nxv16i8:$MulRHS)), (UDOT_ZZZ_BtoH $Acc, $MulLHS, $MulRHS)>; @@ -4813,8 +4813,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { (SDOT_ZZZ_BtoH $Acc, $MulLHS, $MulRHS)>; // SVE2 integer indexed dot product - def SDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b0, "sdot">; - def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">; + defm SDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_lane_x2>; + defm UDOT_ZZZI_BtoH : sve2p3_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>; // SVE2 fp convert, narrow and interleave to integer, rounding toward zero defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 8a3f52090ab4c..e411c221fe7f5 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3821,6 +3821,12 @@ multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intri def : SVE_3_Op_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>; } +multiclass sve2p3_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intrinsic> { + def NAME : sve_intx_dot<0b01, 0b00000, u, mnemonic, ZPR16, ZPR8>; + + def : SVE_3_Op_Pat<nxv8i16, intrinsic, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>; +} + //===----------------------------------------------------------------------===// // SVE Integer Dot Product Group - Indexed Group //===----------------------------------------------------------------------===// @@ -10015,6 +10021,12 @@ multiclass sve2p1_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intr def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>; } +multiclass sve2p3_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intrinsic> { + def NAME : sve_intx_dot_by_indexed_elem_x<u, mnemonic>; + + def : SVE_4_Op_Imm_Pat<nxv8i16, intrinsic, nxv8i16, nxv16i8, nxv16i8, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME)>; +} + class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatternOperator op> : I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd", "", [(set pnrty:$PNd, (op))]>, Sched<[]> { diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll new file mode 100644 index 0000000000000..4636ffb122d6b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-dots.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -force-streaming < %s | FileCheck %s + +define <vscale x 8 x i16> @sdot_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) { +; CHECK-LABEL: sdot_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sdot z0.h, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) + ret <vscale x 8 x i16> %out +} + +define <vscale x 8 x i16> @udot_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) { +; CHECK-LABEL: udot_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: udot z0.h, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) + ret <vscale x 8 x i16> %out +} + +define <vscale x 8 x i16> @sdot_lane_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) { +; CHECK-LABEL: sdot_lane_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: sdot z0.h, z1.b, z2.b[7] +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 7) + ret <vscale x 8 x i16> %out +} + +define <vscale x 8 x i16> @udot_lane_x2(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) { +; CHECK-LABEL: udot_lane_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: udot z0.h, z1.b, z2.b[7] +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 7) + ret <vscale x 8 x i16> %out +} + +declare <vscale x 8 x i16> @llvm.aarch64.sve.sdot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) +declare <vscale x 8 x i16> @llvm.aarch64.sve.udot.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sdot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.udot.lane.x2.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32) >From eb9d580c6b394c9d8895f35c791996d3064ad6da Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Wed, 1 Apr 2026 16:03:15 +0000 Subject: [PATCH 2/2] Fix tests --- .../AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c index e32ec95f4b6c8..8ad4fec2aae52 100644 --- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_dot.c @@ -8,9 +8,7 @@ #include <arm_sve.h> -#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE) -#define ATTR __arm_streaming_compatible -#elif defined(__ARM_FEATURE_SME) +#if defined(__ARM_FEATURE_SME) #define ATTR __arm_streaming #else #define ATTR @@ -18,9 +16,9 @@ #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 #else -#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#define SVE_ACLE_FUNC(A1,A2) A1##A2 #endif // CHECK-LABEL: @test_svdot_s16_x2( @@ -35,7 +33,7 @@ // svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR { - return SVE_ACLE_FUNC(svdot,_s16_s8,)(op1, op2, op3); + return SVE_ACLE_FUNC(svdot,_s16_s8)(op1, op2, op3); } // CHECK-LABEL: @test_svdot_u16_x2( @@ -50,7 +48,7 @@ svint16_t test_svdot_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR // svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR { - return SVE_ACLE_FUNC(svdot,_u16_u8,)(op1, op2, op3); + return SVE_ACLE_FUNC(svdot,_u16_u8)(op1, op2, op3); } // CHECK-LABEL: @test_svdot_lane_s16_x2( @@ -65,7 +63,7 @@ svuint16_t test_svdot_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR // svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR { - return SVE_ACLE_FUNC(svdot_lane,_s16_s8,)(op1, op2, op3, 7); + return SVE_ACLE_FUNC(svdot_lane,_s16_s8)(op1, op2, op3, 7); } // CHECK-LABEL: @test_svdot_lane_u16_x2( @@ -80,5 +78,5 @@ svint16_t test_svdot_lane_s16_x2(svint16_t op1, svint8_t op2, svint8_t op3) ATTR // svuint16_t test_svdot_lane_u16_x2(svuint16_t op1, svuint8_t op2, svuint8_t op3) ATTR { - return SVE_ACLE_FUNC(svdot_lane,_u16_u8,)(op1, op2, op3, 7); + return SVE_ACLE_FUNC(svdot_lane,_u16_u8)(op1, op2, op3, 7); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
