https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105
>From 74ee4857a76bc7eb5353dc22311e766ec5356514 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 +++++ .../acle_sme2_mopa_nonwide.c | 97 +++++++++++++++++++ .../acle_sme2_mopa_nonwide.c | 34 +++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 ++++++++ 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + +//////////////////////////////////////////////////////////////////////////////// +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00000000000000..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#include <arm_sme.h> + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svmopa_za16_bf16( +// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) +// CHECK-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z21test_svmopa_za16_bf16u10__SVBool_tS_u14__SVBfloat16_tS0_( +// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) +// CHECK-CXX-NEXT: ret void +// +void test_svmopa_za16_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmopa_za16, _bf16, _m)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: define dso_local void @test_svmops_za16_bf16( +// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) +// CHECK-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z21test_svmops_za16_bf16u10__SVBool_tS_u14__SVBfloat16_tS0_( +// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) +// CHECK-CXX-NEXT: ret void +// +void test_svmops_za16_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmops_za16, _bf16, _m)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: define dso_local void @test_svmopa_za16_f16( +// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) +// CHECK-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z20test_svmopa_za16_f16u10__SVBool_tS_u13__SVFloat16_tS0_( +// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) +// CHECK-CXX-NEXT: ret void +// +void test_svmopa_za16_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmopa_za16, _f16, _m)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: define dso_local void @test_svmops_za16_f16( +// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) +// CHECK-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z20test_svmops_za16_f16u10__SVBool_tS_u13__SVFloat16_tS0_( +// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) +// CHECK-CXX-NEXT: ret void +// +void test_svmops_za16_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmops_za16, _f16, _m)(0, pn, pm, zn, zm); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00000000000000..2d7064dea5ebe8 --- /dev/null +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify -emit-llvm %s + +// REQUIRES: aarch64-registered-target + +#include <arm_sme.h> + +void test_features(svbool_t pn, svbool_t pm, + svfloat16_t zn, svfloat16_t zm, + svbfloat16_t znb, svbfloat16_t zmb) + __arm_streaming __arm_inout("za") { +// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme2,b16b16}} + svmopa_za16_bf16_m(0, pn, pm, znb, zmb); +// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme2,b16b16}} + svmops_za16_bf16_m(0, pn, pm, znb, zmb); +// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme2p1,sme-f16f16}} + svmopa_za16_f16_m(0, pn, pm, zn, zm); +// expected-error@+1 {{'svmops_za16_f16_m' needs target feature sme2p1,sme-f16f16}} + svmops_za16_f16_m(0, pn, pm, zn, zm); +} + +void test_imm(svbool_t pn, svbool_t pm, + svfloat16_t zn, svfloat16_t zm, + svbfloat16_t znb, svbfloat16_t zmb) + __arm_streaming __arm_inout("za") { +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmopa_za16_bf16_m(-1, pn, pm, znb, zmb); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmops_za16_bf16_m(-1, pn, pm, znb, zmb); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmopa_za16_f16_m(-1, pn, pm, zn, zm); +// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmops_za16_f16_m(-1, pn, pm, zn, zm); +} + diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index e31e00a9c76f31..e0630a6649dd7a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3649,3 +3649,6 @@ def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic; def int_aarch64_sve_pmov_to_vector_lane_merging : SVE2_Pred_1VectorArgIndexed_Intrinsic; def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic; + +def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic; +def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 574178c8d5244c..1341754efe3c7d 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -817,8 +817,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa>; +defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, nxv8f16, int_aarch64_sme_mops>; } let Predicates = [HasSME2, HasB16B16] in { @@ -865,8 +865,8 @@ defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm", 0b0010011 defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">; defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">; -defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>; -defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>; +defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, nxv8bf16, int_aarch64_sme_mopa>; +defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, nxv8bf16, int_aarch64_sme_mops>; } let Predicates = [HasSME2, HasFP8] in { @@ -928,7 +928,7 @@ defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, M defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>; defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>; -defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_fp16<"fmopa", 0b1, 0b0, 0b01, ZPR8>; +defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_f8f16<"fmopa", 0b1, 0b0, 0b01>; } //[HasSMEF8F16] diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 3363aab4b093cc..ad197e2b273574 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>; } -multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, bits<2> op, ZPRRegOp zpr_ty>{ - def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, zpr_ty, mnemonic> { +multiclass sme2p1_fmop_tile_f8f16<string mnemonic, bit bf, bit s, bits<2> op> { + def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, ZPR8, mnemonic> { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } } +multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, bits<2> op, ValueType vt, SDPatternOperator intrinsic = null_frag> { + def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> { + bits<1> ZAda; + let Inst{2-1} = 0b00; + let Inst{0} = ZAda; + } + + def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>; + + def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_1, nxv8i1, vt>; +} + class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2, MatrixTileOperand za_ty, ZPRRegOp zpr_ty, string mnemonic> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll new file mode 100644 index 00000000000000..fa0fd43607020f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +define void @mopa_bf16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) #0 { +; CHECK-LABEL: mopa_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: bfmopa za0.h, p0/m, p1/m, z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mopa.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) + ret void +} + +define void @mopa_f16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) #0 { +; CHECK-LABEL: mopa_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmopa za1.h, p0/m, p1/m, z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mopa.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) + ret void +} + +define void @mops_bf16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) #0 { +; CHECK-LABEL: mops_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: bfmops za0.h, p0/m, p1/m, z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mops.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) + ret void +} + +define void @mops_f16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) #0 { +; CHECK-LABEL: mops_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmops za1.h, p0/m, p1/m, z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.mops.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) + ret void +} + +attributes #0 = {nounwind "target-features" = "+sme,+sme2p1,+bf16,+sme-f16f16,+b16b16" } >From d59152e76b7c230644f2014adc15c968b09567c0 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Thu, 18 Apr 2024 18:20:41 +0100 Subject: [PATCH 2/3] [fixup] Remove a TableGen template parameter which only takes a single value --- llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td | 8 ++++---- llvm/lib/Target/AArch64/SMEInstrFormats.td | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 1341754efe3c7d..97d84bcffffa86 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -817,8 +817,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, nxv8f16, int_aarch64_sme_mops>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, nxv8f16, int_aarch64_sme_mopa>; +defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, nxv8f16, int_aarch64_sme_mops>; } let Predicates = [HasSME2, HasB16B16] in { @@ -865,8 +865,8 @@ defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm", 0b0010011 defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">; defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">; -defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, nxv8bf16, int_aarch64_sme_mopa>; -defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, nxv8bf16, int_aarch64_sme_mops>; +defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, nxv8bf16, int_aarch64_sme_mopa>; +defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, nxv8bf16, int_aarch64_sme_mops>; } let Predicates = [HasSME2, HasFP8] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index ad197e2b273574..e7c2010907b4af 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -294,8 +294,8 @@ multiclass sme2p1_fmop_tile_f8f16<string mnemonic, bit bf, bit s, bits<2> op> { } } -multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, bits<2> op, ValueType vt, SDPatternOperator intrinsic = null_frag> { - def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> { +multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, ValueType vt, SDPatternOperator intrinsic = null_frag> { + def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b11, TileOp16, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; >From eda76d5887e3c80ea5c4d25941d7eb232520d0a7 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Mon, 29 Apr 2024 18:03:36 +0100 Subject: [PATCH 3/3] [fixup] Fix target guard of f16f16 intrinsics - they don't require SME2.1 --- clang/include/clang/Basic/arm_sme.td | 2 +- .../Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index a18a5094a15ed3..7d292b29630936 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -688,7 +688,7 @@ let TargetGuard = "sme2,b16b16" in { [ImmCheck<0, ImmCheck0_1>]>; } -let TargetGuard = "sme2p1,sme-f16f16" in { +let TargetGuard = "sme-f16f16" in { def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", MergeNone, "aarch64_sme_mopa", [IsStreaming, IsInOutZA], diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c index 2d7064dea5ebe8..b7b11da4fbc25f 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -12,9 +12,9 @@ void test_features(svbool_t pn, svbool_t pm, svmopa_za16_bf16_m(0, pn, pm, znb, zmb); // expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme2,b16b16}} svmops_za16_bf16_m(0, pn, pm, znb, zmb); -// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme2p1,sme-f16f16}} +// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme-f16f16}} svmopa_za16_f16_m(0, pn, pm, zn, zm); -// expected-error@+1 {{'svmops_za16_f16_m' needs target feature sme2p1,sme-f16f16}} +// expected-error@+1 {{'svmops_za16_f16_m' needs target feature sme-f16f16}} svmops_za16_f16_m(0, pn, pm, zn, zm); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits