Author: Jonathan Thackray Date: 2026-06-12T16:47:16+01:00 New Revision: 7814cc9454cf454acdc7385fc41809a6bdf5d7f8
URL: https://github.com/llvm/llvm-project/commit/7814cc9454cf454acdc7385fc41809a6bdf5d7f8 DIFF: https://github.com/llvm/llvm-project/commit/7814cc9454cf454acdc7385fc41809a6bdf5d7f8.diff LOG: [AArch64][clang][llvm] Add ACLE Armv9.7 lookup table intrinsics (#187046) Add support for the following Armv9.7-A Lookup Table (lut) instruction intrinsics, as defined in the ACLE[1]: SVE2.3: ```c // Variants are also available for: _u8 and _mf8. svint8_t svluti6[_s8_x2](svint8x2_t table, svuint8_t indices); ``` SVE2.3 and SME2.3: ```c // Variants are also available for: _u16_x2, _f16_x2 and _bf16_x2. svint16_t svluti6_lane[_s16_x2](svint16x2_t table, svuint8_t indices, uint64_t imm_idx); ``` SME2.3: ```c // Variants are also available for: _u16, _f16 and _bf16. svint16x4_t svluti6_lane_s16_x4[_s16_x2_u8_x2](svint16x2_t table, svuint8x2_t indices, uint64_t imm_idx); // Variants are also available for: _u8 and _mf8. svint8x4_t svluti6_zt_s8_x4(uint64_t zt0, svuint8x3_t zn) __arm_streaming __arm_in("zt0"); // Variants are also available for: _u8 and _mf8. 
svint8_t svluti6_zt_s8(uint64_t zt0, svuint8_t zn) __arm_streaming __arm_in("zt0"); ``` [1] https://github.com/ARM-software/acle/pull/428/ Added: clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll Modified: clang/include/clang/Basic/arm_sme.td clang/include/clang/Basic/arm_sve.td clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SMEInstrFormats.td llvm/lib/Target/AArch64/SVEInstrFormats.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 032c588966032..c79e6e2ae1f9a 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -981,6 +981,11 @@ let SMETargetGuard = "sme-lutv2" in { def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; } +let SMETargetGuard = "sme2p3" in { + def SVLUTI6_ZT : SInst<"svluti6_zt_{d}", "di[", "cUcm", MergeNone, "aarch64_sme_luti6_zt", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; + def 
SVLUTI6_ZT_X4 : SInst<"svluti6_zt_{d}_x4", "4i3.[", "cUcm", MergeNone, "aarch64_sme_luti6_zt_x4", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; +} + let SMETargetGuard = "sme-f8f32" in { def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m", "viPPdd>", "m", MergeNone, "aarch64_sme_fp8_fmopa_za32", [IsStreaming, IsInOutZA, IsOverloadNone], [ImmCheck<0, ImmCheck0_3>]>; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 25f42cbcac64e..91111001703c3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1919,6 +1919,19 @@ let SVETargetGuard = "(sve2|sme2),lut", SMETargetGuard = "sme2,lut" in { def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUshb", MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; } +let SVETargetGuard = "sve2p3", SMETargetGuard = InvalidMode in { + def SVLUTI6 : SInst<"svluti6[_{d}_x2]", "d2[", "cUcm", MergeNone, "aarch64_sve_luti6", [IsOverloadNone]>; +} + +let SVETargetGuard = "sve2p3", SMETargetGuard = "sve2p3|sme2p3" in { + def SVLUTI6_x2 : SInst<"svluti6_lane[_{d}_x2]", "d2.d[i", "sUshb", MergeNone, "aarch64_sve_luti6_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>; +} + +let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2p3" in { + def SVLUTI6_X4_U8X2 : SInst<"svluti6_lane_{d}_x4[_{d}_x2_u8_x2]", "422.[i", "sUshb", MergeNone, "aarch64_sme_luti6_lane_x4_x2", [IsStreaming], [ImmCheck<2, ImmCheck0_1>]>; + def SVLUTI6_X4_U8X3 : SInst<"svluti6_lane_{d}_x4[_{d}_x2_u8_x3]", "423.[i", "sUshb", MergeNone, "aarch64_sme_luti6_lane_x4_x3", [IsStreaming], [ImmCheck<2, ImmCheck0_1>]>; +} + //////////////////////////////////////////////////////////////////////////////// // SVE2 - Optional diff --git a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c new file mode 100644 index 0000000000000..656b0ce565833 --- 
/dev/null +++ b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c @@ -0,0 +1,251 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s + +#include <arm_sme.h> + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2,A3_UNUSED,A4_UNUSED) A1##A2 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_s16_x4( +// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1) +// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z24test_svluti6_lane_s16_x411svint16x2_t11svuint8x2_t( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1) +// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +svint16x4_t test_svluti6_lane_s16_x4(svint16x2_t table, svuint8x2_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_s16_x4,_s16_x2_u8_x2,)(table, indices, 1); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_u16_x4( +// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0) +// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z24test_svluti6_lane_u16_x412svuint16x2_t11svuint8x2_t( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +svuint16x4_t test_svluti6_lane_u16_x4(svuint16x2_t table, svuint8x2_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_u16_x4,_u16_x2_u8_x2,)(table, indices, 0); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_svluti6_lane_f16_x4( +// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x 
i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1) +// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @_Z24test_svluti6_lane_f16_x413svfloat16x2_t11svuint8x2_t( +// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1) +// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] +// +svfloat16x4_t test_svluti6_lane_f16_x4(svfloat16x2_t table, svuint8x2_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_f16_x4,_f16_x2_u8_x2,)(table, indices, 1); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_svluti6_lane_bf16_x4( +// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0) +// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @_Z25test_svluti6_lane_bf16_x414svbfloat16x2_t11svuint8x2_t( +// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] +// +svbfloat16x4_t test_svluti6_lane_bf16_x4(svbfloat16x2_t table, svuint8x2_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_bf16_x4,_bf16_x2_u8_x2,)(table, indices, 0); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_s16_x4_u8_x3( +// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> 
[[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 1) +// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z30test_svluti6_lane_s16_x4_u8_x311svint16x2_t11svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 1) +// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +svint16x4_t test_svluti6_lane_s16_x4_u8_x3(svint16x2_t table, svuint8x3_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_s16_x4,_s16_x2_u8_x3,)(table, indices, 1); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_u16_x4_u8_x3( +// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 0) +// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z30test_svluti6_lane_u16_x4_u8_x312svuint16x2_t11svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]] +// +svuint16x4_t test_svluti6_lane_u16_x4_u8_x3(svuint16x2_t table, svuint8x3_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_u16_x4,_u16_x2_u8_x3,)(table, indices, 0); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_svluti6_lane_f16_x4_u8_x3( +// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 1) +// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @_Z30test_svluti6_lane_f16_x4_u8_x313svfloat16x2_t11svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 1) +// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] +// +svfloat16x4_t test_svluti6_lane_f16_x4_u8_x3(svfloat16x2_t table, svuint8x3_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_f16_x4,_f16_x2_u8_x3,)(table, indices, 1); +} + +// CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_svluti6_lane_bf16_x4_u8_x3( +// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 0) +// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @_Z31test_svluti6_lane_bf16_x4_u8_x314svbfloat16x2_t11svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] +// +svbfloat16x4_t test_svluti6_lane_bf16_x4_u8_x3(svbfloat16x2_t table, svuint8x3_t indices) + __arm_streaming { + return SVE_ACLE_FUNC(svluti6_lane,_bf16_x4,_bf16_x2_u8_x3,)(table, indices, 0); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_s8( +// CHECK-SAME: <vscale x 16 x i8> 
[[INDICES:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z18test_svluti6_zt_s8u11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svluti6_zt_s8(svuint8_t indices) __arm_streaming __arm_in("zt0") { + return svluti6_zt_s8(0, indices); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_u8( +// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z18test_svluti6_zt_u8u11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svluti6_zt_u8(svuint8_t indices) __arm_streaming __arm_in("zt0") { + return svluti6_zt_u8(0, indices); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_mf8( +// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> 
[[INDICES]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svluti6_zt_mf8u11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svluti6_zt_mf8(svuint8_t indices) __arm_streaming __arm_in("zt0") { + return svluti6_zt_mf8(0, indices); +} + +// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_u8_x4( +// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]]) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z21test_svluti6_zt_u8_x411svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]]) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x4_t test_svluti6_zt_u8_x4(svuint8x3_t indices) + __arm_streaming __arm_in("zt0") { + return svluti6_zt_u8_x4(0, indices); +} + +// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_s8_x4( +// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]]) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z21test_svluti6_zt_s8_x411svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]]) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svint8x4_t test_svluti6_zt_s8_x4(svuint8x3_t indices) + __arm_streaming __arm_in("zt0") { + return svluti6_zt_s8_x4(0, indices); +} + +// CHECK-LABEL: define dso_local { <vscale x 16 x 
i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_mf8_x4( +// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]]) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z22test_svluti6_zt_mf8_x411svuint8x3_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]]) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svmfloat8x4_t test_svluti6_zt_mf8_x4(svuint8x3_t indices) + __arm_streaming __arm_in("zt0") { + return svluti6_zt_mf8_x4(0, indices); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c new file mode 100644 index 0000000000000..11f0848af1c07 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c @@ -0,0 +1,64 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1, A2) A1##A2 +#endif + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_s8_x2( +// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z18test_svluti6_s8_x210svint8x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: 
[[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svluti6_s8_x2(svint8x2_t table, svuint8_t indices) { + return SVE_ACLE_FUNC(svluti6, _s8_x2)(table, indices); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_u8_x2( +// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z18test_svluti6_u8_x211svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svluti6_u8_x2(svuint8x2_t table, svuint8_t indices) { + return SVE_ACLE_FUNC(svluti6, _u8_x2)(table, indices); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_mf8_x2( +// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> 
[[TABLE_COERCE0]], <vscale x 16 x i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svluti6_mf8_x213svmfloat8x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svluti6_mf8_x2(svmfloat8x2_t table, svuint8_t indices) { + return SVE_ACLE_FUNC(svluti6, _mf8_x2)(table, indices); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c new file mode 100644 index 0000000000000..b6d8fe5cff531 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c @@ -0,0 +1,138 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature 
+bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1, A2) A1##A2 +#endif + +#ifdef STREAMING_MODE +#define STREAMING_ATTR __arm_streaming +#else +#define STREAMING_ATTR +#endif + +// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_s16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_s16_x2( +// STREAM-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// STREAM-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// STREAM-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z24test_svluti6_lane_s16_x211svint16x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z24test_svluti6_lane_s16_x211svint16x2_tu11__SVUint8_t( +// STREAM-CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// STREAM-CPP-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// STREAM-CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svint16_t 
test_svluti6_lane_s16_x2(svint16x2_t table, svuint8_t indices) STREAMING_ATTR { + return SVE_ACLE_FUNC(svluti6_lane, _s16_x2)(table, indices, 1); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_u16_x2( +// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_u16_x2( +// STREAM-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// STREAM-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// STREAM-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z24test_svluti6_lane_u16_x212svuint16x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z24test_svluti6_lane_u16_x212svuint16x2_tu11__SVUint8_t( +// 
STREAM-CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// STREAM-CPP-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// STREAM-CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svuint16_t test_svluti6_lane_u16_x2(svuint16x2_t table, svuint8_t indices) STREAMING_ATTR { + return SVE_ACLE_FUNC(svluti6_lane, _u16_x2)(table, indices, 0); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svluti6_lane_f16_x2( +// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svluti6_lane_f16_x2( +// STREAM-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// STREAM-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// STREAM-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z24test_svluti6_lane_f16_x213svfloat16x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 
8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z24test_svluti6_lane_f16_x213svfloat16x2_tu11__SVUint8_t( +// STREAM-CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// STREAM-CPP-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1) +// STREAM-CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svluti6_lane_f16_x2(svfloat16x2_t table, svuint8_t indices) STREAMING_ATTR { + return SVE_ACLE_FUNC(svluti6_lane, _f16_x2)(table, indices, 1); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svluti6_lane_bf16_x2( +// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]] +// +// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svluti6_lane_bf16_x2( +// STREAM-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale 
x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// STREAM-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// STREAM-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z25test_svluti6_lane_bf16_x214svbfloat16x2_tu11__SVUint8_t( +// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]] +// +// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z25test_svluti6_lane_bf16_x214svbfloat16x2_tu11__SVUint8_t( +// STREAM-CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr #[[ATTR0]] { +// STREAM-CPP-CHECK-NEXT: [[ENTRY:.*:]] +// STREAM-CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> [[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0) +// STREAM-CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]] +// +svbfloat16_t test_svluti6_lane_bf16_x2(svbfloat16x2_t table, svuint8_t indices) STREAMING_ATTR { + return SVE_ACLE_FUNC(svluti6_lane, _bf16_x2)(table, indices, 0); +} diff --git a/clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c b/clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c new file mode 
100644 index 0000000000000..2ab8d4d0c4120 --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c @@ -0,0 +1,56 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard + +// REQUIRES: aarch64-registered-target + +#include <arm_sme.h> + +// Properties: guard="" streaming_guard="sme,sme2p3" flags="streaming-only,requires-zt" + +void test(void) __arm_inout("zt0"){ + svuint8_t svuint8_t_val; + svuint8x3_t svuint8x3_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_mf8(0, svuint8_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_mf8_x4(0, svuint8x3_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_s8(0, svuint8_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_s8_x4(0, svuint8x3_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_u8(0, svuint8_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_u8_x4(0, svuint8x3_t_val); +} + +void test_streaming(void) __arm_streaming __arm_inout("zt0"){ + svuint8_t svuint8_t_val; + svuint8x3_t svuint8x3_t_val; + + svluti6_zt_mf8(0, svuint8_t_val); + svluti6_zt_mf8_x4(0, svuint8x3_t_val); + svluti6_zt_s8(0, svuint8_t_val); + svluti6_zt_s8_x4(0, svuint8x3_t_val); + svluti6_zt_u8(0, svuint8_t_val); + svluti6_zt_u8_x4(0, svuint8x3_t_val); +} + +void test_streaming_compatible(void) __arm_streaming_compatible __arm_inout("zt0"){ + svuint8_t svuint8_t_val; + svuint8x3_t svuint8x3_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}} + svluti6_zt_mf8(0, svuint8_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_mf8_x4(0, svuint8x3_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_s8(0, svuint8_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_s8_x4(0, svuint8x3_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_u8(0, svuint8_t_val); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_zt_u8_x4(0, svuint8x3_t_val); +} diff --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c new file mode 100644 index 0000000000000..1918990b4153e --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c @@ -0,0 +1,77 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify +// expected-no-diagnostics + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,sve2p3" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent" + +void test(void) { + svbfloat16x2_t svbfloat16x2_t_val; + svfloat16x2_t svfloat16x2_t_val; + svint16x2_t svint16x2_t_val; + svuint8_t svuint8_t_val; + svuint16x2_t svuint16x2_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svbfloat16x2_t_val, svuint8_t_val, 1); + // 
streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svint16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svuint16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x2(svbfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x2(svfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x2(svint16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x2(svuint16x2_t_val, svuint8_t_val, 1); +} + +void test_streaming(void) __arm_streaming{ + svbfloat16x2_t svbfloat16x2_t_val; + svfloat16x2_t svfloat16x2_t_val; + svint16x2_t svint16x2_t_val; + svuint8_t svuint8_t_val; + svuint16x2_t svuint16x2_t_val; + + svluti6_lane(svbfloat16x2_t_val, svuint8_t_val, 1); + svluti6_lane(svfloat16x2_t_val, svuint8_t_val, 1); + svluti6_lane(svint16x2_t_val, svuint8_t_val, 1); + svluti6_lane(svuint16x2_t_val, svuint8_t_val, 1); + svluti6_lane_bf16_x2(svbfloat16x2_t_val, svuint8_t_val, 1); + svluti6_lane_f16_x2(svfloat16x2_t_val, svuint8_t_val, 1); + svluti6_lane_s16_x2(svint16x2_t_val, svuint8_t_val, 1); + svluti6_lane_u16_x2(svuint16x2_t_val, svuint8_t_val, 1); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svbfloat16x2_t svbfloat16x2_t_val; + svfloat16x2_t svfloat16x2_t_val; + svint16x2_t svint16x2_t_val; + svuint8_t svuint8_t_val; + svuint16x2_t svuint16x2_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + 
svluti6_lane(svbfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svint16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane(svuint16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x2(svbfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x2(svfloat16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x2(svint16x2_t_val, svuint8_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x2(svuint16x2_t_val, svuint8_t_val, 1); +} diff --git a/clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c b/clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c new file mode 100644 index 0000000000000..ebf06311b8939 --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c @@ -0,0 +1,62 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify=guard + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="sve,sve2p3" streaming_guard="" flags="" + +void test(void) { + svint8x2_t svint8x2_t_val; + svmfloat8x2_t svmfloat8x2_t_val; + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + + svluti6(svint8x2_t_val, svuint8_t_val); + svluti6(svmfloat8x2_t_val, svuint8_t_val); + svluti6(svuint8x2_t_val, 
svuint8_t_val); + svluti6_mf8_x2(svmfloat8x2_t_val, svuint8_t_val); + svluti6_s8_x2(svint8x2_t_val, svuint8_t_val); + svluti6_u8_x2(svuint8x2_t_val, svuint8_t_val); +} + +void test_streaming(void) __arm_streaming{ + svint8x2_t svint8x2_t_val; + svmfloat8x2_t svmfloat8x2_t_val; + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6(svint8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6(svmfloat8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6(svuint8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6_mf8_x2(svmfloat8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6_s8_x2(svint8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6_u8_x2(svuint8x2_t_val, svuint8_t_val); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svint8x2_t svint8x2_t_val; + svmfloat8x2_t svmfloat8x2_t_val; + svuint8_t svuint8_t_val; + svuint8x2_t svuint8x2_t_val; + + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6(svint8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6(svmfloat8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6(svuint8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6_mf8_x2(svmfloat8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a non-streaming function}} + svluti6_s8_x2(svint8x2_t_val, svuint8_t_val); + // guard-error@+1 {{builtin can only be called from a 
non-streaming function}} + svluti6_u8_x2(svuint8x2_t_val, svuint8_t_val); +} diff --git a/clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c b/clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c new file mode 100644 index 0000000000000..0f88ee7ad7fef --- /dev/null +++ b/clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c @@ -0,0 +1,118 @@ +// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py +// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard + +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +// Properties: guard="" streaming_guard="sme,sme2p3" flags="streaming-only" + +void test(void) { + svbfloat16x2_t svbfloat16x2_t_val; + svfloat16x2_t svfloat16x2_t_val; + svint16x2_t svint16x2_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x3_t svuint8x3_t_val; + svuint16x2_t svuint16x2_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4_bf16_x2_u8_x2(svbfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4_bf16_x2_u8_x3(svbfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}} + svluti6_lane_f16_x4_f16_x2_u8_x2(svfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x4_f16_x2_u8_x3(svfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4(svint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4(svint16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4_s16_x2_u8_x2(svint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4_s16_x2_u8_x3(svint16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4_u16_x2_u8_x2(svuint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4_u16_x2_u8_x3(svuint16x2_t_val, svuint8x3_t_val, 1); +} + +void test_streaming(void) __arm_streaming{ + svbfloat16x2_t svbfloat16x2_t_val; + svfloat16x2_t svfloat16x2_t_val; + svint16x2_t svint16x2_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x3_t svuint8x3_t_val; + svuint16x2_t svuint16x2_t_val; + + svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x3_t_val, 1); + svluti6_lane_bf16_x4_bf16_x2_u8_x2(svbfloat16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_bf16_x4_bf16_x2_u8_x3(svbfloat16x2_t_val, 
svuint8x3_t_val, 1); + svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x3_t_val, 1); + svluti6_lane_f16_x4_f16_x2_u8_x2(svfloat16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_f16_x4_f16_x2_u8_x3(svfloat16x2_t_val, svuint8x3_t_val, 1); + svluti6_lane_s16_x4(svint16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_s16_x4(svint16x2_t_val, svuint8x3_t_val, 1); + svluti6_lane_s16_x4_s16_x2_u8_x2(svint16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_s16_x4_s16_x2_u8_x3(svint16x2_t_val, svuint8x3_t_val, 1); + svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x3_t_val, 1); + svluti6_lane_u16_x4_u16_x2_u8_x2(svuint16x2_t_val, svuint8x2_t_val, 1); + svluti6_lane_u16_x4_u16_x2_u8_x3(svuint16x2_t_val, svuint8x3_t_val, 1); +} + +void test_streaming_compatible(void) __arm_streaming_compatible{ + svbfloat16x2_t svbfloat16x2_t_val; + svfloat16x2_t svfloat16x2_t_val; + svint16x2_t svint16x2_t_val; + svuint8x2_t svuint8x2_t_val; + svuint8x3_t svuint8x3_t_val; + svuint16x2_t svuint16x2_t_val; + + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4_bf16_x2_u8_x2(svbfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_bf16_x4_bf16_x2_u8_x3(svbfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + 
svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x4_f16_x2_u8_x2(svfloat16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_f16_x4_f16_x2_u8_x3(svfloat16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4(svint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4(svint16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4_s16_x2_u8_x2(svint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_s16_x4_s16_x2_u8_x3(svint16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x3_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4_u16_x2_u8_x2(svuint16x2_t_val, svuint8x2_t_val, 1); + // streaming-guard-error@+1 {{builtin can only be called from a streaming function}} + svluti6_lane_u16_x4_u16_x2_u8_x3(svuint16x2_t_val, svuint8x3_t_val, 1); +} diff --git a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c new file mode 100644 index 0000000000000..25c35fbcbcc7b --- /dev/null +++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c @@ -0,0 +1,21 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -fsyntax-only -verify %s + +#include <arm_sme.h> + +void test_range_0_0(void) __arm_streaming __arm_in("zt0") { + svluti6_zt_s8(1, svundef_u8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}} + svluti6_zt_u8_x4(1, svcreate3_u8(svundef_u8(), svundef_u8(), svundef_u8())); // expected-error {{argument value 1 is outside the valid range [0, 0]}} +} + +void test_range_0_1(void) __arm_streaming { + svluti6_lane_s16_x4_s16_x2_u8_x2(svcreate2_s16(svundef_s16(), svundef_s16()), // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svcreate2_u8(svundef_u8(), svundef_u8()), -1); + svluti6_lane_u16_x4_u16_x2_u8_x2(svcreate2_u16(svundef_u16(), svundef_u16()), // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svcreate2_u8(svundef_u8(), svundef_u8()), 2); + svluti6_lane_f16_x4_f16_x2_u8_x3(svcreate2_f16(svundef_f16(), svundef_f16()), // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svcreate3_u8(svundef_u8(), svundef_u8(), svundef_u8()), -1); + svluti6_lane_bf16_x4_bf16_x2_u8_x3(svcreate2_bf16(svundef_bf16(), svundef_bf16()), // expected-error {{argument value 2 is outside the valid range [0, 1]}} + svcreate3_u8(svundef_u8(), svundef_u8(), svundef_u8()), 2); +} diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp index 60183e346f181..01781da390e0b 100644 --- a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp +++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp @@ -1,10 +1,15 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature 
+sve2p3 -fsyntax-only -verify %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -fsyntax-only -verify %s #include <arm_sve.h> - +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1, A2) A1##A2 +#endif svint8_t test_svqshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm) { @@ -83,7 +88,23 @@ svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, uint64_t imm) svqrshrun_n_u8_s16_x2(zn, 9); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} svqrshrun_n_u8_s16_x2(zn, -1); // expected-error-re {{argument value {{[0-9]+}} is outside the valid range [1, 8]}} - svqrshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}}}} + svqrshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} must be a constant integer}}} +} + + +void test_range_0_1() { + // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svluti6_lane, _s16_x2)(svcreate2_s16(svundef_s16(), svundef_s16()), + svundef_u8(), -1); + // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svluti6_lane, _u16_x2)(svcreate2_u16(svundef_u16(), svundef_u16()), + svundef_u8(), 2); + // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svluti6_lane, _f16_x2)(svcreate2_f16(svundef_f16(), svundef_f16()), + svundef_u8(), -1); + // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svluti6_lane, _bf16_x2)(svcreate2_bf16(svundef_bf16(), svundef_bf16()), + svundef_u8(), 2); } void test_svdot_lane_x2_imm_0_7(svint16_t s16, svuint16_t u16, svint8_t s8, diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index ba0d7c02bf427..ba8e23d3df3db 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1371,13 +1371,28 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMVectorOfBitcastsToInt<0>], !listconcat(Attrs, [IntrNoMem])>; - class SVE2_LUTI_Inrinsic<list<IntrinsicProperty> Attrs = []> + class SVE2_LUTI_Intrinsic<list<IntrinsicProperty> Attrs = []> : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_i32_ty], !listconcat(Attrs, [IntrNoMem, ImmArg<ArgIndex<2>>])>; + class SVE2_LUTI_X2_Intrinsic<list<IntrinsicProperty> Attrs = []> + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_nxv16i8_ty, + llvm_i32_ty], + !listconcat(Attrs, [IntrNoMem, ImmArg<ArgIndex<3>>])>; + + class SVE2_LUTI6_Intrinsic<list<IntrinsicProperty> Attrs = []> + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + !listconcat(Attrs, [IntrNoMem])>; + class SVE2_1VectorArg_Long_Intrinsic<list<IntrinsicProperty> Attrs = []> : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>, @@ -2813,14 +2828,11 @@ def int_aarch64_sve_tbx : AdvSIMD_SVE2_TBX_Intrinsic<[IntrSpeculatable]>; // SVE2 - Lookup Table // -def int_aarch64_sve_luti2_lane : SVE2_LUTI_Inrinsic<[IntrSpeculatable]>; -def int_aarch64_sve_luti4_lane : SVE2_LUTI_Inrinsic<[IntrSpeculatable]>; -def int_aarch64_sve_luti4_lane_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, - LLVMMatchType<0>, - llvm_nxv16i8_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<3>>, IntrSpeculatable]>; +def int_aarch64_sve_luti2_lane : SVE2_LUTI_Intrinsic<[IntrSpeculatable]>; +def int_aarch64_sve_luti4_lane : SVE2_LUTI_Intrinsic<[IntrSpeculatable]>; +def int_aarch64_sve_luti6 : SVE2_LUTI6_Intrinsic<[IntrSpeculatable]>; +def int_aarch64_sve_luti4_lane_x2 : SVE2_LUTI_X2_Intrinsic<[IntrSpeculatable]>; +def int_aarch64_sve_luti6_lane_x2 : SVE2_LUTI_X2_Intrinsic<[IntrSpeculatable]>; // // SVE2 - Optional bit 
permutation @@ -3980,6 +3992,9 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + def int_aarch64_sme_luti6_zt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_i32_ty, llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>; // Lookup table expand two registers // @@ -4001,11 +4016,24 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + def int_aarch64_sme_luti6_lane_x4_x2 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], + [ImmArg<ArgIndex<4>>, IntrNoMem, IntrSpeculatable]>; + def int_aarch64_sme_luti6_lane_x4_x3 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], + [ImmArg<ArgIndex<5>>, IntrNoMem, IntrSpeculatable]>; def int_aarch64_sme_luti4_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>; + def int_aarch64_sme_luti6_zt_x4 + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>; // diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 
9c454349dc12d..ade5b9e0e6e70 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -415,8 +415,10 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs, unsigned Opc, uint32_t MaxImm); + void SelectMultiVectorLuti6LaneX4(SDNode *Node, unsigned NumIndexVecs); - void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc); + void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, + unsigned NumInVecs); template <unsigned MaxIdx, unsigned Scale> bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { @@ -2271,17 +2273,57 @@ void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node, CurDAG->RemoveDeadNode(Node); } +void AArch64DAGToDAGISel::SelectMultiVectorLuti6LaneX4(SDNode *Node, + unsigned NumIndexVecs) { + assert((NumIndexVecs == 2 || NumIndexVecs == 3) && + "unexpected number of index vectors"); + + constexpr unsigned FirstIndexOp = 3; + unsigned ImmOp = FirstIndexOp + NumIndexVecs; + auto *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(ImmOp)); + if (!Imm || Imm->getZExtValue() > 1) + return; + + // The luti6 instruction always takes a 2-register Zm index tuple. 
The x3 + // ACLE form provides three index vectors, so the lane selects which adjacent + // pair to use before forming Zm (op 3/4 or op 4/5, with op6 as imm) + unsigned Lane = Imm->getZExtValue(); + unsigned IndexOp = FirstIndexOp; + if (NumIndexVecs == 3) + IndexOp += Lane; + + SDValue TableTuple = createZTuple({Node->getOperand(1), Node->getOperand(2)}); + SDValue IndexTuple = + createZTuple({Node->getOperand(IndexOp), Node->getOperand(IndexOp + 1)}); + SDValue Ops[] = {TableTuple, IndexTuple, Node->getOperand(ImmOp)}; + + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDNode *Instruction = + CurDAG->getMachineNode(AArch64::LUTI6_4Z2Z2ZI, DL, MVT::Untyped, Ops); + SDValue SuperReg = SDValue(Instruction, 0); + + for (unsigned I = 0; I < 4; ++I) + ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg( + AArch64::zsub0 + I, DL, VT, SuperReg)); + + CurDAG->RemoveDeadNode(Node); +} + void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, - unsigned Opc) { + unsigned Opc, + unsigned NumInVecs) { + assert((NumInVecs == 2 || NumInVecs == 3) && + "unexpected number of input vectors"); + SDValue ZtValue; if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue)) return; - SDValue Chain = Node->getOperand(0); - SDValue Ops[] = {ZtValue, - createZMulTuple({Node->getOperand(3), Node->getOperand(4)}), - Chain}; + SmallVector<SDValue, 4> Regs(Node->ops().slice(3, NumInVecs)); + SDValue ZTuple = NumInVecs == 3 ? 
createZTuple(Regs) : createZMulTuple(Regs); + SDValue Ops[] = {ZtValue, ZTuple, Node->getOperand(0)}; SDLoc DL(Node); EVT VT = Node->getValueType(0); @@ -2294,9 +2336,7 @@ void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg( AArch64::zsub0 + I, DL, VT, SuperReg)); - // Copy chain - unsigned ChainIdx = NumOutVecs; - ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1)); + ReplaceUses(SDValue(Node, NumOutVecs), SDValue(Instruction, 1)); CurDAG->RemoveDeadNode(Node); } @@ -5987,7 +6027,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { return; } case Intrinsic::aarch64_sme_luti4_zt_x4: { - SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z); + SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 2); + return; + } + case Intrinsic::aarch64_sme_luti6_zt_x4: { + SelectMultiVectorLuti(Node, 4, AArch64::LUTI6_4ZT3Z, 3); return; } case Intrinsic::aarch64_sve_fp8_cvtl1_x2: @@ -6080,6 +6124,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D})) SelectDestructiveMultiIntrinsic(Node, 4, false, Op); return; + case Intrinsic::aarch64_sme_luti6_lane_x4_x2: + SelectMultiVectorLuti6LaneX4(Node, 2); + return; + case Intrinsic::aarch64_sme_luti6_lane_x4_x3: + SelectMultiVectorLuti6LaneX4(Node, 3); + return; case Intrinsic::aarch64_sve_urshl_single_x2: if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>( Node->getValueType(0), diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d1fe1718dcc44..493f11ac13484 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -227,9 +227,9 @@ def HasSVE_B16MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasS AssemblerPredicateWithAll<(all_of FeatureSVE_B16MM), "sve-b16mm">; def HasF16MM : Predicate<"Subtarget->hasF16MM()">, AssemblerPredicateWithAll<(all_of FeatureF16MM), "f16mm">; -def 
HasSVE2p3 : Predicate<"Subtarget->hasSVE2p3()">, +def HasSVE2p3 : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p3()">, AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">; -def HasSME2p3 : Predicate<"Subtarget->hasSME2p3()">, +def HasSME2p3 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p3()">, AssemblerPredicateWithAll<(all_of FeatureSME2p3), "sme2p3">; def HasF16F32DOT : Predicate<"Subtarget->hasF16F32DOT()">, AssemblerPredicateWithAll<(all_of FeatureF16F32DOT), "f16f32dot">; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 022fed6473486..d0eb9ca218a27 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1140,7 +1140,7 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in { // SME2.3 instructions //===----------------------------------------------------------------------===// let Predicates = [HasSME2p3] in { - def LUTI6_ZTZ : sme2_lut_single<"luti6">; + defm LUTI6_ZTZ : sme2_lut_single<"luti6", int_aarch64_sme_luti6_zt>; def LUTI6_4ZT3Z : sme2_luti6_zt_consecutive<"luti6">; def LUTI6_S_4ZT3Z : sme2_luti6_zt_strided<"luti6">; def LUTI6_4Z2Z2ZI : sme2_luti6_vector_vg4_consecutive<"luti6">; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 0cc788d12bae0..4712406e37e6b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4897,14 +4897,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { defm SQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"sqshrn", 0b000, int_aarch64_sve_sqshrn_x2>; defm UQSHRN_Z2ZI_StoH : sve_multi_vec_shift_narrow<"uqshrn", 0b010, int_aarch64_sve_uqshrn_x2>; - defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">; + defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6", int_aarch64_sve_luti6_lane_x2>; } // End HasSME2p3orSVE2p3 
//===----------------------------------------------------------------------===// // SVE2.3 instructions //===----------------------------------------------------------------------===// let Predicates = [HasSVE2p3] in { - def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">; + defm LUTI6_Z2ZZ : sve2_luti6_vector<"luti6", int_aarch64_sve_luti6>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 771c4c1fb2b6e..f07fb8ad81f63 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3922,8 +3922,8 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> { // 8-bit Look up table class sme2_lut_single<string asm> - : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn), - asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> { + : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn), + asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> { bits<0> ZTt; bits<5> Zd; bits<5> Zn; @@ -3932,6 +3932,13 @@ class sme2_lut_single<string asm> let Inst{4-0} = Zd; } +multiclass sme2_lut_single<string asm, SDPatternOperator intrinsic> { + def NAME : sme2_lut_single<asm>; + + def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn)), + (!cast<Instruction>(NAME) $zt, nxv16i8:$zn)>; +} + //===----------------------------------------------------------------------===// // Lookup table read with 6-bit indices (8-bit) class sme2_luti6_zt_base<RegisterOperand zd_ty, string asm> diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 040962e801604..f96702a01c277 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -728,6 +728,13 @@ class SVE_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)), (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, 
in_vt:$Zn2, zsub1))>; +class SVE_LUTI6_VG2_Index_Pat<ValueType vt, SDPatternOperator intrinsic, + Instruction inst> + : Pat<(vt (intrinsic vt:$Op1, vt:$Op2, nxv16i8:$Op3, + (i32 timm32_0_1:$Op4))), + (vt (inst (REG_SEQUENCE ZPR2, vt:$Op1, zsub0, vt:$Op2, zsub1), + nxv16i8:$Op3, timm32_0_1:$Op4))>; + //===----------------------------------------------------------------------===// // SVE pattern match helpers. //===----------------------------------------------------------------------===// @@ -11415,18 +11422,24 @@ multiclass sve2_luti4_vector_vg2_index<string mnemonic> { } // Look up table read with 6-bit indices -multiclass sve2_luti6_vector_index<string mnemonic> { +multiclass sve2_luti6_vector_index<string mnemonic, SDPatternOperator intrinsic> { def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexD32b, 0b1011, mnemonic> { bit idx; let Inst{23} = idx; } + + def : SVE_LUTI6_VG2_Index_Pat<nxv8i16, intrinsic, + !cast<Instruction>(NAME # _H)>; + def : SVE_LUTI6_VG2_Index_Pat<nxv8f16, intrinsic, + !cast<Instruction>(NAME # _H)>; + def : SVE_LUTI6_VG2_Index_Pat<nxv8bf16, intrinsic, + !cast<Instruction>(NAME # _H)>; } // Look up table class sve2_luti6_vector<string mnemonic> : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm), - mnemonic, "\t$Zd, $Zn, $Zm", - "", []>, Sched<[]> { + mnemonic, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zd; bits<5> Zn; bits<5> Zm; @@ -11437,6 +11450,15 @@ class sve2_luti6_vector<string mnemonic> let Inst{4-0} = Zd; } +multiclass sve2_luti6_vector<string mnemonic, SDPatternOperator intrinsic> { + def NAME : sve2_luti6_vector<mnemonic>; + + def : Pat<(nxv16i8 (intrinsic nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), + (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0, + nxv16i8:$Op2, zsub1), + nxv16i8:$Op3)>; +} + //===----------------------------------------------------------------------===// // Checked Pointer Arithmetic (FEAT_CPA) //===----------------------------------------------------------------------===// diff --git 
a/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll b/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll new file mode 100644 index 0000000000000..3c695e46267d3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -force-streaming -mtriple=aarch64-none-linux-gnu -mattr=+sme2p3 < %s | FileCheck %s + +define <vscale x 16 x i8> @luti6_zt_i8(<vscale x 16 x i8> %x) { +; CHECK-LABEL: luti6_zt_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: luti6 z0.b, zt0, z0 +; CHECK-NEXT: ret + %res = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> %x) + ret <vscale x 16 x i8> %res +} + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @luti6_zt_i8_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: luti6_zt_i8_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: luti6 { z0.b - z3.b }, zt0, { z0 - z2 } +; CHECK-NEXT: ret + %res = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: luti6_i16_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[1] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> 
%b, i32 1) + ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @luti6_bf16_x4(<vscale x 8 x bfloat> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: luti6_bf16_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[0] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b, i32 0) + ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res +} + +define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @luti6_f16_x4(<vscale x 8 x half> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: luti6_f16_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[1] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b, i32 1) + ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @luti6_i16_x4_x3_imm0(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: luti6_i16_x4_x3_imm0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z3.h, z4.h }, { 
z1, z2 }[0] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, i32 0) + ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @luti6_i16_x4_x3_imm1(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: luti6_i16_x4_x3_imm1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, i32 1) + ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res +} + +define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @luti6_f16_x4_x3(<vscale x 8 x half> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: luti6_f16_x4_x3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, i32 1) + ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res +} + +define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> 
} @luti6_bf16_x4_x3(<vscale x 8 x bfloat> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: luti6_bf16_x4_x3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[0] +; CHECK-NEXT: ret + %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, i32 0) + ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res +} diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll new file mode 100644 index 0000000000000..a2bf43088968f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve2p3 -enable-subreg-liveness < %s | FileCheck %s + +define <vscale x 16 x i8> @luti6_i8(<vscale x 16 x i8> %a) { +; CHECK-LABEL: luti6_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: luti6 z0.b, { z0.b, z1.b }, z0 +; CHECK-NEXT: ret + %res = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> %a) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @luti6_i16_x2(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: luti6_i16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: luti6 z0.h, { z2.h, z3.h }, z1[1] +; CHECK-NEXT: ret + %res = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, i32 1) + ret <vscale x 8 x i16> 
%res +} + +define <vscale x 8 x half> @luti6_f16_x2(<vscale x 8 x half> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: luti6_f16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: luti6 z0.h, { z2.h, z3.h }, z1[0] +; CHECK-NEXT: ret + %res = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.f16(<vscale x 8 x half> %a, <vscale x 8 x half> %a, <vscale x 16 x i8> %b, i32 0) + ret <vscale x 8 x half> %res +} + +define <vscale x 8 x bfloat> @luti6_bf16_x2(<vscale x 8 x bfloat> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: luti6_bf16_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: luti6 z0.h, { z2.h, z3.h }, z1[1] +; CHECK-NEXT: ret + %res = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti6.lane.x2.bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %a, <vscale x 16 x i8> %b, i32 1) + ret <vscale x 8 x bfloat> %res +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
