llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Jonathan Thackray (jthackray)
<details>
<summary>Changes</summary>
Add support for the following Armv9.7-A Lookup Table (lut)
instruction intrinsics:
SVE2.3
```c
// Variants are also available for: _u8 and _mf8.
svint8_t svluti6[_s8](svint8x2_t table, svuint8_t indices);
```
SVE2.3 and SME2.3
```c
// Variants are also available for _u16_x2 and _f16_x2.
svint16_t svluti6_lane[_s16_x2](svint16x2_t table, svuint8_t indices,
uint64_t imm_idx);
```
SME2.3
```c
// Variants are also available for: _u16, _f16 and _bf16.
svint16x4_t svluti6_lane[_s16_x4](svint16x2_t table, svuint8x2_t
indices, uint64_t imm_idx);
// Variants are also available for: _u8 and _mf8.
svint8x4_t svluti6_zt_s8_x4(uint64_t zt0, svuint8x3_t zn) __arm_streaming
__arm_in("zt0");
// Variants are also available for: _u8 and _mf8.
svint8_t svluti6_zt_s8(uint64_t zt0, svuint8_t zn) __arm_streaming
__arm_in("zt0");
```
---
Patch is 66.30 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/187046.diff
22 Files Affected:
- (modified) clang/include/clang/Basic/arm_sme.td (+6)
- (modified) clang/include/clang/Basic/arm_sve.td (+9)
- (modified) clang/lib/Basic/Targets/AArch64.cpp (+29)
- (modified) clang/lib/Basic/Targets/AArch64.h (+2)
- (added) clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
(+175)
- (added) clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
(+112)
- (modified) clang/test/Preprocessor/aarch64-target-features.c (+23)
- (added) clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c (+21)
- (added) clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target.c (+20)
- (added) clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target_lane.c
(+16)
- (added) clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp (+24)
- (added) clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target.c (+19)
- (added) clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target_lane.c
(+14)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+32)
- (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+102)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+9-1)
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+3)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+10-2)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+11)
- (added) llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll (+105)
- (added) llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll (+55)
- (added) llvm/test/Verifier/AArch64/luti6-intrinsics.ll (+79)
``````````diff
diff --git a/clang/include/clang/Basic/arm_sme.td
b/clang/include/clang/Basic/arm_sme.td
index 032c588966032..8de360fca5f5e 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -981,6 +981,12 @@ let SMETargetGuard = "sme-lutv2" in {
def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone,
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
}
+let SMETargetGuard = "sme2p3" in {
+ def SVLUTI6_ZT : SInst<"svluti6_zt_{d}", "diu", "cUcm", MergeNone,
"aarch64_sme_luti6_zt", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0,
ImmCheck0_0>]>;
+ def SVLUTI6_ZT_X4 : SInst<"svluti6_zt_{d}_x4", "4i3.u", "cUcm", MergeNone,
"aarch64_sme_luti6_zt_x4", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0,
ImmCheck0_0>]>;
+ def SVLUTI6_LANE_X4 : SInst<"svluti6_lane[_{d}_x4]", "42.d2.[i", "sUshb",
MergeNone, "aarch64_sme_luti6_lane_x4", [IsStreaming], [ImmCheck<2,
ImmCheck0_1>]>;
+}
+
let SMETargetGuard = "sme-f8f32" in {
def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m", "viPPdd>", "m", MergeNone,
"aarch64_sme_fp8_fmopa_za32",
[IsStreaming, IsInOutZA, IsOverloadNone],
[ImmCheck<0, ImmCheck0_3>]>;
diff --git a/clang/include/clang/Basic/arm_sve.td
b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..b4080a8456dd5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1876,6 +1876,15 @@ let SVETargetGuard = "(sve2|sme2),lut", SMETargetGuard =
"sme2,lut" in {
def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUshb",
MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2,
ImmCheck0_3>]>;
}
+let SVETargetGuard = "sve2p3", SMETargetGuard = InvalidMode in {
+ def SVLUTI6 : SInst<"svluti6[_{d}]", "d2u", "cUcm", MergeNone,
"aarch64_sve_luti6", [IsOverloadNone]>;
+}
+
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in {
+ def SVLUTI6_x2_I16 : SInst<"svluti6_lane[_{d}_x2]", "d2.d[i", "sUs",
MergeNone, "aarch64_sve_luti6_lane_x2_i16", [IsOverloadNone,
VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>;
+ def SVLUTI6_x2_F16 : SInst<"svluti6_lane[_{d}_x2]", "d2.d[i", "h",
MergeNone, "aarch64_sve_luti6_lane_x2_f16", [IsOverloadNone,
VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>;
+}
+
////////////////////////////////////////////////////////////////////////////////
// SVE2 - Optional
diff --git a/clang/lib/Basic/Targets/AArch64.cpp
b/clang/lib/Basic/Targets/AArch64.cpp
index f7ed15be75cd8..9d4085e5ebbf4 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -495,6 +495,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions
&Opts,
if (HasSVE2p1)
Builder.defineMacro("__ARM_FEATURE_SVE2p1", "1");
+ if (HasSVE2p3)
+ Builder.defineMacro("__ARM_FEATURE_SVE2p3", "1");
+
if (HasSVE2 && HasSVEAES)
Builder.defineMacro("__ARM_FEATURE_SVE2_AES", "1");
@@ -521,6 +524,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions
&Opts,
if (HasSME2p1)
Builder.defineMacro("__ARM_FEATURE_SME2p1", "1");
+ if (HasSME2p3)
+ Builder.defineMacro("__ARM_FEATURE_SME2p3", "1");
+
if (HasSMEF16F16)
Builder.defineMacro("__ARM_FEATURE_SME_F16F16", "1");
@@ -900,9 +906,11 @@ void AArch64TargetInfo::computeFeatureLookup() {
.Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
.Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
.Case("sve2p1", FPU & SveMode && HasSVE2p1)
+ .Case("sve2p3", FPU & SveMode && HasSVE2p3)
.Case("sme", HasSME)
.Case("sme2", HasSME2)
.Case("sme2p1", HasSME2p1)
+ .Case("sme2p3", HasSME2p3)
.Case("sme-f64f64", HasSMEF64F64)
.Case("sme-i16i64", HasSMEI16I64)
.Case("sme-fa64", HasSMEFA64)
@@ -1008,6 +1016,15 @@ bool
AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasSVE2 = true;
HasSVE2p1 = true;
}
+ if (Feature == "+sve2p3") {
+ FPU |= NeonMode;
+ FPU |= SveMode;
+ HasFullFP16 = true;
+ HasSVE2 = true;
+ HasSVE2p1 = true;
+ HasSVE2p2 = true;
+ HasSVE2p3 = true;
+ }
if (Feature == "+sve-aes") {
FPU |= NeonMode;
HasFullFP16 = true;
@@ -1064,6 +1081,18 @@ bool
AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasBFloat16 = true;
HasFullFP16 = true;
}
+ if (Feature == "+sme2p3") {
+ HasSME = true;
+ HasSME2 = true;
+ HasSVE2 = true;
+ HasSVE2p1 = true;
+ HasSVE2p2 = true;
+ HasSME2p1 = true;
+ HasSME2p2 = true;
+ HasSME2p3 = true;
+ HasBFloat16 = true;
+ HasFullFP16 = true;
+ }
if (Feature == "+sme-f64f64") {
HasSME = true;
HasSMEF64F64 = true;
diff --git a/clang/lib/Basic/Targets/AArch64.h
b/clang/lib/Basic/Targets/AArch64.h
index f9dffed8769ef..4191070a4786d 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -85,6 +85,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public
TargetInfo {
bool HasBFloat16 = false;
bool HasSVE2 = false;
bool HasSVE2p1 = false;
+ bool HasSVE2p3 = false;
bool HasSVEAES = false;
bool HasSVE2SHA3 = false;
bool HasSVE2SM4 = false;
@@ -110,6 +111,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public
TargetInfo {
bool HasSMEF16F16 = false;
bool HasSMEB16B16 = false;
bool HasSME2p1 = false;
+ bool HasSME2p3 = false;
bool HasFP8 = false;
bool HasFP8FMA = false;
bool HasFP8DOT2 = false;
diff --git a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
new file mode 100644
index 0000000000000..ae5fb1f64d0fc
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
@@ -0,0 +1,175 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror
-emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -S -O1
-Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>,
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_s16_x4(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16>
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]],
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>,
<vscale x 8 x i16>, <vscale x 8 x i16> }
@_Z24test_svluti6_lane_s16_x411svint16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale
x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]],
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svint16x4_t test_svluti6_lane_s16_x4(svint16x2_t table, svuint8x2_t indices)
+ __arm_streaming {
+ return svluti6_lane_s16_x4(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>,
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_u16_x4(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16>
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]],
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>,
<vscale x 8 x i16>, <vscale x 8 x i16> }
@_Z24test_svluti6_lane_u16_x412svuint16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale
x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]],
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svuint16x4_t test_svluti6_lane_u16_x4(svuint16x2_t table, svuint8x2_t indices)
+ __arm_streaming {
+ return svluti6_lane_u16_x4(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>,
<vscale x 8 x half>, <vscale x 8 x half> } @test_svluti6_lane_f16_x4(
+// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half>
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8
x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]],
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8
x half>, <vscale x 8 x half> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x
half>, <vscale x 8 x half>, <vscale x 8 x half> }
@_Z24test_svluti6_lane_f16_x413svfloat16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]],
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale
x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]],
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale
x 8 x half>, <vscale x 8 x half> } [[TMP0]]
+//
+svfloat16x4_t test_svluti6_lane_f16_x4(svfloat16x2_t table, svuint8x2_t
indices)
+ __arm_streaming {
+ return svluti6_lane_f16_x4(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
@test_svluti6_lane_bf16_x4(
+// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]],
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x
8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(<vscale x 8 x bfloat>
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8>
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale
x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
@_Z25test_svluti6_lane_bf16_x414svbfloat16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]],
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>,
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
@llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(<vscale x 8 x bfloat>
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8>
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
<vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+svbfloat16x4_t test_svluti6_lane_bf16_x4(svbfloat16x2_t table, svuint8x2_t
indices)
+ __arm_streaming {
+ return svluti6_lane_bf16_x4(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_s8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr
#[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8>
@_Z18test_svluti6_zt_s8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr
#[[ATTR2:[0-9]+]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti6_zt_s8(svuint8_t indices) __arm_streaming __arm_in("zt0")
{
+ return svluti6_zt_s8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_u8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr
#[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8>
@_Z18test_svluti6_zt_u8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr
#[[ATTR2]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti6_zt_u8(svuint8_t indices) __arm_streaming
__arm_in("zt0") {
+ return svluti6_zt_u8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr
#[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8>
@_Z19test_svluti6_zt_mf8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr
#[[ATTR2]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svluti6_zt_mf8(svuint8_t indices) __arm_streaming
__arm_in("zt0") {
+ return svluti6_zt_mf8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>,
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_u8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8>
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]])
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>,
<vscale x 16 x i8>, <vscale x 16 x i8> }
@_Z21test_svluti6_zt_u8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]])
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svluti6_zt_u8_x4(svuint8x3_t indices)
+ __arm_streaming __arm_in("zt0") {
+ return svluti6_zt_u8_x4(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>,
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_s8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8>
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]])
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>,
<vscale x 16 x i8>, <vscale x 16 x i8> }
@_Z21test_svluti6_zt_s8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]])
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT: [[ENTRY:.*:]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]],
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svint8x4_t test_svluti6_zt_s8_x4(svuint8x3_t indices)
+ __arm_streaming __arm_in("zt0") {
+ return svluti6_zt_s8_x4(0, indices);
+}
+
+//...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/187046
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits