[clang] 7814cc9 - [AArch64][clang][llvm] Add ACLE Armv9.7 lookup table intrinsics (#187046)

via cfe-commits Fri, 12 Jun 2026 08:47:32 -0700

Author: Jonathan Thackray
Date: 2026-06-12T16:47:16+01:00
New Revision: 7814cc9454cf454acdc7385fc41809a6bdf5d7f8


URL: 
https://github.com/llvm/llvm-project/commit/7814cc9454cf454acdc7385fc41809a6bdf5d7f8
DIFF: 
https://github.com/llvm/llvm-project/commit/7814cc9454cf454acdc7385fc41809a6bdf5d7f8.diff

LOG: [AArch64][clang][llvm] Add ACLE Armv9.7 lookup table intrinsics (#187046)

Add support for the following Armv9.7-A Lookup Table (lut)
instruction intrinsics, as defined in the ACLE[1]:

SVE2.3:
```c
  // Variants are also available for: _u8_x2 and _mf8_x2.
  svint8_t svluti6[_s8_x2](svint8x2_t table, svuint8_t indices);
```

SVE2.3 and SME2.3:
```c
  // Variants are also available for: _u16_x2, _f16_x2 and _bf16_x2.
  svint16_t svluti6_lane[_s16_x2](svint16x2_t table, svuint8_t indices, uint64_t imm_idx);
```

SME2.3:
```c
  // Variants are also available for: _u16, _f16 and _bf16.
  svint16x4_t svluti6_lane_s16_x4[_s16_x2_u8_x2](svint16x2_t table, svuint8x2_t indices, uint64_t imm_idx);

  // Variants are also available for: _u8 and _mf8.
  svint8x4_t svluti6_zt_s8_x4(uint64_t zt0, svuint8x3_t zn) __arm_streaming __arm_in("zt0");

  // Variants are also available for: _u8 and _mf8.
  svint8_t svluti6_zt_s8(uint64_t zt0, svuint8_t zn) __arm_streaming __arm_in("zt0");
```

[1] https://github.com/ARM-software/acle/pull/428/

Added: 
    clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
    clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
    clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c
    clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c
    
clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
    clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c
    clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c
    clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
    llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll
    llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll

Modified: 
    clang/include/clang/Basic/arm_sme.td
    clang/include/clang/Basic/arm_sve.td
    clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SMEInstrFormats.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 032c588966032..c79e6e2ae1f9a 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -981,6 +981,11 @@ let SMETargetGuard = "sme-lutv2" in {
   def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, 
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+let SMETargetGuard = "sme2p3" in {
+  def SVLUTI6_ZT      : SInst<"svluti6_zt_{d}", "di[", "cUcm", MergeNone, 
"aarch64_sme_luti6_zt", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0, 
ImmCheck0_0>]>;
+  def SVLUTI6_ZT_X4   : SInst<"svluti6_zt_{d}_x4", "4i3.[", "cUcm", MergeNone, 
"aarch64_sme_luti6_zt_x4", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0, 
ImmCheck0_0>]>;
+}
+
 let SMETargetGuard = "sme-f8f32" in {
   def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m", "viPPdd>", "m", MergeNone, 
"aarch64_sme_fp8_fmopa_za32",
                              [IsStreaming, IsInOutZA, IsOverloadNone], 
[ImmCheck<0, ImmCheck0_3>]>;

diff  --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 25f42cbcac64e..91111001703c3 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1919,6 +1919,19 @@ let SVETargetGuard = "(sve2|sme2),lut", SMETargetGuard = 
"sme2,lut" in {
   def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUshb", 
MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, 
ImmCheck0_3>]>;
 }
 
+let SVETargetGuard = "sve2p3", SMETargetGuard = InvalidMode in {
+  def SVLUTI6 : SInst<"svluti6[_{d}_x2]", "d2[", "cUcm", MergeNone, 
"aarch64_sve_luti6", [IsOverloadNone]>;
+}
+
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sve2p3|sme2p3" in {
+  def SVLUTI6_x2 : SInst<"svluti6_lane[_{d}_x2]", "d2.d[i", "sUshb", 
MergeNone, "aarch64_sve_luti6_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, 
ImmCheck0_1>]>;
+}
+
+let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2p3" in {
+  def SVLUTI6_X4_U8X2 : SInst<"svluti6_lane_{d}_x4[_{d}_x2_u8_x2]", "422.[i", 
"sUshb", MergeNone, "aarch64_sme_luti6_lane_x4_x2", [IsStreaming], [ImmCheck<2, 
ImmCheck0_1>]>;
+  def SVLUTI6_X4_U8X3 : SInst<"svluti6_lane_{d}_x4[_{d}_x2_u8_x3]", "423.[i", 
"sUshb", MergeNone, "aarch64_sme_luti6_lane_x4_x3", [IsStreaming], [ImmCheck<2, 
ImmCheck0_1>]>;
+}
+
 
////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Optional
 

diff  --git a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c 
b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
new file mode 100644
index 0000000000000..656b0ce565833
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
@@ -0,0 +1,251 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme2 -target-feature +sme2p3 
-target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme2 -target-feature +sme2p3 
-target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
--check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -S -O1 
-Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2,A3_UNUSED,A4_UNUSED) A1##A2
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_s16_x4(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } 
@_Z24test_svluti6_lane_s16_x411svint16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale 
x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svint16x4_t test_svluti6_lane_s16_x4(svint16x2_t table, svuint8x2_t indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_s16_x4,_s16_x2_u8_x2,)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_u16_x4(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } 
@_Z24test_svluti6_lane_u16_x412svuint16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale 
x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svuint16x4_t test_svluti6_lane_u16_x4(svuint16x2_t table, svuint8x2_t indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_u16_x4,_u16_x2_u8_x2,)(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, 
<vscale x 8 x half>, <vscale x 8 x half> } @test_svluti6_lane_f16_x4(
+// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 
x half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16(<vscale x 8 x half> 
[[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 
x half>, <vscale x 8 x half> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x 
half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@_Z24test_svluti6_lane_f16_x413svfloat16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale 
x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16(<vscale x 8 x half> 
[[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale 
x 8 x half>, <vscale x 8 x half> } [[TMP0]]
+//
+svfloat16x4_t test_svluti6_lane_f16_x4(svfloat16x2_t table, svuint8x2_t 
indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_f16_x4,_f16_x2_u8_x2,)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x 
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@test_svluti6_lane_bf16_x4(
+// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x 
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@_Z25test_svluti6_lane_bf16_x414svbfloat16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+svbfloat16x4_t test_svluti6_lane_bf16_x4(svbfloat16x2_t table, svuint8x2_t 
indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_bf16_x4,_bf16_x2_u8_x2,)(table, indices, 
0);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_s16_x4_u8_x3(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 1)
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } 
@_Z30test_svluti6_lane_s16_x4_u8_x311svint16x2_t11svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale 
x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 1)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svint16x4_t test_svluti6_lane_s16_x4_u8_x3(svint16x2_t table, svuint8x3_t 
indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_s16_x4,_s16_x2_u8_x3,)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_u16_x4_u8_x3(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } 
@_Z30test_svluti6_lane_u16_x4_u8_x312svuint16x2_t11svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale 
x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 8 x i16> 
[[TABLE_COERCE0]], <vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svuint16x4_t test_svluti6_lane_u16_x4_u8_x3(svuint16x2_t table, svuint8x3_t 
indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_u16_x4,_u16_x2_u8_x3,)(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, 
<vscale x 8 x half>, <vscale x 8 x half> } @test_svluti6_lane_f16_x4_u8_x3(
+// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 
x half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16(<vscale x 8 x half> 
[[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 1)
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 
x half>, <vscale x 8 x half> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x 
half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@_Z30test_svluti6_lane_f16_x4_u8_x313svfloat16x2_t11svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale 
x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16(<vscale x 8 x half> 
[[TABLE_COERCE0]], <vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 1)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale 
x 8 x half>, <vscale x 8 x half> } [[TMP0]]
+//
+svfloat16x4_t test_svluti6_lane_f16_x4_u8_x3(svfloat16x2_t table, svuint8x3_t 
indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_f16_x4,_f16_x2_u8_x3,)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x 
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@test_svluti6_lane_bf16_x4_u8_x3(
+// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x 
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@_Z31test_svluti6_lane_bf16_x4_u8_x314svbfloat16x2_t11svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE2:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE2]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+svbfloat16x4_t test_svluti6_lane_bf16_x4_u8_x3(svbfloat16x2_t table, 
svuint8x3_t indices)
+    __arm_streaming {
+  return SVE_ACLE_FUNC(svluti6_lane,_bf16_x4,_bf16_x2_u8_x3,)(table, indices, 
0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_s8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z18test_svluti6_zt_s8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti6_zt_s8(svuint8_t indices) __arm_streaming __arm_in("zt0") 
{
+  return svluti6_zt_s8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_u8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z18test_svluti6_zt_u8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti6_zt_u8(svuint8_t indices) __arm_streaming 
__arm_in("zt0") {
+  return svluti6_zt_u8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z19test_svluti6_zt_mf8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svluti6_zt_mf8(svuint8_t indices) __arm_streaming 
__arm_in("zt0") {
+  return svluti6_zt_mf8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_u8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } 
@_Z21test_svluti6_zt_u8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svluti6_zt_u8_x4(svuint8x3_t indices)
+    __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_u8_x4(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_s8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } 
@_Z21test_svluti6_zt_s8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svint8x4_t test_svluti6_zt_s8_x4(svuint8x3_t indices)
+    __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_s8_x4(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_mf8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } 
@_Z22test_svluti6_zt_mf8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svmfloat8x4_t test_svluti6_zt_mf8_x4(svuint8x3_t indices)
+    __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_mf8_x4(0, indices);
+}

diff  --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
new file mode 100644
index 0000000000000..11f0848af1c07
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
@@ -0,0 +1,64 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x 
c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_s8_x2(
+// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z18test_svluti6_s8_x210svint8x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x 
i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti6_s8_x2(svint8x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6, _s8_x2)(table, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_u8_x2(
+// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z18test_svluti6_u8_x211svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x 
i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti6_u8_x2(svuint8x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6, _u8_x2)(table, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_mf8_x2(
+// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z19test_svluti6_mf8_x213svmfloat8x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x 
i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svluti6_mf8_x2(svmfloat8x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6, _mf8_x2)(table, indices);
+}

diff  --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c
new file mode 100644
index 0000000000000..b6d8fe5cff531
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c
@@ -0,0 +1,138 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x 
c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -target-feature +sme 
-target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | 
FileCheck %s --check-prefix=STREAM-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -target-feature +sme 
-target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x 
c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 
-target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 
-target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+#ifdef STREAMING_MODE
+#define STREAMING_ATTR __arm_streaming
+#else
+#define STREAMING_ATTR
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@test_svluti6_lane_s16_x2(
+// STREAM-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// STREAM-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// STREAM-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z24test_svluti6_lane_s16_x211svint16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z24test_svluti6_lane_s16_x211svint16x2_tu11__SVUint8_t(
+// STREAM-CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 
8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// STREAM-CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// STREAM-CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti6_lane_s16_x2(svint16x2_t table, svuint8_t indices) STREAMING_ATTR {
+  return SVE_ACLE_FUNC(svluti6_lane, _s16_x2)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_u16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@test_svluti6_lane_u16_x2(
+// STREAM-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// STREAM-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0)
+// STREAM-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z24test_svluti6_lane_u16_x212svuint16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z24test_svluti6_lane_u16_x212svuint16x2_tu11__SVUint8_t(
+// STREAM-CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 
8 x i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// STREAM-CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0)
+// STREAM-CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti6_lane_u16_x2(svuint16x2_t table, svuint8_t indices) STREAMING_ATTR {
+  return SVE_ACLE_FUNC(svluti6_lane, _u16_x2)(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svluti6_lane_f16_x2(
+// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x half> 
@test_svluti6_lane_f16_x2(
+// STREAM-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// STREAM-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// STREAM-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> 
@_Z24test_svluti6_lane_f16_x213svfloat16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> 
@_Z24test_svluti6_lane_f16_x213svfloat16x2_tu11__SVUint8_t(
+// STREAM-CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 
8 x half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// STREAM-CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// STREAM-CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svluti6_lane_f16_x2(svfloat16x2_t table, svuint8_t indices) STREAMING_ATTR {
+  return SVE_ACLE_FUNC(svluti6_lane, _f16_x2)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> 
@test_svluti6_lane_bf16_x2(
+// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES]], i32 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// STREAM-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> 
@test_svluti6_lane_bf16_x2(
+// STREAM-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 
x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// STREAM-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES]], i32 0)
+// STREAM-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> 
@_Z25test_svluti6_lane_bf16_x214svbfloat16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// STREAM-CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> 
@_Z25test_svluti6_lane_bf16_x214svbfloat16x2_tu11__SVUint8_t(
+// STREAM-CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale 
x 8 x bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// STREAM-CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// STREAM-CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.luti6.lane.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES]], i32 0)
+// STREAM-CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svluti6_lane_bf16_x2(svbfloat16x2_t table, svuint8_t indices) STREAMING_ATTR {
+  return SVE_ACLE_FUNC(svluti6_lane, _bf16_x2)(table, indices, 0);
+}

diff  --git a/clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c b/clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c
new file mode 100644
index 0000000000000..2ab8d4d0c4120
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sme_streaming_only_sme_AND_sme2p3.c
@@ -0,0 +1,56 @@
+// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+// Properties: guard="" streaming_guard="sme,sme2p3" flags="streaming-only,requires-zt"
+
+void test(void) __arm_inout("zt0"){
+  svuint8_t svuint8_t_val;
+  svuint8x3_t svuint8x3_t_val;
+
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_mf8(0, svuint8_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_mf8_x4(0, svuint8x3_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_s8(0, svuint8_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_s8_x4(0, svuint8x3_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_u8(0, svuint8_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_u8_x4(0, svuint8x3_t_val);
+}
+
+void test_streaming(void) __arm_streaming __arm_inout("zt0"){
+  svuint8_t svuint8_t_val;
+  svuint8x3_t svuint8x3_t_val;
+
+  svluti6_zt_mf8(0, svuint8_t_val);
+  svluti6_zt_mf8_x4(0, svuint8x3_t_val);
+  svluti6_zt_s8(0, svuint8_t_val);
+  svluti6_zt_s8_x4(0, svuint8x3_t_val);
+  svluti6_zt_u8(0, svuint8_t_val);
+  svluti6_zt_u8_x4(0, svuint8x3_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible __arm_inout("zt0"){
+  svuint8_t svuint8_t_val;
+  svuint8x3_t svuint8x3_t_val;
+
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_mf8(0, svuint8_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_mf8_x4(0, svuint8x3_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_s8(0, svuint8_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_s8_x4(0, svuint8x3_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_u8(0, svuint8_t_val);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_zt_u8_x4(0, svuint8x3_t_val);
+}

diff  --git a/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
new file mode 100644
index 0000000000000..1918990b4153e
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_sve2p3___sme_AND_LP_sve2p3_OR_sme2p3_RP.c
@@ -0,0 +1,77 @@
+// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
+// expected-no-diagnostics
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,sve2p3" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
+
+void test(void) {
+  svbfloat16x2_t svbfloat16x2_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svint16x2_t svint16x2_t_val;
+  svuint8_t svuint8_t_val;
+  svuint16x2_t svuint16x2_t_val;
+
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svbfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svint16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svuint16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x2(svbfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x2(svfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x2(svint16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x2(svuint16x2_t_val, svuint8_t_val, 1);
+}
+
+void test_streaming(void) __arm_streaming{
+  svbfloat16x2_t svbfloat16x2_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svint16x2_t svint16x2_t_val;
+  svuint8_t svuint8_t_val;
+  svuint16x2_t svuint16x2_t_val;
+
+  svluti6_lane(svbfloat16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane(svfloat16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane(svint16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane(svuint16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane_bf16_x2(svbfloat16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane_f16_x2(svfloat16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane_s16_x2(svint16x2_t_val, svuint8_t_val, 1);
+  svluti6_lane_u16_x2(svuint16x2_t_val, svuint8_t_val, 1);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+  svbfloat16x2_t svbfloat16x2_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svint16x2_t svint16x2_t_val;
+  svuint8_t svuint8_t_val;
+  svuint16x2_t svuint16x2_t_val;
+
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svbfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svint16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane(svuint16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x2(svbfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x2(svfloat16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x2(svint16x2_t_val, svuint8_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x2(svuint16x2_t_val, svuint8_t_val, 1);
+}

diff  --git a/clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c b/clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c
new file mode 100644
index 0000000000000..ebf06311b8939
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sve_non_streaming_only_sve_AND_sve2p3.c
@@ -0,0 +1,62 @@
+// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify=guard
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="sve,sve2p3" streaming_guard="" flags=""
+
+void test(void) {
+  svint8x2_t svint8x2_t_val;
+  svmfloat8x2_t svmfloat8x2_t_val;
+  svuint8_t svuint8_t_val;
+  svuint8x2_t svuint8x2_t_val;
+
+  svluti6(svint8x2_t_val, svuint8_t_val);
+  svluti6(svmfloat8x2_t_val, svuint8_t_val);
+  svluti6(svuint8x2_t_val, svuint8_t_val);
+  svluti6_mf8_x2(svmfloat8x2_t_val, svuint8_t_val);
+  svluti6_s8_x2(svint8x2_t_val, svuint8_t_val);
+  svluti6_u8_x2(svuint8x2_t_val, svuint8_t_val);
+}
+
+void test_streaming(void) __arm_streaming{
+  svint8x2_t svint8x2_t_val;
+  svmfloat8x2_t svmfloat8x2_t_val;
+  svuint8_t svuint8_t_val;
+  svuint8x2_t svuint8x2_t_val;
+
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6(svint8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6(svmfloat8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6(svuint8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6_mf8_x2(svmfloat8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6_s8_x2(svint8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6_u8_x2(svuint8x2_t_val, svuint8_t_val);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+  svint8x2_t svint8x2_t_val;
+  svmfloat8x2_t svmfloat8x2_t_val;
+  svuint8_t svuint8_t_val;
+  svuint8x2_t svuint8x2_t_val;
+
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6(svint8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6(svmfloat8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6(svuint8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6_mf8_x2(svmfloat8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6_s8_x2(svint8x2_t_val, svuint8_t_val);
+  // guard-error@+1 {{builtin can only be called from a non-streaming 
function}}
+  svluti6_u8_x2(svuint8x2_t_val, svuint8_t_val);
+}

diff  --git a/clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c b/clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c
new file mode 100644
index 0000000000000..0f88ee7ad7fef
--- /dev/null
+++ b/clang/test/Sema/AArch64/arm_sve_streaming_only_sme_AND_sme2p3.c
@@ -0,0 +1,118 @@
+// NOTE: File has been autogenerated by 
utils/aarch64_builtins_test_generator.py
+// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify=streaming-guard
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+// Properties: guard="" streaming_guard="sme,sme2p3" flags="streaming-only"
+
+void test(void) {
+  svbfloat16x2_t svbfloat16x2_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svint16x2_t svint16x2_t_val;
+  svuint8x2_t svuint8x2_t_val;
+  svuint8x3_t svuint8x3_t_val;
+  svuint16x2_t svuint16x2_t_val;
+
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4_bf16_x2_u8_x2(svbfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4_bf16_x2_u8_x3(svbfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4_f16_x2_u8_x2(svfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4_f16_x2_u8_x3(svfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4(svint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4(svint16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4_s16_x2_u8_x2(svint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4_s16_x2_u8_x3(svint16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4_u16_x2_u8_x2(svuint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4_u16_x2_u8_x3(svuint16x2_t_val, svuint8x3_t_val, 1);
+}
+
+void test_streaming(void) __arm_streaming{
+  svbfloat16x2_t svbfloat16x2_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svint16x2_t svint16x2_t_val;
+  svuint8x2_t svuint8x2_t_val;
+  svuint8x3_t svuint8x3_t_val;
+  svuint16x2_t svuint16x2_t_val;
+
+  svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_bf16_x4_bf16_x2_u8_x2(svbfloat16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_bf16_x4_bf16_x2_u8_x3(svbfloat16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_f16_x4_f16_x2_u8_x2(svfloat16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_f16_x4_f16_x2_u8_x3(svfloat16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_s16_x4(svint16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_s16_x4(svint16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_s16_x4_s16_x2_u8_x2(svint16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_s16_x4_s16_x2_u8_x3(svint16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x3_t_val, 1);
+  svluti6_lane_u16_x4_u16_x2_u8_x2(svuint16x2_t_val, svuint8x2_t_val, 1);
+  svluti6_lane_u16_x4_u16_x2_u8_x3(svuint16x2_t_val, svuint8x3_t_val, 1);
+}
+
+void test_streaming_compatible(void) __arm_streaming_compatible{
+  svbfloat16x2_t svbfloat16x2_t_val;
+  svfloat16x2_t svfloat16x2_t_val;
+  svint16x2_t svint16x2_t_val;
+  svuint8x2_t svuint8x2_t_val;
+  svuint8x3_t svuint8x3_t_val;
+  svuint16x2_t svuint16x2_t_val;
+
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4(svbfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4_bf16_x2_u8_x2(svbfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_bf16_x4_bf16_x2_u8_x3(svbfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4(svfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4_f16_x2_u8_x2(svfloat16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_f16_x4_f16_x2_u8_x3(svfloat16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4(svint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4(svint16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4_s16_x2_u8_x2(svint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_s16_x4_s16_x2_u8_x3(svint16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4(svuint16x2_t_val, svuint8x3_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4_u16_x2_u8_x2(svuint16x2_t_val, svuint8x2_t_val, 1);
+  // streaming-guard-error@+1 {{builtin can only be called from a streaming 
function}}
+  svluti6_lane_u16_x4_u16_x2_u8_x3(svuint16x2_t_val, svuint8x3_t_val, 1);
+}

diff  --git a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c 
b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
new file mode 100644
index 0000000000000..25c35fbcbcc7b
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
@@ -0,0 +1,21 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -fsyntax-only -verify %s
+
+#include <arm_sme.h>
+
+void test_range_0_0(void) __arm_streaming __arm_in("zt0") {
+  svluti6_zt_s8(1, svundef_u8()); // expected-error {{argument value 1 is 
outside the valid range [0, 0]}}
+  svluti6_zt_u8_x4(1, svcreate3_u8(svundef_u8(), svundef_u8(), svundef_u8())); 
// expected-error {{argument value 1 is outside the valid range [0, 0]}}
+}
+
+void test_range_0_1(void) __arm_streaming {
+  svluti6_lane_s16_x4_s16_x2_u8_x2(svcreate2_s16(svundef_s16(), svundef_s16()), // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+                                   svcreate2_u8(svundef_u8(), svundef_u8()), -1);
+  svluti6_lane_u16_x4_u16_x2_u8_x2(svcreate2_u16(svundef_u16(), 
svundef_u16()), // expected-error {{argument value 2 is outside the valid range 
[0, 1]}}
+                                   svcreate2_u8(svundef_u8(), svundef_u8()), 
2);
+  svluti6_lane_f16_x4_f16_x2_u8_x3(svcreate2_f16(svundef_f16(), 
svundef_f16()), // expected-error {{argument value 18446744073709551615 is 
outside the valid range [0, 1]}}
+                                   svcreate3_u8(svundef_u8(), svundef_u8(), 
svundef_u8()), -1);
+  svluti6_lane_bf16_x4_bf16_x2_u8_x3(svcreate2_bf16(svundef_bf16(), 
svundef_bf16()), // expected-error {{argument value 2 is outside the valid 
range [0, 1]}}
+                                     svcreate3_u8(svundef_u8(), svundef_u8(), 
svundef_u8()), 2);
+}

diff  --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp 
b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
index 60183e346f181..01781da390e0b 100644
--- a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
@@ -1,10 +1,15 @@
 // REQUIRES: aarch64-registered-target
 
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p3 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -fsyntax-only -verify %s
 
 #include <arm_sve.h>
 
-
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
 
 svint8_t test_svqshrn_n_s8_s16_x2(svint16x2_t zn, uint64_t imm)
 {
@@ -83,7 +88,23 @@ svuint8_t test_svqrshrun_n_u8_s16_x2(svint16x2_t zn, 
uint64_t imm)
   svqrshrun_n_u8_s16_x2(zn, 9); // expected-error-re {{argument value 
{{[0-9]+}} is outside the valid range [1, 8]}}
   svqrshrun_n_u8_s16_x2(zn, -1); // expected-error-re {{argument value 
{{[0-9]+}} is outside the valid range [1, 8]}}
 
-  svqrshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} 
must be a constant integer}}}}
+  svqrshrun_n_u8_s16_x2(zn, imm); // expected-error-re {{argument to {{.+}} 
must be a constant integer}}}
+}
+
+
+void test_range_0_1() {
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _s16_x2)(svcreate2_s16(svundef_s16(), 
svundef_s16()),
+                                        svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _u16_x2)(svcreate2_u16(svundef_u16(), 
svundef_u16()),
+                                        svundef_u8(), 2);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _f16_x2)(svcreate2_f16(svundef_f16(), 
svundef_f16()),
+                                        svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _bf16_x2)(svcreate2_bf16(svundef_bf16(), 
svundef_bf16()),
+                                         svundef_u8(), 2);
 }
 
 void test_svdot_lane_x2_imm_0_7(svint16_t s16, svuint16_t u16, svint8_t s8,

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ba0d7c02bf427..ba8e23d3df3db 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1371,13 +1371,28 @@ let TargetPrefix = "aarch64" in {  // All intrinsics 
start with "llvm.aarch64.".
                  LLVMVectorOfBitcastsToInt<0>],
                 !listconcat(Attrs, [IntrNoMem])>;
 
-  class SVE2_LUTI_Inrinsic<list<IntrinsicProperty> Attrs = []>
+  class SVE2_LUTI_Intrinsic<list<IntrinsicProperty> Attrs = []>
     :  DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
                  llvm_nxv16i8_ty,
                  llvm_i32_ty],
                  !listconcat(Attrs, [IntrNoMem, ImmArg<ArgIndex<2>>])>;
 
+  class SVE2_LUTI_X2_Intrinsic<list<IntrinsicProperty> Attrs = []>
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMMatchType<0>,
+                 llvm_nxv16i8_ty,
+                 llvm_i32_ty],
+                !listconcat(Attrs, [IntrNoMem, ImmArg<ArgIndex<3>>])>;
+
+  class SVE2_LUTI6_Intrinsic<list<IntrinsicProperty> Attrs = []>
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
+                [llvm_nxv16i8_ty,
+                 llvm_nxv16i8_ty,
+                 llvm_nxv16i8_ty],
+                !listconcat(Attrs, [IntrNoMem])>;
+
   class SVE2_1VectorArg_Long_Intrinsic<list<IntrinsicProperty> Attrs = []>
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                 [LLVMSubdivide2VectorType<0>,
@@ -2813,14 +2828,11 @@ def int_aarch64_sve_tbx  : 
AdvSIMD_SVE2_TBX_Intrinsic<[IntrSpeculatable]>;
 // SVE2 - Lookup Table
 //
 
-def int_aarch64_sve_luti2_lane : SVE2_LUTI_Inrinsic<[IntrSpeculatable]>;
-def int_aarch64_sve_luti4_lane : SVE2_LUTI_Inrinsic<[IntrSpeculatable]>;
-def int_aarch64_sve_luti4_lane_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                                    [LLVMMatchType<0>,
-                                    LLVMMatchType<0>,
-                                    llvm_nxv16i8_ty,
-                                    llvm_i32_ty],
-                                    [IntrNoMem, ImmArg<ArgIndex<3>>, 
IntrSpeculatable]>;
+def int_aarch64_sve_luti2_lane : SVE2_LUTI_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_luti4_lane : SVE2_LUTI_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_luti6 : SVE2_LUTI6_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_luti4_lane_x2 : SVE2_LUTI_X2_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_luti6_lane_x2 : SVE2_LUTI_X2_Intrinsic<[IntrSpeculatable]>;
 
 //
 // SVE2 - Optional bit permutation
@@ -3980,6 +3992,9 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_luti4_lane_zt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, 
llvm_nxv16i8_ty, llvm_i32_ty],
                             [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+  def int_aarch64_sme_luti6_zt
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_i32_ty, llvm_nxv16i8_ty],
+                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
 
   // Lookup table expand two registers
   //
@@ -4001,11 +4016,24 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                             [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+  def int_aarch64_sme_luti6_lane_x4_x2
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+                            [ImmArg<ArgIndex<4>>, IntrNoMem, 
IntrSpeculatable]>;
+  def int_aarch64_sme_luti6_lane_x4_x3
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+                            [ImmArg<ArgIndex<5>>, IntrNoMem, 
IntrSpeculatable]>;
 
   def int_aarch64_sme_luti4_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                             [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
+  def int_aarch64_sme_luti6_zt_x4
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty,
+                             llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+                            [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty],
+                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
 
 
   //

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 9c454349dc12d..ade5b9e0e6e70 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -415,8 +415,10 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
                                  unsigned Opc, uint32_t MaxImm);
+  void SelectMultiVectorLuti6LaneX4(SDNode *Node, unsigned NumIndexVecs);
 
-  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
+  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
+                             unsigned NumInVecs);
 
   template <unsigned MaxIdx, unsigned Scale>
   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
@@ -2271,17 +2273,57 @@ void 
AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
   CurDAG->RemoveDeadNode(Node);
 }
 
+void AArch64DAGToDAGISel::SelectMultiVectorLuti6LaneX4(SDNode *Node,
+                                                       unsigned NumIndexVecs) {
+  assert((NumIndexVecs == 2 || NumIndexVecs == 3) &&
+         "unexpected number of index vectors");
+
+  constexpr unsigned FirstIndexOp = 3;
+  unsigned ImmOp = FirstIndexOp + NumIndexVecs;
+  auto *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(ImmOp));
+  if (!Imm || Imm->getZExtValue() > 1)
+    return;
+
+  // The luti6 instruction always takes a 2-register Zm index tuple. The x3
+  // ACLE form provides three index vectors, so the lane selects which adjacent
+  // pair to use before forming Zm (op 3/4 or op 4/5, with op6 as imm).
+  unsigned Lane = Imm->getZExtValue();
+  unsigned IndexOp = FirstIndexOp;
+  if (NumIndexVecs == 3)
+    IndexOp += Lane;
+
+  SDValue TableTuple = createZTuple({Node->getOperand(1), 
Node->getOperand(2)});
+  SDValue IndexTuple =
+      createZTuple({Node->getOperand(IndexOp), Node->getOperand(IndexOp + 1)});
+  SDValue Ops[] = {TableTuple, IndexTuple, Node->getOperand(ImmOp)};
+
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+  SDNode *Instruction =
+      CurDAG->getMachineNode(AArch64::LUTI6_4Z2Z2ZI, DL, MVT::Untyped, Ops);
+  SDValue SuperReg = SDValue(Instruction, 0);
+
+  for (unsigned I = 0; I < 4; ++I)
+    ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
+                                      AArch64::zsub0 + I, DL, VT, SuperReg));
+
+  CurDAG->RemoveDeadNode(Node);
+}
+
 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                 unsigned NumOutVecs,
-                                                unsigned Opc) {
+                                                unsigned Opc,
+                                                unsigned NumInVecs) {
+  assert((NumInVecs == 2 || NumInVecs == 3) &&
+         "unexpected number of input vectors");
+
   SDValue ZtValue;
   if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
     return;
 
-  SDValue Chain = Node->getOperand(0);
-  SDValue Ops[] = {ZtValue,
-                   createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
-                   Chain};
+  SmallVector<SDValue, 4> Regs(Node->ops().slice(3, NumInVecs));
+  SDValue ZTuple = NumInVecs == 3 ? createZTuple(Regs) : createZMulTuple(Regs);
+  SDValue Ops[] = {ZtValue, ZTuple, Node->getOperand(0)};
 
   SDLoc DL(Node);
   EVT VT = Node->getValueType(0);
@@ -2294,9 +2336,7 @@ void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode 
*Node,
     ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
                                       AArch64::zsub0 + I, DL, VT, SuperReg));
 
-  // Copy chain
-  unsigned ChainIdx = NumOutVecs;
-  ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
+  ReplaceUses(SDValue(Node, NumOutVecs), SDValue(Instruction, 1));
   CurDAG->RemoveDeadNode(Node);
 }
 
@@ -5987,7 +6027,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       return;
     }
     case Intrinsic::aarch64_sme_luti4_zt_x4: {
-      SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
+      SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 2);
+      return;
+    }
+    case Intrinsic::aarch64_sme_luti6_zt_x4: {
+      SelectMultiVectorLuti(Node, 4, AArch64::LUTI6_4ZT3Z, 3);
       return;
     }
     case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
@@ -6080,6 +6124,12 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
                AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
       return;
+    case Intrinsic::aarch64_sme_luti6_lane_x4_x2:
+      SelectMultiVectorLuti6LaneX4(Node, 2);
+      return;
+    case Intrinsic::aarch64_sme_luti6_lane_x4_x3:
+      SelectMultiVectorLuti6LaneX4(Node, 3);
+      return;
     case Intrinsic::aarch64_sve_urshl_single_x2:
       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
               Node->getValueType(0),

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d1fe1718dcc44..493f11ac13484 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -227,9 +227,9 @@ def HasSVE_B16MM     : 
Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasS
                                  AssemblerPredicateWithAll<(all_of 
FeatureSVE_B16MM), "sve-b16mm">;
 def HasF16MM         : Predicate<"Subtarget->hasF16MM()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureF16MM), "f16mm">;
-def HasSVE2p3        : Predicate<"Subtarget->hasSVE2p3()">,
+def HasSVE2p3        : Predicate<"Subtarget->isSVEAvailable() && 
Subtarget->hasSVE2p3()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureSVE2p3), "sve2p3">;
-def HasSME2p3        : Predicate<"Subtarget->hasSME2p3()">,
+def HasSME2p3        : Predicate<"Subtarget->isStreaming() && 
Subtarget->hasSME2p3()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureSME2p3), "sme2p3">;
 def HasF16F32DOT     : Predicate<"Subtarget->hasF16F32DOT()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureF16F32DOT), "f16f32dot">;

diff  --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 022fed6473486..d0eb9ca218a27 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1140,7 +1140,7 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in {
 // SME2.3 instructions
 
//===----------------------------------------------------------------------===//
 let Predicates = [HasSME2p3] in {
-  def LUTI6_ZTZ       : sme2_lut_single<"luti6">;
+  defm LUTI6_ZTZ      : sme2_lut_single<"luti6", int_aarch64_sme_luti6_zt>;
   def LUTI6_4ZT3Z     : sme2_luti6_zt_consecutive<"luti6">;
   def LUTI6_S_4ZT3Z   : sme2_luti6_zt_strided<"luti6">;
   def LUTI6_4Z2Z2ZI   : sme2_luti6_vector_vg4_consecutive<"luti6">;

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0cc788d12bae0..4712406e37e6b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4897,14 +4897,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
   defm SQSHRN_Z2ZI_StoH   : sve_multi_vec_shift_narrow<"sqshrn",   0b000, 
int_aarch64_sve_sqshrn_x2>;
   defm UQSHRN_Z2ZI_StoH   : sve_multi_vec_shift_narrow<"uqshrn",   0b010, 
int_aarch64_sve_uqshrn_x2>;
 
-  defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">;
+  defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6", 
int_aarch64_sve_luti6_lane_x2>;
 } // End HasSME2p3orSVE2p3
 
 
//===----------------------------------------------------------------------===//
 // SVE2.3 instructions
 
//===----------------------------------------------------------------------===//
 let Predicates = [HasSVE2p3] in {
-  def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">;
+  defm LUTI6_Z2ZZ : sve2_luti6_vector<"luti6", int_aarch64_sve_luti6>;
 }
 
 
//===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td 
b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 771c4c1fb2b6e..f07fb8ad81f63 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3922,8 +3922,8 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
 
 // 8-bit Look up table
 class sme2_lut_single<string asm>
-  : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
-    asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
+    : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
+        asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
   bits<0> ZTt;
   bits<5> Zd;
   bits<5> Zn;
@@ -3932,6 +3932,13 @@ class sme2_lut_single<string asm>
   let Inst{4-0}   = Zd;
 }
 
+multiclass sme2_lut_single<string asm, SDPatternOperator intrinsic> {
+  def NAME : sme2_lut_single<asm>;
+
+  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn)),
+            (!cast<Instruction>(NAME) $zt, nxv16i8:$zn)>;
+}
+
 
//===----------------------------------------------------------------------===//
 // Lookup table read with 6-bit indices (8-bit)
 class sme2_luti6_zt_base<RegisterOperand zd_ty, string asm>

diff  --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 040962e801604..f96702a01c277 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -728,6 +728,13 @@ class SVE_Cvt_VG2_Pat<string name, SDPatternOperator 
intrinsic, ValueType out_vt
     : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
                   (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, 
in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
 
+class SVE_LUTI6_VG2_Index_Pat<ValueType vt, SDPatternOperator intrinsic,
+                              Instruction inst>
+    : Pat<(vt (intrinsic vt:$Op1, vt:$Op2, nxv16i8:$Op3,
+                         (i32 timm32_0_1:$Op4))),
+          (vt (inst (REG_SEQUENCE ZPR2, vt:$Op1, zsub0, vt:$Op2, zsub1),
+                    nxv16i8:$Op3, timm32_0_1:$Op4))>;
+
 
//===----------------------------------------------------------------------===//
 // SVE pattern match helpers.
 
//===----------------------------------------------------------------------===//
@@ -11415,18 +11422,24 @@ multiclass sve2_luti4_vector_vg2_index<string 
mnemonic> {
 }
 
 // Look up table read with 6-bit indices
-multiclass sve2_luti6_vector_index<string mnemonic> {
+multiclass sve2_luti6_vector_index<string mnemonic, SDPatternOperator 
intrinsic> {
   def _H : sve2_lut_vector_index<ZPR16, ZZ_h, VectorIndexD32b, 0b1011, 
mnemonic> {
     bit idx;
     let Inst{23} = idx;
   }
+
+  def : SVE_LUTI6_VG2_Index_Pat<nxv8i16, intrinsic,
+                                !cast<Instruction>(NAME # _H)>;
+  def : SVE_LUTI6_VG2_Index_Pat<nxv8f16, intrinsic,
+                                !cast<Instruction>(NAME # _H)>;
+  def : SVE_LUTI6_VG2_Index_Pat<nxv8bf16, intrinsic,
+                                !cast<Instruction>(NAME # _H)>;
 }
 
 // Look up table
 class sve2_luti6_vector<string mnemonic>
     : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm),
-      mnemonic, "\t$Zd, $Zn, $Zm",
-      "", []>, Sched<[]> {
+        mnemonic, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
   bits<5> Zd;
   bits<5> Zn;
   bits<5> Zm;
@@ -11437,6 +11450,15 @@ class sve2_luti6_vector<string mnemonic>
   let Inst{4-0}   = Zd;
 }
 
+multiclass sve2_luti6_vector<string mnemonic, SDPatternOperator intrinsic> {
+  def NAME : sve2_luti6_vector<mnemonic>;
+
+  def : Pat<(nxv16i8 (intrinsic nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)),
+            (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0,
+                                                     nxv16i8:$Op2, zsub1),
+                                       nxv16i8:$Op3)>;
+}
+
 
//===----------------------------------------------------------------------===//
 // Checked Pointer Arithmetic (FEAT_CPA)
 
//===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll 
b/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll
new file mode 100644
index 0000000000000..3c695e46267d3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -force-streaming -mtriple=aarch64-none-linux-gnu -mattr=+sme2p3 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @luti6_zt_i8(<vscale x 16 x i8> %x) {
+; CHECK-LABEL: luti6_zt_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 z0.b, zt0, z0
+; CHECK-NEXT:    ret
+  %res = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, 
<vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %res
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8> } @luti6_zt_i8_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, 
<vscale x 16 x i8> %c) {
+; CHECK-LABEL: luti6_zt_i8_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.b - z3.b }, zt0, { z0 - z2 }
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x 
i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: luti6_i16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z4.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[1]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8i16(<vscale x 
8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> 
%b, i32 1)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16> } %res
+}
+
+; bfloat variant of the x4.x2 lane form: calls
+; @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16 passing %a for both nxv8bf16
+; operands, %b for both nxv16i8 operands, and immediate 0. The assertions
+; expect three register copies followed by a single luti6 with lane index [0].
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } @luti6_bf16_x4(<vscale x 8 x bfloat> %a, <vscale x 16 x 
i8> %b) {
+; CHECK-LABEL: luti6_bf16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z4.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[0]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 
x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.x2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 
8 x bfloat> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b, i32 0)
+  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } %res
+}
+
+; half variant of the x4.x2 lane form: calls
+; @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16 passing %a for both nxv8f16
+; operands, %b for both nxv16i8 operands, and immediate 1. The assertions
+; expect three register copies followed by a single luti6 with lane index [1].
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, 
<vscale x 8 x half> } @luti6_f16_x4(<vscale x 8 x half> %a, <vscale x 16 x i8> 
%b) {
+; CHECK-LABEL: luti6_f16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov z4.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[1]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x 
half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x2.nxv8f16(<vscale 
x 8 x half> %a, <vscale x 8 x half> %a, <vscale x 16 x i8> %b, <vscale x 16 x 
i8> %b, i32 1)
+  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale 
x 8 x half> } %res
+}
+
+; Calls @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16 passing %a for both
+; nxv8i16 operands, three distinct nxv16i8 operands (%b, %c, %d), and
+; immediate 0. The expected luti6 names only { z1, z2 } with lane index [0];
+; z0 is copied into z3/z4 to form the nxv8i16 register pair.
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16> } @luti6_i16_x4_x3_imm0(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, 
<vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: luti6_i16_x4_x3_imm0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z4.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[0]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 
8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> 
%c, <vscale x 16 x i8> %d, i32 0)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16> } %res
+}
+
+; Same call as luti6_i16_x4_x3_imm0 but with immediate 1. Here the expected
+; luti6 names { z2, z3 } with lane index [1], and only one copy (z0 into z1)
+; is expected to form the { z0.h, z1.h } register pair.
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16> } @luti6_i16_x4_x3_imm1(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, 
<vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: luti6_i16_x4_x3_imm1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8i16(<vscale x 
8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> 
%c, <vscale x 16 x i8> %d, i32 1)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16> } %res
+}
+
+; half variant of the x4.x3 lane form: calls
+; @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16 passing %a for both nxv8f16
+; operands, %b/%c/%d as the nxv16i8 operands, and immediate 1. The expected
+; luti6 names { z2, z3 } with lane index [1] after one copy of z0 into z1.
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, 
<vscale x 8 x half> } @luti6_f16_x4_x3(<vscale x 8 x half> %a, <vscale x 16 x 
i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: luti6_f16_x4_x3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x 
half>, <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8f16(<vscale 
x 8 x half> %a, <vscale x 8 x half> %a, <vscale x 16 x i8> %b, <vscale x 16 x 
i8> %c, <vscale x 16 x i8> %d, i32 1)
+  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale 
x 8 x half> } %res
+}
+
+; bfloat variant of the x4.x3 lane form: calls
+; @llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16 passing %a for both nxv8bf16
+; operands, %b/%c/%d as the nxv16i8 operands, and immediate 0. The expected
+; luti6 names { z1, z2 } with lane index [0] after copying z0 into z3/z4.
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } @luti6_bf16_x4_x3(<vscale x 8 x bfloat> %a, <vscale x 
16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
+; CHECK-LABEL: luti6_bf16_x4_x3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z4.d, z0.d
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[0]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 
x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.x3.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 
8 x bfloat> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x 
i8> %d, i32 0)
+  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } %res
+}

diff  --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll
new file mode 100644
index 0000000000000..a2bf43088968f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve2p3 
-enable-subreg-liveness < %s | FileCheck %s
+
+; Calls @llvm.aarch64.sve.luti6 with %a as all three nxv16i8 operands. The
+; assertions expect z0 to be copied into z1 to form the { z0.b, z1.b } pair,
+; followed by a single luti6 that also reads z0 as its last operand.
+define <vscale x 16 x i8> @luti6_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: luti6_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    luti6 z0.b, { z0.b, z1.b }, z0
+; CHECK-NEXT:    ret
+  %res = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(<vscale x 16 x 
i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> %a)
+  ret <vscale x 16 x i8> %res
+}
+
+; Calls @llvm.aarch64.sve.luti6.lane.x2.i16 passing %a for both nxv8i16
+; operands, %b as the nxv16i8 operand, and immediate 1. The assertions expect
+; z0 to be copied into z2/z3 before a single luti6 reading z1 with index [1].
+define <vscale x 8 x i16> @luti6_i16_x2(<vscale x 8 x i16> %a, <vscale x 16 x 
i8> %b) {
+; CHECK-LABEL: luti6_i16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    luti6 z0.h, { z2.h, z3.h }, z1[1]
+; CHECK-NEXT:    ret
+  %res = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> 
%a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 8 x i16> %res
+}
+
+; half variant of the lane.x2 form: calls @llvm.aarch64.sve.luti6.lane.x2.f16
+; passing %a for both nxv8f16 operands, %b as the nxv16i8 operand, and
+; immediate 0. The assertions expect z0 to be copied into z2/z3 before a
+; single luti6 reading z1 with index [0].
+define <vscale x 8 x half> @luti6_f16_x2(<vscale x 8 x half> %a, <vscale x 16 
x i8> %b) {
+; CHECK-LABEL: luti6_f16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    luti6 z0.h, { z2.h, z3.h }, z1[0]
+; CHECK-NEXT:    ret
+  %res = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.f16(<vscale x 8 x half> %a, <vscale x 8 x half> 
%a, <vscale x 16 x i8> %b, i32 0)
+  ret <vscale x 8 x half> %res
+}
+
+; bfloat variant of the lane.x2 form: calls
+; @llvm.aarch64.sve.luti6.lane.x2.bf16 passing %a for both nxv8bf16 operands,
+; %b as the nxv16i8 operand, and immediate 1. The assertions expect z0 to be
+; copied into z2/z3 before a single luti6 reading z1 with index [1].
+define <vscale x 8 x bfloat> @luti6_bf16_x2(<vscale x 8 x bfloat> %a, <vscale 
x 16 x i8> %b) {
+; CHECK-LABEL: luti6_bf16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    luti6 z0.h, { z2.h, z3.h }, z1[1]
+; CHECK-NEXT:    ret
+  %res = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.luti6.lane.x2.bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x 
bfloat> %a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 8 x bfloat> %res
+}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 7814cc9 - [AArch64][clang][llvm] Add ACLE Armv9.7 lookup table intrinsics (#187046)

Reply via email to