https://github.com/jthackray updated 
https://github.com/llvm/llvm-project/pull/187046

>From 5b75824a9018ff354cbd4885ec0390357131f786 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Fri, 13 Mar 2026 15:35:37 +0000
Subject: [PATCH 1/2] [AArch64][clang][llvm] Add support for Armv9.7-A lookup
 table intrinsics

Add support for the following Armv9.7-A Lookup Table (lut)
instruction intrinsics:

SVE2.3
```c
  // Variants are also available for: _u8 and _mf8.
  svint8_t svluti6[_s8](svint8x2_t table, svuint8_t indices);
```

SVE2.3 and SME2.3
``` c
  // Variants are also available for _u16_x2 and _f16_x2.
  svint16_t svluti6_lane[_s16_x2](svint16x2_t table, svuint8_t indices, 
uint64_t imm_idx);
```

SME2.3
```c
  // Variants are also available for: _u16, _f16 and _bf16.
  svint16x4_t svluti6_lane[_s16_x4](svint16x2_t table, svuint8x2_t 
indices, uint64_t imm_idx);

  // Variants are also available for: _u8 and _mf8.
  svint8x4_t svluti6_zt_s8_x4(uint64_t zt0, svuint8x3_t zn) __arm_streaming 
__arm_in("zt0");

  // Variants are also available for: _u8 and _mf8.
  svint8_t svluti6_zt_s8(uint64_t zt0, svuint8_t zn) __arm_streaming 
__arm_in("zt0");
```
---
 clang/include/clang/Basic/arm_sme.td          |   6 +
 clang/include/clang/Basic/arm_sve.td          |   9 +
 clang/lib/Basic/Targets/AArch64.cpp           |  29 +++
 clang/lib/Basic/Targets/AArch64.h             |   2 +
 .../sme2p3-intrinsics/acle_sme2p3_luti6.c     | 175 ++++++++++++++++++
 .../sve2p3-intrinsics/acle_sve2p3_luti6.c     | 112 +++++++++++
 .../Preprocessor/aarch64-target-features.c    |  23 +++
 .../acle_sme2p3_imm.c                         |  21 +++
 .../acle_sme2p3_target.c                      |  20 ++
 .../acle_sme2p3_target_lane.c                 |  16 ++
 .../acle_sve2p3_imm.cpp                       |  24 +++
 .../acle_sve2p3_target.c                      |  19 ++
 .../acle_sve2p3_target_lane.c                 |  14 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  32 ++++
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 102 ++++++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  10 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |   3 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  12 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  11 ++
 .../AArch64/sme2p3-intrinsics-luti6.ll        | 105 +++++++++++
 .../AArch64/sve2p3-intrinsics-luti6.ll        |  55 ++++++
 .../test/Verifier/AArch64/luti6-intrinsics.ll |  79 ++++++++
 22 files changed, 876 insertions(+), 3 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
 create mode 100644 
clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
 create mode 100644 clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target_lane.c
 create mode 100644 
clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
 create mode 100644 
clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target.c
 create mode 100644 
clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target_lane.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll
 create mode 100644 llvm/test/Verifier/AArch64/luti6-intrinsics.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 032c588966032..8de360fca5f5e 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -981,6 +981,12 @@ let SMETargetGuard = "sme-lutv2" in {
   def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, 
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+let SMETargetGuard = "sme2p3" in {
+  def SVLUTI6_ZT      : SInst<"svluti6_zt_{d}", "diu", "cUcm", MergeNone, 
"aarch64_sme_luti6_zt", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0, 
ImmCheck0_0>]>;
+  def SVLUTI6_ZT_X4   : SInst<"svluti6_zt_{d}_x4", "4i3.u", "cUcm", MergeNone, 
"aarch64_sme_luti6_zt_x4", [IsOverloadNone, IsStreaming, IsInZT0], [ImmCheck<0, 
ImmCheck0_0>]>;
+  def SVLUTI6_LANE_X4 : SInst<"svluti6_lane[_{d}_x4]", "42.d2.[i", "sUshb", 
MergeNone, "aarch64_sme_luti6_lane_x4", [IsStreaming], [ImmCheck<2, 
ImmCheck0_1>]>;
+}
+
 let SMETargetGuard = "sme-f8f32" in {
   def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m", "viPPdd>", "m", MergeNone, 
"aarch64_sme_fp8_fmopa_za32",
                              [IsStreaming, IsInOutZA, IsOverloadNone], 
[ImmCheck<0, ImmCheck0_3>]>;
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..b4080a8456dd5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1876,6 +1876,15 @@ let SVETargetGuard = "(sve2|sme2),lut", SMETargetGuard = 
"sme2,lut" in {
   def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUshb", 
MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, 
ImmCheck0_3>]>;
 }
 
+let SVETargetGuard = "sve2p3", SMETargetGuard = InvalidMode in {
+  def SVLUTI6 : SInst<"svluti6[_{d}]", "d2u", "cUcm", MergeNone, 
"aarch64_sve_luti6", [IsOverloadNone]>;
+}
+
+let SVETargetGuard = "sve2p3", SMETargetGuard = "sme2p3" in {
+  def SVLUTI6_x2_I16 : SInst<"svluti6_lane[_{d}_x2]", "d2.d[i", "sUs", 
MergeNone, "aarch64_sve_luti6_lane_x2_i16", [IsOverloadNone, 
VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>;
+  def SVLUTI6_x2_F16 : SInst<"svluti6_lane[_{d}_x2]", "d2.d[i", "h", 
MergeNone, "aarch64_sve_luti6_lane_x2_f16", [IsOverloadNone, 
VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>;
+}
+
 
////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Optional
 
diff --git a/clang/lib/Basic/Targets/AArch64.cpp 
b/clang/lib/Basic/Targets/AArch64.cpp
index f7ed15be75cd8..9d4085e5ebbf4 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -495,6 +495,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (HasSVE2p1)
     Builder.defineMacro("__ARM_FEATURE_SVE2p1", "1");
 
+  if (HasSVE2p3)
+    Builder.defineMacro("__ARM_FEATURE_SVE2p3", "1");
+
   if (HasSVE2 && HasSVEAES)
     Builder.defineMacro("__ARM_FEATURE_SVE2_AES", "1");
 
@@ -521,6 +524,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (HasSME2p1)
     Builder.defineMacro("__ARM_FEATURE_SME2p1", "1");
 
+  if (HasSME2p3)
+    Builder.defineMacro("__ARM_FEATURE_SME2p3", "1");
+
   if (HasSMEF16F16)
     Builder.defineMacro("__ARM_FEATURE_SME_F16F16", "1");
 
@@ -900,9 +906,11 @@ void AArch64TargetInfo::computeFeatureLookup() {
       .Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
       .Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
       .Case("sve2p1", FPU & SveMode && HasSVE2p1)
+      .Case("sve2p3", FPU & SveMode && HasSVE2p3)
       .Case("sme", HasSME)
       .Case("sme2", HasSME2)
       .Case("sme2p1", HasSME2p1)
+      .Case("sme2p3", HasSME2p3)
       .Case("sme-f64f64", HasSMEF64F64)
       .Case("sme-i16i64", HasSMEI16I64)
       .Case("sme-fa64", HasSMEFA64)
@@ -1008,6 +1016,15 @@ bool 
AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSVE2 = true;
       HasSVE2p1 = true;
     }
+    if (Feature == "+sve2p3") {
+      FPU |= NeonMode;
+      FPU |= SveMode;
+      HasFullFP16 = true;
+      HasSVE2 = true;
+      HasSVE2p1 = true;
+      HasSVE2p2 = true;
+      HasSVE2p3 = true;
+    }
     if (Feature == "+sve-aes") {
       FPU |= NeonMode;
       HasFullFP16 = true;
@@ -1064,6 +1081,18 @@ bool 
AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasBFloat16 = true;
       HasFullFP16 = true;
     }
+    if (Feature == "+sme2p3") {
+      HasSME = true;
+      HasSME2 = true;
+      HasSVE2 = true;
+      HasSVE2p1 = true;
+      HasSVE2p2 = true;
+      HasSME2p1 = true;
+      HasSME2p2 = true;
+      HasSME2p3 = true;
+      HasBFloat16 = true;
+      HasFullFP16 = true;
+    }
     if (Feature == "+sme-f64f64") {
       HasSME = true;
       HasSMEF64F64 = true;
diff --git a/clang/lib/Basic/Targets/AArch64.h 
b/clang/lib/Basic/Targets/AArch64.h
index f9dffed8769ef..4191070a4786d 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -85,6 +85,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public 
TargetInfo {
   bool HasBFloat16 = false;
   bool HasSVE2 = false;
   bool HasSVE2p1 = false;
+  bool HasSVE2p3 = false;
   bool HasSVEAES = false;
   bool HasSVE2SHA3 = false;
   bool HasSVE2SM4 = false;
@@ -110,6 +111,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public 
TargetInfo {
   bool HasSMEF16F16 = false;
   bool HasSMEB16B16 = false;
   bool HasSME2p1 = false;
+  bool HasSME2p3 = false;
   bool HasFP8 = false;
   bool HasFP8FMA = false;
   bool HasFP8DOT2 = false;
diff --git a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c 
b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
new file mode 100644
index 0000000000000..ae5fb1f64d0fc
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
@@ -0,0 +1,175 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror 
-emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -S -O1 
-Werror -o /dev/null %s
+
+#include <arm_sme.h>
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_s16_x4(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } 
@_Z24test_svluti6_lane_s16_x411svint16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale 
x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svint16x4_t test_svluti6_lane_s16_x4(svint16x2_t table, svuint8x2_t indices)
+    __arm_streaming {
+  return svluti6_lane_s16_x4(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } @test_svluti6_lane_u16_x4(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x 
i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16>, 
<vscale x 8 x i16>, <vscale x 8 x i16> } 
@_Z24test_svluti6_lane_u16_x412svuint16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale 
x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 
8 x i16>, <vscale x 8 x i16> } [[TMP0]]
+//
+svuint16x4_t test_svluti6_lane_u16_x4(svuint16x2_t table, svuint8x2_t indices)
+    __arm_streaming {
+  return svluti6_lane_u16_x4(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, 
<vscale x 8 x half>, <vscale x 8 x half> } @test_svluti6_lane_f16_x4(
+// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 
x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 
x half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 
x half>, <vscale x 8 x half> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x 
half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@_Z24test_svluti6_lane_f16_x413svfloat16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale 
x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], i32 1)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale 
x 8 x half>, <vscale x 8 x half> } [[TMP0]]
+//
+svfloat16x4_t test_svluti6_lane_f16_x4(svfloat16x2_t table, svuint8x2_t 
indices)
+    __arm_streaming {
+  return svluti6_lane_f16_x4(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x 
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@test_svluti6_lane_bf16_x4(
+// CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x 
bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@_Z25test_svluti6_lane_bf16_x414svbfloat16x2_t11svuint8x2_t(
+// CPP-CHECK-SAME: <vscale x 8 x bfloat> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
bfloat> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], 
<vscale x 16 x i8> [[INDICES_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(<vscale x 8 x bfloat> 
[[TABLE_COERCE0]], <vscale x 8 x bfloat> [[TABLE_COERCE1]], <vscale x 16 x i8> 
[[INDICES_COERCE0]], <vscale x 16 x i8> [[INDICES_COERCE1]], i32 0)
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
+//
+svbfloat16x4_t test_svluti6_lane_bf16_x4(svbfloat16x2_t table, svuint8x2_t 
indices)
+    __arm_streaming {
+  return svluti6_lane_bf16_x4(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_s8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z18test_svluti6_zt_s8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti6_zt_s8(svuint8_t indices) __arm_streaming __arm_in("zt0") 
{
+  return svluti6_zt_s8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_u8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z18test_svluti6_zt_u8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti6_zt_u8(svuint8_t indices) __arm_streaming 
__arm_in("zt0") {
+  return svluti6_zt_u8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_zt_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z19test_svluti6_zt_mf8u11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svluti6_zt_mf8(svuint8_t indices) __arm_streaming 
__arm_in("zt0") {
+  return svluti6_zt_mf8(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_u8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } 
@_Z21test_svluti6_zt_u8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svuint8x4_t test_svluti6_zt_u8_x4(svuint8x3_t indices)
+    __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_u8_x4(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_s8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } 
@_Z21test_svluti6_zt_s8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svint8x4_t test_svluti6_zt_s8_x4(svuint8x3_t indices)
+    __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_s8_x4(0, indices);
+}
+
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } @test_svluti6_zt_mf8_x4(
+// CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x i8> 
[[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 
x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x 
i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, 
<vscale x 16 x i8>, <vscale x 16 x i8> } 
@_Z22test_svluti6_zt_mf8_x411svuint8x3_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[INDICES_COERCE0:%.*]], <vscale x 16 x 
i8> [[INDICES_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES_COERCE2:%.*]]) 
local_unnamed_addr #[[ATTR2]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } 
@llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> [[INDICES_COERCE0]], 
<vscale x 16 x i8> [[INDICES_COERCE1]], <vscale x 16 x i8> [[INDICES_COERCE2]])
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 
16 x i8>, <vscale x 16 x i8> } [[TMP0]]
+//
+svmfloat8x4_t test_svluti6_zt_mf8_x4(svuint8x3_t indices)
+    __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_mf8_x4(0, indices);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c 
b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
new file mode 100644
index 0000000000000..a806ef0b13c20
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
@@ -0,0 +1,112 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
--check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ 
%s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -S -O1 -Werror -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_s8(
+// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z15test_svluti6_s810svint8x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x 
i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svluti6_s8(svint8x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6, _s8)(table, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_u8(
+// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z15test_svluti6_u811svuint8x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x 
i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svluti6_u8(svuint8x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6, _u8)(table, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svluti6_mf8(
+// CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x i8> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> 
@_Z16test_svluti6_mf813svmfloat8x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 16 x i8> [[TABLE_COERCE0:%.*]], <vscale x 16 x 
i8> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.luti6(<vscale x 16 x i8> [[TABLE_COERCE0]], <vscale x 16 x 
i8> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svluti6_mf8(svmfloat8x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6, _mf8)(table, indices);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_s16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z24test_svluti6_lane_s16_x211svint16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svluti6_lane_s16_x2(svint16x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6_lane, _s16_x2)(table, indices, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svluti6_lane_u16_x2(
+// CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x i16> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> 
@_Z24test_svluti6_lane_u16_x212svuint16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x i16> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
i16> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.luti6.lane.x2.i16(<vscale x 8 x i16> [[TABLE_COERCE0]], 
<vscale x 8 x i16> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svluti6_lane_u16_x2(svuint16x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6_lane, _u16_x2)(table, indices, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svluti6_lane_f16_x2(
+// CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x half> 
[[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) local_unnamed_addr 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> 
@_Z24test_svluti6_lane_f16_x213svfloat16x2_tu11__SVUint8_t(
+// CPP-CHECK-SAME: <vscale x 8 x half> [[TABLE_COERCE0:%.*]], <vscale x 8 x 
half> [[TABLE_COERCE1:%.*]], <vscale x 16 x i8> [[INDICES:%.*]]) 
local_unnamed_addr #[[ATTR0]] {
+// CPP-CHECK-NEXT:  [[ENTRY:.*:]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.luti6.lane.x2.f16(<vscale x 8 x half> [[TABLE_COERCE0]], 
<vscale x 8 x half> [[TABLE_COERCE1]], <vscale x 16 x i8> [[INDICES]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svluti6_lane_f16_x2(svfloat16x2_t table, svuint8_t indices) {
+  return SVE_ACLE_FUNC(svluti6_lane, _f16_x2)(table, indices, 1);
+}
diff --git a/clang/test/Preprocessor/aarch64-target-features.c 
b/clang/test/Preprocessor/aarch64-target-features.c
index 60ddaad639d48..6316b25befed8 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -827,9 +827,32 @@
 // CHECK-SVE2p2: __ARM_NEON_FP 0xE
 // CHECK-SVE2p2: __ARM_NEON_SVE_BRIDGE 1
 //
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv9.7-a+sve2p3 -x c -E 
-dM %s -o - | FileCheck --check-prefix=CHECK-SVE2p3 %s
+// CHECK-SVE2p3: __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
+// CHECK-SVE2p3: __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1
+// The macro dump is ordered by name, and a negative check only covers the
+// text between its neighbouring positive matches, so the SME2p3 exclusion is
+// placed where that macro would be printed.
+// CHECK-SVE2p3-NOT: __ARM_FEATURE_SME2p3 1
+// CHECK-SVE2p3: __ARM_FEATURE_SVE 1
+// CHECK-SVE2p3: __ARM_FEATURE_SVE2 1
+// CHECK-SVE2p3: __ARM_FEATURE_SVE2p1 1
+// CHECK-SVE2p3: __ARM_FEATURE_SVE2p2 1
+// CHECK-SVE2p3: __ARM_FEATURE_SVE2p3 1
+// CHECK-SVE2p3: __ARM_NEON 1
+// CHECK-SVE2p3: __ARM_NEON_FP 0xE
+// CHECK-SVE2p3: __ARM_NEON_SVE_BRIDGE 1
+//
 // RUN: %clang --target=aarch64 -march=armv9-a+sme2p2 -x c -E -dM %s -o - | 
FileCheck --check-prefix=CHECK-SME2p2 %s
 // CHECK-SME2p2: __ARM_FEATURE_LOCALLY_STREAMING 1
 // CHECK-SME2p2: __ARM_FEATURE_SME 1
 // CHECK-SME2p2: __ARM_FEATURE_SME2 1
 // CHECK-SME2p2: __ARM_FEATURE_SME2p1 1
 // CHECK-SME2p2: __ARM_FEATURE_SME2p2 1
+//
+// RUN: %clang --target=aarch64 -march=armv9.7-a+sme2p3 -x c -E -dM %s -o - | 
FileCheck --check-prefix=CHECK-SME2p3 %s
+// CHECK-SME2p3: __ARM_FEATURE_LOCALLY_STREAMING 1
+// CHECK-SME2p3: __ARM_FEATURE_SME 1
+// CHECK-SME2p3: __ARM_FEATURE_SME2 1
+// CHECK-SME2p3: __ARM_FEATURE_SME2p1 1
+// CHECK-SME2p3: __ARM_FEATURE_SME2p2 1
+// CHECK-SME2p3: __ARM_FEATURE_SME2p3 1
+// CHECK-SME2p3: __ARM_FEATURE_SVE2p1 1
+// CHECK-SME2p3: __ARM_FEATURE_SVE2p2 1
diff --git a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c 
b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
new file mode 100644
index 0000000000000..8883ea3580fb2
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
@@ -0,0 +1,21 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 
-fsyntax-only -verify %s
+
+#include <arm_sme.h>
+
+void test_range_0_0(void) __arm_streaming __arm_in("zt0") {
+  svluti6_zt_s8(1, svundef_u8()); // expected-error {{argument value 1 is 
outside the valid range [0, 0]}}
+  svluti6_zt_u8_x4(1, svcreate3_u8(svundef_u8(), svundef_u8(), svundef_u8())); 
// expected-error {{argument value 1 is outside the valid range [0, 0]}}
+}
+
+void test_range_0_1(void) __arm_streaming {
+  svluti6_lane_s16_x4(svcreate2_s16(svundef_s16(), svundef_s16()), // 
expected-error {{argument value 18446744073709551615 is outside the valid range 
[0, 1]}}
+                      svcreate2_u8(svundef_u8(), svundef_u8()), -1);
+  svluti6_lane_u16_x4(svcreate2_u16(svundef_u16(), svundef_u16()), // 
expected-error {{argument value 2 is outside the valid range [0, 1]}}
+                      svcreate2_u8(svundef_u8(), svundef_u8()), 2);
+  svluti6_lane_f16_x4(svcreate2_f16(svundef_f16(), svundef_f16()), // 
expected-error {{argument value 18446744073709551615 is outside the valid range 
[0, 1]}}
+                      svcreate2_u8(svundef_u8(), svundef_u8()), -1);
+  svluti6_lane_bf16_x4(svcreate2_bf16(svundef_bf16(), svundef_bf16()), // 
expected-error {{argument value 2 is outside the valid range [0, 1]}}
+                       svcreate2_u8(svundef_u8(), svundef_u8()), 2);
+}
diff --git a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target.c 
b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target.c
new file mode 100644
index 0000000000000..2cffc1344bfe1
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target.c
@@ -0,0 +1,20 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +bf16 -verify -emit-llvm -o - %s
+
+#include <arm_sme.h>
+
+svint8_t missing_sme2p3_zt(svuint8_t indices) __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_s8(0, indices); // expected-error {{'svluti6_zt_s8' needs 
target feature sme,sme2p3}}
+}
+
+__attribute__((target("sme2p3")))
+svint8_t has_sme2p3_zt(svuint8_t indices) __arm_streaming __arm_in("zt0") {
+  return svluti6_zt_s8(0, indices);
+}
+
+__attribute__((target("sme2p3")))
+svfloat16_t has_sme2p3_implied_sme2p2(svbool_t pg, svfloat16_t op)
+    __arm_streaming {
+  return svcompact_f16(pg, op);
+}
diff --git 
a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target_lane.c 
b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target_lane.c
new file mode 100644
index 0000000000000..1a06663a9aab7
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_target_lane.c
@@ -0,0 +1,16 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +bf16 -verify -emit-llvm -o - %s
+
+#include <arm_sme.h>
+
+svbfloat16x4_t missing_sme2p3_lane(svbfloat16x2_t table, svuint8x2_t indices)
+    __arm_streaming {
+  return svluti6_lane_bf16_x4(table, indices, 1); // expected-error 
{{'svluti6_lane_bf16_x4' needs target feature sme,sme2p3}}
+}
+
+__attribute__((target("sme2p3,bf16")))
+svbfloat16x4_t has_sme2p3_lane(svbfloat16x2_t table, svuint8x2_t indices)
+    __arm_streaming {
+  return svluti6_lane_bf16_x4(table, indices, 0);
+}
diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp 
b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
new file mode 100644
index 0000000000000..8bbb0211b0bbb
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_imm.cpp
@@ -0,0 +1,24 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2p3 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sve -target-feature +sve2p3 -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+#include <arm_sve.h>
+
+void test_range_0_1() {
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _s16_x2)(svcreate2_s16(svundef_s16(), 
svundef_s16()),
+                                        svundef_u8(), -1);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _u16_x2)(svcreate2_u16(svundef_u16(), 
svundef_u16()),
+                                        svundef_u8(), 2);
+  // expected-error-re@+1 {{argument value {{[0-9]+}} is outside the valid 
range [0, 1]}}
+  SVE_ACLE_FUNC(svluti6_lane, _f16_x2)(svcreate2_f16(svundef_f16(), 
svundef_f16()),
+                                        svundef_u8(), -1);
+}
diff --git a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target.c 
b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target.c
new file mode 100644
index 0000000000000..3b5596ac1d5a6
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target.c
@@ -0,0 +1,19 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify 
-emit-llvm -o - %s
+
+#include <arm_sve.h>
+
+void missing_sve2p3_luti6(svint8x2_t table, svuint8_t indices) {
+  svluti6_s8(table, indices); // expected-error {{'svluti6_s8' needs target 
feature sve,sve2p3}}
+}
+
+__attribute__((target("sve2p3")))
+svint8_t has_sve2p3_luti6(svint8x2_t table, svuint8_t indices) {
+  return svluti6_s8(table, indices);
+}
+
+__attribute__((target("sve2p3")))
+svfloat32_t has_sve2p3_implied_sve2p2(svbool_t pg, svfloat16_t op) {
+  return svcvtlt_f32_f16_z(pg, op);
+}
diff --git 
a/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target_lane.c 
b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target_lane.c
new file mode 100644
index 0000000000000..6a2465f4027fc
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve2p3-intrinsics/acle_sve2p3_target_lane.c
@@ -0,0 +1,14 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify 
-emit-llvm -o - %s
+
+#include <arm_sve.h>
+
+svfloat16_t missing_sve2p3_luti6_lane(svfloat16x2_t table, svuint8_t indices) {
+  return svluti6_lane_f16_x2(table, indices, 1); // expected-error 
{{'svluti6_lane_f16_x2' needs target feature (sve,sve2p3)|(sme,sme2p3)}}
+}
+
+__attribute__((target("sve2p3")))
+svfloat16_t has_sve2p3_luti6_lane(svfloat16x2_t table, svuint8_t indices) {
+  return svluti6_lane_f16_x2(table, indices, 0);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 75929cbc222ad..325c606149bfc 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1051,6 +1051,7 @@ def llvm_nxv4i1_ty  : LLVMType<nxv4i1>;
 def llvm_nxv8i1_ty  : LLVMType<nxv8i1>;
 def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
 def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
+def llvm_nxv8i16_ty : LLVMType<nxv8i16>;
 def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
 def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
 def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
@@ -2797,12 +2798,31 @@ def int_aarch64_sve_tbx  : 
AdvSIMD_SVE2_TBX_Intrinsic<[IntrSpeculatable]>;
 
 def int_aarch64_sve_luti2_lane : SVE2_LUTI_Inrinsic<[IntrSpeculatable]>;
 def int_aarch64_sve_luti4_lane : SVE2_LUTI_Inrinsic<[IntrSpeculatable]>;
+// LUTI6 (vector form): takes three nxv16i8 operands and returns one nxv16i8.
+// The selection pattern groups the first two operands into a register pair
+// (the table) and passes the third separately (the indices). Declared as not
+// accessing memory and speculatable.
+def int_aarch64_sve_luti6 : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
+                                  [llvm_nxv16i8_ty,
+                                   llvm_nxv16i8_ty,
+                                   llvm_nxv16i8_ty],
+                                  [IntrNoMem, IntrSpeculatable]>;
 def int_aarch64_sve_luti4_lane_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                     [LLVMMatchType<0>,
                                     LLVMMatchType<0>,
                                     llvm_nxv16i8_ty,
                                     llvm_i32_ty],
                                     [IntrNoMem, ImmArg<ArgIndex<3>>, 
IntrSpeculatable]>;
+def int_aarch64_sve_luti6_lane_x2_i16
+    : DefaultAttrsIntrinsic<[llvm_nxv8i16_ty],
+                            [llvm_nxv8i16_ty,
+                             llvm_nxv8i16_ty,
+                             llvm_nxv16i8_ty,
+                             llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>, 
IntrSpeculatable]>;
+def int_aarch64_sve_luti6_lane_x2_f16
+    : DefaultAttrsIntrinsic<[llvm_nxv8f16_ty],
+                            [llvm_nxv8f16_ty,
+                             llvm_nxv8f16_ty,
+                             llvm_nxv16i8_ty,
+                             llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>, 
IntrSpeculatable]>;
 
 //
 // SVE2 - Optional bit permutation
@@ -3957,6 +3977,9 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_luti4_lane_zt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, 
llvm_nxv16i8_ty, llvm_i32_ty],
                             [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+  def int_aarch64_sme_luti6_zt
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_i32_ty, llvm_nxv16i8_ty],
+                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
 
   // Lookup table expand two registers
   //
@@ -3978,11 +4001,20 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                             [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+  def int_aarch64_sme_luti6_lane_x4
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
+                            [ImmArg<ArgIndex<4>>, IntrNoMem, 
IntrSpeculatable]>;
 
   def int_aarch64_sme_luti4_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                             [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
+  def int_aarch64_sme_luti6_zt_x4
+    : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty,
+                             llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+                            [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty],
+                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
 
 
   //
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 1b706411791e9..79061be0a525f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -414,8 +414,12 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
                                  unsigned Opc, uint32_t MaxImm);
+  void SelectMultiVectorLutiLaneTuple(SDNode *Node, unsigned NumOutVecs,
+                                      unsigned Opc, uint32_t MaxImm);
 
   void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
+  void SelectMultiVectorLutiZT(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
+                               unsigned NumInVecs);
 
   template <unsigned MaxIdx, unsigned Scale>
   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
@@ -2242,6 +2246,51 @@ void 
AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
   CurDAG->RemoveDeadNode(Node);
 }
 
+/// Select a LUTI "lane" intrinsic whose table and indices are each supplied
+/// as two separate vector operands and whose results form one tuple.
+///
+/// The argument operands are, in order: table0, table1, indices0, indices1
+/// and the lane immediate. \p Opc is the machine opcode to emit, \p NumOutVecs
+/// is the number of result vectors extracted from its tuple result, and
+/// \p MaxImm is the largest accepted lane immediate; if the immediate is a
+/// constant above that bound, \p Node is left untouched.
+void AArch64DAGToDAGISel::SelectMultiVectorLutiLaneTuple(SDNode *Node,
+                                                         unsigned NumOutVecs,
+                                                         unsigned Opc,
+                                                         uint32_t MaxImm) {
+  // A chained node has one extra leading operand (the chain), which moves
+  // every argument one slot further along.
+  const bool IsChained = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  const unsigned FirstArg = IsChained ? 2 : 1;
+
+  SDValue LaneImm = Node->getOperand(FirstArg + 4);
+  if (auto *LaneC = dyn_cast<ConstantSDNode>(LaneImm)) {
+    if (LaneC->getZExtValue() > MaxImm)
+      return;
+  }
+
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+
+  // Bundle each vector pair into a single tuple operand.
+  SDValue TablePair = createZTuple(
+      {Node->getOperand(FirstArg), Node->getOperand(FirstArg + 1)});
+  SDValue IndexPair = createZTuple(
+      {Node->getOperand(FirstArg + 2), Node->getOperand(FirstArg + 3)});
+  SmallVector<SDValue, 4> Ops = {TablePair, IndexPair, LaneImm};
+
+  SDNode *Lookup;
+  if (IsChained) {
+    // The incoming chain is appended last and yields an extra chain result.
+    Ops.push_back(Node->getOperand(0));
+    Lookup = CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
+  } else {
+    Lookup = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
+  }
+
+  // Result 0 is the whole tuple; route each sub-vector to the old users.
+  SDValue Tuple(Lookup, 0);
+  for (unsigned Idx = 0; Idx != NumOutVecs; ++Idx) {
+    SDValue Part =
+        CurDAG->getTargetExtractSubreg(AArch64::zsub0 + Idx, DL, VT, Tuple);
+    ReplaceUses(SDValue(Node, Idx), Part);
+  }
+
+  if (IsChained)
+    ReplaceUses(SDValue(Node, NumOutVecs), SDValue(Lookup, 1));
+
+  CurDAG->RemoveDeadNode(Node);
+}
+
 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                 unsigned NumOutVecs,
                                                 unsigned Opc) {
@@ -2271,6 +2320,50 @@ void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode 
*Node,
   CurDAG->RemoveDeadNode(Node);
 }
 
+/// Select a multi-vector LUTI intrinsic that takes a ZT immediate followed by
+/// \p NumInVecs (2 or 3) input vectors.
+///
+/// \p Node is read as: operand 0 the chain, operand 2 the ZT immediate and
+/// operands 3 onwards the input vectors. \p Opc is the machine opcode to emit
+/// and \p NumOutVecs the number of vectors extracted from its tuple result.
+/// If the ZT immediate cannot be converted by ImmToReg, \p Node is left
+/// untouched.
+void AArch64DAGToDAGISel::SelectMultiVectorLutiZT(SDNode *Node,
+                                                  unsigned NumOutVecs,
+                                                  unsigned Opc,
+                                                  unsigned NumInVecs) {
+  const unsigned ChainIdx = 0;
+  const unsigned ZtIdx = 2;
+  const unsigned VecIdx = 3;
+
+  SDValue ZtReg;
+  if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(ZtIdx), ZtReg))
+    return;
+
+  // Two inputs use createZMulTuple, three use createZTuple; other widths are
+  // not supported.
+  SDValue InputTuple;
+  if (NumInVecs == 2) {
+    InputTuple = createZMulTuple(
+        {Node->getOperand(VecIdx), Node->getOperand(VecIdx + 1)});
+  } else if (NumInVecs == 3) {
+    InputTuple = createZTuple({Node->getOperand(VecIdx),
+                               Node->getOperand(VecIdx + 1),
+                               Node->getOperand(VecIdx + 2)});
+  } else {
+    llvm_unreachable("unexpected LUTI ZT tuple width");
+  }
+
+  SDValue Ops[] = {ZtReg, InputTuple, Node->getOperand(ChainIdx)};
+
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+
+  SDNode *Lookup =
+      CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
+
+  // Result 0 is the whole tuple; route each sub-vector to the old users.
+  SDValue Tuple(Lookup, 0);
+  for (unsigned Idx = 0; Idx != NumOutVecs; ++Idx) {
+    SDValue Part =
+        CurDAG->getTargetExtractSubreg(AArch64::zsub0 + Idx, DL, VT, Tuple);
+    ReplaceUses(SDValue(Node, Idx), Part);
+  }
+
+  // Result 1 of the new node replaces the old chain result.
+  ReplaceUses(SDValue(Node, NumOutVecs), SDValue(Lookup, 1));
+  CurDAG->RemoveDeadNode(Node);
+}
+
 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
                                       unsigned Op) {
   SDLoc DL(N);
@@ -5903,6 +5996,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
       return;
     }
+    case Intrinsic::aarch64_sme_luti6_zt_x4: {
+      SelectMultiVectorLutiZT(Node, 4, AArch64::LUTI6_4ZT3Z, 3);
+      return;
+    }
     case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
       if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
               Node->getValueType(0),
@@ -5993,6 +6090,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
                AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
       return;
+    case Intrinsic::aarch64_sme_luti6_lane_x4:
+      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+              Node->getValueType(0), {0, AArch64::LUTI6_4Z2Z2ZI, 0}))
+        SelectMultiVectorLutiLaneTuple(Node, 4, Opc, 1);
+      return;
     case Intrinsic::aarch64_sve_urshl_single_x2:
       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
               Node->getValueType(0),
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 44968b14b11a9..34915a073c9aa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -229,7 +229,7 @@ def HasF16MM         : 
Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasF
                                  AssemblerPredicateWithAll<(all_of 
FeatureF16MM), "f16mm">;
 def HasSVE2p3        : Predicate<"Subtarget->hasSVE2p3()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureSVE2p3), "sve2p3">;
-def HasSME2p3        : Predicate<"Subtarget->hasSME2p3()">,
+def HasSME2p3        : Predicate<"Subtarget->isStreaming() && 
Subtarget->hasSME2p3()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureSME2p3), "sme2p3">;
 def HasF16F32DOT     : Predicate<"Subtarget->hasF16F32DOT()">,
                                  AssemblerPredicateWithAll<(all_of 
FeatureF16F32DOT), "f16f32dot">;
@@ -313,6 +313,14 @@ def HasNonStreamingSVE2p2_or_SME2p2
                 "(Subtarget->isSVEorStreamingSVEAvailable() && 
Subtarget->hasSME2p2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2p2, 
FeatureSME2p2),
                 "sme2p2 or sve2p2">;
+// Code generation requires both isSVEAvailable() and the sve2p3 feature; the
+// assembler predicate only requires the sve2p3 feature.
+def HasNonStreamingSVE2p3
+    : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p3()">,
+                AssemblerPredicateWithAll<(all_of FeatureSVE2p3), "sve2p3">;
+def HasNonStreamingSVE2p3_or_SME2p3
+    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p3()) ||"
+                "(Subtarget->isStreaming() && Subtarget->hasSME2p3())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE2p3, 
FeatureSME2p3),
+                "sme2p3 or sve2p3">;
 
 def HasSMEF16F16_or_SMEF8F16
     : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || 
Subtarget->hasSMEF8F16())">,
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 905eed50dee9a..c5d1fedcbc9ae 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1186,6 +1186,9 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in {
 
//===----------------------------------------------------------------------===//
 let Predicates = [HasSME2p3] in {
   def LUTI6_ZTZ       : sme2_lut_single<"luti6">;
+  def : Pat<(nxv16i8 (int_aarch64_sme_luti6_zt (imm_to_zt untyped:$zt),
+                      nxv16i8:$zn)),
+            (LUTI6_ZTZ $zt, nxv16i8:$zn)>;
   def LUTI6_4ZT3Z     : sme2_luti6_zt_consecutive<"luti6">;
   def LUTI6_S_4ZT3Z   : sme2_luti6_zt_strided<"luti6">;
   def LUTI6_4Z2Z2ZI   : sme2_luti6_vector_vg4_consecutive<"luti6">;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c5a3bd504adf9..248b11332e2a4 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4837,14 +4837,22 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
   defm SQSHRN_Z2ZI_StoH   : sve_multi_vec_shift_narrow<"sqshrn",   0b000, 
null_frag>;
   defm UQSHRN_Z2ZI_StoH   : sve_multi_vec_shift_narrow<"uqshrn",   0b010, 
null_frag>;
 
-  defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">;
 } // End HasSME2p3orSVE2p3
 
+let Predicates = [HasNonStreamingSVE2p3_or_SME2p3] in {
+  defm LUTI6_Z2ZZI : sve2_luti6_vector_index<"luti6">;
+} // End HasNonStreamingSVE2p3_or_SME2p3
+
 
//===----------------------------------------------------------------------===//
 // SVE2.3 instructions
 
//===----------------------------------------------------------------------===//
-let Predicates = [HasSVE2p3] in {
+let Predicates = [HasNonStreamingSVE2p3] in {
   def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">;
+  def : Pat<(nxv16i8 (int_aarch64_sve_luti6 nxv16i8:$Op1, nxv16i8:$Op2,
+                      nxv16i8:$Op3)),
+            (LUTI6_Z2ZZ (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0,
+                                           nxv16i8:$Op2, zsub1),
+                         nxv16i8:$Op3)>;
 }
 
 
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8a3f52090ab4c..ddec0a21b0ccb 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -11358,6 +11358,17 @@ multiclass sve2_luti6_vector_index<string mnemonic> {
     bit idx;
     let Inst{23} = idx;
   }
+
+  def : Pat<(nxv8i16 (int_aarch64_sve_luti6_lane_x2_i16 nxv8i16:$Op1, 
nxv8i16:$Op2,
+                      nxv16i8:$Op3, (i32 timm32_0_1:$Op4))),
+            (nxv8i16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, 
nxv8i16:$Op1, zsub0,
+                                                                      
nxv8i16:$Op2, zsub1),
+                                                nxv16i8:$Op3, 
timm32_0_1:$Op4))>;
+  def : Pat<(nxv8f16 (int_aarch64_sve_luti6_lane_x2_f16 nxv8f16:$Op1, 
nxv8f16:$Op2,
+                      nxv16i8:$Op3, (i32 timm32_0_1:$Op4))),
+            (nxv8f16 (!cast<Instruction>(NAME # _H) (REG_SEQUENCE ZPR2, 
nxv8f16:$Op1, zsub0,
+                                                                      
nxv8f16:$Op2, zsub1),
+                                                nxv16i8:$Op3, 
timm32_0_1:$Op4))>;
 }
 
 // Look up table
diff --git a/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll 
b/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll
new file mode 100644
index 0000000000000..07fb62baa58cd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2p3-intrinsics-luti6.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -force-streaming 
-mtriple=aarch64-none-linux-gnu -mattr=+sme2p3 < %s | FileCheck %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define <vscale x 16 x i8> @luti6_zt_i8(<vscale x 16 x i8> %x) #0 {
+; CHECK-LABEL: luti6_zt_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 z0.b, zt0, z0
+; CHECK-NEXT:    ret
+  %res = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(
+      i32 0, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %res
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+         <vscale x 16 x i8> } @luti6_zt_i8_x4(<vscale x 16 x i8> %a,
+                                              <vscale x 16 x i8> %b,
+                                              <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: luti6_zt_i8_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.b - z3.b }, zt0, { z0 - z2 }
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>,
+                     <vscale x 16 x i8>, <vscale x 16 x i8> }
+      @llvm.aarch64.sme.luti6.zt.x4(
+          i32 0, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
+          <vscale x 16 x i8> %c)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+        <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+         <vscale x 8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a,
+                                            <vscale x 8 x i16> %b,
+                                            <vscale x 16 x i8> %x,
+                                            <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_i16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>,
+                     <vscale x 8 x i16>, <vscale x 8 x i16> }
+      @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+          <vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
+          <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+        <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+         <vscale x 8 x bfloat> } @luti6_bf16_x4(<vscale x 8 x bfloat> %a,
+                                                <vscale x 8 x bfloat> %b,
+                                                <vscale x 16 x i8> %x,
+                                                <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_bf16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[0]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+                     <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
+      @llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(
+          <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b,
+          <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 0)
+  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+        <vscale x 8 x bfloat> } %res
+}
+
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+         <vscale x 8 x half> } @luti6_f16_x4(<vscale x 8 x half> %a,
+                                             <vscale x 8 x half> %b,
+                                             <vscale x 16 x i8> %x,
+                                             <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_f16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT:    ret
+  %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>,
+                     <vscale x 8 x half>, <vscale x 8 x half> }
+      @llvm.aarch64.sme.luti6.lane.x4.nxv8f16(
+          <vscale x 8 x half> %a, <vscale x 8 x half> %b,
+          <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+        <vscale x 8 x half> } %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(
+    i32, <vscale x 16 x i8>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+          <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(
+    i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+          <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+    <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 16 x i8>,
+    <vscale x 16 x i8>, i32)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+          <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(
+    <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 16 x i8>,
+    <vscale x 16 x i8>, i32)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+          <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.nxv8f16(
+    <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 16 x i8>,
+    <vscale x 16 x i8>, i32)
+
+attributes #0 = { "target-features"="+sme2p3" }
diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll
new file mode 100644
index 0000000000000..ab89e87df66d2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-luti6.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define <vscale x 16 x i8> @luti6_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: luti6_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    luti6 z0.b, { z0.b, z1.b }, z2
+; CHECK-NEXT:    ret
+                                    <vscale x 16 x i8> %b,
+                                    <vscale x 16 x i8> %idx) {
+  %res = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti6(
+      <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %idx)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @luti6_i16_x2(<vscale x 8 x i16> %a,
+; CHECK-LABEL: luti6_i16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    luti6 z0.h, { z0.h, z1.h }, z2[1]
+; CHECK-NEXT:    ret
+                                        <vscale x 8 x i16> %b,
+                                        <vscale x 16 x i8> %idx) {
+  %res = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.i16(
+      <vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
+      <vscale x 16 x i8> %idx, i32 1)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x half> @luti6_f16_x2(<vscale x 8 x half> %a,
+; CHECK-LABEL: luti6_f16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    luti6 z0.h, { z0.h, z1.h }, z2[0]
+; CHECK-NEXT:    ret
+                                         <vscale x 8 x half> %b,
+                                         <vscale x 16 x i8> %idx) {
+  %res = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.f16(
+      <vscale x 8 x half> %a, <vscale x 8 x half> %b,
+      <vscale x 16 x i8> %idx, i32 0)
+  ret <vscale x 8 x half> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.luti6(
+    <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.i16(
+    <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.f16(
+    <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 16 x i8>, i32)
diff --git a/llvm/test/Verifier/AArch64/luti6-intrinsics.ll b/llvm/test/Verifier/AArch64/luti6-intrinsics.ll
new file mode 100644
index 0000000000000..0777c1db532b1
--- /dev/null
+++ b/llvm/test/Verifier/AArch64/luti6-intrinsics.ll
@@ -0,0 +1,79 @@
+; RUN: not opt -S -passes=verify < %s 2>&1 | FileCheck %s
+
+define <vscale x 8 x i16> @bad_sve_luti6_ret(<vscale x 16 x i8> %a,
+                                             <vscale x 16 x i8> %b,
+                                             <vscale x 16 x i8> %idx) {
+; CHECK: Intrinsic has incorrect return type!
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.luti6(
+      <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %idx)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @bad_sve_luti6_lane_x2_arg(<vscale x 4 x i32> %a,
+                                                     <vscale x 8 x i16> %b,
+                                                     <vscale x 16 x i8> %idx) {
+; CHECK: Intrinsic has incorrect argument type!
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.i16(
+      <vscale x 4 x i32> %a, <vscale x 8 x i16> %b,
+      <vscale x 16 x i8> %idx, i32 1)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x half> @bad_sve_luti6_lane_x2_f16_arg(
+    <vscale x 8 x i16> %a, <vscale x 8 x half> %b, <vscale x 16 x i8> %idx) {
+; CHECK: Intrinsic has incorrect argument type!
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.f16(
+      <vscale x 8 x i16> %a, <vscale x 8 x half> %b,
+      <vscale x 16 x i8> %idx, i32 1)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x i16> @bad_sme_luti6_zt_ret(i32 %zt,
+                                                <vscale x 16 x i8> %idx) {
+; CHECK: Intrinsic has incorrect return type!
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sme.luti6.zt(
+      i32 %zt, <vscale x 16 x i8> %idx)
+  ret <vscale x 8 x i16> %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+         <vscale x 8 x i16> } @bad_sme_luti6_zt_x4_ret(i32 %zt,
+                                                       <vscale x 16 x i8> %a,
+                                                       <vscale x 16 x i8> %b,
+                                                       <vscale x 16 x i8> %c) {
+; CHECK: Intrinsic has incorrect return type!
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+                <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.zt.x4(
+      i32 %zt, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
+      <vscale x 16 x i8> %c)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+        <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+         <vscale x 8 x i16> } @bad_sme_luti6_lane_x4_arg(
+             <vscale x 8 x half> %a, <vscale x 8 x i16> %b,
+             <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK: Intrinsic has incorrect argument type!
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+                <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+      <vscale x 8 x half> %a, <vscale x 8 x i16> %b,
+      <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+        <vscale x 8 x i16> } %res
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.luti6(
+    <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.luti6.lane.x2.i16(
+    <vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sve.luti6.lane.x2.f16(
+    <vscale x 8 x i16>, <vscale x 8 x half>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.luti6.zt(i32, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+          <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.zt.x4(
+    i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+          <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+    <vscale x 8 x half>, <vscale x 8 x i16>, <vscale x 16 x i8>,
+    <vscale x 16 x i8>, i32)

>From a31514483fe75b42bbf5677c443a678ee55642c8 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <[email protected]>
Date: Thu, 19 Mar 2026 16:39:11 +0000
Subject: [PATCH 2/2] fixup! Address PR comments

---
 clang/lib/Basic/Targets/AArch64.cpp           | 29 ------------------
 clang/lib/Basic/Targets/AArch64.h             |  2 --
 .../Preprocessor/aarch64-target-features.c    | 23 --------------
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  5 +---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  7 +----
 llvm/lib/Target/AArch64/SMEInstrFormats.td    | 22 ++++++++------
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 30 +++++++++++--------
 7 files changed, 33 insertions(+), 85 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 9d4085e5ebbf4..f7ed15be75cd8 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -495,9 +495,6 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasSVE2p1)
     Builder.defineMacro("__ARM_FEATURE_SVE2p1", "1");
 
-  if (HasSVE2p3)
-    Builder.defineMacro("__ARM_FEATURE_SVE2p3", "1");
-
   if (HasSVE2 && HasSVEAES)
     Builder.defineMacro("__ARM_FEATURE_SVE2_AES", "1");
 
@@ -524,9 +521,6 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasSME2p1)
     Builder.defineMacro("__ARM_FEATURE_SME2p1", "1");
 
-  if (HasSME2p3)
-    Builder.defineMacro("__ARM_FEATURE_SME2p3", "1");
-
   if (HasSMEF16F16)
     Builder.defineMacro("__ARM_FEATURE_SME_F16F16", "1");
 
@@ -906,11 +900,9 @@ void AArch64TargetInfo::computeFeatureLookup() {
       .Case("sve2-sha3", FPU & SveMode && HasSVE2SHA3)
       .Case("sve2-sm4", FPU & SveMode && HasSVE2SM4)
       .Case("sve2p1", FPU & SveMode && HasSVE2p1)
-      .Case("sve2p3", FPU & SveMode && HasSVE2p3)
       .Case("sme", HasSME)
       .Case("sme2", HasSME2)
       .Case("sme2p1", HasSME2p1)
-      .Case("sme2p3", HasSME2p3)
       .Case("sme-f64f64", HasSMEF64F64)
       .Case("sme-i16i64", HasSMEI16I64)
       .Case("sme-fa64", HasSMEFA64)
@@ -1016,15 +1008,6 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSVE2 = true;
       HasSVE2p1 = true;
     }
-    if (Feature == "+sve2p3") {
-      FPU |= NeonMode;
-      FPU |= SveMode;
-      HasFullFP16 = true;
-      HasSVE2 = true;
-      HasSVE2p1 = true;
-      HasSVE2p2 = true;
-      HasSVE2p3 = true;
-    }
     if (Feature == "+sve-aes") {
       FPU |= NeonMode;
       HasFullFP16 = true;
@@ -1081,18 +1064,6 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasBFloat16 = true;
       HasFullFP16 = true;
     }
-    if (Feature == "+sme2p3") {
-      HasSME = true;
-      HasSME2 = true;
-      HasSVE2 = true;
-      HasSVE2p1 = true;
-      HasSVE2p2 = true;
-      HasSME2p1 = true;
-      HasSME2p2 = true;
-      HasSME2p3 = true;
-      HasBFloat16 = true;
-      HasFullFP16 = true;
-    }
     if (Feature == "+sme-f64f64") {
       HasSME = true;
       HasSMEF64F64 = true;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 4191070a4786d..f9dffed8769ef 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -85,7 +85,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasBFloat16 = false;
   bool HasSVE2 = false;
   bool HasSVE2p1 = false;
-  bool HasSVE2p3 = false;
   bool HasSVEAES = false;
   bool HasSVE2SHA3 = false;
   bool HasSVE2SM4 = false;
@@ -111,7 +110,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasSMEF16F16 = false;
   bool HasSMEB16B16 = false;
   bool HasSME2p1 = false;
-  bool HasSME2p3 = false;
   bool HasFP8 = false;
   bool HasFP8FMA = false;
   bool HasFP8DOT2 = false;
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 6316b25befed8..60ddaad639d48 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -827,32 +827,9 @@
 // CHECK-SVE2p2: __ARM_NEON_FP 0xE
 // CHECK-SVE2p2: __ARM_NEON_SVE_BRIDGE 1
 //
-// RUN: %clang -target aarch64-none-linux-gnu -march=armv9.7-a+sve2p3 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2p3 %s
-// CHECK-SVE2p3: __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
-// CHECK-SVE2p3: __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1
-// CHECK-SVE2p3: __ARM_FEATURE_SVE 1
-// CHECK-SVE2p3: __ARM_FEATURE_SVE2 1
-// CHECK-SVE2p3: __ARM_FEATURE_SVE2p1 1
-// CHECK-SVE2p3: __ARM_FEATURE_SVE2p2 1
-// CHECK-SVE2p3: __ARM_FEATURE_SVE2p3 1
-// CHECK-SVE2p3: __ARM_NEON 1
-// CHECK-SVE2p3: __ARM_NEON_FP 0xE
-// CHECK-SVE2p3: __ARM_NEON_SVE_BRIDGE 1
-// CHECK-SVE2p3-NOT: __ARM_FEATURE_SME2p3 1
-//
 // RUN: %clang --target=aarch64 -march=armv9-a+sme2p2 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SME2p2 %s
 // CHECK-SME2p2: __ARM_FEATURE_LOCALLY_STREAMING 1
 // CHECK-SME2p2: __ARM_FEATURE_SME 1
 // CHECK-SME2p2: __ARM_FEATURE_SME2 1
 // CHECK-SME2p2: __ARM_FEATURE_SME2p1 1
 // CHECK-SME2p2: __ARM_FEATURE_SME2p2 1
-//
-// RUN: %clang --target=aarch64 -march=armv9.7-a+sme2p3 -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SME2p3 %s
-// CHECK-SME2p3: __ARM_FEATURE_LOCALLY_STREAMING 1
-// CHECK-SME2p3: __ARM_FEATURE_SME 1
-// CHECK-SME2p3: __ARM_FEATURE_SME2 1
-// CHECK-SME2p3: __ARM_FEATURE_SME2p1 1
-// CHECK-SME2p3: __ARM_FEATURE_SME2p2 1
-// CHECK-SME2p3: __ARM_FEATURE_SME2p3 1
-// CHECK-SME2p3: __ARM_FEATURE_SVE2p1 1
-// CHECK-SME2p3: __ARM_FEATURE_SVE2p2 1
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index c5d1fedcbc9ae..35a157aeb07cb 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1185,10 +1185,7 @@ let Predicates = [HasSME_MOP4, HasSMEF64F64] in {
 // SME2.3 instructions
 //===----------------------------------------------------------------------===//
 let Predicates = [HasSME2p3] in {
-  def LUTI6_ZTZ       : sme2_lut_single<"luti6">;
-  def : Pat<(nxv16i8 (int_aarch64_sme_luti6_zt (imm_to_zt untyped:$zt),
-                      nxv16i8:$zn)),
-            (LUTI6_ZTZ $zt, nxv16i8:$zn)>;
+  defm LUTI6_ZTZ      : sme2_lut_single<"luti6", int_aarch64_sme_luti6_zt>;
   def LUTI6_4ZT3Z     : sme2_luti6_zt_consecutive<"luti6">;
   def LUTI6_S_4ZT3Z   : sme2_luti6_zt_strided<"luti6">;
   def LUTI6_4Z2Z2ZI   : sme2_luti6_vector_vg4_consecutive<"luti6">;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 248b11332e2a4..d664e66477f92 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4847,12 +4847,7 @@ let Predicates = [HasNonStreamingSVE2p3_or_SME2p3] in {
 // SVE2.3 instructions
 //===----------------------------------------------------------------------===//
 let Predicates = [HasNonStreamingSVE2p3] in {
-  def LUTI6_Z2ZZ : sve2_luti6_vector<"luti6">;
-  def : Pat<(nxv16i8 (int_aarch64_sve_luti6 nxv16i8:$Op1, nxv16i8:$Op2,
-                      nxv16i8:$Op3)),
-            (LUTI6_Z2ZZ (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0,
-                                           nxv16i8:$Op2, zsub1),
-                         nxv16i8:$Op3)>;
+  defm LUTI6_Z2ZZ : sve2_luti6_vector<"luti6", int_aarch64_sve_luti6>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 99836aeed7c0a..e17b1b2e6c7a5 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3921,15 +3921,19 @@ multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
 }
 
 // 8-bit Look up table
-class sme2_lut_single<string asm>
-  : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
-    asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
-  bits<0> ZTt;
-  bits<5> Zd;
-  bits<5> Zn;
-  let Inst{31-10} = 0b1100000011001000010000;
-  let Inst{9-5}   = Zn;
-  let Inst{4-0}   = Zd;
+multiclass sme2_lut_single<string asm, SDPatternOperator intrinsic> {
+  def NAME : I<(outs ZPR8:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn),
+                asm, "\t$Zd, $ZTt, $Zn", "", []>, Sched<[]> {
+    bits<0> ZTt;
+    bits<5> Zd;
+    bits<5> Zn;
+    let Inst{31-10} = 0b1100000011001000010000;
+    let Inst{9-5}   = Zn;
+    let Inst{4-0}   = Zd;
+  }
+
+  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn)),
+            (!cast<Instruction>(NAME) $zt, nxv16i8:$zn)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ddec0a21b0ccb..a123265ba90aa 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -11372,18 +11372,24 @@ multiclass sve2_luti6_vector_index<string mnemonic> {
 }
 
 // Look up table
-class sve2_luti6_vector<string mnemonic>
-    : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm),
-      mnemonic, "\t$Zd, $Zn, $Zm",
-      "", []>, Sched<[]> {
-  bits<5> Zd;
-  bits<5> Zn;
-  bits<5> Zm;
-  let Inst{31-21} = 0b01000101001;
-  let Inst{20-16} = Zm;
-  let Inst{15-10} = 0b101011;
-  let Inst{9-5}   = Zn;
-  let Inst{4-0}   = Zd;
+multiclass sve2_luti6_vector<string mnemonic, SDPatternOperator intrinsic> {
+  def NAME : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, ZPRAny:$Zm),
+                mnemonic, "\t$Zd, $Zn, $Zm",
+                "", []>, Sched<[]> {
+    bits<5> Zd;
+    bits<5> Zn;
+    bits<5> Zm;
+    let Inst{31-21} = 0b01000101001;
+    let Inst{20-16} = Zm;
+    let Inst{15-10} = 0b101011;
+    let Inst{9-5}   = Zn;
+    let Inst{4-0}   = Zd;
+  }
+
+  def : Pat<(nxv16i8 (intrinsic nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)),
+            (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2, nxv16i8:$Op1, zsub0,
+                                                     nxv16i8:$Op2, zsub1),
+                                       nxv16i8:$Op3)>;
 }
 
 //===----------------------------------------------------------------------===//

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to