https://github.com/CarolineConcatto updated 
https://github.com/llvm/llvm-project/pull/154144

>From dccf21f154ba09f1b051fbf52a320f6aab03fad7 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <[email protected]>
Date: Thu, 11 Jun 2026 12:56:31 +0000
Subject: [PATCH 1/2] [AArch64][SME] Split FP8 FTMOPA intrinsics

Introduce separate FP8 FTMOPA intrinsics for ZA16 and ZA32:

    llvm.aarch64.sme.fp8.ftmopa.za16
    llvm.aarch64.sme.fp8.ftmopa.za32

The FP8 FTMOPA forms need to model their FPMR dependency, so they
should not share the same intrinsic definitions as the non-FP8 FTMOPA
forms.

Update the Clang SME builtin definitions and AArch64 instruction
patterns to use the new intrinsics, and add AutoUpgrade support for the
previous FP8-shaped llvm.aarch64.sme.ftmopa.* spellings so existing IR and
bitcode continue to work.

This was split out from #154144 because the intrinsic upgrade needs to be
handled separately to avoid breaking existing bitcode.
---
 clang/include/clang/Basic/arm_sme.td          |  4 +-
 .../AArch64/sme2-intrinsics/acle_sme2_tmop.c  |  8 ++--
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 14 +++++++
 llvm/lib/IR/AutoUpgrade.cpp                   | 15 +++++++
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  4 +-
 .../upgrade-sme2-fp8-intrinsics-tmop.ll       | 42 +++++++++++++++++++
 .../CodeGen/AArch64/sme2-intrinsics-tmop.ll   |  4 +-
 7 files changed, 81 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Bitcode/upgrade-sme2-fp8-intrinsics-tmop.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 032c588966032..5137e968bec55 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -915,11 +915,11 @@ let SMETargetGuard = "sme2,sme-tmop,sme-b16b16" in {
 }
 
 let SMETargetGuard = "sme2,sme-tmop,sme-f8f16" in {
-  def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", 
MergeNone, "aarch64_sme_ftmopa_za16", [IsStreaming, IsInOutZA], [ImmCheck<0, 
ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
+  def SVTMOPA_ZA16_FPM : Inst<"svtmopa_lane_za16[_{d}_{d}]", "vi2.dd[i>", "m", 
MergeNone, "aarch64_sme_fp8_ftmopa_za16", [IsStreaming, IsInOutZA, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_1>, ImmCheck<4, ImmCheck0_3>]>;
 }
 
 let SMETargetGuard = "sme2,sme-tmop,sme-f8f32" in {
-  def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", 
MergeNone, "aarch64_sme_ftmopa_za32", [IsStreaming, IsInOutZA], [ImmCheck<0, 
ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
+  def SVTMOPA_ZA32_FPM : Inst<"svtmopa_lane_za32[_{d}_{d}]", "vi2.dd[i>", "m", 
MergeNone, "aarch64_sme_fp8_ftmopa_za32", [IsStreaming, IsInOutZA, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
 }
 
 multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, 
list<ImmCheck> ch> {
diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c 
b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
index 55d0074663bc9..d68a465e092c6 100644
--- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
+++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_tmop.c
@@ -172,13 +172,13 @@ void test_svtmopa_lane_za16_bf16_bf16(svbfloat16x2_t zn, 
svbfloat16_t zm, svuint
 // CHECK-LABEL: @test_svtmopa_lane_za16_mf8_mf8_fpm(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 1, 
<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], 
<vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fp8.ftmopa.za16(i32 1, 
<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], 
<vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: 
@_Z34test_svtmopa_lane_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 
1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> 
[[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> 
[[ZK:%.*]], i32 3)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fp8.ftmopa.za16(i32 1, 
<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], 
<vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, 
svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
@@ -188,13 +188,13 @@ void test_svtmopa_lane_za16_mf8_mf8_fpm(svmfloat8x2_t zn, 
svmfloat8_t zm, svuint
 // CHECK-LABEL: @test_svtmopa_lane_za32_mf8_mf8_fpm(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 1, 
<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], 
<vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fp8.ftmopa.za32(i32 1, 
<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], 
<vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: 
@_Z34test_svtmopa_lane_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tu11__SVUint8_tm(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 
1, <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> 
[[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> 
[[ZK:%.*]], i32 3)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fp8.ftmopa.za32(i32 1, 
<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], 
<vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZK:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svtmopa_lane_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, 
svuint8_t zk, fpm_t fpmr) __arm_streaming __arm_inout("za") {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ba0d7c02bf427..6cb96a635f87c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3142,6 +3142,20 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_sutmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
   def int_aarch64_sme_ustmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
 
+  class SME_FP8_OuterProduct_TMOP_Intrinsic
+     : DefaultAttrsIntrinsic<[],
+         [llvm_i32_ty,
+          llvm_nxv16i8_ty,
+          llvm_nxv16i8_ty,
+          llvm_nxv16i8_ty,
+          llvm_nxv16i8_ty,
+          llvm_i32_ty],
+         [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>,
+          IntrInaccessibleMemOnly]>;
+
+  def int_aarch64_sme_fp8_ftmopa_za16 : SME_FP8_OuterProduct_TMOP_Intrinsic;
+  def int_aarch64_sme_fp8_ftmopa_za32 : SME_FP8_OuterProduct_TMOP_Intrinsic;
+
   // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product
   foreach za = ["za16", "za32"] in {
     def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : 
SME_OuterProduct_QuarterTile_Single_Single;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 0770f0f0ff060..74b01200c064b 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -970,6 +970,21 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool 
IsArm, Function *F,
     }
   } else {
     // 'aarch64.*'.
+    if (Name.consume_front("sme.ftmopa.")) {
+      // The FP8 FTMOPA intrinsics were split out from the non-FP8 FTMOPA
+      // intrinsics to model their FPMR dependency.
+      Intrinsic::ID ID =
+          StringSwitch<Intrinsic::ID>(Name)
+              .Case("za16.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za16)
+              .Case("za32.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za32)
+              .Default(Intrinsic::not_intrinsic);
+      if (ID != Intrinsic::not_intrinsic) {
+        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
+        return true;
+      }
+      return false; // No other 'aarch64.sme.ftmopa.*'.
+    }
+
     if (Neon) {
       // 'aarch64.neon.*'.
       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 022fed6473486..5a7e4f22a7c0c 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -224,11 +224,11 @@ let Predicates = [HasSME_TMOP, HasSMEB16B16] in {
 }
 
 let Predicates = [HasSME_TMOP, HasSMEF8F16] in {
-  defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, 
"ftmopa", int_aarch64_sme_ftmopa_za16, [FPMR, FPCR]>;
+  defm FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, nxv16i8, 
"ftmopa", int_aarch64_sme_fp8_ftmopa_za16, [FPMR, FPCR]>;
 }
 
 let Predicates = [HasSME_TMOP, HasSMEF8F32] in {
-  defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, 
"ftmopa", int_aarch64_sme_ftmopa_za32, [FPMR, FPCR]>;
+  defm FTMOPA_M2ZZZI_BtoS : sme_tmopa_32b<0b01000, ZZ_b_mul_r, ZPR8, nxv16i8, 
"ftmopa", int_aarch64_sme_fp8_ftmopa_za32, [FPMR, FPCR]>;
 }
 
 let Predicates = [HasSME] in {
diff --git a/llvm/test/Bitcode/upgrade-sme2-fp8-intrinsics-tmop.ll 
b/llvm/test/Bitcode/upgrade-sme2-fp8-intrinsics-tmop.ll
new file mode 100644
index 0000000000000..de9a6b69bd219
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-sme2-fp8-intrinsics-tmop.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S < %s | FileCheck %s
+; RUN: llvm-as %s -o - | llvm-dis | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define void @ftmopa_za16_nxv16i8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> 
%zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: @ftmopa_za16_nxv16i8
+; CHECK: call void @llvm.aarch64.sme.fp8.ftmopa.za16(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  ret void
+}
+
+define void @ftmopa_za32_nxv16i8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> 
%zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: @ftmopa_za32_nxv16i8
+; CHECK: call void @llvm.aarch64.sme.fp8.ftmopa.za32(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  ret void
+}
+
+
+define void @ftmopa_za16(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, 
<vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: @ftmopa_za16
+; CHECK: call void @llvm.aarch64.sme.fp8.ftmopa.za16(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  call void @llvm.aarch64.sme.ftmopa.za16(i32 0, <vscale x 16 x i8> %zn1, 
<vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
+  ret void
+}
+
+define void @ftmopa_za32(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, 
<vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: @ftmopa_za32
+; CHECK: call void @llvm.aarch64.sme.fp8.ftmopa.za32(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+   call void @llvm.aarch64.sme.ftmopa.za32(i32 0, <vscale x 16 x i8> %zn1, 
<vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
+   ret void
+}
+
+define void @ftmopa_za32_wrong(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> 
%zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk) #0 {
+; CHECK-LABEL: @ftmopa_za32_wrong
+; CHECK: call void @llvm.aarch64.sme.fp8.ftmopa.za32(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+   call void @llvm.aarch64.sme.ftmopa.za32.nxv8i16(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+   ret void
+}
+
+attributes #0 = {nounwind "target-features" = 
"+sme2,+sme-tmop,+sme-f8f16,+sme-f8f32" }
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll 
b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
index e918137bee27d..4b8615cc0ca00 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
@@ -119,7 +119,7 @@ define void @ftmopa_za16_f8(<vscale x 16 x i8> %zn1, 
<vscale x 16 x i8> %zn2, <v
 ; CHECK-NEXT:    mov z28.d, z3.d
 ; CHECK-NEXT:    ftmopa za0.h, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  call void @llvm.aarch64.sme.fp8.ftmopa.za16(i32 0, <vscale x 16 x i8> %zn1, 
<vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
   ret void
 }
 
@@ -129,7 +129,7 @@ define void @ftmopa_za32_f8(<vscale x 16 x i8> %zn1, 
<vscale x 16 x i8> %zn2, <v
 ; CHECK-NEXT:    mov z28.d, z3.d
 ; CHECK-NEXT:    ftmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
-  call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> 
%zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, 
i32 0)
+  call void @llvm.aarch64.sme.fp8.ftmopa.za32(i32 0, <vscale x 16 x i8> %zn1, 
<vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
   ret void
 }
 

>From d718ae243f7316590cedc0a881bd1ee7b06f5b9e Mon Sep 17 00:00:00 2001
From: CarolineConcatto <[email protected]>
Date: Mon, 18 Aug 2025 15:11:25 +0000
Subject: [PATCH 2/2] [NFC][AArch64][TableGen] Define ZA, ZT0 and FPMR memory
 defvars

Introduce TableGen defvars for the inaccessible memory effects used to
model accesses to ZA, ZT0 and FPMR in IntrinsicsAArch64.td.

This is a preparatory cleanup for a follow-up patch that will replace
these uses of InaccessibleMem with target-specific memory locations.
Other uses of inaccessible memory in the file are left unchanged because
they are unrelated to ZA, ZT0 or FPMR.

This preserves the existing memory effects. In particular, intrinsics
that currently access both argument memory and inaccessible memory keep
the same ArgMem/InaccessibleMem read/write modelling.
---
 llvm/include/llvm/IR/Intrinsics.td        |   5 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 214 +++++++++++++---------
 2 files changed, 132 insertions(+), 87 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td 
b/llvm/include/llvm/IR/Intrinsics.td
index b1b2bb2a72c65..b7af0944fbc4e 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -74,6 +74,11 @@ class IntrWrite<list<IntrinsicMemoryLocation> idx> : 
IntrinsicProperty {
    list<IntrinsicMemoryLocation> MemLoc=idx;
 }
 
+// Constrain intrinsic to not write any memory location.
+defvar IntrReadOnly = IntrWrite<[]>;
+// Constrain intrinsic to not read any memory location.
+defvar IntrWriteOnly = IntrRead<[]>;
+
 // Commutative - This intrinsic is commutative: X op Y == Y op X.
 def Commutative : IntrinsicProperty;
 
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6cb96a635f87c..a954350bc6825 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -738,6 +738,11 @@ def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
 def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
 def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
 
+// Maps Memory locations to registers.
+defvar FPMR = InaccessibleMem;
+defvar ZT0 = InaccessibleMem;
+defvar ZA = InaccessibleMem;
+
 let TargetPrefix = "aarch64" in {
   class FPENV_Get_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, 
IntrHasSideEffects]>;
@@ -746,7 +751,7 @@ let TargetPrefix = "aarch64" in {
   class RNDR_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, 
IntrHasSideEffects]>;
   class FPMR_Set_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, 
IntrInaccessibleMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWrite<[FPMR]>, 
IntrWriteOnly]>;
 }
 
 // FP environment registers.
@@ -971,7 +976,8 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], 
!listconcat([llvm_ptr_ty], dat
 
   // Conversions
   class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], 
[IntrReadMem, IntrInaccessibleMemOnly]>;
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   def int_aarch64_neon_fp8_cvtl1   : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
   def int_aarch64_neon_fp8_cvtl2   : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
@@ -980,13 +986,13 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], 
!listconcat([llvm_ptr_ty], dat
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   def int_aarch64_neon_fp8_fcvtn2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   // Dot-product
   class AdvSIMD_FP8_DOT_Intrinsic
@@ -994,14 +1000,14 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], 
!listconcat([llvm_ptr_ty], dat
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                             [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   class AdvSIMD_FP8_DOT_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              llvm_v16i8_ty,
                              llvm_i32_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly, 
ImmArg<ArgIndex<3>>]>;
+                             [IntrRead<[FPMR]>, IntrReadOnly, 
ImmArg<ArgIndex<3>>]>;
 
   def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic;
   def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
@@ -1016,7 +1022,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], 
!listconcat([llvm_ptr_ty], dat
                             [LLVMMatchType<0>,
                              llvm_v16i8_ty,
                              llvm_v16i8_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   class AdvSIMD_FP8_FMLA_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1024,7 +1030,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], 
!listconcat([llvm_ptr_ty], dat
                              llvm_v16i8_ty,
                              llvm_v16i8_ty,
                              llvm_i32_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly, 
ImmArg<ArgIndex<3>>]>;
+                             [IntrRead<[FPMR]>, IntrReadOnly, 
ImmArg<ArgIndex<3>>]>;
 
   def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic;
   def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic;
@@ -2854,7 +2860,7 @@ def int_aarch64_sve_fmmla
 def int_aarch64_sve_fp8_fmmla
   : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                           [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                          [IntrReadMem, IntrInaccessibleMemOnly]>;
+                          [IntrRead<[FPMR]>, IntrReadOnly]>;
 
 //
 // SVE ACLE: 7.2. BFloat16 extensions
@@ -2957,7 +2963,9 @@ def int_aarch64_sve_whilewr_d : 
SVE2_CONFLICT_DETECT_Intrinsic<[IntrSpeculatable
 let TargetPrefix = "aarch64" in {
   class SME_Load_Store_Intrinsic<LLVMType pred_ty>
     : DefaultAttrsIntrinsic<[],
-        [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], 
[IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+        [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty],
+        [IntrRead<[ArgMem, ZA]>, IntrWrite<[ArgMem, ZA]>,
+         ImmArg<ArgIndex<2>>]>;
 
   // Loads
   def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
@@ -2985,18 +2993,21 @@ let TargetPrefix = "aarch64" in {
 
   // Spill + fill
   class SME_LDR_STR_ZA_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], 
[IntrInaccessibleMemOrArgMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], 
[IntrRead<[ArgMem, ZA]>, IntrWrite<[ArgMem, ZA]>]>;
   def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic;
   def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic;
 
+
   class SME_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
           [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_i32_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, 
ImmArg<ArgIndex<2>>]>;
+           llvm_i32_ty, llvm_i32_ty], [IntrRead<[ZA]>, IntrReadOnly,
+                                        ImmArg<ArgIndex<2>>]>;
   class SME_VectorToTile_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, 
llvm_i1_ty>,
-           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                                ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_read_horiz  : SME_TileToVector_Intrinsic;
   def int_aarch64_sme_read_vert   : SME_TileToVector_Intrinsic;
@@ -3011,13 +3022,13 @@ let TargetPrefix = "aarch64" in {
   class SME_MOVAZ_TileToVector_X2_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>;
 
   class SME_MOVAZ_TileToVector_X4_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
            LLVMMatchType<0>,LLVMMatchType<0>],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
   def int_aarch64_sme_readz_vert_x2  : SME_MOVAZ_TileToVector_X2_Intrinsic;
@@ -3028,7 +3039,7 @@ let TargetPrefix = "aarch64" in {
   class SME_MOVAZ_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic;
   def int_aarch64_sme_readz_vert  : SME_MOVAZ_TileToVector_Intrinsic;
@@ -3039,23 +3050,24 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_readz_x2
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
           [llvm_i32_ty],
-          [IntrInaccessibleMemOnly]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>]>;
 
   def int_aarch64_sme_readz_x4
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
           [llvm_i32_ty],
-          [IntrInaccessibleMemOnly]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>]>;
 
   def int_aarch64_sme_write_lane_zt
        :  DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, 
llvm_i32_ty],
-            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly]>;
+            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>,
+             IntrWrite<[ZT0]>]>;
 
   def int_aarch64_sme_write_zt
        :  DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty],
-            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrWriteMem]>;
+            [ImmArg<ArgIndex<0>>, IntrWrite<[ZT0]>, IntrWriteOnly]>;
 
 
-  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], 
[IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], 
[IntrWrite<[ZA]>, IntrWriteOnly, ImmArg<ArgIndex<0>>]>;
   def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], 
[], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
 
   class SME_OuterProduct_Intrinsic
@@ -3064,7 +3076,8 @@ let TargetPrefix = "aarch64" in {
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMMatchType<0>,
-           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                                ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
   def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
@@ -3082,17 +3095,32 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
 
   class SME_OuterProduct_QuarterTile_Single_Single
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+           llvm_anyvector_ty,
+           LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>,
+                               IntrWrite<[ZA]>, IntrHasSideEffects]>;
+
+ class SME_FP8_OuterProduct_QuarterTile_Single_Single
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_anyvector_ty,
-          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrHasSideEffects]>;
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, 
IntrWrite<[ZA]>, IntrHasSideEffects]>;
 
   class SME_OuterProduct_QuarterTile_Single_Multi
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_anyvector_ty,
           LLVMMatchType<0>,
-          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrHasSideEffects]>;
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>,
+                              IntrWrite<[ZA]>, IntrHasSideEffects]>;
+
+  class SME_FP8_OuterProduct_QuarterTile_Single_Multi
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, 
IntrWrite<[ZA]>, IntrHasSideEffects]>;
 
   class SME_OuterProduct_QuarterTile_Multi_Multi
       : DefaultAttrsIntrinsic<[],
@@ -3100,7 +3128,16 @@ let TargetPrefix = "aarch64" in {
           llvm_anyvector_ty,
           LLVMMatchType<0>,
           LLVMMatchType<0>,
-          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrHasSideEffects]>;
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>,
+                              IntrWrite<[ZA]>, IntrHasSideEffects]>;
+
+  class SME_FP8_OuterProduct_QuarterTile_Multi_Multi
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, 
IntrWrite<[ZA]>, IntrHasSideEffects]>;
 
   // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product 
A/S
   foreach mode = ["s", "a"] in {
@@ -3132,8 +3169,8 @@ let TargetPrefix = "aarch64" in {
           LLVMMatchType<0>,
           llvm_nxv16i8_ty,
           llvm_i32_ty],
-         [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>,
-          IntrInaccessibleMemOnly]>;
+         [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>, IntrRead<[ZA]>,
+          IntrWrite<[ZA]>]>;
 
   def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic;
   def int_aarch64_sme_ftmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
@@ -3151,17 +3188,17 @@ let TargetPrefix = "aarch64" in {
           llvm_nxv16i8_ty,
           llvm_i32_ty],
          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>,
-          IntrInaccessibleMemOnly]>;
+          IntrRead<[ZA, FPMR]>, IntrWrite<[ZA]>]>;
 
   def int_aarch64_sme_fp8_ftmopa_za16 : SME_FP8_OuterProduct_TMOP_Intrinsic;
   def int_aarch64_sme_fp8_ftmopa_za32 : SME_FP8_OuterProduct_TMOP_Intrinsic;
 
   // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product
   foreach za = ["za16", "za32"] in {
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : 
SME_OuterProduct_QuarterTile_Single_Single;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : 
SME_OuterProduct_QuarterTile_Single_Multi;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : 
SME_OuterProduct_QuarterTile_Single_Multi;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : 
SME_OuterProduct_QuarterTile_Multi_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : 
SME_FP8_OuterProduct_QuarterTile_Single_Single;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : 
SME_FP8_OuterProduct_QuarterTile_Single_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : 
SME_FP8_OuterProduct_QuarterTile_Single_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : 
SME_FP8_OuterProduct_QuarterTile_Multi_Multi;
   }
 
   class SME_AddVectorToTile_Intrinsic
@@ -3169,7 +3206,8 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                                ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
   def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
@@ -3193,9 +3231,9 @@ let TargetPrefix = "aarch64" in {
                               [IntrNoMem, IntrHasSideEffects]>;
 
   def int_aarch64_sme_za_enable
-      : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+      : DefaultAttrsIntrinsic<[], [], [IntrWrite<[ZA, ZT0]>, IntrWriteOnly]>;
   def int_aarch64_sme_za_disable
-      : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+      : DefaultAttrsIntrinsic<[], [], [IntrWrite<[ZA, ZT0]>, IntrWriteOnly]>;
 
   // Clamp
   //
@@ -3284,56 +3322,59 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>,
                  LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_Single_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                 llvm_anyvector_ty,
                 LLVMMatchType<0>, llvm_i32_ty],
-                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                 ImmArg<ArgIndex<3>>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, llvm_i32_ty],
-                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                 ImmArg<ArgIndex<4>>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>,
                  LLVMMatchType<0>, llvm_i32_ty],
-                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                 ImmArg<ArgIndex<6>>]>;
 
   class SVE2_VG2_Multi_Imm_Intrinsic
     : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
@@ -3352,14 +3393,14 @@ let TargetPrefix = "aarch64" in {
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [IntrInaccessibleMemOnly]>;
+               [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_ZA_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [IntrInaccessibleMemOnly]>;
+               [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SVE2_VG2_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3477,50 +3518,50 @@ let TargetPrefix = "aarch64" in {
   class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_ZA_ArrayVector_Read_VG4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                              LLVMMatchType<0>,  LLVMMatchType<0>],
                 [llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_Matrix_TileVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty, llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_Matrix_TileVector_Read_VG4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                              LLVMMatchType<0>,  LLVMMatchType<0>],
                 [llvm_i32_ty, llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_ZA_ArrayVector_Write_VG2_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_ZA_ArrayVector_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_Matrix_TileVector_Write_VG2_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg<ArgIndex<0>>]>;
 
   class SME2_Matrix_TileVector_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg<ArgIndex<0>>]>;
 
   class SVE2_VG2_Multi_Single_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3686,7 +3727,7 @@ let TargetPrefix = "aarch64" in {
   // Multi-vector zeroing
 
   foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", 
"vg4x4"] in {
-    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], 
[llvm_i32_ty],  [IntrWriteMem, IntrInaccessibleMemOnly]>;
+    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], 
[llvm_i32_ty],  [IntrWrite<[ZA]>, IntrWriteOnly]>;
   }
   // Multi-vector signed saturating doubling multiply high
   def int_aarch64_sve_sqdmulh_single_vgx2 : SVE2_VG2_Multi_Single_Intrinsic;
@@ -3824,14 +3865,14 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
           llvm_anyvector_ty, LLVMMatchType<0>,
           LLVMMatchType<0>],
-          [IntrInaccessibleMemOnly, IntrWriteMem]>;
+          [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>,
           LLVMMatchType<0>],
-        [IntrInaccessibleMemOnly, IntrWriteMem]>;
+        [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   def int_aarch64_sme_add_write_single_za_vg1x2 : 
SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
   def int_aarch64_sme_sub_write_single_za_vg1x2 : 
SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
@@ -3846,7 +3887,7 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
           llvm_anyvector_ty, LLVMMatchType<0>,
           LLVMMatchType<0>, LLVMMatchType<0>],
-          [IntrInaccessibleMemOnly, IntrWriteMem]>;
+          [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic
       : DefaultAttrsIntrinsic<[],
@@ -3854,7 +3895,7 @@ let TargetPrefix = "aarch64" in {
           llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
           LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
           LLVMMatchType<0>, LLVMMatchType<0>],
-          [IntrInaccessibleMemOnly, IntrWriteMem]>;
+          [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   def int_aarch64_sme_add_write_za_vg1x2 : 
SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
   def int_aarch64_sme_sub_write_za_vg1x2 : 
SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
@@ -3975,34 +4016,35 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_sel_x2  : SVE2_VG2_Sel_Intrinsic;
   def int_aarch64_sve_sel_x4  : SVE2_VG4_Sel_Intrinsic;
 
+
   class SME_LDR_STR_ZT_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], 
[IntrInaccessibleMemOrArgMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], 
[IntrRead<[ArgMem, ZT0]>, IntrWrite<[ArgMem, ZT0]>]>;
   def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic;
   def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic;
 
   //
   //  Zero ZT0
   //
-  def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], 
[ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrWriteMem]>;
+  def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], 
[ImmArg<ArgIndex<0>>, IntrWrite<[ZT0]>, IntrWriteOnly]>;
 
   //
   // Lookup table expand one register
   //
   def int_aarch64_sme_luti2_lane_zt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, 
llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti4_lane_zt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, 
llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrRead<[ZT0]>, IntrReadOnly]>;
 
   // Lookup table expand two registers
   //
   def int_aarch64_sme_luti2_lane_zt_x2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], 
[llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti4_lane_zt_x2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], 
[llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrRead<[ZT0]>, IntrReadOnly]>;
 
   //
   // Lookup table expand four registers
@@ -4010,16 +4052,16 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_luti2_lane_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti4_lane_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, 
IntrRead<[ZT0]>, IntrReadOnly]>;
 
   def int_aarch64_sme_luti4_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, 
IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, IntrRead<[ZT0]>, 
IntrReadOnly]>;
 
 
   //
@@ -4105,7 +4147,7 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_Cvt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   def int_aarch64_sve_fp8_cvt1   : SVE2_FP8_Cvt;
   def int_aarch64_sve_fp8_cvt2   : SVE2_FP8_Cvt;
@@ -4116,29 +4158,26 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_Narrow_Cvt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
-
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   def int_aarch64_sve_fp8_cvtn  : SVE2_FP8_Narrow_Cvt;
   def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;
 
   def int_aarch64_sve_fp8_cvtnt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_nxv16i8_ty, llvm_anyvector_ty, 
LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   // Dot product
   class SVE2_FP8_FMLA_FDOT
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
-
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   class SVE2_FP8_FMLA_FDOT_Lane
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly, 
ImmArg<ArgIndex<3>>]>;
-
+                            [IntrRead<[FPMR]>, IntrReadOnly, 
ImmArg<ArgIndex<3>>]>;
   def int_aarch64_sve_fp8_fdot      : SVE2_FP8_FMLA_FDOT;
   def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane;
 
@@ -4164,69 +4203,70 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_CVT_X2_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                             [llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   class SVE2_FP8_CVT_Single_X4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_nxv4f32_ty, llvm_nxv4f32_ty, 
llvm_nxv4f32_ty, llvm_nxv4f32_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   class SME_FP8_OuterProduct_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_nxv16i1_ty, llvm_nxv16i1_ty,
           llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-          [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>;
+          [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_LANE_VGx1_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                llvm_nxv16i8_ty,
                                llvm_nxv16i8_ty,
                                llvm_i32_ty],
-                          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                          [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, 
ImmArg<ArgIndex<3>>]>;
 
   class SME_FP8_ZA_LANE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, 
ImmArg<ArgIndex<4>>]>;
 
   class SME_FP8_ZA_LANE_VGx4_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, 
ImmArg<ArgIndex<6>>]>;
+
   class SME_FP8_ZA_VGx1_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                              [IntrInaccessibleMemOnly]>;
+                              [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_MULTI_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_MULTI_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
   //
   // CVT from FP8 to half-precision/BFloat16 multi-vector
   //
@@ -4245,7 +4285,7 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_fp8_cvt_x2
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   def int_aarch64_sve_fp8_cvt_x4  : SVE2_FP8_CVT_Single_X4_Intrinsic;
   def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to