https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/166362
This patch extends the MachineSMEABIPass to support ZT0. This is done with the
addition of two new states:
- `ACTIVE_ZT0_SAVED`
  * This is used when calling a function that shares ZA, but does not share
    ZT0 (i.e., the callee has no ZT0 attributes).
  * This state indicates that ZT0 must be saved to its save slot, but ZA must
    remain on, with no lazy save set up.
- `LOCAL_COMMITTED`
  * This is used for saving ZT0 in functions without ZA state.
  * This state indicates ZA is off and ZT0 has been saved.
  * This state is general enough to also cover committing ZA, but those
    transitions have not been implemented.†
To aid readability, the state transitions have been reworked from nested ifs
into a switch over `transitionFrom(<FromState>).to(<ToState>)` keys (see the
sketch below), which makes the growing number of transitions easier to manage.
† This could be implemented to handle some cases of undefined behavior better.
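For reference, `transitionFrom(From).to(To)` packs the two 4-bit state values
into a single byte, so each (from, to) pair can be matched with one flat
`case` label. Below is a minimal standalone sketch of the idiom, condensed
from the MachineSMEABIPass changes in the patch (the enum comments paraphrase
the states above; it is illustrative, not the exact LLVM source):

```cpp
#include <cstdint>

enum ZAState : uint8_t {
  ANY,              // Any/unknown state (not valid).
  ACTIVE,           // ZA (and ZT0, if used) live in registers.
  ACTIVE_ZT0_SAVED, // ZA active, but ZT0 saved to its save slot.
  LOCAL_SAVED,      // A ZA save has been set up or committed.
  LOCAL_COMMITTED,  // ZA committed to the save buffer; ZA is off.
  ENTRY,            // The ZA/ZT0 state on entry to the function.
  OFF,              // ZA is off.
  NUM_ZA_STATE
};

struct FromState {
  ZAState From;
  constexpr uint8_t to(ZAState To) const {
    static_assert(NUM_ZA_STATE < 16, "expected ZAState to fit in 4-bits");
    return uint8_t(From) << 4 | uint8_t(To);
  }
};

constexpr FromState transitionFrom(ZAState From) { return FromState{From}; }

// Both the switch key and the case labels are constant expressions, so each
// (from, to) pair becomes one flat case rather than a tangle of nested ifs.
void emitStateChange(ZAState From, ZAState To) {
  switch (transitionFrom(From).to(To)) {
  case transitionFrom(ACTIVE).to(ACTIVE_ZT0_SAVED):
    // Emit a ZT0 save; ZA stays on (no lazy save).
    break;
  case transitionFrom(ACTIVE_ZT0_SAVED).to(ACTIVE):
    // Emit a ZT0 restore.
    break;
  // ... remaining transitions elided ...
  default:
    break;
  }
}
```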
From dc41be430aa17616f431e0ce793e66f92df28881 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <[email protected]>
Date: Mon, 3 Nov 2025 15:41:49 +0000
Subject: [PATCH] [AArch64][SME] Support saving/restoring ZT0 in the
MachineSMEABIPass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch extends the MachineSMEABIPass to support ZT0. This is done
with the addition of two new states:
- `ACTIVE_ZT0_SAVED`
  * This is used when calling a function that shares ZA, but does not
    share ZT0 (i.e., the callee has no ZT0 attributes).
  * This state indicates that ZT0 must be saved to its save slot, but ZA
    must remain on, with no lazy save set up.
- `LOCAL_COMMITTED`
  * This is used for saving ZT0 in functions without ZA state.
  * This state indicates ZA is off and ZT0 has been saved.
  * This state is general enough to also cover committing ZA, but those
    transitions have not been implemented.†
To aid readability, the state transitions have been reworked from nested
ifs into a switch over `transitionFrom(<FromState>).to(<ToState>)` keys,
which makes the growing number of transitions easier to manage.
† This could be implemented to handle some cases of undefined behavior
better.
Change-Id: I14be4a7f8b998fe667bfaade5088f88039515f91
---
.../AArch64/AArch64ExpandPseudoInsts.cpp | 1 +
.../Target/AArch64/AArch64ISelLowering.cpp | 11 +-
.../lib/Target/AArch64/AArch64SMEInstrInfo.td | 6 +
llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 176 +++++++++++++++---
.../test/CodeGen/AArch64/sme-peephole-opts.ll | 4 -
.../test/CodeGen/AArch64/sme-za-exceptions.ll | 124 +++++++++---
llvm/test/CodeGen/AArch64/sme-zt0-state.ll | 104 ++++++-----
7 files changed, 321 insertions(+), 105 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 34d74d04c4419..60e6a82d41cc8 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1717,6 +1717,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
}
case AArch64::InOutZAUsePseudo:
case AArch64::RequiresZASavePseudo:
+ case AArch64::RequiresZT0SavePseudo:
case AArch64::SMEStateAllocPseudo:
case AArch64::COALESCER_BARRIER_FPR16:
case AArch64::COALESCER_BARRIER_FPR32:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 30f961043e78b..20c1c6790b2fb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9457,6 +9457,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState())
ZAMarkerNode = AArch64ISD::REQUIRES_ZA_SAVE;
+ else if (CallAttrs.requiresPreservingZT0())
+ ZAMarkerNode = AArch64ISD::REQUIRES_ZT0_SAVE;
else if (CallAttrs.caller().hasZAState() ||
CallAttrs.caller().hasZT0State())
ZAMarkerNode = AArch64ISD::INOUT_ZA_USE;
@@ -9576,7 +9578,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue ZTFrameIdx;
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool ShouldPreserveZT0 = CallAttrs.requiresPreservingZT0();
+ bool ShouldPreserveZT0 =
+ !UseNewSMEABILowering && CallAttrs.requiresPreservingZT0();
// If the caller has ZT0 state which will not be preserved by the callee,
// spill ZT0 before the call.
@@ -9589,7 +9592,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If caller shares ZT0 but the callee is not shared ZA, we need to stop
// PSTATE.ZA before the call if there is no lazy-save active.
- bool DisableZA = CallAttrs.requiresDisablingZABeforeCall();
+ bool DisableZA =
+ !UseNewSMEABILowering && CallAttrs.requiresDisablingZABeforeCall();
assert((!DisableZA || !RequiresLazySave) &&
"Lazy-save should have PSTATE.SM=1 on entry to the function");
@@ -10074,7 +10078,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
getSMToggleCondition(CallAttrs));
}
- if (RequiresLazySave || CallAttrs.requiresEnablingZAAfterCall())
+ if (!UseNewSMEABILowering &&
+ (RequiresLazySave || CallAttrs.requiresEnablingZAAfterCall()))
// Unconditionally resume ZA.
Result = DAG.getNode(
AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue), Result,
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 8f8f211c5fceb..2753a4561daae 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -102,6 +102,7 @@ def : Pat<(i64 (AArch64AllocateSMESaveBuffer GPR64:$size)),
let hasSideEffects = 1, isMeta = 1 in {
def InOutZAUsePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
def RequiresZASavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
+ def RequiresZT0SavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
}
def SMEStateAllocPseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
@@ -122,6 +123,11 @@ def AArch64_requires_za_save
[SDNPHasChain, SDNPInGlue]>;
def : Pat<(AArch64_requires_za_save), (RequiresZASavePseudo)>;
+def AArch64_requires_zt0_save
+ : SDNode<"AArch64ISD::REQUIRES_ZT0_SAVE", SDTypeProfile<0, 0, []>,
+ [SDNPHasChain, SDNPInGlue]>;
+def : Pat<(AArch64_requires_zt0_save), (RequiresZT0SavePseudo)>;
+
def AArch64_sme_state_alloc
: SDNode<"AArch64ISD::SME_STATE_ALLOC", SDTypeProfile<0, 0,[]>,
[SDNPHasChain]>;
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index bb4dfe8c60904..c8d20b571d702 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -72,16 +72,30 @@ using namespace llvm;
namespace {
-enum ZAState {
+// Note: For agnostic ZA, we assume the function is always entered/exited in the
+// "ACTIVE" state -- this _may_ not be the case (since OFF is also a
+// possibility, but for the purpose of placing ZA saves/restores, that does not
+// matter).
+enum ZAState : uint8_t {
// Any/unknown state (not valid)
ANY = 0,
// ZA is in use and active (i.e. within the accumulator)
ACTIVE,
+ // ZA is active, but ZT0 has been saved.
+ // This handles the edge case of sharesZA && !sharesZT0.
+ ACTIVE_ZT0_SAVED,
+
// A ZA save has been set up or committed (i.e. ZA is dormant or off)
+ // If the function uses ZT0 it must also be saved.
LOCAL_SAVED,
+ // ZA has been committed to the lazy save buffer of the current function.
+ // If the function uses ZT0 it must also be saved.
+ // ZA is off when a save has been committed.
+ LOCAL_COMMITTED,
+
// The ZA/ZT0 state on entry to the function.
ENTRY,
@@ -164,6 +178,14 @@ class EmitContext {
return AgnosticZABufferPtr;
}
+ int getZT0SaveSlot(MachineFunction &MF) {
+ if (ZT0SaveFI)
+ return *ZT0SaveFI;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ ZT0SaveFI = MFI.CreateSpillStackObject(64, Align(16));
+ return *ZT0SaveFI;
+ }
+
/// Returns true if the function must allocate a ZA save buffer on entry. This
/// will be the case if, at any point in the function, a ZA save was emitted.
bool needsSaveBuffer() const {
@@ -173,6 +195,7 @@ class EmitContext {
}
private:
+ std::optional<int> ZT0SaveFI;
std::optional<int> TPIDR2BlockFI;
Register AgnosticZABufferPtr = AArch64::NoRegister;
};
@@ -184,8 +207,10 @@ class EmitContext {
/// state would not be legal, as transitioning to it drops the content of ZA.
static bool isLegalEdgeBundleZAState(ZAState State) {
switch (State) {
- case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
- case ZAState::LOCAL_SAVED: // ZA state is saved on the stack.
+ case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
+ case ZAState::ACTIVE_ZT0_SAVED: // ZT0 is saved (ZA is active).
+ case ZAState::LOCAL_SAVED: // ZA state may be saved on the stack.
+ case ZAState::LOCAL_COMMITTED: // ZA state is saved on the stack.
return true;
default:
return false;
@@ -199,7 +224,9 @@ StringRef getZAStateString(ZAState State) {
switch (State) {
MAKE_CASE(ZAState::ANY)
MAKE_CASE(ZAState::ACTIVE)
+ MAKE_CASE(ZAState::ACTIVE_ZT0_SAVED)
MAKE_CASE(ZAState::LOCAL_SAVED)
+ MAKE_CASE(ZAState::LOCAL_COMMITTED)
MAKE_CASE(ZAState::ENTRY)
MAKE_CASE(ZAState::OFF)
default:
@@ -221,18 +248,34 @@ static bool isZAorZTRegOp(const TargetRegisterInfo &TRI,
/// Returns the required ZA state needed before \p MI and an iterator pointing
/// to where any code required to change the ZA state should be inserted.
static std::pair<ZAState, MachineBasicBlock::iterator>
-getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
- bool ZAOffAtReturn) {
+getInstNeededZAState(const TargetRegisterInfo &TRI, MachineInstr &MI,
+ SMEAttrs SMEFnAttrs) {
MachineBasicBlock::iterator InsertPt(MI);
if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
return {ZAState::ACTIVE, std::prev(InsertPt)};
+ // Note: If we need to save both ZA and ZT0 we use RequiresZASavePseudo.
if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
- if (MI.isReturn())
+ // If we only need to save ZT0, there are two cases to consider:
+ // 1. The function has ZA state (that we don't need to save).
+ // - In this case we switch to the "ACTIVE_ZT0_SAVED" state.
+ // This only saves ZT0.
+ // 2. The function does not have ZA state
+ // - In this case we switch to "LOCAL_COMMITTED" state.
+ // This saves ZT0 and turns ZA off.
+ if (MI.getOpcode() == AArch64::RequiresZT0SavePseudo) {
+ return {SMEFnAttrs.hasZAState() ? ZAState::ACTIVE_ZT0_SAVED
+ : ZAState::LOCAL_COMMITTED,
+ std::prev(InsertPt)};
+ }
+
+ if (MI.isReturn()) {
+ bool ZAOffAtReturn = SMEFnAttrs.hasPrivateZAInterface();
return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
+ }
for (auto &MO : MI.operands()) {
if (isZAorZTRegOp(TRI, MO))
@@ -280,6 +323,9 @@ struct MachineSMEABI : public MachineFunctionPass {
/// predecessors).
void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);
+ void emitZT0SaveRestore(EmitContext &, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsSave);
+
// Emission routines for private and shared ZA functions (using lazy saves).
void emitSMEPrologue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
@@ -290,8 +336,8 @@ struct MachineSMEABI : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI);
void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
- void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- bool ClearTPIDR2);
+ void emitZAMode(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ bool ClearTPIDR2, bool On);
// Emission routines for agnostic ZA functions.
void emitSetupFullZASave(MachineBasicBlock &MBB,
@@ -398,7 +444,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
Block.FixedEntryState = ZAState::ENTRY;
} else if (MBB.isEHPad()) {
// EH entry block:
- Block.FixedEntryState = ZAState::LOCAL_SAVED;
+ Block.FixedEntryState = ZAState::LOCAL_COMMITTED;
}
LiveRegUnits LiveUnits(*TRI);
@@ -420,8 +466,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
PhysLiveRegsAfterSMEPrologue = PhysLiveRegs;
}
// Note: We treat Agnostic ZA as inout_za with an alternate save/restore.
- auto [NeededState, InsertPt] = getZAStateBeforeInst(
- *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface());
+ auto [NeededState, InsertPt] = getInstNeededZAState(*TRI, MI, SMEFnAttrs);
assert((InsertPt == MBBI ||
InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) &&
"Unexpected state change insertion point!");
@@ -742,9 +787,9 @@ void MachineSMEABI::emitRestoreLazySave(EmitContext &Context,
restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
-void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool ClearTPIDR2) {
+void MachineSMEABI::emitZAMode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool ClearTPIDR2, bool On) {
DebugLoc DL = getDebugLoc(MBB, MBBI);
if (ClearTPIDR2)
@@ -755,7 +800,7 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
// Disable ZA.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1))
.addImm(AArch64SVCR::SVCRZA)
- .addImm(0);
+ .addImm(On ? 1 : 0);
}
void MachineSMEABI::emitAllocateLazySaveBuffer(
@@ -884,6 +929,28 @@ void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context,
restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
+void MachineSMEABI::emitZT0SaveRestore(EmitContext &Context,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool IsSave) {
+ DebugLoc DL = getDebugLoc(MBB, MBBI);
+ Register ZT0Save = MRI->createVirtualRegister(&AArch64::GPR64spRegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), ZT0Save)
+ .addFrameIndex(Context.getZT0SaveSlot(*MF))
+ .addImm(0)
+ .addImm(0);
+
+ if (IsSave) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STR_TX))
+ .addReg(AArch64::ZT0)
+ .addReg(ZT0Save);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDR_TX), AArch64::ZT0)
+ .addReg(ZT0Save);
+ }
+}
+
void MachineSMEABI::emitAllocateFullZASaveBuffer(
EmitContext &Context, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) {
@@ -928,6 +995,17 @@ void MachineSMEABI::emitAllocateFullZASaveBuffer(
restorePhyRegSave(RegSave, MBB, MBBI, DL);
}
+struct FromState {
+ ZAState From;
+
+ constexpr uint8_t to(ZAState To) const {
+ static_assert(NUM_ZA_STATE < 16, "expected ZAState to fit in 4-bits");
+ return uint8_t(From) << 4 | uint8_t(To);
+ }
+};
+
+constexpr FromState transitionFrom(ZAState From) { return FromState{From}; }
+
void MachineSMEABI::emitStateChange(EmitContext &Context,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
@@ -959,17 +1037,63 @@ void MachineSMEABI::emitStateChange(EmitContext &Context,
From = ZAState::ACTIVE;
}
- if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED)
- emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
- else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
- emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
- else if (To == ZAState::OFF) {
- assert(From != ZAState::ENTRY &&
- "ENTRY to OFF should have already been handled");
- assert(!SMEFnAttrs.hasAgnosticZAInterface() &&
- "Should not turn ZA off in agnostic ZA function");
- emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED);
- } else {
+ bool IsAgnosticZA = SMEFnAttrs.hasAgnosticZAInterface();
+ bool HasZT0State = SMEFnAttrs.hasZT0State();
+ bool HasZAState = IsAgnosticZA || SMEFnAttrs.hasZAState();
+
+ switch (transitionFrom(From).to(To)) {
+ // This section handles: ACTIVE <-> ACTIVE_ZT0_SAVED
+ case transitionFrom(ZAState::ACTIVE).to(ZAState::ACTIVE_ZT0_SAVED):
+ emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true);
+ break;
+ case transitionFrom(ZAState::ACTIVE_ZT0_SAVED).to(ZAState::ACTIVE):
+ emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/false);
+ break;
+
+ // This section handles: ACTIVE -> LOCAL_SAVED
+ case transitionFrom(ZAState::ACTIVE).to(ZAState::LOCAL_SAVED):
+ if (HasZT0State)
+ emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true);
+ if (HasZAState)
+ emitZASave(Context, MBB, InsertPt, PhysLiveRegs);
+ break;
+
+ // This section handles: ACTIVE -> LOCAL_COMMITTED
+ case transitionFrom(ZAState::ACTIVE).to(ZAState::LOCAL_COMMITTED):
+ // Note: We could support ZA state here, but this transition is currently
+ // only possible when we _don't_ have ZA state.
+ assert(HasZT0State && !HasZAState && "Expect to only have ZT0 state.");
+ emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true);
+ emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/false, /*On=*/false);
+ break;
+
+ // This section handles: LOCAL_COMMITTED -> (OFF|LOCAL_SAVED)
+ case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::OFF):
+ case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::LOCAL_SAVED):
+ // These transitions are no-ops.
+ break;
+
+ // This section handles: LOCAL_(SAVED|COMMITTED) -> ACTIVE[_ZT0_SAVED]
+ case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::ACTIVE):
+ case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::ACTIVE_ZT0_SAVED):
+ case transitionFrom(ZAState::LOCAL_SAVED).to(ZAState::ACTIVE):
+ if (HasZAState)
+ emitZARestore(Context, MBB, InsertPt, PhysLiveRegs);
+ else
+ emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/false, /*On=*/true);
+ if (HasZT0State && To == ZAState::ACTIVE)
+ emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/false);
+ break;
+ default:
+ if (To == ZAState::OFF) {
+ assert(From != ZAState::ENTRY &&
+ "ENTRY to OFF should have already been handled");
+ assert(!AFI->getSMEFnAttrs().hasAgnosticZAInterface() &&
+ "Should not turn ZA off in agnostic ZA function");
+ emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED,
+ /*On=*/false);
+ break;
+ }
dbgs() << "Error: Transition from " << getZAStateString(From) << " to "
<< getZAStateString(To) << '\n';
llvm_unreachable("Unimplemented state transition");
diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
index ced0d41c22dab..f4a3b55e49cd7 100644
--- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
@@ -230,10 +230,6 @@ define void @test7() nounwind "aarch64_inout_zt0" {
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: bl callee
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: ldr zt0, [x19]
-; CHECK-NEXT: str zt0, [x19]
-; CHECK-NEXT: smstop za
; CHECK-NEXT: bl callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index dcdc56c669077..f219b1169af01 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -511,7 +511,6 @@ exit:
;
; This code may require reloading ZT0 in the cleanup for ~ZT0Resource().
;
-; FIXME: Codegen with `-aarch64-new-sme-abi` is broken with ZT0 (as it is not implemented).
define void @try_catch_shared_zt0_callee() "aarch64_inout_zt0" personality ptr @__gxx_personality_v0 {
; CHECK-LABEL: try_catch_shared_zt0_callee:
; CHECK: .Lfunc_begin3:
@@ -519,52 +518,37 @@ define void @try_catch_shared_zt0_callee() "aarch64_inout_zt0" personality ptr @
; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
; CHECK-NEXT: .cfi_lsda 28, .Lexception3
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: .cfi_def_cfa w29, 32
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -24
-; CHECK-NEXT: .cfi_offset w29, -32
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stp x9, x8, [x29, #-80]
+; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .Ltmp9: // EH_LABEL
-; CHECK-NEXT: sub x19, x29, #64
+; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: bl may_throw
+; CHECK-NEXT: .Ltmp10: // EH_LABEL
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
-; CHECK-NEXT: .Ltmp10: // EH_LABEL
; CHECK-NEXT: // %bb.1: // %return_normally
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_2: // %unwind_dtors
; CHECK-NEXT: .Ltmp11: // EH_LABEL
-; CHECK-NEXT: sub x20, x29, #64
+; CHECK-NEXT: mov x20, sp
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #80
-; CHECK-NEXT: cbnz x8, .LBB3_4
-; CHECK-NEXT: // %bb.3: // %unwind_dtors
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB3_4: // %unwind_dtors
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: ldr zt0, [x20]
; CHECK-NEXT: bl shared_zt0_call
; CHECK-NEXT: str zt0, [x20]
; CHECK-NEXT: smstop za
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl _Unwind_Resume
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: ldr zt0, [x20]
;
; CHECK-SDAG-LABEL: try_catch_shared_zt0_callee:
; CHECK-SDAG: .Lfunc_begin3:
@@ -965,6 +949,90 @@ exit:
ret void
}
+define void @try_catch_inout_zt0() "aarch64_inout_zt0" personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: try_catch_inout_zt0:
+; CHECK: .Lfunc_begin7:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-NEXT: .cfi_lsda 28, .Lexception7
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .Ltmp21: // EH_LABEL
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: str zt0, [x19]
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: bl may_throw
+; CHECK-NEXT: .Ltmp22: // EH_LABEL
+; CHECK-NEXT: .LBB7_1: // %exit
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: ldr zt0, [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB7_2: // %catch
+; CHECK-NEXT: .Ltmp23: // EH_LABEL
+; CHECK-NEXT: bl __cxa_begin_catch
+; CHECK-NEXT: bl __cxa_end_catch
+; CHECK-NEXT: b .LBB7_1
+;
+; CHECK-SDAG-LABEL: try_catch_inout_zt0:
+; CHECK-SDAG: .Lfunc_begin7:
+; CHECK-SDAG-NEXT: .cfi_startproc
+; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception7
+; CHECK-SDAG-NEXT: // %bb.0: // %entry
+; CHECK-SDAG-NEXT: sub sp, sp, #80
+; CHECK-SDAG-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SDAG-NEXT: .cfi_def_cfa_offset 80
+; CHECK-SDAG-NEXT: .cfi_offset w19, -8
+; CHECK-SDAG-NEXT: .cfi_offset w30, -16
+; CHECK-SDAG-NEXT: .Ltmp21: // EH_LABEL
+; CHECK-SDAG-NEXT: mov x19, sp
+; CHECK-SDAG-NEXT: str zt0, [x19]
+; CHECK-SDAG-NEXT: smstop za
+; CHECK-SDAG-NEXT: bl may_throw
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: ldr zt0, [x19]
+; CHECK-SDAG-NEXT: .Ltmp22: // EH_LABEL
+; CHECK-SDAG-NEXT: .LBB7_1: // %exit
+; CHECK-SDAG-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SDAG-NEXT: add sp, sp, #80
+; CHECK-SDAG-NEXT: ret
+; CHECK-SDAG-NEXT: .LBB7_2: // %catch
+; CHECK-SDAG-NEXT: .Ltmp23: // EH_LABEL
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: ldr zt0, [x19]
+; CHECK-SDAG-NEXT: str zt0, [x19]
+; CHECK-SDAG-NEXT: smstop za
+; CHECK-SDAG-NEXT: bl __cxa_begin_catch
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: ldr zt0, [x19]
+; CHECK-SDAG-NEXT: str zt0, [x19]
+; CHECK-SDAG-NEXT: smstop za
+; CHECK-SDAG-NEXT: bl __cxa_end_catch
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: ldr zt0, [x19]
+; CHECK-SDAG-NEXT: b .LBB7_1
+entry:
+ invoke void @may_throw()
+ to label %exit unwind label %catch
+
+catch:
+ %eh_info = landingpad { ptr, i32 }
+ catch ptr null
+ %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0
+ tail call ptr @__cxa_begin_catch(ptr %exception_ptr)
+ tail call void @__cxa_end_catch()
+ br label %exit
+
+exit:
+ ret void
+}
+
declare ptr @__cxa_allocate_exception(i64)
declare void @__cxa_throw(ptr, ptr, ptr)
declare ptr @__cxa_begin_catch(ptr)
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 4c48e41294a3a..e8f4f6ed78b9c 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -193,7 +193,7 @@ define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwi
; CHECK-NEWLOWERING-LABEL: zt0_new_caller_zt0_new_callee:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #80
-; CHECK-NEWLOWERING-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEWLOWERING-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB6_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
@@ -202,14 +202,11 @@ define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwi
; CHECK-NEWLOWERING-NEXT: zero { zt0 }
; CHECK-NEWLOWERING-NEXT: .LBB6_2:
; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mov x19, sp
-; CHECK-NEWLOWERING-NEXT: str zt0, [x19]
+; CHECK-NEWLOWERING-NEXT: mov x8, sp
+; CHECK-NEWLOWERING-NEXT: str zt0, [x8]
; CHECK-NEWLOWERING-NEXT: smstop za
; CHECK-NEWLOWERING-NEXT: blr x0
-; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: ldr zt0, [x19]
-; CHECK-NEWLOWERING-NEXT: smstop za
-; CHECK-NEWLOWERING-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEWLOWERING-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: add sp, sp, #80
; CHECK-NEWLOWERING-NEXT: ret
call void %callee() "aarch64_new_zt0";
@@ -246,7 +243,7 @@ define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
; CHECK-NEWLOWERING-LABEL: zt0_new_caller_abi_routine_callee:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #80
-; CHECK-NEWLOWERING-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEWLOWERING-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB7_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
@@ -255,12 +252,11 @@ define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
; CHECK-NEWLOWERING-NEXT: zero { zt0 }
; CHECK-NEWLOWERING-NEXT: .LBB7_2:
; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mov x19, sp
-; CHECK-NEWLOWERING-NEXT: str zt0, [x19]
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state
-; CHECK-NEWLOWERING-NEXT: ldr zt0, [x19]
+; CHECK-NEWLOWERING-NEXT: mov x8, sp
+; CHECK-NEWLOWERING-NEXT: str zt0, [x8]
; CHECK-NEWLOWERING-NEXT: smstop za
-; CHECK-NEWLOWERING-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state
+; CHECK-NEWLOWERING-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: add sp, sp, #80
; CHECK-NEWLOWERING-NEXT: ret
%res = call {i64, i64} @__arm_sme_state()
@@ -382,37 +378,57 @@ define void @shared_za_new_zt0(ptr %callee) "aarch64_inout_za" "aarch64_new_zt0"
define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwind {
-; CHECK-COMMON-LABEL: zt0_multiple_private_za_calls:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: sub sp, sp, #96
-; CHECK-COMMON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: mov x20, sp
-; CHECK-COMMON-NEXT: mov x19, x0
-; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: str zt0, [x20]
-; CHECK-COMMON-NEXT: smstop za
-; CHECK-COMMON-NEXT: blr x0
-; CHECK-COMMON-NEXT: smstart za
-; CHECK-COMMON-NEXT: ldr zt0, [x20]
-; CHECK-COMMON-NEXT: str zt0, [x20]
-; CHECK-COMMON-NEXT: smstop za
-; CHECK-COMMON-NEXT: blr x19
-; CHECK-COMMON-NEXT: smstart za
-; CHECK-COMMON-NEXT: ldr zt0, [x20]
-; CHECK-COMMON-NEXT: str zt0, [x20]
-; CHECK-COMMON-NEXT: smstop za
-; CHECK-COMMON-NEXT: blr x19
-; CHECK-COMMON-NEXT: smstart za
-; CHECK-COMMON-NEXT: ldr zt0, [x20]
-; CHECK-COMMON-NEXT: str zt0, [x20]
-; CHECK-COMMON-NEXT: smstop za
-; CHECK-COMMON-NEXT: blr x19
-; CHECK-COMMON-NEXT: smstart za
-; CHECK-COMMON-NEXT: ldr zt0, [x20]
-; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-COMMON-NEXT: add sp, sp, #96
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: zt0_multiple_private_za_calls:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov x20, sp
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT: str zt0, [x20]
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: ldr zt0, [x20]
+; CHECK-NEXT: str zt0, [x20]
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: blr x19
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: ldr zt0, [x20]
+; CHECK-NEXT: str zt0, [x20]
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: blr x19
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: ldr zt0, [x20]
+; CHECK-NEXT: str zt0, [x20]
+; CHECK-NEXT: smstop za
+; CHECK-NEXT: blr x19
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: ldr zt0, [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: ret
+;
+; CHECK-NEWLOWERING-LABEL: zt0_multiple_private_za_calls:
+; CHECK-NEWLOWERING: // %bb.0:
+; CHECK-NEWLOWERING-NEXT: sub sp, sp, #96
+; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEWLOWERING-NEXT: mov x20, sp
+; CHECK-NEWLOWERING-NEXT: mov x19, x0
+; CHECK-NEWLOWERING-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEWLOWERING-NEXT: str zt0, [x20]
+; CHECK-NEWLOWERING-NEXT: smstop za
+; CHECK-NEWLOWERING-NEXT: blr x0
+; CHECK-NEWLOWERING-NEXT: blr x19
+; CHECK-NEWLOWERING-NEXT: blr x19
+; CHECK-NEWLOWERING-NEXT: blr x19
+; CHECK-NEWLOWERING-NEXT: smstart za
+; CHECK-NEWLOWERING-NEXT: ldr zt0, [x20]
+; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEWLOWERING-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEWLOWERING-NEXT: add sp, sp, #96
+; CHECK-NEWLOWERING-NEXT: ret
call void %callee()
call void %callee()
call void %callee()