https://github.com/MacDue approved this pull request.
LGTM :+1:
https://github.com/llvm/llvm-project/pull/86098
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -316,6 +317,9 @@ class ValueBoundsConstraintSet {
/// Builder for constructing affine expressions.
Builder builder;
+
+ /// The current stop condition function.
+ StopConditionFn stopCondition = nullptr;
MacDue wrote:
Just wondering if this should be
https://github.com/MacDue approved this pull request.
https://github.com/llvm/llvm-project/pull/86099
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue approved this pull request.
https://github.com/llvm/llvm-project/pull/87976
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -1416,14 +1466,14 @@ void VPlanTransforms::addActiveLaneMask(
auto *FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
- assert(FoundWidenCanonicalIVUser &&
+ assert(FoundWidenCanonicalIVUser && *Fou
@@ -77,9 +77,13 @@ struct VPlanTransforms {
/// creation) and instead it is handled using active-lane-mask. \p
/// DataAndControlFlowWithoutRuntimeCheck implies \p
/// UseActiveLaneMaskForControlFlow.
+ /// RTChecks refers to the pointer pairs that need aliasing elements
@@ -3073,6 +3075,56 @@ struct VPWidenStoreEVLRecipe final : public
VPWidenMemoryRecipe {
}
};
+// Given a pointer A that is being stored to, and pointer B that is being
+// read from, both with unknown lengths, create a mask that disables
+// elements which could overlap ac
@@ -427,6 +428,29 @@ Value *VPInstruction::generate(VPTransformState &State) {
{PredTy, ScalarTC->getType()},
{VIVElem0, ScalarTC}, nullptr, Name);
}
+ // Count the number of bits set in each lane and redu
@@ -1300,14 +1301,38 @@ static VPActiveLaneMaskPHIRecipe
*addVPLaneMaskPhiAndUpdateExitBranch(
cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
// TODO: Check if dropping the flags is needed if
// !DataAndControlFlowWithoutRuntimeCheck.
+ VPValue *IncVal = CanonicalIVIncre
@@ -195,6 +195,13 @@ enum class TailFoldingStyle {
DataWithEVL,
};
+enum class RTCheckStyle {
+ /// Branch to scalar loop if checks fail at runtime.
+ ScalarFallback,
+ /// Form a mask based on elements which won't be a WAR or RAW hazard
MacDue wrote:
u
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -1331,14 +1356,37 @@ static VPActiveLaneMaskPHIRecipe
*addVPLaneMaskPhiAndUpdateExitBranch(
"index.part.next");
// Create the active lane mask instruction in the VPlan preheader.
- auto *EntryALM =
+ VPValue *Mask =
Builder.createNaryOp(VPInstruction::Acti
@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream
&O, const Twine &Indent,
}
#endif
+void VPAliasLaneMaskRecipe::execute(VPTransformState &State) {
+ IRBuilderBase Builder = State.Builder;
+ Value *SinkValue = State.get(getSinkValue(), true);
+ V
@@ -952,7 +952,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value
*VectorTripCountV,
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
- assert(VFxUF.getNumUsers() && "VFxUF expected to always have us
@@ -3235,6 +3263,36 @@ void VPWidenPointerInductionRecipe::print(raw_ostream
&O, const Twine &Indent,
}
#endif
+void VPAliasLaneMaskRecipe::execute(VPTransformState &State) {
+ IRBuilderBase Builder = State.Builder;
+ Value *SinkValue = State.get(getSinkValue(), true);
+ V
https://github.com/MacDue commented:
A bunch of little comments (mostly just nitpicks from a pass over the PR)
:slightly_smiling_face:
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llv
@@ -418,7 +418,13 @@ class LoopVectorizationPlanner {
/// Build VPlans for the specified \p UserVF and \p UserIC if they are
/// non-zero or all applicable candidate VFs otherwise. If vectorization and
/// interleaving should be avoided up-front, no plans are generated.
-
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/126503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/126503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue milestoned
https://github.com/llvm/llvm-project/pull/126503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/126503
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -253,38 +253,38 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr
readonly %a, ptr readonly %b
; CHECK-MAXBW-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]])
#[[ATTR1:[0-9]+]] {
; CHECK-MAXBW-NEXT: entry:
; CHECK-MAXBW-NEXT:[[TMP0:%.*]] = call i64 @llvm.vsca
@@ -2376,6 +2327,59 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
}
};
+/// A recipe for forming partial reductions. In the loop, an accumulator and
+/// vector operand are added together and passed to the next iteration as the
+/// next accumulator. After the lo
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/136173
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue created
https://github.com/llvm/llvm-project/pull/137683
This cherry-picks https://github.com/llvm/llvm-project/pull/132722 and
https://github.com/llvm/llvm-project/pull/136726 (the latter is based on the
former).
These patches are needed to prevent invalid codegen
https://github.com/MacDue milestoned
https://github.com/llvm/llvm-project/pull/137683
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
MacDue wrote:
@sdesmalen-arm What do you think about merging this PR to the release branch?
https://github.com/llvm/llvm-project/pull/137683
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/
@@ -4923,9 +4923,7 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
return Invalid;
break;
case 16:
- if (AccumEVT == MVT::i64)
-Cost *= 2;
- else if (AccumEVT != MVT::i32)
+ if (AccumEVT != MVT::i32)
MacDue w
@@ -4923,9 +4923,7 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
return Invalid;
break;
case 16:
- if (AccumEVT == MVT::i64)
-Cost *= 2;
- else if (AccumEVT != MVT::i32)
+ if (AccumEVT != MVT::i32)
MacDue w
MacDue wrote:
Note: This patch is a minor improvement to placing saves/restores. For more
complex programs, we will need to propagate required ZA states through blocks
with "no preference" to make better decisions.
https://github.com/llvm/llvm-project/pull/149065
_
MacDue wrote:
This is a stacked PR. See other PRs below:
1. https://github.com/llvm/llvm-project/pull/149062
2. https://github.com/llvm/llvm-project/pull/149063
3. :point_right: https://github.com/llvm/llvm-project/pull/149064
4. https://github.com/llvm/llvm-project/pull/149065
https://github.c
https://github.com/MacDue created
https://github.com/llvm/llvm-project/pull/149063
On Windows or with stack probes on other targets, additional code needs to be
inserted after dynamic stack allocations to validate stack accesses and/or
ensure enough stack space has been allocated.
Rather than
MacDue wrote:
This is a stacked PR. See other PRs below:
1. https://github.com/llvm/llvm-project/pull/149062
2. :point_right: https://github.com/llvm/llvm-project/pull/149063
3. https://github.com/llvm/llvm-project/pull/149064
4. https://github.com/llvm/llvm-project/pull/149065
https://github.c
https://github.com/MacDue created
https://github.com/llvm/llvm-project/pull/149064
This extends the MachineSMEABIPass to handle agnostic ZA functions. This case
is currently handled like shared ZA functions, but we don't require ZA state to
be reloaded before agnostic ZA calls.
Note: This pat
https://github.com/MacDue created
https://github.com/llvm/llvm-project/pull/149065
This patch uses the MachineLoopInfo to give blocks within loops a higher weight
when choosing the bundle ZA state. MachineLoopInfo does not find loop trip
counts, so this uses an arbitrary weight (default 10), w
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/149064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue ready_for_review
https://github.com/llvm/llvm-project/pull/149063
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue ready_for_review
https://github.com/llvm/llvm-project/pull/149064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue ready_for_review
https://github.com/llvm/llvm-project/pull/149065
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
MacDue wrote:
This is a stacked PR. See other PRs below:
1. https://github.com/llvm/llvm-project/pull/149062
2. https://github.com/llvm/llvm-project/pull/149063
3. https://github.com/llvm/llvm-project/pull/149064
4. :point_right: https://github.com/llvm/llvm-project/pull/149065
https://github.
MacDue wrote:
> On a side-note, doing inline stack probes on Windows, instead of using
> __chkstk, is allowed; __chkstk is just faster for large allocations because
> it caches the size of the stack. Not sure if that changes what you want to do
> here.
What is allowed? I think part of your me
MacDue wrote:
Thanks :+1: My current thought is that it does not change anything for this
patch (which mainly aims to restore the functionality that existed with the old
lowering, which solved this the same way). However, it could help simplify the
implementation if we move the handling for Wi
MacDue wrote:
I think it's reasonably safe given the general case (without hazard padding) is
well used and tested, and there have been no issues reported since this landed a
few weeks back.
https://github.com/llvm/llvm-project/pull/144693
___
llvm-bran
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 7b0003f249619b6b584d8a6501a2c6048deb1843 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH 1/3] [AArch64] Prepare for split ZPR and PPR area allocation
(N
@@ -299,14 +297,20 @@ class AArch64FunctionInfo final : public
MachineFunctionInfo {
TailCallReservedStack = bytes;
}
- bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+ void setStackSizeZPR(uint64_t S) {
+HasCalculatedStackSizeSVE = t
MacDue wrote:
There's going to be a 20.1.8
https://discourse.llvm.org/t/llvm-20-1-8-plans/87207
https://github.com/llvm/llvm-project/pull/147171
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/ma
@@ -19,6 +19,11 @@
namespace llvm {
+struct SVEStackSizes {
MacDue wrote:
The return value of `determineSVEStackObjectOffsets()` is the SVE stack sizes
(the values will be >= 0).
https://github.com/llvm/llvm-project/pull/142391
___
@@ -849,17 +849,46 @@ SDValue
DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
SetSoftenedFloat(SDValue(N, ResNum), CreateStackLoad(SlackSlot));
}
- return SDValue();
+ return true;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
- return S
https://github.com/MacDue approved this pull request.
The logic here LGTM, can't vouch for the WASM tests though.
https://github.com/llvm/llvm-project/pull/147468
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.ll
@@ -849,17 +849,46 @@ SDValue
DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
SetSoftenedFloat(SDValue(N, ResNum), CreateStackLoad(SlackSlot));
}
- return SDValue();
+ return true;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
- return S
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/147468
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -19,6 +19,11 @@
namespace llvm {
+struct SVEStackSizes {
MacDue wrote:
e.g.: https://godbolt.org/z/hcWWco8Yj
https://github.com/llvm/llvm-project/pull/142391
___
llvm-branch-commits mailing list
llvm-branch-com
@@ -19,6 +19,11 @@
namespace llvm {
+struct SVEStackSizes {
MacDue wrote:
I think it's best to keep the value signed as the sizes are used to derive
signed offsets, and mixing signed/unsigned can sometimes lead to unexpected
results.
https://github.com/ll
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 8664a890ef0567b92b82fd8eee8b69914fac678b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH 1/4] [AArch64] Prepare for split ZPR and PPR area allocation
(N
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 8664a890ef0567b92b82fd8eee8b69914fac678b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH 1/5] [AArch64] Prepare for split ZPR and PPR area allocation
(N
@@ -4308,26 +4398,33 @@ static int64_t
determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
"reference.");
#endif
- auto Assign = [&MFI](int FI, int64_t Offset) {
+ auto StackForObject = [&](int FI, uint64_t &ZPRStackTop,
+uint64_t &P
@@ -19,6 +19,11 @@
namespace llvm {
+struct SVEStackSizes {
MacDue wrote:
The usage I was referring to was within `determineSVEStackObjectOffsets()` for
`ZPROffset` and `PPROffset`, which based on another suggestion uses the values
from the `SVEStackSizes`
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 8664a890ef0567b92b82fd8eee8b69914fac678b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH 1/7] [AArch64] Prepare for split ZPR and PPR area allocation
(N
@@ -4694,12 +4790,8 @@ void
AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- int MinCSFrameIndex, MaxCSFrameIndex;
- int64_t SVEStackSize
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/142391
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/142391
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -4296,10 +4372,20 @@ static bool getSVECalleeSaveSlotRange(const
MachineFrameInfo &MFI,
// Fills in the first and last callee-saved frame indices into
// Min/MaxCSFrameIndex, respectively.
// Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(Mach
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/142391
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 8664a890ef0567b92b82fd8eee8b69914fac678b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH 1/8] [AArch64] Prepare for split ZPR and PPR area allocation
(N
@@ -4308,26 +4398,33 @@ static int64_t
determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
"reference.");
#endif
- auto Assign = [&MFI](int FI, int64_t Offset) {
+ auto StackForObject = [&](int FI, uint64_t &ZPRStackTop,
+uint64_t &P
https://github.com/MacDue approved this pull request.
https://github.com/llvm/llvm-project/pull/146691
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
@@ -96,6 +432,1015 @@ define { <3 x float>, <3 x i32> }
@test_frexp_v3f16_v3i32(<3 x float> %a) {
; CHECK-NEXT:mov v0.16b, v2.16b
; CHECK-NEXT:add sp, sp, #80
; CHECK-NEXT:ret
+;
+; WINDOWS-LABEL: test_frexp_v3f16_v3i32:
+; WINDOWS: .seh_proc test_frexp_v3f16
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149064
>From af4a764539b38cac8a2b83b326f85da8e403289f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:47:48 +
Subject: [PATCH] [AArch64][SME] Support agnostic ZA functions in the
MachineSM
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149065
>From 2c9e14c536635939b7e162e9277911e73fd84283 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:48:03 +
Subject: [PATCH] [AArch64][SME] Avoid ZA save state changes in loops in
Machin
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149063
>From 4250bec812603b7520dc36f26d68a2c3834ad6cd Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:47:39 +
Subject: [PATCH] [AArch64][SME] Support Windows/stack probes in
MachineSMEABIP
https://github.com/MacDue created
https://github.com/llvm/llvm-project/pull/149510
This patch adds a propagation step to the MachineSMEABIPass that propagates
desired ZA states forwards (from predecessors to successors).
The aim of this is to pick better ZA states for edge bundles, as when man
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149510
>From c2d34149b2860cadf03824cc35a724775aaf60f8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 17:00:04 +
Subject: [PATCH] [AArch64][SME] Propagate desired ZA states in the
MachineSMEA
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149063
>From 4250bec812603b7520dc36f26d68a2c3834ad6cd Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:47:39 +
Subject: [PATCH 1/2] [AArch64][SME] Support Windows/stack probes in
MachineSME
@@ -465,23 +473,25 @@ void MachineSMEABI::emitAllocateLazySaveBuffer(
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
DebugLoc DL = getDebugLoc(MBB,
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149510
>From b3c21834c11b5ac3edb5fa0c819b5cd5f49be1af Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 17:00:04 +
Subject: [PATCH] [AArch64][SME] Propagate desired ZA states in the
MachineSMEA
https://github.com/MacDue created
https://github.com/llvm/llvm-project/pull/142391
This patch attempts to refactor AArch64FrameLowering to allow the size of the
ZPR and PPR areas to be calculated separately. This will be used by a
subsequent patch to support allocating ZPRs and PPRs to separat
https://github.com/MacDue edited
https://github.com/llvm/llvm-project/pull/142392
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
MacDue wrote:
This is a stacked PR. Please see the final PR for context.
1. https://github.com/llvm/llvm-project/pull/142390
2. :point_right: https://github.com/llvm/llvm-project/pull/142391
3. https://github.com/llvm/llvm-project/pull/142392
https://github.com/llvm/llvm-project/pull/142391
__
https://github.com/MacDue ready_for_review
https://github.com/llvm/llvm-project/pull/142391
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
https://github.com/MacDue ready_for_review
https://github.com/llvm/llvm-project/pull/142392
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
MacDue wrote:
This is a stacked PR. See previous PRs below:
1. https://github.com/llvm/llvm-project/pull/142390
2. https://github.com/llvm/llvm-project/pull/142391
3. :point_right: https://github.com/llvm/llvm-project/pull/142392
https://github.com/llvm/llvm-project/pull/142392
__
MacDue wrote:
> In the implementation you're interested in, is there not a hazard between
> PPRs and GPRs?
There are no hazards between PPRs and GPRs (those types of memory accesses can
both be considered as occurring on the CPU).
> What's the interaction between this and aarch64-enable-zpr-
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 55bd461f342d5dcca49b2bac2f2142be9214823a Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH] [AArch64] Prepare for split ZPR and PPR area allocation
(NFCI)
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 7b0003f249619b6b584d8a6501a2c6048deb1843 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH 1/2] [AArch64] Prepare for split ZPR and PPR area allocation
(N
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149064
>From 323b82187d616ec0f45791f348a776abd9c7f9d8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:47:48 +
Subject: [PATCH] [AArch64][SME] Support agnostic ZA functions in the
MachineSM
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149065
>From f182168eed68f0f61e2417fa7865e0dab233ba80 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:48:03 +
Subject: [PATCH] [AArch64][SME] Avoid ZA save state changes in loops in
Machin
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149063
>From 5539ca71f5adca55780ec1321102a87700c136b8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:47:39 +
Subject: [PATCH 1/2] [AArch64][SME] Support Windows/stack probes in
MachineSME
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149510
>From 0497093e683ab2b167060bbab1b19973e8422e1e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 17:00:04 +
Subject: [PATCH] [AArch64][SME] Propagate desired ZA states in the
MachineSMEA
MacDue wrote:
Not sure why the bot is asking me (I think it's fine, but I requested the
backport).
cc @arsenm, @RKSimon
https://github.com/llvm/llvm-project/pull/140703
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/142391
>From 42af819d001699ae6361d51e34d76a06fe956250 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 8 May 2025 17:38:27 +
Subject: [PATCH] [AArch64] Prepare for split ZPR and PPR area allocation
(NFCI)
MacDue wrote:
Rebased this PR stack on the changes from #138609... Which makes things even
hairier :sweat_smile: It would be nice if all these modes were not so
intertwined in the code.
https://github.com/llvm/llvm-project/pull/142392
___
llvm-bran
https://github.com/MacDue commented:
cc @efriedma-quic
https://github.com/llvm/llvm-project/pull/142741
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
MacDue wrote:
> If you express the size of the hazard padding between the PPRs and ZPRs as a
> scalable size, that might simplify some of the logic? You wouldn't need to
> represent the two areas as separate stacks, at least.
It would, but for the sizes of hazard padding and vscale we're inter
@@ -3780,25 +3938,49 @@ void AArch64FrameLowering::determineStackHazardSlot(
bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
return AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg) ||
- AArch64::ZPRRegCla
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149065
>From 88c0bb60a8f96b2d0850ab7019a92fd76a356133 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:48:03 +
Subject: [PATCH] [AArch64][SME] Avoid ZA save state changes in loops in
Machin
https://github.com/MacDue updated
https://github.com/llvm/llvm-project/pull/149064
>From 75b2bf03de4e77169325dfe5b15053c12e446696 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Jul 2025 11:47:48 +
Subject: [PATCH] [AArch64][SME] Support agnostic ZA functions in the
MachineSM
@@ -5248,49 +5248,94 @@
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
uint64_t EltSize = Op.getConstantOperandVal(2);
- EVT VT = Op.getValueType();
+ EVT FullVT = Op.
@@ -5248,49 +5248,94 @@
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
uint64_t EltSize = Op.getConstantOperandVal(2);
- EVT VT = Op.getValueType();
+ EVT FullVT = Op.
@@ -5248,49 +5248,94 @@
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
uint64_t EltSize = Op.getConstantOperandVal(2);
MacDue wrote:
nit: `EltSize` ->
@@ -5248,49 +5248,94 @@
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
uint64_t EltSize = Op.getConstantOperandVal(2);
- EVT VT = Op.getValueType();
+ EVT FullVT = Op.
1 - 100 of 117 matches
Mail list logo