================ @@ -3780,25 +3938,49 @@ void AArch64FrameLowering::determineStackHazardSlot( bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) { return AArch64::FPR64RegClass.contains(Reg) || AArch64::FPR128RegClass.contains(Reg) || - AArch64::ZPRRegClass.contains(Reg) || - AArch64::PPRRegClass.contains(Reg); + AArch64::ZPRRegClass.contains(Reg); + }); + bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) { + return AArch64::PPRRegClass.contains(Reg); }); bool HasFPRStackObjects = false; - if (!HasFPRCSRs) { - std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd()); + bool HasPPRStackObjects = false; + if (!HasFPRCSRs || SplitSVEObjects) { + enum SlotType : uint8_t { + Unknown = 0, + ZPRorFPR = 1 << 0, + PPR = 1 << 1, + GPR = 1 << 2, + LLVM_MARK_AS_BITMASK_ENUM(GPR) + }; + + // Find stack slots solely used for one kind of register (ZPR, PPR, etc.), + // based on the kinds of accesses used in the function. + SmallVector<SlotType> SlotTypes(MFI.getObjectIndexEnd(), SlotType::Unknown); for (auto &MBB : MF) { for (auto &MI : MBB) { std::optional<int> FI = getLdStFrameID(MI, MFI); - if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) { - if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI)) - FrameObjects[*FI] |= 2; - else - FrameObjects[*FI] |= 1; + if (!FI || FI < 0 || FI > int(SlotTypes.size())) + continue; + bool IsScalable = MFI.isScalableStackID(*FI); + bool IsPPR = IsScalable && isPPRAccess(MI); + if (IsScalable || AArch64InstrInfo::isFpOrNEON(MI)) { + SlotTypes[*FI] |= IsPPR ? SlotType::PPR : SlotType::ZPRorFPR; + } else { + SlotTypes[*FI] |= SlotType::GPR; } } } - HasFPRStackObjects = - any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; }); + + for (int FI = 0; FI < int(SlotTypes.size()); ++FI) { + HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR; + // For SplitSVEObjects remember that this stack slot is a predicate, this + // will be needed later when determining the frame layout. 
+ if (SlotTypes[FI] == SlotType::PPR) { ---------------- MacDue wrote:
Which bit does not look right? `SlotType::PPR` (not to be confused with `SlotType::GPR`) is only set if the original stack ID was scalable and all accesses to that slot used predicate loads/stores. The original stack ID could be `ScalableVector`, because the earlier selection of the stack ID is based only on the type size. Therefore, we change it to `ScalablePredVector` here so that the slot can be sorted into the correct region. https://github.com/llvm/llvm-project/pull/142392 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits