Author: Diana Picus
Date: 2025-04-23T13:04:37+02:00
New Revision: 334e3a844e6b02e400cc83fed2f71b3fe273a42e
URL: https://github.com/llvm/llvm-project/commit/334e3a844e6b02e400cc83fed2f71b3fe273a42e
DIFF: https://github.com/llvm/llvm-project/commit/334e3a844e6b02e400cc83fed2f71b3fe273a42e.diff

LOG: Revert "[AMDGPU] Support block load/store for CSR (#130013)"

This reverts commit 4a58071d87265dfccba72134b25cf4d1595d98c5.

Added: 


Modified: 
    llvm/include/llvm/CodeGen/MachineFrameInfo.h
    llvm/include/llvm/CodeGen/TargetFrameLowering.h
    llvm/lib/CodeGen/PrologEpilogInserter.cpp
    llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/lib/Target/AMDGPU/SIFrameLowering.h
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.h
    llvm/unittests/Target/AMDGPU/CMakeLists.txt

Removed: 
    llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
    llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll
    llvm/test/CodeGen/AMDGPU/vgpr-blocks-funcinfo.mir
    llvm/unittests/Target/AMDGPU/LiveRegUnits.cpp


################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 9d1b536d23331..172c3e8c9a847 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -61,7 +61,6 @@ class CalleeSavedInfo {
   MCRegister getReg() const { return Reg; }
   int getFrameIdx() const { return FrameIdx; }
   MCRegister getDstReg() const { return DstReg; }
-  void setReg(MCRegister R) { Reg = R; }
   void setFrameIdx(int FI) {
     FrameIdx = FI;
     SpilledToReg = false;

diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 58b63f1769003..cdbefb36c00c7 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -270,14 +270,6 @@ class TargetFrameLowering {
     return false;
   }
 
-  /// spillCalleeSavedRegister - Default implementation for spilling a single
-  /// callee saved register.
-  void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock,
-                                MachineBasicBlock::iterator MI,
-                                const CalleeSavedInfo &CS,
-                                const TargetInstrInfo *TII,
-                                const TargetRegisterInfo *TRI) const;
-
   /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
   /// saved registers and returns true if it isn't possible / profitable to do
   /// so by issuing a series of load instructions via loadRegToStackSlot().
@@ -292,15 +284,6 @@ class TargetFrameLowering {
     return false;
   }
 
-  // restoreCalleeSavedRegister - Default implementation for restoring a single
-  // callee saved register. Should be called in reverse order. Can insert
-  // multiple instructions.
-  void restoreCalleeSavedRegister(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MI,
-                                  const CalleeSavedInfo &CS,
-                                  const TargetInstrInfo *TII,
-                                  const TargetRegisterInfo *TRI) const;
-
   /// hasFP - Return true if the specified function should have a dedicated
   /// frame pointer register. For most targets this is true only if the function
   /// has variable sized allocas or if frame pointer elimination is disabled.
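The two hooks deleted above were the target-overridable form of what the prologue/epilogue inserter does for each callee-saved register; this revert folds that behavior back into insertCSRSaves/insertCSRRestores (see the PrologEpilogInserter.cpp hunks right below). For orientation, a condensed sketch of the save side, assembled from the calls visible in this patch (BuildMI, storeRegToStackSlot); it is a reading aid, not a compilable excerpt:

    // For each CSR, either copy it to the target-chosen destination register
    // or store it to the slot assigned by assignCalleeSavedSpillSlots.
    for (const CalleeSavedInfo &CS : CSI) {
      MCRegister Reg = CS.getReg();
      if (CS.isSpilledToReg()) {
        // Saved in another register: a plain COPY suffices.
        BuildMI(SaveBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY),
                CS.getDstReg())
            .addReg(Reg, getKillRegState(true));
      } else {
        // Saved on the stack: the target picks the store for this class.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        TII.storeRegToStackSlot(SaveBlock, I, Reg, /*isKill=*/true,
                                CS.getFrameIdx(), RC, TRI, Register());
      }
    }

The restore side mirrors this with loadRegFromStackSlot and walks the CSRs in reverse, since loadRegFromStackSlot may expand to multiple instructions.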
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 0cd25c4feb8b9..9b852c0fd49cf 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
   // Now that we know which registers need to be saved and restored, allocate
   // stack slots for them.
   for (auto &CS : CSI) {
-    // If the target has spilled this register to another register or already
-    // handled it , we don't need to allocate a stack slot.
+    // If the target has spilled this register to another register, we don't
+    // need to allocate a stack slot.
     if (CS.isSpilledToReg())
       continue;
 
@@ -597,14 +597,25 @@ static void updateLiveness(MachineFunction &MF) {
 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                            ArrayRef<CalleeSavedInfo> CSI) {
   MachineFunction &MF = *SaveBlock.getParent();
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
 
   MachineBasicBlock::iterator I = SaveBlock.begin();
   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
     for (const CalleeSavedInfo &CS : CSI) {
-      TFI->spillCalleeSavedRegister(SaveBlock, I, CS, TII, TRI);
+      // Insert the spill to the stack frame.
+      MCRegister Reg = CS.getReg();
+
+      if (CS.isSpilledToReg()) {
+        BuildMI(SaveBlock, I, DebugLoc(),
+                TII.get(TargetOpcode::COPY), CS.getDstReg())
+          .addReg(Reg, getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+                                TRI, Register());
+      }
     }
   }
 }
@@ -613,7 +624,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                               std::vector<CalleeSavedInfo> &CSI) {
   MachineFunction &MF = *RestoreBlock.getParent();
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
 
@@ -623,7 +634,19 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
 
   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
     for (const CalleeSavedInfo &CI : reverse(CSI)) {
-      TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, TII, TRI);
+      MCRegister Reg = CI.getReg();
+      if (CI.isSpilledToReg()) {
+        BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
+          .addReg(CI.getDstReg(), getKillRegState(true));
+      } else {
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
+                                 TRI, Register());
+        assert(I != RestoreBlock.begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
+        // Insert in reverse order. loadRegFromStackSlot can insert
+        // multiple instructions.
+      }
     }
   }
 }

diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 70c3b2cbae9a6..be73b73c93989 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -15,7 +15,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
@@ -183,37 +182,3 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
   return DwarfFrameBase{DwarfFrameBase::Register,
                        {RI->getFrameRegister(MF).id()}};
 }
-
-void TargetFrameLowering::spillCalleeSavedRegister(
-    MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI,
-    const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
-    const TargetRegisterInfo *TRI) const {
-  // Insert the spill to the stack frame.
-  MCRegister Reg = CS.getReg();
-
-  if (CS.isSpilledToReg()) {
-    BuildMI(SaveBlock, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
-            CS.getDstReg())
-        .addReg(Reg, getKillRegState(true));
-  } else {
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII->storeRegToStackSlot(SaveBlock, MI, Reg, true, CS.getFrameIdx(), RC,
-                             TRI, Register());
-  }
-}
-
-void TargetFrameLowering::restoreCalleeSavedRegister(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-    const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
-    const TargetRegisterInfo *TRI) const {
-  MCRegister Reg = CS.getReg();
-  if (CS.isSpilledToReg()) {
-    BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
-        .addReg(CS.getDstReg(), getKillRegState(true));
-  } else {
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
-                              Register());
-    assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
-  }
-}

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d896589825fc7..b2098b41acb7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1275,14 +1275,6 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
   "Use a block size of 32 for dynamic VGPR allocation (default is 16)"
 >;
 
-// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
-// restoring the callee-saved registers.
-def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr", - "UseBlockVGPROpsForCSR", - "true", - "Use block load/store for VGPR callee saved registers" ->; - def FeatureLshlAddU64Inst : SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true", "Has v_lshl_add_u64 instruction">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 2dec16de940d1..3d6b974d1f027 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -19,7 +19,6 @@ #include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCExpr.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Constants.h" @@ -244,36 +243,6 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV, return AsmPrinter::lowerConstant(CV, BaseCV, Offset); } -static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII, - const TargetRegisterInfo *TRI, - const SIMachineFunctionInfo *MFI, - MCStreamer &OS) { - // The instruction will only transfer a subset of the registers in the block, - // based on the mask that is stored in m0. We could search for the instruction - // that sets m0, but most of the time we'll already have the mask stored in - // the machine function info. Try to use that. This assumes that we only use - // block loads/stores for CSR spills. - Register RegBlock = - TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst - : AMDGPU::OpName::vdata) - ->getReg(); - Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0); - uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock); - - if (!Mask) - return; // Nothing to report - - SmallString<512> TransferredRegs; - for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) { - if (Mask & (1 << I)) { - (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I)) - .toVector(TransferredRegs); - } - } - - OS.emitRawComment(" transferring at most " + TransferredRegs); -} - void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { // FIXME: Enable feature predicate checks once all the test pass. // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(), @@ -362,12 +331,6 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { return; } - if (isVerbose()) - if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode())) - emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(), - MF->getInfo<SIMachineFunctionInfo>(), - *OutStreamer); - MCInst TmpInst; MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index fea17baa17722..7dd91c0775a48 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -262,7 +262,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasPointSampleAccel = false; bool RequiresCOV6 = false; - bool UseBlockVGPROpsForCSR = false; // Dummy feature to use for assembler in tablegen. 
bool FeatureDisable = false; @@ -1278,8 +1277,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool requiresCodeObjectV6() const { return RequiresCOV6; } - bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; } - bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 0c1cd9ceddb02..9c737b4f3e378 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1739,105 +1739,6 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, } } -static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, - const GCNSubtarget &ST, - std::vector<CalleeSavedInfo> &CSI, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex) { - SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); - MachineFrameInfo &MFI = MF.getFrameInfo(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - - assert(std::is_sorted(CSI.begin(), CSI.end(), - [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) { - return A.getReg() < B.getReg(); - }) && - "Callee saved registers not sorted"); - - auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) { - return !CSI.isSpilledToReg() && - TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass && - !FuncInfo->isWWMReservedRegister(CSI.getReg()); - }; - - auto CSEnd = CSI.end(); - for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) { - Register Reg = CSIt->getReg(); - if (!CanUseBlockOps(*CSIt)) - continue; - - // Find all the regs that will fit in a 32-bit mask starting at the current - // reg and build said mask. It should have 1 for every register that's - // included, with the current register as the least significant bit. - uint32_t Mask = 1; - CSEnd = std::remove_if( - CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool { - if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) { - Mask |= 1 << (CSI.getReg() - Reg); - return true; - } else { - return false; - } - }); - - const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF); - Register RegBlock = - TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass); - if (!RegBlock) { - // We couldn't find a super register for the block. This can happen if - // the register we started with is too high (e.g. v232 if the maximum is - // v255). We therefore try to get the last register block and figure out - // the mask from there. - Register LastBlockStart = - AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32); - RegBlock = - TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass); - assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) && - "Couldn't find super register"); - int RegDelta = Reg - LastBlockStart; - assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta && - "Bad shift amount"); - Mask <<= RegDelta; - } - - FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask); - - // The stack objects can be a bit smaller than the register block if we know - // some of the high bits of Mask are 0. This may happen often with calling - // conventions where the caller and callee-saved VGPRs are interleaved at - // a small boundary (e.g. 8 or 16). 
- int UnusedBits = llvm::countl_zero(Mask); - unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4; - int FrameIdx = - MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass), - /*isSpillSlot=*/true); - if ((unsigned)FrameIdx < MinCSFrameIndex) - MinCSFrameIndex = FrameIdx; - if ((unsigned)FrameIdx > MaxCSFrameIndex) - MaxCSFrameIndex = FrameIdx; - - CSIt->setFrameIdx(FrameIdx); - CSIt->setReg(RegBlock); - } - CSI.erase(CSEnd, CSI.end()); -} - -bool SIFrameLowering::assignCalleeSavedSpillSlots( - MachineFunction &MF, const TargetRegisterInfo *TRI, - std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex) const { - if (CSI.empty()) - return true; // Early exit if no callee saved registers are modified! - - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR(); - - if (UseVGPRBlocks) - assignSlotsUsingVGPRBlocks(MF, ST, CSI, MinCSFrameIndex, MaxCSFrameIndex); - - return assignCalleeSavedSpillSlots(MF, TRI, CSI) || UseVGPRBlocks; -} - bool SIFrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { @@ -1907,111 +1808,6 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP( return true; } -bool SIFrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { - MachineFunction *MF = MBB.getParent(); - const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); - if (!ST.useVGPRBlockOpsForCSR()) - return false; - - MachineFrameInfo &FrameInfo = MF->getFrameInfo(); - SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - const SIInstrInfo *TII = ST.getInstrInfo(); - SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>(); - - const TargetRegisterClass *BlockRegClass = - static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF); - for (const CalleeSavedInfo &CS : CSI) { - Register Reg = CS.getReg(); - if (!BlockRegClass->contains(Reg) || - !FuncInfo->hasMaskForVGPRBlockOps(Reg)) { - spillCalleeSavedRegister(MBB, MI, CS, TII, TRI); - continue; - } - - // Build a scratch block store. - uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg); - int FrameIndex = CS.getFrameIdx(); - MachinePointerInfo PtrInfo = - MachinePointerInfo::getFixedStack(*MF, FrameIndex); - MachineMemOperand *MMO = - MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - FrameInfo.getObjectSize(FrameIndex), - FrameInfo.getObjectAlign(FrameIndex)); - - BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE)) - .addReg(Reg, getKillRegState(false)) - .addFrameIndex(FrameIndex) - .addReg(MFI->getStackPtrOffsetReg()) - .addImm(0) - .addImm(Mask) - .addMemOperand(MMO); - - FuncInfo->setHasSpilledVGPRs(); - - // Add the register to the liveins. This is necessary because if any of the - // VGPRs in the register block is reserved (e.g. if it's a WWM register), - // then the whole block will be marked as reserved and `updateLiveness` will - // skip it. 
- MBB.addLiveIn(Reg); - } - MBB.sortUniqueLiveIns(); - - return true; -} - -bool SIFrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { - MachineFunction *MF = MBB.getParent(); - const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); - if (!ST.useVGPRBlockOpsForCSR()) - return false; - - SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>(); - MachineFrameInfo &MFI = MF->getFrameInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); - const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI); - const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF); - for (const CalleeSavedInfo &CS : reverse(CSI)) { - Register Reg = CS.getReg(); - if (!BlockRegClass->contains(Reg) || - !FuncInfo->hasMaskForVGPRBlockOps(Reg)) { - restoreCalleeSavedRegister(MBB, MI, CS, TII, TRI); - continue; - } - - // Build a scratch block load. - uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg); - int FrameIndex = CS.getFrameIdx(); - MachinePointerInfo PtrInfo = - MachinePointerInfo::getFixedStack(*MF, FrameIndex); - MachineMemOperand *MMO = MF->getMachineMemOperand( - PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), - MFI.getObjectAlign(FrameIndex)); - - auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg) - .addFrameIndex(FrameIndex) - .addReg(FuncInfo->getStackPtrOffsetReg()) - .addImm(0) - .addImm(Mask) - .addMemOperand(MMO); - SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg); - - // Add the register to the liveins. This is necessary because if any of the - // VGPRs in the register block is reserved (e.g. if it's a WWM register), - // then the whole block will be marked as reserved and `updateLiveness` will - // skip it. 
- MBB.addLiveIn(Reg); - } - - MBB.sortUniqueLiveIns(); - return true; -} - MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index a72772987262e..9dac4bc8951e5 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -49,23 +49,6 @@ class SIFrameLowering final : public AMDGPUFrameLowering { const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const override; - bool assignCalleeSavedSpillSlots(MachineFunction &MF, - const TargetRegisterInfo *TRI, - std::vector<CalleeSavedInfo> &CSI, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex) const override; - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - ArrayRef<CalleeSavedInfo> CSI, - const TargetRegisterInfo *TRI) const override; - - bool - restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - MutableArrayRef<CalleeSavedInfo> CSI, - const TargetRegisterInfo *TRI) const override; - bool allocateScavengingFrameIndexesNearIncomingSP( const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 4b97f58ce92b9..a3a54659d299a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -665,20 +665,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::FLAT; } - static bool isBlockLoadStore(uint16_t Opcode) { - switch (Opcode) { - case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: - case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: - case AMDGPU::SCRATCH_STORE_BLOCK_SADDR: - case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR: - case AMDGPU::SCRATCH_STORE_BLOCK_SVS: - case AMDGPU::SCRATCH_LOAD_BLOCK_SVS: - return true; - default: - return false; - } - } - static bool isEXP(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::EXP; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index a144ae2104da6..ed45cf8851146 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1025,16 +1025,13 @@ def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst), // VGPR or AGPR spill instructions. In case of AGPR spilling a temp register // needs to be used and an extra instruction to move between VGPR and AGPR. // UsesTmp adds to the total size of an expanded spill in this case. 
-multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, - bit UsesTmp = 0, bit HasMask = 0> { +multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, bit UsesTmp = 0> { let UseNamedOperandTable = 1, Spill = 1, VALU = 1, SchedRW = [WriteVMEM] in { def _SAVE : VPseudoInstSI < (outs), - !con( - (ins vgpr_class:$vdata, i32imm:$vaddr, - SReg_32:$soffset, i32imm:$offset), - !if(HasMask, (ins SReg_32:$mask), (ins)))> { + (ins vgpr_class:$vdata, i32imm:$vaddr, + SReg_32:$soffset, i32imm:$offset)> { let mayStore = 1; let mayLoad = 0; // (2 * 4) + (8 * num_subregs) bytes maximum @@ -1045,10 +1042,8 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class, def _RESTORE : VPseudoInstSI < (outs vgpr_class:$vdata), - !con( - (ins i32imm:$vaddr, - SReg_32:$soffset, i32imm:$offset), - !if(HasMask, (ins SReg_32:$mask), (ins)))> { + (ins i32imm:$vaddr, + SReg_32:$soffset, i32imm:$offset)> { let mayStore = 0; let mayLoad = 1; @@ -1076,12 +1071,6 @@ defm SI_SPILL_V384 : SI_SPILL_VGPR <VReg_384>; defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>; defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>; -let Defs = [M0] in { - // Spills a block of 32 VGPRs. M0 will contain a mask describing which - // registers in the block need to be transferred. - defm SI_BLOCK_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024, 0, 1>; -} - defm SI_SPILL_A32 : SI_SPILL_VGPR <AGPR_32, 1>; defm SI_SPILL_A64 : SI_SPILL_VGPR <AReg_64, 1>; defm SI_SPILL_A96 : SI_SPILL_VGPR <AReg_96, 1>; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 9c1014a0e5cfe..a60409b5a7e09 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -388,16 +388,6 @@ class PrologEpilogSGPRSaveRestoreInfo { SGPRSaveKind getKind() const { return Kind; } }; -const MCRegister FirstVGPRBlock = AMDGPU::VReg_1024RegClass.getRegister(0); - -struct VGPRBlock2IndexFunctor { - using argument_type = Register; - unsigned operator()(Register Reg) const { - assert(AMDGPU::VReg_1024RegClass.contains(Reg) && "Expecting a VGPR block"); - return Reg - FirstVGPRBlock; - } -}; - /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo final : public AMDGPUMachineFunction, @@ -584,11 +574,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // frame, so save it here and add it to the RegScavenger later. std::optional<int> ScavengeFI; - // Map each VGPR CSR to the mask needed to save and restore it using block - // load/store instructions. Only used if the subtarget feature for VGPR block - // load/store is enabled. 
- IndexedMap<uint32_t, VGPRBlock2IndexFunctor> MaskForVGPRBlockOps; - private: Register VGPRForAGPRCopy; @@ -609,19 +594,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const; - void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask) { - MaskForVGPRBlockOps.grow(RegisterBlock); - MaskForVGPRBlockOps[RegisterBlock] = Mask; - } - - uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const { - return MaskForVGPRBlockOps[RegisterBlock]; - } - - bool hasMaskForVGPRBlockOps(Register RegisterBlock) const { - return MaskForVGPRBlockOps.inBounds(RegisterBlock); - } - public: SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default; SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI); @@ -662,10 +634,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, const WWMSpillsMap &getWWMSpills() const { return WWMSpills; } const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; } - bool isWWMReservedRegister(Register Reg) const { - return WWMReservedRegs.contains(Reg); - } - ArrayRef<PrologEpilogSGPRSpill> getPrologEpilogSGPRSpills() const { assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first())); return PrologEpilogSGPRSpills; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 0e4cd12e57d77..c1ac9491b2363 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1177,18 +1177,9 @@ SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { return RC; } -static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, - const SIInstrInfo *TII) { +static unsigned getNumSubRegsForSpillOp(unsigned Op) { - unsigned Op = MI.getOpcode(); switch (Op) { - case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: - case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: - // FIXME: This assumes the mask is statically known and not computed at - // runtime. However, some ABIs may want to compute the mask dynamically and - // this will need to be updated. - return llvm::popcount( - (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm()); case AMDGPU::SI_SPILL_S1024_SAVE: case AMDGPU::SI_SPILL_S1024_RESTORE: case AMDGPU::SI_SPILL_V1024_SAVE: @@ -1529,10 +1520,6 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, bool UseST = !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr); - // Handle block load/store first. - if (TII->isBlockLoadStore(LoadStoreOp)) - return LoadStoreOp; - switch (EltSize) { case 4: LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR @@ -1577,7 +1564,6 @@ void SIRegisterInfo::buildSpillLoadStore( const MCInstrDesc *Desc = &TII->get(LoadStoreOp); bool IsStore = Desc->mayStore(); bool IsFlat = TII->isFLATScratch(LoadStoreOp); - bool IsBlock = TII->isBlockLoadStore(LoadStoreOp); bool CanClobberSCC = false; bool Scavenged = false; @@ -1590,10 +1576,7 @@ void SIRegisterInfo::buildSpillLoadStore( // Always use 4 byte operations for AGPRs because we need to scavenge // a temporary VGPR. - // If we're using a block operation, the element should be the whole block. - unsigned EltSize = IsBlock ? RegWidth - : (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) - : 4u; + unsigned EltSize = (IsFlat && !IsAGPR) ? 
std::min(RegWidth, 16u) : 4u; unsigned NumSubRegs = RegWidth / EltSize; unsigned Size = NumSubRegs * EltSize; unsigned RemSize = RegWidth - Size; @@ -1748,7 +1731,6 @@ void SIRegisterInfo::buildSpillLoadStore( LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp); } else { assert(ST.hasFlatScratchSTMode()); - assert(!TII->isBlockLoadStore(LoadStoreOp) && "Block ops don't have ST"); LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp); } @@ -1957,14 +1939,6 @@ void SIRegisterInfo::buildSpillLoadStore( MIB.addReg(SubReg, RegState::Implicit); MIB->tieOperands(0, MIB->getNumOperands() - 1); } - - // If we're building a block load, we should add artificial uses for the - // CSR VGPRs that are *not* being transferred. This is because liveness - // analysis is not aware of the mask, so we need to somehow inform it that - // those registers are not available before the load and they should not be - // scavenged. - if (!IsStore && TII->isBlockLoadStore(LoadStoreOp)) - addImplicitUsesForBlockCSRLoad(MIB, ValueReg); } if (ScratchOffsetRegDelta != 0) { @@ -1975,18 +1949,6 @@ void SIRegisterInfo::buildSpillLoadStore( } } -void SIRegisterInfo::addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, - Register BlockReg) const { - const MachineFunction *MF = MIB->getParent()->getParent(); - const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>(); - uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(BlockReg); - Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0); - for (unsigned RegOffset = 1; RegOffset < 32; ++RegOffset) - if (!(Mask & (1 << RegOffset)) && - isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF)) - MIB.addUse(BaseVGPR + RegOffset, RegState::Implicit); -} - void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill) const { @@ -2405,13 +2367,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } // VGPR register spill - case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: { - // Put mask into M0. - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); - LLVM_FALLTHROUGH; - } case AMDGPU::SI_SPILL_V1024_SAVE: case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V384_SAVE: @@ -2472,10 +2427,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!"); Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16; } else { - Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE - ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR - : ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR - : AMDGPU::BUFFER_STORE_DWORD_OFFSET; + Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR + : AMDGPU::BUFFER_STORE_DWORD_OFFSET; } auto *MBB = MI->getParent(); @@ -2488,20 +2441,13 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS); - MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(*MI, TII)); + MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); if (IsWWMRegSpill) TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); MI->eraseFromParent(); return true; } - case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: { - // Put mask into M0. 
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0) - .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask)); - LLVM_FALLTHROUGH; - } case AMDGPU::SI_SPILL_V16_RESTORE: case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_V64_RESTORE: @@ -2557,17 +2503,14 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!"); Opc = AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16; } else { - Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE - ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR - : ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR - : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; + Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR + : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; } - auto *MBB = MI->getParent(); bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); if (IsWWMRegSpill) { TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), - RS->isRegUsed(AMDGPU::SCC)); + RS->isRegUsed(AMDGPU::SCC)); } buildSpillLoadStore( diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index a4b135d5e0b59..f3068963fd10f 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -26,7 +26,6 @@ namespace llvm { class GCNSubtarget; class LiveIntervals; class LiveRegUnits; -class MachineInstrBuilder; class RegisterBank; struct SGPRSpillBuilder; @@ -116,16 +115,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { return 100; } - // When building a block VGPR load, we only really transfer a subset of the - // registers in the block, based on a mask. Liveness analysis is not aware of - // the mask, so it might consider that any register in the block is available - // before the load and may therefore be scavenged. This is not ok for CSRs - // that are not clobbered, since the caller will expect them to be preserved. - // This method will add artificial implicit uses for those registers on the - // load instruction, so liveness analysis knows they're unavailable. 
- void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, - Register BlockReg) const; - const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override; @@ -169,11 +158,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - const TargetRegisterClass * - getRegClassForBlockOp(const MachineFunction &MF) const { - return &AMDGPU::VReg_1024RegClass; - } - void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill = true) const; diff --git a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir deleted file mode 100644 index 086390f575fbb..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir +++ /dev/null @@ -1,294 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,+wavefrontsize32,-wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,W32 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr,-wavefrontsize32,+wavefrontsize64 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,W64 - ---- | - define void @one_block() { ret void } - define void @one_block_csr_only() { ret void } - define void @multiple_blocks() { ret void } - define void @reg_tuples() { ret void } - define void @locals() { ret void } - define void @other_regs() { ret void } - define amdgpu_kernel void @entry_func() { ret void } - define void @multiple_basic_blocks() { ret void } -... - -# Block load/store v42 and v45. The mask should be 0x9. 
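Before the test itself, it helps to spell out where the 0x9 comes from. The reverted assignSlotsUsingVGPRBlocks (SIFrameLowering.cpp hunk above) builds the mask as Mask |= 1 << (Reg - BlockStart), with the first spilled CSR of the block at bit 0. A standalone sketch of that computation; the plain integers stand in for v42/v45 and are purely illustrative:

    #include <cassert>
    #include <cstdint>

    // Bit i corresponds to the i-th VGPR of the block, counting from the
    // first callee-saved VGPR that starts the block.
    uint32_t blockMask(unsigned BlockStart, const unsigned *Regs, int N) {
      uint32_t Mask = 0;
      for (int I = 0; I < N; ++I)
        Mask |= 1u << (Regs[I] - BlockStart);
      return Mask;
    }

    int main() {
      const unsigned Saved[] = {42, 45};      // v42 and v45
      assert(blockMask(42, Saved, 2) == 0x9); // bits 0 and 3 -> 0b1001
    }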
- ---- -name: one_block -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -body: | - bb.0: - liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: one_block - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 9 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - ; CHECK-NEXT: $m0 = S_MOV_B32 9 - ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - S_SETPC_B64_return $sgpr30_sgpr31 -... - -# Block load/store v40-47 and v56-63 (v48-55 and v64-71 are caller-saved). The -# mask should be 0x00FF00FF. 
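The same arithmetic gives this mask: the block is based at v40, so v40-v47 set bits 0-7 and v56-v63 set bits 16-23, i.e. 0x00FF00FF, which is the 16711935 the CHECK lines below move into m0. A standalone check (plain integers, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Mask = 0;
      for (unsigned R = 40; R <= 47; ++R) // v40-v47 -> bits 0-7
        Mask |= 1u << (R - 40);
      for (unsigned R = 56; R <= 63; ++R) // v56-v63 -> bits 16-23
        Mask |= 1u << (R - 40);
      assert(Mask == 0x00FF00FFu);
      assert(Mask == 16711935u); // matches $m0 = S_MOV_B32 16711935
    }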
- --- -name: one_block_csr_only -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -body: | - bb.0: - liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: one_block_csr_only - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 16711935 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 - ; CHECK-NEXT: $m0 = S_MOV_B32 16711935 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66 - S_SETPC_B64_return $sgpr30_sgpr31 -... - -# Block load/store to/from different blocks. -# Note the mask for storing v232, which is 0x100 because we have to start the -# block at v224 (since the upper limit is 255). For the same reason, the first -# stack slot will be 36 bytes long (the first 32 will be empty, since the memory -# will not get compacted). The second slot, which will hold registers v104 and -# v110, will be 28 bytes long, and finally the third, holding registers v40 and -# v41, will be 8 bytes long.
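The numbers in the comment above follow from the reverted code: when no 32-register block can start at the spilled register, assignSlotsUsingVGPRBlocks rebases the block on the last 32-aligned VGPR and shifts the mask (Mask <<= RegDelta), and the stack slot shrinks by 4 bytes per leading zero bit of the mask (BlockSize = SpillSize - UnusedBits * 4, where a VReg_1024 block spills 128 bytes). A standalone check of the 0x100 mask and the 36/28/8 byte slot sizes; the integers stand in for register numbers:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Slot size: 128 bytes for a full 32-VGPR block, minus 4 bytes per
    // leading zero of the mask (BlockSize = SpillSize - UnusedBits * 4).
    unsigned slotBytes(uint32_t Mask) { return 128 - std::countl_zero(Mask) * 4; }

    int main() {
      unsigned Base = 232 - 232 % 32;     // block for v232 must start at v224
      uint32_t Mask = 1u << (232 - Base); // 0x100, the S_MOV_B32 256 below
      assert(Mask == 0x100 && slotBytes(Mask) == 36);
      assert(slotBytes(0x41) == 28); // v104 and v110: mask 65
      assert(slotBytes(0x3) == 8);   // v40 and v41: mask 3
    }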
---- -name: multiple_blocks -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -body: | - bb.0: - liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: multiple_blocks - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 65 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 256 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 - ; CHECK-NEXT: $m0 = S_MOV_B32 256 - ; CHECK-NEXT: $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 65 - ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 36, 0, implicit 
$exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232 - S_SETPC_B64_return $sgpr30_sgpr31 -... - -# Make sure we handle register tuples correctly, even when they're straddling -# the boundary between blocks. The first mask should be 0x00000007 (the bottom -# 2 registers from the second tuple are not callee saves), the second -# 0x00000003. - ---- -name: reg_tuples -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -body: | - bb.0: - liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: reg_tuples - ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 7 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: $m0 = S_MOV_B32 3 - ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit 
$vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: $m0 = S_MOV_B32 7 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73 - S_SETPC_B64_return $sgpr30_sgpr31 -... - -# Make sure we don't overwrite any stack variables. - ---- -name: locals -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -stack: -- { id: 0, type: default, offset: 0, size: 12, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: 0, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -- { id: 1, type: default, offset: 12, size: 20, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: 0, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -body: | - bb.0: - liveins: $sgpr30_sgpr31, $vgpr48 - ; CHECK-LABEL: name: locals - ; CHECK: liveins: $vgpr48, $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5) - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40 - ; CHECK-NEXT: $m0 = S_MOV_B32 1 - ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5) 
- ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - S_NOP 0, implicit-def $vgpr40 - S_SETPC_B64_return $sgpr30_sgpr31 -... - -# Make sure we don't break SGPR or WWM handling, and also that we don't -# block-spill WWM VGPRs that have already been spilled (the mask for the block -# load/store should be 0x9 because we don't want to include v41 or v42). -# Use all VGPRs up to v40, so the WWM registers v41 and v42 and the VGPR used -# for SGPR spills remain within the block. - ---- -name: other_regs -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 - wwmReservedRegs: - - '$vgpr41' - - '$vgpr42' -body: | - bb.0: - liveins: $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 - ; W32-LABEL: name: other_regs - ; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; W32-NEXT: {{ $}} - ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) - ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 - ; W32-NEXT: $m0 = S_MOV_B32 9 - ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) - ; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44 - ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec - ; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 - ; W32-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0 - ; W32-NEXT: $m0 = S_MOV_B32 9 - ; W32-NEXT: 
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5) - ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; W32-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; W32-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) - ; W32-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) - ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 - ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - ; - ; W64-LABEL: name: other_regs - ; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 - ; W64-NEXT: {{ $}} - ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) - ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 - ; W64-NEXT: $m0 = S_MOV_B32 9 - ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5) - ; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44 - ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec - ; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 - ; W64-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0 - ; 
W64-NEXT: $m0 = S_MOV_B32 9 - ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5) - ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; W64-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; W64-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) - ; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) - ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 - ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec - S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40 - - S_SETPC_B64_return $sgpr30_sgpr31 -... - -# Make sure we don't break anything for entry functions. - ---- -name: entry_func -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -body: | - bb.0: - liveins: $sgpr30_sgpr31 - ; CHECK-LABEL: name: entry_func - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45, implicit-def $vgpr51 - S_SETPC_B64_return $sgpr30_sgpr31 -... 
- ---- -name: multiple_basic_blocks -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -body: | - ; CHECK-LABEL: name: multiple_basic_blocks - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 11 - ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr44, $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 11 - ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 - bb.0: - liveins: $sgpr30_sgpr31, $vgpr44 - S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45 - S_BRANCH %bb.1 - - bb.1: - liveins: $sgpr30_sgpr31, $vgpr44 - S_NOP 0, implicit-def $vgpr43, implicit $vgpr44 - S_BRANCH %bb.2 - - bb.2: - liveins: $sgpr30_sgpr31 - S_SETPC_B64_return $sgpr30_sgpr31 -... 
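[Editor's note, not part of the patch] For readers following the m0 values in the checks above: the mask for a 32-register block transfer selects which lanes of the block are actually saved or restored, with bit I corresponding to register BlockStart + I. Below is a minimal standalone C++ sketch of that computation; the helper name computeBlockMask is hypothetical and is not the reverted SIFrameLowering/SIRegisterInfo code.

#include <cstdint>
#include <initializer_list>

// Bit I of the result corresponds to register BlockStart + I within the
// 32-wide VGPR block; registers outside the block are ignored.
static uint32_t computeBlockMask(unsigned BlockStart,
                                 std::initializer_list<unsigned> Regs) {
  uint32_t Mask = 0;
  for (unsigned Reg : Regs)
    if (Reg - BlockStart < 32) // unsigned wrap rejects Reg < BlockStart too
      Mask |= 1u << (Reg - BlockStart);
  return Mask;
}

int main() {
  // other_regs: the block starts at v40; only v40 and v43 are transferred
  // (the WWM registers v41/v42 are spilled separately), so the mask is
  // 0b1001 = 0x9, matching the S_MOV_B32 9 in the checks above.
  uint32_t OtherRegs = computeBlockMask(40, {40, 43});
  // multiple_basic_blocks: the block starts at v42 and v42, v43 and v45 are
  // clobbered, giving 0b1011 = 11.
  uint32_t MultiBB = computeBlockMask(42, {42, 43, 45});
  return (OtherRegs == 0x9 && MultiBB == 11) ? 0 : 1;
}

The same encoding explains the s_mov_b32 m0, 0x110003 in the spill-vgpr-block.ll test below: bits 0, 1, 16 and 20 select v40, v41, v56 and v60 relative to the v40 block start.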
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll deleted file mode 100644 index 91ad9742f7b28..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-block.ll +++ /dev/null @@ -1,93 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+block-vgpr-csr < %s | FileCheck -check-prefixes=CHECK,GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+block-vgpr-csr < %s | FileCheck -check-prefixes=CHECK,DAGISEL %s - -define i32 @non_entry_func(i32 %x) { -; CHECK-LABEL: non_entry_func: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 -; CHECK-NEXT: s_wait_expcnt 0x0 -; CHECK-NEXT: s_wait_samplecnt 0x0 -; CHECK-NEXT: s_wait_bvhcnt 0x0 -; CHECK-NEXT: s_wait_kmcnt 0x0 -; CHECK-NEXT: s_xor_saveexec_b32 s0, -1 -; CHECK-NEXT: scratch_store_b32 off, v2, s32 offset:100 ; 4-byte Folded Spill -; CHECK-NEXT: s_wait_alu 0xfffe -; CHECK-NEXT: s_mov_b32 exec_lo, s0 -; CHECK-NEXT: s_mov_b32 m0, 0x110003 -; CHECK-NEXT: v_writelane_b32 v2, s48, 0 -; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Spill -; CHECK-NEXT: scratch_store_block off, v[40:71], s32 offset:4 -; CHECK-NEXT: s_mov_b32 m0, 1 -; CHECK-NEXT: v_mov_b32_e32 v1, v0 -; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Spill -; CHECK-NEXT: scratch_store_block off, v[120:151], s32 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_nop -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Reload -; CHECK-NEXT: scratch_load_block v[120:151], off, s32 -; CHECK-NEXT: s_mov_b32 m0, 0x110003 -; CHECK-NEXT: scratch_store_b32 off, v1, s32 offset:88 -; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Reload -; CHECK-NEXT: scratch_load_block v[40:71], off, s32 offset:4 -; CHECK-NEXT: v_mov_b32_e32 v0, v1 -; CHECK-NEXT: v_readlane_b32 s48, v2, 0 -; CHECK-NEXT: s_xor_saveexec_b32 s0, -1 -; CHECK-NEXT: scratch_load_b32 v2, off, s32 offset:100 ; 4-byte Folded Reload -; CHECK-NEXT: s_wait_alu 0xfffe -; CHECK-NEXT: s_mov_b32 exec_lo, s0 -; CHECK-NEXT: s_wait_loadcnt 0x0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %local = alloca i32, i32 3, addrspace(5) - store i32 %x, ptr addrspace(5) %local - call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"() - ret i32 %x -} - -define amdgpu_kernel void @entry_func(i32 %x) { -; GISEL-LABEL: entry_func: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_mov_b64 s[10:11], s[6:7] -; GISEL-NEXT: s_load_b32 s6, s[4:5], 0x0 -; GISEL-NEXT: v_mov_b32_e32 v31, v0 -; GISEL-NEXT: s_mov_b64 s[12:13], s[0:1] -; GISEL-NEXT: ;;#ASMSTART -; GISEL-NEXT: s_nop -; GISEL-NEXT: ;;#ASMEND -; GISEL-NEXT: s_add_co_u32 s8, s4, 4 -; GISEL-NEXT: s_mov_b32 s0, non_entry_func@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, non_entry_func@abs32@hi -; GISEL-NEXT: s_add_co_ci_u32 s9, s5, 0 -; GISEL-NEXT: s_mov_b64 s[4:5], s[12:13] -; GISEL-NEXT: s_mov_b32 s32, 0 -; GISEL-NEXT: s_wait_kmcnt 0x0 -; GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: s_endpgm -; -; DAGISEL-LABEL: entry_func: -; DAGISEL: ; %bb.0: -; DAGISEL-NEXT: s_load_b32 s12, s[4:5], 0x0 -; DAGISEL-NEXT: s_mov_b64 s[10:11], s[6:7] -; DAGISEL-NEXT: v_mov_b32_e32 v31, v0 -; DAGISEL-NEXT: s_mov_b64 s[6:7], s[0:1] -; DAGISEL-NEXT: ;;#ASMSTART -; DAGISEL-NEXT: s_nop -; DAGISEL-NEXT: 
;;#ASMEND -; DAGISEL-NEXT: s_add_nc_u64 s[8:9], s[4:5], 4 -; DAGISEL-NEXT: s_mov_b32 s1, non_entry_func@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, non_entry_func@abs32@lo -; DAGISEL-NEXT: s_mov_b64 s[4:5], s[6:7] -; DAGISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; DAGISEL-NEXT: s_mov_b32 s32, 0 -; DAGISEL-NEXT: s_wait_kmcnt 0x0 -; DAGISEL-NEXT: v_mov_b32_e32 v0, s12 -; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: s_endpgm - call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"() - %res = call i32 @non_entry_func(i32 %x) - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-blocks-funcinfo.mir b/llvm/test/CodeGen/AMDGPU/vgpr-blocks-funcinfo.mir deleted file mode 100644 index 6ef1c33ed18f6..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/vgpr-blocks-funcinfo.mir +++ /dev/null @@ -1,47 +0,0 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+block-vgpr-csr -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs -o - %s | FileCheck %s - -# The spill slot for the VGPR block needs to hold v40 and v43, so it needs to be -# 16 bytes large. ---- -name: locals -tracksRegLiveness: true -machineFunctionInfo: - stackPtrOffsetReg: $sgpr32 -stack: -- { id: 0, type: default, offset: 0, size: 12, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: 0, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -- { id: 1, type: default, offset: 12, size: 20, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: 0, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -# CHECK-LABEL: name: locals -# CHECK: frameInfo: -# CHECK: stackSize: 52 -# CHECK: stack: -# CHECK-NEXT: - { id: 0, name: '', type: default, offset: 16, size: 12, alignment: 4, -# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, -# CHECK-NEXT: local-offset: 0, debug-info-variable: '', debug-info-expression: '', -# CHECK-NEXT: debug-info-location: '' } -# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 28, size: 20, alignment: 4, -# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, -# CHECK-NEXT: local-offset: 0, debug-info-variable: '', debug-info-expression: '', -# CHECK-NEXT: debug-info-location: '' } -# CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4, -# CHECK-NEXT: stack-id: default, callee-saved-register: '$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71', -# CHECK-NEXT: callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', -# CHECK-NEXT: debug-info-location: '' } -# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 48, size: 4, alignment: 4, -# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, -# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -# CHECK: machineFunctionInfo: -# CHECK: hasSpilledVGPRs: true -body: | - bb.0: - liveins: $sgpr30_sgpr31, $vgpr48 - SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, 
addrspace 5) - S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr43 - S_SETPC_B64_return $sgpr30_sgpr31 -... diff --git a/llvm/unittests/Target/AMDGPU/CMakeLists.txt b/llvm/unittests/Target/AMDGPU/CMakeLists.txt index d6cbaf3f3fb5d..6d6f17883a07e 100644 --- a/llvm/unittests/Target/AMDGPU/CMakeLists.txt +++ b/llvm/unittests/Target/AMDGPU/CMakeLists.txt @@ -23,6 +23,5 @@ add_llvm_target_unittest(AMDGPUTests CSETest.cpp DwarfRegMappings.cpp ExecMayBeModifiedBeforeAnyUse.cpp - LiveRegUnits.cpp PALMetadata.cpp ) diff --git a/llvm/unittests/Target/AMDGPU/LiveRegUnits.cpp b/llvm/unittests/Target/AMDGPU/LiveRegUnits.cpp deleted file mode 100644 index 95266dc853bfd..0000000000000 --- a/llvm/unittests/Target/AMDGPU/LiveRegUnits.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===--------- llvm/unittests/Target/AMDGPU/LiveRegUnits.cpp --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "AMDGPUTargetMachine.h" -#include "AMDGPUUnitTests.h" -#include "GCNSubtarget.h" -#include "llvm/CodeGen/MIRParser/MIRParser.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/TargetParser/TargetParser.h" -#include "gtest/gtest.h" - -#include "AMDGPUGenSubtargetInfo.inc" - -using namespace llvm; - -// FIXME: Consolidate parseMIR and other common helpers (this one is copied from -// unittests/MIR/MachineMetadata.cpp). -std::unique_ptr<Module> parseMIR(LLVMContext &Context, const TargetMachine &TM, - StringRef MIRCode, const char *FnName, - MachineModuleInfo &MMI) { - SMDiagnostic Diagnostic; - std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRCode); - auto MIR = createMIRParser(std::move(MBuffer), Context); - if (!MIR) - return nullptr; - - std::unique_ptr<Module> Mod = MIR->parseIRModule(); - if (!Mod) - return nullptr; - - Mod->setDataLayout(TM.createDataLayout()); - - if (MIR->parseMachineFunctions(*Mod, MMI)) { - return nullptr; - } - - return Mod; -} - -TEST(AMDGPULiveRegUnits, TestVGPRBlockLoadStore) { - auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx1200", ""); - ASSERT_TRUE(TM) << "No target machine"; - - GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), - std::string(TM->getTargetFeatureString()), *TM); - - // Add a very simple MIR snippet that saves and restores a block of VGPRs. The - // body of the function, represented by an S_NOP, clobbers one CSR (v42) and - // one caller-saved register (v49), and reads one CSR (v61) and one - // caller-saved register (v53).
- StringRef MIRString = R"MIR( -name: vgpr-block-insts -stack: -- { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4, - stack-id: default, callee-saved-register: '$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71', - callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -body: | - bb.0: - liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 - - $m0 = S_MOV_B32 1 - SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5) - S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr49, implicit $vgpr53, implicit $vgpr61 - $m0 = S_MOV_B32 1 - $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5) - S_SETPC_B64_return $sgpr30_sgpr31 -... -)MIR"; - - LLVMContext Context; - MachineModuleInfo MMI(TM.get()); - auto M = parseMIR(Context, *TM, MIRString, "vgpr-block-insts", MMI); - - auto *MF = MMI.getMachineFunction(*M->getFunction("vgpr-block-insts")); - auto *MBB = MF->getBlockNumbered(0); - - auto MIt = --MBB->instr_end(); - - LiveRegUnits LiveUnits; - LiveUnits.init(*ST.getRegisterInfo()); - - LiveUnits.addLiveOuts(*MBB); - LiveUnits.stepBackward(*MIt); - - // Right after the restore, we expect all the CSRs to be unavailable. - // Check v40-v88 (callee and caller saved regs interleaved in blocks of 8). - for (unsigned I = 0; I < 8; ++I) { - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR40 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR48 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR56 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR64 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR72 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR80 + I)) << "I = " << I; - } - - --MIt; - LiveUnits.stepBackward(*MIt); - - // Right before the restore, we expect the CSRs that are actually transferred - // (in this case v42) to be available. Everything else should be the same as - // before. 
- for (unsigned I = 0; I < 8; ++I) { - if (I == 2) - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR40 + I)) << "I = " << I; - else - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR40 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR48 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR56 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR64 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR72 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR80 + I)) << "I = " << I; - } - - --MIt; // Set m0 has no effect on VGPRs. - LiveUnits.stepBackward(*MIt); - --MIt; // S_NOP. - LiveUnits.stepBackward(*MIt); - - // The S_NOP uses one of the caller-saved registers (v53), so that won't be - // available anymore. - for (unsigned I = 0; I < 8; ++I) { - if (I == 2) - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR40 + I)) << "I = " << I; - else - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR40 + I)) << "I = " << I; - if (I == 5) - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR48 + I)) << "I = " << I; - else - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR48 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR56 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR64 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR72 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR80 + I)) << "I = " << I; - } - - --MIt; - LiveUnits.stepBackward(*MIt); - - // Right before the save, all the VGPRs in the block that we're saving will be - // unavailable, regardless of whether they're callee or caller saved. This is - // unfortunate and should probably be fixed somehow. - // VGPRs outside the block will only be unavailable if they're callee saved. - for (unsigned I = 0; I < 8; ++I) { - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR40 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR48 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR56 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR64 + I)) << "I = " << I; - EXPECT_FALSE(LiveUnits.available(AMDGPU::VGPR72 + I)) << "I = " << I; - EXPECT_TRUE(LiveUnits.available(AMDGPU::VGPR80 + I)) << "I = " << I; - } -}
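[Editor's note, not part of the patch] One more derived quantity from the deleted tests: the 16-byte spill slot checked in vgpr-blocks-funcinfo.mir falls out of the same mask. The slot only needs to reach the highest transferred lane, at 4 bytes per VGPR. A hedged sketch follows; blockSpillSlotSize is a hypothetical helper, not the reverted implementation.

#include <cstdint>

// The block spill slot spans lanes 0 through the highest set bit of the
// transfer mask, at 4 bytes per 32-bit VGPR lane. Assumes a nonzero mask.
static unsigned blockSpillSlotSize(uint32_t Mask) {
  unsigned HighestLane = 0;
  for (unsigned Lane = 0; Lane < 32; ++Lane)
    if (Mask & (1u << Lane))
      HighestLane = Lane;
  return (HighestLane + 1) * 4;
}

int main() {
  // vgpr-blocks-funcinfo.mir transfers v40 and v43 (mask 0x9), so the slot
  // covers lanes 0..3: 4 lanes * 4 bytes = 16 bytes, as the test expects.
  return blockSpillSlotSize(0x9) == 16 ? 0 : 1;
}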