From: Marek Olšák <marek.ol...@amd.com> This also fixes the scalar compare instructions, which were always eliminated because they did not define SCC as their destination.
Signed-off-by: Marek Olšák <marek.ol...@amd.com> --- lib/Target/R600/SIISelLowering.cpp | 30 ++++++++++++++++++++++++++---- lib/Target/R600/SIInsertWaits.cpp | 6 ++++++ lib/Target/R600/SIInstrInfo.td | 5 +++++ lib/Target/R600/SIInstructions.td | 26 +++++++++++++++----------- 4 files changed, 52 insertions(+), 15 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 371572e..e9f4035 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -14,6 +14,7 @@ #include "SIISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -302,14 +303,37 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { MachineBasicBlock::iterator I = *MI; + const SIInstrInfo *TII = + static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); + + // Sea Islands must conditionally execute SMRD instructions depending + // on the value of SQ_BUF_RSRC_WORD2.NUM_RECORDS, because the hardware + // doesn't skip the instructions if NUM_RECORDS is 0. + if (TII->isSMRD(MI->getOpcode())) { + if (getTargetMachine().getSubtarget<AMDGPUSubtarget>().getGeneration() != + AMDGPUSubtarget::SEA_ISLANDS) + return BB; + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + unsigned NumRecords = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + + // XXX should we save and restore the SCC register? 
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::COPY), NumRecords) + .addReg(MI->getOperand(1).getReg(), 0, AMDGPU::sub2); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CMPK_EQ_U32), AMDGPU::SCC) + .addReg(NumRecords) + .addImm(0); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CBRANCH_SCC1)) + .addImm(1) + .addReg(AMDGPU::SCC); + return BB; + } switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; case AMDGPU::SI_ADDR64_RSRC: { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned SuperReg = MI->getOperand(0).getReg(); unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); @@ -336,8 +360,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( break; } case AMDGPU::V_SUB_F64: { - const SIInstrInfo *TII = - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 7e42fb7..2e47346 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -294,6 +294,12 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, if (Counts.Named.EXP == 0) ExpInstrTypesSeen = 0; + // Ensure S_WAITCNT is inserted before S_CBRANCH. 
+ MachineBasicBlock::iterator beforeI = I; + --beforeI; + if (beforeI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) + I = beforeI; + // Build the wait instruction BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) .addImm((Counts.Named.VM & 0xF) | diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index ed42a2a..9567879 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -177,6 +177,11 @@ class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC < opName#" $dst, $src0, $src1", pattern >; +class SOPCK_32 <bits<7> op, string opName, list<dag> pattern> : SOPC < + op, (outs SCCReg:$dst), (ins SReg_32:$src0, i16imm:$src1), + opName#" $dst, $src0, $src1", pattern +>; + class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC < op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), opName#" $dst, $src0, $src1", pattern diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 048c157..1b275a7 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -115,17 +115,17 @@ def S_CMPK_EQ_I32 : SOPK < */ let isCompare = 1 in { -def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; -def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; -def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; -def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>; -def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>; -def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>; -def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>; -def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; -def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; -def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; -def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; +def S_CMPK_LG_I32 : SOPCK_32 <0x00000004, "S_CMPK_LG_I32", []>; +def S_CMPK_GT_I32 : SOPCK_32 <0x00000005, 
"S_CMPK_GT_I32", []>; +def S_CMPK_GE_I32 : SOPCK_32 <0x00000006, "S_CMPK_GE_I32", []>; +def S_CMPK_LT_I32 : SOPCK_32 <0x00000007, "S_CMPK_LT_I32", []>; +def S_CMPK_LE_I32 : SOPCK_32 <0x00000008, "S_CMPK_LE_I32", []>; +def S_CMPK_EQ_U32 : SOPCK_32 <0x00000009, "S_CMPK_EQ_U32", []>; +def S_CMPK_LG_U32 : SOPCK_32 <0x0000000a, "S_CMPK_LG_U32", []>; +def S_CMPK_GT_U32 : SOPCK_32 <0x0000000b, "S_CMPK_GT_U32", []>; +def S_CMPK_GE_U32 : SOPCK_32 <0x0000000c, "S_CMPK_GE_U32", []>; +def S_CMPK_LT_U32 : SOPCK_32 <0x0000000d, "S_CMPK_LT_U32", []>; +def S_CMPK_LE_U32 : SOPCK_32 <0x0000000e, "S_CMPK_LE_U32", []>; } // End isCompare = 1 def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; @@ -492,6 +492,8 @@ defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; +let usesCustomInserter = 1 in { + defm S_BUFFER_LOAD_DWORD : SMRD_Helper < 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 >; @@ -512,6 +514,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 >; +} // usesCustomInserter = 1 + } // mayLoad = 1 //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; -- 1.8.1.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev