I thought that doing S_CMPK followed by S_CBRANCH has less overhead than doing a memory read. If we used one of S_BUFFER_LOAD_DWORDX2,4,8,16, it wouldn't be so bad. I don't know.
Marek On Wed, Oct 30, 2013 at 2:48 PM, Christian König <deathsim...@vodafone.de> wrote: > Am 30.10.2013 14:23, schrieb Marek Olšák: > >> From: Marek Olšák <marek.ol...@amd.com> >> >> This also fixes scalar compare instructions which were always eliminated, >> because they didn't have a destination of SCC. > > > Uff, that looks like quite a bit of overhead, isn't there a simpler > approach? Like setting the the NumRecord to one and letting unused constants > pointing to a dummy buffer or soemthing like this? > > Christian. > > >> >> Signed-off-by: Marek Olšák <marek.ol...@amd.com> >> --- >> lib/Target/R600/SIISelLowering.cpp | 30 ++++++++++++++++++++++++++---- >> lib/Target/R600/SIInsertWaits.cpp | 6 ++++++ >> lib/Target/R600/SIInstrInfo.td | 5 +++++ >> lib/Target/R600/SIInstructions.td | 26 +++++++++++++++----------- >> 4 files changed, 52 insertions(+), 15 deletions(-) >> >> diff --git a/lib/Target/R600/SIISelLowering.cpp >> b/lib/Target/R600/SIISelLowering.cpp >> index 371572e..e9f4035 100644 >> --- a/lib/Target/R600/SIISelLowering.cpp >> +++ b/lib/Target/R600/SIISelLowering.cpp >> @@ -14,6 +14,7 @@ >> #include "SIISelLowering.h" >> #include "AMDGPU.h" >> +#include "AMDGPUSubtarget.h" >> #include "AMDILIntrinsicInfo.h" >> #include "SIInstrInfo.h" >> #include "SIMachineFunctionInfo.h" >> @@ -302,14 +303,37 @@ MachineBasicBlock * >> SITargetLowering::EmitInstrWithCustomInserter( >> MachineInstr * MI, MachineBasicBlock * BB) const { >> MachineBasicBlock::iterator I = *MI; >> + const SIInstrInfo *TII = >> + static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); >> + >> + // Sea Islands must conditionally execute SMRD instructions depending >> + // on the value of SQ_BUF_RSRC_WORD2.NUM_RECORDS, because the hardware >> + // doesn't skip the instructions if NUM_RECORDS is 0. >> + if (TII->isSMRD(MI->getOpcode())) { >> + if >> (getTargetMachine().getSubtarget<AMDGPUSubtarget>().getGeneration() != >> + AMDGPUSubtarget::SEA_ISLANDS) >> + return BB; >> + >> + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); >> + unsigned NumRecords = >> MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); >> + >> + // XXX should we save and restore the SCC register? >> + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::COPY), >> NumRecords) >> + .addReg(MI->getOperand(1).getReg(), 0, AMDGPU::sub2); >> + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CMPK_EQ_U32), >> AMDGPU::SCC) >> + .addReg(NumRecords) >> + .addImm(0); >> + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CBRANCH_SCC1)) >> + .addImm(1) >> + .addReg(AMDGPU::SCC); >> + return BB; >> + } >> switch (MI->getOpcode()) { >> default: >> return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); >> case AMDGPU::BRANCH: return BB; >> case AMDGPU::SI_ADDR64_RSRC: { >> - const SIInstrInfo *TII = >> - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); >> MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); >> unsigned SuperReg = MI->getOperand(0).getReg(); >> unsigned SubRegLo = >> MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); >> @@ -336,8 +360,6 @@ MachineBasicBlock * >> SITargetLowering::EmitInstrWithCustomInserter( >> break; >> } >> case AMDGPU::V_SUB_F64: { >> - const SIInstrInfo *TII = >> - static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); >> BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), >> MI->getOperand(0).getReg()) >> .addReg(MI->getOperand(1).getReg()) >> diff --git a/lib/Target/R600/SIInsertWaits.cpp >> b/lib/Target/R600/SIInsertWaits.cpp >> index 7e42fb7..2e47346 100644 >> --- a/lib/Target/R600/SIInsertWaits.cpp >> +++ b/lib/Target/R600/SIInsertWaits.cpp >> @@ -294,6 +294,12 @@ bool SIInsertWaits::insertWait(MachineBasicBlock >> &MBB, >> if (Counts.Named.EXP == 0) >> ExpInstrTypesSeen = 0; >> + // Ensure S_WAITCNT is inserted before S_CBRANCH. >> + MachineBasicBlock::iterator beforeI = I; >> + --beforeI; >> + if (beforeI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) >> + I = beforeI; >> + >> // Build the wait instruction >> BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) >> .addImm((Counts.Named.VM & 0xF) | >> diff --git a/lib/Target/R600/SIInstrInfo.td >> b/lib/Target/R600/SIInstrInfo.td >> index ed42a2a..9567879 100644 >> --- a/lib/Target/R600/SIInstrInfo.td >> +++ b/lib/Target/R600/SIInstrInfo.td >> @@ -177,6 +177,11 @@ class SOPC_32 <bits<7> op, string opName, list<dag> >> pattern> : SOPC < >> opName#" $dst, $src0, $src1", pattern >> >; >> +class SOPCK_32 <bits<7> op, string opName, list<dag> pattern> : SOPC < >> + op, (outs SCCReg:$dst), (ins SReg_32:$src0, i16imm:$src1), >> + opName#" $dst, $src0, $src1", pattern >> +>; >> + >> class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC < >> op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), >> opName#" $dst, $src0, $src1", pattern >> diff --git a/lib/Target/R600/SIInstructions.td >> b/lib/Target/R600/SIInstructions.td >> index 048c157..1b275a7 100644 >> --- a/lib/Target/R600/SIInstructions.td >> +++ b/lib/Target/R600/SIInstructions.td >> @@ -115,17 +115,17 @@ def S_CMPK_EQ_I32 : SOPK < >> */ >> let isCompare = 1 in { >> -def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; >> -def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; >> -def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; >> -def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>; >> -def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>; >> -def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>; >> -def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>; >> -def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; >> -def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; >> -def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; >> -def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; >> +def S_CMPK_LG_I32 : SOPCK_32 <0x00000004, "S_CMPK_LG_I32", []>; >> +def S_CMPK_GT_I32 : SOPCK_32 <0x00000005, "S_CMPK_GT_I32", []>; >> +def S_CMPK_GE_I32 : SOPCK_32 <0x00000006, "S_CMPK_GE_I32", []>; >> +def S_CMPK_LT_I32 : SOPCK_32 <0x00000007, "S_CMPK_LT_I32", []>; >> +def S_CMPK_LE_I32 : SOPCK_32 <0x00000008, "S_CMPK_LE_I32", []>; >> +def S_CMPK_EQ_U32 : SOPCK_32 <0x00000009, "S_CMPK_EQ_U32", []>; >> +def S_CMPK_LG_U32 : SOPCK_32 <0x0000000a, "S_CMPK_LG_U32", []>; >> +def S_CMPK_GT_U32 : SOPCK_32 <0x0000000b, "S_CMPK_GT_U32", []>; >> +def S_CMPK_GE_U32 : SOPCK_32 <0x0000000c, "S_CMPK_GE_U32", []>; >> +def S_CMPK_LT_U32 : SOPCK_32 <0x0000000d, "S_CMPK_LT_U32", []>; >> +def S_CMPK_LE_U32 : SOPCK_32 <0x0000000e, "S_CMPK_LE_U32", []>; >> } // End isCompare = 1 >> def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; >> @@ -492,6 +492,8 @@ defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, >> "S_LOAD_DWORDX4", SReg_64, SReg_128>; >> defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, >> SReg_256>; >> defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, >> SReg_512>; >> +let usesCustomInserter = 1 in { >> + >> defm S_BUFFER_LOAD_DWORD : SMRD_Helper < >> 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 >> >; >> @@ -512,6 +514,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < >> 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 >> >; >> +} // usesCustomInserter = 1 >> + >> } // mayLoad = 1 >> //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev