Re: [Mesa-dev] [PATCH] R600: Control Flow support for pre EG gen
On Sun, Apr 07, 2013 at 09:43:43PM +0200, Vincent Lejeune wrote: Reviewed-by: Tom Stellard > --- > lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 30 ++-- > lib/Target/R600/R600ControlFlowFinalizer.cpp | 84 +++-- > lib/Target/R600/R600Instructions.td| 198 > +++-- > 3 files changed, 240 insertions(+), 72 deletions(-) > > diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > index 927bcbd..469a8ad 100644 > --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp > @@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst > &MI, raw_ostream &OS, >Emit(Inst, OS); >break; > } > -case AMDGPU::CF_TC: > -case AMDGPU::CF_VC: > -case AMDGPU::CF_CALL_FS: > +case AMDGPU::CF_TC_EG: > +case AMDGPU::CF_VC_EG: > +case AMDGPU::CF_CALL_FS_EG: > +case AMDGPU::CF_TC_R600: > +case AMDGPU::CF_VC_R600: > +case AMDGPU::CF_CALL_FS_R600: >return; > -case AMDGPU::WHILE_LOOP: > -case AMDGPU::END_LOOP: > -case AMDGPU::LOOP_BREAK: > -case AMDGPU::CF_CONTINUE: > -case AMDGPU::CF_JUMP: > -case AMDGPU::CF_ELSE: > -case AMDGPU::POP: { > +case AMDGPU::WHILE_LOOP_EG: > +case AMDGPU::END_LOOP_EG: > +case AMDGPU::LOOP_BREAK_EG: > +case AMDGPU::CF_CONTINUE_EG: > +case AMDGPU::CF_JUMP_EG: > +case AMDGPU::CF_ELSE_EG: > +case AMDGPU::POP_EG: > +case AMDGPU::WHILE_LOOP_R600: > +case AMDGPU::END_LOOP_R600: > +case AMDGPU::LOOP_BREAK_R600: > +case AMDGPU::CF_CONTINUE_R600: > +case AMDGPU::CF_JUMP_R600: > +case AMDGPU::CF_ELSE_R600: > +case AMDGPU::POP_R600: { >uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); >EmitByte(INSTR_NATIVE, OS); >Emit(Inst, OS); > diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp > b/lib/Target/R600/R600ControlFlowFinalizer.cpp > index 3a6c7ea..cfaa36e 100644 > --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp > +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp > @@ -30,9 +30,22 @@ namespace llvm { > class 
R600ControlFlowFinalizer : public MachineFunctionPass { > > private: > + enum ControlFlowInstruction { > +CF_TC, > +CF_CALL_FS, > +CF_WHILE_LOOP, > +CF_END_LOOP, > +CF_LOOP_BREAK, > +CF_LOOP_CONTINUE, > +CF_JUMP, > +CF_ELSE, > +CF_POP > + }; > + >static char ID; >const R600InstrInfo *TII; >unsigned MaxFetchInst; > + const AMDGPUSubtarget &ST; > >bool isFetch(const MachineInstr *MI) const { > switch (MI->getOpcode()) { > @@ -70,6 +83,52 @@ private: > } >} > > + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { > +if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) { > + switch (CFI) { > + case CF_TC: > +return TII->get(AMDGPU::CF_TC_R600); > + case CF_CALL_FS: > +return TII->get(AMDGPU::CF_CALL_FS_R600); > + case CF_WHILE_LOOP: > +return TII->get(AMDGPU::WHILE_LOOP_R600); > + case CF_END_LOOP: > +return TII->get(AMDGPU::END_LOOP_R600); > + case CF_LOOP_BREAK: > +return TII->get(AMDGPU::LOOP_BREAK_R600); > + case CF_LOOP_CONTINUE: > +return TII->get(AMDGPU::CF_CONTINUE_R600); > + case CF_JUMP: > +return TII->get(AMDGPU::CF_JUMP_R600); > + case CF_ELSE: > +return TII->get(AMDGPU::CF_ELSE_R600); > + case CF_POP: > +return TII->get(AMDGPU::POP_R600); > + } > +} else { > + switch (CFI) { > + case CF_TC: > +return TII->get(AMDGPU::CF_TC_EG); > + case CF_CALL_FS: > +return TII->get(AMDGPU::CF_CALL_FS_EG); > + case CF_WHILE_LOOP: > +return TII->get(AMDGPU::WHILE_LOOP_EG); > + case CF_END_LOOP: > +return TII->get(AMDGPU::END_LOOP_EG); > + case CF_LOOP_BREAK: > +return TII->get(AMDGPU::LOOP_BREAK_EG); > + case CF_LOOP_CONTINUE: > +return TII->get(AMDGPU::CF_CONTINUE_EG); > + case CF_JUMP: > +return TII->get(AMDGPU::CF_JUMP_EG); > + case CF_ELSE: > +return TII->get(AMDGPU::CF_ELSE_EG); > + case CF_POP: > +return TII->get(AMDGPU::POP_EG); > + } > +} > + } > + >MachineBasicBlock::iterator >MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, >unsigned CfAddress) const { > @@ -85,7 +144,7 @@ private: > break; > } > BuildMI(MBB, 
ClauseHead, MBB.findDebugLoc(ClauseHead), > -TII->get(AMDGPU::CF_TC)) > +getHWInstrDesc(CF_TC)) > .addImm(CfAddress) // ADDR > .addImm(AluInstCount); // COUNT > return I; > @@ -104,7 +163,8 @@ private: > > public: >R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), > -TII (static_cast(tm.getInstrInfo())) { > +TII (static_cast(tm.getInstr
[Mesa-dev] [PATCH] R600: Control Flow support for pre EG gen
--- lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 30 ++-- lib/Target/R600/R600ControlFlowFinalizer.cpp | 84 +++-- lib/Target/R600/R600Instructions.td| 198 +++-- 3 files changed, 240 insertions(+), 72 deletions(-) diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 927bcbd..469a8ad 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -266,17 +266,27 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(Inst, OS); break; } -case AMDGPU::CF_TC: -case AMDGPU::CF_VC: -case AMDGPU::CF_CALL_FS: +case AMDGPU::CF_TC_EG: +case AMDGPU::CF_VC_EG: +case AMDGPU::CF_CALL_FS_EG: +case AMDGPU::CF_TC_R600: +case AMDGPU::CF_VC_R600: +case AMDGPU::CF_CALL_FS_R600: return; -case AMDGPU::WHILE_LOOP: -case AMDGPU::END_LOOP: -case AMDGPU::LOOP_BREAK: -case AMDGPU::CF_CONTINUE: -case AMDGPU::CF_JUMP: -case AMDGPU::CF_ELSE: -case AMDGPU::POP: { +case AMDGPU::WHILE_LOOP_EG: +case AMDGPU::END_LOOP_EG: +case AMDGPU::LOOP_BREAK_EG: +case AMDGPU::CF_CONTINUE_EG: +case AMDGPU::CF_JUMP_EG: +case AMDGPU::CF_ELSE_EG: +case AMDGPU::POP_EG: +case AMDGPU::WHILE_LOOP_R600: +case AMDGPU::END_LOOP_R600: +case AMDGPU::LOOP_BREAK_R600: +case AMDGPU::CF_CONTINUE_R600: +case AMDGPU::CF_JUMP_R600: +case AMDGPU::CF_ELSE_R600: +case AMDGPU::POP_R600: { uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); EmitByte(INSTR_NATIVE, OS); Emit(Inst, OS); diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index 3a6c7ea..cfaa36e 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -30,9 +30,22 @@ namespace llvm { class R600ControlFlowFinalizer : public MachineFunctionPass { private: + enum ControlFlowInstruction { +CF_TC, +CF_CALL_FS, +CF_WHILE_LOOP, +CF_END_LOOP, +CF_LOOP_BREAK, +CF_LOOP_CONTINUE, +CF_JUMP, +CF_ELSE, +CF_POP + }; + 
static char ID; const R600InstrInfo *TII; unsigned MaxFetchInst; + const AMDGPUSubtarget &ST; bool isFetch(const MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -70,6 +83,52 @@ private: } } + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { +if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) { + switch (CFI) { + case CF_TC: +return TII->get(AMDGPU::CF_TC_R600); + case CF_CALL_FS: +return TII->get(AMDGPU::CF_CALL_FS_R600); + case CF_WHILE_LOOP: +return TII->get(AMDGPU::WHILE_LOOP_R600); + case CF_END_LOOP: +return TII->get(AMDGPU::END_LOOP_R600); + case CF_LOOP_BREAK: +return TII->get(AMDGPU::LOOP_BREAK_R600); + case CF_LOOP_CONTINUE: +return TII->get(AMDGPU::CF_CONTINUE_R600); + case CF_JUMP: +return TII->get(AMDGPU::CF_JUMP_R600); + case CF_ELSE: +return TII->get(AMDGPU::CF_ELSE_R600); + case CF_POP: +return TII->get(AMDGPU::POP_R600); + } +} else { + switch (CFI) { + case CF_TC: +return TII->get(AMDGPU::CF_TC_EG); + case CF_CALL_FS: +return TII->get(AMDGPU::CF_CALL_FS_EG); + case CF_WHILE_LOOP: +return TII->get(AMDGPU::WHILE_LOOP_EG); + case CF_END_LOOP: +return TII->get(AMDGPU::END_LOOP_EG); + case CF_LOOP_BREAK: +return TII->get(AMDGPU::LOOP_BREAK_EG); + case CF_LOOP_CONTINUE: +return TII->get(AMDGPU::CF_CONTINUE_EG); + case CF_JUMP: +return TII->get(AMDGPU::CF_JUMP_EG); + case CF_ELSE: +return TII->get(AMDGPU::CF_ELSE_EG); + case CF_POP: +return TII->get(AMDGPU::POP_EG); + } +} + } + MachineBasicBlock::iterator MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned CfAddress) const { @@ -85,7 +144,7 @@ private: break; } BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), -TII->get(AMDGPU::CF_TC)) +getHWInstrDesc(CF_TC)) .addImm(CfAddress) // ADDR .addImm(AluInstCount); // COUNT return I; @@ -104,7 +163,8 @@ private: public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), -TII (static_cast(tm.getInstrInfo())) { +TII (static_cast(tm.getInstrInfo())), +ST(tm.getSubtarget()) 
{ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) MaxFetchInst = 8; @@ -124,7 +184,7 @@ public: R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); if (MFI->ShaderType == 1) { BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), -TII->g