[Mesa-dev] [PATCH 2/2] R600: Emit CF_ALU and use true kcache register.
--- lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUTargetMachine.cpp| 1 + lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 10 +- lib/Target/R600/R600EmitClauseMarkers.cpp | 243 + lib/Target/R600/R600Instructions.td| 83 ++- lib/Target/R600/R600RegisterInfo.td| 63 ++ 6 files changed, 389 insertions(+), 12 deletions(-) create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index e099a9f..3cd792a 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,6 +23,7 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 0185747..45b1be0 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -151,6 +151,7 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); +addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(FinalizeMachineBundlesID); } else { diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 00ebb44..cf43f3f 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -101,7 +101,8 @@ enum InstrTypes { INSTR_FC, INSTR_NATIVE, INSTR_VTX, - INSTR_EXPORT + INSTR_EXPORT, + INSTR_CFALU }; enum FCInstr { @@ -250,6 +251,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(Inst, OS); break; } +case AMDGPU::CF_ALU: +case 
AMDGPU::CF_ALU_PUSH_BEFORE: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_CFALU, OS); + Emit(Inst, OS); + break; +} default: EmitALUInstr(MI, Fixups, OS); diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp new file mode 100644 index 000..b869c88 --- /dev/null +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -0,0 +1,243 @@ +//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +/// \file +/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold +/// 128 Alu instructions ; these instructions can access up to 4 prefetched +/// 4 lines of 16 registers from constant buffers. Such ALU clauses are +/// initiated by CF_ALU instructions. +//===--===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +class R600EmitClauseMarkersPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + + unsigned OccupiedDwords(MachineInstr *MI) const { +switch (MI->getOpcode()) { +case AMDGPU::INTERP_PAIR_XY: +case AMDGPU::INTERP_PAIR_ZW: +case AMDGPU::INTERP_VEC_LOAD: +case AMDGPU::DOT4_eg_pseudo: +case AMDGPU::DOT4_r600_pseudo: + return 4; +case AMDGPU::KILL: + return 0; +default: + break; +} + +if(TII->isVector(*MI) || +TII->isCubeOp(MI->getOpcode()) || +TII->isReductionOp(MI->getOpcode())) + return 4; + +unsigned NumLiteral = 0; +for (MachineInstr::mop_iterator It = MI->operands_begin(), +E = MI->operands_end(); It != E; ++It) { + MachineOperand &MO = *It; + if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) +++NumLiteral; +} +return 1 + 
NumLiteral; + } + + bool isALU(const MachineInstr *MI) const { +if (TII->isALUInstr(MI->getOpcode())) + return true; +if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode())) + return true; +switch (MI->getOpcode()) { +case AMDGPU::INTERP_PAIR_XY: +case AMDGPU::INTERP_PAIR_ZW: +case AMDGPU::INTERP_VEC_LOAD: +case AMDGPU::COPY: +case AMDGPU::DOT4_eg_pseudo: +case AMDGPU::DOT4_r600_pseudo: + return true; +default: +
Re: [Mesa-dev] [PATCH 2/2] R600: Emit CF_ALU and use true kcache register.
On Thu, Mar 28, 2013 at 12:40:19AM +0100, Vincent Lejeune wrote: --- Thanks for working on this, it is a very nice improvement. See my comments inline. lib/Target/R600/AMDGPU.h | 1 + lib/Target/R600/AMDGPUTargetMachine.cpp| 1 + lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 10 +- lib/Target/R600/R600EmitClauseMarkers.cpp | 243 + lib/Target/R600/R600Instructions.td| 83 ++- lib/Target/R600/R600RegisterInfo.td| 63 ++ 6 files changed, 389 insertions(+), 12 deletions(-) create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index e099a9f..3cd792a 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,6 +23,7 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 0185747..45b1be0 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -151,6 +151,7 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); +addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(FinalizeMachineBundlesID); } else { diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 00ebb44..cf43f3f 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -101,7 +101,8 @@ enum InstrTypes { INSTR_FC, INSTR_NATIVE, INSTR_VTX, - INSTR_EXPORT + INSTR_EXPORT, + INSTR_CFALU }; enum FCInstr { @@ -250,6 +251,13 
@@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(Inst, OS); break; } +case AMDGPU::CF_ALU: +case AMDGPU::CF_ALU_PUSH_BEFORE: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_CFALU, OS); + Emit(Inst, OS); + break; +} default: EmitALUInstr(MI, Fixups, OS); diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp new file mode 100644 index 000..b869c88 --- /dev/null +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -0,0 +1,243 @@ +//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +/// \file +/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold +/// 128 Alu instructions ; these instructions can access up to 4 prefetched +/// 4 lines of 16 registers from constant buffers. Such ALU clauses are +/// initiated by CF_ALU instructions. 
+//===--===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +class R600EmitClauseMarkersPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + + unsigned OccupiedDwords(MachineInstr *MI) const { +switch (MI->getOpcode()) { +case AMDGPU::INTERP_PAIR_XY: +case AMDGPU::INTERP_PAIR_ZW: +case AMDGPU::INTERP_VEC_LOAD: +case AMDGPU::DOT4_eg_pseudo: +case AMDGPU::DOT4_r600_pseudo: + return 4; +case AMDGPU::KILL: + return 0; +default: + break; +} + +if(TII->isVector(*MI) || +TII->isCubeOp(MI->getOpcode()) || +TII->isReductionOp(MI->getOpcode())) + return 4; + +unsigned NumLiteral = 0; +for (MachineInstr::mop_iterator It = MI->operands_begin(), +E = MI->operands_end(); It != E; ++It) { + MachineOperand &MO = *It; + if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) +++NumLiteral; +} +return 1 + NumLiteral; + } + + bool isALU(const MachineInstr *MI) const { +if (TII->isALUInstr(MI->getOpcode())) + return true; +if (TII->isVector(*MI) ||