[Mesa-dev] [PATCH 2/2] R600: Emit CF_ALU and use true kcache register.

2013-03-27 Thread Vincent Lejeune
---
 lib/Target/R600/AMDGPU.h   |   1 +
 lib/Target/R600/AMDGPUTargetMachine.cpp|   1 +
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  10 +-
 lib/Target/R600/R600EmitClauseMarkers.cpp  | 243 +
 lib/Target/R600/R600Instructions.td|  83 ++-
 lib/Target/R600/R600RegisterInfo.td|  63 ++
 6 files changed, 389 insertions(+), 12 deletions(-)
 create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index e099a9f..3cd792a 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
 // R600 Passes
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
 
 // SI Passes
 FunctionPass *createSIAnnotateControlFlowPass();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 0185747..45b1be0 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -151,6 +151,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
 addPass(createAMDGPUCFGPreparationPass(*TM));
 addPass(createAMDGPUCFGStructurizerPass(*TM));
+addPass(createR600EmitClauseMarkers(*TM));
 addPass(createR600ExpandSpecialInstrsPass(*TM));
 addPass(FinalizeMachineBundlesID);
   } else {
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 00ebb44..cf43f3f 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -101,7 +101,8 @@ enum InstrTypes {
   INSTR_FC,
   INSTR_NATIVE,
   INSTR_VTX,
-  INSTR_EXPORT
+  INSTR_EXPORT,
+  INSTR_CFALU
 };
 
 enum FCInstr {
@@ -250,6 +251,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
&MI, raw_ostream &OS,
   Emit(Inst, OS);
   break;
 }
+case AMDGPU::CF_ALU:
+case AMDGPU::CF_ALU_PUSH_BEFORE: {
+  uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+  EmitByte(INSTR_CFALU, OS);
+  Emit(Inst, OS);
+  break;
+}
 
 default:
   EmitALUInstr(MI, Fixups, OS);
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp 
b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000..b869c88
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,243 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU 
---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+/// \file
+/// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
+/// 128 ALU instructions; these instructions can access up to 4 prefetched
+/// lines of 16 registers each from constant buffers. Such ALU clauses are
+/// initiated by CF_ALU instructions.
+//===--===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const R600InstrInfo *TII;
+
+  /// Returns the number of dwords \p MI is counted as occupying.
+  ///
+  /// The interpolation and DOT4 pseudo opcodes listed in the switch, as well
+  /// as any instruction TII reports as a vector, cube or reduction op, count
+  /// as 4. KILL counts as 0. Every other instruction counts as 1 plus one for
+  /// each register operand that is ALU_LITERAL_X.
+  unsigned OccupiedDwords(MachineInstr *MI) const {
+    switch (MI->getOpcode()) {
+    case AMDGPU::INTERP_PAIR_XY:
+    case AMDGPU::INTERP_PAIR_ZW:
+    case AMDGPU::INTERP_VEC_LOAD:
+    case AMDGPU::DOT4_eg_pseudo:
+    case AMDGPU::DOT4_r600_pseudo:
+      return 4;
+    case AMDGPU::KILL:
+      return 0;
+    default:
+      break;
+    }
+
+    if (TII->isVector(*MI) ||
+        TII->isCubeOp(MI->getOpcode()) ||
+        TII->isReductionOp(MI->getOpcode()))
+      return 4;
+
+    // One extra count per operand that reads the ALU_LITERAL_X register.
+    unsigned NumLiteral = 0;
+    for (MachineInstr::mop_iterator It = MI->operands_begin(),
+         E = MI->operands_end(); It != E; ++It) {
+      MachineOperand &MO = *It;
+      if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+        ++NumLiteral;
+    }
+    return 1 + NumLiteral;
+  }
+
+  bool isALU(const MachineInstr *MI) const {
+if (TII-isALUInstr(MI-getOpcode()))
+  return true;
+if (TII-isVector(*MI) || TII-isCubeOp(MI-getOpcode()))
+  return true;
+switch (MI-getOpcode()) {
+case AMDGPU::INTERP_PAIR_XY:
+case AMDGPU::INTERP_PAIR_ZW:
+case AMDGPU::INTERP_VEC_LOAD:
+case AMDGPU::COPY:
+case AMDGPU::DOT4_eg_pseudo:
+case AMDGPU::DOT4_r600_pseudo:
+  return true;
+default:
+  

Re: [Mesa-dev] [PATCH 2/2] R600: Emit CF_ALU and use true kcache register.

2013-03-27 Thread Tom Stellard
On Thu, Mar 28, 2013 at 12:40:19AM +0100, Vincent Lejeune wrote:
 ---

Thanks for working on this, it is a very nice improvement.  See my
comments inline.

  lib/Target/R600/AMDGPU.h   |   1 +
  lib/Target/R600/AMDGPUTargetMachine.cpp|   1 +
  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp |  10 +-
  lib/Target/R600/R600EmitClauseMarkers.cpp  | 243 
 +
  lib/Target/R600/R600Instructions.td|  83 ++-
  lib/Target/R600/R600RegisterInfo.td|  63 ++
  6 files changed, 389 insertions(+), 12 deletions(-)
  create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp
 
 diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
 index e099a9f..3cd792a 100644
 --- a/lib/Target/R600/AMDGPU.h
 +++ b/lib/Target/R600/AMDGPU.h
 @@ -23,6 +23,7 @@ class AMDGPUTargetMachine;
  // R600 Passes
  FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
  FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine tm);
 +FunctionPass *createR600EmitClauseMarkers(TargetMachine tm);
  
  // SI Passes
  FunctionPass *createSIAnnotateControlFlowPass();
 diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp 
 b/lib/Target/R600/AMDGPUTargetMachine.cpp
 index 0185747..45b1be0 100644
 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
 +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
 @@ -151,6 +151,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
if (ST.device()-getGeneration() = AMDGPUDeviceInfo::HD6XXX) {
  addPass(createAMDGPUCFGPreparationPass(*TM));
  addPass(createAMDGPUCFGStructurizerPass(*TM));
 +addPass(createR600EmitClauseMarkers(*TM));
  addPass(createR600ExpandSpecialInstrsPass(*TM));
  addPass(FinalizeMachineBundlesID);
} else {
 diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp 
 b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
 index 00ebb44..cf43f3f 100644
 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
 +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
 @@ -101,7 +101,8 @@ enum InstrTypes {
INSTR_FC,
INSTR_NATIVE,
INSTR_VTX,
 -  INSTR_EXPORT
 +  INSTR_EXPORT,
 +  INSTR_CFALU
  };
  
  enum FCInstr {
 @@ -250,6 +251,13 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst 
 MI, raw_ostream OS,
Emit(Inst, OS);
break;
  }
 +case AMDGPU::CF_ALU:
 +case AMDGPU::CF_ALU_PUSH_BEFORE: {
 +  uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
 +  EmitByte(INSTR_CFALU, OS);
 +  Emit(Inst, OS);
 +  break;
 +}
  
  default:
EmitALUInstr(MI, Fixups, OS);
 diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp 
 b/lib/Target/R600/R600EmitClauseMarkers.cpp
 new file mode 100644
 index 000..b869c88
 --- /dev/null
 +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
 @@ -0,0 +1,243 @@
 +//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU 
 ---===//
 +//
 +// The LLVM Compiler Infrastructure
 +//
 +// This file is distributed under the University of Illinois Open Source
 +// License. See LICENSE.TXT for details.
 +//
 +//===--===//
 +//
 +/// \file
 +/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold
 +/// 128 Alu instructions ; these instructions can access up to 4 prefetched
 +/// 4 lines of 16 registers from constant buffers. Such ALU clauses are
 +/// initiated by CF_ALU instructions.
 +//===--===//
 +
 +#include AMDGPU.h
 +#include R600Defines.h
 +#include R600InstrInfo.h
 +#include R600MachineFunctionInfo.h
 +#include R600RegisterInfo.h
 +#include llvm/CodeGen/MachineFunctionPass.h
 +#include llvm/CodeGen/MachineInstrBuilder.h
 +#include llvm/CodeGen/MachineRegisterInfo.h
 +
 +namespace llvm {
 +
 +class R600EmitClauseMarkersPass : public MachineFunctionPass {
 +
 +private:
 +  static char ID;
 +  const R600InstrInfo *TII;
 +
 +  /// Returns the number of dwords \p MI is counted as occupying.
 +  ///
 +  /// The interpolation and DOT4 pseudo opcodes listed in the switch, as well
 +  /// as any instruction TII reports as a vector, cube or reduction op, count
 +  /// as 4. KILL counts as 0. Every other instruction counts as 1 plus one for
 +  /// each register operand that is ALU_LITERAL_X.
 +  unsigned OccupiedDwords(MachineInstr *MI) const {
 +    switch (MI->getOpcode()) {
 +    case AMDGPU::INTERP_PAIR_XY:
 +    case AMDGPU::INTERP_PAIR_ZW:
 +    case AMDGPU::INTERP_VEC_LOAD:
 +    case AMDGPU::DOT4_eg_pseudo:
 +    case AMDGPU::DOT4_r600_pseudo:
 +      return 4;
 +    case AMDGPU::KILL:
 +      return 0;
 +    default:
 +      break;
 +    }
 +
 +    if (TII->isVector(*MI) ||
 +        TII->isCubeOp(MI->getOpcode()) ||
 +        TII->isReductionOp(MI->getOpcode()))
 +      return 4;
 +
 +    // One extra count per operand that reads the ALU_LITERAL_X register.
 +    unsigned NumLiteral = 0;
 +    for (MachineInstr::mop_iterator It = MI->operands_begin(),
 +         E = MI->operands_end(); It != E; ++It) {
 +      MachineOperand &MO = *It;
 +      if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
 +        ++NumLiteral;
 +    }
 +    return 1 + NumLiteral;
 +  }
 +
 +  bool isALU(const MachineInstr *MI) const {
 +if (TII-isALUInstr(MI-getOpcode()))
 +  return true;
 +if (TII-isVector(*MI) ||