Am 30.10.2013 14:23, schrieb Marek Olšák:
From: Marek Olšák <marek.ol...@amd.com>

This also fixes scalar compare instructions which were always eliminated,
because they didn't have a destination of SCC.

Uff, that looks like quite a bit of overhead; isn't there a simpler approach? Like setting the NumRecord to one and letting unused constants point to a dummy buffer, or something like this?

Christian.


Signed-off-by: Marek Olšák <marek.ol...@amd.com>
---
  lib/Target/R600/SIISelLowering.cpp | 30 ++++++++++++++++++++++++++----
  lib/Target/R600/SIInsertWaits.cpp  |  6 ++++++
  lib/Target/R600/SIInstrInfo.td     |  5 +++++
  lib/Target/R600/SIInstructions.td  | 26 +++++++++++++++-----------
  4 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/lib/Target/R600/SIISelLowering.cpp 
b/lib/Target/R600/SIISelLowering.cpp
index 371572e..e9f4035 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,6 +14,7 @@
#include "SIISelLowering.h"
  #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
  #include "AMDILIntrinsicInfo.h"
  #include "SIInstrInfo.h"
  #include "SIMachineFunctionInfo.h"
@@ -302,14 +303,37 @@ MachineBasicBlock * 
SITargetLowering::EmitInstrWithCustomInserter(
      MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
+  const SIInstrInfo *TII =
+    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+
+  // Sea Islands must conditionally execute SMRD instructions depending
+  // on the value of SQ_BUF_RSRC_WORD2.NUM_RECORDS, because the hardware
+  // doesn't skip the instructions if NUM_RECORDS is 0.
+  if (TII->isSMRD(MI->getOpcode())) {
+    if (getTargetMachine().getSubtarget<AMDGPUSubtarget>().getGeneration() !=
+        AMDGPUSubtarget::SEA_ISLANDS)
+      return BB;
+
+    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    unsigned NumRecords = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+    // XXX should we save and restore the SCC register?
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::COPY), NumRecords)
+        .addReg(MI->getOperand(1).getReg(), 0, AMDGPU::sub2);
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CMPK_EQ_U32), 
AMDGPU::SCC)
+        .addReg(NumRecords)
+        .addImm(0);
+    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_CBRANCH_SCC1))
+        .addImm(1)
+        .addReg(AMDGPU::SCC);
+    return BB;
+  }
switch (MI->getOpcode()) {
    default:
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    case AMDGPU::BRANCH: return BB;
    case AMDGPU::SI_ADDR64_RSRC: {
-    const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
      unsigned SuperReg = MI->getOperand(0).getReg();
      unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -336,8 +360,6 @@ MachineBasicBlock * 
SITargetLowering::EmitInstrWithCustomInserter(
      break;
    }
    case AMDGPU::V_SUB_F64: {
-    const SIInstrInfo *TII =
-      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
      BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
              MI->getOperand(0).getReg())
              .addReg(MI->getOperand(1).getReg())
diff --git a/lib/Target/R600/SIInsertWaits.cpp 
b/lib/Target/R600/SIInsertWaits.cpp
index 7e42fb7..2e47346 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -294,6 +294,12 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
    if (Counts.Named.EXP == 0)
      ExpInstrTypesSeen = 0;
+ // Ensure S_WAITCNT is inserted before S_CBRANCH.
+  MachineBasicBlock::iterator beforeI = I;
+  --beforeI;
+  if (beforeI->getOpcode() == AMDGPU::S_CBRANCH_SCC1)
+    I = beforeI;
+
    // Build the wait instruction
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
            .addImm((Counts.Named.VM & 0xF) |
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index ed42a2a..9567879 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -177,6 +177,11 @@ class SOPC_32 <bits<7> op, string opName, list<dag> pattern> 
: SOPC <
    opName#" $dst, $src0, $src1", pattern
  >;
+class SOPCK_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+  op, (outs SCCReg:$dst), (ins SReg_32:$src0, i16imm:$src1),
+  opName#" $dst, $src0, $src1", pattern
+>;
+
  class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
    op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
    opName#" $dst, $src0, $src1", pattern
diff --git a/lib/Target/R600/SIInstructions.td 
b/lib/Target/R600/SIInstructions.td
index 048c157..1b275a7 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -115,17 +115,17 @@ def S_CMPK_EQ_I32 : SOPK <
  */
let isCompare = 1 in {
-def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
-def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
-def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
-def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
-def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
-def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
-def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
-def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
-def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
-def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
-def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+def S_CMPK_LG_I32 : SOPCK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPCK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPCK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPCK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPCK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPCK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPCK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPCK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPCK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPCK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPCK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
  } // End isCompare = 1
def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
@@ -492,6 +492,8 @@ defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", 
SReg_64, SReg_128>;
  defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
  defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, 
SReg_512>;
+let usesCustomInserter = 1 in {
+
  defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
    0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
  >;
@@ -512,6 +514,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
    0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
  >;
+} // usesCustomInserter = 1
+
  } // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to