Re: [Mesa-dev] [PATCH] R600/SI: add Gather4 intrinsics

2014-06-08 Thread Matt Arsenault

On 06/06/2014 02:57 PM, Marek Olšák wrote:


DMASK was repurposed for GATHER4, so all passes which modify DMASK are
disabled by setting MIMG=0 and hasPostISelHook=0. See my Mesa patches
for how DMASK works with GATHER4, because this is not documented anywhere.

Can you add a comment explaining this to the source here?

Needs tests, other than that LGTM
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600/SI: add Gather4 intrinsics

2014-06-06 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

This adds a new type of intrinsic and SDNode: SampleRaw.
All fields of the MIMG opcodes are exposed and can be set by Mesa,
even DMASK. All GATHER4 variants are added and there are a lot of them.

DMASK was repurposed for GATHER4, so all passes which modify DMASK are
disabled by setting MIMG=0 and hasPostISelHook=0. See my Mesa patches
for how DMASK works with GATHER4, because this is not documented anywhere.
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 24 +
 lib/Target/R600/AMDGPUISelLowering.h   | 31 +++
 lib/Target/R600/SIISelLowering.cpp | 72 +
 lib/Target/R600/SIISelLowering.h   |  2 +
 lib/Target/R600/SIInstrInfo.td | 86 ++
 lib/Target/R600/SIInstructions.td  | 96 +-
 lib/Target/R600/SIIntrinsics.td| 48 +
 7 files changed, 335 insertions(+), 24 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp 
b/lib/Target/R600/AMDGPUISelLowering.cpp
index 7b6df9a..5875a11 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1541,6 +1541,30 @@ const char* 
AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(SAMPLEB)
   NODE_NAME_CASE(SAMPLED)
   NODE_NAME_CASE(SAMPLEL)
+  NODE_NAME_CASE(GATHER4)
+  NODE_NAME_CASE(GATHER4_CL)
+  NODE_NAME_CASE(GATHER4_L)
+  NODE_NAME_CASE(GATHER4_B)
+  NODE_NAME_CASE(GATHER4_B_CL)
+  NODE_NAME_CASE(GATHER4_LZ)
+  NODE_NAME_CASE(GATHER4_C)
+  NODE_NAME_CASE(GATHER4_C_CL)
+  NODE_NAME_CASE(GATHER4_C_L)
+  NODE_NAME_CASE(GATHER4_C_B)
+  NODE_NAME_CASE(GATHER4_C_B_CL)
+  NODE_NAME_CASE(GATHER4_C_LZ)
+  NODE_NAME_CASE(GATHER4_O)
+  NODE_NAME_CASE(GATHER4_CL_O)
+  NODE_NAME_CASE(GATHER4_L_O)
+  NODE_NAME_CASE(GATHER4_B_O)
+  NODE_NAME_CASE(GATHER4_B_CL_O)
+  NODE_NAME_CASE(GATHER4_LZ_O)
+  NODE_NAME_CASE(GATHER4_C_O)
+  NODE_NAME_CASE(GATHER4_C_CL_O)
+  NODE_NAME_CASE(GATHER4_C_L_O)
+  NODE_NAME_CASE(GATHER4_C_B_O)
+  NODE_NAME_CASE(GATHER4_C_B_CL_O)
+  NODE_NAME_CASE(GATHER4_C_LZ_O)
   NODE_NAME_CASE(STORE_MSKOR)
   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
   }
diff --git a/lib/Target/R600/AMDGPUISelLowering.h 
b/lib/Target/R600/AMDGPUISelLowering.h
index d5d821d..a9af195 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -203,6 +203,37 @@ enum {
   SAMPLEB,
   SAMPLED,
   SAMPLEL,
+
+  // Gather4 opcodes
+  GATHER4,
+  GATHER4_CL,
+  GATHER4_L,
+  GATHER4_B,
+  GATHER4_B_CL,
+  GATHER4_LZ,
+
+  GATHER4_C,
+  GATHER4_C_CL,
+  GATHER4_C_L,
+  GATHER4_C_B,
+  GATHER4_C_B_CL,
+  GATHER4_C_LZ,
+
+  GATHER4_O,
+  GATHER4_CL_O,
+  GATHER4_L_O,
+  GATHER4_B_O,
+  GATHER4_B_CL_O,
+  GATHER4_LZ_O,
+
+  GATHER4_C_O,
+  GATHER4_C_CL_O,
+  GATHER4_C_L_O,
+  GATHER4_C_B_O,
+  GATHER4_C_B_CL_O,
+  GATHER4_C_LZ_O,
+
+  // Memory opcodes
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,
diff --git a/lib/Target/R600/SIISelLowering.cpp 
b/lib/Target/R600/SIISelLowering.cpp
index 1a861d4..909255d 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, 
SelectionDAG &DAG) const {
  Op.getOperand(1),
  Op.getOperand(2),
  Op.getOperand(3));
+
+// Gather4 intrinsics
+case AMDGPUIntrinsic::SI_gather4:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_cl:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_l:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_b:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_b_cl:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_lz:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
+
+case AMDGPUIntrinsic::SI_gather4_c:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_cl:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_l:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_b:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_b_cl:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_lz:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG);
+
+case AMDGPUIntrinsic::SI_gather4_o:
+  return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_cl_o:
+