From: Marek Olšák marek.ol...@amd.com
This adds a new type of intrinsic and SDNode: SampleRaw.
All fields of the MIMG opcodes are exposed and can be set by Mesa,
even DMASK. All GATHER4 variants are added and there are a lot of them.
DMASK was repurposed for GATHER4, so all passes which modify DMASK are
disabled by setting MIMG=0 and hasPostISelHook=0. See my Mesa patches
for how DMASK works with GATHER4, because this is not documented anywhere.
---
lib/Target/R600/AMDGPUISelLowering.cpp | 24 +
lib/Target/R600/AMDGPUISelLowering.h | 31 +++
lib/Target/R600/SIISelLowering.cpp | 72 +
lib/Target/R600/SIISelLowering.h | 2 +
lib/Target/R600/SIInstrInfo.td | 86 ++
lib/Target/R600/SIInstructions.td | 96 +-
lib/Target/R600/SIIntrinsics.td| 48 +
7 files changed, 335 insertions(+), 24 deletions(-)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp
b/lib/Target/R600/AMDGPUISelLowering.cpp
index 7b6df9a..5875a11 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1541,6 +1541,30 @@ const char*
AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(GATHER4)
+ NODE_NAME_CASE(GATHER4_CL)
+ NODE_NAME_CASE(GATHER4_L)
+ NODE_NAME_CASE(GATHER4_B)
+ NODE_NAME_CASE(GATHER4_B_CL)
+ NODE_NAME_CASE(GATHER4_LZ)
+ NODE_NAME_CASE(GATHER4_C)
+ NODE_NAME_CASE(GATHER4_C_CL)
+ NODE_NAME_CASE(GATHER4_C_L)
+ NODE_NAME_CASE(GATHER4_C_B)
+ NODE_NAME_CASE(GATHER4_C_B_CL)
+ NODE_NAME_CASE(GATHER4_C_LZ)
+ NODE_NAME_CASE(GATHER4_O)
+ NODE_NAME_CASE(GATHER4_CL_O)
+ NODE_NAME_CASE(GATHER4_L_O)
+ NODE_NAME_CASE(GATHER4_B_O)
+ NODE_NAME_CASE(GATHER4_B_CL_O)
+ NODE_NAME_CASE(GATHER4_LZ_O)
+ NODE_NAME_CASE(GATHER4_C_O)
+ NODE_NAME_CASE(GATHER4_C_CL_O)
+ NODE_NAME_CASE(GATHER4_C_L_O)
+ NODE_NAME_CASE(GATHER4_C_B_O)
+ NODE_NAME_CASE(GATHER4_C_B_CL_O)
+ NODE_NAME_CASE(GATHER4_C_LZ_O)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h
b/lib/Target/R600/AMDGPUISelLowering.h
index d5d821d..a9af195 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -203,6 +203,37 @@ enum {
SAMPLEB,
SAMPLED,
SAMPLEL,
+
+ // Gather4 opcodes
+ GATHER4,
+ GATHER4_CL,
+ GATHER4_L,
+ GATHER4_B,
+ GATHER4_B_CL,
+ GATHER4_LZ,
+
+ GATHER4_C,
+ GATHER4_C_CL,
+ GATHER4_C_L,
+ GATHER4_C_B,
+ GATHER4_C_B_CL,
+ GATHER4_C_LZ,
+
+ GATHER4_O,
+ GATHER4_CL_O,
+ GATHER4_L_O,
+ GATHER4_B_O,
+ GATHER4_B_CL_O,
+ GATHER4_LZ_O,
+
+ GATHER4_C_O,
+ GATHER4_C_CL_O,
+ GATHER4_C_L_O,
+ GATHER4_C_B_O,
+ GATHER4_C_B_CL_O,
+ GATHER4_C_LZ_O,
+
+ // Memory opcodes
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/lib/Target/R600/SIISelLowering.cpp
b/lib/Target/R600/SIISelLowering.cpp
index 1a861d4..909255d 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+// Gather4 intrinsics
+case AMDGPUIntrinsic::SI_gather4:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_l:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_b:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_b_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_lz:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG);
+
+case AMDGPUIntrinsic::SI_gather4_c:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_l:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_b:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_b_cl:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_c_lz:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG);
+
+case AMDGPUIntrinsic::SI_gather4_o:
+ return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG);
+case AMDGPUIntrinsic::SI_gather4_cl_o:
+