Anthony Gutierrez has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/29952 )
Change subject: arch-gcn3: Add ds_bpermute and ds_permute insts
......................................................................
arch-gcn3: Add ds_bpermute and ds_permute insts
The implementation of these insts provided by this
change is based on the description provided here:
https://gpuopen.com/amd-gcn-assembly-cross-lane-operations/
Change-Id: Id63b6c34c9fdc6e0dbd445d859e7b209023f2874
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29952
Maintainer: Anthony Gutierrez <anthony.gutier...@amd.com>
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
---
M src/arch/gcn3/insts/instructions.cc
1 file changed, 113 insertions(+), 4 deletions(-)
Approvals:
Matt Sinclair: Looks good to me, approved
Anthony Gutierrez: Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 6e5ff42..e93278a 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -32129,6 +32129,13 @@
Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_permute_b32")
{
+ setFlag(MemoryRef);
+ /**
+ * While this operation doesn't actually use DS storage we classify
+ * it as a load here because it does a writeback to a VGPR, which
+ * fits in better with the LDS pipeline logic.
+ */
+ setFlag(Load);
} // Inst_DS__DS_PERMUTE_B32
Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
@@ -32139,12 +32146,66 @@
void
Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
{
- panicUnimplemented();
- }
+ Wavefront *wf = gpuDynInst->wavefront();
+ gpuDynInst->execUnitId = wf->execUnitId;
+ gpuDynInst->latency.init(gpuDynInst->computeUnit());
+ gpuDynInst->latency.set(gpuDynInst->computeUnit()
+ ->cyclesToTicks(Cycles(24)));
+ ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+ ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
+ VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+ addr.read();
+ data.read();
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ /**
+ * One of the offset fields can be used for the index.
+ * It is assumed OFFSET0 would be used, as OFFSET1 is
+ * typically only used for DS ops that operate on two
+ * disparate pieces of data.
+ */
+ assert(!instData.OFFSET1);
+ /**
+ * The address provided is a byte address, but VGPRs are
+ * 4 bytes, so we must divide by 4 to get the actual VGPR
+ * index. Additionally, the index is calculated modulo the
+ * WF size, 64 in this case, so we simply extract bits 7-2.
+ */
+ int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
+ panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
+ "of bounds.\n", gpuDynInst->disassemble(), index);
+ /**
+ * If the shuffled index corresponds to a lane that is
+ * inactive then this instruction writes a 0 to the active
+ * lane in VDST.
+ */
+ if (wf->execMask(index)) {
+ vdst[index] = data[lane];
+ } else {
+ vdst[index] = 0;
+ }
+ }
+ }
+
+ vdst.write();
+
+ wf->rdLmReqsInPipe--;
+ wf->validateRequestCounters();
+ } // execute
+ // --- Inst_DS__DS_BPERMUTE_B32 class methods ---
Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_bpermute_b32")
{
+ setFlag(MemoryRef);
+ /**
+ * While this operation doesn't actually use DS storage we classify
+ * it as a load here because it does a writeback to a VGPR, which
+ * fits in better with the LDS pipeline logic.
+ */
+ setFlag(Load);
} // Inst_DS__DS_BPERMUTE_B32
Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
@@ -32155,8 +32216,56 @@
void
Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
{
- panicUnimplemented();
- }
+ Wavefront *wf = gpuDynInst->wavefront();
+ gpuDynInst->execUnitId = wf->execUnitId;
+ gpuDynInst->latency.init(gpuDynInst->computeUnit());
+ gpuDynInst->latency.set(gpuDynInst->computeUnit()
+ ->cyclesToTicks(Cycles(24)));
+ ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+ ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
+ VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+ addr.read();
+ data.read();
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ /**
+ * One of the offset fields can be used for the index.
+ * It is assumed OFFSET0 would be used, as OFFSET1 is
+ * typically only used for DS ops that operate on two
+ * disparate pieces of data.
+ */
+ assert(!instData.OFFSET1);
+ /**
+ * The address provided is a byte address, but VGPRs are
+ * 4 bytes, so we must divide by 4 to get the actual VGPR
+ * index. Additionally, the index is calculated modulo the
+ * WF size, 64 in this case, so we simply extract bits 7-2.
+ */
+ int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
+ panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
+ "of bounds.\n", gpuDynInst->disassemble(), index);
+ /**
+ * If the shuffled index corresponds to a lane that is
+ * inactive then this instruction writes a 0 to the active
+ * lane in VDST.
+ */
+ if (wf->execMask(index)) {
+ vdst[lane] = data[index];
+ } else {
+ vdst[lane] = 0;
+ }
+ }
+ }
+
+ vdst.write();
+
+ wf->rdLmReqsInPipe--;
+ wf->validateRequestCounters();
+ } // execute
+
+ // --- Inst_DS__DS_ADD_U64 class methods ---
Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
: Inst_DS(iFmt, "ds_add_u64")
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/29952
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id63b6c34c9fdc6e0dbd445d859e7b209023f2874
Gerrit-Change-Number: 29952
Gerrit-PatchSet: 7
Gerrit-Owner: Anthony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Anthony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Tony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Tuan Ta <q...@cornell.edu>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s