Anthony Gutierrez has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/29958 )

Change subject: arch-gcn3: Implement ds_swizzle
......................................................................

arch-gcn3: Implement ds_swizzle

Change-Id: I7d188388afa16932217ae207368666a724207c52
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29958
Maintainer: Anthony Gutierrez <anthony.gutier...@amd.com>
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
---
M src/arch/gcn3/insts/instructions.cc
1 file changed, 102 insertions(+), 2 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved
  Anthony Gutierrez: Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 71efd8f..002c4d5 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -32266,6 +32266,7 @@
     Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
         : Inst_DS(iFmt, "ds_swizzle_b32")
     {
+         setFlag(Load);
     } // Inst_DS__DS_SWIZZLE_B32

     Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
@@ -32277,8 +32278,107 @@
     void
     Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        wf->rdLmReqsInPipe--;
+        wf->validateRequestCounters();
+
+        if (gpuDynInst->exec_mask.none()) {
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()
+                                ->cyclesToTicks(Cycles(24)));
+
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
+        VecOperandU32 vdst(gpuDynInst, extData.VDST);
+        /**
+         * The "DS pattern" is comprised of both offset fields. That is, the
+         * swizzle pattern between lanes. Bit 15 of the DS pattern dictates
+         * which swizzle mode to use. There are two different swizzle
+         * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use
+         * QDMode else use Bit-masks mode. The remaining bits dictate how to
+         * swizzle the lanes.
+         *
+         * QDMode:      Chunks the lanes into 4s and swizzles among them.
+         *              Bits 7:6 dictate where lane 3 (of the current chunk)
+         *              gets its data, 5:4 lane 2, etc.
+         *
+         * Bit-mask:    This mode breaks bits 14:0 into 3 equal-sized chunks.
+         *              14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
+         *              is the and_mask. Each lane is swizzled by performing
+         *              the appropriate operation using these masks.
+         */
+        VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);
+
+        data.read();
+
+        if (bits(ds_pattern, 15)) {
+            // QDMode
+            for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
+                /**
+                 * This operation allows data sharing between groups
+                 * of four consecutive threads. Note the increment by
+                 * 4 in the for loop.
+                 */
+                if (gpuDynInst->exec_mask[lane]) {
+                    int index0 = lane + bits(ds_pattern, 1, 0);
+                    panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
+                             "is out of bounds.\n", gpuDynInst->disassemble(),
+                             index0);
+                    vdst[lane]
+                        = gpuDynInst->exec_mask[index0] ? data[index0]: 0;
+                }
+                if (gpuDynInst->exec_mask[lane + 1]) {
+                    int index1 = lane + bits(ds_pattern, 3, 2);
+                    panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
+                             "is out of bounds.\n", gpuDynInst->disassemble(),
+                             index1);
+                    vdst[lane + 1]
+                        = gpuDynInst->exec_mask[index1] ? data[index1]: 0;
+                }
+                if (gpuDynInst->exec_mask[lane + 2]) {
+                    int index2 = lane + bits(ds_pattern, 5, 4);
+                    panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
+                             "is out of bounds.\n", gpuDynInst->disassemble(),
+                             index2);
+                    vdst[lane + 2]
+                        = gpuDynInst->exec_mask[index2] ? data[index2]: 0;
+                }
+                if (gpuDynInst->exec_mask[lane + 3]) {
+                    int index3 = lane + bits(ds_pattern, 7, 6);
+                    panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
+                             "is out of bounds.\n", gpuDynInst->disassemble(),
+                             index3);
+                    vdst[lane + 3]
+                        = gpuDynInst->exec_mask[index3] ? data[index3]: 0;
+                }
+            }
+        } else {
+            // Bit Mode
+            int and_mask = bits(ds_pattern, 4, 0);
+            int or_mask = bits(ds_pattern, 9, 5);
+            int xor_mask = bits(ds_pattern, 14, 10);
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    int index = (((lane & and_mask) | or_mask) ^ xor_mask);
+                    // Adjust for the next 32 lanes.
+                    if (lane > 31) {
+                        index += 32;
+                    }
+                    panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
+                             "out of bounds.\n", gpuDynInst->disassemble(),
+                             index);
+                    vdst[lane]
+                        = gpuDynInst->exec_mask[index] ? data[index] : 0;
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_DS__DS_PERMUTE_B32 class methods ---

     Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
         : Inst_DS(iFmt, "ds_permute_b32")

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/29958
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I7d188388afa16932217ae207368666a724207c52
Gerrit-Change-Number: 29958
Gerrit-PatchSet: 7
Gerrit-Owner: Anthony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Anthony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Tony Gutierrez <anthony.gutier...@amd.com>
Gerrit-Reviewer: Tuan Ta <q...@cornell.edu>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to