Hello Tony Gutierrez,

I'd like you to do a code review. Please visit

    https://gem5-review.googlesource.com/c/public/gem5/+/29957

to review the following change.


Change subject: arch-gcn3: Implement s_buffer_load_dwordx16
......................................................................

arch-gcn3: Implement s_buffer_load_dwordx16

Change-Id: I25382dcae9bb55eaf035385fa925157f25d39c20
---
M src/arch/gcn3/insts/instructions.cc
M src/arch/gcn3/insts/op_encodings.hh
2 files changed, 90 insertions(+), 31 deletions(-)



diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 567cc10..71efd8f 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -4857,17 +4857,45 @@
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+        ScalarRegU32 offset(0);
+        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
+
+        rsrcDesc.read();
+
+        if (instData.IMM) {
+            offset = extData.OFFSET;
+        } else {
+            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
+            off_sgpr.read();
+            offset = off_sgpr.rawData();
+        }
+
+        calcAddr(gpuDynInst, rsrcDesc, offset);
+
+        gpuDynInst->computeUnit()->scalarMemoryPipe
+            .getGMReqFIFO().push(gpuDynInst);
+
+        wf->scalarRdGmReqsInPipe--;
+        wf->scalarOutstandingReqsRdGm++;
+        gpuDynInst->wavefront()->outstandingReqs++;
+        gpuDynInst->wavefront()->validateRequestCounters();
+    } // execute

     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
+        initMemRead<16>(gpuDynInst);
     } // initiateAcc

     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
     {
+        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
+        sdst.write();
     } // completeAcc

     Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
diff --git a/src/arch/gcn3/insts/op_encodings.hh b/src/arch/gcn3/insts/op_encodings.hh
index 4056f0a..202dd1d 100644
--- a/src/arch/gcn3/insts/op_encodings.hh
+++ b/src/arch/gcn3/insts/op_encodings.hh
@@ -46,6 +46,29 @@

 namespace Gcn3ISA
 {
+    struct BufferRsrcDescriptor
+    {
+        uint64_t baseAddr : 48;
+        uint32_t stride : 14;
+        uint32_t cacheSwizzle : 1;
+        uint32_t swizzleEn : 1;
+        uint32_t numRecords : 32;
+        uint32_t dstSelX : 3;
+        uint32_t dstSelY : 3;
+        uint32_t dstSelZ : 3;
+        uint32_t dstSelW : 3;
+        uint32_t numFmt : 3;
+        uint32_t dataFmt : 4;
+        uint32_t elemSize : 2;
+        uint32_t idxStride : 2;
+        uint32_t addTidEn : 1;
+        uint32_t atc : 1;
+        uint32_t hashEn : 1;
+        uint32_t heap : 1;
+        uint32_t mType : 3;
+        uint32_t type : 2;
+    };
+
     // --- purely virtual instruction classes ---

     class Inst_SOP2 : public GCN3GPUStaticInst
@@ -197,14 +220,45 @@
                                                     MemCmd::WriteReq);
         }

+        /**
+         * For normal s_load_dword/s_store_dword instruction addresses.
+         */
         void
-        calcAddr(GPUDynInstPtr gpuDynInst, ConstScalarOperandU64 &addr,
-            ScalarRegU32 offset)
+        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
+                 ScalarRegU32 offset)
         {
-            Addr vaddr = addr.rawData();
-            vaddr += offset;
-            vaddr &= ~0x3;
-            gpuDynInst->scalarAddr = vaddr;
+            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
+            gpu_dyn_inst->scalarAddr = vaddr;
+        }
+
+        /**
+         * For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
+         * The s_buffer instructions use the same buffer resource descriptor
+         * as the MUBUF instructions.
+         */
+        void
+        calcAddr(GPUDynInstPtr gpu_dyn_inst,
+                 ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
+        {
+            BufferRsrcDescriptor rsrc_desc;
+            ScalarRegU32 clamped_offset(offset);
+            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
+                        sizeof(BufferRsrcDescriptor));
+
+            /**
+             * The address is clamped if:
+             *     Stride is zero: clamp if offset >= num_records
+             *     Stride is non-zero: clamp if offset > (stride * num_records)
+             */
+            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
+                clamped_offset = rsrc_desc.numRecords;
+            } else if (rsrc_desc.stride && offset
+                       > (rsrc_desc.stride * rsrc_desc.numRecords)) {
+                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
+            }
+
+            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
+            gpu_dyn_inst->scalarAddr = vaddr;
         }

         // first instruction DWORD
@@ -469,29 +523,6 @@
         int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;

       protected:
-        struct BufferRsrcDescriptor
-        {
-            uint64_t baseAddr : 48;
-            uint32_t stride : 14;
-            uint32_t cacheSwizzle : 1;
-            uint32_t swizzleEn : 1;
-            uint32_t numRecords : 32;
-            uint32_t dstSelX : 3;
-            uint32_t dstSelY : 3;
-            uint32_t dstSelZ : 3;
-            uint32_t dstSelW : 3;
-            uint32_t numFmt : 3;
-            uint32_t dataFmt : 4;
-            uint32_t elemSize : 2;
-            uint32_t idxStride : 2;
-            uint32_t addTidEn : 1;
-            uint32_t atc : 1;
-            uint32_t hashEn : 1;
-            uint32_t heap : 1;
-            uint32_t mType : 3;
-            uint32_t type : 2;
-        };
-
         template<typename T>
         void
         initMemRead(GPUDynInstPtr gpuDynInst)

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/29957
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I25382dcae9bb55eaf035385fa925157f25d39c20
Gerrit-Change-Number: 29957
Gerrit-PatchSet: 1
Gerrit-Owner: Anthony Gutierrez <[email protected]>
Gerrit-Reviewer: Tony Gutierrez <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to