Matt Sinclair has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/42210 )

Change subject: arch-gcn3, gpu-compute: Update getRegisterIndex() API
......................................................................

arch-gcn3, gpu-compute: Update getRegisterIndex() API

This change removes the GPUDynInstPtr argument from
getRegisterIndex(). The dynamic inst was only needed
to get access to its parent WF's state so it could
determine the number of scalar registers the wave was
allocated. However, we can simply pass the number of
scalar registers directly. This cuts down on shared
pointer usage.

Change-Id: I29ab8d9a3de1f8b82b820ef421fc653284567c65
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42210
Tested-by: kokoro <[email protected]>
Reviewed-by: Matt Sinclair <[email protected]>
Maintainer: Matt Sinclair <[email protected]>
---
M src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh
M src/arch/amdgpu/gcn3/insts/op_encodings.cc
M src/arch/amdgpu/gcn3/insts/op_encodings.hh
M src/gpu-compute/fetch_unit.cc
M src/gpu-compute/gpu_dyn_inst.cc
M src/gpu-compute/gpu_dyn_inst.hh
M src/gpu-compute/gpu_static_inst.hh
M src/gpu-compute/scalar_register_file.cc
M src/gpu-compute/vector_register_file.cc
M src/gpu-compute/wavefront.cc
10 files changed, 86 insertions(+), 120 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh
index 03beb20..e4983e8 100644
--- a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh
+++ b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh
@@ -70,7 +70,7 @@
         int getOperandSize(int opIdx) override { return 0; }

         int
-        getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
+        getRegisterIndex(int opIdx, int num_scalar_regs) override
         {
             return 0;
         }
diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.cc b/src/arch/amdgpu/gcn3/insts/op_encodings.cc
index a6a3a26..34bd35f 100644
--- a/src/arch/amdgpu/gcn3/insts/op_encodings.cc
+++ b/src/arch/amdgpu/gcn3/insts/op_encodings.cc
@@ -128,21 +128,18 @@
     }

     int
-    Inst_SOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_SOP2::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());

         switch (opIdx) {
           case 0:
-            return opSelectorToRegIdx(instData.SSRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
           case 1:
-            return opSelectorToRegIdx(instData.SSRC1,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs);
           case 2:
-            return opSelectorToRegIdx(instData.SDST,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -244,7 +241,7 @@
     }

     int
-    Inst_SOPK::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_SOPK::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -253,8 +250,7 @@
           case 0:
             return  -1;
           case 1:
-            return opSelectorToRegIdx(instData.SDST,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -349,7 +345,7 @@
     }

     int
-    Inst_SOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_SOP1::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -359,14 +355,11 @@
             if (instData.OP == 0x1C) {
                 // Special case for s_getpc, which has no source reg.
                 // Instead, it implicitly reads the PC.
-                return opSelectorToRegIdx(instData.SDST,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
             }
-            return opSelectorToRegIdx(instData.SSRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
           case 1:
-            return opSelectorToRegIdx(instData.SDST,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -467,18 +460,16 @@
     }

     int
-    Inst_SOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_SOPC::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());

         switch (opIdx) {
           case 0:
-            return opSelectorToRegIdx(instData.SSRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
           case 1:
-            return opSelectorToRegIdx(instData.SSRC1,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -583,7 +574,7 @@
     }

     int
-    Inst_SOPP::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_SOPP::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -691,7 +682,7 @@
     }

     int
-    Inst_SMEM::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_SMEM::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -700,8 +691,7 @@
           case 0:
             // SBASE has an implied LSB of 0, so we need
             // to shift by one to get the actual value
-            return opSelectorToRegIdx(instData.SBASE << 1,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SBASE << 1, num_scalar_regs);
           case 1:
             if (instData.IMM) {
               // operand is an immediate value, not a register
@@ -710,8 +700,7 @@
               return extData.OFFSET;
             }
           case 2:
-            return opSelectorToRegIdx(instData.SDATA,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SDATA, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -901,21 +890,19 @@
     }

     int
-    Inst_VOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_VOP2::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());

         switch (opIdx) {
           case 0:
-            return opSelectorToRegIdx(instData.SRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
           case 1:
             return instData.VSRC1;
           case 2:
             if (readsVCC()) {
-                return opSelectorToRegIdx(REG_VCC_LO,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
             } else {
                 return instData.VDST;
             }
@@ -924,13 +911,11 @@
             if (readsVCC()) {
                 return instData.VDST;
             } else {
-                return opSelectorToRegIdx(REG_VCC_LO,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
             }
           case 4:
             assert(writesVCC() && readsVCC());
-            return opSelectorToRegIdx(REG_VCC_LO,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -1044,15 +1029,14 @@
     }

     int
-    Inst_VOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_VOP1::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());

         switch (opIdx) {
           case 0:
-            return opSelectorToRegIdx(instData.SRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
           case 1:
             return instData.VDST;
           default:
@@ -1066,6 +1050,7 @@
     Inst_VOPC::Inst_VOPC(InFmt_VOPC *iFmt, const std::string &opcode)
         : GCN3GPUStaticInst(opcode)
     {
+        setFlag(WritesVCC);
         // copy first instruction DWORD
         instData = iFmt[0];
         if (hasSecondDword(iFmt)) {
@@ -1167,21 +1152,19 @@
     }

     int
-    Inst_VOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_VOPC::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());

         switch (opIdx) {
           case 0:
-            return opSelectorToRegIdx(instData.SRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
           case 1:
             return instData.VSRC1;
           case 2:
             // VCC
-            return opSelectorToRegIdx(REG_VCC_LO,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -1437,7 +1420,7 @@
     }

     int
-    Inst_VOP3::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_VOP3::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -1445,20 +1428,17 @@
         switch (opIdx) {
           case 0:
             // SRC0
-            return opSelectorToRegIdx(extData.SRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(extData.SRC0, num_scalar_regs);
           case 1:
             if (numSrcRegOperands() > 1) {
                 // if we have more than 1 source operand then
                 // op index 1 corresponds to SRC1
-                return opSelectorToRegIdx(extData.SRC1,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(extData.SRC1, num_scalar_regs);
             } else {
                 // if we only have 1 source operand, opIdx 1
                 // will be VDST
                 if (sgprDst) {
-                    return opSelectorToRegIdx(instData.VDST,
-                            gpuDynInst->wavefront()->reservedScalarRegs);
+                    return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
                 }
                 return instData.VDST;
             }
@@ -1468,15 +1448,13 @@
                 // op index 2 corresponds to SRC2. SRC2 may be
                 // a scalar or vector register, an inline
                 // constant, or a special HW register
-                return opSelectorToRegIdx(extData.SRC2,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(extData.SRC2, num_scalar_regs);
             } else if (numSrcRegOperands() == 2) {
                 // if we only have 2 source operands, opIdx 2
                 // will be VDST, and VDST is always a vector
                 // reg
                 if (sgprDst) {
-                    return opSelectorToRegIdx(instData.VDST,
-                            gpuDynInst->wavefront()->reservedScalarRegs);
+                    return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
                 }
                 return instData.VDST;
             } else {
@@ -1484,31 +1462,27 @@
                 // VDST then it must be a VCC read or write,
                 // and VCC is never stored in a VGPR
                 assert(writesVCC() || readsVCC());
-                return opSelectorToRegIdx(REG_VCC_LO,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
             }
           case 3:
             if (numSrcRegOperands() == 3) {
                 // if we have 3 source operands then op
                 // idx 3 will correspond to VDST
                 if (sgprDst) {
-                    return opSelectorToRegIdx(instData.VDST,
-                            gpuDynInst->wavefront()->reservedScalarRegs);
+                    return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
                 }
                 return instData.VDST;
             } else {
                 // if this idx doesn't correspond to VDST
                 // then it must be a VCC read or write
                 assert(writesVCC() || readsVCC());
-                return opSelectorToRegIdx(REG_VCC_LO,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
             }
           case 4:
             // if a VOP3 instruction has more than 4 ops
             // it must read from and write to VCC
             assert(writesVCC() || readsVCC());
-            return opSelectorToRegIdx(REG_VCC_LO,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -1731,7 +1705,7 @@
     }

     int
-    Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -1739,14 +1713,12 @@
         switch (opIdx) {
           case 0:
             // SRC0
-            return opSelectorToRegIdx(extData.SRC0,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(extData.SRC0, num_scalar_regs);
           case 1:
             if (numSrcRegOperands() > 1) {
                 // if we have more than 1 source operand then
                 // op index 1 corresponds to SRC1
-                return opSelectorToRegIdx(extData.SRC1,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(extData.SRC1, num_scalar_regs);
             } else {
                 // if we only have 1 source operand, opIdx 1
                 // will be VDST
@@ -1756,8 +1728,7 @@
             if (numSrcRegOperands() > 2) {
                 // if we have more than 2 source operand then
                 // op index 2 corresponds to SRC2
-                return opSelectorToRegIdx(extData.SRC2,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(extData.SRC2, num_scalar_regs);
             } else if (numSrcRegOperands() == 2) {
                 // if we only have 2 source operands, opIdx 2
                 // will be VDST
@@ -1766,8 +1737,7 @@
                 // if this idx doesn't correspond to SRCX or
                 // VDST then it must be a VCC read or write
                 assert(writesVCC() || readsVCC());
-                return opSelectorToRegIdx(instData.SDST,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
             }
           case 3:
             if (numSrcRegOperands() == 3) {
@@ -1778,15 +1748,13 @@
                 // if this idx doesn't correspond to VDST
                 // then it must be a VCC read or write
                 assert(writesVCC() || readsVCC());
-                return opSelectorToRegIdx(instData.SDST,
-                        gpuDynInst->wavefront()->reservedScalarRegs);
+                return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
             }
           case 4:
             // if a VOP3 instruction has more than 4 ops
             // it must read from and write to VCC
             assert(writesVCC() || readsVCC());
-            return opSelectorToRegIdx(instData.SDST,
-                    gpuDynInst->wavefront()->reservedScalarRegs);
+            return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
           default:
             fatal("Operand at idx %i does not exist\n", opIdx);
             return -1;
@@ -1871,7 +1839,7 @@
     }

     int
-    Inst_DS::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_DS::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -1993,7 +1961,7 @@
     }

     int
-    Inst_MUBUF::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_MUBUF::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
@@ -2170,7 +2138,7 @@
     }

     int
-    Inst_FLAT::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
+    Inst_FLAT::getRegisterIndex(int opIdx, int num_scalar_regs)
     {
         assert(opIdx >= 0);
         assert(opIdx < getNumOperands());
diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.hh b/src/arch/amdgpu/gcn3/insts/op_encodings.hh
index 0957a7d..c2a417a 100644
--- a/src/arch/amdgpu/gcn3/insts/op_encodings.hh
+++ b/src/arch/amdgpu/gcn3/insts/op_encodings.hh
@@ -79,7 +79,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -103,7 +103,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -127,7 +127,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -151,7 +151,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -175,7 +175,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -193,7 +193,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         /**
@@ -276,7 +276,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -300,7 +300,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -324,7 +324,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -361,7 +361,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -395,7 +395,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         // first instruction DWORD
@@ -418,7 +418,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         template<typename T>
@@ -518,7 +518,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         template<typename T>
@@ -649,7 +649,7 @@
                      * non-formatted accesses, this is done on a per-lane
                      * basis.
                      */
-                    if (stride == 0 || !rsrc_desc.swizzleEn) {
+                    if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
                         if (buf_off + stride * buf_idx >=
                             rsrc_desc.numRecords - s_offset.rawData()) {
                             DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
@@ -657,13 +657,13 @@
                                     "const_stride = %llx, "
                                     "const_num_records = %llx\n",
                                     lane, buf_off + stride * buf_idx,
-                                    stride, rsrc_desc.numRecords);
+                                    rsrc_desc.stride, rsrc_desc.numRecords);
                             oobMask.set(lane);
                             continue;
                         }
                     }

-                    if (stride != 0 && rsrc_desc.swizzleEn) {
+                    if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
                         if (buf_idx >= rsrc_desc.numRecords ||
                             buf_off >= stride) {
                             DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
@@ -776,7 +776,7 @@

         bool isScalarRegister(int opIdx) override;
         bool isVectorRegister(int opIdx) override;
-        int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
+        int getRegisterIndex(int opIdx, int num_scalar_regs) override;

       protected:
         template<typename T>
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index d2af7b3..2664c3b 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -557,7 +557,7 @@
                                                wavefront, gpu_static_inst,
                                                wavefront->computeUnit->
                                                 getAndIncSeqNum());
-            gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
+            gpu_dyn_inst->initOperandInfo();
             wavefront->instructionBuffer.push_back(gpu_dyn_inst);

             DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
@@ -598,7 +598,7 @@
                                        wavefront, gpu_static_inst,
                                        wavefront->computeUnit->
                                            getAndIncSeqNum());
-    gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
+    gpu_dyn_inst->initOperandInfo();
     wavefront->instructionBuffer.push_back(gpu_dyn_inst);

     DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc
index c08e4b9..2dc1dcf 100644
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -85,9 +85,8 @@
 }

 void
-GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
+GPUDynInst::initOperandInfo()
 {
-    assert(gpu_dyn_inst->wavefront());
     /**
      * Generate and cache the operand to register mapping information. This
      * prevents this info from being generated multiple times throughout
@@ -102,7 +101,7 @@
         int op_num_dwords(-1);

         if (isVectorRegister(op_idx)) {
-            virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
+            virt_idx = getRegisterIndex(op_idx);
             op_num_dwords = numOpdDWORDs(op_idx);

             if (isSrcOperand(op_idx)) {
@@ -143,7 +142,7 @@
                                           phys_indices);
             }
         } else if (isScalarRegister(op_idx)) {
-            virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
+            virt_idx = getRegisterIndex(op_idx);
             op_num_dwords = numOpdDWORDs(op_idx);

             if (isSrcOperand(op_idx)) {
@@ -287,9 +286,9 @@
 }

 int
-GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
+GPUDynInst::getRegisterIndex(int operandIdx)
 {
-    return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
+    return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs);
 }

 int
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
index e38a87f..b2635bc 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -117,7 +117,7 @@
     GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
                uint64_t instSeqNum);
     ~GPUDynInst();
-    void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst);
+    void initOperandInfo();
     void execute(GPUDynInstPtr gpuDynInst);

     const std::vector<RegisterOperandInfo>&
@@ -159,7 +159,7 @@
     int getNumOperands();
     bool isVectorRegister(int operandIdx);
     bool isScalarRegister(int operandIdx);
-    int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
+    int getRegisterIndex(int operandIdx);
     int getOperandSize(int operandIdx);
     bool isDstOperand(int operandIdx);
     bool isSrcOperand(int operandIdx);
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh
index f973f2f..cdd342e 100644
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -86,8 +86,7 @@
     virtual bool isExecMaskRegister(int opIdx) = 0;
     virtual int getOperandSize(int operandIndex) = 0;

-    virtual int getRegisterIndex(int operandIndex,
-                                 GPUDynInstPtr gpuDynInst) = 0;
+    virtual int getRegisterIndex(int operandIndex, int num_scalar_regs) = 0;

     virtual int numDstRegOperands() = 0;
     virtual int numSrcRegOperands() = 0;
@@ -310,7 +309,7 @@
     int getOperandSize(int operandIndex) override { return 0; }

     int
-    getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
+    getRegisterIndex(int operandIndex, int num_scalar_regs) override
     {
         return 0;
     }
diff --git a/src/gpu-compute/scalar_register_file.cc b/src/gpu-compute/scalar_register_file.cc
index 8068ff8..ba6ac57 100644
--- a/src/gpu-compute/scalar_register_file.cc
+++ b/src/gpu-compute/scalar_register_file.cc
@@ -53,7 +53,7 @@
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {

-            int sgprIdx = ii->getRegisterIndex(i, ii);
+            int sgprIdx = ii->getRegisterIndex(i);
             int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
                 ii->getOperandSize(i) / 4;

@@ -84,7 +84,7 @@
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {

-            int sgprIdx = ii->getRegisterIndex(i, ii);
+            int sgprIdx = ii->getRegisterIndex(i);
             int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
                 ii->getOperandSize(i) / 4;

@@ -116,7 +116,7 @@

         for (int i = 0; i < ii->getNumOperands(); i++) {
             if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
-                int sgprIdx = ii->getRegisterIndex(i, ii);
+                int sgprIdx = ii->getRegisterIndex(i);
                 int nRegs = ii->getOperandSize(i) <= 4 ? 1
                     : ii->getOperandSize(i) / 4;
                 for (int j = 0; j < nRegs; j++) {
@@ -139,7 +139,7 @@
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {

-            int sgprIdx = ii->getRegisterIndex(i, ii);
+            int sgprIdx = ii->getRegisterIndex(i);
             int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
                 ii->getOperandSize(i) / 4;

diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc
index 40ce281..f162279 100644
--- a/src/gpu-compute/vector_register_file.cc
+++ b/src/gpu-compute/vector_register_file.cc
@@ -59,7 +59,7 @@
 {
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
-            int vgprIdx = ii->getRegisterIndex(i, ii);
+            int vgprIdx = ii->getRegisterIndex(i);

             // determine number of registers
             int nRegs =
@@ -89,7 +89,7 @@
     // iterate over all register destination operands
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
-            int vgprIdx = ii->getRegisterIndex(i, ii);
+            int vgprIdx = ii->getRegisterIndex(i);
             int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
                 ii->getOperandSize(i) / 4;

@@ -150,7 +150,7 @@

         for (int i = 0; i < ii->getNumOperands(); i++) {
             if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
-                int vgprIdx = ii->getRegisterIndex(i, ii);
+                int vgprIdx = ii->getRegisterIndex(i);
                 int nRegs = ii->getOperandSize(i) <= 4 ? 1
                     : ii->getOperandSize(i) / 4;
                 for (int j = 0; j < nRegs; j++) {
@@ -183,7 +183,7 @@
     assert(ii->isLoad() || ii->isAtomicRet());
     for (int i = 0; i < ii->getNumOperands(); ++i) {
         if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
-            int vgprIdx = ii->getRegisterIndex(i, ii);
+            int vgprIdx = ii->getRegisterIndex(i);
             int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
                 ii->getOperandSize(i) / 4;

diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index 279aac7..0cb6675 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -927,7 +927,7 @@
     // vector RAW dependency tracking
     for (int i = 0; i < ii->getNumOperands(); i++) {
         if (ii->isVectorRegister(i)) {
-            int vgpr = ii->getRegisterIndex(i, ii);
+            int vgpr = ii->getRegisterIndex(i);
             int nReg = ii->getOperandSize(i) <= 4 ? 1 :
                 ii->getOperandSize(i) / 4;
             for (int n = 0; n < nReg; n++) {



1 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the submitted one.
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42210
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I29ab8d9a3de1f8b82b820ef421fc653284567c65
Gerrit-Change-Number: 42210
Gerrit-PatchSet: 6
Gerrit-Owner: Alex Dutu <[email protected]>
Gerrit-Reviewer: Matt Sinclair <[email protected]>
Gerrit-Reviewer: Matthew Poremba <[email protected]>
Gerrit-Reviewer: kokoro <[email protected]>
Gerrit-CC: Bobby R. Bruce <[email protected]>
Gerrit-CC: Kyle Roarty <[email protected]>
Gerrit-CC: Tony Gutierrez <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to