Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/67077?usp=email )

Change subject: arch-vega: Read one dword for SGPR base global insts
......................................................................

arch-vega: Read one dword for SGPR base global insts

Global instructions in Vega can either use a VGPR base address plus
instruction offset or SGPR base address plus VGPR offset plus
instruction offset. Currently the VGPR address/offset is always read as
two dwords. This causes problems if the VGPR number is the last VGPR
allocated to a wavefront since the second dword would be beyond the
allocation and trip an assert.

This changeset sets the operand size of the VGPR operand to one dword
when SGPR base is used and two dwords otherwise so initDynOperandInfo
does not assert. It also moves the read of the VGPR into the calcAddr
method so that the correct ConstVecOperandU## is used to prevent another
assertion failure when reading from the register file. These two changes
are made to all flat instructions, as global instructions are a
subsegment of flat instructions.

Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67077
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/arch/amdgpu/vega/insts/instructions.cc
M src/arch/amdgpu/vega/insts/instructions.hh
M src/arch/amdgpu/vega/insts/op_encodings.hh
3 files changed, 101 insertions(+), 107 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index c803656..4b27afa 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -43831,11 +43831,7 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         issueRequestHelper(gpuDynInst);
     } // execute
@@ -43919,11 +43915,7 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44008,11 +44000,7 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44067,11 +44055,7 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44126,11 +44110,7 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44194,11 +44174,7 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44266,13 +44242,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU8 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44325,13 +44299,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU16 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44384,13 +44356,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44444,13 +44414,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU64 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44504,17 +44472,15 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
         ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
         ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);

-        addr.read();
         data0.read();
         data1.read();
         data2.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44572,19 +44538,17 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
         ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
         ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
         ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);

-        addr.read();
         data0.read();
         data1.read();
         data2.read();
         data3.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44650,13 +44614,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44732,15 +44694,13 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
         ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);

-        addr.read();
         data.read();
         cmp.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44814,13 +44774,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -45204,15 +45162,13 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU64 data(gpuDynInst, extData.DATA);
         ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);

-        addr.read();
         data.read();
         cmp.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -45287,13 +45243,11 @@
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU64 data(gpuDynInst, extData.DATA);

-        addr.read();
         data.read();

-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index b2cf2b9..9f017f9 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -41892,7 +41892,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 1 : 8;
               case 2: //vgpr_dst
@@ -41929,7 +41929,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 1 : 8;
               case 2: //vgpr_dst
@@ -41966,7 +41966,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 2 : 8;
               case 2: //vgpr_dst
@@ -42003,7 +42003,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 2 : 8;
               case 2: //vgpr_dst
@@ -42040,7 +42040,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 4 : 8;
               case 2: //vgpr_dst
@@ -42077,7 +42077,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 8 : 8;
               case 2: //vgpr_dst
@@ -42114,7 +42114,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 12 : 8;
               case 2: //vgpr_dst
@@ -42151,7 +42151,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 16 : 8;
               case 2: //vgpr_dst
@@ -42188,7 +42188,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 1;
               case 2: //saddr
@@ -42225,7 +42225,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 2;
               case 2: //saddr
@@ -42262,7 +42262,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //saddr
@@ -42299,7 +42299,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //saddr
@@ -42336,7 +42336,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 12;
               case 2: //saddr
@@ -42373,7 +42373,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 16;
               case 2: //saddr
@@ -42410,7 +42410,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42449,7 +42449,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -42488,7 +42488,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42527,7 +42527,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42564,7 +42564,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42601,7 +42601,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42638,7 +42638,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42675,7 +42675,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42712,7 +42712,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42749,7 +42749,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42786,7 +42786,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42823,7 +42823,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42860,7 +42860,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42897,7 +42897,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -42934,7 +42934,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 16;
               case 2: //vgpr_dst or saddr
@@ -42973,7 +42973,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43012,7 +43012,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43049,7 +43049,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43086,7 +43086,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43123,7 +43123,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43160,7 +43160,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43197,7 +43197,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43234,7 +43234,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43271,7 +43271,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43308,7 +43308,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43345,7 +43345,7 @@
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index 508d706..34f6040 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -925,7 +925,7 @@
         }

         void
-        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+        calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr,
                  ScalarRegU32 saddr, ScalarRegI32 offset)
         {
             // Offset is a 13-bit field w/the following meanings:
@@ -940,14 +940,20 @@
             // be a 64-bit address. Otherwise, saddr is the reg index for a
             // scalar reg used as the base address for a 32-bit address.
             if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) {
-                calcAddrVgpr(gpuDynInst, vaddr, offset);
+                ConstVecOperandU64 vbase(gpuDynInst, vaddr);
+                vbase.read();
+
+                calcAddrVgpr(gpuDynInst, vbase, offset);
             } else {
                 // Assume we are operating in 64-bit mode and read a pair of
                 // SGPRs for the address base.
                 ConstScalarOperandU64 sbase(gpuDynInst, saddr);
                 sbase.read();

-                calcAddrSgpr(gpuDynInst, vaddr, sbase, offset);
+                ConstVecOperandU32 voffset(gpuDynInst, vaddr);
+                voffset.read();
+
+                calcAddrSgpr(gpuDynInst, voffset, sbase, offset);
             }

             if (isFlat()) {
@@ -974,6 +980,12 @@
             }
         }

+        bool
+        vgprIsOffset()
+        {
+            return (extData.SADDR != 0x7f);
+        }
+
         // first instruction DWORD
         InFmt_FLAT instData;
         // second instruction DWORD
@@ -987,7 +999,7 @@
         void generateGlobalDisassembly();

         void
-        calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+        calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &vaddr,
                      ConstScalarOperandU64 &saddr, ScalarRegI32 offset)
         {
             // Use SGPR pair as a base address and add VGPR-offset and

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/67077?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0
Gerrit-Change-Number: 67077
Gerrit-PatchSet: 3
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org

Reply via email to