[gem5-dev] [M] Change in gem5/gem5[develop]: arch-vega: Read one dword for SGPR base global insts

2023-01-05 Thread Matthew Poremba (Gerrit) via gem5-dev
Matthew Poremba has submitted this change. (  
https://gem5-review.googlesource.com/c/public/gem5/+/67077?usp=email )


Change subject: arch-vega: Read one dword for SGPR base global insts
..

arch-vega: Read one dword for SGPR base global insts

Global instructions in Vega can either use a VGPR base address plus
instruction offset or SGPR base address plus VGPR offset plus
instruction offset. Currently the VGPR address/offset is always read as
two dwords. This causes problems if the VGPR number is the last VGPR
allocated to a wavefront since the second dword would be beyond the
allocation and trip an assert.

This changeset sets the operand size of the VGPR operand to one dword
when SGPR base is used and two dwords otherwise so initDynOperandInfo
does not assert. It also moves the read of the VGPR into the calcAddr
method so that the correct ConstVecOperandU## is used to prevent another
assertion failure when reading from the register file. These two changes
are made to all flat instructions, as global instructions are a
subsegement of flat instructions.

Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67077
Reviewed-by: Matt Sinclair 
Maintainer: Matt Sinclair 
Tested-by: kokoro 
---
M src/arch/amdgpu/vega/insts/instructions.cc
M src/arch/amdgpu/vega/insts/instructions.hh
M src/arch/amdgpu/vega/insts/op_encodings.hh
3 files changed, 101 insertions(+), 107 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass




diff --git a/src/arch/amdgpu/vega/insts/instructions.cc  
b/src/arch/amdgpu/vega/insts/instructions.cc

index c803656..4b27afa 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -43831,11 +43831,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -43919,11 +43915,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44008,11 +44000,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44067,11 +44055,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44126,11 +44110,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44194,11 +44174,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44266,13 +44242,11 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
 ConstVecOperandU8 data(gpuDynInst, extData.DATA);

-addr.read();
 data.read();

-calcAddr(gpuDynInst, addr, extData.SADDR, 

[gem5-dev] [M] Change in gem5/gem5[develop]: arch-vega: Read one dword for SGPR base global insts

2022-12-30 Thread Matthew Poremba (Gerrit) via gem5-dev
Matthew Poremba has uploaded this change for review. (  
https://gem5-review.googlesource.com/c/public/gem5/+/67077?usp=email )



Change subject: arch-vega: Read one dword for SGPR base global insts
..

arch-vega: Read one dword for SGPR base global insts

Global instructions in Vega can either use a VGPR base address plus
instruction offset or SGPR base address plus VGPR offset plus
instruction offset. Currently the VGPR address/offset is always read as
two dwords. This causes problems if the VGPR number is the last VGPR
allocated to a wavefront since the second dword would be beyond the
allocation and trip an assert.

This changeset sets the operand size of the VGPR operand to one dword
when SGPR base is used and two dwords otherwise so initDynOperandInfo
does not assert. It also moves the read of the VGPR into the calcAddr
method so that the correct ConstVecOperandU## is used to prevent another
assertion failure when reading from the register file. These two changes
are made to all flat instructions, as global instructions are a
subsegement of flat instructions.

Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0
---
M src/arch/amdgpu/vega/insts/instructions.cc
M src/arch/amdgpu/vega/insts/instructions.hh
M src/arch/amdgpu/vega/insts/op_encodings.hh
3 files changed, 97 insertions(+), 107 deletions(-)



diff --git a/src/arch/amdgpu/vega/insts/instructions.cc  
b/src/arch/amdgpu/vega/insts/instructions.cc

index f0fb1aa..7594f9c 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -43825,11 +43825,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -43913,11 +43909,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44002,11 +43994,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44061,11 +44049,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44120,11 +44104,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44188,11 +44168,7 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-addr.read();
-
-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 issueRequestHelper(gpuDynInst);
 } // execute
@@ -44260,13 +44236,11 @@
 gpuDynInst->latency.init(gpuDynInst->computeUnit());
 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

-ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
 ConstVecOperandU8 data(gpuDynInst, extData.DATA);

-addr.read();
 data.read();

-calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);

 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 if (gpuDynInst->exec_mask[lane]) {
@@ -44319,13 +44293,11 @@