[gem5-dev] [M] Change in gem5/gem5[develop]: arch-vega: Read one dword for SGPR base global insts
Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/67077?usp=email ) Change subject: arch-vega: Read one dword for SGPR base global insts .. arch-vega: Read one dword for SGPR base global insts Global instructions in Vega can either use a VGPR base address plus instruction offset or SGPR base address plus VGPR offset plus instruction offset. Currently the VGPR address/offset is always read as two dwords. This causes problems if the VGPR number is the last VGPR allocated to a wavefront since the second dword would be beyond the allocation and trip an assert. This changeset sets the operand size of the VGPR operand to one dword when SGPR base is used and two dwords otherwise so initDynOperandInfo does not assert. It also moves the read of the VGPR into the calcAddr method so that the correct ConstVecOperandU## is used to prevent another assertion failure when reading from the register file. These two changes are made to all flat instructions, as global instructions are a subsegement of flat instructions. Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67077 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- M src/arch/amdgpu/vega/insts/instructions.cc M src/arch/amdgpu/vega/insts/instructions.hh M src/arch/amdgpu/vega/insts/op_encodings.hh 3 files changed, 101 insertions(+), 107 deletions(-) Approvals: Matt Sinclair: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index c803656..4b27afa 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -43831,11 +43831,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -43919,11 +43915,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44008,11 +44000,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44067,11 +44055,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44126,11 +44110,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44194,11 +44174,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44266,13 +44242,11 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU8 data(gpuDynInst, extData.DATA); -addr.read(); data.read(); -calcAddr(gpuDynInst, addr, extData.SADDR,
[gem5-dev] [M] Change in gem5/gem5[develop]: arch-vega: Read one dword for SGPR base global insts
Matthew Poremba has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/67077?usp=email ) Change subject: arch-vega: Read one dword for SGPR base global insts .. arch-vega: Read one dword for SGPR base global insts Global instructions in Vega can either use a VGPR base address plus instruction offset or SGPR base address plus VGPR offset plus instruction offset. Currently the VGPR address/offset is always read as two dwords. This causes problems if the VGPR number is the last VGPR allocated to a wavefront since the second dword would be beyond the allocation and trip an assert. This changeset sets the operand size of the VGPR operand to one dword when SGPR base is used and two dwords otherwise so initDynOperandInfo does not assert. It also moves the read of the VGPR into the calcAddr method so that the correct ConstVecOperandU## is used to prevent another assertion failure when reading from the register file. These two changes are made to all flat instructions, as global instructions are a subsegement of flat instructions. Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0 --- M src/arch/amdgpu/vega/insts/instructions.cc M src/arch/amdgpu/vega/insts/instructions.hh M src/arch/amdgpu/vega/insts/op_encodings.hh 3 files changed, 97 insertions(+), 107 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index f0fb1aa..7594f9c 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -43825,11 +43825,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -43913,11 +43909,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44002,11 +43994,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44061,11 +44049,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44120,11 +44104,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44188,11 +44168,7 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - -addr.read(); - -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); issueRequestHelper(gpuDynInst); } // execute @@ -44260,13 +44236,11 @@ gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); -ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); ConstVecOperandU8 data(gpuDynInst, extData.DATA); -addr.read(); data.read(); -calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); +calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (gpuDynInst->exec_mask[lane]) { @@ -44319,13 +44293,11 @@