Hello Tony Gutierrez,
I'd like you to do a code review. Please visit
https://gem5-review.googlesource.com/c/public/gem5/+/29947
to review the following change.
Change subject: arch-gcn3: fix bug with DPP support
......................................................................
arch-gcn3: fix bug with DPP support
Instructions that use the DPP field need to use the extra SRC0
register associated with the DPP instruction instead of the
"default" SRC0 register, since the default SRC0 register contains
the DPP information when DPP is being used. This commit fixes
2735c3bb88 to take this into account. Additionally, this commit
removes write of the src register from the DPP helper functions,
to avoid overwriting any changes made to the destination register.
Finally, this change modifies the instructions that use DPP to
simplify the flow through the execute() functions.
Change-Id: I80fd0af1f131f287f18ff73b3c1c9122d8c60823
---
M src/arch/gcn3/insts/inst_util.hh
M src/arch/gcn3/insts/instructions.cc
2 files changed, 41 insertions(+), 20 deletions(-)
diff --git a/src/arch/gcn3/insts/inst_util.hh
b/src/arch/gcn3/insts/inst_util.hh
index 433ccbe..b40e890 100644
--- a/src/arch/gcn3/insts/inst_util.hh
+++ b/src/arch/gcn3/insts/inst_util.hh
@@ -505,7 +505,6 @@
src0[lane] = 0;
}
- src0.write();
// reset for next iteration
laneDisabled = false;
}
diff --git a/src/arch/gcn3/insts/instructions.cc
b/src/arch/gcn3/insts/instructions.cc
index b852281..79e7dda 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -5296,8 +5296,12 @@
VecOperandF32 src1(gpuDynInst, instData.VSRC1);
VecOperandF32 vdst(gpuDynInst, instData.VDST);
+ src0.readSrc();
+ src1.read();
+
if (isDPPInst()) {
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+ src0_dpp.read();
DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register
v[%d], "
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
@@ -5313,14 +5317,17 @@
extData.iFmt_VOP_DPP.ROW_MASK);
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
- }
- src0.readSrc();
- src1.read();
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = src0[lane] + src1[lane];
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0_dpp[lane] + src1[lane];
+ }
+ }
+ } else {
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0[lane] + src1[lane];
+ }
}
}
@@ -6164,6 +6171,7 @@
if (isDPPInst()) {
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+ src0_dpp.read();
DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register
v[%d], "
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
@@ -6179,11 +6187,18 @@
extData.iFmt_VOP_DPP.ROW_MASK);
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
- }
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
+ vdst[lane]);
+ }
+ }
+ } else {
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = std::fma(src0[lane], src1[lane],
vdst[lane]);
+ }
}
}
@@ -7117,8 +7132,11 @@
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
VecOperandU32 vdst(gpuDynInst, instData.VDST);
+ src.readSrc();
+
if (isDPPInst()) {
- VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+ VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+ src_dpp.read();
DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register
v[%d], "
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
@@ -7137,14 +7155,18 @@
// to negate it or take the absolute value of it
assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
- processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp);
- }
+ processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
- src.readSrc();
-
- for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
- if (wf->execMask(lane)) {
- vdst[lane] = src[lane];
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src_dpp[lane];
+ }
+ }
+ } else {
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src[lane];
+ }
}
}
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/29947
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I80fd0af1f131f287f18ff73b3c1c9122d8c60823
Gerrit-Change-Number: 29947
Gerrit-PatchSet: 1
Gerrit-Owner: Anthony Gutierrez <[email protected]>
Gerrit-Reviewer: Tony Gutierrez <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s