Hello Tony Gutierrez, Xianwei Zhang,
I'd like you to do a code review. Please visit
https://gem5-review.googlesource.com/c/public/gem5/+/29917
to review the following change.
Change subject: gpu-compute: enable flexible control of kernel boundary
syncs
......................................................................
gpu-compute: enable flexible control of kernel boundary syncs
Kernel end release was turned on for the VIPER protocol, which
is in fact write-through based and thus has no need for a
release operation. This changeset splits the option
'impl_kern_boundary_sync' into 'impl_kern_launch_acq'
and 'impl_kern_end_rel', and turns off release on VIPER.
Change-Id: I5490019b6765a25bd801cc78fb7445b90eb02a3d
---
M src/arch/gcn3/insts/instructions.cc
M src/gpu-compute/GPU.py
M src/gpu-compute/dispatcher.cc
M src/gpu-compute/shader.cc
M src/gpu-compute/shader.hh
5 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 7578694..8d63296 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -3759,9 +3759,13 @@
// the last workgroup in the kernel).
bool kernelEnd =
wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
+ // further check whether 'release @ kernel end' is needed
+ bool relNeeded =
+ wf->computeUnit->shader->impl_kern_end_rel;
- // if it is not a kernel end, then retire the workgroup directly
- if (!kernelEnd) {
+ // if not a kernel end or no release needed, retire the workgroup
+ // directly
+ if (!kernelEnd || !relNeeded) {
wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
wf->setStatus(Wavefront::S_STOPPED);
wf->computeUnit->completedWGs++;
@@ -3770,8 +3774,8 @@
}
/**
- * If it is a kernel end, inject a memory sync and retire the
- * workgroup after receving response.
+ * If a kernel end and release needed, inject a memory sync and
+ * retire the workgroup after receiving all acks.
*/
setFlag(MemSync);
setFlag(GlobalSegment);
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index 6b033f4..8a2ad81 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -213,8 +213,10 @@
gpu_cmd_proc = Param.GPUCommandProcessor('Command processor for GPU')
dispatcher = Param.GPUDispatcher('GPU workgroup dispatcher')
n_wf = Param.Int(10, 'Number of wavefront slots per SIMD')
- impl_kern_boundary_sync = Param.Bool(True, """Insert acq/rel packets into
- ruby at kernel boundaries""")
+ impl_kern_launch_acq = Param.Bool(True, """Insert acq packet into
+ ruby at kernel launch""")
+ impl_kern_end_rel = Param.Bool(False, """Insert rel packet into
+ ruby at kernel end""")
globalmem = Param.MemorySize('64kB', 'Memory size')
timing = Param.Bool(False, 'timing memory accesses')
diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc
index c4f16c7..65dcf0e 100644
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -166,12 +166,12 @@
auto task = hsaQueueEntries[exec_id];
bool launched(false);
- // invalidate is needed before starting dispatch
- if (shader->impl_kern_boundary_sync) {
+ // acq is needed before starting dispatch
+ if (shader->impl_kern_launch_acq) {
// try to invalidate cache
shader->prepareInvalidate(task);
} else {
- // kern boundary sync is not set, skip invalidate
+ // kern launch acquire is not set, skip invalidate
task->markInvDone();
}
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 4be2fbf..aa7a6dd 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -56,7 +56,8 @@
tickEvent([this]{ execScheduledAdds(); }, "Shader scheduled adds event",
false, Event::CPU_Tick_Pri),
timingSim(p->timing), hsail_mode(SIMT),
- impl_kern_boundary_sync(p->impl_kern_boundary_sync),
+ impl_kern_launch_acq(p->impl_kern_launch_acq),
+ impl_kern_end_rel(p->impl_kern_end_rel),
coissue_return(1),
trace_vgpr_all(1), n_cu((p->CUs).size()), n_wf(p->n_wf),
globalMemSize(p->globalmem),
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index 72063a4..eeaf343 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -216,7 +216,9 @@
hsail_mode_e hsail_mode;
// If set, issue acq packet @ kernel launch
- int impl_kern_boundary_sync;
+ int impl_kern_launch_acq;
+ // If set, issue rel packet @ kernel end
+ int impl_kern_end_rel;
// If set, fetch returns may be coissued with instructions
int coissue_return;
// If set, always dump all 64 gprs to trace
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/29917
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I5490019b6765a25bd801cc78fb7445b90eb02a3d
Gerrit-Change-Number: 29917
Gerrit-PatchSet: 1
Gerrit-Owner: Anthony Gutierrez <[email protected]>
Gerrit-Reviewer: Tony Gutierrez <[email protected]>
Gerrit-Reviewer: Xianwei Zhang <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s