[gem5-dev] Change in gem5/gem5[develop]: gpu-compute,mem-ruby: Replace ACQUIRE and RELEASE request flags
Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/32859 ) Change subject: gpu-compute,mem-ruby: Replace ACQUIRE and RELEASE request flags .. gpu-compute,mem-ruby: Replace ACQUIRE and RELEASE request flags This patch replaces ACQUIRE and RELEASE flags which are HSA-specific. ACQUIRE flag becomes INV_L1 in VIPER protocol. RELEASE flag is removed. Future protocols may support extra cache coherence flags like INV_L2 and WB_L2. Change-Id: I3d60c9d3625c898f4110a12d81742b6822728533 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/32859 Reviewed-by: Jason Lowe-Power Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- M src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc M src/gpu-compute/compute_unit.cc M src/gpu-compute/gpu_dyn_inst.hh M src/mem/request.hh M src/mem/ruby/system/GPUCoalescer.cc M src/mem/ruby/system/VIPERCoalescer.cc 6 files changed, 60 insertions(+), 39 deletions(-) Approvals: Jason Lowe-Power: Looks good to me, approved Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc index dbdaba4..6b3c3a0 100644 --- a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -232,7 +232,7 @@ threadId, nullptr); acq_req->setPaddr(0); acq_req->setReqInstSeqNum(tester->getActionSeqNum()); -acq_req->setFlags(Request::ACQUIRE); +acq_req->setCacheCoherenceFlags(Request::INV_L1); // set protocol-specific flags setExtraRequestFlags(acq_req); diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 2787e42..1da5a45 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -805,9 +805,9 @@ // here (simdId=-1, wfSlotId=-1) if (gpuDynInst->isKernelLaunch()) { // for kernel launch, the original request must be both kernel-type -// and acquire +// and INV_L1 assert(pkt->req->isKernel()); -assert(pkt->req->isAcquire()); +assert(pkt->req->isInvL1()); // one D-Cache inv is done, decrement counter dispatcher.updateInvCounter(gpuDynInst->kern_id); @@ -820,16 +820,19 @@ // retrieve wavefront from inst Wavefront *w = gpuDynInst->wavefront(); -// Check if we are waiting on Kernel End Release +// Check if we are waiting on Kernel End Flush if (w->getStatus() == Wavefront::S_RETURNING && gpuDynInst->isEndOfKernel()) { // for kernel end, the original request must be both kernel-type -// and release +// and last-level GPU cache should be flushed if it contains +// dirty data. This request may have been quiesced and +// immediately responded to if the GL2 is a write-through / +// read-only cache. assert(pkt->req->isKernel()); -assert(pkt->req->isRelease()); +assert(pkt->req->isGL2CacheFlush()); -// one wb done, decrement counter, and return whether all wbs are -// done for the kernel +// once flush done, decrement counter, and return whether all +// dirty writeback operations are done for the kernel bool isWbDone = dispatcher.updateWbCounter(gpuDynInst->kern_id); // not all wbs are done for the kernel, just release pkt @@ -1218,7 +1221,7 @@ if (kernelMemSync) { if (gpuDynInst->isKernelLaunch()) { -req->setCacheCoherenceFlags(Request::ACQUIRE); +req->setCacheCoherenceFlags(Request::INV_L1); req->setReqInstSeqNum(gpuDynInst->seqNum()); req->setFlags(Request::KERNEL); pkt = new Packet(req, MemCmd::MemSyncReq); @@ -1234,11 +1237,12 @@ schedule(mem_req_event, curTick() + req_tick_latency); } else { - // kernel end release must be enabled + // kernel end flush of GL2 cache may be quiesced by Ruby if the + // GL2 is a read-only cache assert(shader->impl_kern_end_rel); assert(gpuDynInst->isEndOfKernel()); - req->setCacheCoherenceFlags(Request::WB_L2); + req->setCacheCoherenceFlags(Request::FLUSH_L2); req->setReqInstSeqNum(gpuDynInst->seqNum()); req->setFlags(Request::KERNEL); pkt = new Packet(req, MemCmd::MemSyncReq); diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index f34eff6..cdb130e 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -306,7 +306,7 @@ assert(!isEndOfKer
[gem5-dev] Change in gem5/gem5[develop]: gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags
Hello Tuan Ta, I'd like you to do a code review. Please visit https://gem5-review.googlesource.com/c/public/gem5/+/32859 to review the following change. Change subject: gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags .. gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags This patch replaces ACQUIRE and RELEASE flags which are HSA-specific. ACQUIRE flag becomes INV_L1 in VIPER protocol. RELEASE flag is removed. Future protocols may support extra cache coherence flags like INV_L2 and WB_L2. Change-Id: I3d60c9d3625c898f4110a12d81742b6822728533 --- M src/cpu/testers/gpu_ruby_test/GpuWavefront.cc M src/gpu-compute/compute_unit.cc M src/gpu-compute/gpu_dyn_inst.hh M src/mem/request.hh M src/mem/ruby/system/VIPERCoalescer.cc 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc index 822fb26..7addd72 100644 --- a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc @@ -233,7 +233,7 @@ threadId, nullptr); acq_req->setPaddr(0); acq_req->setReqInstSeqNum(tester->getActionSeqNum()); -acq_req->setFlags(Request::ACQUIRE); +acq_req->setCacheCoherenceFlags(Request::INV_L1); // set protocol-specific flags setExtraRequestFlags(acq_req); diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 7e0947f..0ad3846 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -798,9 +798,9 @@ // here (simdId=-1, wfSlotId=-1) if (gpuDynInst->isKernelLaunch()) { // for kernel launch, the original request must be both kernel-type -// and acquire +// and INV_L1 assert(pkt->req->isKernel()); -assert(pkt->req->isAcquire()); +assert(pkt->req->isInvL1()); // one D-Cache inv is done, decrement counter dispatcher.updateInvCounter(gpuDynInst->kern_id); @@ -813,16 +813,19 @@ // retrieve wavefront from inst Wavefront *w = gpuDynInst->wavefront(); -// Check if we are waiting on Kernel End Release +// Check if we are waiting on Kernel End Flush if (w->getStatus() == Wavefront::S_RETURNING && gpuDynInst->isEndOfKernel()) { // for kernel end, the original request must be both kernel-type -// and release +// and last-level GPU cache should be flushed if it contains +// dirty data. This request may have been quiesced and +// immediately responded to if the GL2 is a write-through / +// read-only cache. assert(pkt->req->isKernel()); -assert(pkt->req->isRelease()); +assert(pkt->req->isGL2CacheFlush()); -// one wb done, decrement counter, and return whether all wbs are -// done for the kernel +// once flush done, decrement counter, and return whether all +// dirty writeback operations are done for the kernel bool isWbDone = dispatcher.updateWbCounter(gpuDynInst->kern_id); // not all wbs are done for the kernel, just release pkt @@ -1238,7 +1241,7 @@ if (kernelMemSync) { if (gpuDynInst->isKernelLaunch()) { -req->setCacheCoherenceFlags(Request::ACQUIRE); +req->setCacheCoherenceFlags(Request::INV_L1); req->setReqInstSeqNum(gpuDynInst->seqNum()); req->setFlags(Request::KERNEL); pkt = new Packet(req, MemCmd::MemSyncReq); @@ -1254,11 +1257,12 @@ schedule(mem_req_event, curTick() + req_tick_latency); } else { - // kernel end release must be enabled + // kernel end flush of GL2 cache may be quiesced by Ruby if the + // GL2 is a read-only cache assert(shader->impl_kern_end_rel); assert(gpuDynInst->isEndOfKernel()); - req->setCacheCoherenceFlags(Request::WB_L2); + req->setCacheCoherenceFlags(Request::FLUSH_L2); req->setReqInstSeqNum(gpuDynInst->seqNum()); req->setFlags(Request::KERNEL); pkt = new Packet(req, MemCmd::MemSyncReq); diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index 3d2fa0d..eb3db5d 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -305,7 +305,7 @@ assert(!isEndOfKernel()); // must be wbinv inst if not kernel launch/end -req->setCacheCoherenceFlags(Request::ACQUIRE); +req->setCacheCoherenceFlags(Request::INV_L1); } } diff --git a/src/mem/request.hh b/src/mem/request.hh index 7b324dc..668cdef 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -226,7