[gem5-dev] Change in gem5/gem5[develop]: gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags

Bradford Beckmann (Gerrit) via gem5-dev Tue, 18 Aug 2020 17:04:00 -0700

Hello Tuan Ta,

I'd like you to do a code review. Please visit


    https://gem5-review.googlesource.com/c/public/gem5/+/32859

to review the following change.

Change subject: gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags

......................................................................

gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags

This patch replaces ACQUIRE and RELEASE flags which are HSA-specific.
ACQUIRE flag becomes INV_L1 in VIPER protocol. RELEASE flag is removed.
Future protocols may support extra cache coherence flags like INV_L2 and
WB_L2.

Change-Id: I3d60c9d3625c898f4110a12d81742b6822728533
---
M src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
M src/gpu-compute/compute_unit.cc
M src/gpu-compute/gpu_dyn_inst.hh
M src/mem/request.hh
M src/mem/ruby/system/VIPERCoalescer.cc
5 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc

index 822fb26..7addd72 100644
--- a/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
+++ b/src/cpu/testers/gpu_ruby_test/GpuWavefront.cc
@@ -233,7 +233,7 @@
                                              threadId, nullptr);
     acq_req->setPaddr(0);
     acq_req->setReqInstSeqNum(tester->getActionSeqNum());
-    acq_req->setFlags(Request::ACQUIRE);
+    acq_req->setCacheCoherenceFlags(Request::INV_L1);
     // set protocol-specific flags
     setExtraRequestFlags(acq_req);

diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc

index 7e0947f..0ad3846 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -798,9 +798,9 @@
         // here (simdId=-1, wfSlotId=-1)
         if (gpuDynInst->isKernelLaunch()) {

             // for kernel launch, the original request must be both kernel-type

-            // and acquire
+            // and INV_L1
             assert(pkt->req->isKernel());
-            assert(pkt->req->isAcquire());
+            assert(pkt->req->isInvL1());

             // one D-Cache inv is done, decrement counter
             dispatcher.updateInvCounter(gpuDynInst->kern_id);
@@ -813,16 +813,19 @@
         // retrieve wavefront from inst
         Wavefront *w = gpuDynInst->wavefront();

-        // Check if we are waiting on Kernel End Release
+        // Check if we are waiting on Kernel End Flush
         if (w->getStatus() == Wavefront::S_RETURNING
             && gpuDynInst->isEndOfKernel()) {

             // for kernel end, the original request must be both kernel-type

-            // and release
+            // and last-level GPU cache should be flushed if it contains
+            // dirty data.  This request may have been quiesced and
+            // immediately responded to if the GL2 is a write-through /
+            // read-only cache.
             assert(pkt->req->isKernel());
-            assert(pkt->req->isRelease());
+            assert(pkt->req->isGL2CacheFlush());

-            // one wb done, decrement counter, and return whether all wbs are

-            // done for the kernel
+            // once flush done, decrement counter, and return whether all
+            // dirty writeback operations are done for the kernel

             bool isWbDone = dispatcher.updateWbCounter(gpuDynInst->kern_id);


             // not all wbs are done for the kernel, just release pkt
@@ -1238,7 +1241,7 @@

     if (kernelMemSync) {
         if (gpuDynInst->isKernelLaunch()) {
-            req->setCacheCoherenceFlags(Request::ACQUIRE);
+            req->setCacheCoherenceFlags(Request::INV_L1);
             req->setReqInstSeqNum(gpuDynInst->seqNum());
             req->setFlags(Request::KERNEL);
             pkt = new Packet(req, MemCmd::MemSyncReq);
@@ -1254,11 +1257,12 @@

             schedule(mem_req_event, curTick() + req_tick_latency);
         } else {
-          // kernel end release must be enabled
+          // kernel end flush of GL2 cache may be quiesced by Ruby if the
+          // GL2 is a read-only cache
           assert(shader->impl_kern_end_rel);
           assert(gpuDynInst->isEndOfKernel());

-          req->setCacheCoherenceFlags(Request::WB_L2);
+          req->setCacheCoherenceFlags(Request::FLUSH_L2);
           req->setReqInstSeqNum(gpuDynInst->seqNum());
           req->setFlags(Request::KERNEL);
           pkt = new Packet(req, MemCmd::MemSyncReq);

diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh

index 3d2fa0d..eb3db5d 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -305,7 +305,7 @@
             assert(!isEndOfKernel());

             // must be wbinv inst if not kernel launch/end
-            req->setCacheCoherenceFlags(Request::ACQUIRE);
+            req->setCacheCoherenceFlags(Request::INV_L1);
         }
     }

diff --git a/src/mem/request.hh b/src/mem/request.hh
index 7b324dc..668cdef 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -226,7 +226,7 @@
      * details.
      *
      * INV_L1: L1 cache invalidation
-     * WB_L2: L2 cache writeback
+     * FLUSH_L2: L2 cache flush
      *
      * SLC: System Level Coherent. Accesses are forced to miss in
      *      the L2 cache and are coherent with system memory.
@@ -242,7 +242,7 @@
     enum : CacheCoherenceFlagsType {
         /** mem_sync_op flags */
         INV_L1                  = 0x00000001,
-        WB_L2                   = 0x00000020,
+        FLUSH_L2                = 0x00000020,
         /** user-policy flags */
         /** user-policy flags */
         SLC_BIT                 = 0x00000080,
@@ -843,11 +843,14 @@
     /**

      * Accessor functions for the memory space configuration flags and used by
      * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that

-     * these are for testing only; setting extraFlags should be done via
-     * setCacheCoherenceFlags().
+     * setting extraFlags should be done via setCacheCoherenceFlags().
      */
-    bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
-    bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
+    bool isInvL1() const { return _cacheCoherenceFlags.isSet(INV_L1); }
+
+    bool isGL2CacheFlush() const
+    {
+        return _cacheCoherenceFlags.isSet(FLUSH_L2);
+    }

     /**
      * Accessor functions to determine whether this request is part of

diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc

index a21cf33..231c20a 100644
--- a/src/mem/ruby/system/VIPERCoalescer.cc
+++ b/src/mem/ruby/system/VIPERCoalescer.cc
@@ -81,20 +81,19 @@
 VIPERCoalescer::makeRequest(PacketPtr pkt)
 {
     // VIPER only supports following memory request types
-    //    MemSyncReq & Acquire: TCP cache invalidation
+    //    MemSyncReq & INV_L1 : TCP cache invalidation
     //    ReadReq             : cache read
     //    WriteReq            : cache write
     //    AtomicOp            : cache atomic
     //

     // VIPER does not expect MemSyncReq & Release since in GCN3, compute unit

     // does not specify an equivalent type of memory request.
-    // TODO: future patches should rename Acquire and Release
-    assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isAcquire()) ||
+    assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
             pkt->cmd == MemCmd::ReadReq ||
             pkt->cmd == MemCmd::WriteReq ||
             pkt->isAtomicOp());

-    if (pkt->req->isAcquire() && m_cache_inv_pkt) {
+    if (pkt->req->isInvL1() && m_cache_inv_pkt) {
         // In VIPER protocol, the coalescer is not able to handle two or
         // more cache invalidation requests at a time. Cache invalidation
         // requests must be serialized to ensure that all stale data in
@@ -105,8 +104,8 @@

     GPUCoalescer::makeRequest(pkt);

-    if (pkt->req->isAcquire()) {

-        // In VIPER protocol, a compute unit sends a MemSyncReq with Acquire

+    if (pkt->req->isInvL1()) {
+        // In VIPER protocol, a compute unit sends a MemSyncReq with INV_L1
         // flag to invalidate TCP. Upon receiving a request of this type,

         // VIPERCoalescer starts a cache walk to invalidate all valid entries
         // in TCP. The request is completed once all entries are invalidated.

@@ -278,7 +277,7 @@
 }

 /**
-  * Invalidate TCP (Acquire)
+  * Invalidate TCP
   */
 void
 VIPERCoalescer::invTCP()

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/32859

To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I3d60c9d3625c898f4110a12d81742b6822728533
Gerrit-Change-Number: 32859
Gerrit-PatchSet: 1
Gerrit-Owner: Bradford Beckmann <[email protected]>
Gerrit-Reviewer: Tuan Ta <[email protected]>
Gerrit-MessageType: newchange

_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

[gem5-dev] Change in gem5/gem5[develop]: gpu-compute,mem-ruby: replace ACQUIRE and RELEASE request flags

Reply via email to