VISHNU RAMADAS has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/67191?usp=email )

( 4 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the submitted one. )

Change subject: gpu-compute, mem-ruby: Update GPU cache bypassing to use TBE
......................................................................

gpu-compute, mem-ruby: Update GPU cache bypassing to use TBE

An earlier commit added support for the GLC and SLC AMDGPU instruction
modifiers, which enable cache bypassing when set. The GLC/SLC flag
information was previously threaded through every message all the way
to memory and back so that the appropriate actions could be taken upon
receiving a request and its corresponding response. This commit removes
that threading and instead records the bypass flags in the TBE.
Requests populate the entry and responses consult it to determine the
correct set of actions to execute.
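
For readers skimming the diff, the shape of the change in
GPU_VIPER-TCC.sm is roughly the sketch below. It is condensed from the
hunks that follow and is not a complete machine: the TBE field
declarations mirror the ones this change removes from the directory's
TBE (the TCC's own declarations are untouched by this diff), the
allocation action's header is abbreviated because only its body appears
in the diff, and the remaining port logic, states, and events are
elided.

    // The TBE, rather than every message, now carries the bypass flags.
    structure(TBE, desc="Transaction buffer entry") {
      bool isGLCSet, desc="Bypass L1 cache";
      bool isSLCSet, desc="Bypass L1 and L2 caches";
    }

    // Request side: the action that allocates the TBE records the flags
    // from the incoming core request. (Action header abbreviated; only
    // the body appears in the diff.)
    action(t_allocateTBE, "t", desc="allocate TBE Entry") {
      peek(coreRequestNetwork_in, CPURequestMsg) {
        tbe.isGLCSet := in_msg.isGLCSet;
        tbe.isSLCSet := in_msg.isSLCSet;
      }
    }

    // Response side: the port that receives responses from the NB looks
    // up the TBE instead of reading the flags off the response message.
    // (Port header abbreviated; non-bypass handling omitted.)
    in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
      if (responseFromNB_in.isReady(clockEdge())) {
        peek(responseFromNB_in, ResponseMsg, block_on="addr") {
          TBE tbe := TBEs.lookup(in_msg.addr);
          Entry cache_entry := getCacheEntry(in_msg.addr);
          bool is_slc_set := false;
          if (!is_invalid(tbe)) {
            is_slc_set := tbe.isSLCSet;
          }
          if (is_slc_set) {
            // SLC set: the response bypasses the TCC and no cache entry
            // is allocated.
            trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
          }
        }
      }
    }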

Change-Id: I20ffa6682d109270adb921de078cfd47fb4e137c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67191
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Reviewed-by: Jason Lowe-Power <power...@gmail.com>
---
M src/mem/ruby/protocol/GPU_VIPER-TCC.sm
M src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
M src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
3 files changed, 48 insertions(+), 66 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved
  kokoro: Regressions pass
  Jason Lowe-Power: Looks good to me, approved

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index ae14247..ca4c543 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -283,7 +283,13 @@
       peek(responseFromNB_in, ResponseMsg, block_on="addr") {
         TBE tbe := TBEs.lookup(in_msg.addr);
         Entry cache_entry := getCacheEntry(in_msg.addr);
-        if (in_msg.isSLCSet) {
+        bool is_slc_set := false;
+
+        if (!is_invalid(tbe)) {
+            is_slc_set := tbe.isSLCSet;
+        }
+
+        if (is_slc_set) {
            // If the SLC bit is set, the response needs to bypass the cache
             // and should not be allocated an entry.
             trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
@@ -343,6 +349,10 @@
                 trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
             }
         } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          // Currently the Atomic requests do not have GLC/SLC bit handling
+          // support. The assert ensures that the requests do not have these
+          // bits set, and therefore do not expect to bypass the cache.
+          assert(!in_msg.isSLCSet);
           trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
           if (in_msg.isSLCSet) {
@@ -399,8 +409,8 @@
       out_msg.State := CoherenceState:Shared;
       DPRINTF(RubySlicc, "%s\n", out_msg);
       peek(responseFromNB_in, ResponseMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
       }
     }
     enqueue(unblockToNB_out, UnblockMsg, 1) {
@@ -408,8 +418,8 @@
      out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
       peek(responseFromNB_in, ResponseMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
       }
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
@@ -426,8 +436,8 @@
           out_msg.MessageSize := MessageSizeType:Response_Data;
           out_msg.Dirty := false;
           out_msg.State := CoherenceState:Shared;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
         }
         enqueue(unblockToNB_out, UnblockMsg, 1) {
@@ -449,8 +459,8 @@
          out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
           out_msg.Shared := false; // unneeded for this request
           out_msg.MessageSize := in_msg.MessageSize;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
         }
       }
@@ -467,9 +477,6 @@
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-
       }
     }
   }
@@ -484,9 +491,6 @@
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-
       }
     }
   }
@@ -500,9 +504,8 @@
           out_msg.Sender := machineID;
           out_msg.MessageSize := in_msg.MessageSize;
           out_msg.DataBlk := in_msg.DataBlk;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
-
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
         }
     }
   }
@@ -535,9 +538,9 @@
       peek(coreRequestNetwork_in, CPURequestMsg) {
        if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
           tbe.Destination.add(in_msg.Requestor);
-          tbe.isGLCSet := in_msg.isGLCSet;
-          tbe.isSLCSet := in_msg.isSLCSet;
         }
+        tbe.isGLCSet := in_msg.isGLCSet;
+        tbe.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -576,8 +579,6 @@
         out_msg.DataBlk := in_msg.DataBlk;
         out_msg.writeMask.orMask(in_msg.writeMask);
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -593,10 +594,6 @@
       out_msg.Dirty := true;
       out_msg.DataBlk := cache_entry.DataBlk;
       out_msg.writeMask.orMask(cache_entry.writeMask);
-      peek(coreRequestNetwork_in, CPURequestMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-      }
     }
   }

@@ -611,8 +608,6 @@
         out_msg.Type := CoherenceRequestType:Atomic;
         out_msg.Dirty := true;
         out_msg.writeMask.orMask(in_msg.writeMask);
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -628,10 +623,6 @@
       out_msg.Ntsl := true;
       out_msg.State := CoherenceState:NA;
       out_msg.MessageSize := MessageSizeType:Response_Control;
-      peek(probeNetwork_in, NBProbeRequestMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-      }
     }
   }
   action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
@@ -676,8 +667,8 @@
         out_msg.addr := address;
         out_msg.Type := TriggerType:AtomicDone;
         peek(responseFromNB_in, ResponseMsg) {
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
         }
       }
     }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 57edef8..3b38e3b 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -161,8 +161,6 @@
     uint64_t probe_id,        desc="probe id for lifetime profiling";
     WriteMask writeMask,    desc="outstanding write through mask";
     int Len,            desc="Length of memory request for DMA";
-    bool isGLCSet,      desc="Bypass L1 Cache";
-    bool isSLCSet,      desc="Bypass L1 and L2 Cache";
   }

   structure(TBETable, external="yes") {
@@ -485,8 +483,6 @@
       out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
       out_msg.OriginalResponder := tbe.LastSender;
       out_msg.L3Hit := tbe.L3Hit;
-      out_msg.isGLCSet := tbe.isGLCSet;
-      out_msg.isSLCSet := tbe.isSLCSet;
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
   }
@@ -516,8 +512,6 @@
         out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
         out_msg.OriginalResponder := tbe.LastSender;
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
         if(tbe.atomicData){
           out_msg.WTRequestor := tbe.WTRequestor;
         }
@@ -546,8 +540,6 @@
         out_msg.InitialRequestTime := tbe.InitialRequestTime;
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(RubySlicc, "%s\n", out_msg);
       }
   }
@@ -565,8 +557,6 @@
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := curCycle();
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -579,8 +569,6 @@
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := in_msg.DataBlk;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -636,8 +624,6 @@
           out_msg.Type := MemoryRequestType:MEMORY_READ;
           out_msg.Sender := machineID;
           out_msg.MessageSize := MessageSizeType:Request_Control;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
         }
       }
     }
@@ -753,8 +739,6 @@
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
           tbe.NumPendingAcks := out_msg.Destination.count();
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
           APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -858,8 +842,6 @@
           out_msg.ReturnData := true;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           DPRINTF(RubySlicc, "%s\n", (out_msg));
           APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
@@ -915,8 +897,6 @@
           out_msg.ReturnData := false;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -943,8 +923,6 @@
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := in_msg.DataBlk;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
       if (tbe.Dirty == false) {
           // have to update the TBE, too, because of how this
@@ -1007,8 +985,6 @@
       tbe.NumPendingAcks := 0;
       tbe.Cached := in_msg.ForceShared;
       tbe.InitialRequestTime := in_msg.InitialRequestTime;
-      tbe.isGLCSet := in_msg.isGLCSet;
-      tbe.isSLCSet := in_msg.isSLCSet;
     }
   }

@@ -1028,8 +1004,6 @@
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := tbe.DataBlk;
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(ProtocolTrace, "%s\n", out_msg);
       }
     }
@@ -1130,8 +1104,6 @@
             out_msg.Sender := machineID;
             out_msg.MessageSize := MessageSizeType:Writeback_Data;
             out_msg.DataBlk := victim_entry.DataBlk;
-            out_msg.isGLCSet := in_msg.isGLCSet;
-            out_msg.isSLCSet := in_msg.isSLCSet;
           }
           L3CacheMemory.deallocate(victim);
         }
@@ -1164,8 +1136,6 @@
             out_msg.Sender := machineID;
             out_msg.MessageSize := MessageSizeType:Writeback_Data;
             out_msg.DataBlk := victim_entry.DataBlk;
-            out_msg.isGLCSet := tbe.isGLCSet;
-            out_msg.isSLCSet := tbe.isSLCSet;
           }
           L3CacheMemory.deallocate(victim);
         }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
index 6ff19e9..bb3a013 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
@@ -168,8 +168,6 @@
   MachineID Requestor,          desc="Requestor id for 3-hop requests";
  bool NoAckNeeded, default="false", desc="For short circuiting acks";
   int ProgramCounter,           desc="PC that accesses to this block";
-  bool isGLCSet,                desc="Bypass L1 Cache";
-  bool isSLCSet,                desc="Bypass L1 and L2 Caches";

   bool functionalRead(Packet *pkt) {
     return false;

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/67191?usp=email
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I20ffa6682d109270adb921de078cfd47fb4e137c
Gerrit-Change-Number: 67191
Gerrit-PatchSet: 6
Gerrit-Owner: VISHNU RAMADAS <vrama...@wisc.edu>
Gerrit-Reviewer: Jason Lowe-Power <ja...@lowepower.com>
Gerrit-Reviewer: Jason Lowe-Power <power...@gmail.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: VISHNU RAMADAS <vrama...@wisc.edu>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org