Tiago Mück has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/17568 )

Change subject: mem-ruby: Fix MOESI_CMP_directory blocked line handling
......................................................................

mem-ruby: Fix MOESI_CMP_directory blocked line handling

Using recycle in the L2 controllers to put messages back into the buffer
may lead to starvation when there are many L1 requests for the same line.
This can easily trigger the deadlock detection mechanism in configurations
with many cores (16+). Replacing recycle with
stall_and_wait+wakeUpBuffers for L1 requests avoids this issue.

Change-Id: I28b8aeacc48919ccf38e69653cd9205a4153514b
Signed-off-by: Tiago Muck <[email protected]>
---
M src/mem/protocol/MOESI_CMP_directory-L2cache.sm
1 file changed, 91 insertions(+), 10 deletions(-)



diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
index 0c00bd9..c1174f5 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
@@ -234,6 +234,9 @@
   void set_tbe(TBE b);
   void unset_tbe();
   MachineID mapAddressToMachine(Addr addr, MachineType mtype);
+  void wakeUpBuffers(Addr a);
+  void wakeUpAllBuffers(Addr a);
+  void wakeUpAllBuffers();

   Entry getCacheEntry(Addr address), return_by_pointer="yes" {
     return static_cast(Entry, "pointer", L2cache[address]);
@@ -1537,13 +1540,6 @@
     localDirectory.deallocate(address);
   }

- action(zz_recycleL1RequestQueue, "zz", desc="Send the head of the mandatory queue to the back of the queue.") {
-    peek(L1requestNetwork_in, RequestMsg) {
-      APPEND_TRANSITION_COMMENT(in_msg.Requestor);
-    }
- L1requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
-  }
-
action(zz_recycleRequestQueue, "\zz", desc="Send the head of the mandatory queue to the back of the queue.") {
     peek(requestNetwork_in, RequestMsg) {
       APPEND_TRANSITION_COMMENT(in_msg.Requestor);
@@ -1558,6 +1554,18 @@
responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
   }

+ action(st_stallAndWaitL1RequestQueue, "st", desc="Stall and wait on the address") {
+    stall_and_wait(L1requestNetwork_in, address);
+  }
+
+ action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+    wakeUpAllBuffers(address);
+  }
+
+  action(wa_wakeUpAllDependents, "waa", desc="wake-up all dependents") {
+    wakeUpAllBuffers();
+  }
+
action(da_sendDmaAckUnblock, "da", desc="Send dma ack to global directory") {
     enqueue(responseNetwork_out, ResponseMsg, response_latency) {
       out_msg.addr := address;
@@ -1576,11 +1584,11 @@
   //*****************************************************

transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) {
-    zz_recycleL1RequestQueue;
+    st_stallAndWaitL1RequestQueue;
   }

transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) {
-    zz_recycleL1RequestQueue;
+    st_stallAndWaitL1RequestQueue;
   }

transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) {
@@ -1674,6 +1682,7 @@
     s_deallocateTBE;
     da_sendDmaAckUnblock;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOSX, Fwd_DMA, ILOSXD) {
@@ -1687,6 +1696,7 @@
     s_deallocateTBE;
     da_sendDmaAckUnblock;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILO, Fwd_DMA, ILOD) {
@@ -1700,6 +1710,7 @@
     s_deallocateTBE;
     da_sendDmaAckUnblock;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILX, Fwd_DMA, ILXD) {
@@ -1713,6 +1724,7 @@
     s_deallocateTBE;
     da_sendDmaAckUnblock;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOX, Fwd_DMA, ILOXD) {
@@ -1726,6 +1738,7 @@
     s_deallocateTBE;
     da_sendDmaAckUnblock;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition({ILOS, ILOSX, ILO, ILX, ILOX, ILXW}, Data) {
@@ -1740,6 +1753,7 @@
     c_sendDataFromTBEToFwdGETS;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ISFGS, Data, ILOS) {
@@ -1747,6 +1761,7 @@
     c_sendDataFromTBEToFwdGETS;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IFGS, Data_Exclusive, I) {
@@ -1755,6 +1770,7 @@
     gg_clearLocalSharers;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


@@ -1771,6 +1787,7 @@
     gg_clearLocalSharers;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition({ILOSX, ILOS}, Fwd_GETX, IFGXX) {
@@ -1801,6 +1818,7 @@
     gg_clearLocalSharers;
     s_deallocateTBE;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }


@@ -1879,6 +1897,7 @@
     s_deallocateTBE;
     rr_deallocateL2CacheBlock;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }


@@ -1907,6 +1926,7 @@
     ee_sendLocalInv;
     gg_clearLocalSharers;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(SLS, Inv, II) {
@@ -1915,6 +1935,7 @@
     ee_sendLocalInv;
     rr_deallocateL2CacheBlock;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(II, IntAck) {
@@ -1927,6 +1948,7 @@
     e_sendAck;
     s_deallocateTBE;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   transition(S, Inv, I) {
@@ -1936,6 +1958,7 @@
     s_deallocateTBE;
     rr_deallocateL2CacheBlock;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }


@@ -1960,6 +1983,7 @@
     g_recordLocalExclusive;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OLSX, L1_GETS, OLSXS) {
@@ -1972,6 +1996,7 @@
   transition(OLSXS, Unblock, OLSX) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   // after this, can't get Fwd_GETX
@@ -2023,31 +2048,37 @@
   transition(IFLOX, Unblock, ILOSX) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IFLS, Unblock, ILS) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IFLOXX, Unblock, ILOSX) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IFLOSX, Unblock, ILOSX) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition({IFLOSX, IFLOXX}, Exclusive_Unblock, ILX) {
     g_recordLocalExclusive;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IFLO, Unblock, ILOS) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


@@ -2066,6 +2097,7 @@
     g_recordLocalExclusive;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   // LOCAL REQUESTS THAT MUST ISSUE
@@ -2129,6 +2161,7 @@
     h_clearIntAcks;
     e_sendAck;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   // transition(IGMLS, ExtAck, IGMO) {
@@ -2144,6 +2177,7 @@
     m_decrementNumberOfMessagesExt;
     o_checkForExtCompletion;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


@@ -2217,6 +2251,7 @@
     i_copyDataToTBE;
     c_sendDataFromTBEToFwdGETS;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OGMIO, Fwd_GETX, OGMIOF) {
@@ -2236,6 +2271,7 @@
     hh_countLocalSharersExceptL1GETXRequestorInTBE;
     c_sendDataFromTBEToFwdGETX;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   transition(IGMIOF, IntAck) {
@@ -2255,6 +2291,7 @@
     gg_clearLocalSharers;
     c_sendDataFromTBEToFwdGETX;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   transition(IGMIO, All_Acks, IGMO) {
@@ -2263,12 +2300,14 @@
     k_forwardLocalGETXToLocalOwner;
     e_sendAckToL1RequestorFromTBE;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   transition(OGMIO, All_Acks, IGMO) {
     ee_issueLocalInvExceptL1RequestorInTBE;
     c_sendDataFromTBEToL1GETX;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   transition({IGMIO, OGMIO}, Own_GETX) {
@@ -2283,6 +2322,7 @@
     m_decrementNumberOfMessagesExt;
     o_checkForExtCompletion;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition({IGM, IGMIO, OGMIO}, ExtAck) {
@@ -2316,6 +2356,7 @@
     f_sendUnblock;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IGS, Exclusive_Unblock, ILX) {
@@ -2323,6 +2364,7 @@
     f_sendExclusiveUnblock;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IGMO, All_Acks) {
@@ -2335,6 +2377,7 @@
     f_sendExclusiveUnblock;
     s_deallocateTBE;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


@@ -2362,6 +2405,7 @@
   transition(SLSS, Unblock, SLS) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


@@ -2385,6 +2429,7 @@
   transition(OLSS, Unblock, OLS) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IGMO, Fwd_GETX, IGM) {
@@ -2418,6 +2463,7 @@
   transition(MM, Exclusive_Unblock, ILX) {
     g_recordLocalExclusive;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(M, L1_GETS, OO) {
@@ -2441,6 +2487,7 @@
   transition(SS, Unblock, SLS) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(O, L1_GETS, OO) {
@@ -2453,6 +2500,7 @@
   transition(OO, Unblock, OLS) {
     g_recordLocalSharer;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OO, Exclusive_Unblock, ILX) {
@@ -2460,6 +2508,7 @@
     y_copyCacheStateToDir;
     rr_deallocateL2CacheBlock;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


@@ -2495,11 +2544,13 @@
   transition(ILSW, Unblock, ILS) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOW, Unblock, ILO) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOSX, L1_PUTS_only, ILOXW) {
@@ -2510,6 +2561,7 @@
   transition(ILOXW, Unblock, ILOX) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   // hmmm...keep data or drop.  Just drop for now
@@ -2526,11 +2578,13 @@
   transition(ILOSW, Unblock, ILOS) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOSXW, Unblock, ILOSX) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(SLS, L1_PUTS, SLSW) {
@@ -2546,6 +2600,7 @@
   transition(SW, {Unblock}, S) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OLS, L1_PUTS, OLSW) {
@@ -2581,16 +2636,19 @@
   transition(OLSXW, {Unblock}, OLSX) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OW, {Unblock}, O) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OXW, {Unblock}, M) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILX, L1_PUTX, ILXW ) {
@@ -2604,6 +2662,7 @@
     y_copyDirToCacheAndRemove;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   // clean writeback
@@ -2613,11 +2672,13 @@
     y_copyDirToCacheAndRemove;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILXW, Unblock, ILX) {
     // writeback canceled because L1 invalidated
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILSW, L1_WBCLEANDATA, SLS) {
@@ -2626,6 +2687,7 @@
     u_writeDataToCache;
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(IW, L1_WBCLEANDATA, S) {
@@ -2634,7 +2696,7 @@
     u_writeDataToCache;
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
-
+    wa_wakeUpDependents;
   }

   // Owner can have dirty data
@@ -2644,6 +2706,7 @@
     gg_clearOwnerFromL1Response;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOXW, L1_WBDIRTYDATA, M) {
@@ -2652,6 +2715,7 @@
     gg_clearOwnerFromL1Response;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOXW, L1_WBCLEANDATA, M) {
@@ -2660,6 +2724,7 @@
     gg_clearOwnerFromL1Response;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOSW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLS) {
@@ -2668,6 +2733,7 @@
     gg_clearOwnerFromL1Response;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(ILOSXW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLSX) {
@@ -2676,23 +2742,27 @@
     gg_clearOwnerFromL1Response;
     u_writeDataToCache;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


   transition(SLSW, {Unblock}, SLS) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }

   transition(OLSW, {Unblock}, OLS) {
     gg_clearSharerFromL1Response;
     n_popResponseQueue;
+    wa_wakeUpDependents;
   }


   // L2 WRITEBACKS
   transition({I, S}, L2_Replacement, I) {
     rr_deallocateL2CacheBlock;
+    wa_wakeUpAllDependents;
   }

   transition(ILS, L2_Replacement) {
@@ -2713,6 +2783,7 @@
   transition(SLS, L2_Replacement, ILS) {
     y_copyCacheStateToDir;
     rr_deallocateL2CacheBlock;
+    wa_wakeUpAllDependents;
   }

   transition({OLS, OLSX}, L2_Replacement, OLSI) {
@@ -2720,6 +2791,7 @@
     b_issuePUTO_ls;
     i_allocateTBE;
     rr_deallocateL2CacheBlock;
+    wa_wakeUpAllDependents;
   }


@@ -2727,12 +2799,14 @@
     b_issuePUTO;
     i_allocateTBE;
     rr_deallocateL2CacheBlock;
+    wa_wakeUpAllDependents;
   }

   transition(M, L2_Replacement, MI) {
     b_issuePUTX;
     i_allocateTBE;
     rr_deallocateL2CacheBlock;
+    wa_wakeUpAllDependents;
   }

   transition(OLSI, Fwd_GETX, ILSI) {
@@ -2751,6 +2825,7 @@
     gg_clearLocalSharers;
     c_sendDataFromTBEToFwdGETX;
     n_popTriggerQueue;
+    wa_wakeUpDependents;
   }

   transition(OLSI, Fwd_GETS) {
@@ -2787,33 +2862,39 @@
     qq_sendDataFromTBEToMemory;
     s_deallocateTBE;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(MII, Writeback_Nack, I) {
     s_deallocateTBE;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(OI, Writeback_Nack) {
     b_issuePUTO;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(OLSI, Writeback_Ack, ILS) {
     qq_sendDataFromTBEToMemory;
     s_deallocateTBE;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(MII, Writeback_Ack, I) {
     f_sendUnblock;
     s_deallocateTBE;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }

   transition(ILSI, Writeback_Ack, ILS) {
     f_sendUnblock;
     s_deallocateTBE;
     m_popRequestQueue;
+    wa_wakeUpDependents;
   }
 }

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/17568
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I28b8aeacc48919ccf38e69653cd9205a4153514b
Gerrit-Change-Number: 17568
Gerrit-PatchSet: 1
Gerrit-Owner: Tiago Mück <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to