Tiago Mück has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/17568 )
Change subject: mem-ruby: Fix MOESI_CMP_directory blocked line handling
......................................................................
mem-ruby: Fix MOESI_CMP_directory blocked line handling
Using recycle in the L2 controllers to put messages back into the
buffer can lead to starvation when there are many L1 requests for the
same line, and this easily triggers the deadlock detection mechanism in
configurations with many cores (16+). Replacing recycle with
stall_and_wait + wakeUpBuffers for L1 requests avoids the issue. (An
illustrative sketch of the two idioms follows the diffstat below.)
Change-Id: I28b8aeacc48919ccf38e69653cd9205a4153514b
Signed-off-by: Tiago Muck <[email protected]>
---
M src/mem/protocol/MOESI_CMP_directory-L2cache.sm
1 file changed, 91 insertions(+), 10 deletions(-)
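
(For reviewers less familiar with the two idioms, here is a minimal,
illustrative SLICC sketch of the change. It is not part of the patch:
the transition states BUSY and STABLE are hypothetical, while the action
names, stall_and_wait, and wakeUpAllBuffers match the code in the diff.)

  // Old idiom: recycle puts the blocked request back at the tail of its
  // buffer. Under heavy contention for a single line the same request can
  // be recycled indefinitely, which is what eventually trips Ruby's
  // deadlock detection.
  action(zz_recycleL1RequestQueue, "zz", desc="...") {
    L1requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }

  // New idiom: park the blocked request on a per-address wait list ...
  action(st_stallAndWaitL1RequestQueue, "st", desc="...") {
    stall_and_wait(L1requestNetwork_in, address);
  }
  // ... and make the waiters schedulable again once the transition that
  // was blocking the line completes.
  action(wa_wakeUpDependents, "wa", desc="...") {
    wakeUpAllBuffers(address);
  }

  // How the two ends pair up:
  transition(BUSY, {L1_GETS, L1_GETX}) {
    st_stallAndWaitL1RequestQueue;  // no repeated recycling, no starvation
  }
  transition(BUSY, Unblock, STABLE) {
    n_popResponseQueue;
    wa_wakeUpDependents;            // waiting L1 requests get another chance
  }

(The deadlock detection mentioned above is, as far as I can tell, the Ruby
Sequencer's per-request timeout (deadlock_threshold); requests stuck behind
an endlessly recycled message eventually exceed it and the simulation
panics.)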
diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
index 0c00bd9..c1174f5 100644
--- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
+++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm
@@ -234,6 +234,9 @@
void set_tbe(TBE b);
void unset_tbe();
MachineID mapAddressToMachine(Addr addr, MachineType mtype);
+ void wakeUpBuffers(Addr a);
+ void wakeUpAllBuffers(Addr a);
+ void wakeUpAllBuffers();
Entry getCacheEntry(Addr address), return_by_pointer="yes" {
return static_cast(Entry, "pointer", L2cache[address]);
@@ -1537,13 +1540,6 @@
localDirectory.deallocate(address);
}
- action(zz_recycleL1RequestQueue, "zz", desc="Send the head of the mandatory queue to the back of the queue.") {
- peek(L1requestNetwork_in, RequestMsg) {
- APPEND_TRANSITION_COMMENT(in_msg.Requestor);
- }
- L1requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
- }
-
action(zz_recycleRequestQueue, "\zz", desc="Send the head of the mandatory queue to the back of the queue.") {
peek(requestNetwork_in, RequestMsg) {
APPEND_TRANSITION_COMMENT(in_msg.Requestor);
@@ -1558,6 +1554,18 @@
responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
}
+ action(st_stallAndWaitL1RequestQueue, "st", desc="Stall and wait on the address") {
+ stall_and_wait(L1requestNetwork_in, address);
+ }
+
+ action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") {
+ wakeUpAllBuffers(address);
+ }
+
+ action(wa_wakeUpAllDependents, "waa", desc="wake-up all dependents") {
+ wakeUpAllBuffers();
+ }
+
action(da_sendDmaAckUnblock, "da", desc="Send dma ack to global directory") {
enqueue(responseNetwork_out, ResponseMsg, response_latency) {
out_msg.addr := address;
@@ -1576,11 +1584,11 @@
//*****************************************************
transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW,
ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO,
IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF,
OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS,
ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only,
L1_PUTX}) {
- zz_recycleL1RequestQueue;
+ st_stallAndWaitL1RequestQueue;
}
transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW,
ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO,
IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF,
OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS,
ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) {
- zz_recycleL1RequestQueue;
+ st_stallAndWaitL1RequestQueue;
}
transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW,
ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX,
IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII,
OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD},
L2_Replacement) {
@@ -1674,6 +1682,7 @@
s_deallocateTBE;
da_sendDmaAckUnblock;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOSX, Fwd_DMA, ILOSXD) {
@@ -1687,6 +1696,7 @@
s_deallocateTBE;
da_sendDmaAckUnblock;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILO, Fwd_DMA, ILOD) {
@@ -1700,6 +1710,7 @@
s_deallocateTBE;
da_sendDmaAckUnblock;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILX, Fwd_DMA, ILXD) {
@@ -1713,6 +1724,7 @@
s_deallocateTBE;
da_sendDmaAckUnblock;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOX, Fwd_DMA, ILOXD) {
@@ -1726,6 +1738,7 @@
s_deallocateTBE;
da_sendDmaAckUnblock;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition({ILOS, ILOSX, ILO, ILX, ILOX, ILXW}, Data) {
@@ -1740,6 +1753,7 @@
c_sendDataFromTBEToFwdGETS;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ISFGS, Data, ILOS) {
@@ -1747,6 +1761,7 @@
c_sendDataFromTBEToFwdGETS;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IFGS, Data_Exclusive, I) {
@@ -1755,6 +1770,7 @@
gg_clearLocalSharers;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
@@ -1771,6 +1787,7 @@
gg_clearLocalSharers;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition({ILOSX, ILOS}, Fwd_GETX, IFGXX) {
@@ -1801,6 +1818,7 @@
gg_clearLocalSharers;
s_deallocateTBE;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
@@ -1879,6 +1897,7 @@
s_deallocateTBE;
rr_deallocateL2CacheBlock;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
@@ -1907,6 +1926,7 @@
ee_sendLocalInv;
gg_clearLocalSharers;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(SLS, Inv, II) {
@@ -1915,6 +1935,7 @@
ee_sendLocalInv;
rr_deallocateL2CacheBlock;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(II, IntAck) {
@@ -1927,6 +1948,7 @@
e_sendAck;
s_deallocateTBE;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
transition(S, Inv, I) {
@@ -1936,6 +1958,7 @@
s_deallocateTBE;
rr_deallocateL2CacheBlock;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
@@ -1960,6 +1983,7 @@
g_recordLocalExclusive;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OLSX, L1_GETS, OLSXS) {
@@ -1972,6 +1996,7 @@
transition(OLSXS, Unblock, OLSX) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
// after this, can't get Fwd_GETX
@@ -2023,31 +2048,37 @@
transition(IFLOX, Unblock, ILOSX) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IFLS, Unblock, ILS) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IFLOXX, Unblock, ILOSX) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IFLOSX, Unblock, ILOSX) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition({IFLOSX, IFLOXX}, Exclusive_Unblock, ILX) {
g_recordLocalExclusive;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IFLO, Unblock, ILOS) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
@@ -2066,6 +2097,7 @@
g_recordLocalExclusive;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
// LOCAL REQUESTS THAT MUST ISSUE
@@ -2129,6 +2161,7 @@
h_clearIntAcks;
e_sendAck;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
// transition(IGMLS, ExtAck, IGMO) {
@@ -2144,6 +2177,7 @@
m_decrementNumberOfMessagesExt;
o_checkForExtCompletion;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
@@ -2217,6 +2251,7 @@
i_copyDataToTBE;
c_sendDataFromTBEToFwdGETS;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OGMIO, Fwd_GETX, OGMIOF) {
@@ -2236,6 +2271,7 @@
hh_countLocalSharersExceptL1GETXRequestorInTBE;
c_sendDataFromTBEToFwdGETX;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
transition(IGMIOF, IntAck) {
@@ -2255,6 +2291,7 @@
gg_clearLocalSharers;
c_sendDataFromTBEToFwdGETX;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
transition(IGMIO, All_Acks, IGMO) {
@@ -2263,12 +2300,14 @@
k_forwardLocalGETXToLocalOwner;
e_sendAckToL1RequestorFromTBE;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
transition(OGMIO, All_Acks, IGMO) {
ee_issueLocalInvExceptL1RequestorInTBE;
c_sendDataFromTBEToL1GETX;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
transition({IGMIO, OGMIO}, Own_GETX) {
@@ -2283,6 +2322,7 @@
m_decrementNumberOfMessagesExt;
o_checkForExtCompletion;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition({IGM, IGMIO, OGMIO}, ExtAck) {
@@ -2316,6 +2356,7 @@
f_sendUnblock;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IGS, Exclusive_Unblock, ILX) {
@@ -2323,6 +2364,7 @@
f_sendExclusiveUnblock;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IGMO, All_Acks) {
@@ -2335,6 +2377,7 @@
f_sendExclusiveUnblock;
s_deallocateTBE;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
@@ -2362,6 +2405,7 @@
transition(SLSS, Unblock, SLS) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
@@ -2385,6 +2429,7 @@
transition(OLSS, Unblock, OLS) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IGMO, Fwd_GETX, IGM) {
@@ -2418,6 +2463,7 @@
transition(MM, Exclusive_Unblock, ILX) {
g_recordLocalExclusive;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(M, L1_GETS, OO) {
@@ -2441,6 +2487,7 @@
transition(SS, Unblock, SLS) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(O, L1_GETS, OO) {
@@ -2453,6 +2500,7 @@
transition(OO, Unblock, OLS) {
g_recordLocalSharer;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OO, Exclusive_Unblock, ILX) {
@@ -2460,6 +2508,7 @@
y_copyCacheStateToDir;
rr_deallocateL2CacheBlock;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
@@ -2495,11 +2544,13 @@
transition(ILSW, Unblock, ILS) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOW, Unblock, ILO) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOSX, L1_PUTS_only, ILOXW) {
@@ -2510,6 +2561,7 @@
transition(ILOXW, Unblock, ILOX) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
// hmmm...keep data or drop. Just drop for now
@@ -2526,11 +2578,13 @@
transition(ILOSW, Unblock, ILOS) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOSXW, Unblock, ILOSX) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(SLS, L1_PUTS, SLSW) {
@@ -2546,6 +2600,7 @@
transition(SW, {Unblock}, S) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OLS, L1_PUTS, OLSW) {
@@ -2581,16 +2636,19 @@
transition(OLSXW, {Unblock}, OLSX) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OW, {Unblock}, O) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OXW, {Unblock}, M) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILX, L1_PUTX, ILXW ) {
@@ -2604,6 +2662,7 @@
y_copyDirToCacheAndRemove;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
// clean writeback
@@ -2613,11 +2672,13 @@
y_copyDirToCacheAndRemove;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILXW, Unblock, ILX) {
// writeback canceled because L1 invalidated
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILSW, L1_WBCLEANDATA, SLS) {
@@ -2626,6 +2687,7 @@
u_writeDataToCache;
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(IW, L1_WBCLEANDATA, S) {
@@ -2634,7 +2696,7 @@
u_writeDataToCache;
gg_clearSharerFromL1Response;
n_popResponseQueue;
-
+ wa_wakeUpDependents;
}
// Owner can have dirty data
@@ -2644,6 +2706,7 @@
gg_clearOwnerFromL1Response;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOXW, L1_WBDIRTYDATA, M) {
@@ -2652,6 +2715,7 @@
gg_clearOwnerFromL1Response;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOXW, L1_WBCLEANDATA, M) {
@@ -2660,6 +2724,7 @@
gg_clearOwnerFromL1Response;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOSW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLS) {
@@ -2668,6 +2733,7 @@
gg_clearOwnerFromL1Response;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(ILOSXW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLSX) {
@@ -2676,23 +2742,27 @@
gg_clearOwnerFromL1Response;
u_writeDataToCache;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(SLSW, {Unblock}, SLS) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
transition(OLSW, {Unblock}, OLS) {
gg_clearSharerFromL1Response;
n_popResponseQueue;
+ wa_wakeUpDependents;
}
// L2 WRITEBACKS
transition({I, S}, L2_Replacement, I) {
rr_deallocateL2CacheBlock;
+ wa_wakeUpAllDependents;
}
transition(ILS, L2_Replacement) {
@@ -2713,6 +2783,7 @@
transition(SLS, L2_Replacement, ILS) {
y_copyCacheStateToDir;
rr_deallocateL2CacheBlock;
+ wa_wakeUpAllDependents;
}
transition({OLS, OLSX}, L2_Replacement, OLSI) {
@@ -2720,6 +2791,7 @@
b_issuePUTO_ls;
i_allocateTBE;
rr_deallocateL2CacheBlock;
+ wa_wakeUpAllDependents;
}
@@ -2727,12 +2799,14 @@
b_issuePUTO;
i_allocateTBE;
rr_deallocateL2CacheBlock;
+ wa_wakeUpAllDependents;
}
transition(M, L2_Replacement, MI) {
b_issuePUTX;
i_allocateTBE;
rr_deallocateL2CacheBlock;
+ wa_wakeUpAllDependents;
}
transition(OLSI, Fwd_GETX, ILSI) {
@@ -2751,6 +2825,7 @@
gg_clearLocalSharers;
c_sendDataFromTBEToFwdGETX;
n_popTriggerQueue;
+ wa_wakeUpDependents;
}
transition(OLSI, Fwd_GETS) {
@@ -2787,33 +2862,39 @@
qq_sendDataFromTBEToMemory;
s_deallocateTBE;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(MII, Writeback_Nack, I) {
s_deallocateTBE;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(OI, Writeback_Nack) {
b_issuePUTO;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(OLSI, Writeback_Ack, ILS) {
qq_sendDataFromTBEToMemory;
s_deallocateTBE;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(MII, Writeback_Ack, I) {
f_sendUnblock;
s_deallocateTBE;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
transition(ILSI, Writeback_Ack, ILS) {
f_sendUnblock;
s_deallocateTBE;
m_popRequestQueue;
+ wa_wakeUpDependents;
}
}
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/17568
Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I28b8aeacc48919ccf38e69653cd9205a4153514b
Gerrit-Change-Number: 17568
Gerrit-PatchSet: 1
Gerrit-Owner: Tiago Mück <[email protected]>
Gerrit-MessageType: newchange