changeset 38b8b9a97500 in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=38b8b9a97500
description:
MESI: Add queues for stalled requests
This patch adds support for stalling the requests queued up at the different
controllers of the MESI CMP directory protocol. Previously, the controllers
would recycle such requests after a fixed latency. That sometimes allowed
younger requests to be serviced before older ones and could result in
starvation. Now, all the requests that need a particular block to be
in a stable state are moved to a separate queue, where they wait until
that block returns to a stable state; only then are they processed.
diffstat:
src/mem/protocol/MESI_CMP_directory-L1cache.sm | 38 +++++++++++++----------
src/mem/protocol/MESI_CMP_directory-L2cache.sm | 42 +++++++++++++++++--------
src/mem/protocol/MESI_CMP_directory-dir.sm | 24 ++++++++++----
3 files changed, 66 insertions(+), 38 deletions(-)
diffs (truncated from 476 to 300 lines):
diff -r 6cc162805986 -r 38b8b9a97500
src/mem/protocol/MESI_CMP_directory-L1cache.sm
--- a/src/mem/protocol/MESI_CMP_directory-L1cache.sm Fri Feb 10 09:52:32
2012 -0600
+++ b/src/mem/protocol/MESI_CMP_directory-L1cache.sm Fri Feb 10 11:05:24
2012 -0600
@@ -136,6 +136,7 @@
void unset_cache_entry();
void set_tbe(TBE a);
void unset_tbe();
+ void wakeUpBuffers(Address a);
// inclusive cache returns L1 entries only
Entry getCacheEntry(Address addr), return_by_pointer="yes" {
@@ -230,7 +231,7 @@
out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache);
// Response IntraChip L1 Network - response msg to this L1 cache
- in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) {
+ in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank
= 2) {
if (responseIntraChipL1Network_in.isReady()) {
peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
@@ -268,7 +269,7 @@
}
// Request InterChip network - request from this L1 cache to the shared L2
- in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) {
+ in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank =
1) {
if(requestIntraChipL1Network_in.isReady()) {
peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
@@ -293,7 +294,7 @@
}
// Mandatory Queue betweens Node's CPU and it's L1 caches
- in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
+ in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank =
0) {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
@@ -653,9 +654,6 @@
}
}
- action(z_stall, "z", desc="Stall") {
- }
-
action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block.
Sets the cache to not present, allowing a replacement in parallel with a
fetch.") {
if (L1DcacheMemory.isTagPresent(address)) {
L1DcacheMemory.deallocate(address);
@@ -677,12 +675,12 @@
}
}
- action(zz_recycleRequestQueue, "zz", desc="recycle L1 request queue") {
- requestIntraChipL1Network_in.recycle();
+ action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle L1 request queue") {
+ stall_and_wait(mandatoryQueue_in, address);
}
- action(z_recycleMandatoryQueue, "\z", desc="recycle L1 request queue") {
- mandatoryQueue_in.recycle();
+ action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
+ wakeUpBuffers(address);
}
action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") {
@@ -702,8 +700,8 @@
//*****************************************************
// Transitions for Load/Store/Replacement/WriteBack from transient states
- transition({IS, IM, IS_I, M_I, SM}, {Load, Ifetch, Store, L1_Replacement}) {
- z_recycleMandatoryQueue;
+ transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store,
L1_Replacement}) {
+ z_stallAndWaitMandatoryQueue;
}
// Transitions from Idle
@@ -824,6 +822,7 @@
transition(M_I, WB_Ack, I) {
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
transition(M, Inv, I) {
@@ -871,6 +870,7 @@
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
transition(IS_I, Data_all_Acks, I) {
@@ -878,6 +878,7 @@
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
transition(IS, DataS_fromL1, S) {
@@ -886,6 +887,7 @@
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
transition(IS_I, DataS_fromL1, I) {
@@ -894,6 +896,7 @@
h_load_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
// directory is blocked when sending exclusive data
@@ -903,6 +906,7 @@
jj_sendExclusiveUnblock;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
transition(IS, Data_Exclusive, E) {
@@ -911,6 +915,7 @@
jj_sendExclusiveUnblock;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
// Transitions from IM
@@ -931,6 +936,7 @@
jj_sendExclusiveUnblock;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
// transitions from SM
@@ -944,10 +950,7 @@
hh_store_hit;
s_deallocateTBE;
o_popIncomingResponseQueue;
- }
-
- transition(SINK_WB_ACK, {Load, Store, Ifetch, L1_Replacement}){
- z_recycleMandatoryQueue;
+ kd_wakeUpDependents;
}
transition(SINK_WB_ACK, Inv){
@@ -955,8 +958,9 @@
l_popRequestQueue;
}
- transition(SINK_WB_ACK, WB_Ack){
+ transition(SINK_WB_ACK, WB_Ack, I){
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
}
diff -r 6cc162805986 -r 38b8b9a97500
src/mem/protocol/MESI_CMP_directory-L2cache.sm
--- a/src/mem/protocol/MESI_CMP_directory-L2cache.sm Fri Feb 10 09:52:32
2012 -0600
+++ b/src/mem/protocol/MESI_CMP_directory-L2cache.sm Fri Feb 10 11:05:24
2012 -0600
@@ -158,6 +158,7 @@
void unset_cache_entry();
void set_tbe(TBE a);
void unset_tbe();
+ void wakeUpBuffers(Address a);
// inclusive cache, returns L2 entries only
Entry getCacheEntry(Address addr), return_by_pointer="yes" {
@@ -283,7 +284,7 @@
out_port(responseIntraChipL2Network_out, ResponseMsg, responseFromL2Cache);
- in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache) {
+ in_port(L1unblockNetwork_in, ResponseMsg, unblockToL2Cache, rank = 2) {
if(L1unblockNetwork_in.isReady()) {
peek(L1unblockNetwork_in, ResponseMsg) {
Entry cache_entry := getCacheEntry(in_msg.Address);
@@ -305,7 +306,7 @@
}
// Response IntraChip L2 Network - response msg to this particular L2 bank
- in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache) {
+ in_port(responseIntraChipL2Network_in, ResponseMsg, responseToL2Cache, rank
= 1) {
if (responseIntraChipL2Network_in.isReady()) {
peek(responseIntraChipL2Network_in, ResponseMsg) {
// test wether it's from a local L1 or an off chip source
@@ -349,7 +350,7 @@
}
// L1 Request
- in_port(L1RequestIntraChipL2Network_in, RequestMsg, L1RequestToL2Cache) {
+ in_port(L1RequestIntraChipL2Network_in, RequestMsg, L1RequestToL2Cache, rank
= 0) {
if(L1RequestIntraChipL2Network_in.isReady()) {
peek(L1RequestIntraChipL2Network_in, RequestMsg) {
Entry cache_entry := getCacheEntry(in_msg.Address);
@@ -791,14 +792,17 @@
}
}
- action(zz_recycleL1RequestQueue, "zz", desc="recycle L1 request queue") {
- L1RequestIntraChipL2Network_in.recycle();
+ action(zz_stallAndWaitL1RequestQueue, "zz", desc="recycle L1 request queue")
{
+ stall_and_wait(L1RequestIntraChipL2Network_in, address);
}
action(zn_recycleResponseNetwork, "zn", desc="recycle memory request") {
responseIntraChipL2Network_in.recycle();
}
+ action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
+ wakeUpBuffers(address);
+ }
//*****************************************************
// TRANSITIONS
@@ -820,7 +824,7 @@
}
transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB},
{L2_Replacement, L2_Replacement_clean}) {
- zz_recycleL1RequestQueue;
+ zz_stallAndWaitL1RequestQueue;
}
transition({IM, IS, ISS, SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv)
{
@@ -833,7 +837,7 @@
transition({SS_MB, M_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS,
L1_GET_INSTR, L1_GETX, L1_UPGRADE}) {
- zz_recycleL1RequestQueue;
+ zz_stallAndWaitL1RequestQueue;
}
@@ -885,6 +889,7 @@
e_sendDataToGetSRequestors;
s_deallocateTBE;
o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
}
transition(IM, Mem_Data, MT_MB) {
@@ -902,11 +907,11 @@
}
transition({IS, ISS}, L1_GETX) {
- zz_recycleL1RequestQueue;
+ zz_stallAndWaitL1RequestQueue;
}
transition(IM, {L1_GETX, L1_GETS, L1_GET_INSTR}) {
- zz_recycleL1RequestQueue;
+ zz_stallAndWaitL1RequestQueue;
}
// transitions from SS
@@ -1018,30 +1023,35 @@
// transitions from blocking states
transition(SS_MB, Unblock_Cancel, SS) {
k_popUnblockQueue;
+ kd_wakeUpDependents;
}
transition(MT_MB, Unblock_Cancel, MT) {
k_popUnblockQueue;
+ kd_wakeUpDependents;
}
transition(MT_IB, Unblock_Cancel, MT) {
k_popUnblockQueue;
+ kd_wakeUpDependents;
}
transition(SS_MB, Exclusive_Unblock, MT) {
// update actual directory
mmu_markExclusiveFromUnblock;
k_popUnblockQueue;
+ kd_wakeUpDependents;
}
transition({M_MB, MT_MB}, Exclusive_Unblock, MT) {
// update actual directory
mmu_markExclusiveFromUnblock;
k_popUnblockQueue;
+ kd_wakeUpDependents;
}
transition(MT_IIB, {L1_PUTX, L1_PUTX_old}){
- zz_recycleL1RequestQueue;
+ zz_stallAndWaitL1RequestQueue;
}
transition(MT_IIB, Unblock, MT_IB) {
@@ -1057,16 +1067,18 @@
transition(MT_IB, {WB_Data, WB_Data_clean}, SS) {
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev