changeset 96a602c5368d in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=96a602c5368d
description:
        ruby: Added merge GETS optimization to hammer

        Added an optimization that merges multiple pending GETS requests into a
        single request to the owner node.

diffstat:

 src/mem/protocol/MOESI_hammer-cache.sm |   75 ++++++++++++++++++-
 src/mem/protocol/MOESI_hammer-dir.sm   |  132 +++++++++++++++++++++++++++++++-
 src/mem/protocol/MOESI_hammer-msg.sm   |    4 +
 3 files changed, 203 insertions(+), 8 deletions(-)

diffs (truncated from 442 to 300 lines):

diff -r f895258c9121 -r 96a602c5368d src/mem/protocol/MOESI_hammer-cache.sm
--- a/src/mem/protocol/MOESI_hammer-cache.sm    Fri Aug 20 11:46:14 2010 -0700
+++ b/src/mem/protocol/MOESI_hammer-cache.sm    Fri Aug 20 11:46:14 2010 -0700
@@ -95,6 +95,7 @@
     // Requests
     Other_GETX,      desc="A GetX from another processor";
     Other_GETS,      desc="A GetS from another processor";
+    Merged_GETS,     desc="A Merged GetS from another processor";
     Other_GETS_No_Mig, desc="A GetS from another processor";
     Invalidate,      desc="Invalidate block";
 
@@ -136,6 +137,7 @@
     int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
     bool Sharers,            desc="On a GetS, did we find any other sharers in the system";
     MachineID LastResponder, desc="last machine to send a response for this request";
+    MachineID CurOwner,      desc="current owner of the block, used for UnblockS responses";
     Time InitialRequestTime, default="0", desc="time the initial requests was sent from the L1Cache";
     Time ForwardRequestTime, default="0", desc="time the dir forwarded the request";
     Time FirstResponseTime, default="0", desc="the time the first response was received";
@@ -286,6 +288,8 @@
       peek(forwardToCache_in, RequestMsg, block_on="Address") {
         if (in_msg.Type == CoherenceRequestType:GETX) {
           trigger(Event:Other_GETX, in_msg.Address);
+        } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) {
+          trigger(Event:Merged_GETS, in_msg.Address);
         } else if (in_msg.Type == CoherenceRequestType:GETS) {
           if (isCacheTagPresent(in_msg.Address)) {
             if (getCacheEntry(in_msg.Address).AtomicAccessed && no_mig_atomic) {
@@ -518,6 +522,24 @@
     }
   }
   
+  action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA_SHARED;
+        out_msg.Sender := machineID;
+        out_msg.Destination := in_msg.MergedRequestors;
+        out_msg.DataBlk := getCacheEntry(address).DataBlk;
+        DEBUG_EXPR(out_msg.DataBlk);
+        out_msg.Dirty := getCacheEntry(address).Dirty;
+        out_msg.Acks := machineCount(MachineType:L1Cache);
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+      }
+    }
+  }
+  
   action(f_sendAck, "f", desc="Send ack from cache to requestor") {
     peek(forwardToCache_in, RequestMsg) {
       enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
@@ -575,6 +597,7 @@
       out_msg.Address := address;
       out_msg.Type := CoherenceResponseType:UNBLOCKS;
       out_msg.Sender := machineID;
+      out_msg.CurOwner := TBEs[address].CurOwner;
       out_msg.Destination.add(map_Address_to_Directory(address));
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
     }
@@ -690,6 +713,11 @@
       }
     }
   }
+  action(uo_updateCurrentOwner, "uo", desc="When moving SS state, update current owner.") {
+    peek(responseToCache_in, ResponseMsg) {
+      TBEs[address].CurOwner := in_msg.Sender;
+    }
+  }
 
   action(n_popResponseQueue, "n", desc="Pop response queue") {
     responseToCache_in.dequeue();
@@ -745,6 +773,24 @@
     }
   }
 
+  action(qm_sendDataFromTBEToCache, "qm", desc="Send data from TBE to cache, multiple sharers") {
+    peek(forwardToCache_in, RequestMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceResponseType:DATA;
+        out_msg.Sender := machineID;
+        out_msg.Destination := in_msg.MergedRequestors;
+        DEBUG_EXPR(out_msg.Destination);
+        out_msg.DataBlk := TBEs[address].DataBlk;
+        out_msg.Dirty := TBEs[address].Dirty;
+        out_msg.Acks := machineCount(MachineType:L1Cache);
+        out_msg.MessageSize := MessageSizeType:Response_Data;
+        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
+        out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
+      }
+    }
+  }
+
   action(qq_sendDataFromTBEToMemory, "\q", desc="Send data from TBE to memory") {
     enqueue(unblockNetwork_out, ResponseMsg, latency=cache_response_latency) {
       out_msg.Address := address;
@@ -899,7 +945,7 @@
     zz_recycleMandatoryQueue;
   }
 
-  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
+  transition({IT, ST, OT, MT, MMT}, {Other_GETX, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) {
     // stall
   }
 
@@ -1111,6 +1157,11 @@
     l_popForwardQueue;
   }
 
+  transition(O, Merged_GETS) {
+    em_sendDataSharedMultiple;
+    l_popForwardQueue;
+  }
+
   // Transitions from Modified
   transition(MM, {Load, Ifetch}) {
     h_load_hit;
@@ -1143,6 +1194,11 @@
     l_popForwardQueue;
   }
   
+  transition(MM, Merged_GETS, O) {
+    em_sendDataSharedMultiple;
+    l_popForwardQueue;
+  }
+ 
   // Transitions from Dirty Exclusive
   transition(M, {Load, Ifetch}) {
     h_load_hit;
@@ -1170,6 +1226,11 @@
     l_popForwardQueue;
   }
 
+  transition(M, Merged_GETS, O) {
+    em_sendDataSharedMultiple;
+    l_popForwardQueue;
+  }
+
   // Transitions from IM
 
   transition(IM, {Other_GETX, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
@@ -1249,6 +1310,11 @@
     l_popForwardQueue;
   }
 
+  transition(OM, Merged_GETS) {
+    em_sendDataSharedMultiple;
+    l_popForwardQueue;
+  }
+
   transition(OM, Ack) {
     m_decrementNumberOfMessages;
     o_checkForCompletion;
@@ -1287,6 +1353,7 @@
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     hx_external_load_hit;
+    uo_updateCurrentOwner;
     n_popResponseQueue;
   }
 
@@ -1304,6 +1371,7 @@
     m_decrementNumberOfMessages;
     o_checkForCompletion;
     hx_external_load_hit;
+    uo_updateCurrentOwner;
     n_popResponseQueue;
   }
 
@@ -1385,6 +1453,11 @@
     l_popForwardQueue;
   }
 
+  transition({OI, MI}, Merged_GETS, OI) {
+    qm_sendDataFromTBEToCache;
+    l_popForwardQueue;
+  }
+
   transition(MI, Writeback_Ack, I) {
     t_sendExclusiveDataFromTBEToMemory;
     s_deallocateTBE;
diff -r f895258c9121 -r 96a602c5368d src/mem/protocol/MOESI_hammer-dir.sm
--- a/src/mem/protocol/MOESI_hammer-dir.sm      Fri Aug 20 11:46:14 2010 -0700
+++ b/src/mem/protocol/MOESI_hammer-dir.sm      Fri Aug 20 11:46:14 2010 -0700
@@ -69,6 +69,9 @@
     NO_R,           desc="Was Not Owner or Sharer, replacing probe filter entry";
 
     NO_B,  "NO^B",  desc="Not Owner, Blocked";
+    NO_B_X,  "NO^B",  desc="Not Owner, Blocked, next queued request GETX";
+    NO_B_S,  "NO^B",  desc="Not Owner, Blocked, next queued request GETS";
+    NO_B_S_W,  "NO^B",  desc="Not Owner, Blocked, forwarded merged GETS, waiting for responses";
     O_B,   "O^B",   desc="Owner, Blocked";
     NO_B_W,         desc="Not Owner, Blocked, waiting for Dram";
     O_B_W,          desc="Owner, Blocked, waiting for Dram";
@@ -121,6 +124,7 @@
     All_acks_and_shared_data,     desc="Received shared data and message acks";
     All_acks_and_owner_data,     desc="Received shared data and message acks";
     All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
+    All_Unblocks, desc="Received all unblocks for a merged gets request";
   }
 
   // TYPES
@@ -148,6 +152,7 @@
     DataBlock DataBlk,     desc="The current view of system memory";
     int Len,               desc="...";
     MachineID DmaRequestor, desc="DMA requestor";
+    NetDest GetSRequestors, desc="GETS merged requestors";
     int NumPendingMsgs,    desc="Number of pending acks/messages";
     bool CacheDirty, default="false", desc="Indicates whether a cache has responded with dirty data";
     bool Sharers, default="false", desc="Indicates whether a cache has indicated it is currently a sharer";
@@ -243,6 +248,8 @@
           trigger(Event:All_acks_and_shared_data, in_msg.Address);
         } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) {
           trigger(Event:All_acks_and_data_no_sharers, in_msg.Address);
+        } else if (in_msg.Type == TriggerType:ALL_UNBLOCKS) {
+          trigger(Event:All_Unblocks, in_msg.Address);
         } else {
           error("Unexpected message");
         }
@@ -487,6 +494,20 @@
     }
   }
 
+  action(mu_decrementNumberOfUnblocks, "mu", desc="Decrement the number of messages for which we're waiting") {
+    peek(unblockNetwork_in, ResponseMsg) {
+      assert(in_msg.Type == CoherenceResponseType:UNBLOCKS);
+      DEBUG_EXPR(TBEs[address].NumPendingMsgs);
+      //
+      // Each UNBLOCKS message received here accounts for one outstanding
+      // response to a merged GETS, so decrement by exactly 1.  The count is
+      // initialized from GetSRequestors.count() (sp_setPendingMsgsToMergedSharers).
+      //
+      TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1;
+      DEBUG_EXPR(TBEs[address].NumPendingMsgs);
+    }
+  }
+
   action(n_popResponseQueue, "n", desc="Pop response queue") {
     responseToDir_in.dequeue();
   }
@@ -508,6 +529,19 @@
     }
   }
 
+  action(os_checkForMergedGetSCompletion, "os", desc="Check for merged GETS completion") {
+    if (TBEs[address].NumPendingMsgs == 0) {
+      enqueue(triggerQueue_out, TriggerMsg) {
+        out_msg.Address := address;
+        out_msg.Type := TriggerType:ALL_UNBLOCKS;
+      }
+    }
+  }
+
+  action(sp_setPendingMsgsToMergedSharers, "sp", desc="Set pending messages to waiting sharers") {
+    TBEs[address].NumPendingMsgs := TBEs[address].GetSRequestors.count();
+  }
+
   action(spa_setPendingAcksToZeroIfPF, "spa", desc="if probe filter, no need to wait for acks") {
     if (probe_filter_enabled) {
       TBEs[address].NumPendingMsgs := 0;
@@ -598,6 +632,12 @@
     }
   }
 
+  action(rs_recordGetSRequestor, "rs", desc="Record GETS requestor in TBE") {
+    peek(requestQueue_in, RequestMsg) {
+      TBEs[address].GetSRequestors.add(in_msg.Requestor);
+    }
+  }
+
   action(r_setSharerBit, "r", desc="We saw other sharers") {
     TBEs[address].Sharers := true;
   }
@@ -694,6 +734,29 @@
     }
   }
 
+  action(fr_forwardMergeReadRequestsToOwner, "frr", desc="Forward coalesced read request to owner") {
+    assert(machineCount(MachineType:L1Cache) > 1);
+    //
+    // Fixme! The unblock network should not stall on the forward network.  Add a trigger queue to
+    // decouple the two.
+    //
+    peek(unblockNetwork_in, ResponseMsg) {
+      enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
+        out_msg.Address := address;
+        out_msg.Type := CoherenceRequestType:MERGED_GETS;
+        out_msg.MergedRequestors := TBEs[address].GetSRequestors;
+        if (in_msg.Type == CoherenceResponseType:UNBLOCKS) {
+          out_msg.Destination.add(in_msg.CurOwner);
+        } else {
+          out_msg.Destination.add(in_msg.Sender);
+        }
+        out_msg.MessageSize := MessageSizeType:Request_Control;
+        out_msg.InitialRequestTime := zero_time();
+        out_msg.ForwardRequestTime := get_time();
+      }      
+    }
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to