changeset f39c2cc0d44e in /z/repo/gem5
details: http://repo.gem5.org/gem5?cmd=changeset;node=f39c2cc0d44e
description:
        mem: Make the coherent crossbar account for timing snoops

        This patch introduces the concept of a snoop latency. Given the
        requirement to snoop and forward packets in zero time (due to the
        coherency mechanism), the latency is accounted for later.

        On a snoop, we establish the latency, and later add it to the header
        delay of the packet. To allow multiple caches to contribute to the
        snoop latency, we use a separate variable in the packet, and then take
        the maximum before adding it to the header delay.

diffstat:

 src/mem/cache/cache.cc   |  36 +++++++++++++++++++++++++++++-------
 src/mem/cache/cache.hh   |  17 ++++++++++++-----
 src/mem/coherent_xbar.cc |  16 ++++++++++++++++
 src/mem/packet.hh        |  13 +++++++++++--
 4 files changed, 68 insertions(+), 14 deletions(-)

diffs (224 lines):

diff -r 823a6aa11fbd -r f39c2cc0d44e src/mem/cache/cache.cc
--- a/src/mem/cache/cache.cc    Fri Sep 25 06:45:52 2015 -0400
+++ b/src/mem/cache/cache.cc    Fri Sep 25 07:13:54 2015 -0400
@@ -1730,7 +1730,7 @@
     memSidePort->schedTimingSnoopResp(pkt, forward_time, true);
 }
 
-void
+uint32_t
 Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
                    bool is_deferred, bool pending_inval)
 {
@@ -1748,6 +1748,8 @@
     bool invalidate = pkt->isInvalidate();
     bool M5_VAR_USED needs_exclusive = pkt->needsExclusive();
 
+    uint32_t snoop_delay = 0;
+
     if (forwardSnoops) {
         // first propagate snoop upward to see if anyone above us wants to
         // handle it.  save & restore packet src since it will get
@@ -1765,6 +1767,12 @@
             // time
             snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
             cpuSidePort->sendTimingSnoopReq(&snoopPkt);
+
+            // add the header delay (including crossbar and snoop
+            // delays) of the upward snoop to the snoop delay for this
+            // cache
+            snoop_delay += snoopPkt.headerDelay;
+
             if (snoopPkt.memInhibitAsserted()) {
                 // cache-to-cache response from some upper cache
                 assert(!alreadyResponded);
@@ -1796,7 +1804,7 @@
     if (!blk || !blk->isValid()) {
         DPRINTF(Cache, "%s snoop miss for %s addr %#llx size %d\n",
                 __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-        return;
+        return snoop_delay;
     } else {
        DPRINTF(Cache, "%s snoop hit for %s for addr %#llx size %d, "
                "old state is %s\n", __func__, pkt->cmdString(),
@@ -1824,7 +1832,7 @@
         DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s 
from"
                 " lower cache\n", pkt->getAddr(), pkt->cmdString());
         pkt->setBlockCached();
-        return;
+        return snoop_delay;
     }
 
     if (!pkt->req->isUncacheable() && pkt->isRead() && !invalidate) {
@@ -1884,6 +1892,8 @@
     }
 
     DPRINTF(Cache, "new state is %s\n", blk->print());
+
+    return snoop_delay;
 }
 
 
@@ -1907,6 +1917,14 @@
     Addr blk_addr = blockAlign(pkt->getAddr());
     MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
 
+    // Update the latency cost of the snoop so that the crossbar can
+    // account for it. Do not overwrite what other neighbouring caches
+    // have already done, rather take the maximum. The update is
+    // tentative, for cases where we return before an upward snoop
+    // happens below.
+    pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay,
+                                         lookupLatency * clockPeriod());
+
     // Inform request(Prefetch, CleanEvict or Writeback) from below of
     // MSHR hit, set setBlockCached.
     if (mshr && pkt->mustCheckAbove()) {
@@ -2004,7 +2022,12 @@
     // We could be more selective and return here if the
     // request is non-exclusive or if the writeback is
     // exclusive.
-    handleSnoop(pkt, blk, true, false, false);
+    uint32_t snoop_delay = handleSnoop(pkt, blk, true, false, false);
+
+    // Override what we did when we first saw the snoop, as we now
+    // also have the cost of the upwards snoops to account for
+    pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay, snoop_delay +
+                                         lookupLatency * clockPeriod());
 }
 
 bool
@@ -2030,9 +2053,8 @@
     }
 
     CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
-    handleSnoop(pkt, blk, false, false, false);
-    // We consider forwardLatency here because a snoop occurs in atomic mode
-    return forwardLatency * clockPeriod();
+    uint32_t snoop_delay = handleSnoop(pkt, blk, false, false, false);
+    return snoop_delay + lookupLatency * clockPeriod();
 }
 
 
diff -r 823a6aa11fbd -r f39c2cc0d44e src/mem/cache/cache.hh
--- a/src/mem/cache/cache.hh    Fri Sep 25 06:45:52 2015 -0400
+++ b/src/mem/cache/cache.hh    Fri Sep 25 07:13:54 2015 -0400
@@ -300,12 +300,19 @@
                                 bool already_copied, bool pending_inval);
 
     /**
-     * Sets the blk to the new state.
-     * @param blk The cache block being snooped.
-     * @param new_state The new coherence state for the block.
+     * Perform an upward snoop if needed, and update the block state
+     * (possibly invalidating the block). Also create a response if required.
+     *
+     * @param pkt Snoop packet
+     * @param blk Cache block being snooped
+     * @param is_timing Timing or atomic for the response
+     * @param is_deferred Is this a deferred snoop or not?
+     * @param pending_inval Do we have a pending invalidation?
+     *
+     * @return The snoop delay incurred by the upwards snoop
      */
-    void handleSnoop(PacketPtr ptk, CacheBlk *blk,
-                     bool is_timing, bool is_deferred, bool pending_inval);
+    uint32_t handleSnoop(PacketPtr pkt, CacheBlk *blk,
+                         bool is_timing, bool is_deferred, bool pending_inval);
 
     /**
      * Create a writeback request for the given block.
diff -r 823a6aa11fbd -r f39c2cc0d44e src/mem/coherent_xbar.cc
--- a/src/mem/coherent_xbar.cc  Fri Sep 25 06:45:52 2015 -0400
+++ b/src/mem/coherent_xbar.cc  Fri Sep 25 07:13:54 2015 -0400
@@ -187,6 +187,8 @@
     Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
 
     if (!system->bypassCaches()) {
+        assert(pkt->snoopDelay == 0);
+
         // the packet is a memory-mapped request and should be
         // broadcasted to our snoopers but the source
         if (snoopFilter) {
@@ -204,6 +206,10 @@
         } else {
             forwardTiming(pkt, slave_port_id);
         }
+
+        // add the snoop delay to our header delay, and then reset it
+        pkt->headerDelay += pkt->snoopDelay;
+        pkt->snoopDelay = 0;
     }
 
     // forwardTiming snooped into peer caches of the sender, and if
@@ -377,9 +383,15 @@
     // we should only see express snoops from caches
     assert(pkt->isExpressSnoop());
 
+    // set the packet header and payload delay, for now use forward latency
+    // @todo Assess the choice of latency further
+    calcPacketTiming(pkt, forwardLatency * clockPeriod());
+
     // remeber if the packet is inhibited so we can see if it changes
     const bool is_inhibited = pkt->memInhibitAsserted();
 
+    assert(pkt->snoopDelay == 0);
+
     if (snoopFilter) {
         // let the Snoop Filter work its magic and guide probing
         auto sf_res = snoopFilter->lookupSnoop(pkt);
@@ -398,6 +410,10 @@
         forwardTiming(pkt, InvalidPortID);
     }
 
+    // add the snoop delay to our header delay, and then reset it
+    pkt->headerDelay += pkt->snoopDelay;
+    pkt->snoopDelay = 0;
+
     // if we can expect a response, remember how to route it
     if (!is_inhibited && pkt->memInhibitAsserted()) {
         assert(routeTo.find(pkt->req) == routeTo.end());
diff -r 823a6aa11fbd -r f39c2cc0d44e src/mem/packet.hh
--- a/src/mem/packet.hh Fri Sep 25 06:45:52 2015 -0400
+++ b/src/mem/packet.hh Fri Sep 25 07:13:54 2015 -0400
@@ -324,6 +324,14 @@
     uint32_t headerDelay;
 
     /**
+     * Keep track of the extra delay incurred by snooping upwards
+     * before sending a request down the memory system. This is used
+     * by the coherent crossbar to account for the additional request
+     * delay.
+     */
+    uint32_t snoopDelay;
+
+    /**
      * The extra pipelining delay from seeing the packet until the end of
      * payload is transmitted by the component that provided it (if
      * any). This includes the header delay. Similar to the header
@@ -582,7 +590,7 @@
      */
     Packet(const RequestPtr _req, MemCmd _cmd)
         :  cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
-           size(0), headerDelay(0), payloadDelay(0),
+           size(0), headerDelay(0), snoopDelay(0), payloadDelay(0),
            senderState(NULL)
     {
         if (req->hasPaddr()) {
@@ -603,7 +611,7 @@
      */
     Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize)
         :  cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
-           headerDelay(0), payloadDelay(0),
+           headerDelay(0), snoopDelay(0), payloadDelay(0),
            senderState(NULL)
     {
         if (req->hasPaddr()) {
@@ -628,6 +636,7 @@
            addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
            bytesValid(pkt->bytesValid),
            headerDelay(pkt->headerDelay),
+           snoopDelay(0),
            payloadDelay(pkt->payloadDelay),
            senderState(pkt->senderState)
     {
_______________________________________________
gem5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/gem5-dev

Reply via email to