changeset ab05e20dc4a7 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=ab05e20dc4a7
description:
        cache: Make caches sharing aware and add occupancy stats.
        On the config end, if a shared L2 is created for the system, it is
        parameterized to have n sharers as defined by option.num_cpus. In
        addition to making the cache sharing-aware so that discriminating tag
        policies can make use of context_ids to make decisions, I added an
        occupancy AverageStat and an occ % stat to each cache so that you can
        know which contexts are occupying how much cache on average, both in
        terms of blocks and percentage. Note that since devices have
        context_id -1, having an array of occ stats that correspond to each
        context_id will break here, so in FS mode I add an extra bucket for
        device blocks. This bucket is explicitly not added in SE mode, not
        only to avoid ugliness in the stats.txt file but also to avoid broken
        stats (some formulas break when a bucket is 0).

diffstat:

20 files changed, 126 insertions(+), 23 deletions(-)
configs/example/se.py                       |    1 
src/mem/cache/BaseCache.py                  |    1 
src/mem/cache/base.cc                       |   15 ++++++--
src/mem/cache/base.hh                       |   48 +++++++++++++++++++++++++--
src/mem/cache/blk.hh                        |    5 ++
src/mem/cache/cache_impl.hh                 |    8 ++--
src/mem/cache/tags/base.cc                  |   15 ++++++++
src/mem/cache/tags/base.hh                  |   10 +++++
src/mem/cache/tags/fa_lru.cc                |   14 +++----
src/mem/cache/tags/fa_lru.hh                |    2 -
src/mem/cache/tags/iic.cc                   |    2 -
src/mem/cache/tags/iic.hh                   |    2 -
src/mem/cache/tags/lru.cc                   |   19 ++++++++++
tests/configs/memtest.py                    |    1 
tests/configs/o3-timing-mp.py               |    1 
tests/configs/simple-atomic-mp.py           |    1 
tests/configs/simple-timing-mp.py           |    1 
tests/configs/tsunami-o3-dual.py            |    1 
tests/configs/tsunami-simple-atomic-dual.py |    1 
tests/configs/tsunami-simple-timing-dual.py |    1 

diffs (truncated from 471 to 300 lines):

diff -r 039202aafc0d -r ab05e20dc4a7 configs/example/se.py
--- a/configs/example/se.py     Tue Feb 23 09:33:18 2010 -0800
+++ b/configs/example/se.py     Tue Feb 23 09:34:22 2010 -0800
@@ -151,6 +151,7 @@
     system.tol2bus = Bus()
     system.l2.cpu_side = system.tol2bus.port
     system.l2.mem_side = system.membus.port
+    system.l2.num_cpus = np
 
 for i in xrange(np):
     if options.caches:
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/BaseCache.py
--- a/src/mem/cache/BaseCache.py        Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/BaseCache.py        Tue Feb 23 09:34:22 2010 -0800
@@ -44,6 +44,7 @@
     prioritizeRequests = Param.Bool(False,
         "always service demand misses first")
     repl = Param.Repl(NULL, "replacement policy")
+    num_cpus =  Param.Int(1, "number of cpus sharing this cache")
     size = Param.MemorySize("capacity in bytes")
     forward_snoops = Param.Bool(True,
         "forward snoops from mem side to cpu side")
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/base.cc
--- a/src/mem/cache/base.cc     Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/base.cc     Tue Feb 23 09:34:22 2010 -0800
@@ -62,7 +62,8 @@
       noTargetMSHR(NULL),
       missCount(p->max_miss_count),
       drainEvent(NULL),
-      addrRange(p->addr_range)
+      addrRange(p->addr_range),
+      _numCpus(p->num_cpus)
 {
 }
 
@@ -148,7 +149,11 @@
         const string &cstr = cmd.toString();
 
         hits[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
             .name(name() + "." + cstr + "_hits")
             .desc("number of " + cstr + " hits")
             .flags(total | nozero | nonan)
@@ -185,7 +190,11 @@
         const string &cstr = cmd.toString();
 
         misses[access_idx]
-            .init(maxThreadsPerCPU)
+#if FULL_SYSTEM
+            .init(_numCpus + 1)
+#else
+            .init(_numCpus)
+#endif
             .name(name() + "." + cstr + "_misses")
             .desc("number of " + cstr + " misses")
             .flags(total | nozero | nonan)
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/base.hh
--- a/src/mem/cache/base.hh     Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/base.hh     Tue Feb 23 09:34:22 2010 -0800
@@ -47,6 +47,7 @@
 #include "base/statistics.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
+#include "config/full_system.hh"
 #include "mem/cache/mshr_queue.hh"
 #include "mem/mem_object.hh"
 #include "mem/packet.hh"
@@ -219,7 +220,11 @@
      * Normally this is all possible memory addresses. */
     Range<Addr> addrRange;
 
+    /** number of cpus sharing this cache - from config file */
+    int _numCpus;
+
   public:
+    int numCpus() { return _numCpus; }
     // Statistics
     /**
      * @addtogroup CacheStatistics
@@ -481,9 +486,25 @@
 
     virtual bool inMissQueue(Addr addr) = 0;
 
-    void incMissCount(PacketPtr pkt)
+    void incMissCount(PacketPtr pkt, int id)
     {
-        misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            misses[pkt->cmdToIndex()][0]++;
+            /* same thing for writeback hits as misses - no context id
+             * available, meanwhile writeback hit/miss stats are not used
+             * in any aggregate hit/miss calculations, so just lump them all
+             * in bucket 0 */
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Device accesses have id -1
+            // lump device accesses into their own bucket
+            misses[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            misses[pkt->cmdToIndex()][id % _numCpus]++;
+        }
 
         if (missCount) {
             --missCount;
@@ -491,6 +512,29 @@
                 exitSimLoop("A cache reached the maximum miss count");
         }
     }
+    void incHitCount(PacketPtr pkt, int id)
+    {
+
+        /* Writeback requests don't have a context id associated with
+         * them, so attributing a hit to a -1 context id is obviously a
+         * problem.  I've noticed in the stats that hits are split into
+         * demand and non-demand hits - neither of which include writeback
+         * hits, so here, I'll just put the writeback hits into bucket 0
+         * since it won't mess with any other stats -hsul */
+        if (pkt->cmd == MemCmd::Writeback) {
+            assert(id == -1);
+            hits[pkt->cmdToIndex()][0]++;
+#if FULL_SYSTEM
+        } else if (id == -1) {
+            // Device accesses have id -1
+            // lump device accesses into their own bucket
+            hits[pkt->cmdToIndex()][_numCpus]++;
+#endif
+        } else {
+            /* the % is necessary in case there are switch cpus */
+            hits[pkt->cmdToIndex()][id % _numCpus]++;
+        }
+    }
 
 };
 
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/blk.hh
--- a/src/mem/cache/blk.hh      Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/blk.hh      Tue Feb 23 09:34:22 2010 -0800
@@ -104,6 +104,9 @@
     /** Number of references to this block since it was brought in. */
     int refCount;
 
+    /** holds the context source ID of the requestor for this block. */
+    int contextSrc;
+
   protected:
     /**
      * Represents that the indicated thread context has a "lock" on
@@ -133,7 +136,7 @@
 
     CacheBlk()
         : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
-          set(-1), isTouched(false), refCount(0)
+          set(-1), isTouched(false), refCount(0), contextSrc(-1)
     {}
 
     /**
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/cache_impl.hh
--- a/src/mem/cache/cache_impl.hh       Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/cache_impl.hh       Tue Feb 23 09:34:22 2010 -0800
@@ -277,7 +277,7 @@
 
         if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
             // OK to satisfy access
-            hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+            incHitCount(pkt, id);
             satisfyCpuSideRequest(pkt, blk);
             return true;
         }
@@ -297,7 +297,7 @@
             if (blk == NULL) {
                 // no replaceable block available, give up.
                 // writeback will be forwarded to next level.
-                incMissCount(pkt);
+                incMissCount(pkt, id);
                 return false;
             }
             int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
@@ -308,11 +308,11 @@
         blk->status |= BlkDirty;
         // nothing else to do; writeback doesn't expect response
         assert(!pkt->needsResponse());
-        hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++;
+        incHitCount(pkt, id);
         return true;
     }
 
-    incMissCount(pkt);
+    incMissCount(pkt, id);
 
     if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) {
         // complete miss on store conditional... just give up now
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/tags/base.cc
--- a/src/mem/cache/tags/base.cc        Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/tags/base.cc        Tue Feb 23 09:34:22 2010 -0800
@@ -87,5 +87,20 @@
         .desc("Cycle when the warmup percentage was hit.")
         ;
 
+    occupancies
+        .init(cache->numCpus())
+        .name(name + ".occ_blocks")
+        .desc("Average occupied blocks per context")
+        .flags(nozero | nonan)
+        ;
+
+    avgOccs
+        .name(name + ".occ_%")
+        .desc("Average percentage of cache occupancy")
+        .flags(nozero)
+        ;
+
+    avgOccs = occupancies / Stats::constant(numBlocks);
+
     registerExitCallback(new BaseTagsCallback(this));
 }
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/tags/base.hh
--- a/src/mem/cache/tags/base.hh        Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/tags/base.hh        Tue Feb 23 09:34:22 2010 -0800
@@ -63,6 +63,9 @@
     /** Marked true when the cache is warmed up. */
     bool warmedUp;
 
+    /** the number of blocks in the cache */
+    unsigned numBlocks;
+
     // Statistics
     /**
      * @addtogroup CacheStatistics
@@ -92,6 +95,13 @@
 
     /** The cycle that the warmup percentage was hit. */
     Stats::Scalar warmupCycle;
+
+    /** Average occupancy of each context/cpu using the cache */
+    Stats::AverageVector occupancies;
+
+    /** Average occ % of each context/cpu using the cache */
+    Stats::Formula avgOccs;
+
     /**
      * @}
      */
diff -r 039202aafc0d -r ab05e20dc4a7 src/mem/cache/tags/fa_lru.cc
--- a/src/mem/cache/tags/fa_lru.cc      Tue Feb 23 09:33:18 2010 -0800
+++ b/src/mem/cache/tags/fa_lru.cc      Tue Feb 23 09:34:22 2010 -0800
@@ -43,8 +43,7 @@
 using namespace std;
 
 FALRU::FALRU(unsigned _blkSize, unsigned _size, unsigned hit_latency)
-    : blkSize(_blkSize), size(_size),
-      numBlks(size/blkSize), hitLatency(hit_latency)
+    : blkSize(_blkSize), size(_size), hitLatency(hit_latency)
 {
     if (!isPowerOf2(blkSize))
         fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -65,23 +64,24 @@
 
     warmedUp = false;
     warmupBound = size/blkSize;
+    numBlocks = size/blkSize;
 
-    blks = new FALRUBlk[numBlks];
+    blks = new FALRUBlk[numBlocks];
     head = &(blks[0]);
-    tail = &(blks[numBlks-1]);
+    tail = &(blks[numBlocks-1]);
 
     head->prev = NULL;
     head->next = &(blks[1]);
     head->inCache = cacheMask;
 
-    tail->prev = &(blks[numBlks-2]);
+    tail->prev = &(blks[numBlocks-2]);
     tail->next = NULL;
     tail->inCache = 0;
 
     unsigned index = (1 << 17) / blkSize;
     unsigned j = 0;
     int flags = cacheMask;
-    for (unsigned i = 1; i < numBlks - 1; i++) {
+    for (unsigned i = 1; i < numBlocks - 1; i++) {
         blks[i].inCache = flags;
         if (i == index - 1){
             cacheBoundaries[j] = &(blks[i]);
@@ -94,7 +94,7 @@
         blks[i].isTouched = false;
     }
     assert(j == numCaches);
-    assert(index == numBlks);
+    assert(index == numBlocks);
     //assert(check());
 }
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to