changeset b01a51ff05fa in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=b01a51ff05fa
description:
        Mem: Fix issue with dirty block being lost when entire block is transferred to non-cache.

        This change fixes the problem for all the cases we actively use. If you
        want to try more creative I/O device attachments (e.g. sharing an L2),
        this won't work. You would need another level of caching between the
        I/O device and the cache (which you actually need anyway with our
        current code to make sure writes propagate). This is required so that
        you can mark the cache in between as top level and it won't try to send
        ownership of a block to the I/O device. Asserts have been added that
        should catch any issues.

diffstat:

 configs/common/Caches.py                    |  3 +++
 src/cpu/o3/fetch_impl.hh                    |  3 +++
 src/dev/io_device.cc                        |  3 +++
 src/mem/cache/BaseCache.py                  |  1 +
 src/mem/cache/base.cc                       |  1 +
 src/mem/cache/base.hh                       |  5 +++++
 src/mem/cache/cache_impl.hh                 |  2 +-
 tests/configs/inorder-timing.py             |  6 +++++-
 tests/configs/memtest.py                    |  1 +
 tests/configs/o3-timing-mp.py               |  1 +
 tests/configs/o3-timing.py                  |  6 +++++-
 tests/configs/pc-simple-atomic.py           |  3 +++
 tests/configs/pc-simple-timing.py           |  1 +
 tests/configs/realview-simple-atomic.py     |  1 +
 tests/configs/realview-simple-timing.py     |  1 +
 tests/configs/simple-atomic-mp.py           |  1 +
 tests/configs/simple-timing-mp.py           |  1 +
 tests/configs/simple-timing.py              |  6 +++++-
 tests/configs/tsunami-o3-dual.py            |  2 ++
 tests/configs/tsunami-o3.py                 |  2 ++
 tests/configs/tsunami-simple-atomic-dual.py |  2 ++
 tests/configs/tsunami-simple-atomic.py      |  2 ++
 tests/configs/tsunami-simple-timing-dual.py |  2 ++
 tests/configs/tsunami-simple-timing.py      |  2 ++
 24 files changed, 54 insertions(+), 4 deletions(-)

diffs (truncated from 368 to 300 lines):

diff -r 9f704aa10eb4 -r b01a51ff05fa configs/common/Caches.py
--- a/configs/common/Caches.py  Thu Mar 17 19:20:19 2011 -0500
+++ b/configs/common/Caches.py  Thu Mar 17 19:20:19 2011 -0500
@@ -34,6 +34,7 @@
     latency = '1ns'
     mshrs = 10
     tgts_per_mshr = 5
+    is_top_level = True
 
 class L2Cache(BaseCache):
     assoc = 8
@@ -49,6 +50,7 @@
     mshrs = 10
     size = '1kB'
     tgts_per_mshr = 12
+    is_top_level = True
 
 class IOCache(BaseCache):
     assoc = 8
@@ -58,3 +60,4 @@
     size = '1kB'
     tgts_per_mshr = 12
     forward_snoops = False
+    is_top_level = True
diff -r 9f704aa10eb4 -r b01a51ff05fa src/cpu/o3/fetch_impl.hh
--- a/src/cpu/o3/fetch_impl.hh  Thu Mar 17 19:20:19 2011 -0500
+++ b/src/cpu/o3/fetch_impl.hh  Thu Mar 17 19:20:19 2011 -0500
@@ -112,6 +112,9 @@
 {
     DPRINTF(Fetch, "Received timing\n");
     if (pkt->isResponse()) {
+        // We shouldn't ever get a block in ownership state
+        assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
+
         fetch->processCacheCompletion(pkt);
     }
     //else Snooped a coherence request, just return
diff -r 9f704aa10eb4 -r b01a51ff05fa src/dev/io_device.cc
--- a/src/dev/io_device.cc      Thu Mar 17 19:20:19 2011 -0500
+++ b/src/dev/io_device.cc      Thu Mar 17 19:20:19 2011 -0500
@@ -139,6 +139,9 @@
         assert(pendingCount >= 0);
         assert(state);
 
+        // We shouldn't ever get a block in ownership state
+        assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted()));
+
         state->numBytes += pkt->req->getSize();
         assert(state->totBytes >= state->numBytes);
         if (state->totBytes == state->numBytes) {
diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/BaseCache.py
--- a/src/mem/cache/BaseCache.py        Thu Mar 17 19:20:19 2011 -0500
+++ b/src/mem/cache/BaseCache.py        Thu Mar 17 19:20:19 2011 -0500
@@ -48,6 +48,7 @@
     size = Param.MemorySize("capacity in bytes")
     forward_snoops = Param.Bool(True,
         "forward snoops from mem side to cpu side")
+    is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. L1)")
     subblock_size = Param.Int(0,
         "Size of subblock in IIC used for compression")
     tgts_per_mshr = Param.Int("max number of accesses per MSHR")
diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/base.cc
--- a/src/mem/cache/base.cc     Thu Mar 17 19:20:19 2011 -0500
+++ b/src/mem/cache/base.cc     Thu Mar 17 19:20:19 2011 -0500
@@ -58,6 +58,7 @@
       hitLatency(p->latency),
       numTarget(p->tgts_per_mshr),
       forwardSnoops(p->forward_snoops),
+      isTopLevel(p->is_top_level),
       blocked(0),
       noTargetMSHR(NULL),
       missCount(p->max_miss_count),
diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/base.hh
--- a/src/mem/cache/base.hh     Thu Mar 17 19:20:19 2011 -0500
+++ b/src/mem/cache/base.hh     Thu Mar 17 19:20:19 2011 -0500
@@ -194,6 +194,11 @@
     /** Do we forward snoops from mem side port through to cpu side port? */
     bool forwardSnoops;
 
+    /** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should
+     * never try to forward ownership and similar optimizations to the cpu
+     * side */
+    bool isTopLevel;
+
     /**
      * Bit vector of the blocking reasons for the access path.
      * @sa #BlockedCause
diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/cache_impl.hh
--- a/src/mem/cache/cache_impl.hh       Thu Mar 17 19:20:19 2011 -0500
+++ b/src/mem/cache/cache_impl.hh       Thu Mar 17 19:20:19 2011 -0500
@@ -216,7 +216,7 @@
                 
                 if (blk->isDirty()) {
                     // special considerations if we're owner:
-                    if (!deferred_response) {
+                    if (!deferred_response && !isTopLevel) {
                         // if we are responding immediately and can
                         // signal that we're transferring ownership
                         // along with exclusivity, do so
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/inorder-timing.py
--- a/tests/configs/inorder-timing.py   Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/inorder-timing.py   Thu Mar 17 19:20:19 2011 -0500
@@ -37,8 +37,12 @@
     mshrs = 10
     tgts_per_mshr = 5
 
+class MyL1Cache(MyCache):
+    is_top_level = True
+
 cpu = InOrderCPU(cpu_id=0)
-cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
+cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
+                              MyL1Cache(size = '256kB'),
                               MyCache(size = '2MB', latency='10ns'))
 
 cpu.clock = '2GHz'
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/memtest.py
--- a/tests/configs/memtest.py  Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/memtest.py  Thu Mar 17 19:20:19 2011 -0500
@@ -38,6 +38,7 @@
     block_size = 64
     mshrs = 12
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/o3-timing-mp.py
--- a/tests/configs/o3-timing-mp.py     Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/o3-timing-mp.py     Thu Mar 17 19:20:19 2011 -0500
@@ -39,6 +39,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/o3-timing.py
--- a/tests/configs/o3-timing.py        Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/o3-timing.py        Thu Mar 17 19:20:19 2011 -0500
@@ -37,8 +37,12 @@
     mshrs = 10
     tgts_per_mshr = 5
 
+class MyL1Cache(MyCache):
+    is_top_level = True
+
 cpu = DerivO3CPU(cpu_id=0)
-cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
+cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
+                              MyL1Cache(size = '256kB'),
                               MyCache(size = '2MB'))
 cpu.clock = '2GHz'
 
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/pc-simple-atomic.py
--- a/tests/configs/pc-simple-atomic.py Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/pc-simple-atomic.py Thu Mar 17 19:20:19 2011 -0500
@@ -43,6 +43,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
@@ -65,6 +66,7 @@
     mshrs = 10
     size = '1kB'
     tgts_per_mshr = 12
+    is_top_level = True
 
 # ---------------------
 # I/O Cache
@@ -78,6 +80,7 @@
     tgts_per_mshr = 12
     addr_range = AddrRange(0, size=mem_size)
     forward_snoops = False
+    is_top_level = True
 
 #cpu
 cpu = AtomicSimpleCPU(cpu_id=0)
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/pc-simple-timing.py
--- a/tests/configs/pc-simple-timing.py Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/pc-simple-timing.py Thu Mar 17 19:20:19 2011 -0500
@@ -44,6 +44,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/realview-simple-atomic.py
--- a/tests/configs/realview-simple-atomic.py   Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/realview-simple-atomic.py   Thu Mar 17 19:20:19 2011 -0500
@@ -40,6 +40,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/realview-simple-timing.py
--- a/tests/configs/realview-simple-timing.py   Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/realview-simple-timing.py   Thu Mar 17 19:20:19 2011 -0500
@@ -41,6 +41,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/simple-atomic-mp.py
--- a/tests/configs/simple-atomic-mp.py Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/simple-atomic-mp.py Thu Mar 17 19:20:19 2011 -0500
@@ -38,6 +38,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/simple-timing-mp.py
--- a/tests/configs/simple-timing-mp.py Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/simple-timing-mp.py Thu Mar 17 19:20:19 2011 -0500
@@ -38,6 +38,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/simple-timing.py
--- a/tests/configs/simple-timing.py    Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/simple-timing.py    Thu Mar 17 19:20:19 2011 -0500
@@ -36,8 +36,12 @@
     mshrs = 10
     tgts_per_mshr = 5
 
+class MyL1Cache(MyCache):
+    is_top_level = True
+
 cpu = TimingSimpleCPU(cpu_id=0)
-cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'),
+cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'),
+                              MyL1Cache(size = '256kB'),
                               MyCache(size = '2MB', latency='10ns'))
 system = System(cpu = cpu,
                 physmem = PhysicalMemory(),
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/tsunami-o3-dual.py
--- a/tests/configs/tsunami-o3-dual.py  Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/tsunami-o3-dual.py  Thu Mar 17 19:20:19 2011 -0500
@@ -41,6 +41,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
@@ -65,6 +66,7 @@
     tgts_per_mshr = 12
     addr_range=AddrRange(0, size='8GB')
     forward_snoops = False
+    is_top_level = True
 
 #cpu
 cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ]
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/tsunami-o3.py
--- a/tests/configs/tsunami-o3.py       Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/tsunami-o3.py       Thu Mar 17 19:20:19 2011 -0500
@@ -41,6 +41,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
 
 # ----------------------
 # Base L2 Cache
@@ -65,6 +66,7 @@
     tgts_per_mshr = 12
     addr_range=AddrRange(0, size='8GB')
     forward_snoops = False
+    is_top_level = True
 
 #cpu
 cpu = DerivO3CPU(cpu_id=0)
diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/tsunami-simple-atomic-dual.py
--- a/tests/configs/tsunami-simple-atomic-dual.py       Thu Mar 17 19:20:19 2011 -0500
+++ b/tests/configs/tsunami-simple-atomic-dual.py       Thu Mar 17 19:20:19 2011 -0500
@@ -40,6 +40,7 @@
     block_size = 64
     mshrs = 4
     tgts_per_mshr = 8
+    is_top_level = True
_______________________________________________
m5-dev mailing list
m5-dev@m5sim.org
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to