changeset b01a51ff05fa in /z/repo/m5 details: http://repo.m5sim.org/m5?cmd=changeset;node=b01a51ff05fa description: Mem: Fix issue with dirty block being lost when an entire block is transferred to a non-cache.
This change fixes the problem for all the cases we actively use. If you want to try more creative I/O device attachments (E.g. sharing an L2), this won't work. You would need another level of caching between the I/O device and the cache (which you actually need anyway with our current code to make sure writes propagate). This is required so that you can mark the cache in between as top level and it won't try to send ownership of a block to the I/O device. Asserts have been added that should catch any issues. diffstat: configs/common/Caches.py | 3 +++ src/cpu/o3/fetch_impl.hh | 3 +++ src/dev/io_device.cc | 3 +++ src/mem/cache/BaseCache.py | 1 + src/mem/cache/base.cc | 1 + src/mem/cache/base.hh | 5 +++++ src/mem/cache/cache_impl.hh | 2 +- tests/configs/inorder-timing.py | 6 +++++- tests/configs/memtest.py | 1 + tests/configs/o3-timing-mp.py | 1 + tests/configs/o3-timing.py | 6 +++++- tests/configs/pc-simple-atomic.py | 3 +++ tests/configs/pc-simple-timing.py | 1 + tests/configs/realview-simple-atomic.py | 1 + tests/configs/realview-simple-timing.py | 1 + tests/configs/simple-atomic-mp.py | 1 + tests/configs/simple-timing-mp.py | 1 + tests/configs/simple-timing.py | 6 +++++- tests/configs/tsunami-o3-dual.py | 2 ++ tests/configs/tsunami-o3.py | 2 ++ tests/configs/tsunami-simple-atomic-dual.py | 2 ++ tests/configs/tsunami-simple-atomic.py | 2 ++ tests/configs/tsunami-simple-timing-dual.py | 2 ++ tests/configs/tsunami-simple-timing.py | 2 ++ 24 files changed, 54 insertions(+), 4 deletions(-) diffs (truncated from 368 to 300 lines): diff -r 9f704aa10eb4 -r b01a51ff05fa configs/common/Caches.py --- a/configs/common/Caches.py Thu Mar 17 19:20:19 2011 -0500 +++ b/configs/common/Caches.py Thu Mar 17 19:20:19 2011 -0500 @@ -34,6 +34,7 @@ latency = '1ns' mshrs = 10 tgts_per_mshr = 5 + is_top_level = True class L2Cache(BaseCache): assoc = 8 @@ -49,6 +50,7 @@ mshrs = 10 size = '1kB' tgts_per_mshr = 12 + is_top_level = True class IOCache(BaseCache): assoc = 8 @@ -58,3 +60,4 @@ 
size = '1kB' tgts_per_mshr = 12 forward_snoops = False + is_top_level = True diff -r 9f704aa10eb4 -r b01a51ff05fa src/cpu/o3/fetch_impl.hh --- a/src/cpu/o3/fetch_impl.hh Thu Mar 17 19:20:19 2011 -0500 +++ b/src/cpu/o3/fetch_impl.hh Thu Mar 17 19:20:19 2011 -0500 @@ -112,6 +112,9 @@ { DPRINTF(Fetch, "Received timing\n"); if (pkt->isResponse()) { + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + fetch->processCacheCompletion(pkt); } //else Snooped a coherence request, just return diff -r 9f704aa10eb4 -r b01a51ff05fa src/dev/io_device.cc --- a/src/dev/io_device.cc Thu Mar 17 19:20:19 2011 -0500 +++ b/src/dev/io_device.cc Thu Mar 17 19:20:19 2011 -0500 @@ -139,6 +139,9 @@ assert(pendingCount >= 0); assert(state); + // We shouldn't ever get a block in ownership state + assert(!(pkt->memInhibitAsserted() && !pkt->sharedAsserted())); + state->numBytes += pkt->req->getSize(); assert(state->totBytes >= state->numBytes); if (state->totBytes == state->numBytes) { diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/BaseCache.py --- a/src/mem/cache/BaseCache.py Thu Mar 17 19:20:19 2011 -0500 +++ b/src/mem/cache/BaseCache.py Thu Mar 17 19:20:19 2011 -0500 @@ -48,6 +48,7 @@ size = Param.MemorySize("capacity in bytes") forward_snoops = Param.Bool(True, "forward snoops from mem side to cpu side") + is_top_level = Param.Bool(False, "Is this cache at the top level (e.g. 
L1)") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/base.cc --- a/src/mem/cache/base.cc Thu Mar 17 19:20:19 2011 -0500 +++ b/src/mem/cache/base.cc Thu Mar 17 19:20:19 2011 -0500 @@ -58,6 +58,7 @@ hitLatency(p->latency), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), + isTopLevel(p->is_top_level), blocked(0), noTargetMSHR(NULL), missCount(p->max_miss_count), diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/base.hh --- a/src/mem/cache/base.hh Thu Mar 17 19:20:19 2011 -0500 +++ b/src/mem/cache/base.hh Thu Mar 17 19:20:19 2011 -0500 @@ -194,6 +194,11 @@ /** Do we forward snoops from mem side port through to cpu side port? */ bool forwardSnoops; + /** Is this cache a toplevel cache (e.g. L1, I/O cache). If so we should + * never try to forward ownership and similar optimizations to the cpu + * side */ + bool isTopLevel; + /** * Bit vector of the blocking reasons for the access path. 
* @sa #BlockedCause diff -r 9f704aa10eb4 -r b01a51ff05fa src/mem/cache/cache_impl.hh --- a/src/mem/cache/cache_impl.hh Thu Mar 17 19:20:19 2011 -0500 +++ b/src/mem/cache/cache_impl.hh Thu Mar 17 19:20:19 2011 -0500 @@ -216,7 +216,7 @@ if (blk->isDirty()) { // special considerations if we're owner: - if (!deferred_response) { + if (!deferred_response && !isTopLevel) { // if we are responding immediately and can // signal that we're transferring ownership // along with exclusivity, do so diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/inorder-timing.py --- a/tests/configs/inorder-timing.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/inorder-timing.py Thu Mar 17 19:20:19 2011 -0500 @@ -37,8 +37,12 @@ mshrs = 10 tgts_per_mshr = 5 +class MyL1Cache(MyCache): + is_top_level = True + cpu = InOrderCPU(cpu_id=0) -cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), +cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), + MyL1Cache(size = '256kB'), MyCache(size = '2MB', latency='10ns')) cpu.clock = '2GHz' diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/memtest.py --- a/tests/configs/memtest.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/memtest.py Thu Mar 17 19:20:19 2011 -0500 @@ -38,6 +38,7 @@ block_size = 64 mshrs = 12 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/o3-timing-mp.py --- a/tests/configs/o3-timing-mp.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/o3-timing-mp.py Thu Mar 17 19:20:19 2011 -0500 @@ -39,6 +39,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/o3-timing.py --- a/tests/configs/o3-timing.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/o3-timing.py Thu Mar 17 19:20:19 2011 -0500 @@ -37,8 +37,12 @@ mshrs = 10 tgts_per_mshr = 5 +class MyL1Cache(MyCache): + is_top_level = True + cpu = 
DerivO3CPU(cpu_id=0) -cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), +cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), + MyL1Cache(size = '256kB'), MyCache(size = '2MB')) cpu.clock = '2GHz' diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/pc-simple-atomic.py --- a/tests/configs/pc-simple-atomic.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/pc-simple-atomic.py Thu Mar 17 19:20:19 2011 -0500 @@ -43,6 +43,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ mshrs = 10 size = '1kB' tgts_per_mshr = 12 + is_top_level = True # --------------------- # I/O Cache @@ -78,6 +80,7 @@ tgts_per_mshr = 12 addr_range = AddrRange(0, size=mem_size) forward_snoops = False + is_top_level = True #cpu cpu = AtomicSimpleCPU(cpu_id=0) diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/pc-simple-timing.py --- a/tests/configs/pc-simple-timing.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/pc-simple-timing.py Thu Mar 17 19:20:19 2011 -0500 @@ -44,6 +44,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/realview-simple-atomic.py --- a/tests/configs/realview-simple-atomic.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/realview-simple-atomic.py Thu Mar 17 19:20:19 2011 -0500 @@ -40,6 +40,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/realview-simple-timing.py --- a/tests/configs/realview-simple-timing.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/realview-simple-timing.py Thu Mar 17 19:20:19 2011 -0500 @@ -41,6 +41,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/simple-atomic-mp.py --- 
a/tests/configs/simple-atomic-mp.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/simple-atomic-mp.py Thu Mar 17 19:20:19 2011 -0500 @@ -38,6 +38,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/simple-timing-mp.py --- a/tests/configs/simple-timing-mp.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/simple-timing-mp.py Thu Mar 17 19:20:19 2011 -0500 @@ -38,6 +38,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/simple-timing.py --- a/tests/configs/simple-timing.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/simple-timing.py Thu Mar 17 19:20:19 2011 -0500 @@ -36,8 +36,12 @@ mshrs = 10 tgts_per_mshr = 5 +class MyL1Cache(MyCache): + is_top_level = True + cpu = TimingSimpleCPU(cpu_id=0) -cpu.addTwoLevelCacheHierarchy(MyCache(size = '128kB'), MyCache(size = '256kB'), +cpu.addTwoLevelCacheHierarchy(MyL1Cache(size = '128kB'), + MyL1Cache(size = '256kB'), MyCache(size = '2MB', latency='10ns')) system = System(cpu = cpu, physmem = PhysicalMemory(), diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/tsunami-o3-dual.py --- a/tests/configs/tsunami-o3-dual.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/tsunami-o3-dual.py Thu Mar 17 19:20:19 2011 -0500 @@ -41,6 +41,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # ---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ] diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/tsunami-o3.py --- a/tests/configs/tsunami-o3.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/tsunami-o3.py Thu Mar 17 19:20:19 2011 -0500 @@ -41,6 +41,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True # 
---------------------- # Base L2 Cache @@ -65,6 +66,7 @@ tgts_per_mshr = 12 addr_range=AddrRange(0, size='8GB') forward_snoops = False + is_top_level = True #cpu cpu = DerivO3CPU(cpu_id=0) diff -r 9f704aa10eb4 -r b01a51ff05fa tests/configs/tsunami-simple-atomic-dual.py --- a/tests/configs/tsunami-simple-atomic-dual.py Thu Mar 17 19:20:19 2011 -0500 +++ b/tests/configs/tsunami-simple-atomic-dual.py Thu Mar 17 19:20:19 2011 -0500 @@ -40,6 +40,7 @@ block_size = 64 mshrs = 4 tgts_per_mshr = 8 + is_top_level = True _______________________________________________ m5-dev mailing list m5-dev@m5sim.org http://m5sim.org/mailman/listinfo/m5-dev