changeset 4c5303395e08 in /z/repo/m5 details: http://repo.m5sim.org/m5?cmd=changeset;node=4c5303395e08 summary: Restructure bus timing calcs to cope with pkt being deleted by target.
changeset 155a82ec17b7 in /z/repo/m5 details: http://repo.m5sim.org/m5?cmd=changeset;node=155a82ec17b7 summary: Fix a few Packet memory leaks. changeset f910b095a3a5 in /z/repo/m5 details: http://repo.m5sim.org/m5?cmd=changeset;node=f910b095a3a5 summary: Update long regression stats for semi-recent cache changes. diffstat: 18 files changed, 270 insertions(+), 249 deletions(-) src/mem/bus.cc | 50 +++---- src/mem/bus.hh | 10 - src/mem/cache/cache_impl.hh | 9 + tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini | 1 tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt | 76 +++++------ tests/long/00.gzip/ref/alpha/tru64/o3-timing/stderr | 2 tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini | 1 tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt | 70 +++++----- tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout | 6 tests/long/30.eon/ref/alpha/tru64/o3-timing/config.ini | 1 tests/long/30.eon/ref/alpha/tru64/o3-timing/m5stats.txt | 68 ++++----- tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.ini | 1 tests/long/50.vortex/ref/alpha/tru64/o3-timing/m5stats.txt | 82 ++++++------ tests/long/50.vortex/ref/alpha/tru64/o3-timing/stderr | 2 tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.ini | 1 tests/long/60.bzip2/ref/alpha/tru64/o3-timing/m5stats.txt | 70 +++++----- tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.ini | 1 tests/long/70.twolf/ref/alpha/tru64/o3-timing/m5stats.txt | 68 ++++----- diffs (truncated from 1229 to 300 lines): diff -r a79c14353915 -r f910b095a3a5 src/mem/bus.cc --- a/src/mem/bus.cc Sat Mar 15 05:03:55 2008 -0700 +++ b/src/mem/bus.cc Mon Mar 17 23:07:22 2008 -0400 @@ -110,21 +110,22 @@ const char * Bus::BusFreeEvent::descript return "bus became available"; } -void Bus::preparePacket(PacketPtr pkt, Tick & headerTime) -{ - //Bring tickNextIdle up to the present tick - //There is some potential ambiguity where a cycle starts, which might make - //a difference when devices are acting right around a cycle boundary. Using - //a < allows things which happen exactly on a cycle boundary to take up - //only the following cycle. Anything that happens later will have to "wait" - //for the end of that cycle, and then start using the bus after that. +Tick Bus::calcPacketTiming(PacketPtr pkt) +{ + // Bring tickNextIdle up to the present tick. + // There is some potential ambiguity where a cycle starts, which + // might make a difference when devices are acting right around a + // cycle boundary. Using a < allows things which happen exactly on + // a cycle boundary to take up only the following cycle. Anything + // that happens later will have to "wait" for the end of that + // cycle, and then start using the bus after that. if (tickNextIdle < curTick) { tickNextIdle = curTick; if (tickNextIdle % clock != 0) tickNextIdle = curTick - (curTick % clock) + clock; } - headerTime = tickNextIdle + headerCycles * clock; + Tick headerTime = tickNextIdle + headerCycles * clock; // The packet will be sent. Figure out how long it occupies the bus, and // how much of that time is for the first "word", aka bus width. @@ -142,10 +143,17 @@ void Bus::preparePacket(PacketPtr pkt, T pkt->firstWordTime = headerTime + clock; pkt->finishTime = headerTime + numCycles * clock; + + return headerTime; } void Bus::occupyBus(Tick until) { + if (until == 0) { + // shortcut for express snoop packets + return; + } + tickNextIdle = until; if (!busIdle.scheduled()) { @@ -190,11 +198,8 @@ Bus::recvTiming(PacketPtr pkt) DPRINTF(Bus, "recvTiming: src %d dst %d %s 0x%x\n", src, pkt->getDest(), pkt->cmdString(), pkt->getAddr()); - Tick headerTime = 0; - - if (!pkt->isExpressSnoop()) { - preparePacket(pkt, headerTime); - } + Tick headerFinishTime = pkt->isExpressSnoop() ? 0 : calcPacketTiming(pkt); + Tick packetFinishTime = pkt->isExpressSnoop() ? 0 : pkt->finishTime; short dest = pkt->getDest(); int dest_port_id; @@ -243,17 +248,16 @@ Bus::recvTiming(PacketPtr pkt) DPRINTF(Bus, "recvTiming: src %d dst %d %s 0x%x TGT RETRY\n", src, pkt->getDest(), pkt->cmdString(), pkt->getAddr()); addToRetryList(src_port); - if (!pkt->isExpressSnoop()) { - occupyBus(headerTime); - } + occupyBus(headerFinishTime); return false; } - // send OK, fall through - } - - if (!pkt->isExpressSnoop()) { - occupyBus(pkt->finishTime); - } + // send OK, fall through... pkt may have been deleted by + // target at this point, so it should *not* be referenced + // again. We'll set it to NULL here just to be safe. + pkt = NULL; + } + + occupyBus(packetFinishTime); // Packet was successfully sent. // Also take care of retries diff -r a79c14353915 -r f910b095a3a5 src/mem/bus.hh --- a/src/mem/bus.hh Sat Mar 15 05:03:55 2008 -0700 +++ b/src/mem/bus.hh Mon Mar 17 23:07:22 2008 -0400 @@ -245,10 +245,12 @@ class Bus : public MemObject */ void addressRanges(AddrRangeList &resp, bool &snoop, int id); - /** Prepare a packet to be sent on the bus. The header finishes at tick - * headerTime - */ - void preparePacket(PacketPtr pkt, Tick & headerTime); + /** Calculate the timing parameters for the packet. Updates the + * firstWordTime and finishTime fields of the packet object. + * Returns the tick at which the packet header is completed (which + * will be all that is sent if the target rejects the packet). + */ + Tick calcPacketTiming(PacketPtr pkt); /** Occupy the bus until until */ void occupyBus(Tick until); diff -r a79c14353915 -r f910b095a3a5 src/mem/cache/cache_impl.hh --- a/src/mem/cache/cache_impl.hh Sat Mar 15 05:03:55 2008 -0700 +++ b/src/mem/cache/cache_impl.hh Mon Mar 17 23:07:22 2008 -0400 @@ -407,6 +407,9 @@ Cache<TagStore>::timingAccess(PacketPtr memSidePort->sendTiming(snoopPkt); // main memory will delete snoopPkt } + // since we're the official target but we aren't responding, + // delete the packet now. + delete pkt; return true; } @@ -1092,6 +1095,11 @@ Cache<TagStore>::handleSnoop(PacketPtr p pkt->makeAtomicResponse(); pkt->setDataFromBlock(blk->data, blkSize); } + } else if (is_timing && is_deferred) { + // if it's a deferred timing snoop then we've made a copy of + // the packet, and so if we're not using that copy to respond + // then we need to delete it here. + delete pkt; } // Do this last in case it deallocates block data or something @@ -1160,6 +1168,7 @@ Cache<TagStore>::snoopTiming(PacketPtr p if (pkt->isInvalidate()) { // Invalidation trumps our writeback... discard here markInService(mshr); + delete wb_pkt; } // If this was a shared writeback, there may still be diff -r a79c14353915 -r f910b095a3a5 tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini --- a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini Sat Mar 15 05:03:55 2008 -0700 +++ b/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini Mon Mar 17 23:07:22 2008 -0400 @@ -376,6 +376,7 @@ output=cout output=cout pid=100 ppid=99 +simpoint=0 system=system uid=100 diff -r a79c14353915 -r f910b095a3a5 tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt --- a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt Sat Mar 15 05:03:55 2008 -0700 +++ b/tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt Mon Mar 17 23:07:22 2008 -0400 @@ -8,10 +8,10 @@ global.BPredUnit.condPredicted global.BPredUnit.condPredicted 70175548 # Number of conditional branches predicted global.BPredUnit.lookups 76112488 # Number of BP lookups global.BPredUnit.usedRAS 1692573 # Number of times the RAS was used to get a target. -host_inst_rate 185893 # Simulator instruction rate (inst/s) -host_mem_usage 223968 # Number of bytes of host memory used -host_seconds 3042.35 # Real time elapsed on the host -host_tick_rate 54375513 # Simulator tick rate (ticks/s) +host_inst_rate 131337 # Simulator instruction rate (inst/s) +host_mem_usage 179084 # Number of bytes of host memory used +host_seconds 4306.11 # Real time elapsed on the host +host_tick_rate 38417331 # Simulator tick rate (ticks/s) memdepunit.memDep.conflictingLoads 21896719 # Number of conflicting loads. memdepunit.memDep.conflictingStores 16284345 # Number of conflicting stores. memdepunit.memDep.insertedLoads 127086189 # Number of loads inserted to the mem dependence unit. @@ -53,61 +53,61 @@ system.cpu.cpi_total system.cpu.cpi_total 0.585019 # CPI: Total CPI of All Threads system.cpu.dcache.LoadLockedReq_accesses 1 # number of LoadLockedReq accesses(hits+misses) system.cpu.dcache.LoadLockedReq_hits 1 # number of LoadLockedReq hits -system.cpu.dcache.ReadReq_accesses 114321557 # number of ReadReq accesses(hits+misses) -system.cpu.dcache.ReadReq_avg_miss_latency 26993.890628 # average ReadReq miss latency +system.cpu.dcache.ReadReq_accesses 115038352 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 6257.587595 # average ReadReq miss latency system.cpu.dcache.ReadReq_avg_mshr_miss_latency 3367.177206 # average ReadReq mshr miss latency system.cpu.dcache.ReadReq_hits 114105250 # number of ReadReq hits system.cpu.dcache.ReadReq_miss_latency 5838967500 # number of ReadReq miss cycles -system.cpu.dcache.ReadReq_miss_rate 0.001892 # miss rate for ReadReq accesses -system.cpu.dcache.ReadReq_misses 216307 # number of ReadReq misses +system.cpu.dcache.ReadReq_miss_rate 0.008111 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 933102 # number of ReadReq misses system.cpu.dcache.ReadReq_mshr_hits 716795 # number of ReadReq MSHR hits system.cpu.dcache.ReadReq_mshr_miss_latency 728344000 # number of ReadReq MSHR miss cycles -system.cpu.dcache.ReadReq_mshr_miss_rate 0.001892 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_miss_rate 0.001880 # mshr miss rate for ReadReq accesses system.cpu.dcache.ReadReq_mshr_misses 216307 # number of ReadReq MSHR misses -system.cpu.dcache.WriteReq_accesses 37579282 # number of WriteReq accesses(hits+misses) -system.cpu.dcache.WriteReq_avg_miss_latency 48790.597140 # average WriteReq miss latency +system.cpu.dcache.WriteReq_accesses 39451321 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 7448.640662 # average WriteReq miss latency system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7159.473367 # average WriteReq mshr miss latency system.cpu.dcache.WriteReq_hits 37241994 # number of WriteReq hits system.cpu.dcache.WriteReq_miss_latency 16456482928 # number of WriteReq miss cycles -system.cpu.dcache.WriteReq_miss_rate 0.008975 # miss rate for WriteReq accesses -system.cpu.dcache.WriteReq_misses 337288 # number of WriteReq misses +system.cpu.dcache.WriteReq_miss_rate 0.056001 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 2209327 # number of WriteReq misses system.cpu.dcache.WriteReq_mshr_hits 1872039 # number of WriteReq MSHR hits system.cpu.dcache.WriteReq_mshr_miss_latency 2414804453 # number of WriteReq MSHR miss cycles -system.cpu.dcache.WriteReq_mshr_miss_rate 0.008975 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_miss_rate 0.008549 # mshr miss rate for WriteReq accesses system.cpu.dcache.WriteReq_mshr_misses 337288 # number of WriteReq MSHR misses system.cpu.dcache.avg_blocked_cycles_no_mshrs 1999.750000 # average number of cycles each access was blocked system.cpu.dcache.avg_blocked_cycles_no_targets 2750 # average number of cycles each access was blocked -system.cpu.dcache.avg_refs 320.196392 # Average number of references to valid blocks. +system.cpu.dcache.avg_refs 321.245700 # Average number of references to valid blocks. system.cpu.dcache.blocked_no_mshrs 4 # number of cycles access was blocked system.cpu.dcache.blocked_no_targets 4 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_mshrs 7999 # number of cycles access was blocked system.cpu.dcache.blocked_cycles_no_targets 11000 # number of cycles access was blocked system.cpu.dcache.cache_copies 0 # number of cache copies performed -system.cpu.dcache.demand_accesses 151900839 # number of demand (read+write) accesses -system.cpu.dcache.demand_avg_miss_latency 40273.937496 # average overall miss latency +system.cpu.dcache.demand_accesses 154489673 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 7094.973483 # average overall miss latency system.cpu.dcache.demand_avg_mshr_miss_latency 5677.703832 # average overall mshr miss latency system.cpu.dcache.demand_hits 151347244 # number of demand (read+write) hits system.cpu.dcache.demand_miss_latency 22295450428 # number of demand (read+write) miss cycles -system.cpu.dcache.demand_miss_rate 0.003644 # miss rate for demand accesses -system.cpu.dcache.demand_misses 553595 # number of demand (read+write) misses +system.cpu.dcache.demand_miss_rate 0.020341 # miss rate for demand accesses +system.cpu.dcache.demand_misses 3142429 # number of demand (read+write) misses system.cpu.dcache.demand_mshr_hits 2588834 # number of demand (read+write) MSHR hits system.cpu.dcache.demand_mshr_miss_latency 3143148453 # number of demand (read+write) MSHR miss cycles -system.cpu.dcache.demand_mshr_miss_rate 0.003644 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_miss_rate 0.003583 # mshr miss rate for demand accesses system.cpu.dcache.demand_mshr_misses 553595 # number of demand (read+write) MSHR misses system.cpu.dcache.fast_writes 0 # number of fast writes performed system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate -system.cpu.dcache.overall_accesses 151900839 # number of overall (read+write) accesses -system.cpu.dcache.overall_avg_miss_latency 40273.937496 # average overall miss latency +system.cpu.dcache.overall_accesses 154489673 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 7094.973483 # average overall miss latency system.cpu.dcache.overall_avg_mshr_miss_latency 5677.703832 # average overall mshr miss latency system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0> # average overall mshr uncacheable latency system.cpu.dcache.overall_hits 151347244 # number of overall hits system.cpu.dcache.overall_miss_latency 22295450428 # number of overall miss cycles -system.cpu.dcache.overall_miss_rate 0.003644 # miss rate for overall accesses -system.cpu.dcache.overall_misses 553595 # number of overall misses +system.cpu.dcache.overall_miss_rate 0.020341 # miss rate for overall accesses +system.cpu.dcache.overall_misses 3142429 # number of overall misses system.cpu.dcache.overall_mshr_hits 2588834 # number of overall MSHR hits system.cpu.dcache.overall_mshr_miss_latency 3143148453 # number of overall MSHR miss cycles -system.cpu.dcache.overall_mshr_miss_rate 0.003644 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_miss_rate 0.003583 # mshr miss rate for overall accesses system.cpu.dcache.overall_mshr_misses 553595 # number of overall MSHR misses system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses @@ -124,7 +124,7 @@ system.cpu.dcache.sampled_refs system.cpu.dcache.sampled_refs 472922 # Sample count of references to valid blocks. system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions system.cpu.dcache.tagsinuse 4095.170465 # Cycle average of tags in use -system.cpu.dcache.total_refs 151427918 # Total number of references to valid blocks. +system.cpu.dcache.total_refs 151924159 # Total number of references to valid blocks. system.cpu.dcache.warmup_cycle 50285000 # Cycle when the warmup percentage was hit. system.cpu.dcache.writebacks 334126 # number of writebacks system.cpu.decode.DECODE:BlockedCycles 46422286 # Number of cycles decode is blocked @@ -173,13 +173,13 @@ system.cpu.fetch.rateDist.max_value system.cpu.fetch.rateDist.max_value 8 system.cpu.fetch.rateDist.end_dist -system.cpu.icache.ReadReq_accesses 66025546 # number of ReadReq accesses(hits+misses) -system.cpu.icache.ReadReq_avg_miss_latency 10641.352550 # average ReadReq miss latency +system.cpu.icache.ReadReq_accesses 66025670 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 9355.263158 # average ReadReq miss latency system.cpu.icache.ReadReq_avg_mshr_miss_latency 6819.290466 # average ReadReq mshr miss latency system.cpu.icache.ReadReq_hits 66024644 # number of ReadReq hits system.cpu.icache.ReadReq_miss_latency 9598500 # number of ReadReq miss cycles -system.cpu.icache.ReadReq_miss_rate 0.000014 # miss rate for ReadReq accesses -system.cpu.icache.ReadReq_misses 902 # number of ReadReq misses +system.cpu.icache.ReadReq_miss_rate 0.000016 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 1026 # number of ReadReq misses system.cpu.icache.ReadReq_mshr_hits 124 # number of ReadReq MSHR hits system.cpu.icache.ReadReq_mshr_miss_latency 6151000 # number of ReadReq MSHR miss cycles system.cpu.icache.ReadReq_mshr_miss_rate 0.000014 # mshr miss rate for ReadReq accesses @@ -192,13 +192,13 @@ system.cpu.icache.blocked_cycles_no_mshr system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked system.cpu.icache.cache_copies 0 # number of cache copies performed -system.cpu.icache.demand_accesses 66025546 # number of demand (read+write) accesses -system.cpu.icache.demand_avg_miss_latency 10641.352550 # average overall miss latency +system.cpu.icache.demand_accesses 66025670 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 9355.263158 # average overall miss latency system.cpu.icache.demand_avg_mshr_miss_latency 6819.290466 # average overall mshr miss latency system.cpu.icache.demand_hits 66024644 # number of demand (read+write) hits system.cpu.icache.demand_miss_latency 9598500 # number of demand (read+write) miss cycles -system.cpu.icache.demand_miss_rate 0.000014 # miss rate for demand accesses -system.cpu.icache.demand_misses 902 # number of demand (read+write) misses +system.cpu.icache.demand_miss_rate 0.000016 # miss rate for demand accesses +system.cpu.icache.demand_misses 1026 # number of demand (read+write) misses _______________________________________________ m5-dev mailing list [email protected] http://m5sim.org/mailman/listinfo/m5-dev
