changeset 4c5303395e08 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=4c5303395e08
summary: Restructure bus timing calcs to cope with pkt being deleted by target.

changeset 155a82ec17b7 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=155a82ec17b7
summary: Fix a few Packet memory leaks.

changeset f910b095a3a5 in /z/repo/m5
details: http://repo.m5sim.org/m5?cmd=changeset;node=f910b095a3a5
summary: Update long regression stats for semi-recent cache changes.

diffstat:

18 files changed, 270 insertions(+), 249 deletions(-)
src/mem/bus.cc                                             |   50 +++----
src/mem/bus.hh                                             |   10 -
src/mem/cache/cache_impl.hh                                |    9 +
tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini    |    1 
tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt   |   76 +++++------
tests/long/00.gzip/ref/alpha/tru64/o3-timing/stderr        |    2 
tests/long/00.gzip/ref/sparc/linux/o3-timing/config.ini    |    1 
tests/long/00.gzip/ref/sparc/linux/o3-timing/m5stats.txt   |   70 +++++-----
tests/long/00.gzip/ref/sparc/linux/o3-timing/stdout        |    6 
tests/long/30.eon/ref/alpha/tru64/o3-timing/config.ini     |    1 
tests/long/30.eon/ref/alpha/tru64/o3-timing/m5stats.txt    |   68 ++++-----
tests/long/50.vortex/ref/alpha/tru64/o3-timing/config.ini  |    1 
tests/long/50.vortex/ref/alpha/tru64/o3-timing/m5stats.txt |   82 ++++++------
tests/long/50.vortex/ref/alpha/tru64/o3-timing/stderr      |    2 
tests/long/60.bzip2/ref/alpha/tru64/o3-timing/config.ini   |    1 
tests/long/60.bzip2/ref/alpha/tru64/o3-timing/m5stats.txt  |   70 +++++-----
tests/long/70.twolf/ref/alpha/tru64/o3-timing/config.ini   |    1 
tests/long/70.twolf/ref/alpha/tru64/o3-timing/m5stats.txt  |   68 ++++-----

diffs (truncated from 1229 to 300 lines):

diff -r a79c14353915 -r f910b095a3a5 src/mem/bus.cc
--- a/src/mem/bus.cc    Sat Mar 15 05:03:55 2008 -0700
+++ b/src/mem/bus.cc    Mon Mar 17 23:07:22 2008 -0400
@@ -110,21 +110,22 @@ const char * Bus::BusFreeEvent::descript
     return "bus became available";
 }
 
-void Bus::preparePacket(PacketPtr pkt, Tick & headerTime)
-{
-    //Bring tickNextIdle up to the present tick
-    //There is some potential ambiguity where a cycle starts, which might make
-    //a difference when devices are acting right around a cycle boundary. Using
-    //a < allows things which happen exactly on a cycle boundary to take up
-    //only the following cycle. Anything that happens later will have to "wait"
-    //for the end of that cycle, and then start using the bus after that.
+Tick Bus::calcPacketTiming(PacketPtr pkt)
+{
+    // Bring tickNextIdle up to the present tick.
+    // There is some potential ambiguity where a cycle starts, which
+    // might make a difference when devices are acting right around a
+    // cycle boundary. Using a < allows things which happen exactly on
+    // a cycle boundary to take up only the following cycle. Anything
+    // that happens later will have to "wait" for the end of that
+    // cycle, and then start using the bus after that.
     if (tickNextIdle < curTick) {
         tickNextIdle = curTick;
         if (tickNextIdle % clock != 0)
             tickNextIdle = curTick - (curTick % clock) + clock;
     }
 
-    headerTime = tickNextIdle + headerCycles * clock;
+    Tick headerTime = tickNextIdle + headerCycles * clock;
 
     // The packet will be sent. Figure out how long it occupies the bus, and
     // how much of that time is for the first "word", aka bus width.
@@ -142,10 +143,17 @@ void Bus::preparePacket(PacketPtr pkt, T
     pkt->firstWordTime = headerTime + clock;
 
     pkt->finishTime = headerTime + numCycles * clock;
+
+    return headerTime;
 }
 
 void Bus::occupyBus(Tick until)
 {
+    if (until == 0) {
+        // shortcut for express snoop packets
+        return;
+    }
+
     tickNextIdle = until;
 
     if (!busIdle.scheduled()) {
@@ -190,11 +198,8 @@ Bus::recvTiming(PacketPtr pkt)
     DPRINTF(Bus, "recvTiming: src %d dst %d %s 0x%x\n",
             src, pkt->getDest(), pkt->cmdString(), pkt->getAddr());
 
-    Tick headerTime = 0;
-
-    if (!pkt->isExpressSnoop()) {
-        preparePacket(pkt, headerTime);
-    }
+    Tick headerFinishTime = pkt->isExpressSnoop() ? 0 : calcPacketTiming(pkt);
+    Tick packetFinishTime = pkt->isExpressSnoop() ? 0 : pkt->finishTime;
 
     short dest = pkt->getDest();
     int dest_port_id;
@@ -243,17 +248,16 @@ Bus::recvTiming(PacketPtr pkt)
             DPRINTF(Bus, "recvTiming: src %d dst %d %s 0x%x TGT RETRY\n",
                     src, pkt->getDest(), pkt->cmdString(), pkt->getAddr());
             addToRetryList(src_port);
-            if (!pkt->isExpressSnoop()) {
-                occupyBus(headerTime);
-            }
+            occupyBus(headerFinishTime);
             return false;
         }
-        // send OK, fall through
-    }
-
-    if (!pkt->isExpressSnoop()) {
-        occupyBus(pkt->finishTime);
-    }
+        // send OK, fall through... pkt may have been deleted by
+        // target at this point, so it should *not* be referenced
+        // again.  We'll set it to NULL here just to be safe.
+        pkt = NULL;
+    }
+
+    occupyBus(packetFinishTime);
 
     // Packet was successfully sent.
     // Also take care of retries
diff -r a79c14353915 -r f910b095a3a5 src/mem/bus.hh
--- a/src/mem/bus.hh    Sat Mar 15 05:03:55 2008 -0700
+++ b/src/mem/bus.hh    Mon Mar 17 23:07:22 2008 -0400
@@ -245,10 +245,12 @@ class Bus : public MemObject
      */
     void addressRanges(AddrRangeList &resp, bool &snoop, int id);
 
-    /** Prepare a packet to be sent on the bus. The header finishes at tick
-     *  headerTime
-     */
-    void preparePacket(PacketPtr pkt, Tick & headerTime);
+    /** Calculate the timing parameters for the packet.  Updates the
+     * firstWordTime and finishTime fields of the packet object.
+     * Returns the tick at which the packet header is completed (which
+     * will be all that is sent if the target rejects the packet).
+     */
+    Tick calcPacketTiming(PacketPtr pkt);
 
     /** Occupy the bus until until */
     void occupyBus(Tick until);
diff -r a79c14353915 -r f910b095a3a5 src/mem/cache/cache_impl.hh
--- a/src/mem/cache/cache_impl.hh       Sat Mar 15 05:03:55 2008 -0700
+++ b/src/mem/cache/cache_impl.hh       Mon Mar 17 23:07:22 2008 -0400
@@ -407,6 +407,9 @@ Cache<TagStore>::timingAccess(PacketPtr 
             memSidePort->sendTiming(snoopPkt);
             // main memory will delete snoopPkt
         }
+        // since we're the official target but we aren't responding,
+        // delete the packet now.
+        delete pkt;
         return true;
     }
 
@@ -1092,6 +1095,11 @@ Cache<TagStore>::handleSnoop(PacketPtr p
             pkt->makeAtomicResponse();
             pkt->setDataFromBlock(blk->data, blkSize);
         }
+    } else if (is_timing && is_deferred) {
+        // if it's a deferred timing snoop then we've made a copy of
+        // the packet, and so if we're not using that copy to respond
+        // then we need to delete it here.
+        delete pkt;
     }
 
     // Do this last in case it deallocates block data or something
@@ -1160,6 +1168,7 @@ Cache<TagStore>::snoopTiming(PacketPtr p
             if (pkt->isInvalidate()) {
                 // Invalidation trumps our writeback... discard here
                 markInService(mshr);
+                delete wb_pkt;
             }
 
             // If this was a shared writeback, there may still be
diff -r a79c14353915 -r f910b095a3a5 tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini
--- a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini   Sat Mar 15 05:03:55 2008 -0700
+++ b/tests/long/00.gzip/ref/alpha/tru64/o3-timing/config.ini   Mon Mar 17 23:07:22 2008 -0400
@@ -376,6 +376,7 @@ output=cout
 output=cout
 pid=100
 ppid=99
+simpoint=0
 system=system
 uid=100
 
diff -r a79c14353915 -r f910b095a3a5 tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt
--- a/tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt  Sat Mar 15 05:03:55 2008 -0700
+++ b/tests/long/00.gzip/ref/alpha/tru64/o3-timing/m5stats.txt  Mon Mar 17 23:07:22 2008 -0400
@@ -8,10 +8,10 @@ global.BPredUnit.condPredicted          
 global.BPredUnit.condPredicted               70175548                       # 
Number of conditional branches predicted
 global.BPredUnit.lookups                     76112488                       # 
Number of BP lookups
 global.BPredUnit.usedRAS                      1692573                       # 
Number of times the RAS was used to get a target.
-host_inst_rate                                 185893                       # 
Simulator instruction rate (inst/s)
-host_mem_usage                                 223968                       # 
Number of bytes of host memory used
-host_seconds                                  3042.35                       # 
Real time elapsed on the host
-host_tick_rate                               54375513                       # 
Simulator tick rate (ticks/s)
+host_inst_rate                                 131337                       # 
Simulator instruction rate (inst/s)
+host_mem_usage                                 179084                       # 
Number of bytes of host memory used
+host_seconds                                  4306.11                       # 
Real time elapsed on the host
+host_tick_rate                               38417331                       # 
Simulator tick rate (ticks/s)
 memdepunit.memDep.conflictingLoads           21896719                       # 
Number of conflicting loads.
 memdepunit.memDep.conflictingStores          16284345                       # 
Number of conflicting stores.
 memdepunit.memDep.insertedLoads             127086189                       # 
Number of loads inserted to the mem dependence unit.
@@ -53,61 +53,61 @@ system.cpu.cpi_total                    
 system.cpu.cpi_total                         0.585019                       # 
CPI: Total CPI of All Threads
 system.cpu.dcache.LoadLockedReq_accesses            1                       # 
number of LoadLockedReq accesses(hits+misses)
 system.cpu.dcache.LoadLockedReq_hits                1                       # 
number of LoadLockedReq hits
-system.cpu.dcache.ReadReq_accesses          114321557                       # 
number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 26993.890628                       
# average ReadReq miss latency
+system.cpu.dcache.ReadReq_accesses          115038352                       # 
number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency  6257.587595                       
# average ReadReq miss latency
 system.cpu.dcache.ReadReq_avg_mshr_miss_latency  3367.177206                   
    # average ReadReq mshr miss latency
 system.cpu.dcache.ReadReq_hits              114105250                       # 
number of ReadReq hits
 system.cpu.dcache.ReadReq_miss_latency     5838967500                       # 
number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.001892                       # 
miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses               216307                       # 
number of ReadReq misses
+system.cpu.dcache.ReadReq_miss_rate          0.008111                       # 
miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses               933102                       # 
number of ReadReq misses
 system.cpu.dcache.ReadReq_mshr_hits            716795                       # 
number of ReadReq MSHR hits
 system.cpu.dcache.ReadReq_mshr_miss_latency    728344000                       
# number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.001892                       # 
mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.001880                       # 
mshr miss rate for ReadReq accesses
 system.cpu.dcache.ReadReq_mshr_misses          216307                       # 
number of ReadReq MSHR misses
-system.cpu.dcache.WriteReq_accesses          37579282                       # 
number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 48790.597140                       
# average WriteReq miss latency
+system.cpu.dcache.WriteReq_accesses          39451321                       # 
number of WriteReq accesses(hits+misses)
+system.cpu.dcache.WriteReq_avg_miss_latency  7448.640662                       
# average WriteReq miss latency
 system.cpu.dcache.WriteReq_avg_mshr_miss_latency  7159.473367                  
     # average WriteReq mshr miss latency
 system.cpu.dcache.WriteReq_hits              37241994                       # 
number of WriteReq hits
 system.cpu.dcache.WriteReq_miss_latency   16456482928                       # 
number of WriteReq miss cycles
-system.cpu.dcache.WriteReq_miss_rate         0.008975                       # 
miss rate for WriteReq accesses
-system.cpu.dcache.WriteReq_misses              337288                       # 
number of WriteReq misses
+system.cpu.dcache.WriteReq_miss_rate         0.056001                       # 
miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_misses             2209327                       # 
number of WriteReq misses
 system.cpu.dcache.WriteReq_mshr_hits          1872039                       # 
number of WriteReq MSHR hits
 system.cpu.dcache.WriteReq_mshr_miss_latency   2414804453                      
 # number of WriteReq MSHR miss cycles
-system.cpu.dcache.WriteReq_mshr_miss_rate     0.008975                       # 
mshr miss rate for WriteReq accesses
+system.cpu.dcache.WriteReq_mshr_miss_rate     0.008549                       # 
mshr miss rate for WriteReq accesses
 system.cpu.dcache.WriteReq_mshr_misses         337288                       # 
number of WriteReq MSHR misses
 system.cpu.dcache.avg_blocked_cycles_no_mshrs  1999.750000                     
  # average number of cycles each access was blocked
 system.cpu.dcache.avg_blocked_cycles_no_targets         2750                   
    # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                 320.196392                       # 
Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                 321.245700                       # 
Average number of references to valid blocks.
 system.cpu.dcache.blocked_no_mshrs                  4                       # 
number of cycles access was blocked
 system.cpu.dcache.blocked_no_targets                4                       # 
number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_mshrs         7999                       # 
number of cycles access was blocked
 system.cpu.dcache.blocked_cycles_no_targets        11000                       
# number of cycles access was blocked
 system.cpu.dcache.cache_copies                      0                       # 
number of cache copies performed
-system.cpu.dcache.demand_accesses           151900839                       # 
number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 40273.937496                       # 
average overall miss latency
+system.cpu.dcache.demand_accesses           154489673                       # 
number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency  7094.973483                       # 
average overall miss latency
 system.cpu.dcache.demand_avg_mshr_miss_latency  5677.703832                    
   # average overall mshr miss latency
 system.cpu.dcache.demand_hits               151347244                       # 
number of demand (read+write) hits
 system.cpu.dcache.demand_miss_latency     22295450428                       # 
number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.003644                       # 
miss rate for demand accesses
-system.cpu.dcache.demand_misses                553595                       # 
number of demand (read+write) misses
+system.cpu.dcache.demand_miss_rate           0.020341                       # 
miss rate for demand accesses
+system.cpu.dcache.demand_misses               3142429                       # 
number of demand (read+write) misses
 system.cpu.dcache.demand_mshr_hits            2588834                       # 
number of demand (read+write) MSHR hits
 system.cpu.dcache.demand_mshr_miss_latency   3143148453                       
# number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.003644                       # 
mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_rate      0.003583                       # 
mshr miss rate for demand accesses
 system.cpu.dcache.demand_mshr_misses           553595                       # 
number of demand (read+write) MSHR misses
 system.cpu.dcache.fast_writes                       0                       # 
number of fast writes performed
 system.cpu.dcache.mshr_cap_events                   0                       # 
number of times MSHR cap was activated
 system.cpu.dcache.no_allocate_misses                0                       # 
Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses          151900839                       # 
number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 40273.937496                       
# average overall miss latency
+system.cpu.dcache.overall_accesses          154489673                       # 
number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency  7094.973483                       
# average overall miss latency
 system.cpu.dcache.overall_avg_mshr_miss_latency  5677.703832                   
    # average overall mshr miss latency
 system.cpu.dcache.overall_avg_mshr_uncacheable_latency <err: div-0>            
           # average overall mshr uncacheable latency
 system.cpu.dcache.overall_hits              151347244                       # 
number of overall hits
 system.cpu.dcache.overall_miss_latency    22295450428                       # 
number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.003644                       # 
miss rate for overall accesses
-system.cpu.dcache.overall_misses               553595                       # 
number of overall misses
+system.cpu.dcache.overall_miss_rate          0.020341                       # 
miss rate for overall accesses
+system.cpu.dcache.overall_misses              3142429                       # 
number of overall misses
 system.cpu.dcache.overall_mshr_hits           2588834                       # 
number of overall MSHR hits
 system.cpu.dcache.overall_mshr_miss_latency   3143148453                       
# number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.003644                       # 
mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_rate     0.003583                       # 
mshr miss rate for overall accesses
 system.cpu.dcache.overall_mshr_misses          553595                       # 
number of overall MSHR misses
 system.cpu.dcache.overall_mshr_uncacheable_latency            0                
       # number of overall MSHR uncacheable cycles
 system.cpu.dcache.overall_mshr_uncacheable_misses            0                 
      # number of overall MSHR uncacheable misses
@@ -124,7 +124,7 @@ system.cpu.dcache.sampled_refs          
 system.cpu.dcache.sampled_refs                 472922                       # 
Sample count of references to valid blocks.
 system.cpu.dcache.soft_prefetch_mshr_full            0                       # 
number of mshr full events for SW prefetching instrutions
 system.cpu.dcache.tagsinuse               4095.170465                       # 
Cycle average of tags in use
-system.cpu.dcache.total_refs                151427918                       # 
Total number of references to valid blocks.
+system.cpu.dcache.total_refs                151924159                       # 
Total number of references to valid blocks.
 system.cpu.dcache.warmup_cycle               50285000                       # 
Cycle when the warmup percentage was hit.
 system.cpu.dcache.writebacks                   334126                       # 
number of writebacks
 system.cpu.decode.DECODE:BlockedCycles       46422286                       # 
Number of cycles decode is blocked
@@ -173,13 +173,13 @@ system.cpu.fetch.rateDist.max_value     
 system.cpu.fetch.rateDist.max_value                 8                      
 system.cpu.fetch.rateDist.end_dist
 
-system.cpu.icache.ReadReq_accesses           66025546                       # 
number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 10641.352550                       
# average ReadReq miss latency
+system.cpu.icache.ReadReq_accesses           66025670                       # 
number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency  9355.263158                       
# average ReadReq miss latency
 system.cpu.icache.ReadReq_avg_mshr_miss_latency  6819.290466                   
    # average ReadReq mshr miss latency
 system.cpu.icache.ReadReq_hits               66024644                       # 
number of ReadReq hits
 system.cpu.icache.ReadReq_miss_latency        9598500                       # 
number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.000014                       # 
miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  902                       # 
number of ReadReq misses
+system.cpu.icache.ReadReq_miss_rate          0.000016                       # 
miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                 1026                       # 
number of ReadReq misses
 system.cpu.icache.ReadReq_mshr_hits               124                       # 
number of ReadReq MSHR hits
 system.cpu.icache.ReadReq_mshr_miss_latency      6151000                       
# number of ReadReq MSHR miss cycles
 system.cpu.icache.ReadReq_mshr_miss_rate     0.000014                       # 
mshr miss rate for ReadReq accesses
@@ -192,13 +192,13 @@ system.cpu.icache.blocked_cycles_no_mshr
 system.cpu.icache.blocked_cycles_no_mshrs            0                       # 
number of cycles access was blocked
 system.cpu.icache.blocked_cycles_no_targets            0                       
# number of cycles access was blocked
 system.cpu.icache.cache_copies                      0                       # 
number of cache copies performed
-system.cpu.icache.demand_accesses            66025546                       # 
number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 10641.352550                       # 
average overall miss latency
+system.cpu.icache.demand_accesses            66025670                       # 
number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency  9355.263158                       # 
average overall miss latency
 system.cpu.icache.demand_avg_mshr_miss_latency  6819.290466                    
   # average overall mshr miss latency
 system.cpu.icache.demand_hits                66024644                       # 
number of demand (read+write) hits
 system.cpu.icache.demand_miss_latency         9598500                       # 
number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.000014                       # 
miss rate for demand accesses
-system.cpu.icache.demand_misses                   902                       # 
number of demand (read+write) misses
+system.cpu.icache.demand_miss_rate           0.000016                       # 
miss rate for demand accesses
+system.cpu.icache.demand_misses                  1026                       # 
number of demand (read+write) misses
_______________________________________________
m5-dev mailing list
[email protected]
http://m5sim.org/mailman/listinfo/m5-dev

Reply via email to