[commit: ghc] master: Change the presentation of parallel GC work balance in +RTS -s (cd930da)

Duncan Coutts Wed, 04 Apr 2012 11:26:37 -0700

Repository : ssh://darcs.haskell.org//srv/darcs/ghc

On branch  : master


http://hackage.haskell.org/trac/ghc/changeset/cd930da1145a0d6094e5b5380034e80d002d5b9a

>---------------------------------------------------------------

commit cd930da1145a0d6094e5b5380034e80d002d5b9a
Author: Duncan Coutts <[email protected]>
Date:   Fri Feb 17 02:55:29 2012 +0000

    Change the presentation of parallel GC work balance in +RTS -s
    
    Also rename internal variables to make the names match what they hold.
    The parallel GC work balance is calculated using the total amount of
    memory copied by all GC threads, and the maximum copied by any
    individual thread. You have serial GC when the max is the same as
    the total copied, and perfectly balanced GC when total/max == n_caps.
    
    Previously we presented this as the ratio total/max and told users
    that the serial value was 1 and the ideal value N, for N caps, e.g.
    
      Parallel GC work balance: 1.05 (4045071 / 3846774, ideal 2)
    
    The downside of this is that the user always has to keep in mind the
    number of cores being used. Our new presentation uses a normalised
    scale 0--1, computed as (total/max - 1) / (n_caps - 1) and shown as a
    percentage: 0% means completely serial and 100% means perfectly
    balanced, e.g.
    
      Parallel GC work balance: 4.56% (serial 0%, perfect 100%)

>---------------------------------------------------------------

 includes/rts/storage/GC.h |    4 ++--
 rts/Stats.c               |   21 ++++++++++-----------
 rts/sm/GC.c               |   17 ++++++++---------
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index 26c5593..ab3af17 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -192,7 +192,7 @@ typedef struct _GCStats {
   StgWord64 current_bytes_slop;
   StgWord64 max_bytes_slop;
   StgWord64 peak_megabytes_allocated;
-  StgWord64 par_avg_bytes_copied;
+  StgWord64 par_tot_bytes_copied;
   StgWord64 par_max_bytes_copied;
   StgDouble mutator_cpu_seconds;
   StgDouble mutator_wall_seconds;
@@ -208,7 +208,7 @@ void getGCStats (GCStats *s);
 //  StgDouble init_wall_seconds;
 
 typedef struct _ParGCStats {
-  StgWord64 avg_copied;
+  StgWord64 tot_copied;
   StgWord64 max_copied;
 } ParGCStats;
 void getParGCStats (ParGCStats *s);
diff --git a/rts/Stats.c b/rts/Stats.c
index 76444b3..2277958 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -40,7 +40,7 @@ static StgWord64 GC_tot_alloc      = 0;
 static StgWord64 GC_tot_copied     = 0;
 
 static StgWord64 GC_par_max_copied = 0;
-static StgWord64 GC_par_avg_copied = 0;
+static StgWord64 GC_par_tot_copied = 0;
 
 #ifdef PROFILING
 static Time RP_start_time  = 0, RP_tot_time  = 0;  // retainer prof user time
@@ -140,7 +140,7 @@ initStats0(void)
     GC_tot_alloc     = 0;
     GC_tot_copied    = 0;
     GC_par_max_copied = 0;
-    GC_par_avg_copied = 0;
+    GC_par_tot_copied = 0;
     GC_tot_cpu  = 0;
 
 #ifdef PROFILING
@@ -332,7 +332,7 @@ stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
 void
 stat_endGC (gc_thread *gct,
             lnat alloc, lnat live, lnat copied, nat gen,
-            lnat max_copied, lnat avg_copied, lnat slop)
+            lnat par_max_copied, lnat par_tot_copied, lnat slop)
 {
     if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
         RtsFlags.ProfFlags.doHeapProfile)
@@ -372,8 +372,8 @@ stat_endGC (gc_thread *gct,
 
        GC_tot_copied += (StgWord64) copied;
        GC_tot_alloc  += (StgWord64) alloc;
-        GC_par_max_copied += (StgWord64) max_copied;
-        GC_par_avg_copied += (StgWord64) avg_copied;
+        GC_par_max_copied += (StgWord64) par_max_copied;
+        GC_par_tot_copied += (StgWord64) par_tot_copied;
        GC_tot_cpu   += gc_cpu;
         
         /* For the moment we calculate both per-HEC and total allocation.
@@ -642,11 +642,10 @@ stat_exit(int alloc)
             }
 
 #if defined(THREADED_RTS)
-            if (RtsFlags.ParFlags.parGcEnabled) {
-                statsPrintf("\n  Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n", 
-                            (double)GC_par_avg_copied / (double)GC_par_max_copied,
-                            (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
-                            n_capabilities
+            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
+                statsPrintf("\n  Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n", 
+                            100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
+                                / (n_capabilities - 1)
                     );
             }
 #endif
@@ -913,7 +912,7 @@ extern void getGCStats( GCStats *s )
     /* EZY: Being consistent with incremental output, but maybe should also discount init */
     s->cpu_seconds = TimeToSecondsDbl(current_cpu);
     s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
-    s->par_avg_bytes_copied = GC_par_avg_copied*(StgWord64)sizeof(W_);
+    s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
     s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
 }
 // extern void getTaskStats( TaskStats **s ) {}
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 8623194..8201a24 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -178,7 +178,7 @@ GarbageCollect (rtsBool force_major_gc,
 {
   bdescr *bd;
   generation *gen;
-  lnat live_blocks, live_words, allocated, max_copied, avg_copied;
+  lnat live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
 #if defined(THREADED_RTS)
   gc_thread *saved_gct;
 #endif
@@ -443,8 +443,8 @@ GarbageCollect (rtsBool force_major_gc,
   }
 
   copied = 0;
-  max_copied = 0;
-  avg_copied = 0;
+  par_max_copied = 0;
+  par_tot_copied = 0;
   { 
       nat i;
       for (i=0; i < n_gc_threads; i++) {
@@ -457,13 +457,12 @@ GarbageCollect (rtsBool force_major_gc,
               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
           }
           copied += gc_threads[i]->copied;
-          max_copied = stg_max(gc_threads[i]->copied, max_copied);
+          par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
       }
+      par_tot_copied = copied;
       if (n_gc_threads == 1) {
-          max_copied = 0;
-          avg_copied = 0;
-      } else {
-          avg_copied = copied;
+          par_max_copied = 0;
+          par_tot_copied = 0;
       }
   }
 
@@ -740,7 +739,7 @@ GarbageCollect (rtsBool force_major_gc,
 
   // ok, GC over: tell the stats department what happened. 
   stat_endGC(gct, allocated, live_words,
-             copied, N, max_copied, avg_copied,
+             copied, N, par_max_copied, par_tot_copied,
              live_blocks * BLOCK_SIZE_W - live_words /* slop */);
 
   // Guess which generation we'll collect *next* time



_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc

[commit: ghc] master: Change the presentation of parallel GC work balance in +RTS -s (cd930da)

Reply via email to