Repository : ssh://darcs.haskell.org//srv/darcs/ghc

On branch  : master

http://hackage.haskell.org/trac/ghc/changeset/cc2ea98ac4a15e40a15e89de9e47f33e191ba393

>---------------------------------------------------------------

commit cc2ea98ac4a15e40a15e89de9e47f33e191ba393
Author: Simon Marlow <[email protected]>
Date:   Thu Apr 14 08:59:39 2011 +0100

    Avoid accumulating slop in the pinned_object_block.
    
    The pinned_object_block is where we allocate small pinned ByteArray#
    objects.  At a GC the pinned_object_block was being treated like other
    large objects and promoted to the next step/generation, even if it was
    only partly full.  Under some ByteString-heavy workloads this would
    accumulate on average 2k of slop per GC, and this memory is never
    released until the ByteArray# objects in the block are freed.
    
    So now, we keep allocating into the pinned_object_block until it is
    completely full, at which point it is handed over to the GC as before.
    The pinned_object_block might therefore contain objects with a large
    range of ages, but I don't think this is any worse than the situation
    before.  We still have the fragmentation issue in general, but the new
    scheme can dramatically reduce the memory overhead for some workloads.

>---------------------------------------------------------------

 rts/Stats.c      |    8 ++++++++
 rts/sm/GC.c      |    5 -----
 rts/sm/Sanity.c  |    4 ++++
 rts/sm/Storage.c |   25 ++++++++++++++++++++-----
 4 files changed, 32 insertions(+), 10 deletions(-)
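
For orientation before reading the diff, here is a minimal C sketch of the
new allocation policy.  It uses simplified stand-ins (Block, WORDS_PER_BLOCK,
new_block, hand_block_to_gc) in place of the real RTS names (bdescr,
BLOCK_SIZE_W, allocBlock, the g0->large_objects list), so it is an
illustration of the idea rather than the implementation itself:

    #include <stddef.h>
    #include <stdlib.h>

    #define WORDS_PER_BLOCK 512   /* stand-in for BLOCK_SIZE_W: one 4KB block of 8-byte words */

    typedef struct Block {
        size_t *start;            /* first word of the block's payload */
        size_t *free;             /* next unallocated word */
    } Block;

    /* Stand-in for linking a full block onto g0->large_objects so the
     * next GC can promote it as usual. */
    static void hand_block_to_gc(Block *b) { (void)b; }

    static Block *new_block(void)
    {
        Block *b = malloc(sizeof(Block));
        b->start = malloc(WORDS_PER_BLOCK * sizeof(size_t));
        b->free  = b->start;
        return b;
    }

    /* The capability's current pinned block.  Under the new scheme it
     * stays attached across GCs until it is completely full. */
    static Block *pinned_object_block = NULL;

    static void *allocate_pinned(size_t n)   /* n is a size in words */
    {
        Block *bd = pinned_object_block;

        /* Retire the current block only when it cannot satisfy the
         * request.  A partly-filled block is kept and refilled after a
         * GC, so its unused tail never turns into permanent slop. */
        if (bd == NULL || bd->free + n > bd->start + WORDS_PER_BLOCK) {
            if (bd != NULL)
                hand_block_to_gc(bd);
            pinned_object_block = bd = new_block();
        }

        void *p = bd->free;
        bd->free += n;
        return p;
    }

In the real code the retired block is additionally flagged so the GC knows
how to treat it; see the BF_EVACUATED comment in the Storage.c hunk below.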

diff --git a/rts/Stats.c b/rts/Stats.c
index 159a909..3e7b5d8 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -798,6 +798,14 @@ statDescribeGens(void)
       mut = 0;
       for (i = 0; i < n_capabilities; i++) {
           mut += countOccupied(capabilities[i].mut_lists[g]);
+
+          // Add the pinned object block.
+          bd = capabilities[i].pinned_object_block;
+          if (bd != NULL) {
+              gen_live   += bd->free - bd->start;
+              gen_blocks += bd->blocks;
+          }
+
           gen_live   += gcThreadLiveWords(i,g);
           gen_blocks += gcThreadLiveBlocks(i,g);
       }
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 05bc8f2..3036140 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -597,11 +597,6 @@ GarbageCollect (rtsBool force_major_gc,
   // update the max size of older generations after a major GC
   resize_generations();
   
-  // Start a new pinned_object_block
-  for (n = 0; n < n_capabilities; n++) {
-      capabilities[n].pinned_object_block = NULL;
-  }
-
   // Free the mark stack.
   if (mark_stack_top_bd != NULL) {
       debugTrace(DEBUG_gc, "mark stack: %d blocks",
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 8ebb9a2..0ec552c 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -789,6 +789,7 @@ findMemoryLeak (void)
 
     for (i = 0; i < n_capabilities; i++) {
         markBlocks(nurseries[i].blocks);
+        markBlocks(capabilities[i].pinned_object_block);
     }
 
 #ifdef PROFILING
@@ -880,6 +881,9 @@ memInventory (rtsBool show)
   for (i = 0; i < n_capabilities; i++) {
       ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks);
       nursery_blocks += nurseries[i].n_blocks;
+      if (capabilities[i].pinned_object_block != NULL) {
+          nursery_blocks += capabilities[i].pinned_object_block->blocks;
+      }
   }
 
   retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ae3433a..f8a9e55 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -657,17 +657,32 @@ allocatePinned (Capability *cap, lnat n)
     // If we don't have a block of pinned objects yet, or the current
     // one isn't large enough to hold the new object, allocate a new one.
     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+        // The pinned_object_block remains attached to the capability
+        // until it is full, even if a GC occurs.  We want this
+        // behaviour because otherwise the unallocated portion of the
+        // block would be forever slop, and under certain workloads
+        // (allocating a few ByteStrings per GC) we accumulate a lot
+        // of slop.
+        //
+        // So, the pinned_object_block is initially marked
+        // BF_EVACUATED so the GC won't touch it.  When it is full,
+        // we place it on the large_objects list, and at the start of
+        // the next GC the BF_EVACUATED flag will be cleared, and the
+        // block will be promoted as usual (if anything in it is
+        // live).
         ACQUIRE_SM_LOCK;
-       cap->pinned_object_block = bd = allocBlock();
-       dbl_link_onto(bd, &g0->large_objects);
-       g0->n_large_blocks++;
+        if (bd != NULL) {
+            dbl_link_onto(bd, &g0->large_objects);
+            g0->n_large_blocks++;
+            g0->n_new_large_words += bd->free - bd->start;
+        }
+        cap->pinned_object_block = bd = allocBlock();
         RELEASE_SM_LOCK;
         initBdescr(bd, g0, g0);
-       bd->flags  = BF_PINNED | BF_LARGE;
+        bd->flags  = BF_PINNED | BF_LARGE | BF_EVACUATED;
        bd->free   = bd->start;
     }
 
-    g0->n_new_large_words += n;
     p = bd->free;
     bd->free += n;
     return p;
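
To connect the diff back to the 2k-per-GC figure in the commit message: the
slop in question is simply the unused tail of a partly-filled pinned block.
A hedged helper, reusing the Block/WORDS_PER_BLOCK stand-ins from the sketch
above (again illustrative, not RTS code):

    /* Unused words at the end of a partly-filled block.  Before this
     * patch, a pinned block promoted at GC time carried this tail as
     * wasted space for as long as any ByteArray# in the block stayed
     * alive; with the patch, later allocations keep filling the tail
     * instead. */
    static size_t block_slop_words(const Block *bd)
    {
        return (size_t)(bd->start + WORDS_PER_BLOCK - bd->free);
    }

With 4KB blocks, a block that is on average about half full when a GC hits
leaves roughly 2KB unused, which is the order of magnitude quoted above.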



_______________________________________________
Cvs-ghc mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/cvs-ghc
