wingo pushed a commit to branch wip-whippet
in repository guile.

commit 0ba9f68621a8428a76f99f1d7be340a84d0e6325
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Mon Aug 4 21:33:25 2025 +0200

    Collect fragmentation into freelists
    
    The idea is to reuse fragmented space instead of wasting it.  Unclear
    whether it's a win though; the microbenchmark results are very
    inconclusive.
---
 src/nofl-holeset.h | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nofl-space.h   |  63 +++++++++++++++---
 2 files changed, 245 insertions(+), 9 deletions(-)

diff --git a/src/nofl-holeset.h b/src/nofl-holeset.h
new file mode 100644
index 000000000..6a3cfda9f
--- /dev/null
+++ b/src/nofl-holeset.h
@@ -0,0 +1,191 @@
+#ifndef NOFL_HOLESET_H
+#define NOFL_HOLESET_H
+
+#include <math.h>
+#include <pthread.h>
+
+#include "gc-attrs.h"
+#include "gc-align.h"
+#include "gc-ref.h"
+#include "gc-inline.h"
+#include "gc-lock.h"
+
+// A hole is a contiguous run of free granules.  The hole's own memory
+// stores the freelist links: `next` chains holes within one bucket;
+// `next_chain` links entire per-bucket lists together when a local
+// holeset is published to the shared one (see nofl_holeset_release).
+struct nofl_hole {
+  struct nofl_hole *next;
+  struct nofl_hole *next_chain;
+};
+
+// Holes whose size is not implied by their bucket index (the "sparse"
+// buckets) record their exact granule count inline, again in the
+// hole's own memory.
+struct nofl_hole_with_size {
+  struct nofl_hole entry;
+  size_t granules;
+};
+
+// A segregated freelist: bucket i holds holes of size class i, and bit
+// i of `nonempty` is set iff bucket i is non-empty, so a best-first
+// lookup is a shift plus count-trailing-zeros.
+struct nofl_holeset {
+  uint64_t nonempty;
+  struct nofl_hole *buckets[64];
+};
+
+// Buckets 0..PACKED-1 are exact ("packed"): bucket i holds holes of
+// exactly i+1 granules.  The remaining SPARSE_COUNT buckets each cover
+// a range of sizes up to MAX_GRANULES; larger holes are clamped into
+// the last sparse bucket (see nofl_holeset_sparse_bucket).
+#define NOFL_HOLESET_PACKED_GRANULES 50
+#define NOFL_HOLESET_SPARSE_COUNT (64 - NOFL_HOLESET_PACKED_GRANULES - 1)
+#define NOFL_HOLESET_MAX_GRANULES 512
+
+// Push HOLE onto the singly-linked list at *LOC.  The hole must not
+// already be on any list (both of its links NULL).
+static inline void
+nofl_hole_push(struct nofl_hole **loc, struct nofl_hole *hole) {
+  GC_ASSERT_EQ(hole->next, NULL);
+  GC_ASSERT_EQ(hole->next_chain, NULL);
+  hole->next = *loc;
+  *loc = hole;
+}
+
+// Pop and return the head of the non-empty list at *LOC, clearing its
+// `next` link so the memory is clean for handing out to an allocator.
+// Only chain heads carry a `next_chain`; a popped interior entry must
+// have it NULL, which the assert checks.
+static inline void*
+nofl_hole_pop(struct nofl_hole **loc) {
+  GC_ASSERT(*loc);
+  struct nofl_hole *hole = *loc;
+  GC_ASSERT_EQ(hole->next_chain, NULL);
+  *loc = hole->next;
+  hole->next = NULL;
+  return hole;
+}
+
+// Map GRANULES (>= PACKED_GRANULES) onto a sparse bucket index.
+// Inserts round the fractional bucket down (floor) and lookups round
+// it up (ceil): any hole found in a bucket at or above the lookup
+// index is therefore guaranteed to be at least GRANULES long.  Sizes
+// beyond MAX_GRANULES are clamped into the last sparse bucket by the
+// fmin, so oversized holes are still storable.
+static inline size_t
+nofl_holeset_sparse_bucket(size_t granules, int for_insert) {
+  GC_ASSERT(granules >= NOFL_HOLESET_PACKED_GRANULES);
+  size_t idx = NOFL_HOLESET_PACKED_GRANULES;
+  size_t num = granules - NOFL_HOLESET_PACKED_GRANULES;
+  size_t denom = NOFL_HOLESET_MAX_GRANULES - NOFL_HOLESET_PACKED_GRANULES;
+  double frac = (double) (num * NOFL_HOLESET_SPARSE_COUNT) / denom;
+  idx += fmin(for_insert ? floor(frac) : ceil(frac),
+              NOFL_HOLESET_SPARSE_COUNT);
+  GC_ASSERT(idx < 64);
+  return idx;
+}
+
+// Return the lowest bucket index that can satisfy a request for
+// GRANULES: exact bucket for packed sizes (GRANULES must be >= 1),
+// rounded-up sparse bucket otherwise.
+static inline size_t
+nofl_holeset_bucket_for_lookup(size_t granules) {
+  return granules <= NOFL_HOLESET_PACKED_GRANULES
+    ? granules - 1
+    : nofl_holeset_sparse_bucket(granules, 0);
+}
+
+// File the hole at ADDR, GRANULES long, into LOCAL.  The hole's memory
+// itself becomes the freelist node; sparse-bucket holes additionally
+// record their exact size inline, since the bucket index alone does
+// not determine it.  Marks the bucket non-empty.
+static inline void
+nofl_holeset_push_local(struct nofl_holeset *local, uintptr_t addr,
+                        size_t granules) {
+  size_t idx;
+  struct nofl_hole *hole = (struct nofl_hole *) addr;
+  if (granules <= NOFL_HOLESET_PACKED_GRANULES) {
+    GC_ASSERT(granules > 0);
+    idx = granules - 1;
+  } else {
+    struct nofl_hole_with_size *hole_with_size =
+      (struct nofl_hole_with_size *) hole;
+    GC_ASSERT_EQ(hole_with_size->granules, 0);
+    hole_with_size->granules = granules;
+    // Insert rounds down, so the hole never lands in a bucket that
+    // promises more granules than it has.
+    idx = nofl_holeset_sparse_bucket(granules, 1);
+  }
+
+  nofl_hole_push(&local->buckets[idx], hole);
+  local->nonempty |= ((uint64_t) 1) << idx;
+}
+
+// Address just past the first GRANULES granules of HOLE; the start of
+// the leftover tail when a hole is split.
+static inline uintptr_t
+nofl_hole_tail_address(void *hole, size_t granules)
+{
+  return ((uintptr_t) hole) + granules * NOFL_GRANULE_SIZE;
+}
+
+// Try to allocate GRANULES granules from HOLES.  Finds the smallest
+// non-empty bucket that can satisfy the request (shift + ctz on the
+// `nonempty` bitmask), pops a hole from it, and returns any leftover
+// tail to the holeset.  Returns a null ref if no bucket can serve the
+// request.  Not thread-safe; intended for a thread-local holeset.
+static inline struct gc_ref
+nofl_holeset_try_pop(struct nofl_holeset *holes, size_t granules) {
+  GC_ASSERT(granules <= NOFL_HOLESET_MAX_GRANULES);
+  size_t min_idx = nofl_holeset_bucket_for_lookup(granules);
+
+  uint64_t candidates = holes->nonempty >> min_idx;
+  if (!candidates)
+    return gc_ref_null();
+
+  // Lowest set bit = smallest adequate size class (best fit first).
+  size_t idx = min_idx + __builtin_ctzll(candidates);
+  void *ret = nofl_hole_pop(&holes->buckets[idx]);
+  
+  GC_ASSERT(holes->nonempty & (((uint64_t)1) << idx));
+  if (!holes->buckets[idx])
+    holes->nonempty ^= ((uint64_t)1) << idx;
+
+  // Recover the hole's exact size: implied by the index for packed
+  // buckets, recorded inline for sparse ones (and cleared here, as
+  // nofl_holeset_push_local asserts it is zero).
+  size_t hole_granules;
+  if (idx < NOFL_HOLESET_PACKED_GRANULES) {
+    hole_granules = idx + 1;
+  } else {
+    struct nofl_hole_with_size *hole_with_size = ret;
+    hole_granules = hole_with_size->granules;
+    hole_with_size->granules = 0;
+  }
+
+  GC_ASSERT(hole_granules >= granules);
+  // Split: file the unused tail back into the holeset.
+  size_t tail_granules = hole_granules - granules;
+  if (tail_granules)
+    nofl_holeset_push_local(holes, nofl_hole_tail_address(ret, granules),
+                            tail_granules);
+
+  return gc_ref_from_heap_object(ret);
+}
+
+// Publish all of LOCAL's holes to REMOTE under LOCK, emptying LOCAL.
+// Each non-empty local bucket is moved as a whole list in O(1): the
+// local head's `next_chain` links it in front of the remote bucket's
+// existing chain-of-chains, so the remote list is never walked.
+static inline void
+nofl_holeset_release(struct nofl_holeset *local,
+                     struct nofl_holeset *remote,
+                     pthread_mutex_t *lock) {
+  uint64_t nonempty = local->nonempty;
+  if (!nonempty)
+    return;
+  
+  local->nonempty = 0;
+
+  pthread_mutex_lock(lock);
+  remote->nonempty |= nonempty;
+  do {
+    uint64_t idx = __builtin_ctzll(nonempty);
+    struct nofl_hole *hole = local->buckets[idx];
+    local->buckets[idx] = NULL;
+    hole->next_chain = remote->buckets[idx];
+    remote->buckets[idx] = hole;
+    nonempty ^= ((uint64_t)1) << idx;
+  } while (nonempty);
+  pthread_mutex_unlock(lock);
+}
+
+// Try to steal one chain of holes of at least GRANULES granules from
+// REMOTE into LOCAL.  Returns 1 on success (the caller then retries
+// nofl_holeset_try_pop on LOCAL), 0 if REMOTE has nothing suitable.
+// Does an unlocked pre-check of the bitmask, then re-checks under
+// LOCK before detaching a chain.
+// NOTE(review): atomic_load_explicit on a plain (non-_Atomic) uint64_t
+// is a C11 constraint violation; presumably this relies on a compiler
+// extension or a project-wide convention — confirm, or make the field
+// _Atomic.
+static inline int
+nofl_holeset_acquire(struct nofl_holeset *local,
+                     struct nofl_holeset *remote,
+                     pthread_mutex_t *lock,
+                     size_t granules) {
+  size_t min_idx = nofl_holeset_bucket_for_lookup(granules);
+  
+  // Racy fast-out: if no adequate bucket appears non-empty, skip the
+  // lock entirely; a stale read is re-validated under the lock below.
+  uint64_t sloppy_nonempty =
+    atomic_load_explicit(&remote->nonempty, memory_order_relaxed);
+  if (!(sloppy_nonempty >> min_idx))
+    return 0;
+
+  pthread_mutex_lock(lock);
+  uint64_t nonempty = remote->nonempty >> min_idx;
+  if (!nonempty) {
+    pthread_mutex_unlock(lock);
+    return 0;
+  }
+  // Detach the first chain from the smallest adequate bucket.
+  uint64_t idx = min_idx + __builtin_ctzll(nonempty);
+  GC_ASSERT(!local->buckets[idx]);
+  struct nofl_hole *chain = remote->buckets[idx];
+  remote->buckets[idx] = chain->next_chain;
+  if (!chain->next_chain)
+    remote->nonempty ^= ((uint64_t)1) << idx;
+  pthread_mutex_unlock(lock);
+
+  // The chain is now thread-local; install it and clear the chain
+  // link so later pops see a plain `next`-linked list.
+  local->buckets[idx] = chain;
+  local->nonempty |= ((uint64_t)1) << idx;
+  chain->next_chain = NULL;
+  return 1;
+}
+
+// Drop all holes and bucket bits, returning HOLES to the empty state.
+// NOTE(review): memset needs <string.h>, which this header does not
+// include directly — presumably pulled in transitively; verify.
+static inline void
+nofl_holeset_clear(struct nofl_holeset *holes)
+{
+  memset(holes, 0, sizeof (*holes));
+}
+
+#endif // NOFL_HOLESET_H
diff --git a/src/nofl-space.h b/src/nofl-space.h
index 496d1c595..a623acd8e 100644
--- a/src/nofl-space.h
+++ b/src/nofl-space.h
@@ -38,6 +38,8 @@ STATIC_ASSERT_EQ(NOFL_GRANULE_SIZE, 1 << 
NOFL_GRANULE_SIZE_LOG_2);
 STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD,
                  NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD * NOFL_GRANULE_SIZE);
 
+#include "nofl-holeset.h"
+
 #define NOFL_SLAB_SIZE (4 * 1024 * 1024)
 #define NOFL_BLOCK_SIZE (64 * 1024)
 #define NOFL_METADATA_BYTES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE)
@@ -166,6 +168,7 @@ struct nofl_space {
   struct nofl_block_list promoted;
   struct nofl_block_list old;
   struct nofl_block_list evacuation_targets;
+  struct nofl_holeset holes;
   pthread_mutex_t lock;
   double evacuation_minimum_reserve;
   double evacuation_reserve;
@@ -183,6 +186,7 @@ struct nofl_allocator {
   uintptr_t alloc;
   uintptr_t sweep;
   struct nofl_block_ref block;
+  struct nofl_holeset holes;
 };
 
 #if GC_CONSERVATIVE_TRACE && GC_CONCURRENT_TRACE
@@ -610,6 +614,7 @@ static void
 nofl_allocator_reset(struct nofl_allocator *alloc) {
   alloc->alloc = alloc->sweep = 0;
   alloc->block = nofl_block_null();
+  nofl_holeset_clear(&alloc->holes);
 }
 
 static int
@@ -738,6 +743,7 @@ static void
 nofl_allocator_finish_hole(struct nofl_allocator *alloc) {
   size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE;
   if (granules) {
+    nofl_holeset_push_local(&alloc->holes, alloc->alloc, granules);
     alloc->block.summary->holes_with_fragmentation++;
     alloc->block.summary->fragmentation_granules += granules;
     alloc->alloc = alloc->sweep;
@@ -839,6 +845,7 @@ static void
 nofl_allocator_finish(struct nofl_allocator *alloc, struct nofl_space *space) {
   if (nofl_allocator_has_block(alloc))
     nofl_allocator_release_block(alloc, space);
+  nofl_holeset_release(&alloc->holes, &space->holes, &space->lock);
 }
 
 static int
@@ -936,6 +943,49 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
   }
 }
 
+// Fast path: carve SIZE bytes (already granule-aligned by the caller)
+// off the allocator's current hole by bumping `alloc`, and record the
+// object in the allocation table.
+static inline struct gc_ref
+nofl_allocate_bump_pointer(struct nofl_allocator *alloc, size_t size,
+                           enum gc_allocation_kind kind) {
+  GC_ASSERT(size <= alloc->sweep - alloc->alloc);
+  struct gc_ref ret = gc_ref(alloc->alloc);
+  alloc->alloc += size;
+  gc_update_alloc_table(ret, size, kind);
+  return ret;
+}
+
+// Second-chance allocation from collected fragmentation: first the
+// allocator's own holeset, then one chain stolen from the space's
+// shared holeset (retrying the local pop after a successful steal).
+// Returns a null ref if neither can serve GRANULES.
+static struct gc_ref
+nofl_allocate_second_chance(struct nofl_allocator *alloc,
+                            struct nofl_space *space, size_t granules) {
+  struct gc_ref local = nofl_holeset_try_pop(&alloc->holes, granules);
+  if (!gc_ref_is_null(local))
+    return local;
+  if (nofl_holeset_acquire(&alloc->holes, &space->holes, &space->lock,
+                           granules))
+    return nofl_holeset_try_pop(&alloc->holes, granules);
+  return gc_ref_null();
+}
+
+// Slow path when the bump region cannot fit SIZE bytes.  Tries, in
+// order: the next hole in the current block, the second-chance
+// holesets, and finally a fresh hole from the space.  Returns a null
+// ref when all three fail.  SIZE is already granule-aligned.
+static struct gc_ref
+nofl_allocate_slow(struct nofl_allocator *alloc, struct nofl_space *space,
+                   size_t size, enum gc_allocation_kind kind) {
+  size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2;
+
+  if (nofl_allocator_next_hole_in_block_of_size(alloc, space, granules))
+    return nofl_allocate_bump_pointer(alloc, size, kind);
+
+  struct gc_ref second_chance =
+    nofl_allocate_second_chance(alloc, space, granules);
+  if (!gc_ref_is_null(second_chance)) {
+    // Holeset memory bypasses the bump pointer, so the allocation
+    // table is updated here rather than in nofl_allocate_bump_pointer.
+    gc_update_alloc_table(second_chance, size, kind);
+    return second_chance;
+  }
+
+  if (nofl_allocator_next_hole(alloc, space, granules))
+    return nofl_allocate_bump_pointer(alloc, size, kind);
+
+  return gc_ref_null();
+}
+
 static struct gc_ref
 nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space,
               size_t size, enum gc_allocation_kind kind) {
@@ -943,16 +993,10 @@ nofl_allocate(struct nofl_allocator *alloc, struct 
nofl_space *space,
   GC_ASSERT(size <= gc_allocator_large_threshold());
   size = align_up(size, NOFL_GRANULE_SIZE);
 
-  if (alloc->alloc + size > alloc->sweep) {
-    size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2;
-    if (!nofl_allocator_next_hole(alloc, space, granules))
-      return gc_ref_null();
-  }
+  if (alloc->alloc + size <= alloc->sweep)
+    return nofl_allocate_bump_pointer(alloc, size, kind);
 
-  struct gc_ref ret = gc_ref(alloc->alloc);
-  alloc->alloc += size;
-  gc_update_alloc_table(ret, size, kind);
-  return ret;
+  return nofl_allocate_slow(alloc, space, size, kind);
 }
 
 static struct gc_ref
@@ -1538,6 +1582,7 @@ nofl_space_verify_before_restart(struct nofl_space 
*space) {
 static void
 nofl_space_finish_gc(struct nofl_space *space,
                      enum gc_collection_kind gc_kind) {
+  nofl_holeset_clear(&space->holes);
   space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR);
   struct gc_lock lock = nofl_space_lock(space);
   if (space->evacuating)

Reply via email to