wingo pushed a commit to branch wip-whippet in repository guile. commit 0ba9f68621a8428a76f99f1d7be340a84d0e6325 Author: Andy Wingo <wi...@igalia.com> AuthorDate: Mon Aug 4 21:33:25 2025 +0200
Collect fragmentation into freelists The idea is to use fragmentation instead of wasting it. Unclear if it's a win though; in the microbenchmarks it's very inconclusive. --- src/nofl-holeset.h | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nofl-space.h | 63 +++++++++++++++--- 2 files changed, 245 insertions(+), 9 deletions(-) diff --git a/src/nofl-holeset.h b/src/nofl-holeset.h new file mode 100644 index 000000000..6a3cfda9f --- /dev/null +++ b/src/nofl-holeset.h @@ -0,0 +1,191 @@ +#ifndef NOFL_HOLESET_H +#define NOFL_HOLESET_H + +#include <math.h> +#include <pthread.h> + +#include "gc-attrs.h" +#include "gc-align.h" +#include "gc-ref.h" +#include "gc-inline.h" +#include "gc-lock.h" + +struct nofl_hole { + struct nofl_hole *next; + struct nofl_hole *next_chain; +}; + +struct nofl_hole_with_size { + struct nofl_hole entry; + size_t granules; +}; + +struct nofl_holeset { + uint64_t nonempty; + struct nofl_hole *buckets[64]; +}; + +#define NOFL_HOLESET_PACKED_GRANULES 50 +#define NOFL_HOLESET_SPARSE_COUNT (64 - NOFL_HOLESET_PACKED_GRANULES - 1) +#define NOFL_HOLESET_MAX_GRANULES 512 + +static inline void +nofl_hole_push(struct nofl_hole **loc, struct nofl_hole *hole) { + GC_ASSERT_EQ(hole->next, NULL); + GC_ASSERT_EQ(hole->next_chain, NULL); + hole->next = *loc; + *loc = hole; +} + +static inline void* +nofl_hole_pop(struct nofl_hole **loc) { + GC_ASSERT(*loc); + struct nofl_hole *hole = *loc; + GC_ASSERT_EQ(hole->next_chain, NULL); + *loc = hole->next; + hole->next = NULL; + return hole; +} + +static inline size_t +nofl_holeset_sparse_bucket(size_t granules, int for_insert) { + GC_ASSERT(granules >= NOFL_HOLESET_PACKED_GRANULES); + size_t idx = NOFL_HOLESET_PACKED_GRANULES; + size_t num = granules - NOFL_HOLESET_PACKED_GRANULES; + size_t denom = NOFL_HOLESET_MAX_GRANULES - NOFL_HOLESET_PACKED_GRANULES; + double frac = (double) (num * NOFL_HOLESET_SPARSE_COUNT) / denom; + idx += fmin(for_insert ? floor(frac) : ceil(frac), + NOFL_HOLESET_SPARSE_COUNT); + GC_ASSERT(idx < 64); + return idx; +} + +static inline size_t +nofl_holeset_bucket_for_lookup(size_t granules) { + return granules <= NOFL_HOLESET_PACKED_GRANULES + ? granules - 1 + : nofl_holeset_sparse_bucket(granules, 0); +} + +static inline void +nofl_holeset_push_local(struct nofl_holeset *local, uintptr_t addr, + size_t granules) { + size_t idx; + struct nofl_hole *hole = (struct nofl_hole *) addr; + if (granules <= NOFL_HOLESET_PACKED_GRANULES) { + GC_ASSERT(granules > 0); + idx = granules - 1; + } else { + struct nofl_hole_with_size *hole_with_size = + (struct nofl_hole_with_size *) hole; + GC_ASSERT_EQ(hole_with_size->granules, 0); + hole_with_size->granules = granules; + idx = nofl_holeset_sparse_bucket(granules, 1); + } + + nofl_hole_push(&local->buckets[idx], hole); + local->nonempty |= ((uint64_t) 1) << idx; +} + +static inline uintptr_t +nofl_hole_tail_address(void *hole, size_t granules) +{ + return ((uintptr_t) hole) + granules * NOFL_GRANULE_SIZE; +} + +static inline struct gc_ref +nofl_holeset_try_pop(struct nofl_holeset *holes, size_t granules) { + GC_ASSERT(granules <= NOFL_HOLESET_MAX_GRANULES); + size_t min_idx = nofl_holeset_bucket_for_lookup(granules); + + uint64_t candidates = holes->nonempty >> min_idx; + if (!candidates) + return gc_ref_null(); + + size_t idx = min_idx + __builtin_ctzll(candidates); + void *ret = nofl_hole_pop(&holes->buckets[idx]); + + GC_ASSERT(holes->nonempty & (((uint64_t)1) << idx)); + if (!holes->buckets[idx]) + holes->nonempty ^= ((uint64_t)1) << idx; + + size_t hole_granules; + if (idx < NOFL_HOLESET_PACKED_GRANULES) { + hole_granules = idx + 1; + } else { + struct nofl_hole_with_size *hole_with_size = ret; + hole_granules = hole_with_size->granules; + hole_with_size->granules = 0; + } + + GC_ASSERT(hole_granules >= granules); + size_t tail_granules = hole_granules - granules; + if (tail_granules) + nofl_holeset_push_local(holes, nofl_hole_tail_address(ret, granules), + tail_granules); + + return gc_ref_from_heap_object(ret); +} + +static inline void +nofl_holeset_release(struct nofl_holeset *local, + struct nofl_holeset *remote, + pthread_mutex_t *lock) { + uint64_t nonempty = local->nonempty; + if (!nonempty) + return; + + local->nonempty = 0; + + pthread_mutex_lock(lock); + remote->nonempty |= nonempty; + do { + uint64_t idx = __builtin_ctzll(nonempty); + struct nofl_hole *hole = local->buckets[idx]; + local->buckets[idx] = NULL; + hole->next_chain = remote->buckets[idx]; + remote->buckets[idx] = hole; + nonempty ^= ((uint64_t)1) << idx; + } while (nonempty); + pthread_mutex_unlock(lock); +} + +static inline int +nofl_holeset_acquire(struct nofl_holeset *local, + struct nofl_holeset *remote, + pthread_mutex_t *lock, + size_t granules) { + size_t min_idx = nofl_holeset_bucket_for_lookup(granules); + + uint64_t sloppy_nonempty = + atomic_load_explicit(&remote->nonempty, memory_order_relaxed); + if (!(sloppy_nonempty >> min_idx)) + return 0; + + pthread_mutex_lock(lock); + uint64_t nonempty = remote->nonempty >> min_idx; + if (!nonempty) { + pthread_mutex_unlock(lock); + return 0; + } + uint64_t idx = min_idx + __builtin_ctzll(nonempty); + GC_ASSERT(!local->buckets[idx]); + struct nofl_hole *chain = remote->buckets[idx]; + remote->buckets[idx] = chain->next_chain; + if (!chain->next_chain) + remote->nonempty ^= ((uint64_t)1) << idx; + pthread_mutex_unlock(lock); + + local->buckets[idx] = chain; + local->nonempty |= ((uint64_t)1) << idx; + chain->next_chain = NULL; + return 1; +} + +static inline void +nofl_holeset_clear(struct nofl_holeset *holes) +{ + memset(holes, 0, sizeof (*holes)); +} + +#endif // NOFL_HOLESET_H diff --git a/src/nofl-space.h b/src/nofl-space.h index 496d1c595..a623acd8e 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -38,6 +38,8 @@ STATIC_ASSERT_EQ(NOFL_GRANULE_SIZE, 1 << NOFL_GRANULE_SIZE_LOG_2); STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD, NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD * NOFL_GRANULE_SIZE); +#include "nofl-holeset.h" + #define NOFL_SLAB_SIZE (4 * 1024 * 1024) #define NOFL_BLOCK_SIZE (64 * 1024) #define NOFL_METADATA_BYTES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) @@ -166,6 +168,7 @@ struct nofl_space { struct nofl_block_list promoted; struct nofl_block_list old; struct nofl_block_list evacuation_targets; + struct nofl_holeset holes; pthread_mutex_t lock; double evacuation_minimum_reserve; double evacuation_reserve; @@ -183,6 +186,7 @@ struct nofl_allocator { uintptr_t alloc; uintptr_t sweep; struct nofl_block_ref block; + struct nofl_holeset holes; }; #if GC_CONSERVATIVE_TRACE && GC_CONCURRENT_TRACE @@ -610,6 +614,7 @@ static void nofl_allocator_reset(struct nofl_allocator *alloc) { alloc->alloc = alloc->sweep = 0; alloc->block = nofl_block_null(); + nofl_holeset_clear(&alloc->holes); } static int @@ -738,6 +743,7 @@ static void nofl_allocator_finish_hole(struct nofl_allocator *alloc) { size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE; if (granules) { + nofl_holeset_push_local(&alloc->holes, alloc->alloc, granules); alloc->block.summary->holes_with_fragmentation++; alloc->block.summary->fragmentation_granules += granules; alloc->alloc = alloc->sweep; @@ -839,6 +845,7 @@ static void nofl_allocator_finish(struct nofl_allocator *alloc, struct nofl_space *space) { if (nofl_allocator_has_block(alloc)) nofl_allocator_release_block(alloc, space); + nofl_holeset_release(&alloc->holes, &space->holes, &space->lock); } static int @@ -936,6 +943,49 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc, } } +static inline struct gc_ref +nofl_allocate_bump_pointer(struct nofl_allocator *alloc, size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size <= alloc->sweep - alloc->alloc); + struct gc_ref ret = gc_ref(alloc->alloc); + alloc->alloc += size; + gc_update_alloc_table(ret, size, kind); + return ret; +} + +static struct gc_ref +nofl_allocate_second_chance(struct nofl_allocator *alloc, + struct nofl_space *space, size_t granules) { + struct gc_ref local = nofl_holeset_try_pop(&alloc->holes, granules); + if (!gc_ref_is_null(local)) + return local; + if (nofl_holeset_acquire(&alloc->holes, &space->holes, &space->lock, + granules)) + return nofl_holeset_try_pop(&alloc->holes, granules); + return gc_ref_null(); +} + +static struct gc_ref +nofl_allocate_slow(struct nofl_allocator *alloc, struct nofl_space *space, + size_t size, enum gc_allocation_kind kind) { + size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2; + + if (nofl_allocator_next_hole_in_block_of_size(alloc, space, granules)) + return nofl_allocate_bump_pointer(alloc, size, kind); + + struct gc_ref second_chance = + nofl_allocate_second_chance(alloc, space, granules); + if (!gc_ref_is_null(second_chance)) { + gc_update_alloc_table(second_chance, size, kind); + return second_chance; + } + + if (nofl_allocator_next_hole(alloc, space, granules)) + return nofl_allocate_bump_pointer(alloc, size, kind); + + return gc_ref_null(); +} + static struct gc_ref nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space, size_t size, enum gc_allocation_kind kind) { @@ -943,16 +993,10 @@ nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space, GC_ASSERT(size <= gc_allocator_large_threshold()); size = align_up(size, NOFL_GRANULE_SIZE); - if (alloc->alloc + size > alloc->sweep) { - size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2; - if (!nofl_allocator_next_hole(alloc, space, granules)) - return gc_ref_null(); - } + if (alloc->alloc + size <= alloc->sweep) + return nofl_allocate_bump_pointer(alloc, size, kind); - struct gc_ref ret = gc_ref(alloc->alloc); - alloc->alloc += size; - gc_update_alloc_table(ret, size, kind); - return ret; + return nofl_allocate_slow(alloc, space, size, kind); } static struct gc_ref @@ -1538,6 +1582,7 @@ nofl_space_verify_before_restart(struct nofl_space *space) { static void nofl_space_finish_gc(struct nofl_space *space, enum gc_collection_kind gc_kind) { + nofl_holeset_clear(&space->holes); space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR); struct gc_lock lock = nofl_space_lock(space); if (space->evacuating)