wingo pushed a commit to branch wip-whippet
in repository guile.

commit b663e5878e031ac6c4ba25bdd4779543f2f32075
Author:     Andy Wingo <wi...@igalia.com>
AuthorDate: Tue Aug 20 14:33:56 2024 +0200
nofl: Refactor evacuation allocation to be thread-local This relaxes a "reliability" requirement, as in https://wingolog.org/archives/2024/07/10/copying-collectors-with-block-structured-heaps-are-unreliable. --- src/nofl-space.h | 1605 ++++++++++++++++++++++++++---------------------------- src/whippet.c | 63 ++- 2 files changed, 815 insertions(+), 853 deletions(-) diff --git a/src/nofl-space.h b/src/nofl-space.h index fd718c962..eba3cd386 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -34,55 +34,6 @@ STATIC_ASSERT_EQ(NOFL_GRANULE_SIZE, 1 << NOFL_GRANULE_SIZE_LOG_2); STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD, NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD * NOFL_GRANULE_SIZE); -// Each granule has one mark byte stored in a side table. A granule's -// mark state is a whole byte instead of a bit to facilitate parallel -// marking. (Parallel markers are allowed to race.) We also use this -// byte to compute object extent, via a bit flag indicating -// end-of-object. -// -// Because we want to allow for conservative roots, we need to know -// whether an address indicates an object or not. That means that when -// an object is allocated, it has to set a bit, somewhere. We use the -// metadata byte for this purpose, setting the "young" bit. -// -// The "young" bit's name might make you think about generational -// collection, and indeed all objects collected in a minor collection -// will have this bit set. However, the nofl space never needs to check -// for the young bit; if it weren't for the need to identify -// conservative roots, we wouldn't need a young bit at all. Perhaps in -// an all-precise system, we would be able to avoid the overhead of -// initializing mark byte upon each fresh allocation. -// -// When an object becomes dead after a GC, it will still have a bit set -// -- maybe the young bit, or maybe a survivor bit. The sweeper has to -// clear these bits before the next collection. But, for concurrent -// marking, we will also be marking "live" objects, updating their mark -// bits. So there are four object states concurrently observable: -// young, dead, survivor, and marked. (If we didn't have concurrent -// marking we would still need the "marked" state, because marking -// mutator roots before stopping is also a form of concurrent marking.) -// Even though these states are mutually exclusive, we use separate bits -// for them because we have the space. After each collection, the dead, -// survivor, and marked states rotate by one bit. -enum nofl_metadata_byte { - NOFL_METADATA_BYTE_NONE = 0, - NOFL_METADATA_BYTE_YOUNG = 1, - NOFL_METADATA_BYTE_MARK_0 = 2, - NOFL_METADATA_BYTE_MARK_1 = 4, - NOFL_METADATA_BYTE_MARK_2 = 8, - NOFL_METADATA_BYTE_END = 16, - NOFL_METADATA_BYTE_EPHEMERON = 32, - NOFL_METADATA_BYTE_PINNED = 64, - NOFL_METADATA_BYTE_UNUSED_1 = 128 -}; - -static uint8_t -nofl_rotate_dead_survivor_marked(uint8_t mask) { - uint8_t all = - NOFL_METADATA_BYTE_MARK_0 | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; - return ((mask << 1) | (mask >> 2)) & all; -} - #define NOFL_SLAB_SIZE (4 * 1024 * 1024) #define NOFL_BLOCK_SIZE (64 * 1024) #define NOFL_METADATA_BYTES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) @@ -144,7 +95,9 @@ struct nofl_block_summary { uint16_t hole_count; uint16_t free_granules; // Counters related to allocation since previous collection: - // wasted space due to fragmentation. + // wasted space due to fragmentation. 
Also used by blocks on the + // "partly full" list, which have zero holes_with_fragmentation + // but nonzero fragmentation_granules. uint16_t holes_with_fragmentation; uint16_t fragmentation_granules; // After a block is swept, if it's empty it goes on the empties @@ -173,6 +126,91 @@ struct nofl_slab { }; STATIC_ASSERT_EQ(sizeof(struct nofl_slab), NOFL_SLAB_SIZE); +// Lock-free block list. +struct nofl_block_list { + size_t count; + uintptr_t blocks; +}; + +struct nofl_space { + uint64_t sweep_mask; + uint8_t live_mask; + uint8_t marked_mask; + uint8_t evacuating; + uintptr_t low_addr; + size_t extent; + size_t heap_size; + uint8_t last_collection_was_minor; + uintptr_t next_block; // atomically + struct nofl_block_list empty; + struct nofl_block_list unavailable; + struct nofl_block_list partly_full; + struct nofl_block_list evacuation_targets; + double evacuation_minimum_reserve; + double evacuation_reserve; + double venerable_threshold; + ssize_t pending_unavailable_bytes; // atomically + struct nofl_slab *slabs; + size_t nslabs; + uintptr_t granules_freed_by_last_collection; // atomically + uintptr_t fragmentation_granules_since_last_collection; // atomically +}; + +struct nofl_allocator { + uintptr_t alloc; + uintptr_t sweep; + uintptr_t block; +}; + +// Each granule has one mark byte stored in a side table. A granule's +// mark state is a whole byte instead of a bit to facilitate parallel +// marking. (Parallel markers are allowed to race.) We also use this +// byte to compute object extent, via a bit flag indicating +// end-of-object. +// +// Because we want to allow for conservative roots, we need to know +// whether an address indicates an object or not. That means that when +// an object is allocated, it has to set a bit, somewhere. We use the +// metadata byte for this purpose, setting the "young" bit. +// +// The "young" bit's name might make you think about generational +// collection, and indeed all objects collected in a minor collection +// will have this bit set. However, the nofl space never needs to check +// for the young bit; if it weren't for the need to identify +// conservative roots, we wouldn't need a young bit at all. Perhaps in +// an all-precise system, we would be able to avoid the overhead of +// initializing mark byte upon each fresh allocation. +// +// When an object becomes dead after a GC, it will still have a bit set +// -- maybe the young bit, or maybe a survivor bit. The sweeper has to +// clear these bits before the next collection. But, for concurrent +// marking, we will also be marking "live" objects, updating their mark +// bits. So there are four object states concurrently observable: +// young, dead, survivor, and marked. (If we didn't have concurrent +// marking we would still need the "marked" state, because marking +// mutator roots before stopping is also a form of concurrent marking.) +// Even though these states are mutually exclusive, we use separate bits +// for them because we have the space. After each collection, the dead, +// survivor, and marked states rotate by one bit. 
+enum nofl_metadata_byte { + NOFL_METADATA_BYTE_NONE = 0, + NOFL_METADATA_BYTE_YOUNG = 1, + NOFL_METADATA_BYTE_MARK_0 = 2, + NOFL_METADATA_BYTE_MARK_1 = 4, + NOFL_METADATA_BYTE_MARK_2 = 8, + NOFL_METADATA_BYTE_END = 16, + NOFL_METADATA_BYTE_EPHEMERON = 32, + NOFL_METADATA_BYTE_PINNED = 64, + NOFL_METADATA_BYTE_UNUSED_1 = 128 +}; + +static uint8_t +nofl_rotate_dead_survivor_marked(uint8_t mask) { + uint8_t all = + NOFL_METADATA_BYTE_MARK_0 | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + return ((mask << 1) | (mask >> 2)) & all; +} + static struct nofl_slab* nofl_object_slab(void *obj) { uintptr_t addr = (uintptr_t) obj; @@ -235,12 +273,6 @@ nofl_block_summary_set_next(struct nofl_block_summary *summary, (summary->next_and_flags & (NOFL_BLOCK_SIZE - 1)) | next; } -// Lock-free block list. -struct nofl_block_list { - size_t count; - uintptr_t blocks; -}; - static void nofl_push_block(struct nofl_block_list *list, uintptr_t block) { atomic_fetch_add_explicit(&list->count, 1, memory_order_acq_rel); @@ -267,85 +299,81 @@ nofl_pop_block(struct nofl_block_list *list) { return head; } -static inline size_t -nofl_size_to_granules(size_t size) { - return (size + NOFL_GRANULE_SIZE - 1) >> NOFL_GRANULE_SIZE_LOG_2; +static size_t +nofl_block_count(struct nofl_block_list *list) { + return atomic_load_explicit(&list->count, memory_order_acquire); } -struct nofl_evacuation_allocator { - size_t allocated; // atomically - size_t limit; - uintptr_t block_cursor; // atomically -}; - -struct nofl_space { - uint64_t sweep_mask; - uint8_t live_mask; - uint8_t marked_mask; - uint8_t evacuating; - uintptr_t low_addr; - size_t extent; - size_t heap_size; - uint8_t last_collection_was_minor; - uintptr_t next_block; // atomically - struct nofl_block_list empty; - struct nofl_block_list unavailable; - struct nofl_block_list evacuation_targets; - double evacuation_minimum_reserve; - double evacuation_reserve; - double venerable_threshold; - ssize_t pending_unavailable_bytes; // atomically - struct nofl_evacuation_allocator evacuation_allocator; - struct nofl_slab *slabs; - size_t nslabs; - uintptr_t granules_freed_by_last_collection; // atomically - uintptr_t fragmentation_granules_since_last_collection; // atomically -}; - -struct nofl_allocator { - uintptr_t alloc; - uintptr_t sweep; - uintptr_t block; -}; +static void +nofl_push_unavailable_block(struct nofl_space *space, uintptr_t block) { + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)); + GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_UNAVAILABLE); + madvise((void*)block, NOFL_BLOCK_SIZE, MADV_DONTNEED); + nofl_push_block(&space->unavailable, block); +} -static inline void -nofl_clear_memory(uintptr_t addr, size_t size) { - memset((char*)addr, 0, size); +static uintptr_t +nofl_pop_unavailable_block(struct nofl_space *space) { + uintptr_t block = nofl_pop_block(&space->unavailable); + if (!block) + return 0; + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + GC_ASSERT(nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)); + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_UNAVAILABLE); + return block; } -static size_t -nofl_space_live_object_granules(uint8_t *metadata) { - return scan_for_byte(metadata, -1, broadcast_byte(NOFL_METADATA_BYTE_END)) + 1; +static uintptr_t +nofl_pop_empty_block(struct nofl_space *space) { + return 
nofl_pop_block(&space->empty); } -static inline int -nofl_space_mark_object(struct nofl_space *space, struct gc_ref ref) { - uint8_t *loc = nofl_metadata_byte_for_object(ref); - uint8_t byte = *loc; - if (byte & space->marked_mask) +static int +nofl_maybe_push_evacuation_target(struct nofl_space *space, + uintptr_t block, double reserve) { + GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block), + NOFL_BLOCK_NEEDS_SWEEP)); + size_t targets = nofl_block_count(&space->evacuation_targets); + size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = nofl_block_count(&space->unavailable); + if (targets >= (total - unavailable) * reserve) return 0; - uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 - | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; - *loc = (byte & ~mask) | space->marked_mask; + + nofl_push_block(&space->evacuation_targets, block); return 1; } -static uintptr_t -nofl_make_evacuation_allocator_cursor(uintptr_t block, size_t allocated) { - GC_ASSERT(allocated < (NOFL_BLOCK_SIZE - 1) * (uint64_t) NOFL_BLOCK_SIZE); - return align_down(block, NOFL_BLOCK_SIZE) | (allocated / NOFL_BLOCK_SIZE); +static int +nofl_push_evacuation_target_if_needed(struct nofl_space *space, + uintptr_t block) { + return nofl_maybe_push_evacuation_target(space, block, + space->evacuation_minimum_reserve); +} + +static int +nofl_push_evacuation_target_if_possible(struct nofl_space *space, + uintptr_t block) { + return nofl_maybe_push_evacuation_target(space, block, + space->evacuation_reserve); } static void -nofl_prepare_evacuation_allocator(struct nofl_evacuation_allocator *alloc, - struct nofl_block_list *targets) { - uintptr_t first_block = targets->blocks; - atomic_store_explicit(&alloc->allocated, 0, memory_order_release); - alloc->limit = - atomic_load_explicit(&targets->count, memory_order_acquire) * NOFL_BLOCK_SIZE; - atomic_store_explicit(&alloc->block_cursor, - nofl_make_evacuation_allocator_cursor(first_block, 0), - memory_order_release); +nofl_push_empty_block(struct nofl_space *space, uintptr_t block) { + GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block), + NOFL_BLOCK_NEEDS_SWEEP)); + nofl_push_block(&space->empty, block); +} + +static inline void +nofl_clear_memory(uintptr_t addr, size_t size) { + memset((char*)addr, 0, size); +} + +static size_t +nofl_space_live_object_granules(uint8_t *metadata) { + return scan_for_byte(metadata, -1, broadcast_byte(NOFL_METADATA_BYTE_END)) + 1; } static void @@ -360,440 +388,408 @@ nofl_clear_remaining_metadata_bytes_in_block(uintptr_t block, } static void -nofl_finish_evacuation_allocator_block(uintptr_t block, - uintptr_t allocated) { - GC_ASSERT(allocated <= NOFL_BLOCK_SIZE); - struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); - nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); - size_t fragmentation = (NOFL_BLOCK_SIZE - allocated) >> NOFL_GRANULE_SIZE_LOG_2; - summary->hole_count = 1; - summary->free_granules = NOFL_GRANULES_PER_BLOCK; - summary->holes_with_fragmentation = fragmentation ? 
1 : 0; - summary->fragmentation_granules = fragmentation; - if (fragmentation) - nofl_clear_remaining_metadata_bytes_in_block(block, allocated); +nofl_allocator_reset(struct nofl_allocator *alloc) { + alloc->alloc = alloc->sweep = alloc->block = 0; } static void -nofl_finish_evacuation_allocator(struct nofl_evacuation_allocator *alloc, - struct nofl_block_list *targets, - struct nofl_block_list *empties, - size_t reserve) { - // Blocks that we used for evacuation get returned to the mutator as - // sweepable blocks. Blocks that we didn't get to use go to the - // empties. - size_t allocated = atomic_load_explicit(&alloc->allocated, - memory_order_acquire); - atomic_store_explicit(&alloc->allocated, 0, memory_order_release); - if (allocated > alloc->limit) - allocated = alloc->limit; - while (allocated >= NOFL_BLOCK_SIZE) { - uintptr_t block = nofl_pop_block(targets); - GC_ASSERT(block); - allocated -= NOFL_BLOCK_SIZE; - } - if (allocated) { - // Finish off the last partially-filled block. - uintptr_t block = nofl_pop_block(targets); - GC_ASSERT(block); - nofl_finish_evacuation_allocator_block(block, allocated); - } - size_t remaining = atomic_load_explicit(&targets->count, memory_order_acquire); - while (remaining-- > reserve) - nofl_push_block(empties, nofl_pop_block(targets)); -} +nofl_allocator_release_full_block(struct nofl_allocator *alloc, + struct nofl_space *space, + struct nofl_block_summary *summary) { + GC_ASSERT(alloc->block); + GC_ASSERT(alloc->alloc == alloc->sweep); + GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE)); -static struct gc_ref -nofl_evacuation_allocate(struct nofl_space *space, size_t granules) { - // All collector threads compete to allocate from what is logically a - // single bump-pointer arena, which is actually composed of a linked - // list of blocks. - struct nofl_evacuation_allocator *alloc = &space->evacuation_allocator; - uintptr_t cursor = atomic_load_explicit(&alloc->block_cursor, - memory_order_acquire); - size_t bytes = granules * NOFL_GRANULE_SIZE; - size_t prev = atomic_load_explicit(&alloc->allocated, memory_order_acquire); - size_t block_mask = (NOFL_BLOCK_SIZE - 1); - size_t next; - do { - if (prev >= alloc->limit) - // No more space. - return gc_ref_null(); - next = prev + bytes; - if ((prev ^ next) & ~block_mask) - // Allocation straddles a block boundary; advance so it starts a - // fresh block. - next = (next & ~block_mask) + bytes; - } while (!atomic_compare_exchange_weak(&alloc->allocated, &prev, next)); - // OK, we've claimed our memory, starting at next - bytes. Now find - // the node in the linked list of evacuation targets that corresponds - // to this allocation pointer. - uintptr_t block = cursor & ~block_mask; - // This is the SEQ'th block to be allocated into. - uintptr_t seq = cursor & block_mask; - // Therefore this block handles allocations starting at SEQ*BLOCK_SIZE - // and continuing for NOFL_BLOCK_SIZE bytes. - uintptr_t base = seq * NOFL_BLOCK_SIZE; - - while ((base ^ next) & ~block_mask) { - GC_ASSERT(base < next); - if (base + NOFL_BLOCK_SIZE > prev) { - // The allocation straddles a block boundary, and the cursor has - // caught up so that we identify the block for the previous - // allocation pointer. Finish the previous block, probably - // leaving a small hole at the end. - nofl_finish_evacuation_allocator_block(block, prev - base); - } - // Cursor lags; advance it. 
- block = nofl_block_summary_next(nofl_block_summary_for_addr(block)); - base += NOFL_BLOCK_SIZE; - if (base >= alloc->limit) { - // Ran out of blocks! - GC_ASSERT(!block); - return gc_ref_null(); - } - GC_ASSERT(block); - // This store can race with other allocators, but that's OK as long - // as it never advances the cursor beyond the allocation pointer, - // which it won't because we updated the allocation pointer already. - atomic_store_explicit(&alloc->block_cursor, - nofl_make_evacuation_allocator_cursor(block, base), - memory_order_release); - } - - uintptr_t addr = block + (next & block_mask) - bytes; - return gc_ref(addr); -} - -static inline int -nofl_space_evacuate_or_mark_object(struct nofl_space *space, - struct gc_edge edge, - struct gc_ref old_ref) { - uint8_t *metadata = nofl_metadata_byte_for_object(old_ref); - uint8_t byte = *metadata; - if (byte & space->marked_mask) - return 0; - if (space->evacuating && - nofl_block_summary_has_flag(nofl_block_summary_for_addr(gc_ref_value(old_ref)), - NOFL_BLOCK_EVACUATE)) { - // This is an evacuating collection, and we are attempting to - // evacuate this block, and we are tracing this particular object - // for what appears to be the first time. - struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); + atomic_fetch_add(&space->granules_freed_by_last_collection, + summary->free_granules); + atomic_fetch_add(&space->fragmentation_granules_since_last_collection, + summary->fragmentation_granules); - if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) - gc_atomic_forward_acquire(&fwd); + // If this block has mostly survivors, we should avoid sweeping it and + // trying to allocate into it for a minor GC. Sweep it next time to + // clear any garbage allocated in this cycle and mark it as + // "venerable" (i.e., old). + if (!nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP) && + summary->free_granules < NOFL_GRANULES_PER_BLOCK * space->venerable_threshold) + nofl_block_summary_set_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP); - switch (fwd.state) { - case GC_FORWARDING_STATE_NOT_FORWARDED: - case GC_FORWARDING_STATE_ABORTED: - // Impossible. - GC_CRASH(); - case GC_FORWARDING_STATE_ACQUIRED: { - // We claimed the object successfully; evacuating is up to us. - size_t object_granules = nofl_space_live_object_granules(metadata); - struct gc_ref new_ref = nofl_evacuation_allocate(space, object_granules); - if (gc_ref_is_heap_object(new_ref)) { - // Copy object contents before committing, as we don't know what - // part of the object (if any) will be overwritten by the - // commit. - memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), - object_granules * NOFL_GRANULE_SIZE); - gc_atomic_forward_commit(&fwd, new_ref); - // Now update extent metadata, and indicate to the caller that - // the object's fields need to be traced. - uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref); - memcpy(new_metadata + 1, metadata + 1, object_granules - 1); - gc_edge_update(edge, new_ref); - metadata = new_metadata; - // Fall through to set mark bits. - } else { - // Well shucks; allocation failed, marking the end of - // opportunistic evacuation. No future evacuation of this - // object will succeed. Mark in place instead. - gc_atomic_forward_abort(&fwd); - } - break; - } - case GC_FORWARDING_STATE_BUSY: - // Someone else claimed this object first. Spin until new address - // known, or evacuation aborts. 
- for (size_t spin_count = 0;; spin_count++) { - if (gc_atomic_forward_retry_busy(&fwd)) - break; - yield_for_spin(spin_count); - } - if (fwd.state == GC_FORWARDING_STATE_ABORTED) - // Remove evacuation aborted; remote will mark and enqueue. - return 0; - ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); - // Fall through. - case GC_FORWARDING_STATE_FORWARDED: - // The object has been evacuated already. Update the edge; - // whoever forwarded the object will make sure it's eventually - // traced. - gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); - return 0; - } - } + nofl_allocator_reset(alloc); +} - uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 - | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; - *metadata = (byte & ~mask) | space->marked_mask; - return 1; +static void +nofl_allocator_release_partly_full_block(struct nofl_allocator *alloc, + struct nofl_space *space, + struct nofl_block_summary *summary) { + // A block can go on the partly full list if it has exactly one + // hole, located at the end of the block. + GC_ASSERT(alloc->alloc > alloc->block); + GC_ASSERT(alloc->sweep == alloc->block + NOFL_BLOCK_SIZE); + GC_ASSERT(nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)); + size_t hole_size = alloc->sweep - alloc->alloc; + GC_ASSERT(hole_size); + summary->fragmentation_granules = hole_size >> NOFL_GRANULE_SIZE_LOG_2; + nofl_push_block(&space->partly_full, alloc->block); + nofl_allocator_reset(alloc); } -static inline int -nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) { - return addr - space->low_addr < space->extent; +static size_t +nofl_allocator_acquire_partly_full_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + uintptr_t block = nofl_pop_block(&space->partly_full); + if (!block) return 0; + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + GC_ASSERT(summary->holes_with_fragmentation == 0); + alloc->block = block; + alloc->sweep = block + NOFL_BLOCK_SIZE; + size_t hole_granules = summary->fragmentation_granules; + summary->fragmentation_granules = 0; + alloc->alloc = alloc->sweep - (hole_granules << NOFL_GRANULE_SIZE_LOG_2); + return hole_granules; } -static inline int -nofl_space_contains_conservative_ref(struct nofl_space *space, - struct gc_conservative_ref ref) { - return nofl_space_contains_address(space, gc_conservative_ref_value(ref)); +static size_t +nofl_allocator_acquire_empty_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + uintptr_t block = nofl_pop_empty_block(space); + if (!block) return 0; + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + summary->hole_count = 1; + summary->free_granules = NOFL_GRANULES_PER_BLOCK; + summary->holes_with_fragmentation = 0; + summary->fragmentation_granules = 0; + nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + alloc->block = alloc->alloc = block; + alloc->sweep = block + NOFL_BLOCK_SIZE; + nofl_clear_memory(block, NOFL_BLOCK_SIZE); + return NOFL_GRANULES_PER_BLOCK; } -static inline int -nofl_space_contains(struct nofl_space *space, struct gc_ref ref) { - return nofl_space_contains_address(space, gc_ref_value(ref)); +static void +nofl_allocator_finish_hole(struct nofl_allocator *alloc) { + size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE; + if (granules) { + struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block); + summary->holes_with_fragmentation++; + summary->fragmentation_granules += granules; + uint8_t 
*metadata = nofl_metadata_byte_for_addr(alloc->alloc); + memset(metadata, 0, granules); + alloc->alloc = alloc->sweep; + } } -static int -nofl_space_forward_or_mark_if_traced(struct nofl_space *space, - struct gc_edge edge, - struct gc_ref ref) { - uint8_t *metadata = nofl_metadata_byte_for_object(ref); - uint8_t byte = *metadata; - if (byte & space->marked_mask) - return 1; +// Sweep some heap to reclaim free space, advancing alloc->alloc and +// alloc->sweep. Return the size of the hole in granules, or 0 if we +// reached the end of the block. +static size_t +nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc, + uintptr_t sweep_mask) { + GC_ASSERT(alloc->block != 0); + GC_ASSERT_EQ(alloc->alloc, alloc->sweep); + uintptr_t sweep = alloc->sweep; + uintptr_t limit = alloc->block + NOFL_BLOCK_SIZE; - if (!space->evacuating) - return 0; - if (!nofl_block_summary_has_flag(nofl_block_summary_for_addr(gc_ref_value(ref)), - NOFL_BLOCK_EVACUATE)) + if (sweep == limit) return 0; - struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref); - switch (fwd.state) { - case GC_FORWARDING_STATE_NOT_FORWARDED: - return 0; - case GC_FORWARDING_STATE_BUSY: - // Someone else claimed this object first. Spin until new address - // known, or evacuation aborts. - for (size_t spin_count = 0;; spin_count++) { - if (gc_atomic_forward_retry_busy(&fwd)) - break; - yield_for_spin(spin_count); - } - if (fwd.state == GC_FORWARDING_STATE_ABORTED) - // Remote evacuation aborted; remote will mark and enqueue. - return 1; - ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); - // Fall through. - case GC_FORWARDING_STATE_FORWARDED: - gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); - return 1; - default: - GC_CRASH(); + GC_ASSERT((sweep & (NOFL_GRANULE_SIZE - 1)) == 0); + uint8_t* metadata = nofl_metadata_byte_for_addr(sweep); + size_t limit_granules = (limit - sweep) >> NOFL_GRANULE_SIZE_LOG_2; + + // Except for when we first get a block, alloc->sweep is positioned + // right after a hole, which can point to either the end of the + // block or to a live object. Assume that a live object is more + // common. + while (limit_granules && (metadata[0] & sweep_mask)) { + // Object survived collection; skip over it and continue sweeping. + size_t object_granules = nofl_space_live_object_granules(metadata); + sweep += object_granules * NOFL_GRANULE_SIZE; + limit_granules -= object_granules; + metadata += object_granules; } -} - -static inline struct gc_ref -nofl_space_mark_conservative_ref(struct nofl_space *space, - struct gc_conservative_ref ref, - int possibly_interior) { - uintptr_t addr = gc_conservative_ref_value(ref); - - if (possibly_interior) { - addr = align_down(addr, NOFL_GRANULE_SIZE); - } else { - // Addr not an aligned granule? Not an object. - uintptr_t displacement = addr & (NOFL_GRANULE_SIZE - 1); - if (!gc_is_valid_conservative_ref_displacement(displacement)) - return gc_ref_null(); - addr -= displacement; + if (!limit_granules) { + GC_ASSERT_EQ(sweep, limit); + alloc->alloc = alloc->sweep = limit; + return 0; } - // Addr in meta block? Not an object. - if ((addr & (NOFL_SLAB_SIZE - 1)) < NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE) - return gc_ref_null(); - - // Addr in block that has been paged out? Not an object. 
- struct nofl_block_summary *summary = nofl_block_summary_for_addr(addr); - if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) - return gc_ref_null(); - - uint8_t *loc = nofl_metadata_byte_for_addr(addr); - uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); - - // Already marked object? Nothing to do. - if (byte & space->marked_mask) - return gc_ref_null(); - - // Addr is the not start of an unmarked object? Search backwards if - // we have interior pointers, otherwise not an object. - uint8_t object_start_mask = space->live_mask | NOFL_METADATA_BYTE_YOUNG; - if (!(byte & object_start_mask)) { - if (!possibly_interior) - return gc_ref_null(); - - uintptr_t block_base = align_down(addr, NOFL_BLOCK_SIZE); - uint8_t *loc_base = nofl_metadata_byte_for_addr(block_base); - do { - // Searched past block? Not an object. - if (loc-- == loc_base) - return gc_ref_null(); - - byte = atomic_load_explicit(loc, memory_order_relaxed); - - // Ran into the end of some other allocation? Not an object, then. - if (byte & NOFL_METADATA_BYTE_END) - return gc_ref_null(); - - // Continue until we find object start. - } while (!(byte & object_start_mask)); - - // Found object start, and object is unmarked; adjust addr. - addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE; - } + size_t free_granules = scan_for_byte(metadata, limit_granules, sweep_mask); + GC_ASSERT(free_granules); + GC_ASSERT(free_granules <= limit_granules); - uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 - | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; - atomic_store_explicit(loc, (byte & ~mask) | space->marked_mask, - memory_order_relaxed); + struct nofl_block_summary *summary = nofl_block_summary_for_addr(sweep); + summary->hole_count++; + GC_ASSERT(free_granules <= NOFL_GRANULES_PER_BLOCK - summary->free_granules); + summary->free_granules += free_granules; - return gc_ref(addr); + size_t free_bytes = free_granules * NOFL_GRANULE_SIZE; + alloc->alloc = sweep; + alloc->sweep = sweep + free_bytes; + return free_granules; } -static inline size_t -nofl_space_object_size(struct nofl_space *space, struct gc_ref ref) { - uint8_t *loc = nofl_metadata_byte_for_object(ref); - size_t granules = nofl_space_live_object_granules(loc); - return granules * NOFL_GRANULE_SIZE; +static void +nofl_allocator_finish_sweeping_in_block(struct nofl_allocator *alloc, + uintptr_t sweep_mask) { + do { + nofl_allocator_finish_hole(alloc); + } while (nofl_allocator_next_hole_in_block(alloc, sweep_mask)); } static void -nofl_push_unavailable_block(struct nofl_space *space, uintptr_t block) { - struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); - GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)); - GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)); - nofl_block_summary_set_flag(summary, NOFL_BLOCK_UNAVAILABLE); - madvise((void*)block, NOFL_BLOCK_SIZE, MADV_DONTNEED); - nofl_push_block(&space->unavailable, block); +nofl_allocator_release_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(alloc->block); + struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block); + if (alloc->alloc < alloc->sweep && + alloc->sweep == alloc->block + NOFL_BLOCK_SIZE && + summary->holes_with_fragmentation == 0) { + nofl_allocator_release_partly_full_block(alloc, space, summary); + } else { + nofl_allocator_finish_sweeping_in_block(alloc, space->sweep_mask); + nofl_allocator_release_full_block(alloc, space, summary); + 
} } -static uintptr_t -nofl_pop_unavailable_block(struct nofl_space *space) { - uintptr_t block = nofl_pop_block(&space->unavailable); - if (!block) - return 0; - struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); - GC_ASSERT(nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)); - nofl_block_summary_clear_flag(summary, NOFL_BLOCK_UNAVAILABLE); - return block; +static void +nofl_allocator_finish(struct nofl_allocator *alloc, struct nofl_space *space) { + if (alloc->block) + nofl_allocator_release_block(alloc, space); } static uintptr_t -nofl_pop_empty_block(struct nofl_space *space) { - return nofl_pop_block(&space->empty); -} +nofl_space_next_block_to_sweep(struct nofl_space *space) { + uintptr_t block = atomic_load_explicit(&space->next_block, + memory_order_acquire); + uintptr_t next_block; + do { + if (block == 0) + return 0; -static int -nofl_maybe_push_evacuation_target(struct nofl_space *space, - uintptr_t block, double reserve) { - GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block), - NOFL_BLOCK_NEEDS_SWEEP)); - size_t targets = atomic_load_explicit(&space->evacuation_targets.count, - memory_order_acquire); - size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = atomic_load_explicit(&space->unavailable.count, - memory_order_acquire); - if (targets >= (total - unavailable) * reserve) + next_block = block + NOFL_BLOCK_SIZE; + if (next_block % NOFL_SLAB_SIZE == 0) { + uintptr_t hi_addr = space->low_addr + space->extent; + if (next_block == hi_addr) + next_block = 0; + else + next_block += NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE; + } + } while (!atomic_compare_exchange_weak(&space->next_block, &block, + next_block)); + return block; +} + +static int +nofl_maybe_release_swept_empty_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(alloc->block); + uintptr_t block = alloc->block; + if (atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire) <= 0) return 0; - nofl_push_block(&space->evacuation_targets, block); + nofl_push_unavailable_block(space, block); + atomic_fetch_sub(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE); + nofl_allocator_reset(alloc); return 1; } -static int -nofl_push_evacuation_target_if_needed(struct nofl_space *space, - uintptr_t block) { - return nofl_maybe_push_evacuation_target(space, block, - space->evacuation_minimum_reserve); -} +static size_t +nofl_allocator_next_hole(struct nofl_allocator *alloc, + struct nofl_space *space) { + nofl_allocator_finish_hole(alloc); + // As we sweep if we find that a block is empty, we return it to the + // empties list. Empties are precious. But if we return 10 blocks in + // a row, and still find an 11th empty, go ahead and use it. + size_t empties_countdown = 10; + while (1) { + // Sweep current block for a hole. + if (alloc->block) { + struct nofl_block_summary *summary = + nofl_block_summary_for_addr(alloc->block); + size_t granules = + nofl_allocator_next_hole_in_block(alloc, space->sweep_mask); + if (granules) { + // If the hole spans only part of a block, let the allocator try + // to use it. + if (granules < NOFL_GRANULES_PER_BLOCK) + return granules; + // Otherwise we have an empty block. + nofl_clear_remaining_metadata_bytes_in_block(alloc->block, 0); + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + // If we need an evacuation reserve block, take it. 
+ if (nofl_push_evacuation_target_if_needed(space, alloc->block)) { + nofl_allocator_reset(alloc); + continue; + } + // If we have pending pages to release to the OS, we should unmap + // this block. + if (nofl_maybe_release_swept_empty_block(alloc, space)) + continue; + // Otherwise if we've already returned lots of empty blocks to the + // freelist, let the allocator keep this block. + if (!empties_countdown) { + // After this block is allocated into, it will need to be swept. + nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + return granules; + } + // Otherwise we push to the empty blocks list. + nofl_push_empty_block(space, alloc->block); + nofl_allocator_reset(alloc); + empties_countdown--; + } else { + nofl_allocator_release_full_block(alloc, space, summary); + } + } -static int -nofl_push_evacuation_target_if_possible(struct nofl_space *space, - uintptr_t block) { - return nofl_maybe_push_evacuation_target(space, block, - space->evacuation_reserve); -} + GC_ASSERT(alloc->block == 0); -static void -nofl_push_empty_block(struct nofl_space *space, uintptr_t block) { - GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block), - NOFL_BLOCK_NEEDS_SWEEP)); - nofl_push_block(&space->empty, block); -} + { + size_t granules = nofl_allocator_acquire_partly_full_block(alloc, space); + if (granules) + return granules; + } -static ssize_t -nofl_space_request_release_memory(struct nofl_space *space, size_t bytes) { - return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes; + while (1) { + uintptr_t block = nofl_space_next_block_to_sweep(space); + if (block) { + // Sweeping found a block. We might take it for allocation, or + // we might send it back. + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + // If it's marked unavailable, it's already on a list of + // unavailable blocks, so skip and get the next block. + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + continue; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE)) { + // Skip venerable blocks after a minor GC -- we don't need to + // sweep as they weren't allocated into last cycle, and the + // mark bytes didn't rotate, so we have no cleanup to do; and + // we shouldn't try to allocate into them as it's not worth + // it. Any wasted space is measured as fragmentation. + if (space->last_collection_was_minor) + continue; + else + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE); + } + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) { + // Prepare to sweep the block for holes. + alloc->alloc = alloc->sweep = alloc->block = block; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP)) { + // In the last cycle we noted that this block consists of + // mostly old data. Sweep any garbage, commit the mark as + // venerable, and avoid allocating into it. + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP); + if (space->last_collection_was_minor) { + nofl_allocator_finish_sweeping_in_block(alloc, space->sweep_mask); + nofl_allocator_release_full_block(alloc, space, summary); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_VENERABLE); + continue; + } + } + // This block was marked in the last GC and needs sweeping. + // As we sweep we'll want to record how many bytes were live + // at the last collection. As we allocate we'll record how + // many granules were wasted because of fragmentation. 
+ summary->hole_count = 0; + summary->free_granules = 0; + summary->holes_with_fragmentation = 0; + summary->fragmentation_granules = 0; + break; + } else { + // Otherwise this block is completely empty and is on the + // empties list. We take from the empties list only after all + // the NEEDS_SWEEP blocks are processed. + continue; + } + } else { + // We are done sweeping for blocks. Now take from the empties + // list. + block = nofl_pop_empty_block(space); + // No empty block? Return 0 to cause collection. + if (!block) + return 0; + + // Maybe we should use this empty as a target for evacuation. + if (nofl_push_evacuation_target_if_possible(space, block)) + continue; + + // Otherwise give the block to the allocator. + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + summary->hole_count = 1; + summary->free_granules = NOFL_GRANULES_PER_BLOCK; + summary->holes_with_fragmentation = 0; + summary->fragmentation_granules = 0; + alloc->block = block; + alloc->alloc = block; + alloc->sweep = block + NOFL_BLOCK_SIZE; + return NOFL_GRANULES_PER_BLOCK; + } + } + } } -static void -nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) { - ssize_t pending = - atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes; - while (pending + NOFL_BLOCK_SIZE <= 0) { - uintptr_t block = nofl_pop_unavailable_block(space); - GC_ASSERT(block); - if (nofl_push_evacuation_target_if_needed(space, block)) - continue; - nofl_push_empty_block(space, block); - pending = atomic_fetch_add(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE) - + NOFL_BLOCK_SIZE; +static struct gc_ref +nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space, + size_t size, void (*gc)(void*), void *gc_data) { + GC_ASSERT(size > 0); + GC_ASSERT(size <= gc_allocator_large_threshold()); + size = align_up(size, NOFL_GRANULE_SIZE); + + if (alloc->alloc + size > alloc->sweep) { + size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2; + while (1) { + size_t hole = nofl_allocator_next_hole(alloc, space); + if (hole >= granules) { + nofl_clear_memory(alloc->alloc, hole * NOFL_GRANULE_SIZE); + break; + } + if (!hole) + gc(gc_data); + } } + + struct gc_ref ret = gc_ref(alloc->alloc); + alloc->alloc += size; + gc_update_alloc_table(ret, size); + return ret; } static size_t -nofl_allocator_next_hole(struct nofl_allocator *alloc, - struct nofl_space *space); +nofl_allocator_acquire_evacuation_block(struct nofl_allocator* alloc, + struct nofl_space *space) { + size_t granules = nofl_allocator_acquire_partly_full_block(alloc, space); + if (granules) + return granules; + return nofl_allocator_acquire_empty_block(alloc, space); +} -static int -nofl_space_sweep_until_memory_released(struct nofl_space *space, - struct nofl_allocator *alloc) { - ssize_t pending = atomic_load_explicit(&space->pending_unavailable_bytes, - memory_order_acquire); - // First try to unmap previously-identified empty blocks. If pending - // > 0 and other mutators happen to identify empty blocks, they will - // be unmapped directly and moved to the unavailable list. - while (pending > 0) { - uintptr_t block = nofl_pop_empty_block(space); - if (!block) - break; - // Note that we may have competing uses; if we're evacuating, - // perhaps we should push this block to the evacuation target list. - // That would enable us to reach a fragmentation low water-mark in - // fewer cycles. 
But maybe evacuation started in order to obtain - // free blocks for large objects; in that case we should just reap - // the fruits of our labor. Probably this second use-case is more - // important. - nofl_push_unavailable_block(space, block); - pending = atomic_fetch_sub(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE); - pending -= NOFL_BLOCK_SIZE; - } - // Otherwise, sweep, transitioning any empty blocks to unavailable and - // throwing away any non-empty block. A bit wasteful but hastening - // the next collection is a reasonable thing to do here. - while (pending > 0) { - if (!nofl_allocator_next_hole(alloc, space)) - return 0; - pending = atomic_load_explicit(&space->pending_unavailable_bytes, - memory_order_acquire); +static struct gc_ref +nofl_evacuation_allocate(struct nofl_allocator* alloc, struct nofl_space *space, + size_t granules) { + size_t avail = (alloc->sweep - alloc->alloc) >> NOFL_GRANULE_SIZE_LOG_2; + while (avail < granules) { + if (alloc->block) { + nofl_allocator_finish_hole(alloc); + nofl_allocator_release_full_block(alloc, space, + nofl_block_summary_for_addr(alloc->block)); + } + avail = nofl_allocator_acquire_evacuation_block(alloc, space); + if (!avail) + return gc_ref_null(); } - return pending <= 0; + + struct gc_ref ret = gc_ref(alloc->alloc); + alloc->alloc += granules * NOFL_GRANULE_SIZE; + gc_update_alloc_table(ret, granules * NOFL_GRANULE_SIZE); + return ret; +} + +// Another thread is triggering GC. Before we stop, finish clearing the +// dead mark bytes for the mutator's block, and release the block. +static void +nofl_finish_sweeping(struct nofl_allocator *alloc, + struct nofl_space *space) { + while (nofl_allocator_next_hole(alloc, space)) {} } static inline int @@ -810,11 +806,6 @@ nofl_space_set_ephemeron_flag(struct gc_ref ref) { } } -static void nofl_finish_sweeping(struct nofl_allocator *alloc, - struct nofl_space *space); -static void nofl_finish_sweeping_in_block(struct nofl_allocator *alloc, - struct nofl_space *space); - // Note that it's quite possible (and even likely) that any given remset // byte doesn't hold any roots, if all stores were to nursery objects. STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0); @@ -902,9 +893,8 @@ nofl_space_yield(struct nofl_space *space) { } static size_t -nofl_space_evacuation_reserve(struct nofl_space *space) { - return atomic_load_explicit(&space->evacuation_targets.count, - memory_order_acquire) * NOFL_BLOCK_SIZE; +nofl_space_evacuation_reserve_bytes(struct nofl_space *space) { + return nofl_block_count(&space->evacuation_targets) * NOFL_BLOCK_SIZE; } static size_t @@ -914,40 +904,26 @@ nofl_space_fragmentation(struct nofl_space *space) { } static void -nofl_space_release_evacuation_target_blocks(struct nofl_space *space) { - // Move excess evacuation target blocks back to empties. 
- size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = atomic_load_explicit(&space->unavailable.count, - memory_order_acquire); - size_t reserve = space->evacuation_minimum_reserve * (total - unavailable); - nofl_finish_evacuation_allocator(&space->evacuation_allocator, - &space->evacuation_targets, - &space->empty, - reserve); -} - -static void -nofl_space_prepare_for_evacuation(struct nofl_space *space, - enum gc_collection_kind gc_kind) { - if (gc_kind != GC_COLLECTION_COMPACTING) { - space->evacuating = 0; - space->evacuation_reserve = space->evacuation_minimum_reserve; - return; +nofl_space_prepare_evacuation(struct nofl_space *space) { + GC_ASSERT(!space->evacuating); + { + uintptr_t block; + while ((block = nofl_pop_block(&space->evacuation_targets))) + nofl_push_empty_block(space, block); } - - // Put the mutator into evacuation mode, collecting up to 50% of free space as - // evacuation blocks. - space->evacuation_reserve = 0.5; - - size_t target_blocks = space->evacuation_targets.count; + size_t target_blocks = nofl_block_count(&space->empty); DEBUG("evacuation target block count: %zu\n", target_blocks); if (target_blocks == 0) { - DEBUG("no evacuation target blocks, disabling evacuation for this round\n"); - space->evacuating = 0; + DEBUG("no evacuation target blocks, not evacuating this round\n"); return; } + // Put the mutator into evacuation mode, collecting up to 50% of free + // space as evacuation blocks. + space->evacuation_reserve = 0.5; + space->evacuating = 1; + size_t target_granules = target_blocks * NOFL_GRANULES_PER_BLOCK; // Compute histogram where domain is the number of granules in a block // that survived the last collection, aggregated into 33 buckets, and @@ -1011,15 +987,43 @@ nofl_space_prepare_for_evacuation(struct nofl_space *space, } } } +} - // We are ready to evacuate! - nofl_prepare_evacuation_allocator(&space->evacuation_allocator, - &space->evacuation_targets); - space->evacuating = 1; +static void +nofl_space_finish_evacuation(struct nofl_space *space) { + // When evacuation began, the evacuation reserve was moved to the + // empties list. Now that evacuation is finished, attempt to + // repopulate the reserve. + GC_ASSERT(space->evacuating); + space->evacuating = 0; + space->evacuation_reserve = space->evacuation_minimum_reserve; + size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = nofl_block_count(&space->unavailable); + size_t reserve = space->evacuation_minimum_reserve * (total - unavailable); + GC_ASSERT(nofl_block_count(&space->evacuation_targets) == 0); + while (reserve--) { + uintptr_t block = nofl_pop_block(&space->empty); + if (!block) break; + nofl_push_block(&space->evacuation_targets, block); + } + { + // FIXME: We should avoid sweeping partly full blocks, but it's too annoying + // to do at the moment given the way sweeping works. + uintptr_t block; + do { + block = nofl_pop_block(&space->partly_full); + } while (block); + } +} + +static inline size_t +nofl_size_to_granules(size_t size) { + return (size + NOFL_GRANULE_SIZE - 1) >> NOFL_GRANULE_SIZE_LOG_2; } static void nofl_space_verify_before_restart(struct nofl_space *space) { + GC_ASSERT_EQ(nofl_block_count(&space->partly_full), 0); // Iterate objects in each block, verifying that the END bytes correspond to // the measured object size. 
for (size_t slab = 0; slab < space->nslabs; slab++) { @@ -1056,333 +1060,278 @@ nofl_space_verify_before_restart(struct nofl_space *space) { static void nofl_space_finish_gc(struct nofl_space *space, enum gc_collection_kind gc_kind) { - space->evacuating = 0; space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR); + if (space->evacuating) + nofl_space_finish_evacuation(space); nofl_space_reset_sweeper(space); nofl_space_update_mark_patterns(space, 0); nofl_space_reset_statistics(space); - nofl_space_release_evacuation_target_blocks(space); if (GC_DEBUG) nofl_space_verify_before_restart(space); } -static int -nofl_sweep_byte(uint8_t *loc, uintptr_t sweep_mask) { - uint8_t metadata = atomic_load_explicit(loc, memory_order_relaxed); - // If the metadata byte is nonzero, that means either a young, dead, - // survived, or marked object. If it's live (survived or marked), we - // found the next mark. Otherwise it's dead and we clear the byte. - // If we see an END, that means an end of a dead object; clear it. - if (metadata) { - if (metadata & sweep_mask) - return 1; - atomic_store_explicit(loc, 0, memory_order_relaxed); - } - return 0; +static ssize_t +nofl_space_request_release_memory(struct nofl_space *space, size_t bytes) { + return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes; } -static int -nofl_sweep_word(uintptr_t *loc, uintptr_t sweep_mask) { - uintptr_t metadata = atomic_load_explicit(loc, memory_order_relaxed); - if (metadata) { - if (metadata & sweep_mask) - return 1; - atomic_store_explicit(loc, 0, memory_order_relaxed); +static void +nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) { + ssize_t pending = + atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes; + while (pending + NOFL_BLOCK_SIZE <= 0) { + uintptr_t block = nofl_pop_unavailable_block(space); + GC_ASSERT(block); + if (!nofl_push_evacuation_target_if_needed(space, block)) + nofl_push_empty_block(space, block); + pending = atomic_fetch_add(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE) + + NOFL_BLOCK_SIZE; } - return 0; } -static uintptr_t -nofl_space_next_block_to_sweep(struct nofl_space *space) { - uintptr_t block = atomic_load_explicit(&space->next_block, +static int +nofl_space_sweep_until_memory_released(struct nofl_space *space, + struct nofl_allocator *alloc) { + ssize_t pending = atomic_load_explicit(&space->pending_unavailable_bytes, memory_order_acquire); - uintptr_t next_block; - do { - if (block == 0) + // First try to unmap previously-identified empty blocks. If pending + // > 0 and other mutators happen to identify empty blocks, they will + // be unmapped directly and moved to the unavailable list. + while (pending > 0) { + uintptr_t block = nofl_pop_empty_block(space); + if (!block) + break; + // Note that we may have competing uses; if we're evacuating, + // perhaps we should push this block to the evacuation target list. + // That would enable us to reach a fragmentation low water-mark in + // fewer cycles. But maybe evacuation started in order to obtain + // free blocks for large objects; in that case we should just reap + // the fruits of our labor. Probably this second use-case is more + // important. + nofl_push_unavailable_block(space, block); + pending = atomic_fetch_sub(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE); + pending -= NOFL_BLOCK_SIZE; + } + // Otherwise, sweep, transitioning any empty blocks to unavailable and + // throwing away any non-empty block. 
A bit wasteful but hastening + // the next collection is a reasonable thing to do here. + while (pending > 0) { + if (!nofl_allocator_next_hole(alloc, space)) return 0; + pending = atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire); + } + return pending <= 0; +} - next_block = block + NOFL_BLOCK_SIZE; - if (next_block % NOFL_SLAB_SIZE == 0) { - uintptr_t hi_addr = space->low_addr + space->extent; - if (next_block == hi_addr) - next_block = 0; - else - next_block += NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE; +static inline int +nofl_space_evacuate_or_mark_object(struct nofl_space *space, + struct gc_edge edge, + struct gc_ref old_ref, + struct nofl_allocator *evacuate) { + uint8_t *metadata = nofl_metadata_byte_for_object(old_ref); + uint8_t byte = *metadata; + if (byte & space->marked_mask) + return 0; + if (space->evacuating && + nofl_block_summary_has_flag(nofl_block_summary_for_addr(gc_ref_value(old_ref)), + NOFL_BLOCK_EVACUATE)) { + // This is an evacuating collection, and we are attempting to + // evacuate this block, and we are tracing this particular object + // for what appears to be the first time. + struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); + + if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) + gc_atomic_forward_acquire(&fwd); + + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + case GC_FORWARDING_STATE_ABORTED: + // Impossible. + GC_CRASH(); + case GC_FORWARDING_STATE_ACQUIRED: { + // We claimed the object successfully; evacuating is up to us. + size_t object_granules = nofl_space_live_object_granules(metadata); + struct gc_ref new_ref = nofl_evacuation_allocate(evacuate, space, + object_granules); + if (gc_ref_is_heap_object(new_ref)) { + // Copy object contents before committing, as we don't know what + // part of the object (if any) will be overwritten by the + // commit. + memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), + object_granules * NOFL_GRANULE_SIZE); + gc_atomic_forward_commit(&fwd, new_ref); + // Now update extent metadata, and indicate to the caller that + // the object's fields need to be traced. + uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref); + memcpy(new_metadata + 1, metadata + 1, object_granules - 1); + gc_edge_update(edge, new_ref); + metadata = new_metadata; + // Fall through to set mark bits. + } else { + // Well shucks; allocation failed, marking the end of + // opportunistic evacuation. No future evacuation of this + // object will succeed. Mark in place instead. + gc_atomic_forward_abort(&fwd); + } + break; } - } while (!atomic_compare_exchange_weak(&space->next_block, &block, - next_block)); - return block; -} + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known, or evacuation aborts. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + break; + yield_for_spin(spin_count); + } + if (fwd.state == GC_FORWARDING_STATE_ABORTED) + // Remove evacuation aborted; remote will mark and enqueue. + return 0; + ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); + // Fall through. + case GC_FORWARDING_STATE_FORWARDED: + // The object has been evacuated already. Update the edge; + // whoever forwarded the object will make sure it's eventually + // traced. 
+ gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 0; + } + } -static void -nofl_allocator_release_block(struct nofl_allocator *alloc) { - alloc->alloc = alloc->sweep = alloc->block = 0; + uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 + | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + *metadata = (byte & ~mask) | space->marked_mask; + return 1; } -static void -nofl_allocator_finish_block(struct nofl_allocator *alloc, - struct nofl_space *space) { - GC_ASSERT(alloc->block); - struct nofl_block_summary *block = nofl_block_summary_for_addr(alloc->block); - atomic_fetch_add(&space->granules_freed_by_last_collection, - block->free_granules); - atomic_fetch_add(&space->fragmentation_granules_since_last_collection, - block->fragmentation_granules); +static inline int +nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) { + return addr - space->low_addr < space->extent; +} - // If this block has mostly survivors, we should avoid sweeping it and - // trying to allocate into it for a minor GC. Sweep it next time to - // clear any garbage allocated in this cycle and mark it as - // "venerable" (i.e., old). - GC_ASSERT(!nofl_block_summary_has_flag(block, NOFL_BLOCK_VENERABLE)); - if (!nofl_block_summary_has_flag(block, NOFL_BLOCK_VENERABLE_AFTER_SWEEP) && - block->free_granules < NOFL_GRANULES_PER_BLOCK * space->venerable_threshold) - nofl_block_summary_set_flag(block, NOFL_BLOCK_VENERABLE_AFTER_SWEEP); +static inline int +nofl_space_contains_conservative_ref(struct nofl_space *space, + struct gc_conservative_ref ref) { + return nofl_space_contains_address(space, gc_conservative_ref_value(ref)); +} - nofl_allocator_release_block(alloc); +static inline int +nofl_space_contains(struct nofl_space *space, struct gc_ref ref) { + return nofl_space_contains_address(space, gc_ref_value(ref)); } -// Sweep some heap to reclaim free space, resetting alloc->alloc and -// alloc->sweep. Return the size of the hole in granules. -static size_t -nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc, - struct nofl_space *space) { - uintptr_t sweep = alloc->sweep; - if (sweep == 0) - return 0; - uintptr_t limit = alloc->block + NOFL_BLOCK_SIZE; - uintptr_t sweep_mask = space->sweep_mask; +static int +nofl_space_forward_or_mark_if_traced(struct nofl_space *space, + struct gc_edge edge, + struct gc_ref ref) { + uint8_t *metadata = nofl_metadata_byte_for_object(ref); + uint8_t byte = *metadata; + if (byte & space->marked_mask) + return 1; - while (sweep != limit) { - GC_ASSERT((sweep & (NOFL_GRANULE_SIZE - 1)) == 0); - uint8_t* metadata = nofl_metadata_byte_for_addr(sweep); - size_t limit_granules = (limit - sweep) >> NOFL_GRANULE_SIZE_LOG_2; + if (!space->evacuating) + return 0; + if (!nofl_block_summary_has_flag(nofl_block_summary_for_addr(gc_ref_value(ref)), + NOFL_BLOCK_EVACUATE)) + return 0; - // Except for when we first get a block, alloc->sweep is positioned - // right after a hole, which can point to either the end of the - // block or to a live object. Assume that a live object is more - // common. - { - size_t live_granules = 0; - while (limit_granules && (metadata[0] & sweep_mask)) { - // Object survived collection; skip over it and continue sweeping. 
- size_t object_granules = nofl_space_live_object_granules(metadata); - live_granules += object_granules; - limit_granules -= object_granules; - metadata += object_granules; - } - if (!limit_granules) + struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref); + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + return 0; + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known, or evacuation aborts. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) break; - sweep += live_granules * NOFL_GRANULE_SIZE; + yield_for_spin(spin_count); } - - size_t free_granules = scan_for_byte(metadata, limit_granules, sweep_mask); - GC_ASSERT(free_granules); - GC_ASSERT(free_granules <= limit_granules); - - struct nofl_block_summary *summary = nofl_block_summary_for_addr(sweep); - summary->hole_count++; - GC_ASSERT(free_granules <= NOFL_GRANULES_PER_BLOCK - summary->free_granules); - summary->free_granules += free_granules; - - size_t free_bytes = free_granules * NOFL_GRANULE_SIZE; - alloc->alloc = sweep; - alloc->sweep = sweep + free_bytes; - return free_granules; + if (fwd.state == GC_FORWARDING_STATE_ABORTED) + // Remote evacuation aborted; remote will mark and enqueue. + return 1; + ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); + // Fall through. + case GC_FORWARDING_STATE_FORWARDED: + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 1; + default: + GC_CRASH(); } - - nofl_allocator_finish_block(alloc, space); - return 0; } -static void -nofl_allocator_finish_hole(struct nofl_allocator *alloc) { - size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE; - if (granules) { - struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block); - summary->holes_with_fragmentation++; - summary->fragmentation_granules += granules; - uint8_t *metadata = nofl_metadata_byte_for_addr(alloc->alloc); - memset(metadata, 0, granules); - alloc->alloc = alloc->sweep; +static inline struct gc_ref +nofl_space_mark_conservative_ref(struct nofl_space *space, + struct gc_conservative_ref ref, + int possibly_interior) { + uintptr_t addr = gc_conservative_ref_value(ref); + + if (possibly_interior) { + addr = align_down(addr, NOFL_GRANULE_SIZE); + } else { + // Addr not an aligned granule? Not an object. + uintptr_t displacement = addr & (NOFL_GRANULE_SIZE - 1); + if (!gc_is_valid_conservative_ref_displacement(displacement)) + return gc_ref_null(); + addr -= displacement; } - // FIXME: add to fragmentation -} -static int -nofl_maybe_release_swept_empty_block(struct nofl_allocator *alloc, - struct nofl_space *space) { - GC_ASSERT(alloc->block); - uintptr_t block = alloc->block; - if (atomic_load_explicit(&space->pending_unavailable_bytes, - memory_order_acquire) <= 0) - return 0; + // Addr in meta block? Not an object. + if ((addr & (NOFL_SLAB_SIZE - 1)) < NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE) + return gc_ref_null(); - nofl_push_unavailable_block(space, block); - atomic_fetch_sub(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE); - nofl_allocator_release_block(alloc); - return 1; -} + // Addr in block that has been paged out? Not an object. 
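// Aside, not part of the patch: the displacement filtering in
// nofl_space_mark_conservative_ref above, restated as a self-contained
// sketch. The granule size and the set of valid displacements are
// assumptions here; the patch consults NOFL_GRANULE_SIZE and
// gc_is_valid_conservative_ref_displacement.
#include <stdint.h>

#define GRANULE_SIZE 16  // placeholder; must be a power of two

static uintptr_t
align_down_to_granule(uintptr_t addr) {
  return addr & ~(uintptr_t)(GRANULE_SIZE - 1);
}

// Returns the candidate object address, or 0 for "not an object".
static uintptr_t
canonicalize_conservative_ref(uintptr_t addr, int possibly_interior) {
  if (possibly_interior)
    // Any address inside an object keeps it alive; use the enclosing granule.
    return align_down_to_granule(addr);
  // Otherwise only a runtime-approved displacement from the object start
  // is accepted; assume here that only displacement 0 is valid.
  return (addr & (GRANULE_SIZE - 1)) == 0 ? addr : 0;
}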
+ struct nofl_block_summary *summary = nofl_block_summary_for_addr(addr); + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + return gc_ref_null(); -static size_t -nofl_allocator_next_hole(struct nofl_allocator *alloc, - struct nofl_space *space) { - nofl_allocator_finish_hole(alloc); - // As we sweep if we find that a block is empty, we return it to the - // empties list. Empties are precious. But if we return 10 blocks in - // a row, and still find an 11th empty, go ahead and use it. - size_t empties_countdown = 10; - while (1) { - // Sweep current block for a hole. - size_t granules = nofl_allocator_next_hole_in_block(alloc, space); - if (granules) { - // If the hole spans only part of a block, let the allocator try - // to use it. - if (granules < NOFL_GRANULES_PER_BLOCK) - return granules; - struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block); - memset(nofl_metadata_byte_for_addr(alloc->block), 0, NOFL_GRANULES_PER_BLOCK); - nofl_block_summary_clear_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); - // Sweeping found a completely empty block. If we are below the - // minimum evacuation reserve, take the block. - if (nofl_push_evacuation_target_if_needed(space, alloc->block)) { - nofl_allocator_release_block(alloc); - continue; - } - // If we have pending pages to release to the OS, we should unmap - // this block. - if (nofl_maybe_release_swept_empty_block(alloc, space)) - continue; - // Otherwise if we've already returned lots of empty blocks to the - // freelist, let the allocator keep this block. - if (!empties_countdown) { - // After this block is allocated into, it will need to be swept. - nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); - return granules; - } - // Otherwise we push to the empty blocks list. - nofl_push_empty_block(space, alloc->block); - nofl_allocator_release_block(alloc); - empties_countdown--; - } - GC_ASSERT(alloc->block == 0); - while (1) { - uintptr_t block = nofl_space_next_block_to_sweep(space); - if (block) { - // Sweeping found a block. We might take it for allocation, or - // we might send it back. - struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); - // If it's marked unavailable, it's already on a list of - // unavailable blocks, so skip and get the next block. - if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) - continue; - if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE)) { - // Skip venerable blocks after a minor GC -- we don't need to - // sweep as they weren't allocated into last cycle, and the - // mark bytes didn't rotate, so we have no cleanup to do; and - // we shouldn't try to allocate into them as it's not worth - // it. Any wasted space is measured as fragmentation. - if (space->last_collection_was_minor) - continue; - else - nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE); - } - if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) { - // Prepare to sweep the block for holes. - alloc->alloc = alloc->sweep = alloc->block = block; - if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP)) { - // In the last cycle we noted that this block consists of - // mostly old data. Sweep any garbage, commit the mark as - // venerable, and avoid allocating into it. 
- nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP);
- if (space->last_collection_was_minor) {
- nofl_finish_sweeping_in_block(alloc, space);
- nofl_block_summary_set_flag(summary, NOFL_BLOCK_VENERABLE);
- continue;
- }
- }
- // This block was marked in the last GC and needs sweeping.
- // As we sweep we'll want to record how many bytes were live
- // at the last collection. As we allocate we'll record how
- // many granules were wasted because of fragmentation.
- summary->hole_count = 0;
- summary->free_granules = 0;
- summary->holes_with_fragmentation = 0;
- summary->fragmentation_granules = 0;
- break;
- } else {
- // Otherwise this block is completely empty and is on the
- // empties list. We take from the empties list only after all
- // the NEEDS_SWEEP blocks are processed.
- continue;
- }
- } else {
- // We are done sweeping for blocks. Now take from the empties
- // list.
- block = nofl_pop_empty_block(space);
- // No empty block? Return 0 to cause collection.
- if (!block)
- return 0;
+ uint8_t *loc = nofl_metadata_byte_for_addr(addr);
+ uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed);
- // Maybe we should use this empty as a target for evacuation.
- if (nofl_push_evacuation_target_if_possible(space, block))
- continue;
+ // Already marked object? Nothing to do.
+ if (byte & space->marked_mask)
+ return gc_ref_null();
- // Otherwise give the block to the allocator.
- struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
- nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP);
- summary->hole_count = 1;
- summary->free_granules = NOFL_GRANULES_PER_BLOCK;
- summary->holes_with_fragmentation = 0;
- summary->fragmentation_granules = 0;
- alloc->block = block;
- alloc->alloc = block;
- alloc->sweep = block + NOFL_BLOCK_SIZE;
- return NOFL_GRANULES_PER_BLOCK;
- }
- }
- }
-}
+ // Addr is not the start of an unmarked object? Search backwards if
+ // we have interior pointers, otherwise not an object.
+ uint8_t object_start_mask = space->live_mask | NOFL_METADATA_BYTE_YOUNG;
+ if (!(byte & object_start_mask)) {
+ if (!possibly_interior)
+ return gc_ref_null();
-static void
-nofl_finish_sweeping_in_block(struct nofl_allocator *alloc,
- struct nofl_space *space) {
- do {
- nofl_allocator_finish_hole(alloc);
- } while (nofl_allocator_next_hole_in_block(alloc, space));
-}
+ uintptr_t block_base = align_down(addr, NOFL_BLOCK_SIZE);
+ uint8_t *loc_base = nofl_metadata_byte_for_addr(block_base);
+ do {
+ // Searched past block? Not an object.
+ if (loc-- == loc_base)
+ return gc_ref_null();
-// Another thread is triggering GC. Before we stop, finish clearing the
-// dead mark bytes for the mutator's block, and release the block.
-static void
-nofl_finish_sweeping(struct nofl_allocator *alloc,
- struct nofl_space *space) {
- while (nofl_allocator_next_hole(alloc, space)) {}
-}
+ byte = atomic_load_explicit(loc, memory_order_relaxed);
-static struct gc_ref
-nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space,
- size_t size, void (*gc)(void*), void *gc_data) {
- GC_ASSERT(size > 0);
- GC_ASSERT(size <= gc_allocator_large_threshold());
- size = align_up(size, NOFL_GRANULE_SIZE);
+ // Ran into the end of some other allocation? Not an object, then.
+ if (byte & NOFL_METADATA_BYTE_END) + return gc_ref_null(); - if (alloc->alloc + size > alloc->sweep) { - size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2; - while (1) { - size_t hole = nofl_allocator_next_hole(alloc, space); - if (hole >= granules) { - nofl_clear_memory(alloc->alloc, hole * NOFL_GRANULE_SIZE); - break; - } - if (!hole) - gc(gc_data); - } + // Continue until we find object start. + } while (!(byte & object_start_mask)); + + // Found object start, and object is unmarked; adjust addr. + addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE; } - struct gc_ref ret = gc_ref(alloc->alloc); - alloc->alloc += size; - gc_update_alloc_table(ret, size); - return ret; + uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 + | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + atomic_store_explicit(loc, (byte & ~mask) | space->marked_mask, + memory_order_relaxed); + + return gc_ref(addr); +} + +static inline size_t +nofl_space_object_size(struct nofl_space *space, struct gc_ref ref) { + uint8_t *loc = nofl_metadata_byte_for_object(ref); + size_t granules = nofl_space_live_object_granules(loc); + return granules * NOFL_GRANULE_SIZE; } static struct nofl_slab* diff --git a/src/whippet.c b/src/whippet.c index 76f8f1ed5..6e942d7da 100644 --- a/src/whippet.c +++ b/src/whippet.c @@ -91,6 +91,10 @@ struct gc_mutator { struct gc_mutator *next; }; +struct gc_trace_worker_data { + struct nofl_allocator allocator; +}; + static inline struct nofl_space* heap_nofl_space(struct gc_heap *heap) { return &heap->nofl_space; @@ -108,16 +112,34 @@ mutator_heap(struct gc_mutator *mutator) { return mutator->heap; } +static void +gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data), + struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker) { + struct gc_trace_worker_data data; + nofl_allocator_reset(&data.allocator); + f(tracer, heap, worker, &data); + nofl_allocator_finish(&data.allocator, heap_nofl_space(heap)); +} + static void collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) GC_NEVER_INLINE; static inline int -do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref) { +do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref, + struct gc_trace_worker *worker) { if (!gc_ref_is_heap_object(ref)) return 0; - if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) - return nofl_space_evacuate_or_mark_object(heap_nofl_space(heap), edge, ref); - else if (large_object_space_contains(heap_large_object_space(heap), ref)) + if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) { + struct nofl_allocator *alloc = + worker ? 
&gc_trace_worker_data(worker)->allocator : NULL; + return nofl_space_evacuate_or_mark_object(heap_nofl_space(heap), edge, ref, + alloc); + } else if (large_object_space_contains(heap_large_object_space(heap), ref)) return large_object_space_mark_object(heap_large_object_space(heap), ref); else @@ -125,12 +147,13 @@ do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref) { } static inline int trace_edge(struct gc_heap *heap, - struct gc_edge edge) GC_ALWAYS_INLINE; + struct gc_edge edge, + struct gc_trace_worker *worker) GC_ALWAYS_INLINE; static inline int -trace_edge(struct gc_heap *heap, struct gc_edge edge) { +trace_edge(struct gc_heap *heap, struct gc_edge edge, struct gc_trace_worker *worker) { struct gc_ref ref = gc_edge_ref(edge); - int is_new = do_trace(heap, edge, ref); + int is_new = do_trace(heap, edge, ref, worker); if (is_new && GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons, @@ -340,23 +363,12 @@ gc_heap_set_extern_space(struct gc_heap *heap, struct gc_extern_space *space) { heap->extern_space = space; } -static void -gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, - struct gc_heap *heap, - struct gc_trace_worker *worker, - struct gc_trace_worker_data *data), - struct gc_tracer *tracer, - struct gc_heap *heap, - struct gc_trace_worker *worker) { - f(tracer, heap, worker, NULL); -} - static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) GC_ALWAYS_INLINE; static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { struct gc_trace_worker *worker = trace_data; - if (trace_edge(heap, edge)) + if (trace_edge(heap, edge, worker)) gc_trace_worker_enqueue(worker, gc_edge_ref(edge)); } @@ -364,7 +376,7 @@ static void trace_and_enqueue_locally(struct gc_edge edge, struct gc_heap *heap, void *data) { struct gc_mutator *mut = data; - if (trace_edge(heap, edge)) + if (trace_edge(heap, edge, NULL)) mutator_mark_buf_push(&mut->mark_buf, gc_edge_ref(edge)); } @@ -396,7 +408,7 @@ trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, static void trace_and_enqueue_globally(struct gc_edge edge, struct gc_heap *heap, void *unused) { - if (trace_edge(heap, edge)) + if (trace_edge(heap, edge, NULL)) gc_tracer_enqueue_root(&heap->tracer, gc_edge_ref(edge)); } @@ -643,7 +655,7 @@ trace_mutator_roots_after_stop(struct gc_heap *heap) { atomic_store(&heap->mutator_trace_list, NULL); for (struct gc_mutator *mut = heap->inactive_mutators; mut; mut = mut->next) { - nofl_finish_sweeping_in_block(&mut->allocator, heap_nofl_space(heap)); + nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); trace_mutator_roots_with_lock(mut); } } @@ -719,7 +731,7 @@ pause_mutator_for_collection_with_lock(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); GC_ASSERT(mutators_are_stopping(heap)); MUTATOR_EVENT(mut, mutator_stopping); - nofl_finish_sweeping_in_block(&mut->allocator, heap_nofl_space(heap)); + nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); gc_stack_capture_hot(&mut->stack); if (mutator_should_mark_while_stopping(mut)) // No need to collect results in mark buf; we can enqueue roots directly. 
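// Aside, not part of the patch: the whippet.c changes above reduce to a
// simple pattern -- per-worker scratch state (here, the evacuation
// allocator) lives in the trace worker's own stack frame, is passed down
// by pointer to every edge visit, and is flushed exactly once when the
// worker finishes, so the allocation fast path needs no synchronization.
// A self-contained sketch with illustrative names only:
#include <stddef.h>

struct worker_scratch {
  size_t hole, limit;              // stand-in for a bump-pointer region
};

static void scratch_reset(struct worker_scratch *s) { s->hole = s->limit = 0; }
static void scratch_flush(struct worker_scratch *s) { s->hole = s->limit = 0; }

static void
visit_edge(void *edge, struct worker_scratch *scratch) {
  // Evacuate-or-mark would allocate copy space from *scratch here,
  // never from a shared pool.
  (void) edge; (void) scratch;
}

static void
run_trace_worker(void **edges, size_t count) {
  struct worker_scratch scratch;   // thread-local by construction
  scratch_reset(&scratch);
  for (size_t i = 0; i < count; i++)
    visit_edge(edges[i], &scratch);
  scratch_flush(&scratch);         // return unused copy space once, at the end
}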
@@ -760,7 +772,7 @@ static double heap_last_gc_yield(struct gc_heap *heap) { struct nofl_space *nofl_space = heap_nofl_space(heap); size_t nofl_yield = nofl_space_yield(nofl_space); - size_t evacuation_reserve = nofl_space_evacuation_reserve(nofl_space); + size_t evacuation_reserve = nofl_space_evacuation_reserve_bytes(nofl_space); // FIXME: Size nofl evacuation reserve based on size of nofl space, // not heap size. size_t minimum_evacuation_reserve = @@ -1030,7 +1042,8 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation); detect_out_of_memory(heap); trace_pinned_roots_after_stop(heap); - nofl_space_prepare_for_evacuation(nofl_space, gc_kind); + if (gc_kind == GC_COLLECTION_COMPACTING) + nofl_space_prepare_evacuation(nofl_space); trace_roots_after_stop(heap); HEAP_EVENT(heap, roots_traced); gc_tracer_trace(&heap->tracer);
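// Aside, not part of the patch: the interior-pointer search in
// nofl_space_mark_conservative_ref, restated over a plain per-granule
// byte table. Bit values and names are placeholders; the real code uses
// the live/young mask for "object start" and NOFL_METADATA_BYTE_END for
// "last granule of an allocation".
#include <stdint.h>
#include <stddef.h>

#define META_OBJECT_START 0x01
#define META_OBJECT_END   0x10

// Precondition: meta[granule] carries no object-start bit, i.e. the
// address itself is not an object's first granule. Returns the granule
// index of the enclosing object's start, or -1 if the address is not
// inside any object in this block.
static ptrdiff_t
find_enclosing_object_start(const uint8_t *meta, size_t granule) {
  while (granule > 0) {
    uint8_t byte = meta[--granule];
    if (byte & META_OBJECT_END)
      return -1;                    // walked into the previous allocation's tail
    if (byte & META_OBJECT_START)
      return (ptrdiff_t) granule;   // found the start of the enclosing object
  }
  return -1;                        // searched past the start of the block
}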
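// Aside, not part of the patch: a self-contained model of the
// claim/copy/commit-or-abort protocol that the gc_atomic_forward_* calls
// above are used for. The word encoding (0 = unclaimed, 1 = busy,
// 2 = aborted, any other value = forwarded address) is an assumption made
// for this sketch only; it works because real object addresses are
// granule-aligned and therefore never 0, 1, or 2.
#include <stdatomic.h>
#include <stdint.h>

enum { FWD_UNCLAIMED = 0, FWD_BUSY = 1, FWD_ABORTED = 2 };

// Exactly one copier wins the right to evacuate the object.
static int
fwd_try_claim(_Atomic uintptr_t *word) {
  uintptr_t expected = FWD_UNCLAIMED;
  return atomic_compare_exchange_strong_explicit(word, &expected, FWD_BUSY,
                                                 memory_order_acquire,
                                                 memory_order_relaxed);
}

// Publish the new address only after the payload has been copied.
static void
fwd_commit(_Atomic uintptr_t *word, uintptr_t new_addr) {
  atomic_store_explicit(word, new_addr, memory_order_release);
}

// Copy space ran out: make the abort sticky so nobody retries evacuation
// and the object is marked in place instead.
static void
fwd_abort(_Atomic uintptr_t *word) {
  atomic_store_explicit(word, FWD_ABORTED, memory_order_release);
}

// Losers spin while the winner is busy, then either adopt the forwarded
// address or observe the abort and mark in place.
static uintptr_t
fwd_wait(_Atomic uintptr_t *word) {
  uintptr_t value;
  do {
    value = atomic_load_explicit(word, memory_order_acquire);
  } while (value == FWD_BUSY);
  return value;   // FWD_ABORTED, or the object's new address
}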