wingo pushed a commit to branch wip-whippet
in repository guile.

commit 1ecb45a437df45ae2ae894639e91e999c9c50813
Author:     Andy Wingo <wi...@igalia.com>
AuthorDate: Wed Oct 2 21:25:09 2024 +0200

Switch mmc to field-logging write barrier Instead of the card table, use metadata bytes for field-logging. More precision should lead to less work during the pause. --- api/mmc-attrs.h | 4 +- src/field-set.h | 205 +++++++++++++++++++++++++++++++++++++++++++++++ src/large-object-space.h | 77 +++++++----------- src/mmc.c | 77 +++++++++++------- src/nofl-space.h | 180 +++++++++++++++++++++-------------------- src/root.h | 20 ++--- 6 files changed, 383 insertions(+), 180 deletions(-) diff --git a/api/mmc-attrs.h b/api/mmc-attrs.h index f527c127b..5d4dcb490 100644 --- a/api/mmc-attrs.h +++ b/api/mmc-attrs.h @@ -56,7 +56,7 @@ static inline uint8_t gc_old_generation_check_alloc_table_bit_pattern(void) { static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) { if (GC_GENERATIONAL) { if (obj_size <= gc_allocator_large_threshold()) - return GC_WRITE_BARRIER_CARD; + return GC_WRITE_BARRIER_FIELD; return GC_WRITE_BARRIER_SLOW; } return GC_WRITE_BARRIER_NONE; @@ -71,7 +71,7 @@ static inline size_t gc_write_barrier_card_size(void) { } static inline size_t gc_write_barrier_field_table_alignment(void) { GC_ASSERT(GC_GENERATIONAL); - return 4 * 1024 * 1024; + return gc_allocator_alloc_table_alignment(); } static inline size_t gc_write_barrier_field_fields_per_byte(void) { GC_ASSERT(GC_GENERATIONAL); diff --git a/src/field-set.h b/src/field-set.h new file mode 100644 index 000000000..c7ddffd08 --- /dev/null +++ b/src/field-set.h @@ -0,0 +1,205 @@ +#ifndef FIELD_SET_H +#define FIELD_SET_H + +#include <pthread.h> +#include <stdatomic.h> +#include <stdlib.h> + +#include "assert.h" +#include "gc-edge.h" +#include "gc-lock.h" +#include "tracer.h" + +#define GC_EDGE_BUFFER_CAPACITY 510 + +struct gc_edge_buffer { + struct gc_edge_buffer *next; + size_t size; + struct gc_edge edges[GC_EDGE_BUFFER_CAPACITY]; +}; + +// Lock-free. +struct gc_edge_buffer_list { + struct gc_edge_buffer *head; +}; + +// With a lock. 
+struct gc_edge_buffer_stack { + struct gc_edge_buffer_list list; +}; + +struct gc_field_set { + struct gc_edge_buffer_list full; + struct gc_edge_buffer_stack partly_full; + struct gc_edge_buffer_list empty; + size_t count; + pthread_mutex_t lock; +}; + +struct gc_field_set_writer { + struct gc_edge_buffer *buf; + struct gc_field_set *set; +}; + +static void +gc_edge_buffer_list_push(struct gc_edge_buffer_list *list, + struct gc_edge_buffer *buf) { + struct gc_edge_buffer *next = + atomic_load_explicit(&list->head, memory_order_relaxed); + do { + buf->next = next; + } while (!atomic_compare_exchange_weak_explicit(&list->head, &next, buf, + memory_order_acq_rel, + memory_order_acquire)); +} + +static struct gc_edge_buffer* +gc_edge_buffer_list_pop(struct gc_edge_buffer_list *list) { + struct gc_edge_buffer *head = + atomic_load_explicit(&list->head, memory_order_acquire); + struct gc_edge_buffer *next; + do { + if (!head) return NULL; + next = head->next; + } while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next, + memory_order_acq_rel, + memory_order_acquire)); + head->next = NULL; + return head; +} + +static void +gc_edge_buffer_stack_push(struct gc_edge_buffer_stack *stack, + struct gc_edge_buffer *buf, + const struct gc_lock *lock) { + buf->next = stack->list.head; + stack->list.head = buf; +} + +static struct gc_edge_buffer* +gc_edge_buffer_stack_pop(struct gc_edge_buffer_stack *stack, + const struct gc_lock *lock) { + struct gc_edge_buffer *head = stack->list.head; + if (head) { + stack->list.head = head->next; + head->next = NULL; + } + return head; +} + +static void +gc_field_set_init(struct gc_field_set *set) { + memset(set, 0, sizeof(*set)); + pthread_mutex_init(&set->lock, NULL); +} + +static struct gc_edge_buffer* +gc_field_set_acquire_buffer(struct gc_field_set *set) { + struct gc_edge_buffer *ret; + + ret = gc_edge_buffer_list_pop(&set->empty); + if (ret) return ret; + + struct gc_lock lock = gc_lock_acquire(&set->lock); + ret = gc_edge_buffer_stack_pop(&set->partly_full, &lock); + gc_lock_release(&lock); + if (ret) return ret; + + // atomic inc count + ret = malloc(sizeof(*ret)); + if (!ret) { + perror("Failed to allocate remembered set"); + GC_CRASH(); + } + memset(ret, 0, sizeof(*ret)); + return ret; +} + +static void +gc_field_set_release_buffer(struct gc_field_set *set, + struct gc_edge_buffer *buf) { + if (buf->size == GC_EDGE_BUFFER_CAPACITY) { + gc_edge_buffer_list_push(&set->full, buf); + } else { + struct gc_lock lock = gc_lock_acquire(&set->lock); + gc_edge_buffer_stack_push(&set->partly_full, buf, &lock); + gc_lock_release(&lock); + } +} + +static void +gc_field_set_add_roots(struct gc_field_set *set, struct gc_tracer *tracer) { + struct gc_edge_buffer *buf; + for (buf = set->partly_full.list.head; buf; buf = buf->next) + gc_tracer_add_root(tracer, gc_root_edge_buffer(buf)); + for (buf = set->full.head; buf; buf = buf->next) + gc_tracer_add_root(tracer, gc_root_edge_buffer(buf)); +} + +static void +gc_field_set_clear(struct gc_field_set *set, + void (*forget_edge)(struct gc_edge, struct gc_heap*), + struct gc_heap *heap) { + struct gc_edge_buffer *partly_full = set->partly_full.list.head; + struct gc_edge_buffer *full = set->full.head; + // Clear the full and partly full sets now so that if a collector + // wanted to it could re-add an edge to the remembered set. 
+ set->partly_full.list.head = NULL; + set->full.head = NULL; + struct gc_edge_buffer *buf; + for (buf = partly_full; buf; buf = buf->next) { + for (size_t i = 0; i < buf->size; i++) + forget_edge(buf->edges[i], heap); + buf->size = 0; + gc_edge_buffer_list_push(&set->empty, buf); + } + for (buf = full; buf; buf = buf->next) { + for (size_t i = 0; i < buf->size; i++) + forget_edge(buf->edges[i], heap); + buf->size = 0; + gc_edge_buffer_list_push(&set->empty, buf); + } +} + +static inline void +gc_field_set_trace_edge_buffer(struct gc_field_set *set, + struct gc_edge_buffer *buf, + void (*tracer_visit)(struct gc_edge, + struct gc_heap*, + void *data), + struct gc_heap *heap, + struct gc_trace_worker *worker) { + for (size_t i = 0; i < buf->size; i++) + tracer_visit(buf->edges[i], heap, worker); +} + +static void +gc_field_set_writer_release_buffer(struct gc_field_set_writer *writer) { + if (writer->buf) { + gc_field_set_release_buffer(writer->set, writer->buf); + writer->buf = NULL; + } +} + +static void +gc_field_set_writer_init(struct gc_field_set_writer *writer, + struct gc_field_set *set) { + writer->set = set; + writer->buf = NULL; +} + +static void +gc_field_set_writer_add_edge(struct gc_field_set_writer *writer, + struct gc_edge edge) { + struct gc_edge_buffer *buf = writer->buf; + if (GC_UNLIKELY(!buf)) + writer->buf = buf = gc_field_set_acquire_buffer(writer->set); + GC_ASSERT(buf->size < GC_EDGE_BUFFER_CAPACITY); + buf->edges[buf->size++] = edge; + if (GC_UNLIKELY(buf->size == GC_EDGE_BUFFER_CAPACITY)) { + gc_edge_buffer_list_push(&writer->set->full, buf); + writer->buf = NULL; + } +} + +#endif // FIELD_SET_H diff --git a/src/large-object-space.h b/src/large-object-space.h index 323c3f2f8..4c7277797 100644 --- a/src/large-object-space.h +++ b/src/large-object-space.h @@ -36,7 +36,7 @@ struct large_object_space { struct address_set from_space; struct address_set to_space; struct address_set survivor_space; - struct address_set remembered_set; + struct address_set remembered_edges; struct address_set free_space; struct address_map object_pages; // for each object: size in pages. 
struct address_map predecessors; // subsequent addr -> object addr @@ -50,7 +50,7 @@ static int large_object_space_init(struct large_object_space *space, address_set_init(&space->from_space); address_set_init(&space->to_space); address_set_init(&space->survivor_space); - address_set_init(&space->remembered_set); + address_set_init(&space->remembered_edges); address_set_init(&space->free_space); address_map_init(&space->object_pages); address_map_init(&space->predecessors); @@ -77,49 +77,6 @@ static inline int large_object_space_contains(struct large_object_space *space, return ret; } -struct large_object_space_trace_remembered_data { - void (*trace)(struct gc_ref, struct gc_heap*); - struct gc_heap *heap; -}; - -static void large_object_space_trace_one_remembered(uintptr_t addr, - void *data) { - struct gc_ref ref = gc_ref(addr); - gc_object_clear_remembered_nonatomic(ref); - struct large_object_space_trace_remembered_data *vdata = data; - vdata->trace(ref, vdata->heap); -} - -static void -large_object_space_clear_remembered_set(struct large_object_space *space) { - address_set_clear(&space->remembered_set); -} - -static void -large_object_space_trace_remembered_set(struct large_object_space *space, - void (*trace)(struct gc_ref, - struct gc_heap*), - struct gc_heap *heap) { - struct large_object_space_trace_remembered_data vdata = { trace, heap }; - - if (!GC_GENERATIONAL) - return; - address_set_for_each(&space->remembered_set, - large_object_space_trace_one_remembered, &vdata); - large_object_space_clear_remembered_set(space); -} - -static void -large_object_space_remember_object(struct large_object_space *space, - struct gc_ref ref) { - GC_ASSERT(GC_GENERATIONAL); - uintptr_t addr = gc_ref_value(ref); - pthread_mutex_lock(&space->lock); - GC_ASSERT(!address_set_contains(&space->remembered_set, addr)); - address_set_add(&space->remembered_set, addr); - pthread_mutex_unlock(&space->lock); -} - static void large_object_space_flip_survivor(uintptr_t addr, void *data) { struct large_object_space *space = data; @@ -176,17 +133,41 @@ static int large_object_space_is_copied(struct large_object_space *space, return copied; } +static int +large_object_space_is_survivor_with_lock(struct large_object_space *space, + struct gc_ref ref) { + return address_set_contains(&space->survivor_space, gc_ref_value(ref)); +} + static int large_object_space_is_survivor(struct large_object_space *space, struct gc_ref ref) { GC_ASSERT(large_object_space_contains(space, ref)); - int old = 0; - uintptr_t addr = gc_ref_value(ref); pthread_mutex_lock(&space->lock); - old = address_set_contains(&space->survivor_space, addr); + int old = large_object_space_is_survivor_with_lock(space, ref); pthread_mutex_unlock(&space->lock); return old; } +static int large_object_space_remember_edge(struct large_object_space *space, + struct gc_ref obj, + struct gc_edge edge) { + int remembered = 0; + uintptr_t edge_addr = (uintptr_t)gc_edge_loc(edge); + pthread_mutex_lock(&space->lock); + if (large_object_space_is_survivor_with_lock(space, obj) + && !address_set_contains(&space->remembered_edges, edge_addr)) { + address_set_add(&space->remembered_edges, edge_addr); + remembered = 1; + } + pthread_mutex_unlock(&space->lock); + return remembered; +} + +static void +large_object_space_clear_remembered_edges(struct large_object_space *space) { + address_set_clear(&space->remembered_edges); +} + static int large_object_space_mark_object(struct large_object_space *space, struct gc_ref ref) { return large_object_space_copy(space, ref); diff 
--git a/src/mmc.c b/src/mmc.c index ce1571118..b1f4238a7 100644 --- a/src/mmc.c +++ b/src/mmc.c @@ -11,6 +11,7 @@ #include "background-thread.h" #include "debug.h" +#include "field-set.h" #include "gc-align.h" #include "gc-inline.h" #include "gc-platform.h" @@ -33,6 +34,7 @@ struct gc_heap { struct nofl_space nofl_space; struct large_object_space large_object_space; struct gc_extern_space *extern_space; + struct gc_field_set remembered_set; size_t large_object_pages; pthread_mutex_t lock; pthread_cond_t collector_cond; @@ -72,6 +74,7 @@ struct gc_heap { struct gc_mutator { struct nofl_allocator allocator; + struct gc_field_set_writer logger; struct gc_heap *heap; struct gc_stack stack; struct gc_mutator_roots *roots; @@ -188,6 +191,7 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { mut->event_listener_data = heap->event_listener.mutator_added(heap->event_listener_data); nofl_allocator_reset(&mut->allocator); + gc_field_set_writer_init(&mut->logger, &heap->remembered_set); heap_lock(heap); // We have no roots. If there is a GC currently in progress, we have // nothing to add. Just wait until it's done. @@ -207,6 +211,8 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); MUTATOR_EVENT(mut, mutator_removed); mut->heap = NULL; heap_lock(heap); @@ -360,12 +366,9 @@ trace_root(struct gc_root root, struct gc_heap *heap, case GC_ROOT_KIND_EDGE: tracer_visit(root.edge, heap, worker); break; - case GC_ROOT_KIND_REMEMBERED_OBJECT: - trace_one(root.ref, heap, worker); - break; - case GC_ROOT_KIND_REMEMBERED_SLAB: - nofl_space_trace_remembered_slab(heap_nofl_space(heap), root.idx, - trace_one, heap, worker); + case GC_ROOT_KIND_EDGE_BUFFER: + gc_field_set_trace_edge_buffer(&heap->remembered_set, root.edge_buffer, + tracer_visit, heap, worker); break; default: GC_CRASH(); @@ -633,25 +636,27 @@ enqueue_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) { gc_tracer_add_root(&heap->tracer, gc_root_edge(edge)); } -static void -enqueue_remembered_object(struct gc_ref ref, struct gc_heap *heap) { - gc_tracer_add_root(&heap->tracer, gc_root_remembered_object(ref)); -} - static void enqueue_generational_roots(struct gc_heap *heap, enum gc_collection_kind gc_kind) { if (!GC_GENERATIONAL) return; - if (gc_kind == GC_COLLECTION_MINOR) { - for (size_t i = 0; i < heap_nofl_space(heap)->nslabs; i++) - gc_tracer_add_root(&heap->tracer, gc_root_remembered_slab(i)); - large_object_space_trace_remembered_set(heap_large_object_space(heap), - enqueue_remembered_object, - heap); - } else { - nofl_space_clear_remembered_set(heap_nofl_space(heap)); - large_object_space_clear_remembered_set(heap_large_object_space(heap)); - } + if (gc_kind == GC_COLLECTION_MINOR) + gc_field_set_add_roots(&heap->remembered_set, &heap->tracer); +} + +static inline void +forget_remembered_edge(struct gc_edge edge, struct gc_heap *heap) { + struct nofl_space *space = heap_nofl_space(heap); + if (nofl_space_contains_edge(space, edge)) + nofl_space_forget_edge(space, edge); + // Otherwise the edge is in the lospace, whose remembered edges are + // cleared in bulk. 
+} + +static void +clear_remembered_set(struct gc_heap *heap) { + gc_field_set_clear(&heap->remembered_set, forget_remembered_edge, heap); + large_object_space_clear_remembered_edges(heap_large_object_space(heap)); } static void @@ -768,6 +773,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind, HEAP_EVENT(heap, finalizers_traced); sweep_ephemerons(heap); gc_tracer_release(&heap->tracer); + clear_remembered_set(heap); nofl_space_finish_gc(nofl_space, gc_kind); large_object_space_finish_gc(lospace, is_minor); gc_extern_space_finish_gc(exspace, is_minor); @@ -792,6 +798,8 @@ trigger_collection(struct gc_mutator *mut, int prev_kind = -1; gc_stack_capture_hot(&mut->stack); nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); heap_lock(heap); while (mutators_are_stopping(heap)) prev_kind = pause_mutator_for_collection(heap, mut); @@ -815,6 +823,8 @@ gc_safepoint_slow(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); gc_stack_capture_hot(&mut->stack); nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); heap_lock(heap); while (mutators_are_stopping(mutator_heap(mut))) pause_mutator_for_collection(heap, mut); @@ -900,14 +910,16 @@ void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj, size_t obj_size, struct gc_edge edge, struct gc_ref new_val) { + GC_ASSERT(gc_ref_is_heap_object(new_val)); if (!GC_GENERATIONAL) return; - GC_ASSERT(obj_size > gc_allocator_large_threshold()); - struct gc_heap *heap = mutator_heap(mut); - struct large_object_space *space = heap_large_object_space(heap); - if (!large_object_space_is_survivor(space, obj)) + if (gc_object_is_old_generation_slow(mut, new_val)) return; - if (gc_object_set_remembered(obj)) - large_object_space_remember_object(space, obj); + struct gc_heap *heap = mutator_heap(mut); + if ((obj_size <= gc_allocator_large_threshold()) + ? nofl_space_remember_edge(heap_nofl_space(heap), obj, edge) + : large_object_space_remember_edge(heap_large_object_space(heap), + obj, edge)) + gc_field_set_writer_add_edge(&mut->logger, edge); } struct gc_ephemeron* @@ -1032,6 +1044,7 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) { // *heap is already initialized to 0. 
+ gc_field_set_init(&heap->remembered_set); pthread_mutex_init(&heap->lock, NULL); pthread_cond_init(&heap->mutator_cond, NULL); pthread_cond_init(&heap->collector_cond, NULL); @@ -1080,9 +1093,11 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(), NOFL_METADATA_BYTE_YOUNG); GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), NOFL_METADATA_BYTE_END); if (GC_GENERATIONAL) { - GC_ASSERT_EQ(gc_write_barrier_card_table_alignment(), NOFL_SLAB_SIZE); - GC_ASSERT_EQ(gc_write_barrier_card_size(), - NOFL_BLOCK_SIZE / NOFL_REMSET_BYTES_PER_BLOCK); + GC_ASSERT_EQ(gc_write_barrier_field_table_alignment(), NOFL_SLAB_SIZE); + GC_ASSERT_EQ(gc_write_barrier_field_fields_per_byte(), + NOFL_GRANULE_SIZE / sizeof(uintptr_t)); + GC_ASSERT_EQ(gc_write_barrier_field_first_bit_pattern(), + NOFL_METADATA_BYTE_LOGGED_0); } *heap = calloc(1, sizeof(struct gc_heap)); @@ -1139,6 +1154,8 @@ static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { GC_ASSERT(mut->next == NULL); nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); heap_lock(heap); heap->inactive_mutator_count++; gc_stack_capture_hot(&mut->stack); diff --git a/src/nofl-space.h b/src/nofl-space.h index e47162853..9759b3d8e 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -45,10 +45,10 @@ STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD, #define NOFL_NONMETA_BLOCKS_PER_SLAB (NOFL_BLOCKS_PER_SLAB - NOFL_META_BLOCKS_PER_SLAB) #define NOFL_METADATA_BYTES_PER_SLAB (NOFL_NONMETA_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK) #define NOFL_SLACK_METADATA_BYTES_PER_SLAB (NOFL_META_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK) -#define NOFL_REMSET_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) -#define NOFL_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB) -#define NOFL_SLACK_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) -#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_REMSET_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) +#define NOFL_VESTIGIAL_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) +#define NOFL_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB) +#define NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) #define NOFL_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB) #define NOFL_SLACK_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) #define NOFL_HEADER_BYTES_PER_SLAB NOFL_SLACK_SUMMARY_BYTES_PER_SLAB @@ -127,7 +127,7 @@ struct nofl_block_ref { struct nofl_slab { struct nofl_slab_header header; struct nofl_block_summary summaries[NOFL_NONMETA_BLOCKS_PER_SLAB]; - uint8_t remembered_set[NOFL_REMSET_BYTES_PER_SLAB]; + uint8_t unused[NOFL_VESTIGIAL_BYTES_PER_SLAB]; uint8_t metadata[NOFL_METADATA_BYTES_PER_SLAB]; struct nofl_block blocks[NOFL_NONMETA_BLOCKS_PER_SLAB]; }; @@ -297,8 +297,6 @@ nofl_block_set_mark(uintptr_t addr) { } #define NOFL_GRANULES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) -#define NOFL_GRANULES_PER_REMSET_BYTE \ - (NOFL_GRANULES_PER_BLOCK / NOFL_REMSET_BYTES_PER_BLOCK) static struct nofl_block_summary* nofl_block_summary_for_addr(uintptr_t addr) { @@ -909,67 +907,75 @@ 
nofl_space_set_ephemeron_flag(struct gc_ref ref) { struct gc_trace_worker; -// Note that it's quite possible (and even likely) that any given remset -// byte doesn't hold any roots, if all stores were to nursery objects. -STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0); -static void -nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab, - size_t card, - void (*trace_object)(struct gc_ref, - struct gc_heap*, - struct gc_trace_worker*), - struct gc_heap *heap, - struct gc_trace_worker *worker) { - uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0]; - size_t granule_base = card * NOFL_GRANULES_PER_REMSET_BYTE; - for (size_t granule_in_remset = 0; - granule_in_remset < NOFL_GRANULES_PER_REMSET_BYTE; - granule_in_remset += 8, granule_base += 8) { - uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base); - mark_bytes &= space->sweep_mask; - while (mark_bytes) { - size_t granule_offset = count_zero_bytes(mark_bytes); - mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8)); - size_t granule = granule_base + granule_offset; - uintptr_t addr = first_addr_in_slab + granule * NOFL_GRANULE_SIZE; - GC_ASSERT(nofl_metadata_byte_for_addr(addr) == &slab->metadata[granule]); - trace_object(gc_ref(addr), heap, worker); - } - } +static inline int +nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) { + return extents_contain_addr(space->extents, addr); } -static void -nofl_space_trace_remembered_slab(struct nofl_space *space, - size_t slab_idx, - void (*trace_object)(struct gc_ref, - struct gc_heap*, - struct gc_trace_worker*), - struct gc_heap *heap, - struct gc_trace_worker *worker) { - GC_ASSERT(slab_idx < space->nslabs); - struct nofl_slab *slab = space->slabs[slab_idx]; - uint8_t *remset = slab->remembered_set; - for (size_t card_base = 0; - card_base < NOFL_REMSET_BYTES_PER_SLAB; - card_base += 8) { - uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base); - if (!remset_bytes) continue; - memset(remset + card_base, 0, 8); - while (remset_bytes) { - size_t card_offset = count_zero_bytes(remset_bytes); - remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8)); - nofl_space_trace_card(space, slab, card_base + card_offset, - trace_object, heap, worker); - } - } +static inline int +nofl_space_contains_conservative_ref(struct nofl_space *space, + struct gc_conservative_ref ref) { + return nofl_space_contains_address(space, gc_conservative_ref_value(ref)); +} + +static inline int +nofl_space_contains(struct nofl_space *space, struct gc_ref ref) { + return nofl_space_contains_address(space, gc_ref_value(ref)); +} + +static inline int +nofl_space_contains_edge(struct nofl_space *space, struct gc_edge edge) { + return nofl_space_contains_address(space, (uintptr_t)gc_edge_loc(edge)); +} + +static inline int +nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) { + uint8_t *metadata = nofl_metadata_byte_for_object(ref); + uint8_t mask = NOFL_METADATA_BYTE_MARK_0 + | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed); + return byte & mask; +} + +static uint8_t* +nofl_field_logged_byte(struct gc_edge edge) { + return nofl_metadata_byte_for_addr((uintptr_t)gc_edge_loc(edge)); +} + +static uint8_t +nofl_field_logged_bit(struct gc_edge edge) { + GC_ASSERT_EQ(sizeof(uintptr_t) * 2, NOFL_GRANULE_SIZE); + size_t field = ((uintptr_t)gc_edge_loc(edge)) / sizeof(uintptr_t); + return NOFL_METADATA_BYTE_LOGGED_0 << (field % 2); +} + +static int 
+nofl_space_remember_edge(struct nofl_space *space, struct gc_ref obj, + struct gc_edge edge) { + GC_ASSERT(nofl_space_contains(space, obj)); + if (!GC_GENERATIONAL) return 0; + if (!nofl_space_is_survivor(space, obj)) + return 0; + uint8_t* loc = nofl_field_logged_byte(edge); + uint8_t bit = nofl_field_logged_bit(edge); + uint8_t byte = atomic_load_explicit(loc, memory_order_acquire); + do { + if (byte & bit) return 0; + } while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit, + memory_order_acq_rel, + memory_order_acquire)); + return 1; } static void -nofl_space_clear_remembered_set(struct nofl_space *space) { - if (!GC_GENERATIONAL) return; - for (size_t slab = 0; slab < space->nslabs; slab++) { - memset(space->slabs[slab]->remembered_set, 0, NOFL_REMSET_BYTES_PER_SLAB); - } +nofl_space_forget_edge(struct nofl_space *space, struct gc_edge edge) { + GC_ASSERT(nofl_space_contains_edge(space, edge)); + GC_ASSERT(GC_GENERATIONAL); + uint8_t* loc = nofl_field_logged_byte(edge); + // Clear both logged bits. + uint8_t bits = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1; + uint8_t byte = atomic_load_explicit(loc, memory_order_acquire); + atomic_store_explicit(loc, byte & ~bits, memory_order_release); } static void @@ -1431,13 +1437,29 @@ nofl_space_pin_object(struct nofl_space *space, struct gc_ref ref) { memory_order_acquire)); } -static inline int -nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) { - uint8_t *metadata = nofl_metadata_byte_for_object(ref); - uint8_t mask = NOFL_METADATA_BYTE_MARK_0 - | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; - uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed); - return byte & mask; +static inline void +clear_logged_bits_in_evacuated_object(uint8_t *metadata, size_t count) { + // On a major collection, it could be that we evacuate an object that + // has one or more fields in the old-to-new remembered set. Because + // the young generation is empty after a major collection, we know the + // old-to-new remembered set will be empty also. To clear the + // remembered set, we call gc_field_set_clear, which will end up + // visiting all remembered edges and clearing their logged bits. But + // that doesn't work for evacuated objects, because their edges move: + // gc_field_set_clear will frob the pre-evacuation metadata bytes of + // the object. So here we explicitly clear logged bits for evacuated + // objects. That the bits for the pre-evacuation location are also + // frobbed by gc_field_set_clear doesn't cause a problem, as that + // memory will be swept and cleared later. + // + // This concern doesn't apply to minor collections: there we will + // never evacuate an object in the remembered set, because old objects + // aren't traced during a minor collection. + uint8_t mask = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1; + for (size_t i = 0; i < count; i++) { + if (metadata[i] & mask) + metadata[i] &= ~mask; + } } static inline int @@ -1472,6 +1494,8 @@ nofl_space_evacuate(struct nofl_space *space, uint8_t *metadata, uint8_t byte, // the object's fields need to be traced. 
uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref); memcpy(new_metadata + 1, metadata + 1, object_granules - 1); + if (GC_GENERATIONAL) + clear_logged_bits_in_evacuated_object(new_metadata, object_granules); gc_edge_update(edge, new_ref); return nofl_space_set_nonempty_mark(space, new_metadata, byte, new_ref); @@ -1523,22 +1547,6 @@ nofl_space_evacuate_or_mark_object(struct nofl_space *space, return nofl_space_set_nonempty_mark(space, metadata, byte, old_ref); } -static inline int -nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) { - return extents_contain_addr(space->extents, addr); -} - -static inline int -nofl_space_contains_conservative_ref(struct nofl_space *space, - struct gc_conservative_ref ref) { - return nofl_space_contains_address(space, gc_conservative_ref_value(ref)); -} - -static inline int -nofl_space_contains(struct nofl_space *space, struct gc_ref ref) { - return nofl_space_contains_address(space, gc_ref_value(ref)); -} - static inline int nofl_space_forward_if_evacuated(struct nofl_space *space, struct gc_edge edge, diff --git a/src/root.h b/src/root.h index 46e019b06..4fc705e61 100644 --- a/src/root.h +++ b/src/root.h @@ -7,6 +7,7 @@ struct gc_ephemeron; struct gc_heap; struct gc_mutator; +struct gc_edge_buffer; enum gc_root_kind { GC_ROOT_KIND_NONE, @@ -16,8 +17,7 @@ enum gc_root_kind { GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES, GC_ROOT_KIND_RESOLVED_EPHEMERONS, GC_ROOT_KIND_EDGE, - GC_ROOT_KIND_REMEMBERED_OBJECT, - GC_ROOT_KIND_REMEMBERED_SLAB, + GC_ROOT_KIND_EDGE_BUFFER, }; struct gc_root { @@ -28,8 +28,7 @@ struct gc_root { struct gc_ephemeron *resolved_ephemerons; struct extent_range range; struct gc_edge edge; - struct gc_ref ref; - size_t idx; + struct gc_edge_buffer *edge_buffer; }; }; @@ -73,16 +72,9 @@ gc_root_edge(struct gc_edge edge) { } static inline struct gc_root -gc_root_remembered_object(struct gc_ref ref) { - struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_OBJECT }; - ret.ref = ref; - return ret; -} - -static inline struct gc_root -gc_root_remembered_slab(size_t idx) { - struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_SLAB }; - ret.idx = idx; +gc_root_edge_buffer(struct gc_edge_buffer *buf) { + struct gc_root ret = { GC_ROOT_KIND_EDGE_BUFFER }; + ret.edge_buffer = buf; return ret; }
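
For readers following the change: the barrier now works per field rather than per card. On a store of a young reference into an old object, the slow path sets a per-field "logged" bit in the same slab-aligned side table that holds the nofl allocation/mark bytes (two pointer-sized fields per granule, hence the two NOFL_METADATA_BYTE_LOGGED_* bits per metadata byte), and the edge itself is buffered in the mutator's gc_field_set_writer so the next minor collection traces exactly those fields. Below is a minimal sketch, not code from this commit, of how an inline fast path could locate the logged bit using only the attributes asserted in gc_init(); the function name example_write_barrier and the byte-address computation are assumptions modeled on nofl_field_logged_byte() and nofl_field_logged_bit().

  // Minimal sketch (assumed name example_write_barrier; not part of this
  // commit) of a mutator-side fast path for the field-logging barrier,
  // using the attributes asserted in gc_init() above.  Assumes the Whippet
  // API headers plus <stdatomic.h> and <stdint.h>.
  static inline void
  example_write_barrier(struct gc_mutator *mut, struct gc_ref obj,
                        size_t obj_size, struct gc_edge edge,
                        struct gc_ref new_val) {
    if (!GC_GENERATIONAL || !gc_ref_is_heap_object(new_val))
      return;
    if (gc_write_barrier_kind(obj_size) == GC_WRITE_BARRIER_FIELD) {
      uintptr_t addr = (uintptr_t)gc_edge_loc(edge);
      uintptr_t base = addr & ~(gc_write_barrier_field_table_alignment() - 1);
      size_t field = (addr - base) / sizeof(uintptr_t);
      // Assumption: the logged bits live in the same slab-aligned table as
      // the nofl allocation bytes, one byte per granule from the slab base,
      // as in nofl_field_logged_byte().
      uint8_t *loc = (uint8_t *)base
        + field / gc_write_barrier_field_fields_per_byte();
      uint8_t bit = gc_write_barrier_field_first_bit_pattern()
        << (field % gc_write_barrier_field_fields_per_byte());
      if (atomic_load_explicit(loc, memory_order_relaxed) & bit)
        return;  // Field already logged; no need to take the slow path.
    }
    // Marks the edge in the owning space (nofl logged bits or lospace
    // remembered_edges) and buffers it in the mutator's field-set writer.
    gc_write_barrier_slow(mut, obj, obj_size, edge, new_val);
  }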
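
The new field-set.h is otherwise a small self-contained module. As a quick orientation, here is a hypothetical driver (not code from this commit; mmc.c above does the equivalent at heap init, in the barrier slow path, and at safepoints) showing how the per-heap gc_field_set and per-mutator gc_field_set_writer fit together:

  // Hypothetical driver for the gc_field_set / gc_field_set_writer API
  // introduced by this commit.  Names prefixed with example_ are invented
  // for illustration.
  #include "field-set.h"

  static struct gc_field_set example_remembered_set;          // one per heap
  static __thread struct gc_field_set_writer example_logger;  // one per mutator

  void example_heap_init(void) {
    gc_field_set_init(&example_remembered_set);
  }

  void example_mutator_start(void) {
    gc_field_set_writer_init(&example_logger, &example_remembered_set);
  }

  // Called from the write-barrier slow path, after the edge has been newly
  // remembered in its owning space; full buffers migrate to the lock-free
  // "full" list automatically.
  void example_log_edge(struct gc_edge edge) {
    gc_field_set_writer_add_edge(&example_logger, edge);
  }

  // Called at safepoints, when triggering a collection, and on mutator
  // removal, so a minor GC can enumerate the partly-full buffer as a
  // GC_ROOT_KIND_EDGE_BUFFER root via gc_field_set_add_roots().
  void example_mutator_safepoint(void) {
    gc_field_set_writer_release_buffer(&example_logger);
  }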