wingo pushed a commit to branch wip-whippet
in repository guile.

commit 1ecb45a437df45ae2ae894639e91e999c9c50813
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Wed Oct 2 21:25:09 2024 +0200

    Switch mmc to field-logging write barrier
    
    Instead of the card table, use metadata bytes for field-logging.  More
    precision should lead to less work during the pause.
---
 api/mmc-attrs.h          |   4 +-
 src/field-set.h          | 205 +++++++++++++++++++++++++++++++++++++++++++++++
 src/large-object-space.h |  77 +++++++-----------
 src/mmc.c                |  77 +++++++++++-------
 src/nofl-space.h         | 180 +++++++++++++++++++++--------------------
 src/root.h               |  20 ++---
 6 files changed, 383 insertions(+), 180 deletions(-)
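
(Illustration only; not part of the commit or the diffs below.)  The claim
in the commit message -- more precision, less work during the pause --
comes down to this: a card-table barrier dirties one byte covering a whole
card, so the collector must rescan every object on that card, whereas a
field-logging barrier records the exact edge that was written.  A
standalone sketch with made-up sizes and names, not Whippet's:

    /* Toy comparison of card marking vs. field logging. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define HEAP_BYTES (1 << 20)
    #define CARD_BYTES 256                        /* hypothetical card size */
    #define NCARDS     (HEAP_BYTES / CARD_BYTES)

    static uint8_t card_table[NCARDS];
    static uintptr_t edge_log[1024];
    static size_t edge_log_size;

    static void barrier_card(uintptr_t field_offset) {
      card_table[field_offset / CARD_BYTES] = 1;  /* dirty the whole card */
    }

    static void barrier_field(uintptr_t field_offset) {
      edge_log[edge_log_size++] = field_offset;   /* remember just this field */
    }

    int main(void) {
      /* One old-to-young store, at heap offset 4096 + 8, seen by each barrier. */
      barrier_card(4096 + 8);
      barrier_field(4096 + 8);

      size_t card_scan_bytes = 0;
      for (size_t i = 0; i < NCARDS; i++)
        if (card_table[i]) card_scan_bytes += CARD_BYTES;

      printf("card marking: rescan %zu bytes; field logging: %zu edge(s)\n",
             card_scan_bytes, edge_log_size);
      return 0;
    }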

diff --git a/api/mmc-attrs.h b/api/mmc-attrs.h
index f527c127b..5d4dcb490 100644
--- a/api/mmc-attrs.h
+++ b/api/mmc-attrs.h
@@ -56,7 +56,7 @@ static inline uint8_t gc_old_generation_check_alloc_table_bit_pattern(void) {
 static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
   if (GC_GENERATIONAL) {
     if (obj_size <= gc_allocator_large_threshold())
-      return GC_WRITE_BARRIER_CARD;
+      return GC_WRITE_BARRIER_FIELD;
     return GC_WRITE_BARRIER_SLOW;
   }
   return GC_WRITE_BARRIER_NONE;
@@ -71,7 +71,7 @@ static inline size_t gc_write_barrier_card_size(void) {
 }
 static inline size_t gc_write_barrier_field_table_alignment(void) {
   GC_ASSERT(GC_GENERATIONAL);
-  return 4 * 1024 * 1024;
+  return gc_allocator_alloc_table_alignment();
 }
 static inline size_t gc_write_barrier_field_fields_per_byte(void) {
   GC_ASSERT(GC_GENERATIONAL);
diff --git a/src/field-set.h b/src/field-set.h
new file mode 100644
index 000000000..c7ddffd08
--- /dev/null
+++ b/src/field-set.h
@@ -0,0 +1,205 @@
+#ifndef FIELD_SET_H
+#define FIELD_SET_H
+
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "assert.h"
+#include "gc-edge.h"
+#include "gc-lock.h"
+#include "tracer.h"
+
+#define GC_EDGE_BUFFER_CAPACITY 510
+
+struct gc_edge_buffer {
+  struct gc_edge_buffer *next;
+  size_t size;
+  struct gc_edge edges[GC_EDGE_BUFFER_CAPACITY];
+};
+
+// Lock-free.
+struct gc_edge_buffer_list {
+  struct gc_edge_buffer *head;
+};
+
+// With a lock.
+struct gc_edge_buffer_stack {
+  struct gc_edge_buffer_list list;
+};
+
+struct gc_field_set {
+  struct gc_edge_buffer_list full;
+  struct gc_edge_buffer_stack partly_full;
+  struct gc_edge_buffer_list empty;
+  size_t count;
+  pthread_mutex_t lock;
+};
+
+struct gc_field_set_writer {
+  struct gc_edge_buffer *buf;
+  struct gc_field_set *set;
+};
+
+static void
+gc_edge_buffer_list_push(struct gc_edge_buffer_list *list,
+                         struct gc_edge_buffer *buf) {
+  struct gc_edge_buffer *next =
+    atomic_load_explicit(&list->head, memory_order_relaxed);
+  do {
+    buf->next = next;
+  } while (!atomic_compare_exchange_weak_explicit(&list->head, &next, buf,
+                                                  memory_order_acq_rel,
+                                                  memory_order_acquire));
+}
+
+static struct gc_edge_buffer*
+gc_edge_buffer_list_pop(struct gc_edge_buffer_list *list) {
+  struct gc_edge_buffer *head =
+    atomic_load_explicit(&list->head, memory_order_acquire);
+  struct gc_edge_buffer *next;
+  do {
+    if (!head) return NULL;
+    next = head->next;
+  } while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next,
+                                                  memory_order_acq_rel,
+                                                  memory_order_acquire));
+  head->next = NULL;
+  return head;
+}
+
+static void
+gc_edge_buffer_stack_push(struct gc_edge_buffer_stack *stack,
+                          struct gc_edge_buffer *buf,
+                          const struct gc_lock *lock) {
+  buf->next = stack->list.head;
+  stack->list.head = buf;
+}
+
+static struct gc_edge_buffer*
+gc_edge_buffer_stack_pop(struct gc_edge_buffer_stack *stack,
+                         const struct gc_lock *lock) {
+  struct gc_edge_buffer *head = stack->list.head;
+  if (head) {
+    stack->list.head = head->next;
+    head->next = NULL;
+  }
+  return head;
+}
+
+static void
+gc_field_set_init(struct gc_field_set *set) {
+  memset(set, 0, sizeof(*set));
+  pthread_mutex_init(&set->lock, NULL);
+}
+
+static struct gc_edge_buffer*
+gc_field_set_acquire_buffer(struct gc_field_set *set) {
+  struct gc_edge_buffer *ret;
+
+  ret = gc_edge_buffer_list_pop(&set->empty);
+  if (ret) return ret;
+
+  struct gc_lock lock = gc_lock_acquire(&set->lock);
+  ret = gc_edge_buffer_stack_pop(&set->partly_full, &lock);
+  gc_lock_release(&lock);
+  if (ret) return ret;
+
+  // TODO: atomically increment set->count to track allocated buffers.
+  ret = malloc(sizeof(*ret));
+  if (!ret) {
+    perror("Failed to allocate remembered set");
+    GC_CRASH();
+  }
+  memset(ret, 0, sizeof(*ret));
+  return ret;
+}
+
+static void
+gc_field_set_release_buffer(struct gc_field_set *set,
+                            struct gc_edge_buffer *buf) {
+  if (buf->size == GC_EDGE_BUFFER_CAPACITY) {
+    gc_edge_buffer_list_push(&set->full, buf);
+  } else {
+    struct gc_lock lock = gc_lock_acquire(&set->lock);
+    gc_edge_buffer_stack_push(&set->partly_full, buf, &lock);
+    gc_lock_release(&lock);
+  }
+}
+
+static void
+gc_field_set_add_roots(struct gc_field_set *set, struct gc_tracer *tracer) {
+  struct gc_edge_buffer *buf;
+  for (buf = set->partly_full.list.head; buf; buf = buf->next)
+    gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
+  for (buf = set->full.head; buf; buf = buf->next)
+    gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
+}
+
+static void
+gc_field_set_clear(struct gc_field_set *set,
+                   void (*forget_edge)(struct gc_edge, struct gc_heap*),
+                   struct gc_heap *heap) {
+  struct gc_edge_buffer *partly_full = set->partly_full.list.head;
+  struct gc_edge_buffer *full = set->full.head;
+  // Clear the full and partly full sets now so that if a collector
+  // wanted to it could re-add an edge to the remembered set.
+  set->partly_full.list.head = NULL;
+  set->full.head = NULL;
+  struct gc_edge_buffer *buf, *next;
+  for (buf = partly_full; buf; buf = next) {
+    // Grab the chain link before the push below overwrites buf->next.
+    next = buf->next;
+    for (size_t i = 0; i < buf->size; i++)
+      forget_edge(buf->edges[i], heap);
+    buf->size = 0;
+    gc_edge_buffer_list_push(&set->empty, buf);
+  }
+  for (buf = full; buf; buf = next) {
+    next = buf->next;
+    for (size_t i = 0; i < buf->size; i++)
+      forget_edge(buf->edges[i], heap);
+    buf->size = 0;
+    gc_edge_buffer_list_push(&set->empty, buf);
+  }
+}
+
+static inline void
+gc_field_set_trace_edge_buffer(struct gc_field_set *set,
+                               struct gc_edge_buffer *buf,
+                               void (*tracer_visit)(struct gc_edge,
+                                                    struct gc_heap*,
+                                                    void *data),
+                               struct gc_heap *heap,
+                               struct gc_trace_worker *worker) {
+  for (size_t i = 0; i < buf->size; i++)
+    tracer_visit(buf->edges[i], heap, worker);
+}
+
+static void
+gc_field_set_writer_release_buffer(struct gc_field_set_writer *writer) {
+  if (writer->buf) {
+    gc_field_set_release_buffer(writer->set, writer->buf);
+    writer->buf = NULL;
+  }
+}
+
+static void
+gc_field_set_writer_init(struct gc_field_set_writer *writer,
+                         struct gc_field_set *set) {
+  writer->set = set;
+  writer->buf = NULL;
+}
+
+static void
+gc_field_set_writer_add_edge(struct gc_field_set_writer *writer,
+                             struct gc_edge edge) {
+  struct gc_edge_buffer *buf = writer->buf;
+  if (GC_UNLIKELY(!buf))
+    writer->buf = buf = gc_field_set_acquire_buffer(writer->set);
+  GC_ASSERT(buf->size < GC_EDGE_BUFFER_CAPACITY);
+  buf->edges[buf->size++] = edge;
+  if (GC_UNLIKELY(buf->size == GC_EDGE_BUFFER_CAPACITY)) {
+    gc_edge_buffer_list_push(&writer->set->full, buf);
+    writer->buf = NULL;
+  }
+}
+
+#endif // FIELD_SET_H
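
(Illustration only; not part of the diff.)  The structure doing most of the
work above is the lock-free gc_edge_buffer_list: a Treiber stack whose push
and pop retry a compare-and-swap on the head pointer.  The following
standalone toy uses the same pattern on a simplified node type so it
compiles on its own; the names are illustrative, not Whippet's:

    #include <stdatomic.h>
    #include <stdio.h>

    struct node { struct node *next; int value; };
    struct lockfree_list { struct node *_Atomic head; };

    static void list_push(struct lockfree_list *list, struct node *n) {
      struct node *head = atomic_load_explicit(&list->head, memory_order_relaxed);
      do {
        n->next = head;            /* link to the head we last observed... */
      } while (!atomic_compare_exchange_weak_explicit(&list->head, &head, n,
                                                      memory_order_acq_rel,
                                                      memory_order_acquire));
    }                              /* ...retrying if another thread raced us */

    static struct node* list_pop(struct lockfree_list *list) {
      struct node *head = atomic_load_explicit(&list->head, memory_order_acquire);
      struct node *next;
      do {
        if (!head) return NULL;    /* empty list */
        next = head->next;
      } while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next,
                                                      memory_order_acq_rel,
                                                      memory_order_acquire));
      head->next = NULL;
      return head;
    }

    int main(void) {
      struct lockfree_list list = { NULL };
      struct node a = { NULL, 1 }, b = { NULL, 2 };
      list_push(&list, &a);
      list_push(&list, &b);
      struct node *first = list_pop(&list);
      struct node *second = list_pop(&list);
      printf("%d %d\n", first->value, second->value);  /* prints "2 1": LIFO */
      return 0;
    }

The weak compare-and-swap may fail spuriously, which is why both loops
retry; on failure it also reloads the current head into the expected value.
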
diff --git a/src/large-object-space.h b/src/large-object-space.h
index 323c3f2f8..4c7277797 100644
--- a/src/large-object-space.h
+++ b/src/large-object-space.h
@@ -36,7 +36,7 @@ struct large_object_space {
   struct address_set from_space;
   struct address_set to_space;
   struct address_set survivor_space;
-  struct address_set remembered_set;
+  struct address_set remembered_edges;
   struct address_set free_space;
   struct address_map object_pages; // for each object: size in pages.
   struct address_map predecessors; // subsequent addr -> object addr
@@ -50,7 +50,7 @@ static int large_object_space_init(struct large_object_space *space,
   address_set_init(&space->from_space);
   address_set_init(&space->to_space);
   address_set_init(&space->survivor_space);
-  address_set_init(&space->remembered_set);
+  address_set_init(&space->remembered_edges);
   address_set_init(&space->free_space);
   address_map_init(&space->object_pages);
   address_map_init(&space->predecessors);
@@ -77,49 +77,6 @@ static inline int large_object_space_contains(struct large_object_space *space,
   return ret;
 }
 
-struct large_object_space_trace_remembered_data {
-  void (*trace)(struct gc_ref, struct gc_heap*);
-  struct gc_heap *heap;
-};
-
-static void large_object_space_trace_one_remembered(uintptr_t addr,
-                                                    void *data) {
-  struct gc_ref ref = gc_ref(addr);
-  gc_object_clear_remembered_nonatomic(ref);
-  struct large_object_space_trace_remembered_data *vdata = data;
-  vdata->trace(ref, vdata->heap);
-}
-
-static void
-large_object_space_clear_remembered_set(struct large_object_space *space) {
-  address_set_clear(&space->remembered_set);
-}
-
-static void
-large_object_space_trace_remembered_set(struct large_object_space *space,
-                                        void (*trace)(struct gc_ref,
-                                                      struct gc_heap*),
-                                        struct gc_heap *heap) {
-  struct large_object_space_trace_remembered_data vdata = { trace, heap };
-
-  if (!GC_GENERATIONAL)
-    return;
-  address_set_for_each(&space->remembered_set,
-                       large_object_space_trace_one_remembered, &vdata);
-  large_object_space_clear_remembered_set(space);
-}
-
-static void
-large_object_space_remember_object(struct large_object_space *space,
-                                   struct gc_ref ref) {
-  GC_ASSERT(GC_GENERATIONAL);
-  uintptr_t addr = gc_ref_value(ref);
-  pthread_mutex_lock(&space->lock);
-  GC_ASSERT(!address_set_contains(&space->remembered_set, addr));
-  address_set_add(&space->remembered_set, addr);
-  pthread_mutex_unlock(&space->lock);
-}
-
 static void large_object_space_flip_survivor(uintptr_t addr,
                                              void *data) {
   struct large_object_space *space = data;
@@ -176,17 +133,41 @@ static int large_object_space_is_copied(struct large_object_space *space,
   return copied;
 }
 
+static int
+large_object_space_is_survivor_with_lock(struct large_object_space *space,
+                                         struct gc_ref ref) {
+  return address_set_contains(&space->survivor_space, gc_ref_value(ref));
+}
+
 static int large_object_space_is_survivor(struct large_object_space *space,
                                           struct gc_ref ref) {
   GC_ASSERT(large_object_space_contains(space, ref));
-  int old = 0;
-  uintptr_t addr = gc_ref_value(ref);
   pthread_mutex_lock(&space->lock);
-  old = address_set_contains(&space->survivor_space, addr);
+  int old = large_object_space_is_survivor_with_lock(space, ref);
   pthread_mutex_unlock(&space->lock);
   return old;
 }
 
+static int large_object_space_remember_edge(struct large_object_space *space,
+                                            struct gc_ref obj,
+                                            struct gc_edge edge) {
+  int remembered = 0;
+  uintptr_t edge_addr = (uintptr_t)gc_edge_loc(edge);
+  pthread_mutex_lock(&space->lock);
+  if (large_object_space_is_survivor_with_lock(space, obj)
+      && !address_set_contains(&space->remembered_edges, edge_addr)) {
+    address_set_add(&space->remembered_edges, edge_addr);
+    remembered = 1;
+  }
+  pthread_mutex_unlock(&space->lock);
+  return remembered;
+}
+
+static void
+large_object_space_clear_remembered_edges(struct large_object_space *space) {
+  address_set_clear(&space->remembered_edges);
+}
+
 static int large_object_space_mark_object(struct large_object_space *space,
                                           struct gc_ref ref) {
   return large_object_space_copy(space, ref);
diff --git a/src/mmc.c b/src/mmc.c
index ce1571118..b1f4238a7 100644
--- a/src/mmc.c
+++ b/src/mmc.c
@@ -11,6 +11,7 @@
 
 #include "background-thread.h"
 #include "debug.h"
+#include "field-set.h"
 #include "gc-align.h"
 #include "gc-inline.h"
 #include "gc-platform.h"
@@ -33,6 +34,7 @@ struct gc_heap {
   struct nofl_space nofl_space;
   struct large_object_space large_object_space;
   struct gc_extern_space *extern_space;
+  struct gc_field_set remembered_set;
   size_t large_object_pages;
   pthread_mutex_t lock;
   pthread_cond_t collector_cond;
@@ -72,6 +74,7 @@ struct gc_heap {
 
 struct gc_mutator {
   struct nofl_allocator allocator;
+  struct gc_field_set_writer logger;
   struct gc_heap *heap;
   struct gc_stack stack;
   struct gc_mutator_roots *roots;
@@ -188,6 +191,7 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   mut->event_listener_data =
     heap->event_listener.mutator_added(heap->event_listener_data);
   nofl_allocator_reset(&mut->allocator);
+  gc_field_set_writer_init(&mut->logger, &heap->remembered_set);
   heap_lock(heap);
   // We have no roots.  If there is a GC currently in progress, we have
   // nothing to add.  Just wait until it's done.
@@ -207,6 +211,8 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
 static void
 remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   MUTATOR_EVENT(mut, mutator_removed);
   mut->heap = NULL;
   heap_lock(heap);
@@ -360,12 +366,9 @@ trace_root(struct gc_root root, struct gc_heap *heap,
   case GC_ROOT_KIND_EDGE:
     tracer_visit(root.edge, heap, worker);
     break;
-  case GC_ROOT_KIND_REMEMBERED_OBJECT:
-    trace_one(root.ref, heap, worker);
-    break;
-  case GC_ROOT_KIND_REMEMBERED_SLAB:
-    nofl_space_trace_remembered_slab(heap_nofl_space(heap), root.idx,
-                                     trace_one, heap, worker);
+  case GC_ROOT_KIND_EDGE_BUFFER:
+    gc_field_set_trace_edge_buffer(&heap->remembered_set, root.edge_buffer,
+                                   tracer_visit, heap, worker);
     break;
   default:
     GC_CRASH();
@@ -633,25 +636,27 @@ enqueue_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) {
   gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
 }
 
-static void
-enqueue_remembered_object(struct gc_ref ref, struct gc_heap *heap) {
-  gc_tracer_add_root(&heap->tracer, gc_root_remembered_object(ref));
-}
-
 static void
 enqueue_generational_roots(struct gc_heap *heap,
                            enum gc_collection_kind gc_kind) {
   if (!GC_GENERATIONAL) return;
-  if (gc_kind == GC_COLLECTION_MINOR) {
-    for (size_t i = 0; i < heap_nofl_space(heap)->nslabs; i++)
-      gc_tracer_add_root(&heap->tracer, gc_root_remembered_slab(i));
-    large_object_space_trace_remembered_set(heap_large_object_space(heap),
-                                            enqueue_remembered_object,
-                                            heap);
-  } else {
-    nofl_space_clear_remembered_set(heap_nofl_space(heap));
-    large_object_space_clear_remembered_set(heap_large_object_space(heap));
-  }
+  if (gc_kind == GC_COLLECTION_MINOR)
+    gc_field_set_add_roots(&heap->remembered_set, &heap->tracer);
+}
+
+static inline void
+forget_remembered_edge(struct gc_edge edge, struct gc_heap *heap) {
+  struct nofl_space *space = heap_nofl_space(heap);
+  if (nofl_space_contains_edge(space, edge))
+    nofl_space_forget_edge(space, edge);
+  // Otherwise the edge is in the lospace, whose remembered edges are
+  // cleared in bulk.
+}
+
+static void
+clear_remembered_set(struct gc_heap *heap) {
+  gc_field_set_clear(&heap->remembered_set, forget_remembered_edge, heap);
+  large_object_space_clear_remembered_edges(heap_large_object_space(heap));
 }
 
 static void
@@ -768,6 +773,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind,
   HEAP_EVENT(heap, finalizers_traced);
   sweep_ephemerons(heap);
   gc_tracer_release(&heap->tracer);
+  clear_remembered_set(heap);
   nofl_space_finish_gc(nofl_space, gc_kind);
   large_object_space_finish_gc(lospace, is_minor);
   gc_extern_space_finish_gc(exspace, is_minor);
@@ -792,6 +798,8 @@ trigger_collection(struct gc_mutator *mut,
   int prev_kind = -1;
   gc_stack_capture_hot(&mut->stack);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   heap_lock(heap);
   while (mutators_are_stopping(heap))
     prev_kind = pause_mutator_for_collection(heap, mut);
@@ -815,6 +823,8 @@ gc_safepoint_slow(struct gc_mutator *mut) {
   struct gc_heap *heap = mutator_heap(mut);
   gc_stack_capture_hot(&mut->stack);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   heap_lock(heap);
   while (mutators_are_stopping(mutator_heap(mut)))
     pause_mutator_for_collection(heap, mut);
@@ -900,14 +910,16 @@ void
 gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj,
                       size_t obj_size, struct gc_edge edge,
                       struct gc_ref new_val) {
+  GC_ASSERT(gc_ref_is_heap_object(new_val));
   if (!GC_GENERATIONAL) return;
-  GC_ASSERT(obj_size > gc_allocator_large_threshold());
-  struct gc_heap *heap = mutator_heap(mut);
-  struct large_object_space *space = heap_large_object_space(heap);
-  if (!large_object_space_is_survivor(space, obj))
+  if (gc_object_is_old_generation_slow(mut, new_val))
     return;
-  if (gc_object_set_remembered(obj))
-    large_object_space_remember_object(space, obj);
+  struct gc_heap *heap = mutator_heap(mut);
+  if ((obj_size <= gc_allocator_large_threshold())
+      ? nofl_space_remember_edge(heap_nofl_space(heap), obj, edge)
+      : large_object_space_remember_edge(heap_large_object_space(heap),
+                                         obj, edge))
+    gc_field_set_writer_add_edge(&mut->logger, edge);
 }
   
 struct gc_ephemeron*
@@ -1032,6 +1044,7 @@ static int
 heap_init(struct gc_heap *heap, const struct gc_options *options) {
   // *heap is already initialized to 0.
 
+  gc_field_set_init(&heap->remembered_set);
   pthread_mutex_init(&heap->lock, NULL);
   pthread_cond_init(&heap->mutator_cond, NULL);
   pthread_cond_init(&heap->collector_cond, NULL);
@@ -1080,9 +1093,11 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
  GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(), NOFL_METADATA_BYTE_YOUNG);
   GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), NOFL_METADATA_BYTE_END);
   if (GC_GENERATIONAL) {
-    GC_ASSERT_EQ(gc_write_barrier_card_table_alignment(), NOFL_SLAB_SIZE);
-    GC_ASSERT_EQ(gc_write_barrier_card_size(),
-                 NOFL_BLOCK_SIZE / NOFL_REMSET_BYTES_PER_BLOCK);
+    GC_ASSERT_EQ(gc_write_barrier_field_table_alignment(), NOFL_SLAB_SIZE);
+    GC_ASSERT_EQ(gc_write_barrier_field_fields_per_byte(),
+                 NOFL_GRANULE_SIZE / sizeof(uintptr_t));
+    GC_ASSERT_EQ(gc_write_barrier_field_first_bit_pattern(),
+                 NOFL_METADATA_BYTE_LOGGED_0);
   }
 
   *heap = calloc(1, sizeof(struct gc_heap));
@@ -1139,6 +1154,8 @@ static void
 deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   GC_ASSERT(mut->next == NULL);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   heap_lock(heap);
   heap->inactive_mutator_count++;
   gc_stack_capture_hot(&mut->stack);
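
(Illustration only; not part of the diff.)  In the new scheme the
remembered set holds edges -- addresses of fields -- and at a minor
collection each logged edge buffer is handed to the tracer as a root
(GC_ROOT_KIND_EDGE_BUFFER above), after which the whole set is cleared.
A standalone toy of that flow, with made-up names:

    #include <stddef.h>
    #include <stdio.h>

    struct obj { int marked; struct obj *field; };

    /* The remembered set stores edge locations, not objects, so a minor GC
       traces exactly the old-to-young pointers and nothing else. */
    static struct obj **remembered_edges[16];
    static size_t n_remembered;

    static void write_barrier(struct obj *old_obj, struct obj **field,
                              struct obj *young_val) {
      (void)old_obj;  /* a real barrier first checks old_obj is a survivor */
      *field = young_val;
      remembered_edges[n_remembered++] = field;  /* log the edge */
    }

    static void minor_gc(void) {
      for (size_t i = 0; i < n_remembered; i++) {
        struct obj *target = *remembered_edges[i];
        if (target) target->marked = 1;          /* trace through the edge */
      }
      n_remembered = 0;                          /* clear the remembered set */
    }

    int main(void) {
      static struct obj old_obj, young_obj;
      write_barrier(&old_obj, &old_obj.field, &young_obj);
      minor_gc();
      printf("young object reached via remembered edge: %d\n", young_obj.marked);
      return 0;
    }
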
diff --git a/src/nofl-space.h b/src/nofl-space.h
index e47162853..9759b3d8e 100644
--- a/src/nofl-space.h
+++ b/src/nofl-space.h
@@ -45,10 +45,10 @@ STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD,
 #define NOFL_NONMETA_BLOCKS_PER_SLAB (NOFL_BLOCKS_PER_SLAB - NOFL_META_BLOCKS_PER_SLAB)
 #define NOFL_METADATA_BYTES_PER_SLAB (NOFL_NONMETA_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK)
 #define NOFL_SLACK_METADATA_BYTES_PER_SLAB (NOFL_META_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK)
-#define NOFL_REMSET_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
-#define NOFL_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB)
-#define NOFL_SLACK_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB)
-#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_REMSET_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
+#define NOFL_VESTIGIAL_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
+#define NOFL_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB)
+#define NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB)
+#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
 #define NOFL_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB)
 #define NOFL_SLACK_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB)
 #define NOFL_HEADER_BYTES_PER_SLAB NOFL_SLACK_SUMMARY_BYTES_PER_SLAB
@@ -127,7 +127,7 @@ struct nofl_block_ref {
 struct nofl_slab {
   struct nofl_slab_header header;
   struct nofl_block_summary summaries[NOFL_NONMETA_BLOCKS_PER_SLAB];
-  uint8_t remembered_set[NOFL_REMSET_BYTES_PER_SLAB];
+  uint8_t unused[NOFL_VESTIGIAL_BYTES_PER_SLAB];
   uint8_t metadata[NOFL_METADATA_BYTES_PER_SLAB];
   struct nofl_block blocks[NOFL_NONMETA_BLOCKS_PER_SLAB];
 };
@@ -297,8 +297,6 @@ nofl_block_set_mark(uintptr_t addr) {
 }
 
 #define NOFL_GRANULES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE)
-#define NOFL_GRANULES_PER_REMSET_BYTE \
-  (NOFL_GRANULES_PER_BLOCK / NOFL_REMSET_BYTES_PER_BLOCK)
 
 static struct nofl_block_summary*
 nofl_block_summary_for_addr(uintptr_t addr) {
@@ -909,67 +907,75 @@ nofl_space_set_ephemeron_flag(struct gc_ref ref) {
 
 struct gc_trace_worker;
 
-// Note that it's quite possible (and even likely) that any given remset
-// byte doesn't hold any roots, if all stores were to nursery objects.
-STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0);
-static void
-nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab,
-                      size_t card,
-                      void (*trace_object)(struct gc_ref,
-                                           struct gc_heap*,
-                                           struct gc_trace_worker*),
-                      struct gc_heap *heap,
-                      struct gc_trace_worker *worker) {
-  uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0];
-  size_t granule_base = card * NOFL_GRANULES_PER_REMSET_BYTE;
-  for (size_t granule_in_remset = 0;
-       granule_in_remset < NOFL_GRANULES_PER_REMSET_BYTE;
-       granule_in_remset += 8, granule_base += 8) {
-    uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base);
-    mark_bytes &= space->sweep_mask;
-    while (mark_bytes) {
-      size_t granule_offset = count_zero_bytes(mark_bytes);
-      mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8));
-      size_t granule = granule_base + granule_offset;
-      uintptr_t addr = first_addr_in_slab + granule * NOFL_GRANULE_SIZE;
-      GC_ASSERT(nofl_metadata_byte_for_addr(addr) == &slab->metadata[granule]);
-      trace_object(gc_ref(addr), heap, worker);
-    }
-  }
+static inline int
+nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) {
+  return extents_contain_addr(space->extents, addr);
 }
 
-static void
-nofl_space_trace_remembered_slab(struct nofl_space *space,
-                                 size_t slab_idx,
-                                 void (*trace_object)(struct gc_ref,
-                                                      struct gc_heap*,
-                                                      struct gc_trace_worker*),
-                                 struct gc_heap *heap,
-                                 struct gc_trace_worker *worker) {
-  GC_ASSERT(slab_idx < space->nslabs);
-  struct nofl_slab *slab = space->slabs[slab_idx];
-  uint8_t *remset = slab->remembered_set;
-  for (size_t card_base = 0;
-       card_base < NOFL_REMSET_BYTES_PER_SLAB;
-       card_base += 8) {
-    uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base);
-    if (!remset_bytes) continue;
-    memset(remset + card_base, 0, 8);
-    while (remset_bytes) {
-      size_t card_offset = count_zero_bytes(remset_bytes);
-      remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8));
-      nofl_space_trace_card(space, slab, card_base + card_offset,
-                            trace_object, heap, worker);
-    }
-  }
+static inline int
+nofl_space_contains_conservative_ref(struct nofl_space *space,
+                                     struct gc_conservative_ref ref) {
+  return nofl_space_contains_address(space, gc_conservative_ref_value(ref));
+}
+
+static inline int
+nofl_space_contains(struct nofl_space *space, struct gc_ref ref) {
+  return nofl_space_contains_address(space, gc_ref_value(ref));
+}
+
+static inline int
+nofl_space_contains_edge(struct nofl_space *space, struct gc_edge edge) {
+  return nofl_space_contains_address(space, (uintptr_t)gc_edge_loc(edge));
+}  
+
+static inline int
+nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) {
+  uint8_t *metadata = nofl_metadata_byte_for_object(ref);
+  uint8_t mask = NOFL_METADATA_BYTE_MARK_0
+    | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2;
+  uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed);
+  return byte & mask;
+}
+
+static uint8_t*
+nofl_field_logged_byte(struct gc_edge edge) {
+  return nofl_metadata_byte_for_addr((uintptr_t)gc_edge_loc(edge));
+}
+
+static uint8_t
+nofl_field_logged_bit(struct gc_edge edge) {
+  GC_ASSERT_EQ(sizeof(uintptr_t) * 2, NOFL_GRANULE_SIZE);
+  size_t field = ((uintptr_t)gc_edge_loc(edge)) / sizeof(uintptr_t);
+  return NOFL_METADATA_BYTE_LOGGED_0 << (field % 2);
+}
+
+static int
+nofl_space_remember_edge(struct nofl_space *space, struct gc_ref obj,
+                         struct gc_edge edge) {
+  GC_ASSERT(nofl_space_contains(space, obj));
+  if (!GC_GENERATIONAL) return 0;
+  if (!nofl_space_is_survivor(space, obj))
+    return 0;
+  uint8_t* loc = nofl_field_logged_byte(edge);
+  uint8_t bit = nofl_field_logged_bit(edge);
+  uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
+  do {
+    if (byte & bit) return 0;
+  } while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit,
+                                                  memory_order_acq_rel,
+                                                  memory_order_acquire));
+  return 1;
 }
 
 static void
-nofl_space_clear_remembered_set(struct nofl_space *space) {
-  if (!GC_GENERATIONAL) return;
-  for (size_t slab = 0; slab < space->nslabs; slab++) {
-    memset(space->slabs[slab]->remembered_set, 0, NOFL_REMSET_BYTES_PER_SLAB);
-  }
+nofl_space_forget_edge(struct nofl_space *space, struct gc_edge edge) {
+  GC_ASSERT(nofl_space_contains_edge(space, edge));
+  GC_ASSERT(GC_GENERATIONAL);
+  uint8_t* loc = nofl_field_logged_byte(edge);
+  // Clear both logged bits.
+  uint8_t bits = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1;
+  uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
+  atomic_store_explicit(loc, byte & ~bits, memory_order_release);
 }
 
 static void
@@ -1431,13 +1437,29 @@ nofl_space_pin_object(struct nofl_space *space, struct gc_ref ref) {
                                                   memory_order_acquire));
 }
 
-static inline int
-nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) {
-  uint8_t *metadata = nofl_metadata_byte_for_object(ref);
-  uint8_t mask = NOFL_METADATA_BYTE_MARK_0
-    | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2;
-  uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed);
-  return byte & mask;
+static inline void
+clear_logged_bits_in_evacuated_object(uint8_t *metadata, size_t count) {
+  // On a major collection, it could be that we evacuate an object that
+  // has one or more fields in the old-to-new remembered set.  Because
+  // the young generation is empty after a major collection, we know the
+  // old-to-new remembered set will be empty also.  To clear the
+  // remembered set, we call gc_field_set_clear, which will end up
+  // visiting all remembered edges and clearing their logged bits.  But
+  // that doesn't work for evacuated objects, because their edges move:
+  // gc_field_set_clear will frob the pre-evacuation metadata bytes of
+  // the object.  So here we explicitly clear logged bits for evacuated
+  // objects.  That the bits for the pre-evacuation location are also
+  // frobbed by gc_field_set_clear doesn't cause a problem, as that
+  // memory will be swept and cleared later.
+  //
+  // This concern doesn't apply to minor collections: there we will
+  // never evacuate an object in the remembered set, because old objects
+  // aren't traced during a minor collection.
+  uint8_t mask = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1;
+  for (size_t i = 0; i < count; i++) {
+    if (metadata[i] & mask)
+      metadata[i] &= ~mask;
+  }    
 }
 
 static inline int
@@ -1472,6 +1494,8 @@ nofl_space_evacuate(struct nofl_space *space, uint8_t *metadata, uint8_t byte,
       // the object's fields need to be traced.
       uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref);
       memcpy(new_metadata + 1, metadata + 1, object_granules - 1);
+      if (GC_GENERATIONAL)
+        clear_logged_bits_in_evacuated_object(new_metadata, object_granules);
       gc_edge_update(edge, new_ref);
       return nofl_space_set_nonempty_mark(space, new_metadata, byte,
                                           new_ref);
@@ -1523,22 +1547,6 @@ nofl_space_evacuate_or_mark_object(struct nofl_space *space,
   return nofl_space_set_nonempty_mark(space, metadata, byte, old_ref);
 }
 
-static inline int
-nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) {
-  return extents_contain_addr(space->extents, addr);
-}
-
-static inline int
-nofl_space_contains_conservative_ref(struct nofl_space *space,
-                                     struct gc_conservative_ref ref) {
-  return nofl_space_contains_address(space, gc_conservative_ref_value(ref));
-}
-
-static inline int
-nofl_space_contains(struct nofl_space *space, struct gc_ref ref) {
-  return nofl_space_contains_address(space, gc_ref_value(ref));
-}
-
 static inline int
 nofl_space_forward_if_evacuated(struct nofl_space *space,
                                 struct gc_edge edge,
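
(Illustration only; not part of the diff.)  The nofl_field_logged_bit
arithmetic above relies on each granule holding exactly two pointer-sized
fields (16 bytes with 8-byte pointers), so one metadata byte carries two
logged bits: one for the even field in the granule, one for the odd field.
A standalone check of that arithmetic; the LOGGED_0/LOGGED_1 values below
are placeholders, not necessarily the real NOFL_METADATA_BYTE_* encoding:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GRANULE_SIZE 16                 /* assumes 8-byte uintptr_t */
    #define METADATA_BYTE_LOGGED_0 0x40     /* placeholder bit values */
    #define METADATA_BYTE_LOGGED_1 0x80

    static uint8_t logged_bit_for_field(uintptr_t field_addr) {
      /* Two pointer-sized fields share one metadata byte: even field
         indices map to LOGGED_0, odd ones to LOGGED_1. */
      size_t field = field_addr / sizeof(uintptr_t);
      return METADATA_BYTE_LOGGED_0 << (field % 2);
    }

    int main(void) {
      uintptr_t granule = 0x1000;           /* some granule-aligned address */
      assert(logged_bit_for_field(granule) == METADATA_BYTE_LOGGED_0);
      assert(logged_bit_for_field(granule + sizeof(uintptr_t))
             == METADATA_BYTE_LOGGED_1);
      printf("fields per metadata byte: %d\n",
             (int)(GRANULE_SIZE / sizeof(uintptr_t)));
      return 0;
    }
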
diff --git a/src/root.h b/src/root.h
index 46e019b06..4fc705e61 100644
--- a/src/root.h
+++ b/src/root.h
@@ -7,6 +7,7 @@
 struct gc_ephemeron;
 struct gc_heap;
 struct gc_mutator;
+struct gc_edge_buffer;
 
 enum gc_root_kind {
   GC_ROOT_KIND_NONE,
@@ -16,8 +17,7 @@ enum gc_root_kind {
   GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES,
   GC_ROOT_KIND_RESOLVED_EPHEMERONS,
   GC_ROOT_KIND_EDGE,
-  GC_ROOT_KIND_REMEMBERED_OBJECT,
-  GC_ROOT_KIND_REMEMBERED_SLAB,
+  GC_ROOT_KIND_EDGE_BUFFER,
 };
 
 struct gc_root {
@@ -28,8 +28,7 @@ struct gc_root {
     struct gc_ephemeron *resolved_ephemerons;
     struct extent_range range;
     struct gc_edge edge;
-    struct gc_ref ref;
-    size_t idx;
+    struct gc_edge_buffer *edge_buffer;
   };
 };
 
@@ -73,16 +72,9 @@ gc_root_edge(struct gc_edge edge) {
 }
 
 static inline struct gc_root
-gc_root_remembered_object(struct gc_ref ref) {
-  struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_OBJECT };
-  ret.ref = ref;
-  return ret;
-}
-
-static inline struct gc_root
-gc_root_remembered_slab(size_t idx) {
-  struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_SLAB };
-  ret.idx = idx;
+gc_root_edge_buffer(struct gc_edge_buffer *buf) {
+  struct gc_root ret = { GC_ROOT_KIND_EDGE_BUFFER };
+  ret.edge_buffer = buf;
   return ret;
 }
 
