wingo pushed a commit to branch wip-whippet
in repository guile.

commit 9f437485ec1f49ee4369f9aeaf4dd324d71c4b26
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Mon Sep 9 15:02:12 2024 +0200

    MMC marks roots in parallel during the pause, not while stopping
    
    Following the analysis in
    https://wingolog.org/archives/2024/09/06/on-taking-advantage-of-ragged-stops,
    we simplify MMC by traversing roots only during the pause.  This lets us
    use gc_tracer parallel root-tracing.
---
 src/mmc.c             | 541 +++++++++++++-------------------------------------
 src/nofl-space.h      |  52 ++---
 src/parallel-tracer.h | 112 +++++++----
 src/root.h            |  39 +++-
 src/serial-tracer.h   |  35 ++--
 src/tracer.h          |  13 +-
 6 files changed, 306 insertions(+), 486 deletions(-)
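
For orientation, the heart of the change is the new ordering in collect() in
src/mmc.c.  Roughly (a condensed sketch of the control flow in the diff below;
events, out-of-memory detection and the ephemeron/finalizer phases are
omitted), all mutators are stopped first, pinned conservative roots are
enqueued and, for compacting collections, traced in a roots-only pass before
evacuation starts, and only then are the relocatable roots enqueued and the
full parallel trace run:

    // Sketch only; see the src/mmc.c hunks below for the real code.
    request_mutators_to_stop(heap);
    gc_stack_capture_hot(&mut->stack);
    nofl_finish_sweeping(&mut->allocator, nofl_space);
    wait_for_mutators_to_stop(heap);

    enum gc_collection_kind gc_kind =
      determine_collection_kind(heap, requested_kind);

    enqueue_pinned_roots(heap);                // conservative stacks + globals
    if (gc_kind == GC_COLLECTION_COMPACTING)
      gc_tracer_trace_roots(&heap->tracer);    // mark pinned roots in place

    enqueue_relocatable_roots(heap, gc_kind);  // precise mutator/heap/remembered roots
    nofl_space_start_gc(nofl_space, gc_kind);  // evacuation may begin after this
    gc_tracer_trace(&heap->tracer);            // full parallel trace during the pause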

diff --git a/src/mmc.c b/src/mmc.c
index 4c3cb9dc8..e6335dced 100644
--- a/src/mmc.c
+++ b/src/mmc.c
@@ -40,7 +40,6 @@ struct gc_heap {
   pthread_cond_t mutator_cond;
   size_t size;
   int collecting;
-  int mark_while_stopping;
   int check_pending_ephemerons;
   struct gc_pending_ephemerons *pending_ephemerons;
   struct gc_finalizer_state *finalizer_state;
@@ -50,10 +49,9 @@ struct gc_heap {
   size_t paused_mutator_count;
   size_t inactive_mutator_count;
   struct gc_heap_roots *roots;
-  struct gc_mutator *mutator_trace_list;
+  struct gc_mutator *mutators;
   long count;
   uint8_t last_collection_was_minor;
-  struct gc_mutator *inactive_mutators;
   struct gc_tracer tracer;
   double fragmentation_low_threshold;
   double fragmentation_high_threshold;
@@ -71,24 +69,14 @@ struct gc_heap {
 #define MUTATOR_EVENT(mut, event, ...)                                  \
   (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
 
-struct gc_mutator_mark_buf {
-  size_t size;
-  size_t capacity;
-  struct gc_ref *objects;
-};
-
 struct gc_mutator {
   struct nofl_allocator allocator;
   struct gc_heap *heap;
   struct gc_stack stack;
   struct gc_mutator_roots *roots;
-  struct gc_mutator_mark_buf mark_buf;
   void *event_listener_data;
-  // Three uses for this in-object linked-list pointer:
-  //  - inactive (blocked in syscall) mutators
-  //  - grey objects when stopping active mutators for mark-in-place
-  //  - untraced mutators when stopping active mutators for evacuation
   struct gc_mutator *next;
+  struct gc_mutator *prev;
 };
 
 struct gc_trace_worker_data {
@@ -126,9 +114,6 @@ gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
   nofl_allocator_finish(&data.allocator, heap_nofl_space(heap));
 }
 
-static void collect(struct gc_mutator *mut,
-                    enum gc_collection_kind requested_kind) GC_NEVER_INLINE;
-
 static inline int
 do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref,
          struct gc_trace_worker *worker) {
@@ -180,34 +165,6 @@ gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
   GC_CRASH();
 }
 
-static inline struct gc_ref
-do_trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref,
-                          int possibly_interior) {
-  if (!gc_conservative_ref_might_be_a_heap_object(ref, possibly_interior))
-    return gc_ref_null();
-
-  struct nofl_space *nofl_space = heap_nofl_space(heap);
-  if (GC_LIKELY(nofl_space_contains_conservative_ref(nofl_space, ref)))
-    return nofl_space_mark_conservative_ref(nofl_space, ref, possibly_interior);
-
-  struct large_object_space *lospace = heap_large_object_space(heap);
-  return large_object_space_mark_conservative_ref(lospace, ref,
-                                                  possibly_interior);
-}
-
-static inline struct gc_ref
-trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref,
-                       int possibly_interior) {
-  struct gc_ref ret = do_trace_conservative_ref(heap, ref, possibly_interior);
-
-  if (gc_ref_is_heap_object(ret) &&
-      GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons,
-                                       memory_order_relaxed)))
-    gc_resolve_pending_ephemerons(ret, heap);
-
-  return ret;
-}
-
 static int
 mutators_are_stopping(struct gc_heap *heap) {
   return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
@@ -241,6 +198,13 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
     pthread_cond_wait(&heap->mutator_cond, &heap->lock);
   if (heap->mutator_count == 1)
     heap->multithreaded = 1;
+  mut->next = mut->prev = NULL;
+  struct gc_mutator *tail = heap->mutators;
+  if (tail) {
+    mut->next = tail;
+    tail->prev = mut;
+  }
+  heap->mutators = mut;
   heap->mutator_count++;
   heap_unlock(heap);
 }
@@ -252,6 +216,12 @@ remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   mut->heap = NULL;
   heap_lock(heap);
   heap->mutator_count--;
+  if (mut->next)
+    mut->next->prev = mut->prev;
+  if (mut->prev)
+    mut->prev->next = mut->next;
+  else
+    heap->mutators = mut->next;
   // We have no roots.  If there is a GC stop currently in progress,
   // maybe tell the controller it can continue.
   if (mutators_are_stopping(heap) && all_mutators_stopped(heap))
@@ -285,72 +255,6 @@ heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) {
   nofl_space_reacquire_memory(heap_nofl_space(heap), bytes);
 }
 
-static void
-mutator_mark_buf_grow(struct gc_mutator_mark_buf *buf) {
-  size_t old_capacity = buf->capacity;
-  size_t old_bytes = old_capacity * sizeof(struct gc_ref);
-
-  size_t new_bytes = old_bytes ? old_bytes * 2 : getpagesize();
-  size_t new_capacity = new_bytes / sizeof(struct gc_ref);
-
-  void *mem = mmap(NULL, new_bytes, PROT_READ|PROT_WRITE,
-                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-  if (mem == MAP_FAILED) {
-    perror("allocating mutator mark buffer failed");
-    GC_CRASH();
-  }
-  if (old_bytes) {
-    memcpy(mem, buf->objects, old_bytes);
-    munmap(buf->objects, old_bytes);
-  }
-  buf->objects = mem;
-  buf->capacity = new_capacity;
-}
-
-static void
-mutator_mark_buf_push(struct gc_mutator_mark_buf *buf, struct gc_ref ref) {
-  if (GC_UNLIKELY(buf->size == buf->capacity))
-    mutator_mark_buf_grow(buf);
-  buf->objects[buf->size++] = ref;
-}
-
-static void
-mutator_mark_buf_release(struct gc_mutator_mark_buf *buf) {
-  size_t bytes = buf->size * sizeof(struct gc_ref);
-  if (bytes >= getpagesize())
-    madvise(buf->objects, align_up(bytes, getpagesize()), MADV_DONTNEED);
-  buf->size = 0;
-}
-
-static void
-mutator_mark_buf_destroy(struct gc_mutator_mark_buf *buf) {
-  size_t bytes = buf->capacity * sizeof(struct gc_ref);
-  if (bytes)
-    munmap(buf->objects, bytes);
-}
-
-static void
-enqueue_mutator_for_tracing(struct gc_mutator *mut) {
-  struct gc_heap *heap = mutator_heap(mut);
-  GC_ASSERT(mut->next == NULL);
-  struct gc_mutator *next =
-    atomic_load_explicit(&heap->mutator_trace_list, memory_order_acquire);
-  do {
-    mut->next = next;
-  } while (!atomic_compare_exchange_weak(&heap->mutator_trace_list,
-                                         &next, mut));
-}
-
-static int
-heap_should_mark_while_stopping(struct gc_heap *heap) {
-  return atomic_load_explicit(&heap->mark_while_stopping, memory_order_acquire);
-}
-
-static int
-mutator_should_mark_while_stopping(struct gc_mutator *mut) {
-  return heap_should_mark_while_stopping(mutator_heap(mut));
-}
-
 void
 gc_mutator_set_roots(struct gc_mutator *mut, struct gc_mutator_roots *roots) {
   mut->roots = roots;
@@ -373,68 +277,42 @@ tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
     gc_trace_worker_enqueue(worker, gc_edge_ref(edge));
 }
 
-static void
-trace_and_enqueue_locally(struct gc_edge edge, struct gc_heap *heap,
-                          void *data) {
-  struct gc_mutator *mut = data;
-  if (trace_edge(heap, edge, NULL))
-    mutator_mark_buf_push(&mut->mark_buf, gc_edge_ref(edge));
-}
-
-static inline void
-do_trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref,
-                                              struct gc_heap *heap,
-                                              void *data,
-                                              int possibly_interior) {
-  struct gc_mutator *mut = data;
-  struct gc_ref object = trace_conservative_ref(heap, ref, possibly_interior);
-  if (gc_ref_is_heap_object(object))
-    mutator_mark_buf_push(&mut->mark_buf, object);
-}
+static inline struct gc_ref
+do_trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref,
+                          int possibly_interior) {
+  if (!gc_conservative_ref_might_be_a_heap_object(ref, possibly_interior))
+    return gc_ref_null();
 
-static void
-trace_possibly_interior_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref,
-                                                             struct gc_heap *heap,
-                                                             void *data) {
-  return do_trace_conservative_ref_and_enqueue_locally(ref, heap, data, 1);
-}
+  struct nofl_space *nofl_space = heap_nofl_space(heap);
+  if (GC_LIKELY(nofl_space_contains_conservative_ref(nofl_space, ref)))
+    return nofl_space_mark_conservative_ref(nofl_space, ref, possibly_interior);
 
-static void
-trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref,
-                                           struct gc_heap *heap,
-                                           void *data) {
-  return do_trace_conservative_ref_and_enqueue_locally(ref, heap, data, 0);
+  struct large_object_space *lospace = heap_large_object_space(heap);
+  return large_object_space_mark_conservative_ref(lospace, ref,
+                                                  possibly_interior);
 }
 
-static void
-trace_and_enqueue_globally(struct gc_edge edge, struct gc_heap *heap,
-                           void *unused) {
-  if (trace_edge(heap, edge, NULL))
-    gc_tracer_enqueue_root(&heap->tracer, gc_edge_ref(edge));
-}
+static inline struct gc_ref
+trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref,
+                       int possibly_interior) {
+  struct gc_ref ret = do_trace_conservative_ref(heap, ref, possibly_interior);
 
-static inline void
-do_trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref,
-                                               struct gc_heap *heap,
-                                               void *data,
-                                               int possibly_interior) {
-  struct gc_ref object = trace_conservative_ref(heap, ref, possibly_interior);
-  if (gc_ref_is_heap_object(object))
-    gc_tracer_enqueue_root(&heap->tracer, object);
-}
+  if (gc_ref_is_heap_object(ret) &&
+      GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons,
+                                       memory_order_relaxed)))
+    gc_resolve_pending_ephemerons(ret, heap);
 
-static void
-trace_possibly_interior_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref,
-                                                              struct gc_heap *heap,
-                                                              void *data) {
-  return do_trace_conservative_ref_and_enqueue_globally(ref, heap, data, 1);
+  return ret;
 }
 
-static void
-trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref,
-                                            struct gc_heap *heap,
-                                            void *data) {
-  return do_trace_conservative_ref_and_enqueue_globally(ref, heap, data, 0);
+static inline void
+tracer_trace_conservative_ref(struct gc_conservative_ref ref,
+                              struct gc_heap *heap,
+                              struct gc_trace_worker *worker,
+                              int possibly_interior) {
+  struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior);
+  if (gc_ref_is_heap_object(resolved))
+    gc_trace_worker_enqueue(worker, resolved);
 }
 
 static inline struct gc_conservative_ref
@@ -446,26 +324,13 @@ load_conservative_ref(uintptr_t addr) {
 }
 
 static inline void
-trace_conservative_edges(uintptr_t low,
-                         uintptr_t high,
-                         void (*trace)(struct gc_conservative_ref,
-                                       struct gc_heap *, void *),
-                         struct gc_heap *heap,
-                         void *data) {
+trace_conservative_edges(uintptr_t low, uintptr_t high, int possibly_interior,
+                         struct gc_heap *heap, struct gc_trace_worker *worker) {
   GC_ASSERT(low == align_down(low, sizeof(uintptr_t)));
   GC_ASSERT(high == align_down(high, sizeof(uintptr_t)));
   for (uintptr_t addr = low; addr < high; addr += sizeof(uintptr_t))
-    trace(load_conservative_ref(addr), heap, data);
-}
-
-static inline void
-tracer_trace_conservative_ref(struct gc_conservative_ref ref,
-                              struct gc_heap *heap, void *data) {
-  struct gc_trace_worker *worker = data;
-  int possibly_interior = 0;
-  struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior);
-  if (gc_ref_is_heap_object(resolved))
-    gc_trace_worker_enqueue(worker, resolved);
+    tracer_trace_conservative_ref(load_conservative_ref(addr), heap, worker,
+                                  possibly_interior);
 }
 
 static inline void
@@ -486,10 +351,10 @@ trace_one_conservatively(struct gc_ref ref, struct gc_heap *heap,
   } else {
     bytes = large_object_space_object_size(heap_large_object_space(heap), ref);
   }
-  trace_conservative_edges(gc_ref_value(ref),
-                           gc_ref_value(ref) + bytes,
-                           tracer_trace_conservative_ref, heap,
-                           worker);
+  // Intraheap edges are not interior.
+  int possibly_interior = 0;
+  trace_conservative_edges(gc_ref_value(ref), gc_ref_value(ref) + bytes,
+                           possibly_interior, heap, worker);
 }
 
 static inline void
@@ -511,6 +376,14 @@ trace_root(struct gc_root root, struct gc_heap *heap,
   case GC_ROOT_KIND_MUTATOR:
     gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker);
     break;
+  case GC_ROOT_KIND_CONSERVATIVE_EDGES:
+    trace_conservative_edges(root.range.lo_addr, root.range.hi_addr, 0,
+                             heap, worker);
+    break;
+  case GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES:
+    trace_conservative_edges(root.range.lo_addr, root.range.hi_addr, 1,
+                             heap, worker);
+    break;
   case GC_ROOT_KIND_RESOLVED_EPHEMERONS:
     gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit,
                                  heap, worker);
@@ -518,101 +391,52 @@ trace_root(struct gc_root root, struct gc_heap *heap,
   case GC_ROOT_KIND_EDGE:
     tracer_visit(root.edge, heap, worker);
     break;
+  case GC_ROOT_KIND_REMEMBERED_OBJECT:
+    trace_one(root.ref, heap, worker);
+    break;
+  case GC_ROOT_KIND_REMEMBERED_SLAB:
+    nofl_space_trace_remembered_slab(heap_nofl_space(heap), root.idx,
+                                     trace_one, heap, worker);
+    break;
   default:
     GC_CRASH();
   }
 }
 
 static void
-visit_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) {
-  gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
-}
-
-static void
-mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low,
-                                                     uintptr_t high,
-                                                     struct gc_heap *heap,
-                                                     void *data) {
-  trace_conservative_edges(low, high,
-                           gc_mutator_conservative_roots_may_be_interior()
-                           ? trace_possibly_interior_conservative_ref_and_enqueue_globally
-                           : trace_conservative_ref_and_enqueue_globally,
-                           heap, data);
-}
-
-static void
-mark_and_globally_enqueue_heap_conservative_roots(uintptr_t low,
-                                                  uintptr_t high,
-                                                  struct gc_heap *heap,
-                                                  void *data) {
-  trace_conservative_edges(low, high,
-                           trace_conservative_ref_and_enqueue_globally,
-                           heap, data);
-}
-
-static void
-mark_and_locally_enqueue_mutator_conservative_roots(uintptr_t low,
-                                                    uintptr_t high,
-                                                    struct gc_heap *heap,
-                                                    void *data) {
-  trace_conservative_edges(low, high,
-                           gc_mutator_conservative_roots_may_be_interior()
-                           ? trace_possibly_interior_conservative_ref_and_enqueue_locally
-                           : trace_conservative_ref_and_enqueue_locally,
-                           heap, data);
-}
-
-static inline void
-trace_mutator_conservative_roots(struct gc_mutator *mut,
-                                 void (*trace_range)(uintptr_t low,
-                                                     uintptr_t high,
-                                                     struct gc_heap *heap,
-                                                     void *data),
-                                 struct gc_heap *heap,
-                                 void *data) {
-  if (gc_has_mutator_conservative_roots())
-    gc_stack_visit(&mut->stack, trace_range, heap, data);
-}
-
-// Mark the roots of a mutator that is stopping for GC.  We can't
-// enqueue them directly, so we send them to the controller in a buffer.
-static void
-trace_stopping_mutator_roots(struct gc_mutator *mut) {
-  GC_ASSERT(mutator_should_mark_while_stopping(mut));
-  struct gc_heap *heap = mutator_heap(mut);
-  trace_mutator_conservative_roots(mut,
-                                   mark_and_locally_enqueue_mutator_conservative_roots,
-                                   heap, mut);
-  gc_trace_mutator_roots(mut->roots, trace_and_enqueue_locally, heap, mut);
-}
-
-static void
-trace_mutator_conservative_roots_with_lock(struct gc_mutator *mut) {
-  trace_mutator_conservative_roots(mut,
-                                   mark_and_globally_enqueue_mutator_conservative_roots,
-                                   mutator_heap(mut),
-                                   NULL);
+enqueue_conservative_roots(uintptr_t low, uintptr_t high,
+                           struct gc_heap *heap, void *data) {
+  int *possibly_interior = data;
+  gc_tracer_add_root(&heap->tracer,
+                     gc_root_conservative_edges(low, high, *possibly_interior));
 }
 
 static void
-trace_mutator_roots_with_lock(struct gc_mutator *mut) {
-  trace_mutator_conservative_roots_with_lock(mut);
-  gc_trace_mutator_roots(mut->roots, trace_and_enqueue_globally,
-                         mutator_heap(mut), NULL);
+enqueue_mutator_conservative_roots(struct gc_heap *heap) {
+  if (gc_has_mutator_conservative_roots()) {
+    int possibly_interior = gc_mutator_conservative_roots_may_be_interior();
+    for (struct gc_mutator *mut = heap->mutators;
+         mut;
+         mut = mut->next)
+      gc_stack_visit(&mut->stack, enqueue_conservative_roots, heap,
+                     &possibly_interior);
+  }
 }
 
 static void
-trace_mutator_roots_with_lock_before_stop(struct gc_mutator *mut) {
-  gc_stack_capture_hot(&mut->stack);
-  if (mutator_should_mark_while_stopping(mut))
-    trace_mutator_roots_with_lock(mut);
-  else
-    enqueue_mutator_for_tracing(mut);
+enqueue_global_conservative_roots(struct gc_heap *heap) {
+  if (gc_has_global_conservative_roots()) {
+    int possibly_interior = 0;
+    gc_platform_visit_global_conservative_roots
+      (enqueue_conservative_roots, heap, &possibly_interior);
+  }
 }
 
 static void
-release_stopping_mutator_roots(struct gc_mutator *mut) {
-  mutator_mark_buf_release(&mut->mark_buf);
+enqueue_pinned_roots(struct gc_heap *heap) {
+  GC_ASSERT(!heap_nofl_space(heap)->evacuating);
+  enqueue_mutator_conservative_roots(heap);
+  enqueue_global_conservative_roots(heap);
 }
 
 static void
@@ -622,56 +446,6 @@ wait_for_mutators_to_stop(struct gc_heap *heap) {
     pthread_cond_wait(&heap->collector_cond, &heap->lock);
 }
 
-static void
-trace_mutator_conservative_roots_after_stop(struct gc_heap *heap) {
-  int active_mutators_already_marked = heap_should_mark_while_stopping(heap);
-  if (!active_mutators_already_marked)
-    for (struct gc_mutator *mut = atomic_load(&heap->mutator_trace_list);
-         mut;
-         mut = mut->next)
-      trace_mutator_conservative_roots_with_lock(mut);
-
-  for (struct gc_mutator *mut = heap->inactive_mutators;
-       mut;
-       mut = mut->next)
-    trace_mutator_conservative_roots_with_lock(mut);
-}
-
-static void
-trace_mutator_roots_after_stop(struct gc_heap *heap) {
-  struct gc_mutator *mut = atomic_load(&heap->mutator_trace_list);
-  int active_mutators_already_marked = heap_should_mark_while_stopping(heap);
-  while (mut) {
-    // Also collect any already-marked grey objects and put them on the
-    // global trace queue.
-    if (active_mutators_already_marked)
-      gc_tracer_enqueue_roots(&heap->tracer, mut->mark_buf.objects,
-                              mut->mark_buf.size);
-    else
-      trace_mutator_roots_with_lock(mut);
-    // Also unlink mutator_trace_list chain.
-    struct gc_mutator *next = mut->next;
-    mut->next = NULL;
-    mut = next;
-  }
-  atomic_store(&heap->mutator_trace_list, NULL);
-
-  for (struct gc_mutator *mut = heap->inactive_mutators; mut; mut = mut->next)
-    trace_mutator_roots_with_lock(mut);
-}
-
-static void
-trace_global_conservative_roots(struct gc_heap *heap) {
-  if (gc_has_global_conservative_roots())
-    gc_platform_visit_global_conservative_roots
-      (mark_and_globally_enqueue_heap_conservative_roots, heap, NULL);
-}
-
-static void
-enqueue_generational_root(struct gc_ref ref, struct gc_heap *heap) {
-  gc_tracer_enqueue_root(&heap->tracer, ref);
-}
-
 void
 gc_write_barrier_extern(struct gc_ref obj, size_t obj_size,
                         struct gc_edge edge, struct gc_ref new_val) {
@@ -679,22 +453,6 @@ gc_write_barrier_extern(struct gc_ref obj, size_t obj_size,
   gc_object_set_remembered(obj);
 }
 
-static void
-trace_generational_roots(struct gc_heap *heap) {
-  // TODO: Add lospace nursery.
-  if (atomic_load(&heap->gc_kind) == GC_COLLECTION_MINOR) {
-    nofl_space_trace_remembered_set(heap_nofl_space(heap),
-                                    enqueue_generational_root,
-                                    heap);
-    large_object_space_trace_remembered_set(heap_large_object_space(heap),
-                                            enqueue_generational_root,
-                                            heap);
-  } else {
-    nofl_space_clear_remembered_set(heap_nofl_space(heap));
-    large_object_space_clear_remembered_set(heap_large_object_space(heap));
-  }
-}
-
 static enum gc_collection_kind
 pause_mutator_for_collection(struct gc_heap *heap,
                              struct gc_mutator *mut) GC_NEVER_INLINE;
@@ -733,11 +491,6 @@ pause_mutator_for_collection_with_lock(struct gc_mutator *mut) {
   MUTATOR_EVENT(mut, mutator_stopping);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
   gc_stack_capture_hot(&mut->stack);
-  if (mutator_should_mark_while_stopping(mut))
-    // No need to collect results in mark buf; we can enqueue roots directly.
-    trace_mutator_roots_with_lock(mut);
-  else
-    enqueue_mutator_for_tracing(mut);
   return pause_mutator_for_collection(heap, mut);
 }
 
@@ -749,13 +502,9 @@ pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
   MUTATOR_EVENT(mut, mutator_stopping);
   nofl_finish_sweeping(&mut->allocator, heap_nofl_space(heap));
   gc_stack_capture_hot(&mut->stack);
-  if (mutator_should_mark_while_stopping(mut))
-    trace_stopping_mutator_roots(mut);
-  enqueue_mutator_for_tracing(mut);
   heap_lock(heap);
   pause_mutator_for_collection(heap, mut);
   heap_unlock(heap);
-  release_stopping_mutator_roots(mut);
 }
 
 static inline void
@@ -838,7 +587,6 @@ determine_collection_kind(struct gc_heap *heap,
   struct nofl_space *nofl_space = heap_nofl_space(heap);
   enum gc_collection_kind previous_gc_kind = atomic_load(&heap->gc_kind);
   enum gc_collection_kind gc_kind;
-  int mark_while_stopping = 1;
   double yield = heap_last_gc_yield(heap);
   double fragmentation = heap_fragmentation(heap);
  ssize_t pending = atomic_load_explicit(&nofl_space->pending_unavailable_bytes,
@@ -856,17 +604,6 @@ determine_collection_kind(struct gc_heap *heap,
     // free blocks, and we decided not to expand the heap.  Let's do an
     // evacuating major collection to maximize the free block yield.
     gc_kind = GC_COLLECTION_COMPACTING;
-
-    // Generally speaking, we allow mutators to mark their own stacks
-    // before pausing.  This is a limited form of concurrent marking, as
-    // other mutators might be running, not having received the signal
-    // to stop yet.  In a compacting collection, this results in pinned
-    // roots, because we haven't started evacuating yet and instead mark
-    // in place.  However as in this case we are trying to reclaim free
-    // blocks, try to avoid any pinning caused by the ragged-stop
-    // marking.  Of course if the mutator has conservative roots we will
-    // have pinning anyway and might as well allow ragged stops.
-    mark_while_stopping = gc_has_conservative_roots();
   } else if (previous_gc_kind == GC_COLLECTION_COMPACTING
              && fragmentation >= heap->fragmentation_low_threshold) {
     DEBUG("continuing evacuation due to fragmentation %.2f%% > %.2f%%\n",
@@ -912,7 +649,6 @@ determine_collection_kind(struct gc_heap *heap,
       gc_kind == GC_COLLECTION_COMPACTING) {
     DEBUG("welp.  conservative heap scanning, no evacuation for you\n");
     gc_kind = GC_COLLECTION_MAJOR;
-    mark_while_stopping = 1;
   }
 
   // If this is the first in a series of minor collections, reset the
@@ -927,34 +663,49 @@ determine_collection_kind(struct gc_heap *heap,
           yield * 100., clamped * 100.);
   }
 
-  atomic_store_explicit(&heap->mark_while_stopping, mark_while_stopping,
-                        memory_order_release);
-
   atomic_store(&heap->gc_kind, gc_kind);
   return gc_kind;
 }
 
 static void
-trace_conservative_roots_after_stop(struct gc_heap *heap) {
-  GC_ASSERT(!heap_nofl_space(heap)->evacuating);
-  if (gc_has_mutator_conservative_roots())
-    trace_mutator_conservative_roots_after_stop(heap);
-  if (gc_has_global_conservative_roots())
-    trace_global_conservative_roots(heap);
+enqueue_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) {
+  gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
 }
 
 static void
-trace_pinned_roots_after_stop(struct gc_heap *heap) {
-  GC_ASSERT(!heap_nofl_space(heap)->evacuating);
-  trace_conservative_roots_after_stop(heap);
+enqueue_remembered_object(struct gc_ref ref, struct gc_heap *heap) {
+  gc_tracer_add_root(&heap->tracer, gc_root_remembered_object(ref));
 }
 
 static void
-trace_roots_after_stop(struct gc_heap *heap) {
-  trace_mutator_roots_after_stop(heap);
-  gc_trace_heap_roots(heap->roots, trace_and_enqueue_globally, heap, NULL);
-  gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL);
-  trace_generational_roots(heap);
+enqueue_generational_roots(struct gc_heap *heap,
+                           enum gc_collection_kind gc_kind) {
+  // TODO: Add lospace nursery.
+  if (gc_kind == GC_COLLECTION_MINOR) {
+    for (size_t i = 0; i < heap_nofl_space(heap)->nslabs; i++)
+      gc_tracer_add_root(&heap->tracer, gc_root_remembered_slab(i));
+    large_object_space_trace_remembered_set(heap_large_object_space(heap),
+                                            enqueue_remembered_object,
+                                            heap);
+  } else {
+    nofl_space_clear_remembered_set(heap_nofl_space(heap));
+    large_object_space_clear_remembered_set(heap_large_object_space(heap));
+  }
+}
+
+static void
+enqueue_relocatable_roots(struct gc_heap *heap,
+                          enum gc_collection_kind gc_kind) {
+  for (struct gc_mutator *mut = heap->mutators;
+       mut;
+       mut = mut->next) {
+    if (mut->roots)
+      gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
+  }
+  if (heap->roots)
+    gc_tracer_add_root(&heap->tracer, gc_root_heap(heap));
+  gc_visit_finalizer_roots(heap->finalizer_state, enqueue_root_edge, heap, NULL);
+  enqueue_generational_roots(heap, gc_kind);
 }
 
 static void
@@ -970,15 +721,6 @@ resolve_ephemerons_eagerly(struct gc_heap *heap) {
   gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
 }
 
-static int
-enqueue_resolved_ephemerons(struct gc_heap *heap) {
-  struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
-  if (!resolved)
-    return 0;
-  gc_trace_resolved_ephemerons(resolved, trace_and_enqueue_globally, heap, NULL);
-  return 1;
-}
-
 static void
 trace_resolved_ephemerons(struct gc_heap *heap) {
   for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
@@ -995,7 +737,7 @@ resolve_finalizers(struct gc_heap *heap) {
        priority < gc_finalizer_priority_count();
        priority++) {
     if (gc_resolve_finalizers(heap->finalizer_state, priority,
-                              visit_root_edge, heap, NULL)) {
+                              enqueue_root_edge, heap, NULL)) {
       gc_tracer_trace(&heap->tracer);
       trace_resolved_ephemerons(heap);
     }
@@ -1008,6 +750,8 @@ sweep_ephemerons(struct gc_heap *heap) {
   return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
 }
 
+static void collect(struct gc_mutator *mut,
+                    enum gc_collection_kind requested_kind) GC_NEVER_INLINE;
 static void
 collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
   struct gc_heap *heap = mutator_heap(mut);
@@ -1020,6 +764,13 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
   }
   MUTATOR_EVENT(mut, mutator_cause_gc);
   DEBUG("start collect #%ld:\n", heap->count);
+  HEAP_EVENT(heap, requesting_stop);
+  request_mutators_to_stop(heap);
+  gc_stack_capture_hot(&mut->stack);
+  nofl_finish_sweeping(&mut->allocator, nofl_space);
+  HEAP_EVENT(heap, waiting_for_stop);
+  wait_for_mutators_to_stop(heap);
+  HEAP_EVENT(heap, mutators_stopped);
   enum gc_collection_kind gc_kind =
     determine_collection_kind(heap, requested_kind);
   int is_minor = gc_kind == GC_COLLECTION_MINOR;
@@ -1029,22 +780,17 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
   gc_extern_space_start_gc(exspace, is_minor);
   resolve_ephemerons_lazily(heap);
   gc_tracer_prepare(&heap->tracer);
-  HEAP_EVENT(heap, requesting_stop);
-  request_mutators_to_stop(heap);
-  trace_mutator_roots_with_lock_before_stop(mut);
-  nofl_finish_sweeping(&mut->allocator, nofl_space);
-  HEAP_EVENT(heap, waiting_for_stop);
-  wait_for_mutators_to_stop(heap);
-  HEAP_EVENT(heap, mutators_stopped);
   double yield = heap_last_gc_yield(heap);
   double fragmentation = heap_fragmentation(heap);
   HEAP_EVENT(heap, live_data_size, heap->size * (1 - yield));
   DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation);
   detect_out_of_memory(heap);
-  trace_pinned_roots_after_stop(heap);
-  nofl_space_start_gc(nofl_space, gc_kind);
-  trace_roots_after_stop(heap);
+  enqueue_pinned_roots(heap);
+  if (gc_kind == GC_COLLECTION_COMPACTING)
+    gc_tracer_trace_roots(&heap->tracer);
   HEAP_EVENT(heap, roots_traced);
+  enqueue_relocatable_roots(heap, gc_kind);
+  nofl_space_start_gc(nofl_space, gc_kind);
   gc_tracer_trace(&heap->tracer);
   HEAP_EVENT(heap, heap_traced);
   resolve_ephemerons_eagerly(heap);
@@ -1334,7 +1080,6 @@ gc_init_for_thread(struct gc_stack_addr *stack_base,
 void
 gc_finish_for_thread(struct gc_mutator *mut) {
   remove_mutator(mutator_heap(mut), mut);
-  mutator_mark_buf_destroy(&mut->mark_buf);
   free(mut);
 }
 
@@ -1343,8 +1088,6 @@ deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   GC_ASSERT(mut->next == NULL);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
   heap_lock(heap);
-  mut->next = heap->inactive_mutators;
-  heap->inactive_mutators = mut;
   heap->inactive_mutator_count++;
   gc_stack_capture_hot(&mut->stack);
   if (all_mutators_stopped(heap))
@@ -1357,18 +1100,12 @@ reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   heap_lock(heap);
   while (mutators_are_stopping(heap))
     pthread_cond_wait(&heap->mutator_cond, &heap->lock);
-  struct gc_mutator **prev = &heap->inactive_mutators;
-  while (*prev != mut)
-    prev = &(*prev)->next;
-  *prev = mut->next;
-  mut->next = NULL;
   heap->inactive_mutator_count--;
   heap_unlock(heap);
 }
 
-void* gc_call_without_gc(struct gc_mutator *mut,
-                         void* (*f)(void*),
-                         void *data) {
+void*
+gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*), void *data) {
   struct gc_heap *heap = mutator_heap(mut);
   deactivate_mutator(heap, mut);
   void *ret = f(data);
diff --git a/src/nofl-space.h b/src/nofl-space.h
index 037dff0d2..5a217cdb0 100644
--- a/src/nofl-space.h
+++ b/src/nofl-space.h
@@ -808,14 +808,19 @@ nofl_space_set_ephemeron_flag(struct gc_ref ref) {
   }
 }
 
+struct gc_trace_worker;
+
 // Note that it's quite possible (and even likely) that any given remset
 // byte doesn't hold any roots, if all stores were to nursery objects.
 STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0);
 static void
 nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab,
                       size_t card,
-                      void (*enqueue)(struct gc_ref, struct gc_heap*),
-                      struct gc_heap *heap) {
+                      void (*trace_object)(struct gc_ref,
+                                           struct gc_heap*,
+                                           struct gc_trace_worker*),
+                      struct gc_heap *heap,
+                      struct gc_trace_worker *worker) {
   uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0];
   size_t granule_base = card * NOFL_GRANULES_PER_REMSET_BYTE;
   for (size_t granule_in_remset = 0;
@@ -829,32 +834,33 @@ nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab,
       size_t granule = granule_base + granule_offset;
       uintptr_t addr = first_addr_in_slab + granule * NOFL_GRANULE_SIZE;
       GC_ASSERT(nofl_metadata_byte_for_addr(addr) == &slab->metadata[granule]);
-      enqueue(gc_ref(addr), heap);
+      trace_object(gc_ref(addr), heap, worker);
     }
   }
 }
 
 static void
-nofl_space_trace_remembered_set(struct nofl_space *space,
-                                void (*enqueue)(struct gc_ref,
-                                                struct gc_heap*),
-                                struct gc_heap *heap) {
-  GC_ASSERT(!space->evacuating);
-  for (size_t s = 0; s < space->nslabs; s++) {
-    struct nofl_slab *slab = space->slabs[s];
-    uint8_t *remset = slab->remembered_set;
-    for (size_t card_base = 0;
-         card_base < NOFL_REMSET_BYTES_PER_SLAB;
-         card_base += 8) {
-      uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base);
-      if (!remset_bytes) continue;
-      memset(remset + card_base, 0, 8);
-      while (remset_bytes) {
-        size_t card_offset = count_zero_bytes(remset_bytes);
-        remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8));
-        nofl_space_trace_card(space, slab, card_base + card_offset,
-                              enqueue, heap);
-      }
+nofl_space_trace_remembered_slab(struct nofl_space *space,
+                                 size_t slab_idx,
+                                 void (*trace_object)(struct gc_ref,
+                                                      struct gc_heap*,
+                                                      struct gc_trace_worker*),
+                                 struct gc_heap *heap,
+                                 struct gc_trace_worker *worker) {
+  GC_ASSERT(slab_idx < space->nslabs);
+  struct nofl_slab *slab = space->slabs[slab_idx];
+  uint8_t *remset = slab->remembered_set;
+  for (size_t card_base = 0;
+       card_base < NOFL_REMSET_BYTES_PER_SLAB;
+       card_base += 8) {
+    uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base);
+    if (!remset_bytes) continue;
+    memset(remset + card_base, 0, 8);
+    while (remset_bytes) {
+      size_t card_offset = count_zero_bytes(remset_bytes);
+      remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8));
+      nofl_space_trace_card(space, slab, card_base + card_offset,
+                            trace_object, heap, worker);
     }
   }
 }
diff --git a/src/parallel-tracer.h b/src/parallel-tracer.h
index 888c0fad6..20d66730f 100644
--- a/src/parallel-tracer.h
+++ b/src/parallel-tracer.h
@@ -57,6 +57,7 @@ struct gc_tracer {
   long epoch;
   pthread_mutex_t lock;
   pthread_cond_t cond;
+  int trace_roots_only;
   struct root_worklist roots;
   struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT];
 };
@@ -112,6 +113,7 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
   tracer->heap = heap;
   atomic_init(&tracer->active_tracers, 0);
   tracer->epoch = 0;
+  tracer->trace_roots_only = 0;
   pthread_mutex_init(&tracer->lock, NULL);
   pthread_cond_init(&tracer->cond, NULL);
   root_worklist_init(&tracer->roots);
@@ -299,32 +301,52 @@ trace_with_data(struct gc_tracer *tracer,
   atomic_fetch_add_explicit(&tracer->active_tracers, 1, memory_order_acq_rel);
   worker->data = data;
 
-  size_t n = 0;
   DEBUG("tracer #%zu: running trace loop\n", worker->id);
 
-  do {
-    struct gc_root root = root_worklist_pop(&tracer->roots);
-    if (root.kind == GC_ROOT_KIND_NONE)
-      break;
-    trace_root(root, heap, worker);
-  } while (1);
-
-  do {
-    while (1) {
-      struct gc_ref ref;
-      if (!local_worklist_empty(&worker->local)) {
-        ref = local_worklist_pop(&worker->local);
-      } else {
-        ref = trace_worker_steal(worker);
-        if (!gc_ref_is_heap_object(ref))
-          break;
-      }
-      trace_one(ref, heap, worker);
+  {
+    DEBUG("tracer #%zu: tracing roots\n", worker->id);
+    size_t n = 0;
+    do {
+      struct gc_root root = root_worklist_pop(&tracer->roots);
+      if (root.kind == GC_ROOT_KIND_NONE)
+        break;
+      trace_root(root, heap, worker);
       n++;
+    } while (1);
+
+    DEBUG("tracer #%zu: done tracing roots, %zu roots traced\n", worker->id, n);
+  }
+
+  if (tracer->trace_roots_only) {
+    // Unlike the full trace where work is generated during the trace, a
+    // roots-only trace consumes work monotonically; any object enqueued as a
+    // result of marking roots isn't ours to deal with.  However we do need to
+    // synchronize with remote workers to ensure they have completed their
+    // work items.
+    if (worker->id == 0) {
+      for (size_t i = 1; i < tracer->worker_count; i++)
+        pthread_mutex_lock(&tracer->workers[i].lock);
     }
-  } while (trace_worker_should_continue(worker));
+  } else {
+    DEBUG("tracer #%zu: tracing objects\n", worker->id);
+    size_t n = 0;
+    do {
+      while (1) {
+        struct gc_ref ref;
+        if (!local_worklist_empty(&worker->local)) {
+          ref = local_worklist_pop(&worker->local);
+        } else {
+          ref = trace_worker_steal(worker);
+          if (!gc_ref_is_heap_object(ref))
+            break;
+        }
+        trace_one(ref, heap, worker);
+        n++;
+      }
+    } while (trace_worker_should_continue(worker));
 
-  DEBUG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n);
+    DEBUG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n);
+  }
 
   worker->data = NULL;
   atomic_fetch_sub_explicit(&tracer->active_tracers, 1, memory_order_acq_rel);
@@ -336,17 +358,28 @@ trace_worker_trace(struct gc_trace_worker *worker) {
                                  worker->heap, worker);
 }
 
-static inline void
-gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref ref) {
-  struct shared_worklist *worker0_deque = &tracer->workers[0].shared;
-  shared_worklist_push(worker0_deque, ref);
-}
+static inline int
+gc_tracer_should_parallelize(struct gc_tracer *tracer) {
+  if (root_worklist_size(&tracer->roots) > 1)
+    return 1;
 
-static inline void
-gc_tracer_enqueue_roots(struct gc_tracer *tracer, struct gc_ref *objv,
-                        size_t count) {
-  struct shared_worklist *worker0_deque = &tracer->workers[0].shared;
-  shared_worklist_push_many(worker0_deque, objv, count);
+  if (tracer->trace_roots_only)
+    return 0;
+
+  size_t nonempty_worklists = 0;
+  ssize_t parallel_threshold =
+    LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
+  for (size_t i = 0; i < tracer->worker_count; i++) {
+    ssize_t size = shared_worklist_size(&tracer->workers[i].shared);
+    if (!size)
+      continue;
+    nonempty_worklists++;
+    if (nonempty_worklists > 1)
+      return 1;
+    if (size >= parallel_threshold)
+      return 1;
+  }
+  return 0;
 }
 
 static inline void
@@ -356,10 +389,7 @@ gc_tracer_trace(struct gc_tracer *tracer) {
   for (int i = 1; i < tracer->worker_count; i++)
     pthread_mutex_unlock(&tracer->workers[i].lock);
 
-  ssize_t parallel_threshold =
-    LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
-  if (root_worklist_size(&tracer->roots) > 1 ||
-      shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) {
+  if (gc_tracer_should_parallelize(tracer)) {
     DEBUG("waking workers\n");
     tracer_unpark_all_workers(tracer);
   } else {
@@ -372,4 +402,16 @@ gc_tracer_trace(struct gc_tracer *tracer) {
   DEBUG("trace finished\n");
 }
 
+static inline void
+gc_tracer_trace_roots(struct gc_tracer *tracer) {
+  DEBUG("starting roots-only trace\n");
+
+  tracer->trace_roots_only = 1;
+  gc_tracer_trace(tracer);
+  tracer->trace_roots_only = 0;
+  
+  GC_ASSERT_EQ(atomic_load(&tracer->active_tracers), 0);
+  DEBUG("roots-only trace finished\n");
+}
+
 #endif // PARALLEL_TRACER_H
diff --git a/src/root.h b/src/root.h
index 5228dcb4f..46e019b06 100644
--- a/src/root.h
+++ b/src/root.h
@@ -2,6 +2,7 @@
 #define ROOT_H
 
 #include "gc-edge.h"
+#include "extents.h"
 
 struct gc_ephemeron;
 struct gc_heap;
@@ -11,8 +12,12 @@ enum gc_root_kind {
   GC_ROOT_KIND_NONE,
   GC_ROOT_KIND_HEAP,
   GC_ROOT_KIND_MUTATOR,
+  GC_ROOT_KIND_CONSERVATIVE_EDGES,
+  GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES,
   GC_ROOT_KIND_RESOLVED_EPHEMERONS,
   GC_ROOT_KIND_EDGE,
+  GC_ROOT_KIND_REMEMBERED_OBJECT,
+  GC_ROOT_KIND_REMEMBERED_SLAB,
 };
 
 struct gc_root {
@@ -21,22 +26,38 @@ struct gc_root {
     struct gc_heap *heap;
     struct gc_mutator *mutator;
     struct gc_ephemeron *resolved_ephemerons;
+    struct extent_range range;
     struct gc_edge edge;
+    struct gc_ref ref;
+    size_t idx;
   };
 };
 
-static inline struct gc_root gc_root_heap(struct gc_heap* heap) {
+static inline struct gc_root
+gc_root_heap(struct gc_heap* heap) {
   struct gc_root ret = { GC_ROOT_KIND_HEAP };
   ret.heap = heap;
   return ret;
 }
 
-static inline struct gc_root gc_root_mutator(struct gc_mutator* mutator) {
+static inline struct gc_root
+gc_root_mutator(struct gc_mutator* mutator) {
   struct gc_root ret = { GC_ROOT_KIND_MUTATOR };
   ret.mutator = mutator;
   return ret;
 }
 
+static inline struct gc_root
+gc_root_conservative_edges(uintptr_t lo_addr, uintptr_t hi_addr,
+                           int possibly_interior) {
+  enum gc_root_kind kind = possibly_interior
+    ? GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES
+    : GC_ROOT_KIND_CONSERVATIVE_EDGES;
+  struct gc_root ret = { kind };
+  ret.range = (struct extent_range) {lo_addr, hi_addr};
+  return ret;
+}
+
 static inline struct gc_root
 gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) {
   struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS };
@@ -51,4 +72,18 @@ gc_root_edge(struct gc_edge edge) {
   return ret;
 }
 
+static inline struct gc_root
+gc_root_remembered_object(struct gc_ref ref) {
+  struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_OBJECT };
+  ret.ref = ref;
+  return ret;
+}
+
+static inline struct gc_root
+gc_root_remembered_slab(size_t idx) {
+  struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_SLAB };
+  ret.idx = idx;
+  return ret;
+}
+
 #endif // ROOT_H
diff --git a/src/serial-tracer.h b/src/serial-tracer.h
index 96ab7e563..b9575fddb 100644
--- a/src/serial-tracer.h
+++ b/src/serial-tracer.h
@@ -12,6 +12,7 @@
 
 struct gc_tracer {
   struct gc_heap *heap;
+  int trace_roots_only;
   struct root_worklist roots;
   struct simple_worklist worklist;
 };
@@ -30,6 +31,7 @@ static int
 gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
                size_t parallelism) {
   tracer->heap = heap;
+  tracer->trace_roots_only = 0;
   root_worklist_init(&tracer->roots);
   return simple_worklist_init(&tracer->worklist);
 }
@@ -43,19 +45,11 @@ gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
   root_worklist_push(&tracer->roots, root);
 }
 
-static inline void
-gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref obj) {
-  simple_worklist_push(&tracer->worklist, obj);
-}
-static inline void
-gc_tracer_enqueue_roots(struct gc_tracer *tracer, struct gc_ref *objs,
-                        size_t count) {
-  simple_worklist_push_many(&tracer->worklist, objs, count);
-}
 static inline void
 gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
-  gc_tracer_enqueue_root(worker->tracer, ref);
+  simple_worklist_push(&worker->tracer->worklist, ref);
 }
+
 static inline void
 tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap,
                        struct gc_trace_worker *worker,
@@ -68,12 +62,14 @@ tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap,
     trace_root(root, heap, worker);
   } while (1);
   root_worklist_reset(&tracer->roots);
-  do {
-    struct gc_ref obj = simple_worklist_pop(&tracer->worklist);
-    if (!gc_ref_is_heap_object(obj))
-      break;
-    trace_one(obj, heap, worker);
-  } while (1);
+  if (!tracer->trace_roots_only) {
+    do {
+      struct gc_ref obj = simple_worklist_pop(&tracer->worklist);
+      if (!gc_ref_is_heap_object(obj))
+        break;
+      trace_one(obj, heap, worker);
+    } while (1);
+  }
 }
 static inline void
 gc_tracer_trace(struct gc_tracer *tracer) {
@@ -82,4 +78,11 @@ gc_tracer_trace(struct gc_tracer *tracer) {
                                  &worker);
 }
 
+static inline void
+gc_tracer_trace_roots(struct gc_tracer *tracer) {
+  tracer->trace_roots_only = 1;
+  gc_tracer_trace(tracer);
+  tracer->trace_roots_only = 0;
+}
+
 #endif // SERIAL_TRACER_H
diff --git a/src/tracer.h b/src/tracer.h
index ec6a140b1..c563a7018 100644
--- a/src/tracer.h
+++ b/src/tracer.h
@@ -47,11 +47,8 @@ static void gc_tracer_prepare(struct gc_tracer *tracer);
 static void gc_tracer_release(struct gc_tracer *tracer);
 
 // Add root objects to the trace.  Call before tracer_trace.
-static inline void gc_tracer_enqueue_root(struct gc_tracer *tracer,
-                                          struct gc_ref obj);
-static inline void gc_tracer_enqueue_roots(struct gc_tracer *tracer,
-                                           struct gc_ref *objs,
-                                           size_t count);
+static inline void gc_tracer_add_root(struct gc_tracer *tracer,
+                                      struct gc_root root);
 
 // Given that an object has been shaded grey, enqueue for tracing.
 static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker,
@@ -59,10 +56,10 @@ static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker,
 static inline struct gc_trace_worker_data*
 gc_trace_worker_data(struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
 
-static inline void gc_tracer_add_root(struct gc_tracer *tracer,
-                                      struct gc_root root);
+// Just trace roots.
+static inline void gc_tracer_trace_roots(struct gc_tracer *tracer);
 
-// Run the full trace.
+// Run the full trace, including roots.
 static inline void gc_tracer_trace(struct gc_tracer *tracer);
 
 #endif // TRACER_H
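
For completeness, here is how the new root API from src/root.h and src/tracer.h
composes, as a minimal hypothetical sketch: heap, mut, stack_lo and stack_hi
are placeholders, not values from the patch; the real callers are
enqueue_pinned_roots() and enqueue_relocatable_roots() in src/mmc.c above.

    // Pinned roots: conservative address ranges, traced in a roots-only pass.
    gc_tracer_add_root(&heap->tracer,
                       gc_root_conservative_edges(stack_lo, stack_hi,
                                                  /*possibly_interior=*/1));
    gc_tracer_trace_roots(&heap->tracer);

    // Relocatable roots: precise mutator/heap roots plus remembered-set
    // entries for minor collections, followed by the full transitive trace.
    gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
    gc_tracer_add_root(&heap->tracer, gc_root_remembered_slab(0));
    gc_tracer_trace(&heap->tracer);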

