wingo pushed a commit to branch wip-whippet
in repository guile.

commit 5084730471a15b6776c548f9e6779e61b9638705
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Wed Jul 10 15:46:11 2024 +0200

    Add parallel root-tracing phase
---
 benchmarks/simple-gc-embedder.h |  8 ++---
 src/gc-ephemeron-internal.h     | 16 +++++----
 src/gc-ephemeron.c              | 22 ++++++------
 src/parallel-tracer.h           | 51 +++++++++++++++++++++------
 src/root-worklist.h             | 76 +++++++++++++++++++++++++++++++++++++++++
 src/root.h                      | 43 +++++++++++++++++++++++
 src/semi.c                      |  7 +++-
 src/serial-tracer.h             | 20 +++++++++++
 src/tracer.h                    |  8 +++++
 src/whippet.c                   | 14 ++++++--
 10 files changed, 231 insertions(+), 34 deletions(-)

diff --git a/benchmarks/simple-gc-embedder.h b/benchmarks/simple-gc-embedder.h
index 5f77a7cc9..23fc54a5d 100644
--- a/benchmarks/simple-gc-embedder.h
+++ b/benchmarks/simple-gc-embedder.h
@@ -172,11 +172,11 @@ static inline size_t
 gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) {
   GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
   switch (tag_live_alloc_kind(fwd->data)) {
-#define SCAN_OBJECT(name, Name, NAME)                                   \
+#define OBJECT_SIZE(name, Name, NAME)                                   \
     case ALLOC_KIND_##NAME:                                             \
-      return name##_size(gc_ref_heap_object(ref));                      \
-    FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT)
-#undef SCAN_OBJECT
+      return name##_size(gc_ref_heap_object(fwd->ref));
+    FOR_EACH_HEAP_OBJECT_KIND(OBJECT_SIZE)
+#undef OBJECT_SIZE
   default:
     GC_CRASH();
   }
diff --git a/src/gc-ephemeron-internal.h b/src/gc-ephemeron-internal.h
index 8894bbd8f..3d34cf188 100644
--- a/src/gc-ephemeron-internal.h
+++ b/src/gc-ephemeron-internal.h
@@ -32,12 +32,16 @@ gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
                            struct gc_heap *heap, size_t shard,
                            size_t nshards);
 
-GC_INTERNAL int
-gc_pop_resolved_ephemerons(struct gc_heap *heap,
-                           void (*visit)(struct gc_edge edge,
-                                         struct gc_heap *heap,
-                                         void *visit_data),
-                           void *trace_data);
+GC_INTERNAL struct gc_ephemeron*
+gc_pop_resolved_ephemerons(struct gc_heap *heap);
+
+GC_INTERNAL void
+gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
+                             void (*visit)(struct gc_edge edge,
+                                           struct gc_heap *heap,
+                                           void *visit_data),
+                             struct gc_heap *heap,
+                             void *trace_data);
 
 GC_INTERNAL void
 gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state,
diff --git a/src/gc-ephemeron.c b/src/gc-ephemeron.c
index 8a42c1a84..90f82e8ca 100644
--- a/src/gc-ephemeron.c
+++ b/src/gc-ephemeron.c
@@ -521,23 +521,25 @@ gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
   }
 }
 
-int
-gc_pop_resolved_ephemerons(struct gc_heap *heap,
-                           void (*visit)(struct gc_edge edge,
-                                         struct gc_heap *heap,
-                                         void *visit_data),
-                           void *trace_data) {
+struct gc_ephemeron*
+gc_pop_resolved_ephemerons(struct gc_heap *heap) {
   struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
-  struct gc_ephemeron *resolved = atomic_exchange(&state->resolved, NULL);
-  if (!resolved)
-    return 0;
+  return atomic_exchange(&state->resolved, NULL);
+}    
+
+void
+gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
+                             void (*visit)(struct gc_edge edge,
+                                           struct gc_heap *heap,
+                                           void *visit_data),
+                             struct gc_heap *heap,
+                             void *trace_data) {
   for (; resolved; resolved = resolved->resolved) {
     visit(gc_ephemeron_value_edge(resolved), heap, trace_data);
     // RESOLVED -> TRACED.
     atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED,
                           memory_order_release);
   }
-  return 1;
 }    
 
 void
diff --git a/src/parallel-tracer.h b/src/parallel-tracer.h
index 0e93ca0dd..dea87cc9f 100644
--- a/src/parallel-tracer.h
+++ b/src/parallel-tracer.h
@@ -10,10 +10,17 @@
 #include "debug.h"
 #include "gc-inline.h"
 #include "local-worklist.h"
+#include "root-worklist.h"
 #include "shared-worklist.h"
 #include "spin.h"
 #include "tracer.h"
 
+#ifdef VERBOSE_LOGGING
+#define LOG(...) fprintf (stderr, "LOG: " __VA_ARGS__)
+#else
+#define LOG(...) do { } while (0)
+#endif
+
 enum trace_worker_state {
   TRACE_WORKER_STOPPED,
   TRACE_WORKER_IDLE,
@@ -36,6 +43,11 @@ struct gc_trace_worker {
   struct gc_trace_worker_data *data;
 };
 
+static inline struct gc_trace_worker_data*
+gc_trace_worker_data(struct gc_trace_worker *worker) {
+  return worker->data;
+}
+
 #define TRACE_WORKERS_MAX_COUNT 8
 
 struct gc_tracer {
@@ -45,6 +57,7 @@ struct gc_tracer {
   long epoch;
   pthread_mutex_t lock;
   pthread_cond_t cond;
+  struct root_worklist roots;
   struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT];
 };
 
@@ -101,6 +114,7 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
   tracer->epoch = 0;
   pthread_mutex_init(&tracer->lock, NULL);
   pthread_cond_init(&tracer->cond, NULL);
+  root_worklist_init(&tracer->roots);
   size_t desired_worker_count = parallelism;
   ASSERT(desired_worker_count);
   if (desired_worker_count > TRACE_WORKERS_MAX_COUNT)
@@ -121,13 +135,18 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
 
 static void gc_tracer_prepare(struct gc_tracer *tracer) {
   for (size_t i = 0; i < tracer->worker_count; i++)
-    tracer->workers[i].steal_id = 0;
+    tracer->workers[i].steal_id = (i + 1) % tracer->worker_count;
 }
 static void gc_tracer_release(struct gc_tracer *tracer) {
   for (size_t i = 0; i < tracer->worker_count; i++)
     shared_worklist_release(&tracer->workers[i].shared);
 }
 
+static inline void
+gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
+  root_worklist_push(&tracer->roots, root);
+}
+
 static inline void
 tracer_unpark_all_workers(struct gc_tracer *tracer) {
   long old_epoch =
@@ -161,6 +180,7 @@ tracer_share(struct gc_trace_worker *worker) {
 
 static inline void
 gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
+  ASSERT(gc_ref_is_heap_object(ref));
   if (local_worklist_full(&worker->local))
     tracer_share(worker);
   local_worklist_push(&worker->local, ref);
@@ -182,33 +202,33 @@ static struct gc_ref
 trace_worker_steal_from_any(struct gc_trace_worker *worker,
                             struct gc_tracer *tracer) {
   for (size_t i = 0; i < tracer->worker_count; i++) {
-    DEBUG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id);
+    LOG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id);
     struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id);
     if (gc_ref_is_heap_object(obj)) {
-      DEBUG("tracer #%zu: stealing got %p\n", worker->id,
+      LOG("tracer #%zu: stealing got %p\n", worker->id,
             gc_ref_heap_object(obj));
       return obj;
     }
     worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
   }
-  DEBUG("tracer #%zu: failed to steal\n", worker->id);
+  LOG("tracer #%zu: failed to steal\n", worker->id);
   return gc_ref_null();
 }
 
 static int
 trace_worker_can_steal_from_any(struct gc_trace_worker *worker,
                                 struct gc_tracer *tracer) {
-  DEBUG("tracer #%zu: checking if any worker has tasks\n", worker->id);
+  LOG("tracer #%zu: checking if any worker has tasks\n", worker->id);
   for (size_t i = 0; i < tracer->worker_count; i++) {
     int res = tracer_can_steal_from_worker(tracer, worker->steal_id);
     if (res) {
-      DEBUG("tracer #%zu: worker #%zu has tasks!\n", worker->id,
+      LOG("tracer #%zu: worker #%zu has tasks!\n", worker->id,
             worker->steal_id);
       return 1;
     }
     worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
   }
-  DEBUG("tracer #%zu: nothing to steal\n", worker->id);
+  LOG("tracer #%zu: nothing to steal\n", worker->id);
   return 0;
 }
 
@@ -241,7 +261,7 @@ trace_worker_should_continue(struct gc_trace_worker *worker) {
       return !done;
     }
     // spin
-    DEBUG("checking for termination: spinning #%zu\n", spin_count);
+    LOG("checking for termination: spinning #%zu\n", spin_count);
     yield_for_spin(spin_count);
   }
 }
@@ -254,13 +274,13 @@ trace_worker_steal(struct gc_trace_worker *worker) {
   // overflowed.  In that case avoid contention by trying to pop
   // something from the worker's own queue.
   {
-    DEBUG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
+    LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
     struct gc_ref obj = shared_worklist_try_pop(&worker->shared);
     if (gc_ref_is_heap_object(obj))
       return obj;
   }
 
-  DEBUG("tracer #%zu: trying to steal\n", worker->id);
+  LOG("tracer #%zu: trying to steal\n", worker->id);
   struct gc_ref obj = trace_worker_steal_from_any(worker, tracer);
   if (gc_ref_is_heap_object(obj))
     return obj;
@@ -279,6 +299,13 @@ trace_with_data(struct gc_tracer *tracer,
   size_t n = 0;
   DEBUG("tracer #%zu: running trace loop\n", worker->id);
 
+  do {
+    struct gc_root root = root_worklist_pop(&tracer->roots);
+    if (root.kind == GC_ROOT_KIND_NONE)
+      break;
+    trace_root(root, heap, worker);
+  } while (1);
+
   do {
     while (1) {
       struct gc_ref ref;
@@ -325,7 +352,8 @@ gc_tracer_trace(struct gc_tracer *tracer) {
 
   ssize_t parallel_threshold =
     LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
-  if (shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) {
+  if (root_worklist_size(&tracer->roots) > 1 ||
+      shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) {
     DEBUG("waking workers\n");
     tracer_unpark_all_workers(tracer);
   } else {
@@ -333,6 +361,7 @@ gc_tracer_trace(struct gc_tracer *tracer) {
   }
 
   trace_worker_trace(&tracer->workers[0]);
+  root_worklist_reset(&tracer->roots);
 
   DEBUG("trace finished\n");
 }
diff --git a/src/root-worklist.h b/src/root-worklist.h
new file mode 100644
index 000000000..45ede8595
--- /dev/null
+++ b/src/root-worklist.h
@@ -0,0 +1,76 @@
+#ifndef ROOT_WORKLIST_H
+#define ROOT_WORKLIST_H
+
+#include <stdatomic.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "assert.h"
+#include "debug.h"
+#include "gc-inline.h"
+#include "gc-ref.h"
+#include "root.h"
+
+// A single-producer, multiple-consumer worklist that has two phases:
+// one in which roots are added by the producer, then one in which roots
+// are consumed from the worklist.  Roots are never added once the
+// consumer phase starts.
+struct root_worklist {
+  size_t size;
+  size_t read;
+  size_t write;
+  struct gc_root *buf;
+};
+
+void
+root_worklist_alloc(struct root_worklist *q) {
+  q->buf = realloc(q->buf, q->size * sizeof(struct gc_root));
+  if (!q->buf) {
+    perror("Failed to grow root worklist");
+    GC_CRASH();
+  }
+}
+
+static void
+root_worklist_init(struct root_worklist *q) {
+  q->size = 16;
+  q->read = 0;
+  q->write = 0;
+  q->buf = NULL;
+  root_worklist_alloc(q);
+}
+
+static inline void
+root_worklist_push(struct root_worklist *q, struct gc_root root) {
+  if (UNLIKELY(q->write == q->size)) {
+    q->size *= 2;
+    root_worklist_alloc(q);
+  }
+  q->buf[q->write++] = root;
+}
+
+// Not atomic.
+static inline size_t
+root_worklist_size(struct root_worklist *q) {
+  return q->write - q->read;
+}
+
+static inline struct gc_root
+root_worklist_pop(struct root_worklist *q) {
+  size_t idx = atomic_fetch_add(&q->read, 1);
+  if (idx < q->write)
+    return q->buf[idx];
+  return (struct gc_root){ GC_ROOT_KIND_NONE, };
+}
+
+static void
+root_worklist_reset(struct root_worklist *q) {
+  q->read = q->write = 0;
+}
+
+static void
+root_worklist_destroy(struct root_worklist *q) {
+  free(q->buf);
+}
+
+#endif // ROOT_WORKLIST_H
diff --git a/src/root.h b/src/root.h
new file mode 100644
index 000000000..a6a91f987
--- /dev/null
+++ b/src/root.h
@@ -0,0 +1,43 @@
+#ifndef ROOT_H
+#define ROOT_H
+
+struct gc_ephemeron;
+struct gc_heap;
+struct gc_mutator;
+
+enum gc_root_kind {
+  GC_ROOT_KIND_NONE,
+  GC_ROOT_KIND_HEAP,
+  GC_ROOT_KIND_MUTATOR,
+  GC_ROOT_KIND_RESOLVED_EPHEMERONS
+};
+
+struct gc_root {
+  enum gc_root_kind kind;
+  union {
+    struct gc_heap *heap;
+    struct gc_mutator *mutator;
+    struct gc_ephemeron *resolved_ephemerons;
+  };
+};
+
+static inline struct gc_root gc_root_heap(struct gc_heap* heap) {
+  struct gc_root ret = { GC_ROOT_KIND_HEAP };
+  ret.heap = heap;
+  return ret;
+}
+
+static inline struct gc_root gc_root_mutator(struct gc_mutator* mutator) {
+  struct gc_root ret = { GC_ROOT_KIND_MUTATOR };
+  ret.mutator = mutator;
+  return ret;
+}
+
+static inline struct gc_root
+gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) {
+  struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS };
+  ret.resolved_ephemerons = resolved;
+  return ret;
+}
+
+#endif // ROOT_H
diff --git a/src/semi.c b/src/semi.c
index fdcd03792..af9134bd7 100644
--- a/src/semi.c
+++ b/src/semi.c
@@ -380,9 +380,14 @@ static void collect(struct gc_mutator *mut, size_t for_alloc) {
   HEAP_EVENT(heap, heap_traced);
   gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
   heap->check_pending_ephemerons = 1;
-  while (gc_pop_resolved_ephemerons(heap, trace, NULL))
+  do {
+    struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
+    if (!resolved)
+      break;
+    gc_trace_resolved_ephemerons(resolved, trace, heap, NULL);
     while(grey < semi->hp)
       grey = scan(heap, gc_ref(grey));
+  } while (1);
   HEAP_EVENT(heap, ephemerons_traced);
   large_object_space_finish_gc(large, 0);
   gc_extern_space_finish_gc(heap->extern_space, 0);
diff --git a/src/serial-tracer.h b/src/serial-tracer.h
index 9e128be91..96ab7e563 100644
--- a/src/serial-tracer.h
+++ b/src/serial-tracer.h
@@ -7,10 +7,12 @@
 #include "assert.h"
 #include "debug.h"
 #include "simple-worklist.h"
+#include "root-worklist.h"
 #include "tracer.h"
 
 struct gc_tracer {
   struct gc_heap *heap;
+  struct root_worklist roots;
   struct simple_worklist worklist;
 };
 
@@ -19,10 +21,16 @@ struct gc_trace_worker {
   struct gc_trace_worker_data *data;
 };
 
+static inline struct gc_trace_worker_data*
+gc_trace_worker_data(struct gc_trace_worker *worker) {
+  return worker->data;
+}
+
 static int
 gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
                size_t parallelism) {
   tracer->heap = heap;
+  root_worklist_init(&tracer->roots);
   return simple_worklist_init(&tracer->worklist);
 }
 static void gc_tracer_prepare(struct gc_tracer *tracer) {}
@@ -30,6 +38,11 @@ static void gc_tracer_release(struct gc_tracer *tracer) {
   simple_worklist_release(&tracer->worklist);
 }
 
+static inline void
+gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
+  root_worklist_push(&tracer->roots, root);
+}
+
 static inline void
 gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref obj) {
   simple_worklist_push(&tracer->worklist, obj);
@@ -48,6 +61,13 @@ tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap,
                        struct gc_trace_worker *worker,
                        struct gc_trace_worker_data *data) {
   worker->data = data;
+  do {
+    struct gc_root root = root_worklist_pop(&tracer->roots);
+    if (root.kind == GC_ROOT_KIND_NONE)
+      break;
+    trace_root(root, heap, worker);
+  } while (1);
+  root_worklist_reset(&tracer->roots);
   do {
     struct gc_ref obj = simple_worklist_pop(&tracer->worklist);
     if (!gc_ref_is_heap_object(obj))
diff --git a/src/tracer.h b/src/tracer.h
index 64f6dcdc6..ec6a140b1 100644
--- a/src/tracer.h
+++ b/src/tracer.h
@@ -3,6 +3,7 @@
 
 #include "gc-ref.h"
 #include "gc-edge.h"
+#include "root.h"
 
 struct gc_heap;
 
@@ -19,6 +20,8 @@ struct gc_trace_worker_data;
 // Visit all fields in an object.
 static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
                              struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
+static inline void trace_root(struct gc_root root, struct gc_heap *heap,
+                              struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
 
 static void
 gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
@@ -53,6 +56,11 @@ static inline void gc_tracer_enqueue_roots(struct gc_tracer *tracer,
 // Given that an object has been shaded grey, enqueue for tracing.
 static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker,
                                            struct gc_ref ref) GC_ALWAYS_INLINE;
+static inline struct gc_trace_worker_data*
+gc_trace_worker_data(struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
+
+static inline void gc_tracer_add_root(struct gc_tracer *tracer,
+                                      struct gc_root root);
 
 // Run the full trace.
 static inline void gc_tracer_trace(struct gc_tracer *tracer);
diff --git a/src/whippet.c b/src/whippet.c
index 9674560c2..3e17f5422 100644
--- a/src/whippet.c
+++ b/src/whippet.c
@@ -1228,6 +1228,13 @@ static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
     gc_trace_object(ref, tracer_visit, heap, worker, NULL);
 }
 
+static inline void trace_root(struct gc_root root,
+                              struct gc_heap *heap,
+                              struct gc_trace_worker *worker) {
+  // We don't use parallel root tracing yet.
+  GC_CRASH();
+}
+
 static void
 mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low,
                                                      uintptr_t high,
@@ -1876,8 +1883,11 @@ static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
 }
 
 static int enqueue_resolved_ephemerons(struct gc_heap *heap) {
-  return gc_pop_resolved_ephemerons(heap, trace_and_enqueue_globally,
-                                    NULL);
+  struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
+  if (!resolved)
+    return 0;
+  gc_trace_resolved_ephemerons(resolved, trace_and_enqueue_globally, heap, NULL);
+  return 1;
 }
 
 static void sweep_ephemerons(struct gc_heap *heap) {

Reply via email to