wingo pushed a commit to branch wip-whippet
in repository guile.

commit f715794762fb05b301a3057de4e734ac3bf47863
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Tue Aug 5 17:17:39 2025 +0200

    For parallel tracer, remove local worklist
    
    There were pathological cases that didn't parallelize like they should
    have.
---
 src/local-worklist.h  | 59 ---------------------------------------
 src/parallel-tracer.h | 76 ++++++---------------------------------------------
 2 files changed, 8 insertions(+), 127 deletions(-)

diff --git a/src/local-worklist.h b/src/local-worklist.h
deleted file mode 100644
index 8dcd3e20d..000000000
--- a/src/local-worklist.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef LOCAL_WORKLIST_H
-#define LOCAL_WORKLIST_H
-
-#include "assert.h"
-
-#define LOCAL_WORKLIST_SIZE 1024
-#define LOCAL_WORKLIST_MASK (LOCAL_WORKLIST_SIZE - 1)
-#define LOCAL_WORKLIST_SHARE_AMOUNT (LOCAL_WORKLIST_SIZE * 3 / 4)
-struct local_worklist {
-  size_t read;
-  size_t write;
-  struct gc_ref data[LOCAL_WORKLIST_SIZE];
-};
-
-static inline void
-local_worklist_init(struct local_worklist *q) {
-  q->read = q->write = 0;
-}
-static inline void
-local_worklist_poison(struct local_worklist *q) {
-  q->read = 0; q->write = LOCAL_WORKLIST_SIZE;
-}
-static inline size_t
-local_worklist_size(struct local_worklist *q) {
-  return q->write - q->read;
-}
-static inline int
-local_worklist_empty(struct local_worklist *q) {
-  return local_worklist_size(q) == 0;
-}
-static inline int
-local_worklist_full(struct local_worklist *q) {
-  return local_worklist_size(q) >= LOCAL_WORKLIST_SIZE;
-}
-static inline void
-local_worklist_push(struct local_worklist *q, struct gc_ref v) {
-  ASSERT(!local_worklist_full(q));
-  q->data[q->write++ & LOCAL_WORKLIST_MASK] = v;
-}
-static inline struct gc_ref
-local_worklist_pop(struct local_worklist *q) {
-  ASSERT(!local_worklist_empty(q));
-  return q->data[q->read++ & LOCAL_WORKLIST_MASK];
-}
-
-static inline size_t
-local_worklist_pop_many(struct local_worklist *q, struct gc_ref **objv,
-                        size_t limit) {
-  size_t avail = local_worklist_size(q);
-  size_t read = q->read & LOCAL_WORKLIST_MASK;
-  size_t contig = LOCAL_WORKLIST_SIZE - read;
-  if (contig < avail) avail = contig;
-  if (limit < avail) avail = limit;
-  *objv = q->data + read;
-  q->read += avail;
-  return avail;
-}
-
-#endif // LOCAL_WORKLIST_H
diff --git a/src/parallel-tracer.h b/src/parallel-tracer.h
index 70edbe7f2..94ce54a0c 100644
--- a/src/parallel-tracer.h
+++ b/src/parallel-tracer.h
@@ -10,7 +10,6 @@
 #include "debug.h"
 #include "gc-inline.h"
 #include "gc-tracepoint.h"
-#include "local-worklist.h"
 #include "root-worklist.h"
 #include "shared-worklist.h"
 #include "spin.h"
@@ -40,7 +39,6 @@ struct gc_trace_worker {
   enum trace_worker_state state;
   pthread_mutex_t lock;
   struct shared_worklist shared;
-  struct local_worklist local;
   struct gc_trace_worker_data *data;
 };
 
@@ -74,7 +72,6 @@ trace_worker_init(struct gc_trace_worker *worker, struct gc_heap *heap,
   worker->state = TRACE_WORKER_STOPPED;
   pthread_mutex_init(&worker->lock, NULL);
   worker->data = NULL;
-  local_worklist_init(&worker->local);
   return shared_worklist_init(&worker->shared);
 }
 
@@ -170,39 +167,10 @@ tracer_maybe_unpark_workers(struct gc_tracer *tracer) {
     tracer_unpark_all_workers(tracer);
 }
 
-static inline void
-tracer_share(struct gc_trace_worker *worker) {
-  LOG("tracer #%zu: sharing\n", worker->id);
-  GC_TRACEPOINT(trace_share);
-  size_t to_share = LOCAL_WORKLIST_SHARE_AMOUNT;
-  while (to_share) {
-    struct gc_ref *objv;
-    size_t count = local_worklist_pop_many(&worker->local, &objv, to_share);
-    shared_worklist_push_many(&worker->shared, objv, count);
-    to_share -= count;
-  }
-  tracer_maybe_unpark_workers(worker->tracer);
-}
-
-static inline void
-tracer_share_all(struct gc_trace_worker *worker) {
-  LOG("tracer #%zu: found %zu roots\n", worker->id,
-      local_worklist_size (&worker->local));
-  while (!local_worklist_empty (&worker->local)) {
-    struct gc_ref *objv;
-    size_t count =
-      local_worklist_pop_many(&worker->local, &objv, LOCAL_WORKLIST_SIZE);
-    shared_worklist_push_many(&worker->shared, objv, count);
-  }
-  // No unparking; this is used at the end of a roots-only trace.
-}
-
 static inline void
 gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
   ASSERT(gc_ref_is_heap_object(ref));
-  if (local_worklist_full(&worker->local))
-    tracer_share(worker);
-  local_worklist_push(&worker->local, ref);
+  shared_worklist_push(&worker->shared, ref);
 }
 
 static struct gc_ref
@@ -288,6 +256,7 @@ trace_worker_should_continue(struct gc_trace_worker *worker, size_t spin_count)
     pthread_mutex_unlock(&tracer->workers[--locked].lock);
 
   LOG("checking for termination: failed to lock, spinning #%zu\n", spin_count);
+  tracer_unpark_all_workers(tracer);
   yield_for_spin(spin_count);
   return 1;
 }
@@ -296,16 +265,6 @@ static struct gc_ref
 trace_worker_steal(struct gc_trace_worker *worker) {
   struct gc_tracer *tracer = worker->tracer;
 
-  // It could be that the worker's local trace queue has simply
-  // overflowed.  In that case avoid contention by trying to pop
-  // something from the worker's own queue.
-  {
-    LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
-    struct gc_ref obj = shared_worklist_try_pop(&worker->shared);
-    if (!gc_ref_is_null(obj))
-      return obj;
-  }
-
   GC_TRACEPOINT(trace_steal);
   LOG("tracer #%zu: trying to steal\n", worker->id);
   struct gc_ref obj = trace_worker_steal_from_any(worker, tracer);
@@ -345,7 +304,6 @@ trace_with_data(struct gc_tracer *tracer,
     // result of marking roots isn't ours to deal with.  However we do need to
     // synchronize with remote workers to ensure they have completed their
     // work items.
-    tracer_share_all(worker);
     if (worker->id == 0) {
       for (size_t i = 1; i < tracer->worker_count; i++)
         pthread_mutex_lock(&tracer->workers[i].lock);
@@ -357,16 +315,17 @@ trace_with_data(struct gc_tracer *tracer,
     size_t spin_count = 0;
     do {
       while (1) {
-        struct gc_ref ref;
-        if (!local_worklist_empty(&worker->local)) {
-          ref = local_worklist_pop(&worker->local);
-        } else {
+        struct gc_ref ref = shared_worklist_try_pop(&worker->shared);
+        if (gc_ref_is_null(ref)) {
           ref = trace_worker_steal(worker);
           if (gc_ref_is_null(ref))
             break;
         }
         trace_one(ref, heap, worker);
         n++;
+        if (worker->id == 0 && n % 128 == 0
+            && shared_worklist_can_steal(&worker->shared))
+          tracer_maybe_unpark_workers(tracer);
       }
     } while (trace_worker_should_continue(worker, spin_count++));
     GC_TRACEPOINT(trace_objects_end);
@@ -388,26 +347,7 @@ trace_worker_trace(struct gc_trace_worker *worker) {
 
 static inline int
 gc_tracer_should_parallelize(struct gc_tracer *tracer) {
-  if (root_worklist_size(&tracer->roots) > 1)
-    return 1;
-
-  if (tracer->trace_roots_only)
-    return 0;
-
-  size_t nonempty_worklists = 0;
-  ssize_t parallel_threshold =
-    LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
-  for (size_t i = 0; i < tracer->worker_count; i++) {
-    ssize_t size = shared_worklist_size(&tracer->workers[i].shared);
-    if (!size)
-      continue;
-    nonempty_worklists++;
-    if (nonempty_worklists > 1)
-      return 1;
-    if (size >= parallel_threshold)
-      return 1;
-  }
-  return 0;
+  return 1;
 }
 
 static inline void

Reply via email to