wingo pushed a commit to branch wip-whippet
in repository guile.

commit f715794762fb05b301a3057de4e734ac3bf47863
Author:     Andy Wingo <wi...@igalia.com>
AuthorDate: Tue Aug 5 17:17:39 2025 +0200
    For parallel tracer, remove local worklist

    There were pathological cases that didn't parallelize like they
    should have.
---
 src/local-worklist.h  | 59 ---------------------------------------
 src/parallel-tracer.h | 76 ++++++---------------------------------------
 2 files changed, 8 insertions(+), 127 deletions(-)

diff --git a/src/local-worklist.h b/src/local-worklist.h
deleted file mode 100644
index 8dcd3e20d..000000000
--- a/src/local-worklist.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef LOCAL_WORKLIST_H
-#define LOCAL_WORKLIST_H
-
-#include "assert.h"
-
-#define LOCAL_WORKLIST_SIZE 1024
-#define LOCAL_WORKLIST_MASK (LOCAL_WORKLIST_SIZE - 1)
-#define LOCAL_WORKLIST_SHARE_AMOUNT (LOCAL_WORKLIST_SIZE * 3 / 4)
-struct local_worklist {
-  size_t read;
-  size_t write;
-  struct gc_ref data[LOCAL_WORKLIST_SIZE];
-};
-
-static inline void
-local_worklist_init(struct local_worklist *q) {
-  q->read = q->write = 0;
-}
-static inline void
-local_worklist_poison(struct local_worklist *q) {
-  q->read = 0; q->write = LOCAL_WORKLIST_SIZE;
-}
-static inline size_t
-local_worklist_size(struct local_worklist *q) {
-  return q->write - q->read;
-}
-static inline int
-local_worklist_empty(struct local_worklist *q) {
-  return local_worklist_size(q) == 0;
-}
-static inline int
-local_worklist_full(struct local_worklist *q) {
-  return local_worklist_size(q) >= LOCAL_WORKLIST_SIZE;
-}
-static inline void
-local_worklist_push(struct local_worklist *q, struct gc_ref v) {
-  ASSERT(!local_worklist_full(q));
-  q->data[q->write++ & LOCAL_WORKLIST_MASK] = v;
-}
-static inline struct gc_ref
-local_worklist_pop(struct local_worklist *q) {
-  ASSERT(!local_worklist_empty(q));
-  return q->data[q->read++ & LOCAL_WORKLIST_MASK];
-}
-
-static inline size_t
-local_worklist_pop_many(struct local_worklist *q, struct gc_ref **objv,
-                        size_t limit) {
-  size_t avail = local_worklist_size(q);
-  size_t read = q->read & LOCAL_WORKLIST_MASK;
-  size_t contig = LOCAL_WORKLIST_SIZE - read;
-  if (contig < avail) avail = contig;
-  if (limit < avail) avail = limit;
-  *objv = q->data + read;
-  q->read += avail;
-  return avail;
-}
-
-#endif // LOCAL_WORKLIST_H
diff --git a/src/parallel-tracer.h b/src/parallel-tracer.h
index 70edbe7f2..94ce54a0c 100644
--- a/src/parallel-tracer.h
+++ b/src/parallel-tracer.h
@@ -10,7 +10,6 @@
 #include "debug.h"
 #include "gc-inline.h"
 #include "gc-tracepoint.h"
-#include "local-worklist.h"
 #include "root-worklist.h"
 #include "shared-worklist.h"
 #include "spin.h"
@@ -40,7 +39,6 @@ struct gc_trace_worker {
   enum trace_worker_state state;
   pthread_mutex_t lock;
   struct shared_worklist shared;
-  struct local_worklist local;
   struct gc_trace_worker_data *data;
 };
 
@@ -74,7 +72,6 @@ trace_worker_init(struct gc_trace_worker *worker, struct gc_heap *heap,
   worker->state = TRACE_WORKER_STOPPED;
   pthread_mutex_init(&worker->lock, NULL);
   worker->data = NULL;
-  local_worklist_init(&worker->local);
   return shared_worklist_init(&worker->shared);
 }
 
@@ -170,39 +167,10 @@ tracer_maybe_unpark_workers(struct gc_tracer *tracer) {
     tracer_unpark_all_workers(tracer);
 }
 
-static inline void
-tracer_share(struct gc_trace_worker *worker) {
-  LOG("tracer #%zu: sharing\n", worker->id);
-  GC_TRACEPOINT(trace_share);
-  size_t to_share = LOCAL_WORKLIST_SHARE_AMOUNT;
-  while (to_share) {
-    struct gc_ref *objv;
-    size_t count = local_worklist_pop_many(&worker->local, &objv, to_share);
-    shared_worklist_push_many(&worker->shared, objv, count);
-    to_share -= count;
-  }
-  tracer_maybe_unpark_workers(worker->tracer);
-}
-
-static inline void
-tracer_share_all(struct gc_trace_worker *worker) {
-  LOG("tracer #%zu: found %zu roots\n", worker->id,
-      local_worklist_size(&worker->local));
-  while (!local_worklist_empty(&worker->local)) {
-    struct gc_ref *objv;
-    size_t count =
-      local_worklist_pop_many(&worker->local, &objv, LOCAL_WORKLIST_SIZE);
-    shared_worklist_push_many(&worker->shared, objv, count);
-  }
-  // No unparking; this is used at the end of a roots-only trace.
-}
-
 static inline void
 gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
   ASSERT(gc_ref_is_heap_object(ref));
-  if (local_worklist_full(&worker->local))
-    tracer_share(worker);
-  local_worklist_push(&worker->local, ref);
+  shared_worklist_push(&worker->shared, ref);
 }
 
 static struct gc_ref
@@ -288,6 +256,7 @@ trace_worker_should_continue(struct gc_trace_worker *worker, size_t spin_count)
       pthread_mutex_unlock(&tracer->workers[--locked].lock);
     LOG("checking for termination: failed to lock, spinning #%zu\n",
         spin_count);
+    tracer_unpark_all_workers(tracer);
     yield_for_spin(spin_count);
     return 1;
   }
@@ -296,16 +265,6 @@ static struct gc_ref
 trace_worker_steal(struct gc_trace_worker *worker) {
   struct gc_tracer *tracer = worker->tracer;
 
-  // It could be that the worker's local trace queue has simply
-  // overflowed. In that case avoid contention by trying to pop
-  // something from the worker's own queue.
-  {
-    LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
-    struct gc_ref obj = shared_worklist_try_pop(&worker->shared);
-    if (!gc_ref_is_null(obj))
-      return obj;
-  }
-
   GC_TRACEPOINT(trace_steal);
   LOG("tracer #%zu: trying to steal\n", worker->id);
   struct gc_ref obj = trace_worker_steal_from_any(worker, tracer);
@@ -345,7 +304,6 @@ trace_with_data(struct gc_tracer *tracer,
   // result of marking roots isn't ours to deal with. However we do need to
   // synchronize with remote workers to ensure they have completed their
   // work items.
-  tracer_share_all(worker);
   if (worker->id == 0) {
     for (size_t i = 1; i < tracer->worker_count; i++)
       pthread_mutex_lock(&tracer->workers[i].lock);
@@ -357,16 +315,17 @@ trace_with_data(struct gc_tracer *tracer,
     size_t spin_count = 0;
     do {
       while (1) {
-        struct gc_ref ref;
-        if (!local_worklist_empty(&worker->local)) {
-          ref = local_worklist_pop(&worker->local);
-        } else {
+        struct gc_ref ref = shared_worklist_try_pop(&worker->shared);
+        if (gc_ref_is_null(ref)) {
           ref = trace_worker_steal(worker);
           if (gc_ref_is_null(ref))
             break;
         }
         trace_one(ref, heap, worker);
         n++;
+        if (worker->id == 0 && n % 128 == 0
+            && shared_worklist_can_steal(&worker->shared))
+          tracer_maybe_unpark_workers(tracer);
       }
     } while (trace_worker_should_continue(worker, spin_count++));
     GC_TRACEPOINT(trace_objects_end);
@@ -388,26 +347,7 @@ trace_worker_trace(struct gc_trace_worker *worker) {
 
 static inline int
 gc_tracer_should_parallelize(struct gc_tracer *tracer) {
-  if (root_worklist_size(&tracer->roots) > 1)
-    return 1;
-
-  if (tracer->trace_roots_only)
-    return 0;
-
-  size_t nonempty_worklists = 0;
-  ssize_t parallel_threshold =
-    LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
-  for (size_t i = 0; i < tracer->worker_count; i++) {
-    ssize_t size = shared_worklist_size(&tracer->workers[i].shared);
-    if (!size)
-      continue;
-    nonempty_worklists++;
-    if (nonempty_worklists > 1)
-      return 1;
-    if (size >= parallel_threshold)
-      return 1;
-  }
-  return 0;
+  return 1;
 }
 
 static inline void
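
The gist of the change, for readers skimming the diff: previously each
tracer thread accumulated newly discovered objects in a 1024-entry
private ring buffer (local-worklist.h) and only published a batch of
them to its stealable shared deque once the buffer filled, so a worker
could sit on up to 1024 objects that idle peers could not steal,
plausibly the kind of pathological case the commit message refers to.
After the change, gc_trace_worker_enqueue pushes every object straight
onto the shared deque, worker 0 periodically unparks peers while it
traces, and gc_tracer_should_parallelize unconditionally returns 1,
since the heuristic it replaced was keyed to the local worklist's
share threshold. Below is a minimal, self-contained sketch of the two
enqueue disciplines. It is illustrative only; all names in it (obj_t,
shared_deque, enqueue_old, enqueue_new, the sizes) are hypothetical
stand-ins, not the real whippet types.

/* sketch.c: contrast the old "local ring buffer + spill" enqueue with
   the new "push straight to the shared deque" enqueue.  Hypothetical
   stand-ins throughout; not the whippet code. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define LOCAL_SIZE 8                  /* tiny, to make the effect visible */
#define SHARE_AMOUNT (LOCAL_SIZE * 3 / 4)

typedef int obj_t;

/* Stand-in for the stealable per-worker deque; the real one would be
   a lock-free data structure. */
struct shared_deque { obj_t data[64]; size_t len; };

static void shared_push(struct shared_deque *d, obj_t v) {
  assert(d->len < sizeof d->data / sizeof d->data[0]);
  d->data[d->len++] = v;
}

/* Old scheme: a private ring buffer in front of the shared deque. */
struct local_ring { obj_t data[LOCAL_SIZE]; size_t read, write; };

static size_t local_size(struct local_ring *q) { return q->write - q->read; }

/* Old discipline: buffer privately, spill a batch only when full.
   Until a spill happens, none of this work is visible to thieves. */
static void enqueue_old(struct local_ring *q, struct shared_deque *d,
                        obj_t v) {
  if (local_size(q) == LOCAL_SIZE) {
    for (int i = 0; i < SHARE_AMOUNT; i++)
      shared_push(d, q->data[q->read++ % LOCAL_SIZE]);
  }
  q->data[q->write++ % LOCAL_SIZE] = v;
}

/* New discipline: every enqueued object is immediately stealable. */
static void enqueue_new(struct shared_deque *d, obj_t v) {
  shared_push(d, v);
}

int main(void) {
  struct local_ring ring = {{0}, 0, 0};
  struct shared_deque before = {{0}, 0}, after = {{0}, 0};
  /* Trace a handful of objects, fewer than it takes to trigger a
     spill in the old scheme. */
  for (obj_t v = 0; v < LOCAL_SIZE; v++) {
    enqueue_old(&ring, &before, v);
    enqueue_new(&after, v);
  }
  printf("old: %zu stealable, %zu private; new: %zu stealable\n",
         before.len, local_size(&ring), after.len);
  return 0;
}

Compiled with a plain "cc sketch.c", this should print "old: 0
stealable, 8 private; new: 8 stealable": with fewer than LOCAL_SIZE
objects in flight, the old scheme never publishes anything for other
workers to steal, while the new scheme makes all of it stealable at
the cost of touching the shared deque on every push.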