wingo pushed a commit to branch wip-whippet
in repository guile.

commit 910b62af8f8264c1a23fe13833e4c346f772af6f
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Wed Oct 26 10:37:55 2022 +0200

    Add conservative heap tracing (not just roots)
    
    Also accelerate mark_space_live_object_granules.
---
 Makefile                      |  84 +++++++++++++++++++-------
 bdw.c                         |  14 ++++-
 conservative-roots-embedder.h |   3 +-
 gc-config.h                   |  16 ++++-
 large-object-space.h          |   9 +++
 mt-gcbench-embedder.h         |   6 +-
 mt-gcbench.c                  |   5 +-
 parallel-tracer.h             |  16 +++++
 quads.c                       |   5 +-
 semi.c                        |   6 +-
 serial-tracer.h               |   8 ++-
 simple-gc-embedder.h          |   6 +-
 whippet.c                     | 137 ++++++++++++++++++++++++++----------------
 13 files changed, 221 insertions(+), 94 deletions(-)

diff --git a/Makefile b/Makefile
index 1f1e8ba8e..3d9eed197 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,23 @@
 TESTS=quads mt-gcbench # MT_GCBench MT_GCBench2
-COLLECTORS=bdw semi whippet conservative-whippet parallel-whippet conservative-parallel-whippet generational-whippet conservative-generational-whippet parallel-generational-whippet conservative-parallel-generational-whippet
+COLLECTORS= \
+       bdw \
+       semi \
+       \
+       whippet \
+       conservative-whippet \
+       fully-conservative-whippet \
+       \
+       parallel-whippet \
+       conservative-parallel-whippet \
+       fully-conservative-parallel-whippet \
+       \
+       generational-whippet \
+       conservative-generational-whippet \
+       fully-conservative-generational-whippet \
+       \
+       parallel-generational-whippet \
+       conservative-parallel-generational-whippet \
+       fully-conservative-parallel-generational-whippet
 
 CC=gcc
 CFLAGS=-Wall -O2 -g -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused 
-DNDEBUG
@@ -18,57 +36,77 @@ gc-platform.o: gc-platform.h gc-platform-$(PLATFORM).c 
gc-visibility.h
 gc-stack.o: gc-stack.c
        $(COMPILE) -o $@ -c $<
 
-bdw-%-gc.o: semi.c %-embedder.h %.c
-       $(COMPILE) `pkg-config --cflags bdw-gc` -include $*-embedder.h -o $@ -c 
bdw.c
-bdw-%.o: semi.c %.c
-       $(COMPILE) -include bdw-attrs.h -o $@ -c $*.c
+bdw-%-gc.o: bdw.c %-embedder.h %.c
+       $(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 
`pkg-config --cflags bdw-gc` -include $*-embedder.h -o $@ -c bdw.c
+bdw-%.o: bdw.c %.c
+       $(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -include 
bdw-attrs.h -o $@ -c $*.c
 bdw-%: bdw-%.o bdw-%-gc.o gc-stack.o gc-platform.o
        $(CC) $(LDFLAGS) `pkg-config --libs bdw-gc` -o $@ $^
 
 semi-%-gc.o: semi.c %-embedder.h large-object-space.h assert.h debug.h %.c
-       $(COMPILE) -DGC_PRECISE=1 -include $*-embedder.h -o $@ -c semi.c
+       $(COMPILE) -DGC_PRECISE_ROOTS=1 -include $*-embedder.h -o $@ -c semi.c
 semi-%.o: semi.c %.c
-       $(COMPILE) -DGC_PRECISE=1 -include semi-attrs.h -o $@ -c $*.c
+       $(COMPILE) -DGC_PRECISE_ROOTS=1 -include semi-attrs.h -o $@ -c $*.c
 
 whippet-%-gc.o: whippet.c %-embedder.h large-object-space.h serial-tracer.h 
assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_PRECISE=1 -include $*-embedder.h -o $@ -c whippet.c
+       $(COMPILE) -DGC_PRECISE_ROOTS=1 -include $*-embedder.h -o $@ -c 
whippet.c
 whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_PRECISE=1 -include whippet-attrs.h -o $@ -c $*.c
+       $(COMPILE) -DGC_PRECISE_ROOTS=1 -include whippet-attrs.h -o $@ -c $*.c
 
 conservative-whippet-%-gc.o: whippet.c %-embedder.h large-object-space.h 
serial-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_PRECISE=0 -include $*-embedder.h -o $@ -c whippet.c
+       $(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -include $*-embedder.h -o $@ -c 
whippet.c
 conservative-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_PRECISE=0 -include whippet-attrs.h -o $@ -c $*.c
+       $(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -include whippet-attrs.h -o $@ -c 
$*.c
+
+fully-conservative-whippet-%-gc.o: whippet.c %-embedder.h large-object-space.h 
serial-tracer.h assert.h debug.h heap-objects.h %.c
+       $(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -include 
$*-embedder.h -o $@ -c whippet.c
+fully-conservative-whippet-%.o: whippet.c %.c
+       $(COMPILE) -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -include 
whippet-attrs.h -o $@ -c $*.c
 
 parallel-whippet-%-gc.o: whippet.c %-embedder.h large-object-space.h 
parallel-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE=1 -include $*-embedder.h -o $@ 
-c whippet.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE_ROOTS=1 -include $*-embedder.h 
-o $@ -c whippet.c
 parallel-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE=1 -include whippet-attrs.h -o 
$@ -c $*.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE_ROOTS=1 -include 
whippet-attrs.h -o $@ -c $*.c
 
 conservative-parallel-whippet-%-gc.o: whippet.c %-embedder.h 
large-object-space.h serial-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE=0 -include $*-embedder.h -o $@ 
-c whippet.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_CONSERVATIVE_ROOTS=1 -include 
$*-embedder.h -o $@ -c whippet.c
 conservative-parallel-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_PRECISE=0 -include whippet-attrs.h -o 
$@ -c $*.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_CONSERVATIVE_ROOTS=1 -include 
whippet-attrs.h -o $@ -c $*.c
+
+fully-conservative-parallel-whippet-%-gc.o: whippet.c %-embedder.h 
large-object-space.h serial-tracer.h assert.h debug.h heap-objects.h %.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_CONSERVATIVE_ROOTS=1 
-DGC_CONSERVATIVE_TRACE=1 -include $*-embedder.h -o $@ -c whippet.c
+fully-conservative-parallel-whippet-%.o: whippet.c %.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_CONSERVATIVE_ROOTS=1 
-DGC_CONSERVATIVE_TRACE=1 -DGC_FULLY_CONSERVATIVE=1 -include whippet-attrs.h -o 
$@ -c $*.c
 
 generational-whippet-%-gc.o: whippet.c %-embedder.h large-object-space.h 
serial-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_PRECISE=1 -include $*-embedder.h -o 
$@ -c whippet.c
+       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 -include 
$*-embedder.h -o $@ -c whippet.c
 generational-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_PRECISE=1 -include whippet-attrs.h 
-o $@ -c $*.c
+       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 -include 
whippet-attrs.h -o $@ -c $*.c
 
 conservative-generational-whippet-%-gc.o: whippet.c %-embedder.h 
large-object-space.h serial-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_PRECISE=0 -include $*-embedder.h -o 
$@ -c whippet.c
+       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_CONSERVATIVE_ROOTS=1 -include 
$*-embedder.h -o $@ -c whippet.c
 conservative-generational-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_PRECISE=0 -include whippet-attrs.h 
-o $@ -c $*.c
+       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_CONSERVATIVE_ROOTS=1 -include 
whippet-attrs.h -o $@ -c $*.c
+
+fully-conservative-generational-whippet-%-gc.o: whippet.c %-embedder.h 
large-object-space.h serial-tracer.h assert.h debug.h heap-objects.h %.c
+       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_CONSERVATIVE_ROOTS=1 
-DGC_CONSERVATIVE_TRACE=1 -include $*-embedder.h -o $@ -c whippet.c
+fully-conservative-generational-whippet-%.o: whippet.c %.c
+       $(COMPILE) -DGC_GENERATIONAL=1 -DGC_CONSERVATIVE_ROOTS=1 
-DGC_CONSERVATIVE_TRACE=1 -include whippet-attrs.h -o $@ -c $*.c
 
 parallel-generational-whippet-%-gc.o: whippet.c %-embedder.h 
large-object-space.h parallel-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE=1 -include 
$*-embedder.h -o $@ -c whippet.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 
-include $*-embedder.h -o $@ -c whippet.c
 parallel-generational-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE=1 -include 
whippet-attrs.h -o $@ -c $*.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE_ROOTS=1 
-include whippet-attrs.h -o $@ -c $*.c
 
 conservative-parallel-generational-whippet-%-gc.o: whippet.c %-embedder.h 
large-object-space.h parallel-tracer.h assert.h debug.h heap-objects.h %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE=0 -include 
$*-embedder.h -o $@ -c whippet.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 
-DGC_CONSERVATIVE_ROOTS=1 -include $*-embedder.h -o $@ -c whippet.c
 conservative-parallel-generational-whippet-%.o: whippet.c %.c
-       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 -DGC_PRECISE=0 -include 
whippet-attrs.h -o $@ -c $*.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 
-DGC_CONSERVATIVE_ROOTS=1 -include whippet-attrs.h -o $@ -c $*.c
+
+fully-conservative-parallel-generational-whippet-%-gc.o: whippet.c 
%-embedder.h large-object-space.h parallel-tracer.h assert.h debug.h 
heap-objects.h %.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -include $*-embedder.h -o 
$@ -c whippet.c
+fully-conservative-parallel-generational-whippet-%.o: whippet.c %.c
+       $(COMPILE) -DGC_PARALLEL=1 -DGC_GENERATIONAL=1 
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -include whippet-attrs.h -o 
$@ -c $*.c
 
 %: %.o %-gc.o gc-platform.o gc-stack.o
        $(CC) $(LDFLAGS) $($*_LDFLAGS) -o $@ $^
diff --git a/bdw.c b/bdw.c
index caf161e0b..caf53b69f 100644
--- a/bdw.c
+++ b/bdw.c
@@ -8,12 +8,20 @@
 
 #include "bdw-attrs.h"
 
-#if GC_PRECISE
+#if GC_PRECISE_ROOTS
 #error bdw-gc is a conservative collector
-#else
-#include "conservative-roots-embedder.h"
 #endif
 
+#if !GC_CONSERVATIVE_ROOTS
+#error bdw-gc is a conservative collector
+#endif
+
+#if !GC_CONSERVATIVE_TRACE
+#error bdw-gc is a conservative collector
+#endif
+
+#include "conservative-roots-embedder.h"
+
 // When pthreads are used, let `libgc' know about it and redirect
 // allocation calls such as `GC_MALLOC ()' to (contention-free, faster)
 // thread-local allocation.
diff --git a/conservative-roots-embedder.h b/conservative-roots-embedder.h
index c8004f00c..4d2c4fa7b 100644
--- a/conservative-roots-embedder.h
+++ b/conservative-roots-embedder.h
@@ -1,6 +1,7 @@
 #ifndef CONSERVATIVE_ROOTS_EMBEDDER_H
 #define CONSERVATIVE_ROOTS_EMBEDDER_H
 
+#include "gc-config.h"
 #include "gc-embedder-api.h"
 
 static inline int gc_has_mutator_conservative_roots(void) {
@@ -13,7 +14,7 @@ static inline int gc_has_global_conservative_roots(void) {
   return 1;
 }
 static inline int gc_has_conservative_intraheap_edges(void) {
-  return 0;
+  return GC_CONSERVATIVE_TRACE;
 }
 
 static inline int
diff --git a/gc-config.h b/gc-config.h
index 5fc27b7e5..91dd555e2 100644
--- a/gc-config.h
+++ b/gc-config.h
@@ -13,8 +13,20 @@
 #define GC_GENERATIONAL 0
 #endif
 
-#ifndef GC_PRECISE
-#define GC_PRECISE 0
+// Though you normally wouldn't configure things this way, it's possible
+// to have both precise and conservative roots.  However we have to
+// either have precise or conservative tracing; not a mix.
+
+#ifndef GC_PRECISE_ROOTS
+#define GC_PRECISE_ROOTS 0
+#endif
+
+#ifndef GC_CONSERVATIVE_ROOTS
+#define GC_CONSERVATIVE_ROOTS 0
+#endif
+
+#ifndef GC_CONSERVATIVE_TRACE
+#define GC_CONSERVATIVE_TRACE 0
 #endif
 
 #endif // GC_CONFIG_H
diff --git a/large-object-space.h b/large-object-space.h
index 6bb7a5af7..ddd1bfcde 100644
--- a/large-object-space.h
+++ b/large-object-space.h
@@ -9,6 +9,7 @@
 #include <sys/mman.h>
 #include <unistd.h>
 
+#include "gc-assert.h"
 #include "gc-ref.h"
 #include "gc-conservative-ref.h"
 #include "address-map.h"
@@ -96,6 +97,14 @@ static int large_object_space_mark_object(struct large_object_space *space,
   return large_object_space_copy(space, ref);
 }
 
+static inline size_t large_object_space_object_size(struct large_object_space *space,
+                                                    struct gc_ref ref) {
+  size_t npages = address_map_lookup(&space->object_pages,
+                                     gc_ref_value(ref), 0);
+  GC_ASSERT(npages != 0);
+  return npages * space->page_size;
+}
+
 static void large_object_space_reclaim_one(uintptr_t addr, void *data) {
   struct large_object_space *space = data;
   size_t npages = address_map_lookup(&space->object_pages, addr, 0);
diff --git a/mt-gcbench-embedder.h b/mt-gcbench-embedder.h
index 1ac42a327..110e7e05e 100644
--- a/mt-gcbench-embedder.h
+++ b/mt-gcbench-embedder.h
@@ -1,6 +1,7 @@
 #ifndef MT_GCBENCH_EMBEDDER_H
 #define MT_GCBENCH_EMBEDDER_H
 
+#include "gc-config.h"
 #include "mt-gcbench-types.h"
 
 struct gc_heap;
@@ -44,9 +45,8 @@ visit_hole_fields(Hole *obj,
                   void (*visit)(struct gc_edge edge,
                                 struct gc_heap *heap, void *visit_data),
                   struct gc_heap *heap, void *visit_data) {
-#if GC_PRECISE
-  GC_CRASH();
-#endif
+  if (GC_PRECISE_ROOTS)
+    GC_CRASH();
 }
 
 #include "simple-gc-embedder.h"
diff --git a/mt-gcbench.c b/mt-gcbench.c
index 4789a4f7d..744a7e66b 100644
--- a/mt-gcbench.c
+++ b/mt-gcbench.c
@@ -47,9 +47,10 @@
 #include "assert.h"
 #include "gc-api.h"
 #include "mt-gcbench-types.h"
-#if GC_PRECISE
+#if GC_PRECISE_ROOTS
 #include "precise-roots-api.h"
-#else
+#endif
+#if GC_CONSERVATIVE_ROOTS
 #include "conservative-roots-api.h"
 #endif
 #include "mt-gcbench-types.h"
diff --git a/parallel-tracer.h b/parallel-tracer.h
index df6cc89ae..9711ed03a 100644
--- a/parallel-tracer.h
+++ b/parallel-tracer.h
@@ -450,6 +450,8 @@ static void tracer_release(struct gc_heap *heap) {
 
 static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap,
                                 void *trace_data) GC_ALWAYS_INLINE;
+static inline void tracer_enqueue(struct gc_ref ref, struct gc_heap *heap,
+                                  void *trace_data) GC_ALWAYS_INLINE;
 static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
                              void *trace_data) GC_ALWAYS_INLINE;
 static inline int trace_edge(struct gc_heap *heap,
@@ -462,8 +464,22 @@ tracer_share(struct local_tracer *trace) {
     trace_deque_push(trace->share_deque, local_trace_queue_pop(&trace->local));
 }
 
+static inline void
+tracer_enqueue(struct gc_ref ref, struct gc_heap *heap, void *trace_data) {
+  struct local_tracer *trace = trace_data;
+  if (local_trace_queue_full(&trace->local))
+    tracer_share(trace);
+  local_trace_queue_push(&trace->local, ref);
+}
+
 static inline void
 tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
+  if (trace_edge(heap, edge))
+    tracer_enqueue(gc_edge_ref(edge), heap, trace_data);
+}
+
+static inline void
+tracer_visit_(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
   if (trace_edge(heap, edge)) {
     struct local_tracer *trace = trace_data;
     if (local_trace_queue_full(&trace->local))
diff --git a/quads.c b/quads.c
index 1318adf9f..b7d1bccc3 100644
--- a/quads.c
+++ b/quads.c
@@ -5,9 +5,10 @@
 
 #include "assert.h"
 #include "gc-api.h"
-#if GC_PRECISE
+#if GC_PRECISE_ROOTS
 #include "precise-roots-api.h"
-#else
+#endif
+#if GC_CONSERVATIVE_ROOTS
 #include "conservative-roots-api.h"
 #endif
 #include "quads-types.h"
diff --git a/semi.c b/semi.c
index 2c1eae600..2a3b19f23 100644
--- a/semi.c
+++ b/semi.c
@@ -11,9 +11,11 @@
 #include "semi-attrs.h"
 #include "large-object-space.h"
 
-#if GC_PRECISE
+#if GC_PRECISE_ROOTS
 #include "precise-roots-embedder.h"
-#else
+#endif
+
+#if GC_CONSERVATIVE_ROOTS
 #error semi is a precise collector
 #endif
 
diff --git a/serial-tracer.h b/serial-tracer.h
index 7c0bdcca9..d189b1c7c 100644
--- a/serial-tracer.h
+++ b/serial-tracer.h
@@ -137,6 +137,8 @@ static void tracer_release(struct gc_heap *heap) {
 
 static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap,
                                 void *trace_data) GC_ALWAYS_INLINE;
+static inline void tracer_enqueue(struct gc_ref ref, struct gc_heap *heap,
+                                  void *trace_data) GC_ALWAYS_INLINE;
 static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
                              void *trace_data) GC_ALWAYS_INLINE;
 static inline int trace_edge(struct gc_heap *heap,
@@ -152,9 +154,13 @@ tracer_enqueue_roots(struct tracer *tracer, struct gc_ref *objs,
   trace_queue_push_many(&tracer->queue, objs, count);
 }
 static inline void
+tracer_enqueue(struct gc_ref ref, struct gc_heap *heap, void *trace_data) {
+  tracer_enqueue_root(heap_tracer(heap), ref);
+}
+static inline void
 tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
   if (trace_edge(heap, edge))
-    tracer_enqueue_root(heap_tracer(heap), gc_edge_ref(edge));
+    tracer_enqueue(gc_edge_ref(edge), heap, trace_data);
 }
 static inline void
 tracer_trace(struct gc_heap *heap) {
diff --git a/simple-gc-embedder.h b/simple-gc-embedder.h
index 7b691acfa..b97d1d7f0 100644
--- a/simple-gc-embedder.h
+++ b/simple-gc-embedder.h
@@ -26,9 +26,11 @@ static inline void gc_trace_object(struct gc_ref ref,
   }
 }
 
-#if GC_PRECISE
+#if GC_PRECISE_ROOTS
 #include "precise-roots-embedder.h"
-#else
+#endif
+
+#if GC_CONSERVATIVE_ROOTS
 #include "conservative-roots-embedder.h"
 #endif
 
diff --git a/whippet.c b/whippet.c
index 5dbfc80ec..7ae78671b 100644
--- a/whippet.c
+++ b/whippet.c
@@ -26,9 +26,11 @@
 #include "spin.h"
 #include "whippet-attrs.h"
 
-#if GC_PRECISE
+#if GC_PRECISE_ROOTS
 #include "precise-roots-embedder.h"
-#else
+#endif
+
+#if GC_CONSERVATIVE_ROOTS
 #include "conservative-roots-embedder.h"
 #endif
 
@@ -371,11 +373,52 @@ static inline void clear_memory(uintptr_t addr, size_t size) {
 
 static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
 
-static size_t mark_space_live_object_granules(uint8_t *metadata) {
+static inline uint64_t load_eight_aligned_bytes(uint8_t *mark) {
+  GC_ASSERT(((uintptr_t)mark & 7) == 0);
+  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
+  uint64_t word;
+  memcpy(&word, aligned_mark, 8);
+#ifdef WORDS_BIGENDIAN
+  word = __builtin_bswap64(word);
+#endif
+  return word;
+}
+
+static inline size_t count_zero_bytes(uint64_t bytes) {
+  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
+}
+
+static uint64_t broadcast_byte(uint8_t byte) {
+  uint64_t result = byte;
+  return result * 0x0101010101010101ULL;
+}
+
+static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   size_t n = 0;
-  while ((metadata[n] & METADATA_BYTE_END) == 0)
-    n++;
-  return n + 1;
+  // If we have a hole, it is likely to be more than 8 granules long.
+  // Assuming that it's better to make aligned loads, first we align the
+  // sweep pointer, then we load aligned mark words.
+  size_t unaligned = ((uintptr_t) mark) & 7;
+  if (unaligned) {
+    uint64_t bytes = load_eight_aligned_bytes(mark - unaligned) >> (unaligned * 8);
+    bytes &= sweep_mask;
+    if (bytes)
+      return count_zero_bytes(bytes);
+    n += 8 - unaligned;
+  }
+
+  for(; n < limit; n += 8) {
+    uint64_t bytes = load_eight_aligned_bytes(mark + n);
+    bytes &= sweep_mask;
+    if (bytes)
+      return n + count_zero_bytes(bytes);
+  }
+
+  return limit;
+}
+
+static size_t mark_space_live_object_granules(uint8_t *metadata) {
+  return next_mark(metadata, -1, broadcast_byte(METADATA_BYTE_END)) + 1;
 }
 
 static inline int mark_space_mark_object(struct mark_space *space,
@@ -710,9 +753,18 @@ static inline struct gc_ref trace_conservative_ref(struct gc_heap *heap,
                                                     ref, possibly_interior);
 }
 
-static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
-                             void *mark_data) {
-  gc_trace_object(ref, tracer_visit, heap, mark_data, NULL);
+static inline size_t mark_space_object_size(struct mark_space *space,
+                                            struct gc_ref ref) {
+  uint8_t *loc = metadata_byte_for_object(ref);
+  size_t granules = mark_space_live_object_granules(loc);
+  return granules * GRANULE_SIZE;
+}
+
+static inline size_t gc_object_allocation_size(struct gc_heap *heap,
+                                               struct gc_ref ref) {
+  if (GC_LIKELY(mark_space_contains(heap_mark_space(heap), ref)))
+    return mark_space_object_size(heap_mark_space(heap), ref);
+  return large_object_space_object_size(heap_large_object_space(heap), ref);
 }
 
 static int heap_has_multiple_mutators(struct gc_heap *heap) {
@@ -1037,6 +1089,29 @@ trace_conservative_edges(uintptr_t low,
     trace(load_conservative_ref(addr), heap, data);
 }
 
+static inline void tracer_trace_conservative_ref(struct gc_conservative_ref ref,
+                                                 struct gc_heap *heap,
+                                                 void *data) {
+  int possibly_interior = 0;
+  struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior);
+  if (gc_ref_is_heap_object(resolved))
+    tracer_enqueue(resolved, heap, data);
+}
+
+static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
+                             void *mark_data) {
+  if (gc_has_conservative_intraheap_edges()) {
+    size_t bytes = GC_LIKELY(mark_space_contains(heap_mark_space(heap), ref))
+      ? mark_space_object_size(heap_mark_space(heap), ref)
+      : large_object_space_object_size(heap_large_object_space(heap), ref);
+    trace_conservative_edges(gc_ref_value(ref),
+                             gc_ref_value(ref) + bytes,
+                             tracer_trace_conservative_ref, heap, mark_data);
+  } else {
+    gc_trace_object(ref, tracer_visit, heap, mark_data, NULL);
+  }
+}
+
 static void
 mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low,
                                                      uintptr_t high,
@@ -1172,26 +1247,6 @@ static void trace_global_conservative_roots(struct gc_heap *heap) {
       (mark_and_globally_enqueue_heap_conservative_roots, heap, NULL);
 }
 
-static inline uint64_t load_eight_aligned_bytes(uint8_t *mark) {
-  GC_ASSERT(((uintptr_t)mark & 7) == 0);
-  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
-  uint64_t word;
-  memcpy(&word, aligned_mark, 8);
-#ifdef WORDS_BIGENDIAN
-  word = __builtin_bswap64(word);
-#endif
-  return word;
-}
-
-static inline size_t count_zero_bytes(uint64_t bytes) {
-  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
-}
-
-static uint64_t broadcast_byte(uint8_t byte) {
-  uint64_t result = byte;
-  return result * 0x0101010101010101ULL;
-}
-
 // Note that it's quite possible (and even likely) that any given remset
 // byte doesn't hold any roots, if all stores were to nursery objects.
 STATIC_ASSERT_EQ(GRANULES_PER_REMSET_BYTE % 8, 0);
@@ -1690,30 +1745,6 @@ static int sweep_word(uintptr_t *loc, uintptr_t sweep_mask) {
   return 0;
 }
 
-static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
-  size_t n = 0;
-  // If we have a hole, it is likely to be more that 8 granules long.
-  // Assuming that it's better to make aligned loads, first we align the
-  // sweep pointer, then we load aligned mark words.
-  size_t unaligned = ((uintptr_t) mark) & 7;
-  if (unaligned) {
-    uint64_t bytes = load_eight_aligned_bytes(mark - unaligned) >> (unaligned * 8);
-    bytes &= sweep_mask;
-    if (bytes)
-      return count_zero_bytes(bytes);
-    n += 8 - unaligned;
-  }
-
-  for(; n < limit; n += 8) {
-    uint64_t bytes = load_eight_aligned_bytes(mark + n);
-    bytes &= sweep_mask;
-    if (bytes)
-      return n + count_zero_bytes(bytes);
-  }
-
-  return limit;
-}
-
 static uintptr_t mark_space_next_block_to_sweep(struct mark_space *space) {
   uintptr_t block = atomic_load_explicit(&space->next_block,
                                          memory_order_acquire);

Reply via email to