wingo pushed a commit to branch wip-whippet
in repository guile.

commit f5e10eb7c9bf925b5c565026900108aea75e6d33
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Mon May 26 11:58:03 2025 +0200

    mmc: Speed up conservative ref resolution
---
 src/nofl-space.h | 42 +++++++++++++++++++-----------------------
 src/swar.h       | 25 +++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/src/nofl-space.h b/src/nofl-space.h
index 97a905a33..dd7ed9071 100644
--- a/src/nofl-space.h
+++ b/src/nofl-space.h
@@ -1741,40 +1741,36 @@ nofl_space_mark_conservative_ref(struct nofl_space *space,
   uint8_t *loc = nofl_metadata_byte_for_addr(addr);
   uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed);
 
-  // Already marked object?  Nothing to do.
-  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
-    return gc_ref_null();
-
-  // Addr is the not start of an unmarked object?  Search backwards if
-  // we have interior pointers, otherwise not an object.
-  if (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark)) {
+  // Not pointing to the start of an object?  Scan backwards if the ref
+  // is possibly interior, otherwise bail.
+  if ((byte & NOFL_METADATA_BYTE_MARK_MASK) == 0) {
     if (!possibly_interior)
       return gc_ref_null();
 
     uintptr_t block_base = align_down(addr, NOFL_BLOCK_SIZE);
     uint8_t *loc_base = nofl_metadata_byte_for_addr(block_base);
-    do {
-      // Searched past block?  Not an object.
-      if (loc-- == loc_base)
-        return gc_ref_null();
+    uint8_t mask = NOFL_METADATA_BYTE_MARK_MASK | NOFL_METADATA_BYTE_END;
+    loc = scan_backwards_for_byte_with_bits(loc, loc_base, mask);
 
-      byte = atomic_load_explicit(loc, memory_order_relaxed);
-
-      // Ran into the end of some other allocation?  Not an object, then.
-      if (byte & NOFL_METADATA_BYTE_END)
-        return gc_ref_null();
-      // Object already marked?  Nothing to do.
-      if (nofl_metadata_byte_has_mark(byte, space->current_mark))
-        return gc_ref_null();
-
-      // Continue until we find object start.
-    } while (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark));
+    if (!loc)
+      return gc_ref_null();
 
+    byte = atomic_load_explicit(loc, memory_order_relaxed);
+    GC_ASSERT(byte & mask);
+    // Ran into the end of some other allocation?  Not an object, then.
+    if (byte & NOFL_METADATA_BYTE_END)
+      return gc_ref_null();
     // Found object start, and object is unmarked; adjust addr.
     addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE;
   }
 
-  GC_ASSERT(*loc & NOFL_METADATA_BYTE_MARK_MASK);
+  // Object already marked?  Nothing to do.
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
+    return gc_ref_null();
+
+  GC_ASSERT(nofl_metadata_byte_is_young_or_has_mark(byte,
+                                                    space->survivor_mark));
+
   nofl_space_set_nonempty_mark(space, loc, byte, gc_ref(addr));
 
   return gc_ref(addr);
diff --git a/src/swar.h b/src/swar.h
index d8598c8b5..81a1ec472 100644
--- a/src/swar.h
+++ b/src/swar.h
@@ -53,6 +53,31 @@ scan_for_byte_with_bits(uint8_t *ptr, size_t limit, uint8_t mask) {
   return limit;
 }
 
+static inline uint8_t*
+scan_backwards_for_byte_with_bits(uint8_t *ptr, uint8_t *base, uint8_t mask) {
+  GC_ASSERT_EQ (((uintptr_t)base) & 7, 0);
+
+  size_t unaligned = ((uintptr_t) ptr) & 7;
+  if (unaligned) {
+    uint64_t bytes =
+      load_eight_aligned_bytes(ptr - unaligned) << ((8 - unaligned) * 8);
+    uint64_t match = match_bytes_against_bits(bytes, mask);
+    if (match)
+      return ptr - 1 - __builtin_clzll(match) / 8;
+    ptr -= unaligned;
+  }
+
+  for (; ptr > base; ptr -= 8) {
+    GC_ASSERT(ptr >= base + 8);
+    uint64_t bytes = load_eight_aligned_bytes(ptr - 8);
+    uint64_t match = match_bytes_against_bits(bytes, mask);
+    if (match)
+      return ptr - 1 - __builtin_clzll(match) / 8;
+  }
+
+  return NULL;
+}
+
 static inline uint64_t
 match_bytes_against_tag(uint64_t bytes, uint8_t mask, uint8_t tag) {
   // Precondition: tag within mask.

Reply via email to