wingo pushed a commit to branch wip-whippet
in repository guile.

commit f5e10eb7c9bf925b5c565026900108aea75e6d33
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Mon May 26 11:58:03 2025 +0200

mmc: Speed up conservative ref resolution --- src/nofl-space.h | 42 +++++++++++++++++++----------------------- src/swar.h | 25 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/src/nofl-space.h b/src/nofl-space.h index 97a905a33..dd7ed9071 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -1741,40 +1741,36 @@ nofl_space_mark_conservative_ref(struct nofl_space *space, uint8_t *loc = nofl_metadata_byte_for_addr(addr); uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); - // Already marked object? Nothing to do. - if (nofl_metadata_byte_has_mark(byte, space->current_mark)) - return gc_ref_null(); - - // Addr is the not start of an unmarked object? Search backwards if - // we have interior pointers, otherwise not an object. - if (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark)) { + // Not pointing to the start of an object? Scan backwards if the ref + // is possibly interior, otherwise bail. + if ((byte & NOFL_METADATA_BYTE_MARK_MASK) == 0) { if (!possibly_interior) return gc_ref_null(); uintptr_t block_base = align_down(addr, NOFL_BLOCK_SIZE); uint8_t *loc_base = nofl_metadata_byte_for_addr(block_base); - do { - // Searched past block? Not an object. - if (loc-- == loc_base) - return gc_ref_null(); + uint8_t mask = NOFL_METADATA_BYTE_MARK_MASK | NOFL_METADATA_BYTE_END; + loc = scan_backwards_for_byte_with_bits(loc, loc_base, mask); - byte = atomic_load_explicit(loc, memory_order_relaxed); - - // Ran into the end of some other allocation? Not an object, then. - if (byte & NOFL_METADATA_BYTE_END) - return gc_ref_null(); - // Object already marked? Nothing to do. - if (nofl_metadata_byte_has_mark(byte, space->current_mark)) - return gc_ref_null(); - - // Continue until we find object start. 
- } while (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark)); + if (!loc) + return gc_ref_null(); + byte = atomic_load_explicit(loc, memory_order_relaxed); + GC_ASSERT(byte & mask); + // Ran into the end of some other allocation? Not an object, then. + if (byte & NOFL_METADATA_BYTE_END) + return gc_ref_null(); // Found object start, and object is unmarked; adjust addr. addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE; } - GC_ASSERT(*loc & NOFL_METADATA_BYTE_MARK_MASK); + // Object already marked? Nothing to do. + if (nofl_metadata_byte_has_mark(byte, space->current_mark)) + return gc_ref_null(); + + GC_ASSERT(nofl_metadata_byte_is_young_or_has_mark(byte, + space->survivor_mark)); + nofl_space_set_nonempty_mark(space, loc, byte, gc_ref(addr)); return gc_ref(addr); diff --git a/src/swar.h b/src/swar.h index d8598c8b5..81a1ec472 100644 --- a/src/swar.h +++ b/src/swar.h @@ -53,6 +53,31 @@ scan_for_byte_with_bits(uint8_t *ptr, size_t limit, uint8_t mask) { return limit; } +static inline uint8_t* +scan_backwards_for_byte_with_bits(uint8_t *ptr, uint8_t *base, uint8_t mask) { + GC_ASSERT_EQ (((uintptr_t)base) & 7, 0); + + size_t unaligned = ((uintptr_t) ptr) & 7; + if (unaligned) { + uint64_t bytes = + load_eight_aligned_bytes(ptr - unaligned) << ((8 - unaligned) * 8); + uint64_t match = match_bytes_against_bits(bytes, mask); + if (match) + return ptr - 1 - __builtin_clzll(match) / 8; + ptr -= unaligned; + } + + for (; ptr > base; ptr -= 8) { + GC_ASSERT(ptr >= base + 8); + uint64_t bytes = load_eight_aligned_bytes(ptr - 8); + uint64_t match = match_bytes_against_bits(bytes, mask); + if (match) + return ptr - 1 - __builtin_clzll(match) / 8; + } + + return NULL; +} + static inline uint64_t match_bytes_against_tag(uint64_t bytes, uint8_t mask, uint8_t tag) { // Precondition: tag within mask.