wingo pushed a commit to branch wip-whippet
in repository guile.

commit 619a49ba410e373d2b689123de3a01df6a3a9d7e
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Wed Apr 13 21:43:18 2022 +0200

    Add large object space
    
    Not wired up yet.
---
 address-set.h        |  40 ++++++++---
 large-object-space.h | 188 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mt-gcbench.c         |   2 +-
 semi.h               |  31 ++++++---
 4 files changed, 241 insertions(+), 20 deletions(-)

diff --git a/address-set.h b/address-set.h
index 5bbcb6168..74bc08888 100644
--- a/address-set.h
+++ b/address-set.h
@@ -136,13 +136,14 @@ static int hash_set_contains(struct hash_set *set, uintptr_t v) {
   }
   return 0;
 }
-static inline void hash_set_for_each (struct hash_set *set,
-                                      void (*f)(uintptr_t, void*), void *data) 
__attribute__((always_inline));
-static inline void hash_set_for_each(struct hash_set *set,
-                                     void (*f)(uintptr_t, void*), void *data) {
+static inline void hash_set_find(struct hash_set *set,
+                                 int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
+static inline void hash_set_find(struct hash_set *set,
+                                 int (*f)(uintptr_t, void*), void *data) {
   for (size_t i = 0; i < set->size; i++)
     if (!hash_set_slot_is_empty(set, i))
-      f(hash_set_slot_ref(set, i), data);
+      if (f(hash_set_slot_ref(set, i), data))
+        return;
 }
   
 struct address_set {
@@ -178,16 +179,33 @@ struct address_set_for_each_data {
   void (*f)(uintptr_t, void *);
   void *data;
 };
-static void address_set_do_for_each(uintptr_t v, void *data) {
+static int address_set_do_for_each(uintptr_t v, void *data) {
   struct address_set_for_each_data *for_each_data = data;
   for_each_data->f(unhash_address(v), for_each_data->data);
+  return 0;
 }
-static inline void address_set_for_each (struct address_set *set,
-                                         void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
-static inline void address_set_for_each (struct address_set *set,
-                                         void (*f)(uintptr_t, void*), void *data) {
+static inline void address_set_for_each(struct address_set *set,
+                                        void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
+static inline void address_set_for_each(struct address_set *set,
+                                        void (*f)(uintptr_t, void*), void *data) {
   struct address_set_for_each_data for_each_data = { f, data };
-  hash_set_for_each(&set->hash_set, address_set_do_for_each, &for_each_data);
+  hash_set_find(&set->hash_set, address_set_do_for_each, &for_each_data);
+}
+
+struct address_set_find_data {
+  int (*f)(uintptr_t, void *);
+  void *data;
+};
+static int address_set_do_find(uintptr_t v, void *data) {
+  struct address_set_find_data *find_data = data;
+  return find_data->f(unhash_address(v), find_data->data);
+}
+static inline void address_set_find(struct address_set *set,
+                                    int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
+static inline void address_set_find(struct address_set *set,
+                                    int (*f)(uintptr_t, void*), void *data) {
+  struct address_set_find_data find_data = { f, data };
+  hash_set_find(&set->hash_set, address_set_do_find, &find_data);
 }
 
 #endif // ADDRESS_SET_H
diff --git a/large-object-space.h b/large-object-space.h
new file mode 100644
index 000000000..bcd36e9e9
--- /dev/null
+++ b/large-object-space.h
@@ -0,0 +1,188 @@
+#ifndef LARGE_OBJECT_SPACE_H
+#define LARGE_OBJECT_SPACE_H
+
+#include <pthread.h>
+#include <malloc.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "address-map.h"
+#include "address-set.h"
+
+// Logically the large object space is a treadmill space -- somewhat like a
+// copying collector, in that we allocate into tospace, and collection flips
+// tospace to fromspace, except that we just keep a record on the side of which
+// objects are in which space.  That way we slot into the abstraction of a
+// copying collector while not actually copying data.
+
+struct heap;
+struct gcobj;
+
+struct large_object_space {
+  struct heap *heap;
+  
+  pthread_mutex_t lock;
+
+  size_t page_size;
+  size_t page_size_log2;
+  size_t total_pages;
+  size_t free_pages;
+  size_t live_pages_at_last_collection;
+
+  struct address_set from_space;
+  struct address_set to_space;
+  struct address_set free_space;
+  struct address_map object_pages; // for each object: size in pages.
+  struct address_map predecessors; // subsequent addr -> object addr
+};
+
+static int large_object_space_init(struct large_object_space *space,
+                                   struct heap *heap) {
+  pthread_mutex_init(&space->lock, NULL);
+  space->page_size = getpagesize();
+  space->page_size_log2 = __builtin_ctz(space->page_size);
+  address_set_init(&space->from_space);
+  address_set_init(&space->to_space);
+  address_set_init(&space->free_space);
+  address_map_init(&space->object_pages);
+  address_map_init(&space->predecessors);
+  return 1;
+}
+
+static void large_object_space_start_gc(struct large_object_space *space) {
+  // Flip.  Note that when we flip, fromspace is empty, but it might have
+  // allocated storage, so we do need to do a proper swap.
+  struct address_set tmp;
+  memcpy(&tmp, &space->from_space, sizeof(tmp));
+  memcpy(&space->from_space, &space->to_space, sizeof(tmp));
+  memcpy(&space->to_space, &tmp, sizeof(tmp));
+  space->live_pages_at_last_collection = 0;
+}
+
+static int large_object_space_copy(struct large_object_space *space,
+                                   uintptr_t addr) {
+  int copied = 0;
+  pthread_mutex_lock(&space->lock);
+  if (!address_set_contains(&space->from_space, addr))
+    // Already copied; object is grey or white.
+    goto done;
+  space->live_pages_at_last_collection +=
+    address_map_lookup(&space->object_pages, addr, 0);
+  address_set_remove(&space->from_space, addr);
+  address_set_add(&space->to_space, addr);
+  // Object should be placed on mark stack for visiting its fields.  (While on
+  // mark stack it's actually grey, not black.)
+  copied = 1;
+done:
+  pthread_mutex_unlock(&space->lock);
+  return copied;
+}
+
+static void large_object_space_reclaim_one(uintptr_t addr, void *data) {
+  struct large_object_space *space = data;
+  size_t npages = address_map_lookup(&space->object_pages, addr, 0);
+  // Release the pages to the OS, and cause them to be zero on next use.
+  madvise((void*) addr, npages * space->page_size, MADV_DONTNEED);
+  size_t did_merge = 0;
+  uintptr_t pred = address_map_lookup(&space->predecessors, addr, 0);
+  uintptr_t succ = addr + npages * space->page_size;
+  if (pred && address_set_contains(&space->free_space, pred)) {
+    // Merge with free predecessor.
+    address_map_remove(&space->predecessors, addr);
+    address_map_remove(&space->object_pages, addr);
+    addr = pred;
+    npages += address_map_lookup(&space->object_pages, addr, 0);
+    did_merge = 1;
+  } else {
+    // Otherwise this is a new free object.
+    address_set_add(&space->free_space, addr);
+  }
+  if (address_set_contains(&space->free_space, succ)) {
+    // Merge with free successor.
+    size_t succ_npages = address_map_lookup(&space->object_pages, succ, 0);
+    address_map_remove(&space->predecessors, succ);
+    address_map_remove(&space->object_pages, succ);
+    address_set_remove(&space->free_space, succ);
+    npages += succ_npages;
+    succ += succ_npages * space->page_size;
+    did_merge = 1;
+  }
+  if (did_merge) {
+    // Update extents.
+    address_map_add(&space->object_pages, addr, npages);
+    address_map_add(&space->predecessors, succ, addr);
+  }
+}
+
+static void large_object_space_finish_gc(struct large_object_space *space) {
+  pthread_mutex_lock(&space->lock);
+  address_set_for_each(&space->from_space, large_object_space_reclaim_one,
+                       space);
+  address_set_clear(&space->from_space);
+  space->free_pages = space->total_pages - space->live_pages_at_last_collection;
+  pthread_mutex_unlock(&space->lock);
+}
+
+static inline int large_object_space_contains(struct large_object_space *space,
+                                              struct gcobj *ptr) {
+  int ret;
+  pthread_mutex_lock(&space->lock);
+  // ptr might be in fromspace or tospace.  Just check the object_pages table, which
+  // contains both, as well as object_pages for free blocks.
+  ret = address_map_contains(&space->object_pages, (uintptr_t)ptr);
+  pthread_mutex_unlock(&space->lock);
+  return ret;
+}
+
+struct large_object_space_candidate {
+  struct large_object_space *space;
+  size_t min_npages;
+  uintptr_t addr;
+  size_t npages;
+};
+
+static int large_object_space_best_fit(uintptr_t addr, void *data) {
+  struct large_object_space_candidate *found = data;
+  size_t npages = address_map_lookup(&found->space->object_pages, addr, 0);
+  if (npages < found->min_npages)
+    return 0;
+  if (npages >= found->npages)
+    return 0;
+  found->addr = addr;
+  found->npages = npages;
+  return found->min_npages == npages;
+}
+    
+static inline void* large_object_space_alloc(struct large_object_space *space,
+                                             size_t size) {
+  void *ret;
+  pthread_mutex_lock(&space->lock);
+  ret = NULL;
+  size_t npages = (size + space->page_size - 1) >> space->page_size_log2;
+  struct large_object_space_candidate found = { space, npages, 0, -1 };
+  address_set_find(&space->free_space, large_object_space_best_fit, &found);
+  if (found.addr) {
+    uintptr_t addr = found.addr;
+    ret = (void*)addr;
+    address_set_remove(&space->free_space, addr);
+    address_set_add(&space->to_space, addr);
+
+    if (found.npages > npages) {
+      uintptr_t succ = addr + npages * space->page_size;
+      uintptr_t succ_succ = succ + (found.npages - npages) * space->page_size;
+      address_map_add(&space->object_pages, addr, npages);
+      address_map_add(&space->object_pages, succ, found.npages - npages);
+      address_set_add(&space->free_space, succ);
+      address_map_add(&space->predecessors, succ, addr);
+      address_map_add(&space->predecessors, succ_succ, succ);
+    }
+    space->free_pages -= npages;
+  }
+  pthread_mutex_unlock(&space->lock);
+  return ret;
+}
+
+#endif // LARGE_OBJECT_SPACE_H
diff --git a/mt-gcbench.c b/mt-gcbench.c
index b80a0c83c..aa9177b98 100644
--- a/mt-gcbench.c
+++ b/mt-gcbench.c
@@ -45,7 +45,7 @@
 #include <sys/time.h>
 
 #include "assert.h"
-#include "gcbench-types.h"
+#include "mt-gcbench-types.h"
 #include "gc.h"
 #include "inline.h"
 
diff --git a/semi.h b/semi.h
index 096c57591..3661c7ace 100644
--- a/semi.h
+++ b/semi.h
@@ -5,6 +5,7 @@
 #include <sys/mman.h>
 #include <unistd.h>
 
+#include "large-object-space.h"
 #include "precise-roots.h"
 
 struct semi_space {
@@ -16,6 +17,7 @@ struct semi_space {
 };
 struct heap {
   struct semi_space semi_space;
+  struct large_object_space large_object_space;
 };
 // One mutator per space, can just store the heap in the mutator.
 struct mutator {
@@ -29,6 +31,9 @@ static inline struct heap* mutator_heap(struct mutator *mut) {
 static inline struct semi_space* heap_semi_space(struct heap *heap) {
   return &heap->semi_space;
 }
+static inline struct large_object_space* heap_large_object_space(struct heap *heap) {
+  return &heap->large_object_space;
+}
 static inline struct semi_space* mutator_semi_space(struct mutator *mut) {
   return heap_semi_space(mutator_heap(mut));
 }
@@ -172,24 +177,34 @@ static inline void* get_field(void **addr) {
   return *addr;
 }
 
-static int initialize_gc(size_t heap_size, struct heap **heap,
-                         struct mutator **mut) {
-  void *mem = mmap(NULL, heap_size, PROT_READ|PROT_WRITE,
+static int initialize_semi_space(struct semi_space *space, size_t size) {
+  void *mem = mmap(NULL, size, PROT_READ|PROT_WRITE,
                    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (mem == MAP_FAILED) {
     perror("mmap failed");
     return 0;
   }
 
+  space->hp = space->base = (uintptr_t) mem;
+  space->size = size;
+  space->count = -1;
+  flip(space);
+
+  return 1;
+}
+  
+static int initialize_gc(size_t heap_size, struct heap **heap,
+                         struct mutator **mut) {
   *mut = calloc(1, sizeof(struct mutator));
   if (!*mut) abort();
   *heap = mutator_heap(*mut);
-  struct semi_space *space = mutator_semi_space(*mut);
 
-  space->hp = space->base = (uintptr_t) mem;
-  space->size = heap_size;
-  space->count = -1;
-  flip(space);
+  struct semi_space *space = mutator_semi_space(*mut);
+  if (!initialize_semi_space(space, heap_size))
+    return 0;
+  if (!large_object_space_init(heap_large_object_space(*heap), *heap))
+    return 0;
+  
   (*mut)->roots = NULL;
 
   return 1;

Reply via email to