wingo pushed a commit to branch wip-whippet
in repository guile.

commit d63288048c3ff9cc0c597542b3882b1db0c9f52d
Author: Andy Wingo <wi...@igalia.com>
AuthorDate: Fri Mar 18 16:19:42 2022 +0100

    Add mt-gcbench
---
 Makefile           |   2 +-
 bdw.h              |  12 +-
 mt-gcbench-types.h |  10 ++
 mt-gcbench.c       | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 381 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 9a33422d5..ca644681c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-TESTS=gcbench quads # MT_GCBench MT_GCBench2
+TESTS=gcbench quads mt-gcbench # MT_GCBench MT_GCBench2
 COLLECTORS=bdw semi mark-sweep parallel-mark-sweep
 
 CC=gcc
diff --git a/bdw.h b/bdw.h
index 56a1162bd..968dd22b2 100644
--- a/bdw.h
+++ b/bdw.h
@@ -55,12 +55,22 @@ static struct context* initialize_gc(size_t heap_size) {
     GC_set_max_heap_size (heap_size);
     GC_expand_hp(heap_size - current_heap_size);
   }
+  GC_allow_register_threads();
   return GC_malloc_atomic(1);
 }
 
-static inline void print_start_gc_stats(struct context *cx) {
+static struct context* initialize_gc_for_thread(uintptr_t *stack_base,
+                                                struct context *parent) {
+  struct GC_stack_base base = { stack_base };
+  GC_register_my_thread(&base);
+  return GC_malloc_atomic(1);
+}
+static void finish_gc_for_thread(struct context *cx) {
+  GC_unregister_my_thread();
 }
 
+static inline void print_start_gc_stats(struct context *cx) {
+}
 static inline void print_end_gc_stats(struct context *cx) {
   printf("Completed %ld collections\n", (long)GC_get_gc_no());
   printf("Heap size is %ld\n", (long)GC_get_heap_size());
diff --git a/mt-gcbench-types.h b/mt-gcbench-types.h
new file mode 100644
index 000000000..a61b2b7d5
--- /dev/null
+++ b/mt-gcbench-types.h
@@ -0,0 +1,10 @@
+#ifndef GCBENCH_TYPES_H
+#define GCBENCH_TYPES_H
+
+#define FOR_EACH_HEAP_OBJECT_KIND(M) \
+  M(node, Node, NODE) \
+  M(double_array, DoubleArray, DOUBLE_ARRAY)
+
+#include "heap-objects.h"
+
+#endif // GCBENCH_TYPES_H
diff --git a/mt-gcbench.c b/mt-gcbench.c
new file mode 100644
index 000000000..269df9afe
--- /dev/null
+++ b/mt-gcbench.c
@@ -0,0 +1,359 @@
+// This is adapted from a benchmark written by John Ellis and Pete Kovac
+// of Post Communications.
+// It was modified by Hans Boehm of Silicon Graphics.
+// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
+// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
+//
+//      This is no substitute for real applications.  No actual application
+//      is likely to behave in exactly this way.  However, this benchmark was
+//      designed to be more representative of real applications than other
+//      Java GC benchmarks of which we are aware.
+//      It attempts to model those properties of allocation requests that
+//      are important to current GC techniques.
+//      It is designed to be used either to obtain a single overall performance
+//      number, or to give a more detailed estimate of how collector
+//      performance varies with object lifetimes.  It prints the time
+//      required to allocate and collect balanced binary trees of various
+//      sizes.  Smaller trees result in shorter object lifetimes.  Each cycle
+//      allocates roughly the same amount of memory.
+//      Two data structures are kept around during the entire process, so
+//      that the measured performance is representative of applications
+//      that maintain some live in-memory data.  One of these is a tree
+//      containing many pointers.  The other is a large array containing
+//      double precision floating point numbers.  Both should be of comparable
+//      size.
+//
+//      The results are only really meaningful together with a specification
+//      of how much memory was used.  It is possible to trade memory for
+//      better time performance.  This benchmark should be run in a 32 MB
+//      heap, though we don't currently know how to enforce that uniformly.
+//
+//      Unlike the original Ellis and Kovac benchmark, we do not attempt
+//      to measure pause times.  This facility should eventually be added back
+//      in.  There are several reasons for omitting it for now.  The original
+//      implementation depended on assumptions about the thread scheduler
+//      that don't hold uniformly.  The results really measure both the
+//      scheduler and GC.  Pause time measurements tend to not fit well with
+//      current benchmark suites.  As far as we know, none of the current
+//      commercial Java implementations seriously attempt to minimize GC pause
+//      times.
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#include "assert.h"
+#include "gcbench-types.h"
+#include "gc.h"
+#include "inline.h"
+
+#define MAX_THREAD_COUNT 256
+
+static const int long_lived_tree_depth = 16; // about 4Mb
+static const int array_size = 500000; // about 4Mb
+static const int min_tree_depth = 4;
+static const int max_tree_depth = 16;
+
+struct Node {
+  GC_HEADER;
+  struct Node * left;
+  struct Node * right;
+  int i, j;
+};
+
+struct DoubleArray {
+  GC_HEADER;
+  size_t length;  // number of doubles stored in values[]
+  double values[];  // C99 flexible array member (was the GNU-only `[0]` form)
+};
+
+static inline size_t node_size(Node *obj) {
+  return sizeof(Node);
+}
+static inline size_t double_array_size(DoubleArray *array) {
+  return sizeof(*array) + array->length * sizeof(double);
+}
+static inline void
+visit_node_fields(Node *node,
+                  void (*visit)(void **loc, void *visit_data),
+                  void *visit_data) {
+  visit((void**)&node->left, visit_data);
+  visit((void**)&node->right, visit_data);
+}
+static inline void
+visit_double_array_fields(DoubleArray *obj,
+                          void (*visit)(void **loc, void *visit_data),
+                          void *visit_data) {
+}
+
+typedef HANDLE_TO(Node) NodeHandle;
+typedef HANDLE_TO(DoubleArray) DoubleArrayHandle;
+
+static Node* allocate_node(struct context *cx) {
+  // memset to 0 by the collector.
+  return allocate(cx, ALLOC_KIND_NODE, sizeof (Node));
+}
+
+static DoubleArray* allocate_double_array(struct context *cx,
+                                                 size_t size) {
+  // May be uninitialized.
+  DoubleArray *ret =
+    allocate_pointerless(cx, ALLOC_KIND_DOUBLE_ARRAY,
+                         sizeof(DoubleArray) + sizeof (double) * size);
+  ret->length = size;
+  return ret;
+}
+
+static unsigned long current_time(void)
+{  // Returns gettimeofday() wall-clock time expressed in microseconds.
+  struct timeval t = { 0 };
+  gettimeofday(&t, NULL);  // on failure t stays zeroed, so 0 is returned
+  return (unsigned long)t.tv_sec * 1000 * 1000 + (unsigned long)t.tv_usec;
+}
+
+static double elapsed_millis(unsigned long start) {
+  return (current_time() - start) * 1e-3;
+}
+
+// Nodes used by a tree of a given size
+static int tree_size(int i) {
+  return ((1 << (i + 1)) - 1);
+}
+
+// Number of iterations to use for a given tree depth
+static int compute_num_iters(int i) {
+  return 2 * tree_size(max_tree_depth + 2) / tree_size(i);
+}
+
+// Build tree top down, assigning to older objects.
+static void populate(struct context *cx, int depth, Node *node) {
+  if (depth <= 0)
+    return;
+
+  NodeHandle self = { node };
+  PUSH_HANDLE(cx, self);
+  NodeHandle l = { allocate_node(cx) };
+  PUSH_HANDLE(cx, l);
+  NodeHandle r = { allocate_node(cx) };
+  PUSH_HANDLE(cx, r);
+
+  set_field((void**)&HANDLE_REF(self)->left, HANDLE_REF(l));
+  set_field((void**)&HANDLE_REF(self)->right, HANDLE_REF(r));
+
+  populate(cx, depth-1, HANDLE_REF(self)->left);
+  populate(cx, depth-1, HANDLE_REF(self)->right);
+
+  POP_HANDLE(cx);
+  POP_HANDLE(cx);
+  POP_HANDLE(cx);
+}
+
+// Build tree bottom-up
+static Node* make_tree(struct context *cx, int depth) {
+  if (depth <= 0)
+    return allocate_node(cx);
+
+  NodeHandle left = { make_tree(cx, depth-1) };
+  PUSH_HANDLE(cx, left);
+  NodeHandle right = { make_tree(cx, depth-1) };
+  PUSH_HANDLE(cx, right);
+
+  Node *result = allocate_node(cx);
+  init_field((void**)&result->left, HANDLE_REF(left));
+  init_field((void**)&result->right, HANDLE_REF(right));
+
+  POP_HANDLE(cx);
+  POP_HANDLE(cx);
+
+  return result;
+}
+
+static void validate_tree(Node *tree, int depth) {
+#ifndef NDEBUG
+  ASSERT_EQ(tree->i, 0);
+  ASSERT_EQ(tree->j, 0);
+  if (depth == 0) {
+    ASSERT(!tree->left);
+    ASSERT(!tree->right);
+  } else {
+    ASSERT(tree->left);
+    ASSERT(tree->right);
+    validate_tree(tree->left, depth - 1);
+    validate_tree(tree->right, depth - 1);
+  }
+#endif
+}
+
+static void time_construction(struct context *cx, int depth) {
+  int num_iters = compute_num_iters(depth);
+  NodeHandle temp_tree = { NULL };
+  PUSH_HANDLE(cx, temp_tree);
+
+  printf("Creating %d trees of depth %d\n", num_iters, depth);
+  // Phase 1: allocate a root, then let populate() fill in the children.
+  {
+    unsigned long start = current_time();
+    for (int i = 0; i < num_iters; ++i) {
+      HANDLE_SET(temp_tree, allocate_node(cx));
+      populate(cx, depth, HANDLE_REF(temp_tree));
+      validate_tree(HANDLE_REF(temp_tree), depth);
+      HANDLE_SET(temp_tree, NULL);
+    }
+    printf("\tTop down construction took %.3f msec\n",
+           elapsed_millis(start));
+  }
+  // Phase 2: same number of trees, each returned whole by make_tree().
+  {
+    unsigned long start = current_time();  // same type elapsed_millis() takes
+    for (int i = 0; i < num_iters; ++i) {
+      HANDLE_SET(temp_tree, make_tree(cx, depth));
+      validate_tree(HANDLE_REF(temp_tree), depth);
+      HANDLE_SET(temp_tree, NULL);
+    }
+    printf("\tBottom up construction took %.3f msec\n",
+           elapsed_millis(start));
+  }
+
+  POP_HANDLE(cx);
+}
+
+static void* call_with_stack_base(void* (*)(uintptr_t*, void*), void*) NEVER_INLINE;
+static void* call_with_stack_base_inner(void* (*)(uintptr_t*, void*), uintptr_t*, void*) NEVER_INLINE;
+static void* call_with_stack_base_inner(void* (*f)(uintptr_t *stack_base, void *arg),
+                                        uintptr_t *stack_base, void *arg) {
+  return f(stack_base, arg);  // forwards f's result unchanged
+}
+static void* call_with_stack_base(void* (*f)(uintptr_t *stack_base, void *arg),
+                                  void *arg) {
+  uintptr_t x;  // never read: only its address is used, as f's stack_base
+  return call_with_stack_base_inner(f, &x, arg);
+}
+
+struct call_with_gc_data {
+  void* (*f)(struct context *);
+  struct context *parent;
+};
+static void* call_with_gc_inner(uintptr_t *stack_base, void *arg) {
+  struct call_with_gc_data *data = arg;
+  struct context *cx = initialize_gc_for_thread(stack_base, data->parent);
+  void *ret = data->f(cx);
+  finish_gc_for_thread(cx);
+  return ret;
+}
+static void* call_with_gc(void* (*f)(struct context *),
+                          struct context *parent) {
+  struct call_with_gc_data data = { f, parent };
+  return call_with_stack_base(call_with_gc_inner, &data);
+}
+
+static void* run_one_test(struct context *cx) {
+  NodeHandle long_lived_tree = { NULL };
+  NodeHandle temp_tree = { NULL };  // pushed below but never assigned here
+  DoubleArrayHandle array = { NULL };
+
+  PUSH_HANDLE(cx, long_lived_tree);
+  PUSH_HANDLE(cx, temp_tree);
+  PUSH_HANDLE(cx, array);
+
+  // Create a long lived object
+  printf(" Creating a long-lived binary tree of depth %d\n",
+         long_lived_tree_depth);
+  HANDLE_SET(long_lived_tree, allocate_node(cx));
+  populate(cx, long_lived_tree_depth, HANDLE_REF(long_lived_tree));
+
+  // Create long-lived array, filling half of it
+  printf(" Creating a long-lived array of %d doubles\n", array_size);
+  HANDLE_SET(array, allocate_double_array(cx, array_size));
+  for (int i = 0; i < array_size/2; ++i) {
+    HANDLE_REF(array)->values[i] = 1.0/i;  // i == 0 stores 1.0/0
+  }
+
+  for (int d = min_tree_depth; d <= max_tree_depth; d += 2) {
+    time_construction(cx, d);
+  }
+
+  validate_tree(HANDLE_REF(long_lived_tree), long_lived_tree_depth);
+
+  // Fake reference to LongLivedTree and array to keep them from being optimized
+  // away.
+  if (HANDLE_REF(long_lived_tree) == 0
+      || HANDLE_REF(array)->values[1000] != 1.0/1000)
+    fprintf(stderr, "Failed\n");
+
+  POP_HANDLE(cx);
+  POP_HANDLE(cx);
+  POP_HANDLE(cx);
+  return NULL;  // always NULL; callers in this file ignore the result
+}
+
+static void* run_one_test_in_thread(void *arg) {
+  struct context *parent_cx = arg;
+  return call_with_gc(run_one_test, parent_cx);
+}
+
+int main(int argc, char *argv[]) {
+  // Define size of Node without any GC header.
+  size_t sizeof_node = 2 * sizeof(Node*) + 2 * sizeof(int);
+  size_t sizeof_double_array = sizeof(size_t);
+  size_t heap_max_live =
+    tree_size(long_lived_tree_depth) * sizeof_node +
+    tree_size(max_tree_depth) * sizeof_node +
+    sizeof_double_array + sizeof(double) * array_size;
+  if (argc != 3) {
+    fprintf(stderr, "usage: %s MULTIPLIER NTHREADS\n", argv[0]);
+    return 1;
+  }
+
+  double multiplier = atof(argv[1]);
+  size_t nthreads = atol(argv[2]);
+
+  if (!(1.0 < multiplier && multiplier < 100)) {  // must lie strictly in (1, 100)
+    fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[1]);
+    return 1;
+  }
+  if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
+    fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
+            (int)MAX_THREAD_COUNT, argv[2]);
+    return 1;
+  }
+
+  size_t heap_size = heap_max_live * multiplier * nthreads;
+  struct context *cx = initialize_gc(heap_size);
+  if (!cx) {
+    fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
+            heap_size);
+    return 1;
+  }
+
+  printf("Garbage Collector Test\n");
+  printf(" Live storage will peak at %zu bytes.\n\n", heap_max_live);
+  print_start_gc_stats(cx);
+
+  unsigned long start = current_time();
+
+  pthread_t threads[MAX_THREAD_COUNT];
+  // The main thread runs one test itself, so slot 0 of threads[] stays unused.
+  for (size_t i = 1; i < nthreads; i++) {
+    int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, cx);
+    if (status) {
+      errno = status;
+      perror("Failed to create thread");
+      return 1;
+    }
+  }
+  run_one_test(cx);
+  for (size_t i = 1; i < nthreads; i++) {
+    int status = pthread_join(threads[i], NULL);
+    if (status) {
+      errno = status;
+      perror("Failed to join thread");
+      return 1;
+    }
+  }
+
+  printf("Completed in %.3f msec\n", elapsed_millis(start));
+  print_end_gc_stats(cx);
+}
+

Reply via email to