wingo pushed a commit to branch wip-whippet in repository guile. commit 95868c70a2c94472f16b6f8fd2f542b3d87a99b3 Author: Andy Wingo <wi...@igalia.com> AuthorDate: Mon Jan 6 11:11:10 2025 +0100
Add splay tree --- src/splay-tree.h | 258 +++++++++++++++++++++++++++++++++++++++++++++++++ test/test-splay-tree.c | 116 ++++++++++++++++++++++ 2 files changed, 374 insertions(+) diff --git a/src/splay-tree.h b/src/splay-tree.h new file mode 100644 index 000000000..f4e41af18 --- /dev/null +++ b/src/splay-tree.h @@ -0,0 +1,258 @@ +// A splay tree, originally derived from Octane's `splay.js', whose +// copyright is as follows: +// +// Copyright 2009 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The splay tree has been modified to allow nodes to store spans of +// keys, for example so that we can look up an object given any address +// pointing into that object. + +#ifndef SPLAY_TREE_PREFIX +#error define SPLAY_TREE_PREFIX before including splay-tree.h +#endif + +#include <malloc.h> +#include <stdint.h> +#include <string.h> + +#include "gc-assert.h" + +#define SPLAY___(p, n) p ## n +#define SPLAY__(p, n) SPLAY___(p, n) +#define SPLAY_(n) SPLAY__(SPLAY_TREE_PREFIX, n) + +// Data types used by the splay tree. +#define SPLAY_KEY_SPAN SPLAY_(key_span) +#define SPLAY_KEY SPLAY_(key) +#define SPLAY_VALUE SPLAY_(value) + +// Functions used by the splay tree. +// key_span, key -> -1|0|1 +#define SPLAY_COMPARE SPLAY_(compare) +// key_span -> key +#define SPLAY_SPAN_START SPLAY_(span_start) + +// Data types defined by the splay tree. +#define SPLAY_TREE SPLAY_(tree) +#define SPLAY_NODE SPLAY_(node) + +// Functions defined by the splay tree. +#define SPLAY_NODE_NEW SPLAY_(node_new) +#define SPLAY_INIT SPLAY_(tree_init) +#define SPLAY_SPLAY SPLAY_(tree_splay) +#define SPLAY_PREVIOUS SPLAY_(tree_previous) +#define SPLAY_LOOKUP SPLAY_(tree_lookup) +#define SPLAY_CONTAINS SPLAY_(tree_contains) +#define SPLAY_INSERT SPLAY_(tree_insert) +#define SPLAY_REMOVE SPLAY_(tree_remove) + +struct SPLAY_NODE { + SPLAY_KEY_SPAN key; + SPLAY_VALUE value; + struct SPLAY_NODE *left; + struct SPLAY_NODE *right; +}; + +struct SPLAY_TREE { + struct SPLAY_NODE *root; +}; + +static inline struct SPLAY_NODE* +SPLAY_NODE_NEW(SPLAY_KEY_SPAN key, SPLAY_VALUE value) { + struct SPLAY_NODE *ret = malloc(sizeof(*ret)); + if (!ret) GC_CRASH(); + ret->key = key; + ret->value = value; + ret->left = ret->right = NULL; + return ret; +} + +static inline void +SPLAY_INIT(struct SPLAY_TREE *tree) { + tree->root = NULL; +} + +static struct SPLAY_NODE* +SPLAY_SPLAY(struct SPLAY_TREE *tree, SPLAY_KEY key) { + struct SPLAY_NODE *current = tree->root; + if (!current) + return NULL; + // The use of the dummy node is a bit counter-intuitive: The right + // child of the dummy node will hold the L tree of the algorithm. The + // left child of the dummy node will hold the R tree of the algorithm. + // Using a dummy node, left and right will always be nodes and we + // avoid special cases. + struct SPLAY_NODE dummy; + memset(&dummy, 0, sizeof(dummy)); + struct SPLAY_NODE *left = &dummy; + struct SPLAY_NODE *right = &dummy; + +loop: + switch (SPLAY_COMPARE(key, current->key)) { + case -1: + if (!current->left) + break; + if (SPLAY_COMPARE(key, current->left->key) < 0LL) { + // Rotate right. + struct SPLAY_NODE *tmp = current->left; + current->left = tmp->right; + tmp->right = current; + current = tmp; + if (!current->left) + break; + } + // Link right. + right->left = current; + right = current; + current = current->left; + goto loop; + + case 0: + break; + + case 1: + if (!current->right) + break; + if (SPLAY_COMPARE(key, current->right->key) > 0LL) { + // Rotate left. + struct SPLAY_NODE *tmp = current->right; + current->right = tmp->left; + tmp->left = current; + current = tmp; + if (!current->right) + break; + } + // Link left. + left->right = current; + left = current; + current = current->right; + goto loop; + + default: + GC_CRASH(); + } + + left->right = current->left; + right->left = current->right; + current->left = dummy.right; + current->right = dummy.left; + tree->root = current; + return current; +} + +static inline struct SPLAY_NODE* +SPLAY_PREVIOUS(struct SPLAY_NODE *node) { + node = node->left; + if (!node) return NULL; + while (node->right) + node = node->right; + return node; +} + +static inline struct SPLAY_NODE* +SPLAY_LOOKUP(struct SPLAY_TREE *tree, SPLAY_KEY key) { + struct SPLAY_NODE *node = SPLAY_SPLAY(tree, key); + if (node && SPLAY_COMPARE(key, node->key) == 0) + return node; + return NULL; +} + +static inline int +SPLAY_CONTAINS(struct SPLAY_TREE *tree, SPLAY_KEY key) { + return !!SPLAY_LOOKUP(tree, key); +} + +static inline struct SPLAY_NODE* +SPLAY_INSERT(struct SPLAY_TREE* tree, SPLAY_KEY_SPAN key, SPLAY_VALUE value) { + if (!tree->root) { + tree->root = SPLAY_NODE_NEW(key, value); + return tree->root; + } + SPLAY_KEY scalar = SPLAY_SPAN_START(key); + struct SPLAY_NODE *node = SPLAY_SPLAY(tree, scalar); + switch (SPLAY_COMPARE(scalar, node->key)) { + case -1: + node = SPLAY_NODE_NEW(key, value); + node->right = tree->root; + node->left = tree->root->left; + tree->root->left = NULL; + tree->root = node; + break; + case 0: + GC_ASSERT(memcmp(&key, &node->key, sizeof(SPLAY_KEY_SPAN)) == 0); + node->value = value; + break; + case 1: + node = SPLAY_NODE_NEW(key, value); + node->left = tree->root; + node->right = tree->root->right; + tree->root->right = NULL; + tree->root = node; + break; + default: + GC_CRASH(); + } + return node; +} + +static inline SPLAY_VALUE +SPLAY_REMOVE(struct SPLAY_TREE *tree, SPLAY_KEY key) { + GC_ASSERT(tree->root); + struct SPLAY_NODE *removed = SPLAY_SPLAY(tree, key); + GC_ASSERT(removed); + SPLAY_VALUE value = removed->value; + if (!removed->left) { + tree->root = removed->right; + } else { + struct SPLAY_NODE *right = removed->right; + tree->root = removed->left; + // Splay to make sure that the new root has an empty right child. + SPLAY_SPLAY(tree, key); + tree->root->right = right; + } + free(removed); + return value; +} + +#undef SPLAY_TREE_PREFIX +#undef SPLAY_KEY_SPAN +#undef SPLAY_KEY +#undef SPLAY_VALUE +#undef SPLAY_COMPARE +#undef SPLAY_SPAN_START +#undef SPLAY_SPANS_EQUAL +#undef SPLAY_TREE +#undef SPLAY_NODE +#undef SPLAY_NODE_NEW +#undef SPLAY_INIT +#undef SPLAY_SPLAY +#undef SPLAY_PREVIOUS +#undef SPLAY_LOOKUP +#undef SPLAY_CONTAINS +#undef SPLAY_INSERT +#undef SPLAY_REMOVE diff --git a/test/test-splay-tree.c b/test/test-splay-tree.c new file mode 100644 index 000000000..7f6e916c6 --- /dev/null +++ b/test/test-splay-tree.c @@ -0,0 +1,116 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <unistd.h> + +struct object { + uintptr_t addr; + size_t size; +}; + +struct data { + size_t idx; +}; + +#define SPLAY_TREE_PREFIX object_ +typedef struct object object_key_span; +typedef uintptr_t object_key; +typedef struct data object_value; +static inline int +object_compare(uintptr_t addr, struct object obj) { + if (addr < obj.addr) return -1; + if (addr - obj.addr < obj.size) return 0; + return 1; +} +static inline uintptr_t +object_span_start(struct object obj) { + return obj.addr; +} +#include "splay-tree.h" + +// A power-law distribution. Each integer was selected by starting at +// 0, taking a random number in [0,1), and then accepting the integer if +// the random number was less than 0.15, or trying again with the next +// integer otherwise. Useful for modelling allocation sizes or number +// of garbage objects to allocate between live allocations. +static const uint8_t power_law_distribution[256] = { + 1, 15, 3, 12, 2, 8, 4, 0, 18, 7, 9, 8, 15, 2, 36, 5, + 1, 9, 6, 11, 9, 19, 2, 0, 0, 3, 9, 6, 3, 2, 1, 1, + 6, 1, 8, 4, 2, 0, 5, 3, 7, 0, 0, 3, 0, 4, 1, 7, + 1, 8, 2, 2, 2, 14, 0, 7, 8, 0, 2, 1, 4, 12, 7, 5, + 0, 3, 4, 13, 10, 2, 3, 7, 0, 8, 0, 23, 0, 16, 1, 1, + 6, 28, 1, 18, 0, 3, 6, 5, 8, 6, 14, 5, 2, 5, 0, 11, + 0, 18, 4, 16, 1, 4, 3, 13, 3, 23, 7, 4, 10, 5, 3, 13, + 0, 14, 5, 5, 2, 5, 0, 16, 2, 0, 1, 1, 0, 0, 4, 2, + 7, 7, 0, 5, 7, 2, 1, 24, 27, 3, 7, 1, 0, 8, 1, 4, + 0, 3, 0, 7, 7, 3, 9, 2, 9, 2, 5, 10, 1, 1, 12, 6, + 2, 9, 5, 0, 4, 6, 0, 7, 2, 1, 5, 4, 1, 0, 1, 15, + 4, 0, 15, 4, 0, 0, 32, 18, 2, 2, 1, 7, 8, 3, 11, 1, + 2, 7, 11, 1, 9, 1, 2, 6, 11, 17, 1, 2, 5, 1, 14, 3, + 6, 1, 1, 15, 3, 1, 0, 6, 10, 8, 1, 3, 2, 7, 0, 1, + 0, 11, 3, 3, 5, 8, 2, 0, 0, 7, 12, 2, 5, 20, 3, 7, + 4, 4, 5, 22, 1, 5, 2, 7, 15, 2, 4, 6, 11, 8, 12, 1 +}; + +static size_t power_law(size_t *counter) { + return power_law_distribution[(*counter)++ & 0xff]; +} + +static uintptr_t allocate(size_t size) { + void *ret = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ret == MAP_FAILED) { + perror("mmap failed"); + exit(1); + } + return (uintptr_t)ret; +} + +static const size_t GB = 1024 * 1024 * 1024; + +// Page size is at least 4 kB, so we will have at most 256 * 1024 allocations. +static uintptr_t all_objects[256 * 1024 + 1]; +static size_t object_count; + +#define ASSERT(x) do { if (!(x)) abort(); } while (0) + +int main(int argc, char *arv[]) { + struct object_tree tree; + + object_tree_init(&tree); + + size_t counter = 0; + size_t page_size = getpagesize(); + + // Use mmap as a source of nonoverlapping spans. Allocate 1 GB of address space. + size_t allocated = 0; + while (allocated < 1 * GB) { + size_t size = power_law(&counter) * page_size; + if (!size) + continue; + uintptr_t addr = allocate(size); + object_tree_insert(&tree, + (struct object){addr, size}, + (struct data){object_count}); + all_objects[object_count++] = addr; + ASSERT(object_count < sizeof(all_objects) / sizeof(all_objects[0])); + allocated += size; + } + + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_contains(&tree, all_objects[i])); + + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_lookup(&tree, all_objects[i])->value.idx == i); + + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_lookup(&tree, all_objects[i] + 42)->value.idx == i); + + for (size_t i = 0; i < object_count; i++) + object_tree_remove(&tree, all_objects[i]); + + for (size_t i = 0; i < object_count; i++) + ASSERT(!object_tree_contains(&tree, all_objects[i])); + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_lookup(&tree, all_objects[i]) == NULL); +}