Author: Remi Meier <remi.me...@inf.ethz.ch>
Branch: 
Changeset: r1332:e250803b835e
Date: 2014-09-03 09:54 +0200
http://bitbucket.org/pypy/stmgc/changeset/e250803b835e/
Log: WIP diff too long, truncating to 2000 out of 2893 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -8,3 +8,4 @@ */__pycache__ *.out.* */\#*\# +*/.\#* diff --git a/c8/stm/atomic.h b/c8/stm/atomic.h new file mode 100644 --- /dev/null +++ b/c8/stm/atomic.h @@ -0,0 +1,47 @@ +#ifndef _STM_ATOMIC_H +#define _STM_ATOMIC_H + +/* spin_loop() corresponds to the PAUSE instruction on x86. On + other architectures, we generate no instruction (but still need + the compiler barrier); if on another architecture you find the + corresponding instruction, feel free to add it here. +*/ + +/* write_fence() is a function that inserts a "write fence". The + goal is to make sure that past writes are really pushed to memory + before the future writes. We assume that the corresponding "read + fence" effect is done automatically by a corresponding + __sync_bool_compare_and_swap(). + + On x86, this is done automatically by the CPU; we only need a + compiler barrier (asm("memory")). + + On other architectures, we use __sync_synchronize() as a general + fall-back, but we might have more efficient alternative on some other + platforms too. +*/ + + +#if defined(__i386__) || defined(__amd64__) + +# define HAVE_FULL_EXCHANGE_INSN + static inline void spin_loop(void) { asm("pause" : : : "memory"); } + static inline void write_fence(void) { asm("" : : : "memory"); } + +#else + + static inline void spin_loop(void) { asm("" : : : "memory"); } + static inline void write_fence(void) { __sync_synchronize(); } + +#endif + + +#define spinlock_acquire(lock) \ + do { if (LIKELY(__sync_lock_test_and_set(&(lock), 1) == 0)) break; \ + spin_loop(); } while (1) +#define spinlock_release(lock) \ + do { assert((lock) == 1); \ + __sync_lock_release(&(lock)); } while (0) + + +#endif /* _STM_ATOMIC_H */ diff --git a/c8/stm/core.c b/c8/stm/core.c new file mode 100644 --- /dev/null +++ b/c8/stm/core.c @@ -0,0 +1,101 @@ +#ifndef _STM_CORE_H_ +# error "must be compiled via stmgc.c" +#endif + + +void _stm_write_slowpath(object_t *obj) +{ + +} + +static void reset_transaction_read_version(void) +{ + /* force-reset all read markers to 0 */ + + char *readmarkers = REAL_ADDRESS(STM_SEGMENT->segment_base, + FIRST_READMARKER_PAGE * 4096UL); + dprintf(("reset_transaction_read_version: %p %ld\n", readmarkers, + (long)(NB_READMARKER_PAGES * 4096UL))); + if (mmap(readmarkers, NB_READMARKER_PAGES * 4096UL, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PAGES_FLAGS, -1, 0) != readmarkers) { + /* fall-back */ +#if STM_TESTS + stm_fatalerror("reset_transaction_read_version: %m"); +#endif + memset(readmarkers, 0, NB_READMARKER_PAGES * 4096UL); + } + STM_SEGMENT->transaction_read_version = 1; +} + + +static void _stm_start_transaction(stm_thread_local_t *tl, bool inevitable) +{ + assert(!_stm_in_transaction(tl)); + + retry: + + if (!acquire_thread_segment(tl)) + goto retry; + /* GS invalid before this point! 
*/ + +#ifndef NDEBUG + STM_PSEGMENT->running_pthread = pthread_self(); +#endif + + dprintf(("start_transaction\n")); + + s_mutex_unlock(); + + uint8_t old_rv = STM_SEGMENT->transaction_read_version; + STM_SEGMENT->transaction_read_version = old_rv + 1; + if (UNLIKELY(old_rv == 0xff)) { + reset_transaction_read_version(); + } + + assert(list_is_empty(STM_PSEGMENT->modified_old_objects)); + check_nursery_at_transaction_start(); +} + +long stm_start_transaction(stm_thread_local_t *tl) +{ + s_mutex_lock(); +#ifdef STM_NO_AUTOMATIC_SETJMP + long repeat_count = 0; /* test/support.py */ +#else + long repeat_count = stm_rewind_jmp_setjmp(tl); +#endif + _stm_start_transaction(tl, false); + return repeat_count; +} + + +/************************************************************/ + +static void _finish_transaction() +{ + stm_thread_local_t *tl = STM_SEGMENT->running_thread; + release_thread_segment(tl); + /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */ +} + +void stm_commit_transaction(void) +{ + assert(!_has_mutex()); + assert(STM_PSEGMENT->running_pthread == pthread_self()); + + minor_collection(1); + abort(); + + s_mutex_lock(); + + assert(STM_SEGMENT->nursery_end == NURSERY_END); + stm_rewind_jmp_forget(STM_SEGMENT->running_thread); + + + /* done */ + _finish_transaction(); + /* cannot access STM_SEGMENT or STM_PSEGMENT from here ! */ + + s_mutex_unlock(); +} diff --git a/c8/stm/core.h b/c8/stm/core.h new file mode 100644 --- /dev/null +++ b/c8/stm/core.h @@ -0,0 +1,97 @@ +#define _STM_CORE_H_ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <errno.h> +#include <pthread.h> + +/************************************************************/ + +#ifndef STM_GC_NURSERY +# define STM_GC_NURSERY 4096 // 4MB +#endif + + +#define NB_PAGES (2500*256) // 2500MB +#define NB_SEGMENTS STM_NB_SEGMENTS +#define NB_SEGMENTS_MAX 240 /* don't increase NB_SEGMENTS past this */ +#define MAP_PAGES_FLAGS (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE) +#define NB_NURSERY_PAGES (STM_GC_NURSERY/4) + +#define TOTAL_MEMORY (NB_PAGES * 4096UL * (1 + NB_SEGMENTS)) +#define READMARKER_END ((NB_PAGES * 4096UL) >> 4) +#define FIRST_OBJECT_PAGE ((READMARKER_END + 4095) / 4096UL) +#define FIRST_NURSERY_PAGE FIRST_OBJECT_PAGE +#define END_NURSERY_PAGE (FIRST_NURSERY_PAGE + NB_NURSERY_PAGES) + +#define READMARKER_START ((FIRST_OBJECT_PAGE * 4096UL) >> 4) +#define FIRST_READMARKER_PAGE (READMARKER_START / 4096UL) +#define OLD_RM_START ((END_NURSERY_PAGE * 4096UL) >> 4) +#define FIRST_OLD_RM_PAGE (OLD_RM_START / 4096UL) +#define NB_READMARKER_PAGES (FIRST_OBJECT_PAGE - FIRST_READMARKER_PAGE) + + +#define USE_REMAP_FILE_PAGES 1 + +enum /* stm_flags */ { + GCFLAG_WRITE_BARRIER = _STM_GCFLAG_WRITE_BARRIER, +}; + + +/************************************************************/ + + +#define STM_PSEGMENT ((stm_priv_segment_info_t *)STM_SEGMENT) + +typedef TLPREFIX struct stm_priv_segment_info_s stm_priv_segment_info_t; + +struct stm_priv_segment_info_s { + struct stm_segment_info_s pub; + + struct list_s *modified_old_objects; + + /* For debugging */ +#ifndef NDEBUG + pthread_t running_pthread; +#endif +}; + + +static char *stm_object_pages; +static int stm_object_pages_fd; +static stm_thread_local_t *stm_all_thread_locals = NULL; + + +#define REAL_ADDRESS(segment_base, src) ((segment_base) + (uintptr_t)(src)) + + +static inline char *get_segment_base(long segment_num) { + return stm_object_pages + segment_num * (NB_PAGES * 4096UL); +} + +static inline +struct stm_segment_info_s 
*get_segment(long segment_num) { + return (struct stm_segment_info_s *)REAL_ADDRESS( + get_segment_base(segment_num), STM_PSEGMENT); +} + +static inline +struct stm_priv_segment_info_s *get_priv_segment(long segment_num) { + return (struct stm_priv_segment_info_s *)REAL_ADDRESS( + get_segment_base(segment_num), STM_PSEGMENT); +} + +static bool _is_tl_registered(stm_thread_local_t *tl); +static bool _seems_to_be_running_transaction(void); + + +static inline void _duck(void) { + /* put a call to _duck() between two instructions that set 0 into + a %gs-prefixed address and that may otherwise be replaced with + llvm.memset --- it fails later because of the prefix... + This is not needed any more after applying the patch + llvmfix/no-memset-creation-with-addrspace.diff. */ + asm("/* workaround for llvm bug */"); +} diff --git a/c8/stm/fprintcolor.c b/c8/stm/fprintcolor.c new file mode 100644 --- /dev/null +++ b/c8/stm/fprintcolor.c @@ -0,0 +1,48 @@ +/* ------------------------------------------------------------ */ +#ifdef STM_DEBUGPRINT +/* ------------------------------------------------------------ */ + + +static int threadcolor_printf(const char *format, ...) +{ + char buffer[2048]; + va_list ap; + int result; + int size = (int)sprintf(buffer, "\033[%dm[%d,%lx] ", dprintfcolor(), + (int)getpid(), (long)pthread_self()); + assert(size >= 0); + + va_start(ap, format); + result = vsnprintf(buffer + size, 2000, format, ap); + assert(result >= 0); + va_end(ap); + + strcpy(buffer + size + result, "\033[0m"); + fputs(buffer, stderr); + + return result; +} + + +/* ------------------------------------------------------------ */ +#endif +/* ------------------------------------------------------------ */ + + +static void stm_fatalerror(const char *format, ...) +{ + va_list ap; + +#ifdef STM_DEBUGPRINT + dprintf(("STM Subsystem: Fatal Error\n")); +#else + fprintf(stderr, "STM Subsystem: Fatal Error\n"); +#endif + + va_start(ap, format); + vfprintf(stderr, format, ap); + fprintf(stderr, "\n"); + va_end(ap); + + abort(); +} diff --git a/c8/stm/fprintcolor.h b/c8/stm/fprintcolor.h new file mode 100644 --- /dev/null +++ b/c8/stm/fprintcolor.h @@ -0,0 +1,41 @@ +/* ------------------------------------------------------------ */ +#ifdef STM_DEBUGPRINT +/* ------------------------------------------------------------ */ + + +#include <stdarg.h> + + +#define dprintf(args) threadcolor_printf args +static inline int dprintfcolor(void) +{ + return 31 + (STM_SEGMENT->segment_num + 5) % 6; +} + +static int threadcolor_printf(const char *format, ...) + __attribute__((format (printf, 1, 2))); + +#ifdef STM_TESTS +# define dprintf_test(args) dprintf(args) +#else +# define dprintf_test(args) do { } while(0) +#endif + + +/* ------------------------------------------------------------ */ +#else +/* ------------------------------------------------------------ */ + + +#define dprintf(args) do { } while(0) +#define dprintf_test(args) do { } while(0) +#define dprintfcolor() 0 + + +/* ------------------------------------------------------------ */ +#endif +/* ------------------------------------------------------------ */ + + +static void stm_fatalerror(const char *format, ...) 
+ __attribute__((format (printf, 1, 2), noreturn)); diff --git a/c8/stm/list.c b/c8/stm/list.c new file mode 100644 --- /dev/null +++ b/c8/stm/list.c @@ -0,0 +1,180 @@ +#ifndef _STM_CORE_H_ +# error "must be compiled via stmgc.c" +#endif + + +#define LIST_SETSIZE(n) (sizeof(struct list_s) + LIST_ITEMSSIZE(n)) +#define LIST_ITEMSSIZE(n) ((n) * sizeof(uintptr_t)) +#define LIST_OVERCNT(n) (33 + ((((n) / 2) * 3) | 1)) + +static struct list_s *list_create(void) +{ + uintptr_t initial_allocation = 32; + struct list_s *lst = malloc(LIST_SETSIZE(initial_allocation)); + if (lst == NULL) + stm_fatalerror("out of memory in list_create"); /* XXX */ + + lst->count = 0; + lst->last_allocated = initial_allocation - 1; + return lst; +} + +static struct list_s *_list_grow(struct list_s *lst, uintptr_t nalloc) +{ + nalloc = LIST_OVERCNT(nalloc); + lst = realloc(lst, LIST_SETSIZE(nalloc)); + if (lst == NULL) + stm_fatalerror("out of memory in _list_grow"); /* XXX */ + + lst->last_allocated = nalloc - 1; + return lst; +} + + +/************************************************************/ + +static void _tree_clear_node(wlog_node_t *node) +{ + memset(node, 0, sizeof(wlog_node_t)); +} + +static void tree_clear(struct tree_s *tree) +{ + if (tree->raw_current != tree->raw_start) { + _tree_clear_node(&tree->toplevel); + tree->raw_current = tree->raw_start; + } +} + +static struct tree_s *tree_create(void) +{ + return (struct tree_s *)calloc(1, sizeof(struct tree_s)); +} + +static void tree_free(struct tree_s *tree) +{ + free(tree->raw_start); + assert(memset(tree, 0xDD, sizeof(struct tree_s))); + free(tree); +} + +static void _tree_compress(struct tree_s *tree) +{ + wlog_t *item; + struct tree_s tree_copy; + memset(&tree_copy, 0, sizeof(struct tree_s)); + + TREE_LOOP_FORWARD(*tree, item) { + tree_insert(&tree_copy, item->addr, item->val); + + } TREE_LOOP_END; + + free(tree->raw_start); + *tree = tree_copy; +} + +static wlog_t *_tree_find(char *entry, uintptr_t addr) +{ + uintptr_t key = TREE_HASH(addr); + while (((long)entry) & 1) { + /* points to a further level */ + key >>= TREE_BITS; + entry = *(char **)((entry - 1) + (key & TREE_MASK)); + } + return (wlog_t *)entry; /* may be NULL */ +} + +static void _tree_grow(struct tree_s *tree, long extra) +{ + struct tree_s newtree; + wlog_t *item; + long alloc = tree->raw_end - tree->raw_start; + long newalloc = (alloc + extra + (alloc >> 2) + 31) & ~15; + //fprintf(stderr, "growth: %ld\n", newalloc); + char *newitems = malloc(newalloc); + if (newitems == NULL) { + stm_fatalerror("out of memory!"); /* XXX */ + } + newtree.raw_start = newitems; + newtree.raw_current = newitems; + newtree.raw_end = newitems + newalloc; + _tree_clear_node(&newtree.toplevel); + TREE_LOOP_FORWARD(*tree, item) + { + tree_insert(&newtree, item->addr, item->val); + } TREE_LOOP_END; + free(tree->raw_start); + *tree = newtree; +} + +static char *_tree_grab(struct tree_s *tree, long size) +{ + char *result; + result = tree->raw_current; + tree->raw_current += size; + if (tree->raw_current > tree->raw_end) { + _tree_grow(tree, size); + return NULL; + } + return result; +} + +static void tree_insert(struct tree_s *tree, uintptr_t addr, uintptr_t val) +{ + assert(addr != 0); /* the NULL key is reserved */ + retry:; + wlog_t *wlog; + uintptr_t key = TREE_HASH(addr); + int shift = 0; + char *p = (char *)(tree->toplevel.items); + char *entry; + while (1) { + assert(shift < TREE_DEPTH_MAX * TREE_BITS); + p += (key >> shift) & TREE_MASK; + shift += TREE_BITS; + entry = *(char **)p; + if (entry == NULL) + 
break; + else if (((long)entry) & 1) { + /* points to a further level */ + p = entry - 1; + } + else { + wlog_t *wlog1 = (wlog_t *)entry; + if (wlog1->addr == 0) { + /* reuse the deleted entry and that's it */ + wlog1->addr = addr; + wlog1->val = val; + return; + } + /* the key must not already be present */ + assert(wlog1->addr != addr); + /* collision: there is already a different wlog here */ + wlog_node_t *node = (wlog_node_t *) + _tree_grab(tree, sizeof(wlog_node_t)); + if (node == NULL) goto retry; + _tree_clear_node(node); + uintptr_t key1 = TREE_HASH(wlog1->addr); + char *p1 = (char *)(node->items); + *(wlog_t **)(p1 + ((key1 >> shift) & TREE_MASK)) = wlog1; + *(char **)p = ((char *)node) + 1; + p = p1; + } + } + wlog = (wlog_t *)_tree_grab(tree, sizeof(wlog_t)); + if (wlog == NULL) goto retry; + wlog->addr = addr; + wlog->val = val; + *(char **)p = (char *)wlog; +} + +static bool tree_delete_item(struct tree_s *tree, uintptr_t addr) +{ + wlog_t *entry; + TREE_FIND(*tree, addr, entry, goto missing); + entry->addr = 0; + return true; + + missing: + return false; +} diff --git a/c8/stm/list.h b/c8/stm/list.h new file mode 100644 --- /dev/null +++ b/c8/stm/list.h @@ -0,0 +1,217 @@ +#include <stdlib.h> +#include <stdbool.h> + +/************************************************************/ + +struct list_s { + uintptr_t count; + uintptr_t last_allocated; + uintptr_t items[]; +}; + +static struct list_s *list_create(void) __attribute__((unused)); + +static inline void list_free(struct list_s *lst) +{ + free(lst); +} + +#define LIST_CREATE(lst) ((lst) = list_create()) +#define LIST_FREE(lst) (list_free(lst), (lst) = NULL) + + +static struct list_s *_list_grow(struct list_s *, uintptr_t); + +static inline struct list_s *list_append(struct list_s *lst, uintptr_t item) +{ + uintptr_t index = lst->count++; + if (UNLIKELY(index > lst->last_allocated)) + lst = _list_grow(lst, index); + lst->items[index] = item; + return lst; +} + +#define LIST_APPEND(lst, e) ((lst) = list_append((lst), (uintptr_t)(e))) + +static inline struct list_s *list_append2(struct list_s *lst, + uintptr_t item0, uintptr_t item1) +{ + uintptr_t index = lst->count; + lst->count += 2; + if (UNLIKELY(index >= lst->last_allocated)) + lst = _list_grow(lst, index + 1); + lst->items[index + 0] = item0; + lst->items[index + 1] = item1; + return lst; +} + + +static inline void list_clear(struct list_s *lst) +{ + lst->count = 0; +} + +static inline bool list_is_empty(struct list_s *lst) +{ + return (lst->count == 0); +} + +static inline uintptr_t list_count(struct list_s *lst) +{ + return lst->count; +} + +static inline uintptr_t list_pop_item(struct list_s *lst) +{ + assert(lst->count > 0); + return lst->items[--lst->count]; +} + +static inline uintptr_t list_item(struct list_s *lst, uintptr_t index) +{ + return lst->items[index]; +} + +static inline void list_set_item(struct list_s *lst, uintptr_t index, + uintptr_t newitem) +{ + lst->items[index] = newitem; +} + +static inline uintptr_t *list_ptr_to_item(struct list_s *lst, uintptr_t index) +{ + return &lst->items[index]; +} + +#define LIST_FOREACH_R(lst, TYPE, CODE) \ + do { \ + struct list_s *_lst = (lst); \ + uintptr_t _i; \ + for (_i = _lst->count; _i--; ) { \ + TYPE item = (TYPE)_lst->items[_i]; \ + CODE; \ + } \ + } while (0) + +/************************************************************/ + +/* The tree_xx functions are, like the name hints, implemented as a tree, + supporting very high performance in TREE_FIND in the common case where + there are no or few elements in 
the tree, but scaling correctly + if the number of items becomes large (logarithmically, rather + than almost-constant-time with hash maps, but with low constants). + The value 0 cannot be used as a key. +*/ + +#define TREE_BITS 4 +#define TREE_ARITY (1 << TREE_BITS) + +#define TREE_DEPTH_MAX ((sizeof(void*)*8 + TREE_BITS-1) / TREE_BITS) +/* sizeof(void*)*8 = total number of bits + (x + TREE_BITS-1) / TREE_BITS = divide by TREE_BITS, rounding up +*/ + +#define TREE_HASH(key) ((key) ^ ((key) << 4)) +#define TREE_MASK ((TREE_ARITY - 1) * sizeof(void*)) + +typedef struct { + uintptr_t addr; + uintptr_t val; +} wlog_t; + +typedef struct { + char *items[TREE_ARITY]; +} wlog_node_t; + +struct tree_s { + char *raw_start, *raw_current, *raw_end; + wlog_node_t toplevel; +}; + +static struct tree_s *tree_create(void) __attribute__((unused)); +static void tree_free(struct tree_s *tree) __attribute__((unused)); +static void tree_clear(struct tree_s *tree) __attribute__((unused)); +//static inline void tree_delete_not_used_any_more(struct tree_s *tree)... + +static inline bool tree_is_cleared(struct tree_s *tree) { + return tree->raw_current == tree->raw_start; +} + +#define _TREE_LOOP(tree, item, INITIAL, _PLUS_) \ +{ \ + struct { char **next; char **end; } _stack[TREE_DEPTH_MAX], *_stackp; \ + char **_next, **_end, *_entry; \ + long _deleted_factor = 0; \ + struct tree_s *_tree = &(tree); \ + /* initialization */ \ + _stackp = _stack; /* empty stack */ \ + _next = _tree->toplevel.items + INITIAL; \ + _end = _next _PLUS_ TREE_ARITY; \ + /* loop */ \ + while (1) \ + { \ + if (_next == _end) \ + { \ + if (_stackp == _stack) \ + break; /* done */ \ + /* finished with this level, go to the next one */ \ + _stackp--; \ + _next = _stackp->next; \ + _end = _stackp->end; \ + continue; \ + } \ + _entry = *_next; \ + _next = _next _PLUS_ 1; \ + if (_entry == NULL) /* empty entry */ \ + continue; \ + if (((long)_entry) & 1) \ + { /* points to a further level: enter it */ \ + assert(_stackp - _stack < TREE_DEPTH_MAX); \ + _stackp->next = _next; \ + _stackp->end = _end; \ + _stackp++; \ + _next = ((wlog_node_t *)(_entry - 1))->items + INITIAL; \ + _end = _next _PLUS_ TREE_ARITY; \ + continue; \ + } \ + /* points to a wlog_t item */ \ + if (((wlog_t *)_entry)->addr == 0) { /* deleted entry */ \ + _deleted_factor += 3; \ + continue; \ + } \ + _deleted_factor -= 4; \ + item = (wlog_t *)_entry; + +#define TREE_LOOP_FORWARD(tree, item) \ + _TREE_LOOP(tree, item, 0, +) +#define TREE_LOOP_BACKWARD(tree, item) \ + _TREE_LOOP(tree, item, (TREE_ARITY-1), -) +#define TREE_LOOP_END } } +#define TREE_LOOP_END_AND_COMPRESS \ + } if (_deleted_factor > 9) _tree_compress(_tree); } +#define TREE_LOOP_DELETE(item) { (item)->addr = NULL; _deleted_factor += 6; } + +#define TREE_FIND(tree, addr1, result, goto_not_found) \ +{ \ + uintptr_t _key = TREE_HASH(addr1); \ + char *_p = (char *)((tree).toplevel.items); \ + char *_entry = *(char **)(_p + (_key & TREE_MASK)); \ + if (_entry == NULL) \ + goto_not_found; /* common case, hopefully */ \ + result = _tree_find(_entry, addr1); \ + if (result == NULL || result->addr != (addr1)) \ + goto_not_found; \ +} + +static wlog_t *_tree_find(char *entry, uintptr_t addr); +static void _tree_compress(struct tree_s *tree) __attribute__((unused)); +static void tree_insert(struct tree_s *tree, uintptr_t addr, uintptr_t val); +static bool tree_delete_item(struct tree_s *tree, uintptr_t addr) + __attribute__((unused)); + +static inline bool tree_contains(struct tree_s *tree, uintptr_t addr) +{ + wlog_t 
*result; + TREE_FIND(*tree, addr, result, return false); + return true; +} diff --git a/c8/stm/misc.c b/c8/stm/misc.c new file mode 100644 --- /dev/null +++ b/c8/stm/misc.c @@ -0,0 +1,61 @@ +#ifndef _STM_CORE_H_ +# error "must be compiled via stmgc.c" +#endif + + +char *_stm_real_address(object_t *o) +{ + if (o == NULL) + return NULL; + + assert(FIRST_OBJECT_PAGE * 4096UL <= (uintptr_t)o + && (uintptr_t)o < NB_PAGES * 4096UL); + return REAL_ADDRESS(STM_SEGMENT->segment_base, o); +} + +char *_stm_get_segment_base(long index) +{ + return get_segment_base(index); +} + +struct stm_priv_segment_info_s *_stm_segment(void) +{ + char *info = REAL_ADDRESS(STM_SEGMENT->segment_base, STM_PSEGMENT); + return (struct stm_priv_segment_info_s *)info; +} + +stm_thread_local_t *_stm_thread(void) +{ + return STM_SEGMENT->running_thread; +} + +bool _stm_was_read(object_t *obj) +{ + uint8_t rm = ((struct stm_read_marker_s *) + (STM_SEGMENT->segment_base + (((uintptr_t)obj) >> 4)))->rm; + assert(rm <= STM_SEGMENT->transaction_read_version); + return rm == STM_SEGMENT->transaction_read_version; +} + +bool _stm_was_written(object_t *obj) +{ + return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0; +} + + +#ifdef STM_TESTS + +long _stm_count_modified_old_objects(void) +{ + if (STM_PSEGMENT->modified_old_objects == NULL) + return -1; + return list_count(STM_PSEGMENT->modified_old_objects); +} + + +object_t *_stm_enum_modified_old_objects(long index) +{ + return (object_t *)list_item( + STM_PSEGMENT->modified_old_objects, index); +} +#endif diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c new file mode 100644 --- /dev/null +++ b/c8/stm/nursery.c @@ -0,0 +1,143 @@ +#ifndef _STM_CORE_H_ +# error "must be compiled via stmgc.c" +#endif + +/************************************************************/ + +#define NURSERY_START (FIRST_NURSERY_PAGE * 4096UL) +#define NURSERY_SIZE (NB_NURSERY_PAGES * 4096UL) +#define NURSERY_END (NURSERY_START + NURSERY_SIZE) + +static uintptr_t _stm_nursery_start; + + +/************************************************************/ + +static void setup_nursery(void) +{ + _stm_nursery_start = NURSERY_START; + + long i; + for (i = 1; i <= NB_SEGMENTS; i++) { + get_segment(i)->nursery_current = (stm_char *)NURSERY_START; + get_segment(i)->nursery_end = NURSERY_END; + } +} + +static inline bool _is_in_nursery(object_t *obj) +{ + assert((uintptr_t)obj >= NURSERY_START); + return (uintptr_t)obj < NURSERY_END; +} + + +/************************************************************/ + +static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg) +{ +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + dprintf(("throw_away_nursery\n")); + /* reset the nursery by zeroing it */ + size_t nursery_used; + char *realnursery; + + realnursery = REAL_ADDRESS(pseg->pub.segment_base, _stm_nursery_start); + nursery_used = pseg->pub.nursery_current - (stm_char *)_stm_nursery_start; + if (nursery_used > NB_NURSERY_PAGES * 4096) { + /* possible in rare cases when the program artificially advances + its own nursery_current */ + nursery_used = NB_NURSERY_PAGES * 4096; + } + OPT_ASSERT((nursery_used & 7) == 0); + memset(realnursery, 0, nursery_used); + + /* assert that the rest of the nursery still contains only zeroes */ + assert_memset_zero(realnursery + nursery_used, + (NURSERY_END - _stm_nursery_start) - nursery_used); + + pseg->pub.nursery_current = (stm_char *)_stm_nursery_start; + + + return nursery_used; +#pragma pop_macro("STM_SEGMENT") 
+#pragma pop_macro("STM_PSEGMENT") +} + + +static void _do_minor_collection(bool commit) +{ + dprintf(("minor_collection commit=%d\n", (int)commit)); + + throw_away_nursery(get_priv_segment(STM_SEGMENT->segment_num)); +} + +static void minor_collection(bool commit) +{ + assert(!_has_mutex()); + + _do_minor_collection(commit); +} + + +/************************************************************/ + + +object_t *_stm_allocate_slowpath(ssize_t size_rounded_up) +{ + /* may collect! */ + STM_SEGMENT->nursery_current -= size_rounded_up; /* restore correct val */ + + restart: + + OPT_ASSERT(size_rounded_up >= 16); + OPT_ASSERT((size_rounded_up & 7) == 0); + + stm_char *p = STM_SEGMENT->nursery_current; + stm_char *end = p + size_rounded_up; + if ((uintptr_t)end <= NURSERY_END) { + STM_SEGMENT->nursery_current = end; + return (object_t *)p; + } + + abort();//stm_collect(0); + goto restart; +} + +#ifdef STM_TESTS +void _stm_set_nursery_free_count(uint64_t free_count) +{ + assert(free_count <= NURSERY_SIZE); + assert((free_count & 7) == 0); + _stm_nursery_start = NURSERY_END - free_count; + + long i; + for (i = 1; i <= NB_SEGMENTS; i++) { + if ((uintptr_t)get_segment(i)->nursery_current < _stm_nursery_start) + get_segment(i)->nursery_current = (stm_char *)_stm_nursery_start; + } +} +#endif + +static void assert_memset_zero(void *s, size_t n) +{ +#ifndef NDEBUG + size_t i; +# ifndef STM_TESTS + if (n > 5000) n = 5000; +# endif + n /= 8; + for (i = 0; i < n; i++) + assert(((uint64_t *)s)[i] == 0); +#endif +} + +static void check_nursery_at_transaction_start(void) +{ + assert((uintptr_t)STM_SEGMENT->nursery_current == _stm_nursery_start); + assert_memset_zero(REAL_ADDRESS(STM_SEGMENT->segment_base, + STM_SEGMENT->nursery_current), + NURSERY_END - _stm_nursery_start); +} diff --git a/c8/stm/nursery.h b/c8/stm/nursery.h new file mode 100644 --- /dev/null +++ b/c8/stm/nursery.h @@ -0,0 +1,5 @@ +static void minor_collection(bool commit); +static void check_nursery_at_transaction_start(void); +static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg); + +static void assert_memset_zero(void *s, size_t n); diff --git a/c8/stm/rewind_setjmp.c b/c8/stm/rewind_setjmp.c new file mode 100644 --- /dev/null +++ b/c8/stm/rewind_setjmp.c @@ -0,0 +1,217 @@ +#include "rewind_setjmp.h" +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <alloca.h> + + +struct _rewind_jmp_moved_s { + struct _rewind_jmp_moved_s *next; + size_t stack_size; + size_t shadowstack_size; +}; +#define RJM_HEADER sizeof(struct _rewind_jmp_moved_s) + +#ifndef RJBUF_CUSTOM_MALLOC +#define rj_malloc malloc +#define rj_free free +#else +void *rj_malloc(size_t); +void rj_free(void *); +#endif + +/* XXX: currently synchronized with global mutex!! */ + +static void copy_stack(rewind_jmp_thread *rjthread, char *base, void *ssbase) +{ + /* Copy away part of the stack and shadowstack. Sets moved_off_base to + the current frame_base. + + The stack is copied between 'base' (lower limit, i.e. newest bytes) + and 'rjthread->head->frame_base' (upper limit, i.e. oldest bytes). + The shadowstack is copied between 'ssbase' (upper limit, newest) + and 'rjthread->head->shadowstack_base' (lower limit, oldest). 
+ */ + struct _rewind_jmp_moved_s *next; + char *stop; + void *ssstop; + size_t stack_size, ssstack_size; + + assert(rjthread->head != NULL); + ssstop = rjthread->head->shadowstack_base; + if (((long)ssstop) & 1) { + /* PyPy's JIT: 'head->frame_base' is missing; use directly 'head', + which should be at the end of the frame (and doesn't need itself + to be copied because it contains immutable data only) */ + ssstop = ((char *)ssstop) - 1; + stop = (char *)rjthread->head; + } + else { + stop = rjthread->head->frame_base; + } + assert(stop >= base); + assert(ssstop <= ssbase); + stack_size = stop - base; + ssstack_size = ssbase - ssstop; + + next = (struct _rewind_jmp_moved_s *) + rj_malloc(RJM_HEADER + stack_size + ssstack_size); + assert(next != NULL); /* XXX out of memory */ + next->next = rjthread->moved_off; + next->stack_size = stack_size; + next->shadowstack_size = ssstack_size; + + memcpy(((char *)next) + RJM_HEADER, base, stack_size); + memcpy(((char *)next) + RJM_HEADER + stack_size, ssstop, + ssstack_size); + + rjthread->moved_off_base = stop; + rjthread->moved_off_ssbase = ssstop; + rjthread->moved_off = next; +} + +__attribute__((noinline)) +long rewind_jmp_setjmp(rewind_jmp_thread *rjthread, void *ss) +{ + /* saves the current stack frame to the list of slices and + calls setjmp(). It returns the number of times a longjmp() + jumped back to this setjmp() */ + if (rjthread->moved_off) { + /* old stack slices are not needed anymore (next longjmp() + will restore only to this setjmp()) */ + _rewind_jmp_free_stack_slices(rjthread); + } + /* all locals of this function that need to be saved and restored + across the setjmp() should be stored inside this structure */ + struct { void *ss1; rewind_jmp_thread *rjthread1; } volatile saved = + { ss, rjthread }; + + int result; + if (__builtin_setjmp(rjthread->jmpbuf) == 0) { + rjthread = saved.rjthread1; + rjthread->initial_head = rjthread->head; + result = 0; + } + else { + rjthread = saved.rjthread1; + rjthread->head = rjthread->initial_head; + result = rjthread->repeat_count + 1; + } + rjthread->repeat_count = result; + + /* snapshot of top frame: needed every time because longjmp() frees + the previous one. Note that this function is called with the + mutex already acquired. Although it's not the job of this file, + we assert it is indeed acquired here. This is needed, otherwise a + concurrent GC may get garbage while saving shadow stack */ +#ifdef _STM_CORE_H_ + assert(_has_mutex()); +#endif + copy_stack(rjthread, (char *)&saved, saved.ss1); + + return result; +} + +__attribute__((noinline, noreturn)) +static void do_longjmp(rewind_jmp_thread *rjthread, char *stack_free) +{ + /* go through list of copied stack-slices and copy them back to the + current stack, expanding it if necessary. The shadowstack should + already be restored at this point (restore_shadowstack()) */ + assert(rjthread->moved_off_base != NULL); + + while (rjthread->moved_off) { + struct _rewind_jmp_moved_s *p = rjthread->moved_off; + char *target = rjthread->moved_off_base; + /* CPU stack grows downwards: */ + target -= p->stack_size; + if (target < stack_free) { + /* need more stack space! */ + do_longjmp(rjthread, alloca(stack_free - target)); + abort(); /* unreachable */ + } + memcpy(target, ((char *)p) + RJM_HEADER, p->stack_size); + + rjthread->moved_off_base = target; + rjthread->moved_off = p->next; + rj_free(p); + } + +#ifdef _STM_CORE_H_ + /* This function must be called with the mutex held. 
It will + remain held across the longjmp that follows and into the + target rewind_jmp_setjmp() function. */ + assert(_has_mutex()); +#endif + __builtin_longjmp(rjthread->jmpbuf, 1); +} + +__attribute__((noreturn)) +void rewind_jmp_longjmp(rewind_jmp_thread *rjthread) +{ + char _rewind_jmp_marker; + do_longjmp(rjthread, &_rewind_jmp_marker); +} + + +char *rewind_jmp_enum_shadowstack(rewind_jmp_thread *rjthread, + void *callback(void *, const void *, size_t)) +{ + /* enumerate all saved shadow-stack slices */ + struct _rewind_jmp_moved_s *p = rjthread->moved_off; + char *sstarget = rjthread->moved_off_ssbase; + +#ifdef _STM_CORE_H_ + assert(_has_mutex()); +#endif + + while (p) { + if (p->shadowstack_size) { + void *ss_slice = ((char *)p) + RJM_HEADER + p->stack_size; + callback(sstarget, ss_slice, p->shadowstack_size); + + sstarget += p->shadowstack_size; + } + p = p->next; + } + return sstarget; +} + + +char *rewind_jmp_restore_shadowstack(rewind_jmp_thread *rjthread) +{ + return rewind_jmp_enum_shadowstack(rjthread, memcpy); +} + +__attribute__((noinline)) +void _rewind_jmp_copy_stack_slice(rewind_jmp_thread *rjthread) +{ + /* called when leaving a frame. copies the now-current frame + to the list of stack-slices */ +#ifdef _STM_CORE_H_ + /* A transaction should be running now. This means in particular + that it's not possible that a major GC runs concurrently with + this code (and tries to read the shadowstack slice). */ + assert(_seems_to_be_running_transaction()); +#endif + if (rjthread->head == NULL) { + _rewind_jmp_free_stack_slices(rjthread); + return; + } + assert(rjthread->moved_off_base < (char *)rjthread->head); + copy_stack(rjthread, rjthread->moved_off_base, rjthread->moved_off_ssbase); +} + +void _rewind_jmp_free_stack_slices(rewind_jmp_thread *rjthread) +{ + /* frees all saved stack copies */ + struct _rewind_jmp_moved_s *p = rjthread->moved_off; + while (p) { + struct _rewind_jmp_moved_s *pnext = p->next; + rj_free(p); + p = pnext; + } + rjthread->moved_off = NULL; + rjthread->moved_off_base = NULL; + rjthread->moved_off_ssbase = NULL; +} diff --git a/c8/stm/rewind_setjmp.h b/c8/stm/rewind_setjmp.h new file mode 100644 --- /dev/null +++ b/c8/stm/rewind_setjmp.h @@ -0,0 +1,117 @@ +#ifndef _REWIND_SETJMP_H_ +#define _REWIND_SETJMP_H_ + + +#include <stddef.h> + +/************************************************************ +There is a singly-linked list of frames in each thread +rjthread->head->prev->prev->prev + +Another singly-linked list is the list of copied stack-slices. +When doing a setjmp(), we copy the top-frame, free all old +stack-slices, and link it to the top-frame->moved_off. +When returning from the top-frame while moved_off still points +to a slice, we also need to copy the top-frame->prev frame/slice +and add it to this list (pointed to by moved_off). 
+-------------------------------------------------------------- + + : : ^^^^^ + |-------------------| older frames in the stack + | prev=0 | + ,---> | rewind_jmp_buf | + | |-------------------| + | | | + | : : + | : : + | | | + | |-------------------| + `---------prev | + ,----> | rewind_jmp_buf | + | +-------------------| + | | | + | : : + | | | + | |-------------------| + `----------prev | + ,---> | rewind_jmp_buf | <--------------- MOVED_OFF_BASE + | |---------------- +-------------+ + | | | STACK COPY | + | | : : + | : | size | + | | | next | <---- MOVED_OFF + | | +---|------ +-------------+ + | | | | | STACK COPY | + | |-------------------| | : (SEQUEL) : + `---------prev | | : : +HEAD-----> | rewind_jmp_buf | | | | + |-------------------| | | size | + `------> | next=0 | + +-------------+ + + +************************************************************/ + +typedef struct _rewind_jmp_buf { + char *shadowstack_base; + struct _rewind_jmp_buf *prev; + char *frame_base; + /* NB: PyPy's JIT has got details of this structure hard-coded, + as follows: it uses 2 words only (so frame_base is invalid) + and sets the lowest bit of 'shadowstack_base' to tell this */ +} rewind_jmp_buf; + +typedef struct { + rewind_jmp_buf *head; + rewind_jmp_buf *initial_head; + char *moved_off_base; + char *moved_off_ssbase; + struct _rewind_jmp_moved_s *moved_off; + void *jmpbuf[5]; + long repeat_count; +} rewind_jmp_thread; + + +/* remember the current stack and ss_stack positions */ +#define rewind_jmp_enterframe(rjthread, rjbuf, ss) do { \ + rewind_jmp_prepareframe(rjbuf); \ + rewind_jmp_enterprepframe(rjthread, rjbuf, ss); \ +} while (0) +#define rewind_jmp_prepareframe(rjbuf) \ + ((rjbuf)->frame_base = __builtin_frame_address(0)) +#define rewind_jmp_enterprepframe(rjthread, rjbuf, ss) do { \ + assert((((long)(ss)) & 1) == 0); \ + (rjbuf)->shadowstack_base = (char *)(ss); \ + (rjbuf)->prev = (rjthread)->head; \ + (rjthread)->head = (rjbuf); \ +} while (0) + +/* go up one frame. 
if there was a setjmp call in this frame, + */ +#define rewind_jmp_leaveframe(rjthread, rjbuf, ss) do { \ + assert((rjbuf)->shadowstack_base == (char *)(ss)); \ + (rjthread)->head = (rjbuf)->prev; \ + if ((rjbuf)->frame_base == (rjthread)->moved_off_base) { \ + assert((rjthread)->moved_off_ssbase == (char *)(ss));\ + _rewind_jmp_copy_stack_slice(rjthread); \ + } \ +} while (0) + +long rewind_jmp_setjmp(rewind_jmp_thread *rjthread, void *ss); +void rewind_jmp_longjmp(rewind_jmp_thread *rjthread) __attribute__((noreturn)); +char *rewind_jmp_restore_shadowstack(rewind_jmp_thread *rjthread); +char *rewind_jmp_enum_shadowstack(rewind_jmp_thread *rjthread, + void *callback(void *, const void *, size_t)); + +#define rewind_jmp_forget(rjthread) do { \ + if ((rjthread)->moved_off) _rewind_jmp_free_stack_slices(rjthread); \ + (rjthread)->moved_off_base = 0; \ + (rjthread)->moved_off_ssbase = 0; \ +} while (0) + +void _rewind_jmp_copy_stack_slice(rewind_jmp_thread *); +void _rewind_jmp_free_stack_slices(rewind_jmp_thread *); + +#define rewind_jmp_armed(rjthread) ((rjthread)->moved_off_base != 0) + +#endif diff --git a/c8/stm/setup.c b/c8/stm/setup.c new file mode 100644 --- /dev/null +++ b/c8/stm/setup.c @@ -0,0 +1,207 @@ +#ifndef _STM_CORE_H_ +# error "must be compiled via stmgc.c" +#endif + + +#ifdef USE_REMAP_FILE_PAGES +static char *setup_mmap(char *reason, int *ignored) +{ + char *result = mmap(NULL, TOTAL_MEMORY, + PROT_READ | PROT_WRITE, + MAP_PAGES_FLAGS, -1, 0); + if (result == MAP_FAILED) + stm_fatalerror("%s failed: %m", reason); + + return result; +} +static void close_fd_mmap(int ignored) +{ +} +#else +#include <fcntl.h> /* For O_* constants */ +static char *setup_mmap(char *reason, int *map_fd) +{ + char name[128]; + + /* Create the big shared memory object, and immediately unlink it. + There is a small window where if this process is killed the + object is left around. It doesn't seem possible to do anything + about it... + */ + int fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL, 0600); + shm_unlink(name); + + if (fd == -1) { + stm_fatalerror("%s failed (stm_open): %m", reason); + } + if (ftruncate(fd, TOTAL_MEMORY) != 0) { + stm_fatalerror("%s failed (ftruncate): %m", reason); + } + char *result = mmap(NULL, TOTAL_MEMORY, + PROT_READ | PROT_WRITE, + MAP_PAGES_FLAGS & ~MAP_ANONYMOUS, fd, 0); + if (result == MAP_FAILED) { + stm_fatalerror("%s failed (mmap): %m", reason); + } + *map_fd = fd; + return result; +} +static void close_fd_mmap(int map_fd) +{ + close(map_fd); +} +#endif + +static void setup_protection_settings(void) +{ + /* The segment 0 is not used to run transactions, but contains the + shared copy of the pages. We mprotect all pages before so that + accesses fail, up to and including the pages corresponding to the + nurseries of the other segments. */ + mprotect(stm_object_pages, END_NURSERY_PAGE * 4096UL, PROT_NONE); + + long i; + for (i = 1; i <= NB_SEGMENTS; i++) { + char *segment_base = get_segment_base(i); + + /* In each segment, the first page is where TLPREFIX'ed + NULL accesses land. We mprotect it so that accesses fail. 
*/ + mprotect(segment_base, 4096, PROT_NONE); + + /* Pages in range(2, FIRST_READMARKER_PAGE) are never used */ + if (FIRST_READMARKER_PAGE > 2) + mprotect(segment_base + 8192, + (FIRST_READMARKER_PAGE - 2) * 4096UL, + PROT_NONE); + } +} + +void stm_setup(void) +{ + /* Check that some values are acceptable */ + assert(NB_SEGMENTS <= NB_SEGMENTS_MAX); + assert(4096 <= ((uintptr_t)STM_SEGMENT)); + assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT); + assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192); + assert(2 <= FIRST_READMARKER_PAGE); + assert(FIRST_READMARKER_PAGE * 4096UL <= READMARKER_START); + assert(READMARKER_START < READMARKER_END); + assert(READMARKER_END <= 4096UL * FIRST_OBJECT_PAGE); + assert(FIRST_OBJECT_PAGE < NB_PAGES); + assert((NB_PAGES * 4096UL) >> 8 <= (FIRST_OBJECT_PAGE * 4096UL) >> 4); + assert((END_NURSERY_PAGE * 4096UL) >> 8 <= + (FIRST_READMARKER_PAGE * 4096UL)); + + stm_object_pages = setup_mmap("initial stm_object_pages mmap()", + &stm_object_pages_fd); + setup_protection_settings(); + + long i; + for (i = 1; i <= NB_SEGMENTS; i++) { + char *segment_base = get_segment_base(i); + + /* Fill the TLS page (page 1) with 0xDC, for debugging */ + memset(REAL_ADDRESS(segment_base, 4096), 0xDC, 4096); + /* Make a "hole" at STM_PSEGMENT (which includes STM_SEGMENT) */ + memset(REAL_ADDRESS(segment_base, STM_PSEGMENT), 0, + sizeof(*STM_PSEGMENT)); + + /* Initialize STM_PSEGMENT */ + struct stm_priv_segment_info_s *pr = get_priv_segment(i); + assert(1 <= i && i < 255); /* 255 is WL_VISITED in gcpage.c */ + pr->pub.segment_num = i; + pr->pub.segment_base = segment_base; + pr->modified_old_objects = list_create(); + pr->pub.transaction_read_version = 0xff; + } + + /* The pages are shared lazily, as remap_file_pages() takes a relatively + long time for each page. + + The read markers are initially zero, but we set anyway + transaction_read_version to 0xff in order to force the first + transaction to "clear" the read markers by mapping a different, + private range of addresses. + */ + + setup_sync(); + setup_nursery(); +} + +void stm_teardown(void) +{ + /* This function is called during testing, but normal programs don't + need to call it. */ + assert(!_has_mutex()); + + long i; + for (i = 1; i <= NB_SEGMENTS; i++) { + struct stm_priv_segment_info_s *pr = get_priv_segment(i); + list_free(pr->modified_old_objects); + } + + munmap(stm_object_pages, TOTAL_MEMORY); + stm_object_pages = NULL; + close_fd_mmap(stm_object_pages_fd); + + teardown_sync(); +} + +static pthread_t *_get_cpth(stm_thread_local_t *tl) +{ + assert(sizeof(pthread_t) <= sizeof(tl->creating_pthread)); + return (pthread_t *)(tl->creating_pthread); +} + +void stm_register_thread_local(stm_thread_local_t *tl) +{ + int num; + s_mutex_lock(); + if (stm_all_thread_locals == NULL) { + stm_all_thread_locals = tl->next = tl->prev = tl; + num = 0; + } + else { + tl->next = stm_all_thread_locals; + tl->prev = stm_all_thread_locals->prev; + stm_all_thread_locals->prev->next = tl; + stm_all_thread_locals->prev = tl; + num = tl->prev->associated_segment_num; + } + + /* assign numbers consecutively, but that's for tests; we could also + assign the same number to all of them and they would get their own + numbers automatically. 
*/ + num = (num % NB_SEGMENTS) + 1; + tl->associated_segment_num = num; + *_get_cpth(tl) = pthread_self(); + set_gs_register(get_segment_base(num)); + s_mutex_unlock(); +} + +void stm_unregister_thread_local(stm_thread_local_t *tl) +{ + s_mutex_lock(); + assert(tl->prev != NULL); + assert(tl->next != NULL); + + if (tl == stm_all_thread_locals) { + stm_all_thread_locals = stm_all_thread_locals->next; + if (tl == stm_all_thread_locals) { + stm_all_thread_locals = NULL; + s_mutex_unlock(); + return; + } + } + tl->prev->next = tl->next; + tl->next->prev = tl->prev; + tl->prev = NULL; + tl->next = NULL; + s_mutex_unlock(); +} + +__attribute__((unused)) +static bool _is_tl_registered(stm_thread_local_t *tl) +{ + return tl->next != NULL; +} diff --git a/c8/stm/setup.h b/c8/stm/setup.h new file mode 100644 --- /dev/null +++ b/c8/stm/setup.h @@ -0,0 +1,4 @@ +static char *setup_mmap(char *reason, int *map_fd); +static void close_fd_mmap(int map_fd); +static void setup_protection_settings(void); +static pthread_t *_get_cpth(stm_thread_local_t *); diff --git a/c8/stm/sync.c b/c8/stm/sync.c new file mode 100644 --- /dev/null +++ b/c8/stm/sync.c @@ -0,0 +1,144 @@ +#include <sys/syscall.h> +#include <sys/prctl.h> +#include <asm/prctl.h> + +#ifndef _STM_CORE_H_ +# error "must be compiled via stmgc.c" +#endif + + + +static union { + struct { + pthread_mutex_t global_mutex; + /* some additional pieces of global state follow */ + uint8_t in_use1[NB_SEGMENTS]; /* 1 if running a pthread */ + }; + char reserved[192]; +} sync_ctl __attribute__((aligned(64))); + + +static void setup_sync(void) +{ + if (pthread_mutex_init(&sync_ctl.global_mutex, NULL) != 0) + stm_fatalerror("mutex initialization: %m"); +} + +static void teardown_sync(void) +{ + if (pthread_mutex_destroy(&sync_ctl.global_mutex) != 0) + stm_fatalerror("mutex destroy: %m"); + + memset(&sync_ctl, 0, sizeof(sync_ctl)); +} + +#ifndef NDEBUG +__thread bool _has_mutex_here; +static inline bool _has_mutex(void) +{ + return _has_mutex_here; +} +#endif + +static void set_gs_register(char *value) +{ + if (UNLIKELY(syscall(SYS_arch_prctl, ARCH_SET_GS, (uint64_t)value) != 0)) + stm_fatalerror("syscall(arch_prctl, ARCH_SET_GS): %m"); +} + +static inline void s_mutex_lock(void) +{ + assert(!_has_mutex_here); + if (UNLIKELY(pthread_mutex_lock(&sync_ctl.global_mutex) != 0)) + stm_fatalerror("pthread_mutex_lock: %m"); + assert((_has_mutex_here = true, 1)); +} + +static inline void s_mutex_unlock(void) +{ + assert(_has_mutex_here); + if (UNLIKELY(pthread_mutex_unlock(&sync_ctl.global_mutex) != 0)) + stm_fatalerror("pthread_mutex_unlock: %m"); + assert((_has_mutex_here = false, 1)); +} + +/************************************************************/ + + +static bool acquire_thread_segment(stm_thread_local_t *tl) +{ + /* This function acquires a segment for the currently running thread, + and set up the GS register if it changed. */ + assert(_has_mutex()); + assert(_is_tl_registered(tl)); + + int num = tl->associated_segment_num; + if (sync_ctl.in_use1[num - 1] == 0) { + /* fast-path: we can get the same segment number than the one + we had before. The value stored in GS is still valid. */ +#ifdef STM_TESTS + /* that can be optimized away, except during tests, because + they use only one thread */ + set_gs_register(get_segment_base(num)); +#endif + dprintf(("acquired same segment: %d\n", num)); + goto got_num; + } + /* Look for the next free segment. If there is none, wait for + the condition variable. 
*/ + int retries; + for (retries = 0; retries < NB_SEGMENTS; retries++) { + num = (num % NB_SEGMENTS) + 1; + if (sync_ctl.in_use1[num - 1] == 0) { + /* we're getting 'num', a different number. */ + dprintf(("acquired different segment: %d->%d\n", tl->associated_segment_num, num)); + tl->associated_segment_num = num; + set_gs_register(get_segment_base(num)); + goto got_num; + } + } + /* No segment available. Wait until release_thread_segment() + signals that one segment has been freed. */ + abort(); /* XXX */ + + /* Return false to the caller, which will call us again */ + return false; + + got_num: + sync_ctl.in_use1[num - 1] = 1; + assert(STM_SEGMENT->segment_num == num); + assert(STM_SEGMENT->running_thread == NULL); + STM_SEGMENT->running_thread = tl; + return true; +} + +static void release_thread_segment(stm_thread_local_t *tl) +{ + assert(_has_mutex()); + + assert(STM_SEGMENT->running_thread == tl); + STM_SEGMENT->running_thread = NULL; + + assert(sync_ctl.in_use1[tl->associated_segment_num - 1] == 1); + sync_ctl.in_use1[tl->associated_segment_num - 1] = 0; +} + +__attribute__((unused)) +static bool _seems_to_be_running_transaction(void) +{ + return (STM_SEGMENT->running_thread != NULL); +} + +bool _stm_in_transaction(stm_thread_local_t *tl) +{ + int num = tl->associated_segment_num; + assert(1 <= num && num <= NB_SEGMENTS); + return get_segment(num)->running_thread == tl; +} + +void _stm_test_switch(stm_thread_local_t *tl) +{ + assert(_stm_in_transaction(tl)); + set_gs_register(get_segment_base(tl->associated_segment_num)); + assert(STM_SEGMENT->running_thread == tl); +} diff --git a/c8/stm/sync.h b/c8/stm/sync.h new file mode 100644 --- /dev/null +++ b/c8/stm/sync.h @@ -0,0 +1,16 @@ +static void setup_sync(void); +static void teardown_sync(void); + + +static void s_mutex_lock(void); +static void s_mutex_unlock(void); +#ifndef NDEBUG +static bool _has_mutex(void); +#endif +static void set_gs_register(char *value); + + +/* acquire and release one of the segments for running the given thread + (must have the mutex acquired!) */ +static bool acquire_thread_segment(stm_thread_local_t *tl); +static void release_thread_segment(stm_thread_local_t *tl); diff --git a/c8/stmgc.c b/c8/stmgc.c new file mode 100644 --- /dev/null +++ b/c8/stmgc.c @@ -0,0 +1,19 @@ +#define _GNU_SOURCE 1 +#include "stmgc.h" +#include "stm/atomic.h" +#include "stm/list.h" +#include "stm/core.h" +#include "stm/nursery.h" +#include "stm/sync.h" +#include "stm/setup.h" +#include "stm/fprintcolor.h" +#include "stm/rewind_setjmp.h" + +#include "stm/list.c" +#include "stm/nursery.c" +#include "stm/core.c" +#include "stm/sync.c" +#include "stm/setup.c" +#include "stm/fprintcolor.c" +#include "stm/rewind_setjmp.c" +#include "stm/misc.c" diff --git a/c8/stmgc.h b/c8/stmgc.h new file mode 100644 --- /dev/null +++ b/c8/stmgc.h @@ -0,0 +1,162 @@ +#ifndef _STMGC_H +#define _STMGC_H + + +/* ==================== INTERNAL ==================== */ + +/* See "API" below. 
*/ + + +#include <stddef.h> +#include <stdint.h> +#include <assert.h> +#include <limits.h> +#include <unistd.h> + +#include "stm/rewind_setjmp.h" + +#if LONG_MAX == 2147483647 +# error "Requires a 64-bit environment" +#endif + + +#define TLPREFIX __attribute__((address_space(256))) + +typedef TLPREFIX struct object_s object_t; +typedef TLPREFIX struct stm_segment_info_s stm_segment_info_t; +typedef TLPREFIX struct stm_read_marker_s stm_read_marker_t; +typedef TLPREFIX char stm_char; + +struct stm_read_marker_s { + /* In every segment, every object has a corresponding read marker. + We assume that objects are at least 16 bytes long, and use + their address divided by 16. The read marker is equal to + 'STM_SEGMENT->transaction_read_version' if and only if the + object was read in the current transaction. The nurseries + also have corresponding read markers, but they are never used. */ + uint8_t rm; +}; + +struct stm_segment_info_s { + uint8_t transaction_read_version; + int segment_num; + char *segment_base; + stm_char *nursery_current; + uintptr_t nursery_end; + struct stm_thread_local_s *running_thread; +}; +#define STM_SEGMENT ((stm_segment_info_t *)4352) + + +typedef struct stm_thread_local_s { + /* rewind_setjmp's interface */ + rewind_jmp_thread rjthread; + /* the next fields are handled internally by the library */ + int associated_segment_num; + struct stm_thread_local_s *prev, *next; + void *creating_pthread[2]; +} stm_thread_local_t; + +#define _STM_GCFLAG_WRITE_BARRIER 0x01 + + +void _stm_write_slowpath(object_t *); +object_t *_stm_allocate_slowpath(ssize_t); +#ifdef STM_TESTS +#include <stdbool.h> +bool _stm_was_read(object_t *obj); +bool _stm_was_written(object_t *obj); + +char *_stm_get_segment_base(long index); +bool _stm_in_transaction(stm_thread_local_t *tl); +void _stm_set_nursery_free_count(uint64_t free_count); +long _stm_count_modified_old_objects(void); +object_t *_stm_enum_modified_old_objects(long index); +#endif + +/* ==================== HELPERS ==================== */ +#ifdef NDEBUG +#define OPT_ASSERT(cond) do { if (!(cond)) __builtin_unreachable(); } while (0) +#else +#define OPT_ASSERT(cond) assert(cond) +#endif +#define LIKELY(x) __builtin_expect(x, 1) +#define UNLIKELY(x) __builtin_expect(x, 0) +#define IMPLY(a, b) (!(a) || (b)) + + +/* ==================== PUBLIC API ==================== */ + +/* Number of segments (i.e. how many transactions can be executed in + parallel, in maximum). If you try to start transactions in more + threads than the number of segments, it will block, waiting for the + next segment to become free. 
+*/ +#define STM_NB_SEGMENTS 4 + + +struct object_s { + uint32_t stm_flags; /* reserved for the STM library */ +}; + +extern ssize_t stmcb_size_rounded_up(struct object_s *); + +__attribute__((always_inline)) +static inline void stm_read(object_t *obj) +{ + ((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm = + STM_SEGMENT->transaction_read_version; +} + +__attribute__((always_inline)) +static inline void stm_write(object_t *obj) +{ + if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)) + _stm_write_slowpath(obj); +} + + +__attribute__((always_inline)) +static inline object_t *stm_allocate(ssize_t size_rounded_up) +{ + OPT_ASSERT(size_rounded_up >= 16); + OPT_ASSERT((size_rounded_up & 7) == 0); + + stm_char *p = STM_SEGMENT->nursery_current; + stm_char *end = p + size_rounded_up; + STM_SEGMENT->nursery_current = end; + if (UNLIKELY((uintptr_t)end > STM_SEGMENT->nursery_end)) + return _stm_allocate_slowpath(size_rounded_up); + + return (object_t *)p; +} + + +void stm_setup(void); +void stm_teardown(void); + + +void stm_register_thread_local(stm_thread_local_t *tl); +void stm_unregister_thread_local(stm_thread_local_t *tl); + +#define stm_rewind_jmp_enterprepframe(tl, rjbuf) \ + rewind_jmp_enterprepframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack) +#define stm_rewind_jmp_enterframe(tl, rjbuf) \ + rewind_jmp_enterframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack) +#define stm_rewind_jmp_leaveframe(tl, rjbuf) \ + rewind_jmp_leaveframe(&(tl)->rjthread, rjbuf, (tl)->shadowstack) +#define stm_rewind_jmp_setjmp(tl) \ + rewind_jmp_setjmp(&(tl)->rjthread, (tl)->shadowstack) +#define stm_rewind_jmp_longjmp(tl) \ + rewind_jmp_longjmp(&(tl)->rjthread) +#define stm_rewind_jmp_forget(tl) \ + rewind_jmp_forget(&(tl)->rjthread) + + +long stm_start_transaction(stm_thread_local_t *tl); +void stm_commit_transaction(void); + + +/* ==================== END ==================== */ + +#endif diff --git a/c8/test/common.py b/c8/test/common.py new file mode 100644 --- /dev/null +++ b/c8/test/common.py @@ -0,0 +1,27 @@ +import os +import sys +assert sys.maxint == 9223372036854775807, "requires a 64-bit environment" + +# ---------- +os.environ['CC'] = 'clang' + +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +# ---------- + +source_files = [os.path.join(parent_dir, "stmgc.c")] +all_files = [os.path.join(parent_dir, "stmgc.h"), + os.path.join(parent_dir, "stmgc.c")] + [ + os.path.join(parent_dir, 'stm', _n) + for _n in os.listdir(os.path.join(parent_dir, 'stm')) + if (_n.endswith('.h') or _n.endswith('.c')) and not _n.startswith('.')] + +_pycache_ = os.path.join(parent_dir, 'test', '__pycache__') +if os.path.exists(_pycache_): + _fs = [_f for _f in os.listdir(_pycache_) if _f.startswith('_cffi_')] + if _fs: + _fsmtime = min(os.stat(os.path.join(_pycache_, _f)).st_mtime + for _f in _fs) + if any(os.stat(src).st_mtime >= _fsmtime for src in all_files): + import shutil + shutil.rmtree(_pycache_) diff --git a/c8/test/support.py b/c8/test/support.py new file mode 100644 --- /dev/null +++ b/c8/test/support.py @@ -0,0 +1,463 @@ +import os +import cffi, weakref +from common import parent_dir, source_files + +# ---------- + +ffi = cffi.FFI() +ffi.cdef(""" +typedef ... object_t; +#define SIZEOF_MYOBJ ... +#define STM_NB_SEGMENTS ... +#define _STM_GCFLAG_WRITE_BARRIER ... 
+
+typedef struct {
+...;
+} rewind_jmp_thread;
+
+typedef struct {
+    rewind_jmp_thread rjthread;
+    int associated_segment_num;
+    struct stm_thread_local_s *prev, *next;
+    void *creating_pthread[2];
+    ...;
+} stm_thread_local_t;
+
+void stm_read(object_t *obj);
+/*void stm_write(object_t *obj); use _checked_stm_write() instead */
+object_t *stm_allocate(ssize_t size_rounded_up);
+
+void stm_setup(void);
+void stm_teardown(void);
+void stm_register_thread_local(stm_thread_local_t *tl);
+void stm_unregister_thread_local(stm_thread_local_t *tl);
+
+bool _checked_stm_write(object_t *obj);
+bool _stm_was_read(object_t *obj);
+bool _stm_was_written(object_t *obj);
+char *_stm_get_segment_base(long index);
+bool _stm_in_transaction(stm_thread_local_t *tl);
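
For orientation, here is a minimal sketch of how a client program is meant to drive the public API declared in stmgc.h above. It is an illustration only and not part of the changeset: the fixed-size object assumed by stmcb_size_rounded_up() and the single-threaded main() are inventions of the example, and since stm_commit_transaction() still ends in abort() in this WIP revision, the sketch shows the intended usage rather than code that runs against this exact commit.

    #include "stmgc.h"

    /* stmcb_size_rounded_up() is declared extern in stmgc.h and must be
       supplied by the client; a fixed 16-byte object is assumed here. */
    ssize_t stmcb_size_rounded_up(struct object_s *obj)
    {
        return 16;
    }

    int main(void)
    {
        static stm_thread_local_t tl;

        stm_setup();                      /* mmap the segments, set up sync */
        stm_register_thread_local(&tl);   /* pick a segment number, set %gs */

        stm_start_transaction(&tl);       /* acquire a segment, bump read version */
        object_t *o = stm_allocate(16);   /* bump-pointer allocation in the nursery */
        stm_read(o);                      /* read barrier: sets the read marker */
        stm_write(o);                     /* write barrier: fast path only, the
                                             slow path is still a stub here */
        stm_commit_transaction();         /* minor collection, then release the segment */

        stm_unregister_thread_local(&tl);
        stm_teardown();
        return 0;
    }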