Author: Armin Rigo <ar...@tunes.org>
Branch: stm
Changeset: r47621:29beb9a43433
Date: 2011-09-27 13:28 +0200
http://bitbucket.org/pypy/pypy/changeset/29beb9a43433/
Log: Import from arigo/hack/stm/c. diff --git a/pypy/translator/stm/__init__.py b/pypy/translator/stm/__init__.py new file mode 100644 diff --git a/pypy/translator/stm/_rffi_stm.py b/pypy/translator/stm/_rffi_stm.py new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/_rffi_stm.py @@ -0,0 +1,40 @@ +import py +import os +from pypy.tool.autopath import pypydir +from pypy.rpython.lltypesystem import lltype, rffi +from pypy.translator.tool.cbuild import ExternalCompilationInfo + + +cdir = py.path.local(pypydir) / 'translator' / 'stm' + + +eci = ExternalCompilationInfo( + include_dirs = [cdir], + includes = ['src_stm/et.h'], + post_include_bits = [ + r'''#define stm_begin_transaction_inline() ; \ + jmp_buf _jmpbuf; \ + setjmp(_jmpbuf); \ + stm_begin_transaction(&_jmpbuf); + '''], + separate_module_sources = ['#include "src_stm/et.c"\n'], +) + +def llexternal(name, args, result, **kwds): + return rffi.llexternal(name, args, result, compilation_info=eci, + _nowrapper=True, **kwds) + + +descriptor_init = llexternal('stm_descriptor_init', [], lltype.Void) +descriptor_done = llexternal('stm_descriptor_done', [], lltype.Void) + +begin_transaction = llexternal('stm_begin_transaction_inline',[], lltype.Void) +commit_transaction = llexternal('stm_commit_transaction', [], lltype.Signed) + +read_word = llexternal('stm_read_word', [rffi.VOIDPP], rffi.VOIDP) +write_word = llexternal('stm_write_word', [rffi.VOIDPP, rffi.VOIDP], + lltype.Void) + +CALLBACK = lltype.Ptr(lltype.FuncType([rffi.VOIDP], rffi.VOIDP)) +perform_transaction = llexternal('stm_perform_transaction', + [CALLBACK, rffi.VOIDP], rffi.VOIDP) diff --git a/pypy/translator/stm/src_stm/atomic_ops.h b/pypy/translator/stm/src_stm/atomic_ops.h new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/src_stm/atomic_ops.h @@ -0,0 +1,45 @@ + + +/* "compiler fence" for preventing reordering of loads/stores to + non-volatiles */ +#define CFENCE asm volatile ("":::"memory") + + +#ifdef __llvm__ +# define HAS_SYNC_BOOL_COMPARE_AND_SWAP +#endif + +#ifdef __GNUC__ +# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) +# define HAS_SYNC_BOOL_COMPARE_AND_SWAP +# endif +#endif + + +#ifdef HAS_SYNC_BOOL_COMPARE_AND_SWAP +# define bool_cas __sync_bool_compare_and_swap +#else +/* x86 (32 bits and 64 bits) */ +static inline _Bool +bool_cas(volatile unsigned long* ptr, unsigned long old, unsigned long _new) +{ + unsigned long prev; + asm volatile("lock;" +#if defined(__amd64__) + "cmpxchgq %1, %2;" +#else + "cmpxchgl %1, %2;" +#endif + : "=a"(prev) + : "q"(_new), "m"(*ptr), "a"(old) + : "memory"); + return prev == old; +} +/* end */ +#endif + + +static inline void spinloop(void) +{ + asm volatile ("pause"); +} diff --git a/pypy/translator/stm/src_stm/et.c b/pypy/translator/stm/src_stm/et.c new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/src_stm/et.c @@ -0,0 +1,745 @@ +/* -*- c-basic-offset: 2 -*- */ + +/* XXX assumes that time never wraps around (in a 'long'), which may be + * correct on 64-bit machines but not on 32-bit machines if the process + * runs for long enough. 
+ * + * XXX measure the overhead of the global_timestamp + */ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> + +#define USE_PTHREAD_MUTEX /* optional */ +#ifdef USE_PTHREAD_MUTEX +# include <pthread.h> +#endif + +#include "src_stm/et.h" +#include "src_stm/atomic_ops.h" + +/************************************************************/ + +#define IS_LOCKED(num) ((num) < 0) +#define IS_LOCKED_OR_NEWER(num, max_age) \ + (((unsigned long)(num)) > ((unsigned long)(max_age))) +typedef long owner_version_t; + +typedef struct { + owner_version_t v; // the current version number +} orec_t; + +/*** Specify the number of orecs in the global array. */ +#define NUM_STRIPES 1048576 + +/*** declare the table of orecs */ +static char orecs[NUM_STRIPES * sizeof(orec_t)]; + +/*** map addresses to orec table entries */ +inline static volatile orec_t* get_orec(void* addr) +{ + unsigned long index = (unsigned long)addr; + char *p = orecs + (index & ((NUM_STRIPES-1) * sizeof(orec_t))); + return (volatile orec_t *)p; +} + +#include "src_stm/lists.c" + +/************************************************************/ + +/* Uncomment the line to try this extra code. Doesn't work reliably so far */ +/*#define COMMIT_OTHER_INEV*/ + +#define ABORT_REASONS 7 +#define SPINLOOP_REASONS 10 +#define OTHERINEV_REASONS 5 + +struct tx_descriptor { + jmp_buf *setjmp_buf; + owner_version_t start_time; + owner_version_t end_time; + unsigned long last_known_global_timestamp; + struct OrecList reads; + unsigned num_commits; + unsigned num_aborts[ABORT_REASONS]; + unsigned num_spinloops[SPINLOOP_REASONS]; +#ifdef COMMIT_OTHER_INEV + unsigned num_otherinev[OTHERINEV_REASONS]; +#endif + unsigned int spinloop_counter; + owner_version_t my_lock_word; + struct RedoLog redolog; /* last item, because it's the biggest one */ +}; + +/* global_timestamp contains in its lowest bit a flag equal to 1 + if there is an inevitable transaction running */ +static volatile unsigned long global_timestamp = 2; +static __thread struct tx_descriptor *thread_descriptor; +#ifdef COMMIT_OTHER_INEV +static struct tx_descriptor *volatile thread_descriptor_inev; +static volatile unsigned long d_inev_checking = 0; +#endif + +/************************************************************/ + +static unsigned long get_global_timestamp(struct tx_descriptor *d) +{ + return (d->last_known_global_timestamp = global_timestamp); +} + +static _Bool change_global_timestamp(struct tx_descriptor *d, + unsigned long old, + unsigned long new) +{ + if (bool_cas(&global_timestamp, old, new)) + { + d->last_known_global_timestamp = new; + return 1; + } + return 0; +} + +static void set_global_timestamp(struct tx_descriptor *d, unsigned long new) +{ + global_timestamp = new; + d->last_known_global_timestamp = new; +} + +static void tx_abort(int); + +static void tx_spinloop(int num) +{ + unsigned int c; + int i; + struct tx_descriptor *d = thread_descriptor; + d->num_spinloops[num]++; + + //printf("tx_spinloop(%d)\n", num); + + c = d->spinloop_counter; + d->spinloop_counter = c * 9; + i = c & 0xff0000; + while (i >= 0) { + spinloop(); + i -= 0x10000; + } +} + +static _Bool is_inevitable(struct tx_descriptor *d) +{ + return d->setjmp_buf == NULL; +} + +/*** run the redo log to commit a transaction, and release the locks */ +static void tx_redo(struct tx_descriptor *d) +{ + owner_version_t newver = d->end_time; + wlog_t *item; + /* loop in "forward" order: in this order, if there are duplicate orecs + then only the last one has p != -1. 
*/ + REDOLOG_LOOP_FORWARD(d->redolog, item) + { + *item->addr = item->val; + /* but we must only unlock the orec if it's the last time it + appears in the redolog list. If it's not, then p == -1. */ + if (item->p != -1) + { + volatile orec_t* o = get_orec(item->addr); + CFENCE; + o->v = newver; + } + } REDOLOG_LOOP_END; +} + +/*** on abort, release locks and restore the old version number. */ +static void releaseAndRevertLocks(struct tx_descriptor *d) +{ + wlog_t *item; + REDOLOG_LOOP_FORWARD(d->redolog, item) + { + if (item->p != -1) + { + volatile orec_t* o = get_orec(item->addr); + o->v = item->p; + } + } REDOLOG_LOOP_END; +} + +/*** release locks and restore the old version number, ready to retry later */ +static void releaseLocksForRetry(struct tx_descriptor *d) +{ + wlog_t *item; + REDOLOG_LOOP_FORWARD(d->redolog, item) + { + if (item->p != -1) + { + volatile orec_t* o = get_orec(item->addr); + o->v = item->p; + item->p = -1; + } + } REDOLOG_LOOP_END; +} + +/*** lock all locations */ +static void acquireLocks(struct tx_descriptor *d) +{ + wlog_t *item; + // try to lock every location in the write set + REDOLOG_LOOP_BACKWARD(d->redolog, item) + { + // get orec, read its version# + volatile orec_t* o = get_orec(item->addr); + owner_version_t ovt; + + retry: + ovt = o->v; + + // if orec not locked, lock it + // + // NB: if ovt > start time, we may introduce inconsistent + // reads. Since most writes are also reads, we'll just abort under this + // condition. This can introduce false conflicts + if (!IS_LOCKED_OR_NEWER(ovt, d->start_time)) { + if (!bool_cas(&o->v, ovt, d->my_lock_word)) + goto retry; + // save old version to item->p. Now we hold the lock. + // in case of duplicate orecs, only the last one has p != -1. + item->p = ovt; + } + // else if the location is too recent... + else if (!IS_LOCKED(ovt)) + tx_abort(0); + // else it is locked: if we don't hold the lock... + else if (ovt != d->my_lock_word) { + // we can either abort or spinloop. Because we are at the end of + // the transaction we might try to spinloop, even though after the + // lock is released the ovt will be very recent, possibly + // > d->start_time. It is necessary to spinloop in case we are + // inevitable, so use that as a criteria. Another solution to avoid + // deadlocks would be to sort the order in which we take the locks. + if (is_inevitable(d)) + tx_spinloop(8); + else + tx_abort(6); + goto retry; + } + } REDOLOG_LOOP_END; +} + +static void common_cleanup(struct tx_descriptor *d) +{ + d->reads.size = 0; + redolog_clear(&d->redolog); +} + +static void tx_cleanup(struct tx_descriptor *d) +{ + // release the locks and restore version numbers + releaseAndRevertLocks(d); + // reset all lists + common_cleanup(d); +} + +static void tx_restart(struct tx_descriptor *d) +{ + tx_cleanup(d); + tx_spinloop(0); + longjmp(*d->setjmp_buf, 1); +} + +/*** increase the abort count and restart the transaction */ +static void tx_abort(int reason) +{ + struct tx_descriptor *d = thread_descriptor; + assert(!is_inevitable(d)); + d->num_aborts[reason]++; + tx_restart(d); +} + +/** + * fast-path validation, assuming that I don't hold locks. + */ +static void validate_fast(struct tx_descriptor *d, int lognum) +{ + int i; + owner_version_t ovt; + assert(!is_inevitable(d)); + for (i=0; i<d->reads.size; i++) + { + retry: + ovt = d->reads.items[i]->v; + if (IS_LOCKED_OR_NEWER(ovt, d->start_time)) + { + // If locked, we wait until it becomes unlocked. 
The chances are + // that it will then have a very recent start_time, likely + // > d->start_time, but it might still be better than always aborting + if (IS_LOCKED(ovt)) + { + tx_spinloop(lognum); /* tx_spinloop(1), tx_spinloop(2), + tx_spinloop(3) */ + goto retry; + } + else + // abort if the timestamp is newer than my start time. + tx_abort(lognum); /* tx_abort(1), tx_abort(2), tx_abort(3) */ + } + } +} + +/** + * validate the read set by making sure that all orecs that we've read have + * timestamps at least as old as our start time, unless we locked those orecs. + */ +static void validate(struct tx_descriptor *d) +{ + int i; + owner_version_t ovt; + assert(!is_inevitable(d)); + for (i=0; i<d->reads.size; i++) + { + ovt = d->reads.items[i]->v; // read this orec + if (IS_LOCKED_OR_NEWER(ovt, d->start_time)) + { + if (!IS_LOCKED(ovt)) + // if unlocked and newer than start time, abort + tx_abort(4); + else + { + // if locked and not by me, abort + if (ovt != d->my_lock_word) + tx_abort(5); + } + } + } +} + +#ifdef USE_PTHREAD_MUTEX +/* mutex: only to avoid busy-looping too much in tx_spinloop() below */ +static pthread_mutex_t mutex_inevitable = PTHREAD_MUTEX_INITIALIZER; +#endif + +#ifdef COMMIT_OTHER_INEV +unsigned long can_commit_with_other_inevitable(struct tx_descriptor *d, + unsigned long expected) +{ + int i; + owner_version_t ovt; + unsigned long result = 0; + struct tx_descriptor *d_inev; + + // 'd_inev_checking' is 1 or 2 when an inevitable transaction is running + // and didn't start committing yet; otherwise it is 0. It is normally 1 + // except in this function. + if (!bool_cas(&d_inev_checking, 1, 2)) + { + d->num_otherinev[4]++; + return 0; + } + + // optimization only: did the inevitable thread 'd_inev' read any data + // that we are about to commit? If we are sure that the answer is + // negative, then commit anyway, because it cannot make the inevitable + // thread fail. We can safely check an approximation of this, because + // we hold a lock on all orecs that we would like to write. So if all + // orecs read by d_inev are not locked now, then no conflict. This + // function is allowed to "fail" and give up rather than spinloop + // waiting for a condition to be true, which is potentially dangerous + // here, because we acquired all the locks. + + // Note that if the inevitable thread itself adds in parallel an extra + // orec to d_inev->reads, *and* if this new orec is locked, then we + // will miss it here; but the d_inev thread will spinloop waiting for + // us to be done. So even if we commit, the d_inev thread will just + // wait and load the new committed value. + + // while we are in this function, the d_inev thread is prevented from + // going too far with the commitTransaction() code because d_inev_checking + // is greater than 1; it will just tx_spinloop(9). (And of course it + // cannot abort.) 
+ + d_inev = thread_descriptor_inev; + if (!bool_cas(&d_inev->reads.locked, 0, 1)) + { + d->num_otherinev[1]++; + goto give_up_1; + } + + for (i=d_inev->reads.size; i--; ) + { + ovt = d_inev->reads.items[i]->v; // read this orec + if (ovt == d->my_lock_word) + { + d->num_otherinev[2]++; + goto give_up_2; + } + } + assert(expected & 1); + if (!change_global_timestamp(d, expected, expected + 2)) + { + d->num_otherinev[3]++; + goto give_up_2; + } + + /* success: scale d_inet forward */ + d->num_otherinev[0]++; + result = expected + 1; + assert(d_inev->start_time == result - 2); + d_inev->start_time = result; + CFENCE; + + give_up_2: + d_inev->reads.locked = 0; + + give_up_1: + d_inev_checking = 1; + return result; +} +#endif + +void wait_end_inevitability(struct tx_descriptor *d) +{ + unsigned long curts; + releaseLocksForRetry(d); + + // We are going to wait until the other inevitable transaction + // finishes. XXX We could do better here: we could check if + // committing 'd' would create a conflict for the other inevitable + // thread 'd_inev' or not. It requires peeking in 'd_inev' from this + // thread (which we never do so far) in order to do something like + // 'validate_fast(d_inev); d_inev->start_time = updated;' + + while ((curts = get_global_timestamp(d)) & 1) + { + // while we're about to wait anyway, we can do a validate_fast + if (d->start_time < curts - 1) + { + validate_fast(d, 3); + d->start_time = curts - 1; + } + tx_spinloop(4); +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_lock(&mutex_inevitable); + pthread_mutex_unlock(&mutex_inevitable); +#endif + } + acquireLocks(d); +} + +void commitInevitableTransaction(struct tx_descriptor *d) +{ + unsigned long ts; + _Bool ok; + +#ifdef COMMIT_OTHER_INEV + // reset d_inev_checking back from 1 to 0 + while (!bool_cas(&d_inev_checking, 1, 0)) + tx_spinloop(9); +#endif + // no-one else can modify global_timestamp if I'm inevitable + // and d_inev_checking is 0 + ts = get_global_timestamp(d); + assert(ts & 1); + set_global_timestamp(d, ts + 1); + d->end_time = ts + 1; + assert(d->end_time == (d->start_time + 2)); + + // run the redo log, and release the locks + tx_redo(d); + +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_unlock(&mutex_inevitable); +#endif +} + +/* lazy/lazy read instrumentation */ +void* stm_read_word(void** addr) +{ + struct tx_descriptor *d = thread_descriptor; + + // check writeset first + wlog_t* found; + REDOLOG_FIND(d->redolog, addr, found, goto not_found); + return found->val; + + not_found:; + // get the orec addr + volatile orec_t* o = get_orec((void*)addr); + owner_version_t ovt; + +#ifdef COMMIT_OTHER_INEV + // log orec BEFORE we spinloop waiting for the orec lock to be released, + // for can_commit_with_other_inevitable() + oreclist_insert(&d->reads, (orec_t*)o); +#endif + + retry: + // read the orec BEFORE we read anything else + ovt = o->v; + CFENCE; + + // this tx doesn't hold any locks, so if the lock for this addr is held, + // there is contention. A lock is never hold for too long, so spinloop + // until it is released. + if (IS_LOCKED_OR_NEWER(ovt, d->start_time)) + { + if (IS_LOCKED(ovt)) { + tx_spinloop(7); + goto retry; + } + // else this location is too new, scale forward + owner_version_t newts = get_global_timestamp(d) & ~1; +#ifdef COMMIT_OTHER_INEV + d->reads.size--; // ignore the newly logged orec +#endif + validate_fast(d, 1); +#ifdef COMMIT_OTHER_INEV + d->reads.size++; +#endif + d->start_time = newts; + } + + // orec is unlocked, with ts <= start_time. 
read the location + void* tmp = *addr; + + // postvalidate AFTER reading addr: + CFENCE; + if (o->v != ovt) + goto retry; /* oups, try again */ + +#ifndef COMMIT_OTHER_INEV + oreclist_insert(&d->reads, (orec_t*)o); +#endif + + return tmp; +} + +void stm_write_word(void** addr, void* val) +{ + struct tx_descriptor *d = thread_descriptor; + redolog_insert(&d->redolog, addr, val); +} + + +void stm_descriptor_init(void) +{ + struct tx_descriptor *d = malloc(sizeof(struct tx_descriptor)); + memset(d, 0, sizeof(struct tx_descriptor)); + + /* initialize 'my_lock_word' to be a unique negative number */ + d->my_lock_word = (owner_version_t)d; + if (!IS_LOCKED(d->my_lock_word)) + d->my_lock_word = ~d->my_lock_word; + assert(IS_LOCKED(d->my_lock_word)); + d->spinloop_counter = (unsigned int)(d->my_lock_word | 1); + + thread_descriptor = d; +} + +void stm_descriptor_done(void) +{ + struct tx_descriptor *d = thread_descriptor; + thread_descriptor = NULL; + + int num_aborts = 0, num_spinloops = 0; + int i, prevchar; + for (i=0; i<ABORT_REASONS; i++) + num_aborts += d->num_aborts[i]; + for (i=0; i<SPINLOOP_REASONS; i++) + num_spinloops += d->num_spinloops[i]; + + fprintf(stderr, "thread %lx: %d commits, %d aborts ", + d->my_lock_word, + d->num_commits, + num_aborts); + + for (i=0; i<ABORT_REASONS; i++) + fprintf(stderr, "%c%d", i == 0 ? '[' : ',', + d->num_aborts[i]); + + for (i=1; i<SPINLOOP_REASONS; i++) /* num_spinloops[0] == num_aborts */ + fprintf(stderr, "%c%d", i == 1 ? '|' : ',', + d->num_spinloops[i]); + +#ifdef COMMIT_OTHER_INEV + for (i=0; i<OTHERINEV_REASONS; i++) + fprintf(stderr, "%c%d", i == 0 ? '|' : ',', + d->num_otherinev[i]); +#endif + + fprintf(stderr, "]\n"); + free(d); +} + +void* stm_perform_transaction(void*(*callback)(void*), void *arg) +{ + void *result; + jmp_buf jmpbuf; + stm_begin_transaction(&jmpbuf); + result = callback(arg); + stm_commit_transaction(); + return result; +} + +void stm_begin_transaction(jmp_buf* buf) +{ + struct tx_descriptor *d = thread_descriptor; + d->setjmp_buf = buf; + d->start_time = d->last_known_global_timestamp & ~1; +} + +long stm_commit_transaction(void) +{ + struct tx_descriptor *d = thread_descriptor; + + // if I don't have writes, I'm committed + if (!redolog_any_entry(&d->redolog)) + { + if (is_inevitable(d)) + { + unsigned long ts = get_global_timestamp(d); + assert(ts & 1); + set_global_timestamp(d, ts - 1); +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_unlock(&mutex_inevitable); +#endif + } + d->num_commits++; + common_cleanup(d); + return d->start_time; + } + + // bring that variable over to this CPU core (optimization, maybe) + global_timestamp; + + // acquire locks + acquireLocks(d); + + if (is_inevitable(d)) + { + commitInevitableTransaction(d); + } + else + { + while (1) + { + unsigned long expected = get_global_timestamp(d); + if (expected & 1) + { +#ifdef COMMIT_OTHER_INEV + // there is another inevitable transaction running. + expected = can_commit_with_other_inevitable(d, expected); + if (expected != 0) + { + d->end_time = expected; + break; + } +#endif + // wait until it is done. hopefully we can then proceed + // without conflicts. 
+ wait_end_inevitability(d); + continue; + } + if (change_global_timestamp(d, expected, expected + 2)) + { + d->end_time = expected + 2; + break; + } + } + + // validate (but skip validation if nobody else committed) + if (d->end_time != (d->start_time + 2)) + validate(d); + + // run the redo log, and release the locks + tx_redo(d); + } + + // remember that this was a commit + d->num_commits++; + + // reset all lists + common_cleanup(d); + return d->end_time; +} + +void stm_try_inevitable(void) +{ + /* when a transaction is inevitable, its start_time is equal to + global_timestamp and global_timestamp cannot be incremented + by another thread. We set the lowest bit in global_timestamp + to 1. */ + struct tx_descriptor *d = thread_descriptor; + + if (is_inevitable(d)) + return; /* I am already inevitable */ + + while (1) + { + unsigned long curtime = get_global_timestamp(d); + if (d->start_time != (curtime & ~1)) + { /* scale forward */ + validate_fast(d, 2); + d->start_time = curtime & ~1; + } +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_lock(&mutex_inevitable); +#endif + if (curtime & 1) /* there is, or was, already an inevitable thread */ + { + /* should we spinloop here, or abort (and likely come back + in try_inevitable() very soon)? unclear. For now + let's try to spinloop, after the waiting done by + acquiring the mutex */ +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_unlock(&mutex_inevitable); +#endif + tx_spinloop(6); + continue; + } + if (change_global_timestamp(d, curtime, curtime + 1)) + break; +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_unlock(&mutex_inevitable); +#endif + } + d->setjmp_buf = NULL; /* inevitable from now on */ +#ifdef COMMIT_OTHER_INEV + thread_descriptor_inev = d; + CFENCE; + d_inev_checking = 1; +#endif +} + +void stm_begin_inevitable_transaction(void) +{ + struct tx_descriptor *d = thread_descriptor; + unsigned long curtime; + + retry: +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_lock(&mutex_inevitable); /* possibly waiting here */ +#endif + + while (1) + { + curtime = global_timestamp; + if (curtime & 1) + { +#ifdef USE_PTHREAD_MUTEX + pthread_mutex_unlock(&mutex_inevitable); +#endif + tx_spinloop(5); + goto retry; + } + if (bool_cas(&global_timestamp, curtime, curtime + 1)) + break; + } + d->setjmp_buf = NULL; + d->start_time = curtime; +#ifdef COMMIT_OTHER_INEV + thread_descriptor_inev = d; + CFENCE; + d_inev_checking = 1; +#endif +} diff --git a/pypy/translator/stm/src_stm/et.h b/pypy/translator/stm/src_stm/et.h new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/src_stm/et.h @@ -0,0 +1,25 @@ +/*** Extendable Timestamps + * + * This is a C version of rstm_r5/stm/et.hpp. 
+ * See http://www.cs.rochester.edu/research/synchronization/rstm/api.shtml + * + */ + +#ifndef _ET_H +#define _ET_H + +#include <setjmp.h> + + +void stm_descriptor_init(void); +void stm_descriptor_done(void); +void* stm_perform_transaction(void*(*)(void*), void*); +void stm_begin_transaction(jmp_buf* buf); +long stm_commit_transaction(void); +void* stm_read_word(void** addr); +void stm_write_word(void** addr, void* val); +void stm_try_inevitable(void); +void stm_begin_inevitable_transaction(void); + + +#endif /* _ET_H */ diff --git a/pypy/translator/stm/src_stm/lists.c b/pypy/translator/stm/src_stm/lists.c new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/src_stm/lists.c @@ -0,0 +1,241 @@ +/* -*- c-basic-offset: 2 -*- */ + +#include <limits.h> + +/************************************************************/ + +/* The redolog_xx functions are implemented as a tree, supporting + very high performance in REDOLOG_FIND in the common case where + there are no or few elements in the tree, but scaling correctly + if the number of items becomes large. */ + +#define TREE_BITS 4 +#define TREE_ARITY (1 << TREE_BITS) + +#define TREE_DEPTH_MAX ((sizeof(void*)*8 - 2 + TREE_BITS-1) / TREE_BITS) +/* sizeof(void*) = total number of bits + 2 = bits that we ignore anyway (2 or 3, conservatively 2) + (x + TREE_BITS-1) / TREE_BITS = divide by TREE_BITS, rounding up +*/ + +#define TREE_MASK ((TREE_ARITY - 1) * sizeof(void*)) + +typedef struct { + void** addr; + void* val; + owner_version_t p; // the previous version number (if locked) +} wlog_t; + +typedef struct { + char *items[TREE_ARITY]; +} wlog_node_t; + +struct RedoLog { + char *raw_start, *raw_current, *raw_end; + wlog_node_t toplevel; +}; + +static void _redolog_clear_node(wlog_node_t *node) +{ + memset(node, 0, sizeof(wlog_node_t)); +} + +static void redolog_clear(struct RedoLog *redolog) +{ + if (redolog->raw_current != redolog->raw_start) + { + _redolog_clear_node(&redolog->toplevel); + redolog->raw_current = redolog->raw_start; + } +} + +static int redolog_any_entry(struct RedoLog *redolog) +{ + return redolog->raw_current != redolog->raw_start; +} + +#define _REDOLOG_LOOP(redolog, item, INITIAL, _PLUS_) \ +{ \ + struct { char **next; char **end; } _stack[TREE_DEPTH_MAX], *_stackp; \ + char **_next, **_end, *_entry; \ + /* initialization */ \ + _stackp = _stack; /* empty stack */ \ + _next = (redolog).toplevel.items + INITIAL; \ + _end = _next _PLUS_ TREE_ARITY; \ + /* loop */ \ + while (1) \ + { \ + if (_next == _end) \ + { \ + if (_stackp == _stack) \ + break; /* done */ \ + /* finished with this level, go to the next one */ \ + _stackp--; \ + _next = _stackp->next; \ + _end = _stackp->end; \ + continue; \ + } \ + _entry = *_next; \ + _next = _next _PLUS_ 1; \ + if (_entry == NULL) /* empty entry */ \ + continue; \ + if (((long)_entry) & 1) \ + { /* points to a further level: enter it */ \ + _stackp->next = _next; \ + _stackp->end = _end; \ + _stackp++; \ + _next = ((wlog_node_t *)(_entry - 1))->items + INITIAL; \ + _end = _next _PLUS_ TREE_ARITY; \ + continue; \ + } \ + /* points to a wlog_t item */ \ + item = (wlog_t *)_entry; + +#define REDOLOG_LOOP_FORWARD(redolog, item) \ + _REDOLOG_LOOP(redolog, item, 0, +) +#define REDOLOG_LOOP_BACKWARD(redolog, item) \ + _REDOLOG_LOOP(redolog, item, (TREE_ARITY-1), -) +#define REDOLOG_LOOP_END } } + +#define REDOLOG_FIND(redolog, addr1, result, goto_not_found) \ +{ \ + unsigned long _key = (unsigned long)(addr1); \ + char *_p = (char *)((redolog).toplevel.items); \ + char *_entry = *(char 
**)(_p + (_key & TREE_MASK)); \ + if (_entry == NULL) \ + goto_not_found; /* common case, hopefully */ \ + result = _redolog_find(_entry, addr1); \ + if (result == NULL || result->addr != (addr1)) \ + goto_not_found; \ +} + +static wlog_t *_redolog_find(char *entry, void** addr) +{ + unsigned long key = (unsigned long)addr; + while (((long)entry) & 1) + { /* points to a further level */ + key >>= TREE_BITS; + entry = *(char **)((entry - 1) + (key & TREE_MASK)); + } + return (wlog_t *)entry; /* may be NULL */ +} + +static void redolog_insert(struct RedoLog *redolog, void** addr, void* val); + +static void _redolog_grow(struct RedoLog *redolog, long extra) +{ + struct RedoLog newredolog; + wlog_t *item, *newitem; + long alloc = redolog->raw_end - redolog->raw_start; + long newalloc = (alloc + extra + (alloc >> 2) + 31) & ~15; + //fprintf(stderr, "growth: %ld\n", newalloc); + char *newitems = malloc(newalloc); + newredolog.raw_start = newitems; + newredolog.raw_current = newitems; + newredolog.raw_end = newitems + newalloc; + _redolog_clear_node(&newredolog.toplevel); + REDOLOG_LOOP_FORWARD(*redolog, item) + { + assert(item->p == -1); + redolog_insert(&newredolog, item->addr, item->val); + } REDOLOG_LOOP_END; + free(redolog->raw_start); + *redolog = newredolog; +} + +static char *_redolog_grab(struct RedoLog *redolog, long size) +{ + char *result; + result = redolog->raw_current; + redolog->raw_current += size; + if (redolog->raw_current > redolog->raw_end) + { + _redolog_grow(redolog, size); + return NULL; + } + return result; +} + +static void redolog_insert(struct RedoLog *redolog, void** addr, void* val) +{ + retry:; + wlog_t *wlog; + unsigned long key = (unsigned long)addr; + int shift = 0; + char *p = (char *)(redolog->toplevel.items); + char *entry; + while (1) + { + p += (key >> shift) & TREE_MASK; + shift += TREE_BITS; + entry = *(char **)p; + if (entry == NULL) + break; + else if (((long)entry) & 1) + { /* points to a further level */ + p = entry - 1; + } + else + { + wlog_t *wlog1 = (wlog_t *)entry; + if (wlog1->addr == addr) + { + /* overwrite and that's it */ + wlog1->val = val; + return; + } + /* collision: there is already a different wlog here */ + wlog_node_t *node = (wlog_node_t *) + _redolog_grab(redolog, sizeof(wlog_node_t)); + if (node == NULL) goto retry; + _redolog_clear_node(node); + unsigned long key1 = (unsigned long)(wlog1->addr); + char *p1 = (char *)(node->items); + *(wlog_t **)(p1 + ((key1 >> shift) & TREE_MASK)) = wlog1; + *(char **)p = ((char *)node) + 1; + p = p1; + } + } + wlog = (wlog_t *)_redolog_grab(redolog, sizeof(wlog_t)); + if (wlog == NULL) goto retry; + wlog->addr = addr; + wlog->val = val; + wlog->p = -1; + *(char **)p = (char *)wlog; +} + +/************************************************************/ + +/* The oreclist_xx functions are implemented as an array that grows + as needed. 
*/ + +struct OrecList { + long size, alloc; + unsigned long locked; + orec_t **items; +}; + +static void _oreclist_grow(struct OrecList *oreclist) +{ + long newalloc = oreclist->alloc + (oreclist->alloc >> 1) + 16; + orec_t **newitems = malloc(newalloc * sizeof(orec_t *)); + long i; + for (i=0; i<oreclist->size; i++) + newitems[i] = oreclist->items[i]; + while (!bool_cas(&oreclist->locked, 0, 1)) + /* rare case */ ; + free(oreclist->items); + oreclist->items = newitems; + oreclist->alloc = newalloc; + CFENCE; + oreclist->locked = 0; +} + +static void oreclist_insert(struct OrecList *oreclist, orec_t *newitem) +{ + if (oreclist->size == oreclist->alloc) + _oreclist_grow(oreclist); + oreclist->items[oreclist->size++] = newitem; +} + +/************************************************************/ diff --git a/pypy/translator/stm/test/__init__.py b/pypy/translator/stm/test/__init__.py new file mode 100644 diff --git a/pypy/translator/stm/test/test_basic.py b/pypy/translator/stm/test/test_basic.py new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/test/test_basic.py @@ -0,0 +1,2 @@ + + diff --git a/pypy/translator/stm/test/test_rffi_stm.py b/pypy/translator/stm/test/test_rffi_stm.py new file mode 100644 --- /dev/null +++ b/pypy/translator/stm/test/test_rffi_stm.py @@ -0,0 +1,14 @@ +from pypy.translator.stm._rffi_stm import * +from pypy.rpython.annlowlevel import llhelper + +def test_descriptor(): + descriptor_init() + descriptor_done() + +def test_perform_transaction(): + def callback1(x): + return lltype.nullptr(rffi.VOIDP.TO) + descriptor_init() + perform_transaction(llhelper(CALLBACK, callback1), + lltype.nullptr(rffi.VOIDP.TO)) + descriptor_done() _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit
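
A minimal sketch of how the et.h entry points introduced above fit together, assuming one descriptor per thread and all shared reads/writes routed through stm_read_word()/stm_write_word(); the shared_counter variable and the increment() callback are invented here for illustration only and are not part of the changeset:

    /* illustrative only: exercises the public et.h API from plain C */
    #include "src_stm/et.h"

    static void *shared_counter;   /* one shared word, accessed via the STM */

    /* transaction body: read the word, write back its value + 1 */
    static void *increment(void *arg)
    {
        (void)arg;
        void *v = stm_read_word(&shared_counter);
        stm_write_word(&shared_counter, (void *)((long)v + 1));
        return NULL;
    }

    int main(void)
    {
        stm_descriptor_init();                    /* once per thread */
        stm_perform_transaction(increment, NULL); /* runs the callback between
                                                     stm_begin_transaction() and
                                                     stm_commit_transaction() */
        stm_descriptor_done();                    /* frees the descriptor and
                                                     prints commit/abort counts */
        return 0;
    }

These are the same calls that _rffi_stm.py exposes to RPython; test_rffi_stm.py above drives them in the same order with a null callback.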