Author: Armin Rigo <ar...@tunes.org> Branch: hashtable Changeset: r1479:04bcef9511f9 Date: 2014-10-18 17:09 +0200 http://bitbucket.org/pypy/stmgc/changeset/04bcef9511f9/
Log: Import initial untested logic diff --git a/c7/stm/hashtable.c b/c7/stm/hashtable.c new file mode 100644 --- /dev/null +++ b/c7/stm/hashtable.c @@ -0,0 +1,276 @@ +#include "stmgc.h" + +/* +Design of stmgc's "hashtable" objects +===================================== + +A "hashtable" is theoretically a lazily-filled array of objects of +length 2**64. Initially it is full of NULLs. It's obviously +implemented as a dictionary in which NULL objects are not needed. + +The only operations on a hashtable are reading or writing an object at +a given index. + +There are two markers for every index (a read and a write marker). +This is unlike regular arrays, which have only two markers in total. + + +Implementation +-------------- + +First idea: have the hashtable in raw memory, pointing to "entry" +objects. The entry objects themselves point to the user-specified +objects, and they have the read/write markers. Every entry object +itself, once created, stays around. It is only removed by the next +major GC if it points to NULL and its read/write markers are not set +in any currently-running transaction. +*/ + + +typedef TLPREFIX struct stm_hashtable_entry_s { + struct object_s header; + uintptr_t index; + object_t *object; +} stm_hashtable_entry_t; + + +#define INITIAL_HASHTABLE_SIZE 8 +#define PERTURB_SHIFT 5 +#define RESIZING_LOCK 0 + +typedef struct { + uintptr_t mask; + + /* 'resize_counter' start at an odd value, and is decremented (by + 6) for every new item put in 'items'. When it crosses 0, we + instead allocate a bigger table and change 'resize_counter' to + be a regular pointer to it (which is then even). The whole + structure is immutable then. + + The field 'resize_counter' also works as a write lock: changes + go via the intermediate value RESIZING_LOCK (0). 
+ */ + uintptr_t resize_counter; + + stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE]; +} stm_hashtable_table_t; + +#define IS_EVEN(p) (((p) & 1) == 0) + +struct stm_hashtable_s { + stm_hashtable_table_t *table; + stm_hashtable_table_t initial_table; +}; + + +static inline void init_table(stm_hashtable_table_t *table, uintptr_t itemcount) +{ + table->mask = itemcount - 1; + table->resize_counter = itemcount * 4 + 1; + memset(table->items, 0, itemcount * sizeof(stm_hashtable_entry_t *)); +} + +stm_hashtable_t *stm_hashtable_create(void) +{ + stm_hashtable_t *hashtable = malloc(sizeof(stm_hashtable_t)); + assert(hashtable); + hashtable->table = &hashtable->initial_table; + init_table(&hashtable->initial_table, INITIAL_HASHTABLE_SIZE); + return hashtable; +} + +void stm_hashtable_free(stm_hashtable_t *hashtable) +{ + uintptr_t rc = hashtable->initial_table.resize_counter; + free(hashtable); + while (IS_EVEN(rc)) { + assert(rc != RESIZING_LOCK); + + stm_hashtable_table_t *table = (stm_hashtable_table_t *)rc; + rc = table->resize_counter; + free(table); + } +} + +#if 0 +static void stm_compact_hashtable(stm_hashtable_t *hashtable) +{ + stm_hashtable_table_t *table = hashtable->table; + assert(!IS_EVEN(table->resize_counter)); + + if (table != &hashtable->initial_table) { + uintptr_t rc = hashtable->initial_table.resize_counter; + while (1) { + assert(IS_EVEN(rc)); + assert(rc != RESIZING_LOCK); + + stm_hashtable_table_t *old_table = (stm_hashtable_table_t *)rc; + if (old_table == table) + break; + rc = old_table->resize_counter; + free(old_table); + } + hashtable->initial_table.resize_counter = (uintptr_t)table; + } + if (table->resize_counter < table->mask * 3) { + uintptr_t j, mask = table->mask; + uintptr_t rc = table->resize_counter; + for (j = 0; j <= mask; j++) { + stm_hashtable_entry_t *e = table->items[j]; + if (e != NULL && e->object == NULL) { + if (!_stm_was_read_by_anybody(e)) { + table->items[j] = NULL; + rc += 6; + } + } + } + table->resize_counter = 
rc; + } +} +#endif + +#define VOLATILE_HASHTABLE(p) ((volatile stm_hashtable_t *)(p)) +#define VOLATILE_TABLE(p) ((volatile stm_hashtable_table_t *)(p)) + +static void _insert_clean(stm_hashtable_table_t *table, + stm_hashtable_entry_t *entry) +{ + uintptr_t mask = table->mask; + uintptr_t i = entry->index & mask; + if (table->items[i] == NULL) { + table->items[i] = entry; + return; + } + + uintptr_t perturb = entry->index; + while (1) { + i = (i << 2) + i + perturb + 1; + i &= mask; + if (table->items[i] == NULL) { + table->items[i] = entry; + return; + } + + perturb >>= PERTURB_SHIFT; + } +} + +static stm_hashtable_entry_t *_stm_hashtable_lookup(stm_hashtable_t *hashtable, + uintptr_t index) +{ + stm_hashtable_table_t *table; + uintptr_t mask; + uintptr_t i; + stm_hashtable_entry_t *entry; + + restart: + /* classical dict lookup logic */ + table = VOLATILE_HASHTABLE(hashtable)->table; + mask = table->mask; /* read-only field */ + i = index & mask; + entry = VOLATILE_TABLE(table)->items[i]; + if (entry != NULL) { + if (entry->index == index) + return entry; /* found at the first try */ + + uintptr_t perturb = index; + while (1) { + i = (i << 2) + i + perturb + 1; + i &= mask; + entry = VOLATILE_TABLE(table)->items[i]; + if (entry != NULL) { + if (entry->index == index) + return entry; /* found */ + } + else + break; + perturb >>= PERTURB_SHIFT; + } + } + /* here, we didn't find the 'entry' with the correct index. */ + + uintptr_t rc = VOLATILE_TABLE(table)->resize_counter; + + /* if rc is RESIZING_LOCK (which is 0, so even), a concurrent thread + is writing to the hashtable. Or, if rc is another even number, it is + actually a pointer to the next version of the table, installed + just now. In both cases, this thread must simply spin loop. + */ + if (IS_EVEN(rc)) { + spin_loop(); + goto restart; + } + /* in the other cases, we need to grab the RESIZING_LOCK. 
+ */ + if (!__sync_bool_compare_and_swap(&table->resize_counter, + rc, RESIZING_LOCK)) { + goto restart; + } + /* we now have the lock. Check that 'table->items[i]' is still NULL, + i.e. hasn't been populated under our feet. + */ + if (table->items[i] != NULL) { + table->resize_counter = rc; /* unlock */ + goto restart; + } + /* if rc is greater than 6, there is enough room for a new + item in the current table. + */ + if (rc > 6) { + entry = (stm_hashtable_entry_t *) + _stm_allocate_old(sizeof(stm_hashtable_entry_t)); + entry->index = index; + write_fence(); /* make sure 'entry' is fully initialized here */ + table->items[i] = entry; + write_fence(); /* make sure 'table->items' is written here */ + VOLATILE_TABLE(table)->resize_counter = rc - 6; /* unlock */ + return entry; + } + else { + /* if rc is smaller than 6, we must allocate a new bigger table. + */ + uintptr_t biggercount = (table->mask + 1) * 2; + if (biggercount < 50000) + biggercount *= 2; + size_t size = (offsetof(stm_hashtable_table_t, items) + + biggercount * sizeof(stm_hashtable_entry_t *)); + stm_hashtable_table_t *biggertable = malloc(size); + assert(biggertable); + table->resize_counter = (uintptr_t)biggertable; + /* unlock, but put the new table, so IS_EVEN() is still true */ + + init_table(biggertable, biggercount); + + uintptr_t j; + rc = biggertable->resize_counter; + for (j = 0; j <= mask; j++) { + entry = table->items[j]; + if (entry != NULL) { + _insert_clean(biggertable, entry); + rc -= 6; + } + } + biggertable->resize_counter = rc; + + write_fence(); /* make sure as well that 'biggertable' is valid here; + and make sure 'table->resize_counter' is updated + ('table' must be immutable from now on). 
*/ + VOLATILE_HASHTABLE(hashtable)->table = biggertable; + goto restart; + } +} + +object_t *stm_hashtable_read(stm_hashtable_t *hashtable, uintptr_t index) +{ + stm_hashtable_entry_t *e = _stm_hashtable_lookup(hashtable, index); + stm_read((object_t *)e); + return e->object; +} + +void stm_hashtable_write(stm_hashtable_t *hashtable, uintptr_t index, + object_t *nvalue) +{ + stm_hashtable_entry_t *e = _stm_hashtable_lookup(hashtable, index); + stm_write((object_t *)e); + e->object = nvalue; +} diff --git a/c7/stm/hashtable.h b/c7/stm/hashtable.h new file mode 100644 --- /dev/null +++ b/c7/stm/hashtable.h @@ -0,0 +1,4 @@ + +#if 0 +static void stm_compact_hashtable(stm_hashtable_t *hashtable); +#endif diff --git a/c7/stmgc.c b/c7/stmgc.c --- a/c7/stmgc.c +++ b/c7/stmgc.c @@ -17,6 +17,7 @@ #include "stm/marker.h" #include "stm/prof.h" #include "stm/finalizer.h" +#include "stm/hashtable.h" #include "stm/misc.c" #include "stm/list.c" @@ -39,3 +40,4 @@ #include "stm/prof.c" #include "stm/rewind_setjmp.c" #include "stm/finalizer.c" +#include "stm/hashtable.c" diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -528,6 +528,17 @@ void (*stmcb_finalizer)(object_t *); object_t *stm_allocate_with_finalizer(ssize_t size_rounded_up); +/* Hashtables. Keys are 64-bit unsigned integers, values are + 'object_t *'. Note that the type 'stm_hashtable_t' is not an + object type at all; you need to allocate and free it explicitly. + If you want to embed the hashtable inside an 'object_t' you + probably need a light finalizer to do the freeing. 
from support import *
import random
import py


class TestHashtable(BaseTest):

    def test_empty(self):
        """A fresh hashtable must answer NULL for any 64-bit key."""
        self.start_transaction()
        h = lib.stm_hashtable_create()
        for _ in range(100):
            key = random.randrange(0, 1 << 64)
            assert lib.stm_hashtable_read(h, key) == ffi.NULL
        lib.stm_hashtable_free(h)