Author: Armin Rigo <[email protected]>
Branch: hashtable-iter
Changeset: r1600:06c31e9c06f8
Date: 2015-02-01 01:29 +0100
http://bitbucket.org/pypy/stmgc/changeset/06c31e9c06f8/
Log: in-progress, getting there hopefully
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -281,10 +281,9 @@
static stm_thread_local_t *abort_with_mutex_no_longjmp(void);
static void abort_data_structures_from_segment_num(int segment_num);
-static inline bool was_read_local(object_t *obj)
-{
- return ((stm_read_marker_t *)(((uintptr_t)obj) >> 4))->rm ==
- STM_SEGMENT->transaction_read_version;
+static inline struct stm_read_marker_s *get_read_marker(char *base,
+ object_t *obj) {
+ return (struct stm_read_marker_s *)(base + (((uintptr_t)obj) >> 4));
}
static inline bool was_read_remote(char *base, object_t *obj)
@@ -292,8 +291,7 @@
uint8_t other_transaction_read_version =
((struct stm_segment_info_s *)REAL_ADDRESS(base, STM_PSEGMENT))
->transaction_read_version;
- uint8_t rm = ((struct stm_read_marker_s *)
- (base + (((uintptr_t)obj) >> 4)))->rm;
+ uint8_t rm = get_read_marker(base, obj)->rm;
assert(rm <= other_transaction_read_version);
return rm == other_transaction_read_version;
}
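
For reference, the deleted was_read_local() is now just the remote variant applied to the current segment; a minimal sketch of the equivalence (this is exactly how _stm_was_read() in c7/stm/misc.c is rewritten further down in this patch):

    /* Sketch, not part of the patch: recover the old local check
       via the new helper by passing the current segment's base. */
    static inline bool was_read_local(object_t *obj)
    {
        return was_read_remote(STM_SEGMENT->segment_base, obj);
    }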
diff --git a/c7/stm/hashtable.c b/c7/stm/hashtable.c
--- a/c7/stm/hashtable.c
+++ b/c7/stm/hashtable.c
@@ -12,22 +12,16 @@
collision).
The main operations on a hashtable are reading or writing an object at a
-given index. It also supports iterating its non-NULL entries.
+given index. It also supports fetching the list of non-NULL entries.
There are two markers for every index (a read and a write marker).
This is unlike regular arrays, which have only two markers in total.
Additionally, we use the read marker for the hashtable object itself
-to mean "we are iterating". When a transaction has got this "global"
-read marker and another transaction attempts to create a new key/value
-pair via stm_hashtable_{lookup,read,write}, the call immediately fails
-with a read/write conflict. This gives priority to the iterating
-transaction rather than the modifying transaction, which is probably
-what we want.
-
-XXX NO: when creating new key/value objects, we should copy the read
-marker from the hashtableobj to the new key/value object. I *think*
-this gives a correct and better result XXX
+to mean "we have read the complete list of keys". This plays the role
+of a "global" read marker: when any thread adds a new key/value object
+to the hashtable, this new object's read marker is initialized with a
+copy of the "global" read marker --- in all segments.
Implementation
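
To make the new comment concrete, here is a sketch of the interaction it is designed to catch, as I read the rest of this patch (the final conflict step relies on the usual read/write contention machinery):

    /* Transaction A:
     *   stm_hashtable_list(hobj, h, results);
     *     -> calls stm_read(hobj): A's read marker is set on the
     *        hashtable object itself ("read the complete list of keys").
     *
     * Transaction B, afterwards:
     *   stm_hashtable_write(hobj, h, key, value, tl);
     *     -> the lookup creates a new entry object whose read marker is
     *        initialized, in every segment, from hobj's marker;
     *     -> in A's segment the new entry thus appears already read by A,
     *        so B's write triggers the usual read/write conflict, exactly
     *        as if the key had existed while A fetched the list.
     */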
@@ -66,12 +60,8 @@
The field 'resize_counter' also works as a write lock: changes
go via the intermediate value RESIZING_LOCK (0).
-
- In addition, 'resize_counter' can be the negation of the odd
- number that it would normally be: in this case it is "probably
- write-protected" (see stm_hashtable_next()).
*/
- intptr_t resize_counter;
+ uintptr_t resize_counter;
stm_hashtable_entry_t *items[INITIAL_HASHTABLE_SIZE];
} stm_hashtable_table_t;
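
The switch from intptr_t to uintptr_t works because the "negative counter" encoding is gone; what remains is the original arithmetic. A hypothetical helper to illustrate it (the initial value (mask + 1) * 4 + 1 and the cost of 6 per insertion both appear later in this patch, in stm_hashtable_length_upper_bound() and in the 'rc -= 6' lines):

    /* Illustration only, not part of the patch: decode an odd counter. */
    static long entries_ever_inserted(stm_hashtable_table_t *table)
    {
        uintptr_t rc = table->resize_counter;
        assert(!IS_EVEN(rc));     /* odd: unlocked.  0: RESIZING_LOCK.
                                     other even: pointer to newer table. */
        uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
        return (initial_rc - rc) / 6;   /* each insertion subtracts 6 */
    }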
@@ -104,7 +94,7 @@
void stm_hashtable_free(stm_hashtable_t *hashtable)
{
- intptr_t rc = hashtable->initial_table.resize_counter;
+ uintptr_t rc = hashtable->initial_table.resize_counter;
free(hashtable);
while (IS_EVEN(rc)) {
assert(rc != RESIZING_LOCK);
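
The loop here frees the chain of outgrown tables; a sketch of the same chain walk in isolation (mirroring this function and the end of _stm_compact_hashtable() below):

    /* Sketch: reach the current table by following even resize_counter
       values, which are really pointers to the next, bigger table. */
    stm_hashtable_table_t *t = &hashtable->initial_table;
    uintptr_t rc = t->resize_counter;
    while (IS_EVEN(rc)) {
        assert(rc != RESIZING_LOCK);      /* 0 means a resize is running */
        t = (stm_hashtable_table_t *)rc;  /* follow the chain */
        rc = t->resize_counter;
    }
    /* here 't' is the most recent table and 'rc' its odd counter */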
@@ -119,9 +109,7 @@
{
long i;
for (i = 1; i <= NB_SEGMENTS; i++) {
- char *remote_base = get_segment_base(i);
- uint8_t remote_version = get_segment(i)->transaction_read_version;
- if (was_read_remote(remote_base, obj, remote_version))
+ if (was_read_remote(get_segment_base(i), obj))
return true;
}
return false;
@@ -155,8 +143,7 @@
static void _stm_rehash_hashtable(stm_hashtable_t *hashtable,
uintptr_t biggercount,
- int remove_unread_from_seg,
- bool rc_must_be_negative)
+ int remove_unread_from_seg)
{
dprintf(("rehash %p to %ld, remove_unread_from_seg=%d\n",
hashtable, biggercount, remove_unread_from_seg));
@@ -167,7 +154,7 @@
assert(biggertable); // XXX
stm_hashtable_table_t *table = hashtable->table;
- table->resize_counter = (intptr_t)biggertable;
+ table->resize_counter = (uintptr_t)biggertable;
/* ^^^ this unlocks the table by writing a non-zero value to
table->resize_counter, but the new value is a pointer to the
new bigger table, so IS_EVEN() is still true */
@@ -176,7 +163,7 @@
init_table(biggertable, biggercount);
uintptr_t j, mask = table->mask;
- intptr_t rc = biggertable->resize_counter;
+ uintptr_t rc = biggertable->resize_counter;
char *segment_base = get_segment_base(remove_unread_from_seg);
for (j = 0; j <= mask; j++) {
stm_hashtable_entry_t *entry = table->items[j];
@@ -191,10 +178,10 @@
}
}
_insert_clean(biggertable, entry);
+ assert(rc > 6);
rc -= 6;
}
- assert(rc > 0);
- biggertable->resize_counter = rc_must_be_negative ? -rc : rc;
+ biggertable->resize_counter = rc;
write_fence(); /* make sure that 'biggertable' is valid here,
and make sure 'table->resize_counter' is updated
@@ -246,8 +233,7 @@
whole process.
*/
- intptr_t rc = VOLATILE_TABLE(table)->resize_counter;
- bool rc_must_be_negative = false;
+ uintptr_t rc = VOLATILE_TABLE(table)->resize_counter;
/* if rc is RESIZING_LOCK (which is 0, so even), a concurrent thread
is writing to the hashtable. Or, if rc is another even number, it is
@@ -279,7 +265,6 @@
/* if rc is greater than 6, there is enough room for a new
item in the current table.
*/
- retry_adding:
if (rc > 6) {
/* we can only enter here once! If we allocate stuff, we may
run the GC, and so 'hashtableobj' might move afterwards. */
@@ -312,12 +297,22 @@
synchronization with other pieces of the code that may
change.
*/
+
+ /* First fetch the read marker of 'hashtableobj' in all
+ segments, before allocate_outside_nursery_large()
+ which might trigger a GC */
+ long j;
+ uint8_t readmarkers[NB_SEGMENTS];
+ for (j = 1; j <= NB_SEGMENTS; j++) {
+ readmarkers[j - 1] = get_read_marker(get_segment_base(j),
+ hashtableobj)->rm;
+ }
+
acquire_privatization_lock();
char *p = allocate_outside_nursery_large(
sizeof(stm_hashtable_entry_t));
entry = (stm_hashtable_entry_t *)(p - stm_object_pages);
- long j;
for (j = 0; j <= NB_SEGMENTS; j++) {
struct stm_hashtable_entry_s *e;
e = (struct stm_hashtable_entry_s *)
@@ -329,16 +324,19 @@
}
hashtable->additions += 0x100;
release_privatization_lock();
+
+ for (j = 1; j <= NB_SEGMENTS; j++) {
+ get_read_marker(get_segment_base(j), (object_t *)entry)->rm =
+ readmarkers[j - 1];
+ }
}
write_fence(); /* make sure 'entry' is fully initialized here */
table->items[i] = entry;
write_fence(); /* make sure 'table->items' is written here */
- rc -= 6;
- VOLATILE_TABLE(table)->resize_counter = (
- rc_must_be_negative ? -rc : rc); /* unlock */
+ VOLATILE_TABLE(table)->resize_counter = rc - 6; /* unlock */
return entry;
}
- else if (rc > 0) {
+ else {
/* if rc is smaller than 6, we must allocate a new bigger table.
*/
uintptr_t biggercount = table->mask + 1;
@@ -346,52 +344,9 @@
biggercount *= 4;
else
biggercount *= 2;
- _stm_rehash_hashtable(hashtable, biggercount, /*remove_unread=*/0,
- rc_must_be_negative);
+ _stm_rehash_hashtable(hashtable, biggercount, /*remove_unread=*/0);
goto restart;
}
- else {
- assert(rc < 0);
- assert(!_is_in_nursery(hashtableobj));
-
- /* if rc is negative, the hashtable is probably write-protected.
- Check if the read marker of the 'hashtableobj' is set in
- another segment.
- */
- int j, my_segment = STM_SEGMENT->segment_num;
- for (j = 1; j <= NB_SEGMENTS; j++) {
- if (j == my_segment)
- continue;
- if (was_read_remote(get_segment_base(j), hashtableobj)) {
- /* conflict! */
- table->resize_counter = rc; /* unlock */
- if (write_read_contention_management(j, hashtableobj)) {
- /* If we reach this point, we didn't abort, but we
- had to wait for the other thread to commit. If
- we did, then we have to restart. */
- return true;
- ...;
-
- }
- /* we aborted the other transaction without waiting, so
- we can just break out of this loop on
- modified_old_objects and continue with the next
- segment */
-
- xxx;
- }
- }
- }
-
- /* if even in this thread's segment it was not read, then there
- is no point in keeping it write-protected. So we set
- 'rc_must_be_negative', i.e. keep it write-protected, iff
- it was read locally.
- */
- rc_must_be_negative = was_read_local(hashtableobj);
- rc = -rc;
- goto retry_adding;
- }
}
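
The two loops added above (one before, one after the allocation) form a single mechanism; a condensed sketch of the whole dance, with the locking and entry initialization between them abbreviated:

    /* 1. Snapshot hobj's read marker in every segment *before* the
          allocation, which may trigger a GC that moves 'hashtableobj'. */
    uint8_t readmarkers[NB_SEGMENTS];
    long j;
    for (j = 1; j <= NB_SEGMENTS; j++)
        readmarkers[j - 1] = get_read_marker(get_segment_base(j),
                                             hashtableobj)->rm;

    /* 2. ... allocate_outside_nursery_large() and initialize 'entry',
          under the privatization lock ... */

    /* 3. Stamp the snapshot onto the new entry: every transaction that
          had read the complete list of keys is now considered to have
          read this entry too, each in its own segment. */
    for (j = 1; j <= NB_SEGMENTS; j++)
        get_read_marker(get_segment_base(j), (object_t *)entry)->rm =
            readmarkers[j - 1];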
object_t *stm_hashtable_read(object_t *hobj, stm_hashtable_t *hashtable,
@@ -413,81 +368,86 @@
e->object = nvalue;
}
-struct stm_hashtable_entry_s *
-stm_hashtable_next(object_t *hobj, stm_hashtable_t *hashtable,
- uintptr_t *pposition, stm_thread_local_t *tl)
+long stm_hashtable_length_upper_bound(stm_hashtable_t *hashtable)
{
- /* this assumes the simple c7 model whereby commits only occur with
- all other transaction paused at a known point. */
+ stm_hashtable_table_t *table;
+ uintptr_t rc;
+
+ restart:
+ table = VOLATILE_HASHTABLE(hashtable)->table;
+ rc = VOLATILE_TABLE(table)->resize_counter;
+ if (IS_EVEN(rc)) {
+ spin_loop();
+ goto restart;
+ }
+
+ uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+ uintptr_t num_entries_times_6 = initial_rc - rc;
+ return num_entries_times_6 / 6;
+}
+
+long stm_hashtable_list(object_t *hobj, stm_hashtable_t *hashtable,
+ stm_hashtable_entry_t **results)
+{
stm_hashtable_table_t *table;
intptr_t rc;
- /* First set the read marker. It will be left as long as we're running
- the same transaction. Note that this code assumes that nothing else
- can set the read marker! Also, if 'hobj' is still in the nursery,
- it was created by this transaction and there is nothing to do.
+ /* Set the read marker. It will be left as long as we're running
+ the same transaction.
*/
- if (!_is_in_nursery(hobj) && !was_read_local(hobj)) {
+ stm_read(hobj);
- stm_read(hobj);
+ /* Acquire and immediately release the lock. We don't actually
+ need to do anything while we hold the lock, but the point is to
+ wait until the lock is available, and to synchronize other
+ threads with the stm_read() done above.
+ */
+ restart:
+ table = VOLATILE_HASHTABLE(hashtable)->table;
+ rc = VOLATILE_TABLE(table)->resize_counter;
+ if (IS_EVEN(rc)) {
+ spin_loop();
+ goto restart;
+ }
+ if (!__sync_bool_compare_and_swap(&table->resize_counter, rc, rc))
+ goto restart;
- /* Change the 'resize_counter' field to its negative value. This
- must be done after we've set the read marker. */
- restart:
- table = VOLATILE_HASHTABLE(hashtable)->table;
- rc = VOLATILE_TABLE(table)->resize_counter;
- if (IS_EVEN(rc)) {
- spin_loop();
- goto restart;
+ /* Read all entries, check which ones are not NULL, count them,
+ and optionally list them in 'results'.
+ */
+ uintptr_t i, mask = table->mask;
+ stm_hashtable_entry_t *entry;
+ long nresult = 0;
+
+ if (results != NULL) {
+ /* collect the results in the provided list */
+ for (i = 0; i <= mask; i++) {
+ entry = VOLATILE_TABLE(table)->items[i];
+ if (entry != NULL) {
+ stm_read((object_t *)entry);
+ if (entry->object != NULL)
+ results[nresult++] = entry;
+ }
}
- if (!__sync_bool_compare_and_swap(&table->resize_counter, rc,
- rc > 0 ? -rc : rc))
- goto restart;
- /* Note that we did a compare-and-swap even if rc was already
- negative. This is needed for its memory-ordering effect,
- to ensure that from now on the other threads do see our
- read marker set. */
}
else {
- /* Read marker already set. Assume (and assert) that we
- already set a negative value into 'resize_counter'.
- Changes of 'table' or 'resize_counter' under our feet
- should not be possible here.
- */
- table = hashtable->table;
-
- if (!_is_in_nursery(hobj)) {
- assert(!IS_EVEN(table->resize_counter) &&
- table->resize_counter < 0);
+ /* don't collect, just get the exact number of results */
+ for (i = 0; i <= mask; i++) {
+ entry = VOLATILE_TABLE(table)->items[i];
+ if (entry != NULL) {
+ stm_read((object_t *)entry);
+ if (entry->object != NULL)
+ nresult++;
+ }
}
}
-
- /* At this point, the hashtable is write-protected: no other
- thread may add new key/value objects nor grow/replace the
- 'table'. The hashtable will remain write-protected as long as
- this transaction is running. Note that *this* thread is
- allowed to continue modifying the hashtable (unless another
- thread did also set a write protection).
- */
- uintptr_t position = *pposition;
- uintptr_t mask = table->mask;
- stm_hashtable_entry_t *entry;
-
- while (position <= mask) {
- entry = table->items[position++];
- if (entry != NULL) {
- *pposition = position;
- return entry;
- }
- }
- *pposition = (uintptr_t)-1;
- return NULL;
+ return nresult;
}
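
One subtlety above: the compare-and-swap stores rc back unchanged. My reading of why it is still needed (GCC's __sync_* builtins act as full memory barriers):

    /* No-op for the stored value, not for ordering:
       - if the CAS fails, rc moved under us (a concurrent insertion or
         resize), so we restart and re-read the table;
       - if it succeeds, the full barrier guarantees our stm_read(hobj)
         is visible to other threads before we scan the entries, pairing
         with the read-marker snapshot done by concurrent inserters. */
    if (!__sync_bool_compare_and_swap(&table->resize_counter, rc, rc))
        goto restart;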
static void _stm_compact_hashtable(stm_hashtable_t *hashtable)
{
stm_hashtable_table_t *table = hashtable->table;
- intptr_t rc = table->resize_counter;
+ uintptr_t rc = table->resize_counter;
assert(!IS_EVEN(rc));
if ((hashtable->additions >> 8) * 4 > table->mask) {
@@ -495,7 +455,7 @@
if (!segment_num) segment_num = 1;
hashtable->additions = segment_num;
uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
- uintptr_t num_entries_times_6 = initial_rc - (rc < 0 ? -rc : rc);
+ uintptr_t num_entries_times_6 = initial_rc - rc;
uintptr_t count = INITIAL_HASHTABLE_SIZE;
while (count * 4 < num_entries_times_6)
count *= 2;
@@ -504,15 +464,14 @@
assert(count <= table->mask + 1);
dprintf(("compact with %ld items:\n", num_entries_times_6 / 6));
- _stm_rehash_hashtable(hashtable, count, /*remove_unread=*/segment_num,
- /*rc_must_be_negative=*/rc < 0);
+ _stm_rehash_hashtable(hashtable, count, /*remove_unread=*/segment_num);
}
table = hashtable->table;
assert(!IS_EVEN(table->resize_counter));
if (table != &hashtable->initial_table) {
- intptr_t rc = hashtable->initial_table.resize_counter;
+ uintptr_t rc = hashtable->initial_table.resize_counter;
while (1) {
assert(IS_EVEN(rc));
assert(rc != RESIZING_LOCK);
@@ -523,7 +482,7 @@
rc = old_table->resize_counter;
free(old_table);
}
- hashtable->initial_table.resize_counter = (intptr_t)table;
+ hashtable->initial_table.resize_counter = (uintptr_t)table;
assert(IS_EVEN(hashtable->initial_table.resize_counter));
}
}
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -31,7 +31,7 @@
bool _stm_was_read(object_t *obj)
{
- return was_read_local(obj);
+ return was_read_remote(STM_SEGMENT->segment_base, obj);
}
bool _stm_was_written(object_t *obj)
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -545,8 +545,9 @@
object_t *stm_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
void stm_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
object_t *nvalue, stm_thread_local_t *);
-struct stm_hashtable_entry_s *stm_hashtable_next(
- object_t *, stm_hashtable_t *, uintptr_t *pposition, stm_thread_local_t *);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+ stm_hashtable_entry_t **results);
extern uint32_t stm_hashtable_entry_userdata;
void stm_hashtable_tracefn(stm_hashtable_t *, void (object_t **));
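
A hypothetical caller of the new API pair (the malloc-based buffer is made up for illustration; the bound also counts entries whose value was set back to NULL, so the exact number comes from the return value of stm_hashtable_list()):

    /* Sketch: snapshot the non-NULL entries of a hashtable. */
    long upper = stm_hashtable_length_upper_bound(hashtable);
    stm_hashtable_entry_t **buf =
        malloc(upper * sizeof(stm_hashtable_entry_t *));
    long n = stm_hashtable_list(hobj, hashtable, buf);
    /* n <= upper, barring concurrent insertions; passing results == NULL
       returns the exact count without storing anything */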