Author: Armin Rigo <[email protected]>
Branch:
Changeset: r1607:ee68afa62c2c
Date: 2015-02-02 19:58 +0100
http://bitbucket.org/pypy/stmgc/changeset/ee68afa62c2c/
Log: hg merge hashtable-iter
Add methods like .items(), .keys() and so on. Contrary to what the
name of the branch suggests, we *don't* support the lazy iterators.
diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -431,13 +431,12 @@
continue; /* no need to check: is pending immediate abort */
char *remote_base = get_segment_base(i);
- uint8_t remote_version = get_segment(i)->transaction_read_version;
LIST_FOREACH_R(
STM_PSEGMENT->modified_old_objects,
object_t * /*item*/,
({
- if (was_read_remote(remote_base, item, remote_version)) {
+ if (was_read_remote(remote_base, item)) {
/* A write-read conflict! */
dprintf(("write-read conflict on %p, our seg: %d, other:
%ld\n",
item, STM_SEGMENT->segment_num, i));
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -281,11 +281,17 @@
static stm_thread_local_t *abort_with_mutex_no_longjmp(void);
static void abort_data_structures_from_segment_num(int segment_num);
-static inline bool was_read_remote(char *base, object_t *obj,
- uint8_t other_transaction_read_version)
+static inline struct stm_read_marker_s *get_read_marker(char *base,
+ object_t *obj) {
+ return (struct stm_read_marker_s *)(base + (((uintptr_t)obj) >> 4));
+}
+
+static inline bool was_read_remote(char *base, object_t *obj)
{
- uint8_t rm = ((struct stm_read_marker_s *)
- (base + (((uintptr_t)obj) >> 4)))->rm;
+ uint8_t other_transaction_read_version =
+ ((struct stm_segment_info_s *)REAL_ADDRESS(base, STM_PSEGMENT))
+ ->transaction_read_version;
+ uint8_t rm = get_read_marker(base, obj)->rm;
assert(rm <= other_transaction_read_version);
return rm == other_transaction_read_version;
}
diff --git a/c7/stm/hashtable.c b/c7/stm/hashtable.c
--- a/c7/stm/hashtable.c
+++ b/c7/stm/hashtable.c
@@ -12,22 +12,28 @@
collision).
The main operations on a hashtable are reading or writing an object at a
-given index. It might support in the future enumerating the indexes of
-non-NULL objects.
+given index. It also supports fetching the list of non-NULL entries.
There are two markers for every index (a read and a write marker).
This is unlike regular arrays, which have only two markers in total.
+Additionally, we use the read marker for the hashtable object itself
+to mean "we have read the complete list of keys". This plays the role
+of a "global" read marker: when any thread adds a new key/value object
+to the hashtable, this new object's read marker is initialized with a
+copy of the "global" read marker --- in all segments.
+
Implementation
--------------
First idea: have the hashtable in raw memory, pointing to "entry"
-objects. The entry objects themselves point to the user-specified
-objects. The entry objects have the read/write markers. Every entry
-object, once created, stays around. It is only removed by the next
-major GC if it points to NULL and its read/write markers are not set
-in any currently-running transaction.
+objects (which are regular, GC- and STM-managed objects). The entry
+objects themselves point to the user-specified objects. The entry
+objects hold the read/write markers. Every entry object, once
+created, stays around. It is only removed by the next major GC if it
+points to NULL and its read/write markers are not set in any
+currently-running transaction.
References
----------
@@ -103,9 +109,7 @@
{
long i;
for (i = 1; i <= NB_SEGMENTS; i++) {
- char *remote_base = get_segment_base(i);
- uint8_t remote_version = get_segment(i)->transaction_read_version;
- if (was_read_remote(remote_base, obj, remote_version))
+ if (was_read_remote(get_segment_base(i), obj))
return true;
}
return false;
@@ -154,6 +158,7 @@
/* ^^^ this unlocks the table by writing a non-zero value to
table->resize_counter, but the new value is a pointer to the
new bigger table, so IS_EVEN() is still true */
+ assert(IS_EVEN(table->resize_counter));
init_table(biggertable, biggercount);
@@ -173,6 +178,7 @@
}
}
_insert_clean(biggertable, entry);
+ assert(rc > 6);
rc -= 6;
}
biggertable->resize_counter = rc;
@@ -216,7 +222,16 @@
perturb >>= PERTURB_SHIFT;
}
}
- /* here, we didn't find the 'entry' with the correct index. */
+ /* here, we didn't find the 'entry' with the correct index. Note
+ that even if the same 'table' is modified or resized by other
+ threads concurrently, any new item found from a race condition
+ would anyway contain NULL in the present segment (ensured by
+ the first write_fence() below). If the 'table' grows an entry
+ just after we checked above, then we go ahead and lock the
+ table; but after we get the lock, we will notice the new entry
+ (ensured by the second write_fence() below) and restart the
+ whole process.
+ */
uintptr_t rc = VOLATILE_TABLE(table)->resize_counter;
@@ -282,12 +297,22 @@
synchronization with other pieces of the code that may
change.
*/
+
+ /* First fetch the read marker of 'hashtableobj' in all
+ segments, before allocate_outside_nursery_large()
+ which might trigger a GC */
+ long j;
+ uint8_t readmarkers[NB_SEGMENTS];
+ for (j = 1; j <= NB_SEGMENTS; j++) {
+ readmarkers[j - 1] = get_read_marker(get_segment_base(j),
+ hashtableobj)->rm;
+ }
+
acquire_privatization_lock();
char *p = allocate_outside_nursery_large(
sizeof(stm_hashtable_entry_t));
entry = (stm_hashtable_entry_t *)(p - stm_object_pages);
- long j;
for (j = 0; j <= NB_SEGMENTS; j++) {
struct stm_hashtable_entry_s *e;
e = (struct stm_hashtable_entry_s *)
@@ -299,6 +324,11 @@
}
hashtable->additions += 0x100;
release_privatization_lock();
+
+ for (j = 1; j <= NB_SEGMENTS; j++) {
+ get_read_marker(get_segment_base(j), (object_t *)entry)->rm =
+ readmarkers[j - 1];
+ }
}
write_fence(); /* make sure 'entry' is fully initialized here */
table->items[i] = entry;
@@ -338,17 +368,94 @@
e->object = nvalue;
}
+long stm_hashtable_length_upper_bound(stm_hashtable_t *hashtable)
+{
+ stm_hashtable_table_t *table;
+ uintptr_t rc;
+
+ restart:
+ table = VOLATILE_HASHTABLE(hashtable)->table;
+ rc = VOLATILE_TABLE(table)->resize_counter;
+ if (IS_EVEN(rc)) {
+ spin_loop();
+ goto restart;
+ }
+
+ uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+ uintptr_t num_entries_times_6 = initial_rc - rc;
+ return num_entries_times_6 / 6;
+}
+
+long stm_hashtable_list(object_t *hobj, stm_hashtable_t *hashtable,
+ stm_hashtable_entry_t **results)
+{
+ stm_hashtable_table_t *table;
+ intptr_t rc;
+
+ /* Set the read marker. It will be left as long as we're running
+ the same transaction.
+ */
+ stm_read(hobj);
+
+ /* Acquire and immediately release the lock. We don't actually
+ need to do anything while we hold the lock, but the point is to
+ wait until the lock is available, and to synchronize other
+ threads with the stm_read() done above.
+ */
+ restart:
+ table = VOLATILE_HASHTABLE(hashtable)->table;
+ rc = VOLATILE_TABLE(table)->resize_counter;
+ if (IS_EVEN(rc)) {
+ spin_loop();
+ goto restart;
+ }
+ if (!__sync_bool_compare_and_swap(&table->resize_counter, rc, rc))
+ goto restart;
+
+ /* Read all entries, check which ones are not NULL, count them,
+ and optionally list them in 'results'.
+ */
+ uintptr_t i, mask = table->mask;
+ stm_hashtable_entry_t *entry;
+ long nresult = 0;
+
+ if (results != NULL) {
+ /* collect the results in the provided list */
+ for (i = 0; i <= mask; i++) {
+ entry = VOLATILE_TABLE(table)->items[i];
+ if (entry != NULL) {
+ stm_read((object_t *)entry);
+ if (entry->object != NULL)
+ results[nresult++] = entry;
+ }
+ }
+ }
+ else {
+ /* don't collect, just get the exact number of results */
+ for (i = 0; i <= mask; i++) {
+ entry = VOLATILE_TABLE(table)->items[i];
+ if (entry != NULL) {
+ stm_read((object_t *)entry);
+ if (entry->object != NULL)
+ nresult++;
+ }
+ }
+ }
+ return nresult;
+}
+
static void _stm_compact_hashtable(stm_hashtable_t *hashtable)
{
stm_hashtable_table_t *table = hashtable->table;
- assert(!IS_EVEN(table->resize_counter));
+ uintptr_t rc = table->resize_counter;
+ assert(!IS_EVEN(rc));
if ((hashtable->additions >> 8) * 4 > table->mask) {
int segment_num = (hashtable->additions & 0xFF);
if (!segment_num) segment_num = 1;
hashtable->additions = segment_num;
uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
- uintptr_t num_entries_times_6 = initial_rc - table->resize_counter;
+ uintptr_t num_entries_times_6 = initial_rc - rc;
uintptr_t count = INITIAL_HASHTABLE_SIZE;
while (count * 4 < num_entries_times_6)
count *= 2;
@@ -376,6 +483,7 @@
free(old_table);
}
hashtable->initial_table.resize_counter = (uintptr_t)table;
+ assert(IS_EVEN(hashtable->initial_table.resize_counter));
}
}
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -31,8 +31,7 @@
bool _stm_was_read(object_t *obj)
{
- return was_read_remote(STM_SEGMENT->segment_base, obj,
- STM_SEGMENT->transaction_read_version);
+ return was_read_remote(STM_SEGMENT->segment_base, obj);
}
bool _stm_was_written(object_t *obj)
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -545,6 +545,9 @@
object_t *stm_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
void stm_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
object_t *nvalue, stm_thread_local_t *);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+ stm_hashtable_entry_t **results);
extern uint32_t stm_hashtable_entry_userdata;
void stm_hashtable_tracefn(stm_hashtable_t *, void (object_t **));
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -167,17 +167,23 @@
void (*stmcb_finalizer)(object_t *);
typedef struct stm_hashtable_s stm_hashtable_t;
+typedef ... stm_hashtable_entry_t;
stm_hashtable_t *stm_hashtable_create(void);
void stm_hashtable_free(stm_hashtable_t *);
bool _check_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
object_t *hashtable_read_result;
bool _check_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
object_t *nvalue, stm_thread_local_t *tl);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+ stm_hashtable_entry_t **results);
uint32_t stm_hashtable_entry_userdata;
void stm_hashtable_tracefn(stm_hashtable_t *, void (object_t **));
void _set_hashtable(object_t *obj, stm_hashtable_t *h);
stm_hashtable_t *_get_hashtable(object_t *obj);
+uintptr_t _get_entry_index(stm_hashtable_entry_t *entry);
+object_t *_get_entry_object(stm_hashtable_entry_t *entry);
""")
@@ -308,6 +314,18 @@
return *(stm_hashtable_t *TLPREFIX *)field_addr;
}
+uintptr_t _get_entry_index(stm_hashtable_entry_t *entry)
+{
+ stm_read((object_t *)entry);
+ return entry->index;
+}
+
+object_t *_get_entry_object(stm_hashtable_entry_t *entry)
+{
+ stm_read((object_t *)entry);
+ return entry->object;
+}
+
void _set_ptr(object_t *obj, int n, object_t *v)
{
long nrefs = (long)((myobj_t*)obj)->type_id - 421420;
diff --git a/c7/test/test_hashtable.py b/c7/test/test_hashtable.py
--- a/c7/test/test_hashtable.py
+++ b/c7/test/test_hashtable.py
@@ -16,6 +16,24 @@
if res:
raise Conflict
+def ht_length_upper_bound(o):
+ h = get_hashtable(o)
+ return lib.stm_hashtable_length_upper_bound(h)
+
+def htitems(o):
+ h = get_hashtable(o)
+ upper_bound = lib.stm_hashtable_length_upper_bound(h)
+ entries = ffi.new("stm_hashtable_entry_t *[]", upper_bound)
+ count = lib.stm_hashtable_list(o, h, entries)
+ assert count <= upper_bound
+ return [(lib._get_entry_index(entries[i]),
+ lib._get_entry_object(entries[i])) for i in range(count)]
+
+def htlen(o):
+ h = get_hashtable(o)
+ count = lib.stm_hashtable_list(o, h, ffi.NULL)
+ return count
+
class BaseTestHashtable(BaseTest):
@@ -236,6 +254,36 @@
assert htget(h, 11) != ffi.NULL
assert htget(h, 12) != ffi.NULL
+ def test_list_1(self):
+ self.start_transaction()
+ h = self.allocate_hashtable()
+ tl0 = self.tls[self.current_thread]
+ for i in range(32):
+ assert ht_length_upper_bound(h) == i
+ htset(h, 19 ^ i, stm_allocate(32), tl0)
+ assert ht_length_upper_bound(h) == 32
+
+ def test_list_2(self):
+ self.start_transaction()
+ h = self.allocate_hashtable()
+ tl0 = self.tls[self.current_thread]
+ expected = []
+ for i in range(29):
+ lp = stm_allocate(32)
+ htset(h, 19 ^ i, lp, tl0)
+ expected.append((19 ^ i, lp))
+ lst = htitems(h)
+ assert len(lst) == 29
+ assert sorted(lst) == sorted(expected)
+
+ def test_list_3(self):
+ self.start_transaction()
+ h = self.allocate_hashtable()
+ tl0 = self.tls[self.current_thread]
+ for i in range(29):
+ htset(h, 19 ^ i, stm_allocate(32), tl0)
+ assert htlen(h) == 29
+
class TestRandomHashtable(BaseTestHashtable):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit