Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r1607:ee68afa62c2c
Date: 2015-02-02 19:58 +0100
http://bitbucket.org/pypy/stmgc/changeset/ee68afa62c2c/

Log:    hg merge hashtable-iter

        Add methods like .items(), .keys() and so on. Unlike what the
        name of the branch suggests, we *don't* support the lazy iterators.

diff --git a/c7/stm/core.c b/c7/stm/core.c
--- a/c7/stm/core.c
+++ b/c7/stm/core.c
@@ -431,13 +431,12 @@
             continue;    /* no need to check: is pending immediate abort */
 
         char *remote_base = get_segment_base(i);
-        uint8_t remote_version = get_segment(i)->transaction_read_version;
 
         LIST_FOREACH_R(
             STM_PSEGMENT->modified_old_objects,
             object_t * /*item*/,
             ({
-                if (was_read_remote(remote_base, item, remote_version)) {
+                if (was_read_remote(remote_base, item)) {
                     /* A write-read conflict! */
                     dprintf(("write-read conflict on %p, our seg: %d, other: %ld\n",
                              item, STM_SEGMENT->segment_num, i));
diff --git a/c7/stm/core.h b/c7/stm/core.h
--- a/c7/stm/core.h
+++ b/c7/stm/core.h
@@ -281,11 +281,17 @@
 static stm_thread_local_t *abort_with_mutex_no_longjmp(void);
 static void abort_data_structures_from_segment_num(int segment_num);
 
-static inline bool was_read_remote(char *base, object_t *obj,
-                                   uint8_t other_transaction_read_version)
+static inline struct stm_read_marker_s *get_read_marker(char *base,
+                                                        object_t *obj) {
+    return (struct stm_read_marker_s *)(base + (((uintptr_t)obj) >> 4));
+}
+
+static inline bool was_read_remote(char *base, object_t *obj)
 {
-    uint8_t rm = ((struct stm_read_marker_s *)
-                  (base + (((uintptr_t)obj) >> 4)))->rm;
+    uint8_t other_transaction_read_version =
+        ((struct stm_segment_info_s *)REAL_ADDRESS(base, STM_PSEGMENT))
+            ->transaction_read_version;
+    uint8_t rm = get_read_marker(base, obj)->rm;
     assert(rm <= other_transaction_read_version);
     return rm == other_transaction_read_version;
 }
diff --git a/c7/stm/hashtable.c b/c7/stm/hashtable.c
--- a/c7/stm/hashtable.c
+++ b/c7/stm/hashtable.c
@@ -12,22 +12,28 @@
 collision).
 
 The main operations on a hashtable are reading or writing an object at a
-given index.  It might support in the future enumerating the indexes of
-non-NULL objects.
+given index.  It also supports fetching the list of non-NULL entries.
 
 There are two markers for every index (a read and a write marker).
 This is unlike regular arrays, which have only two markers in total.
 
+Additionally, we use the read marker for the hashtable object itself
+to mean "we have read the complete list of keys".  This plays the role
+of a "global" read marker: when any thread adds a new key/value object
+to the hashtable, this new object's read marker is initialized with a
+copy of the "global" read marker --- in all segments.
+
 
 Implementation
 --------------
 
 First idea: have the hashtable in raw memory, pointing to "entry"
-objects.  The entry objects themselves point to the user-specified
-objects.  The entry objects have the read/write markers.  Every entry
-object, once created, stays around.  It is only removed by the next
-major GC if it points to NULL and its read/write markers are not set
-in any currently-running transaction.
+objects (which are regular, GC- and STM-managed objects).  The entry
+objects themselves point to the user-specified objects.  The entry
+objects hold the read/write markers.  Every entry object, once
+created, stays around.  It is only removed by the next major GC if it
+points to NULL and its read/write markers are not set in any
+currently-running transaction.
 
 References
 ----------
@@ -103,9 +109,7 @@
 {
     long i;
     for (i = 1; i <= NB_SEGMENTS; i++) {
-        char *remote_base = get_segment_base(i);
-        uint8_t remote_version = get_segment(i)->transaction_read_version;
-        if (was_read_remote(remote_base, obj, remote_version))
+        if (was_read_remote(get_segment_base(i), obj))
             return true;
     }
     return false;
@@ -154,6 +158,7 @@
     /* ^^^ this unlocks the table by writing a non-zero value to
        table->resize_counter, but the new value is a pointer to the
        new bigger table, so IS_EVEN() is still true */
+    assert(IS_EVEN(table->resize_counter));
 
     init_table(biggertable, biggercount);
 
@@ -173,6 +178,7 @@
             }
         }
         _insert_clean(biggertable, entry);
+        assert(rc > 6);
         rc -= 6;
     }
     biggertable->resize_counter = rc;
@@ -216,7 +222,16 @@
             perturb >>= PERTURB_SHIFT;
         }
     }
-    /* here, we didn't find the 'entry' with the correct index. */
+    /* here, we didn't find the 'entry' with the correct index.  Note
+       that even if the same 'table' is modified or resized by other
+       threads concurrently, any new item found from a race condition
+       would anyway contain NULL in the present segment (ensured by
+       the first write_fence() below).  If the 'table' grows an entry
+       just after we checked above, then we go ahead and lock the
+       table; but after we get the lock, we will notice the new entry
+       (ensured by the second write_fence() below) and restart the
+       whole process.
+     */
 
     uintptr_t rc = VOLATILE_TABLE(table)->resize_counter;
 
@@ -282,12 +297,22 @@
                synchronization with other pieces of the code that may
                change.
             */
+
+            /* First fetch the read marker of 'hashtableobj' in all
+               segments, before allocate_outside_nursery_large()
+               which might trigger a GC */
+            long j;
+            uint8_t readmarkers[NB_SEGMENTS];
+            for (j = 1; j <= NB_SEGMENTS; j++) {
+                readmarkers[j - 1] = get_read_marker(get_segment_base(j),
+                                                     hashtableobj)->rm;
+            }
+
             acquire_privatization_lock();
             char *p = allocate_outside_nursery_large(
                           sizeof(stm_hashtable_entry_t));
             entry = (stm_hashtable_entry_t *)(p - stm_object_pages);
 
-            long j;
             for (j = 0; j <= NB_SEGMENTS; j++) {
                 struct stm_hashtable_entry_s *e;
                 e = (struct stm_hashtable_entry_s *)
@@ -299,6 +324,11 @@
             }
             hashtable->additions += 0x100;
             release_privatization_lock();
+
+            for (j = 1; j <= NB_SEGMENTS; j++) {
+                get_read_marker(get_segment_base(j), (object_t *)entry)->rm =
+                    readmarkers[j - 1];
+            }
         }
         write_fence();     /* make sure 'entry' is fully initialized here */
         table->items[i] = entry;
@@ -338,17 +368,94 @@
     e->object = nvalue;
 }
 
+long stm_hashtable_length_upper_bound(stm_hashtable_t *hashtable)
+{
+    stm_hashtable_table_t *table;
+    uintptr_t rc;
+
+ restart:
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    rc = VOLATILE_TABLE(table)->resize_counter;
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+
+    uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
+    uintptr_t num_entries_times_6 = initial_rc - rc;
+    return num_entries_times_6 / 6;
+}
+
+long stm_hashtable_list(object_t *hobj, stm_hashtable_t *hashtable,
+                        stm_hashtable_entry_t **results)
+{
+    stm_hashtable_table_t *table;
+    intptr_t rc;
+
+    /* Set the read marker.  It will be left as long as we're running
+       the same transaction.
+    */
+    stm_read(hobj);
+
+    /* Acquire and immediately release the lock.  We don't actually
+       need to do anything while we hold the lock, but the point is to
+       wait until the lock is available, and to synchronize other
+       threads with the stm_read() done above.
+     */
+ restart:
+    table = VOLATILE_HASHTABLE(hashtable)->table;
+    rc = VOLATILE_TABLE(table)->resize_counter;
+    if (IS_EVEN(rc)) {
+        spin_loop();
+        goto restart;
+    }
+    if (!__sync_bool_compare_and_swap(&table->resize_counter, rc, rc))
+        goto restart;
+
+    /* Read all entries, check which ones are not NULL, count them,
+       and optionally list them in 'results'.
+    */
+    uintptr_t i, mask = table->mask;
+    stm_hashtable_entry_t *entry;
+    long nresult = 0;
+
+    if (results != NULL) {
+        /* collect the results in the provided list */
+        for (i = 0; i <= mask; i++) {
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                stm_read((object_t *)entry);
+                if (entry->object != NULL)
+                    results[nresult++] = entry;
+            }
+        }
+    }
+    else {
+        /* don't collect, just get the exact number of results */
+        for (i = 0; i <= mask; i++) {
+            entry = VOLATILE_TABLE(table)->items[i];
+            if (entry != NULL) {
+                stm_read((object_t *)entry);
+                if (entry->object != NULL)
+                    nresult++;
+            }
+        }
+    }
+    return nresult;
+}
+
 static void _stm_compact_hashtable(stm_hashtable_t *hashtable)
 {
     stm_hashtable_table_t *table = hashtable->table;
-    assert(!IS_EVEN(table->resize_counter));
+    uintptr_t rc = table->resize_counter;
+    assert(!IS_EVEN(rc));
 
     if ((hashtable->additions >> 8) * 4 > table->mask) {
         int segment_num = (hashtable->additions & 0xFF);
         if (!segment_num) segment_num = 1;
         hashtable->additions = segment_num;
         uintptr_t initial_rc = (table->mask + 1) * 4 + 1;
-        uintptr_t num_entries_times_6 = initial_rc - table->resize_counter;
+        uintptr_t num_entries_times_6 = initial_rc - rc;
         uintptr_t count = INITIAL_HASHTABLE_SIZE;
         while (count * 4 < num_entries_times_6)
             count *= 2;
@@ -376,6 +483,7 @@
             free(old_table);
         }
         hashtable->initial_table.resize_counter = (uintptr_t)table;
+        assert(IS_EVEN(hashtable->initial_table.resize_counter));
     }
 }
 
diff --git a/c7/stm/misc.c b/c7/stm/misc.c
--- a/c7/stm/misc.c
+++ b/c7/stm/misc.c
@@ -31,8 +31,7 @@
 
 bool _stm_was_read(object_t *obj)
 {
-    return was_read_remote(STM_SEGMENT->segment_base, obj,
-                           STM_SEGMENT->transaction_read_version);
+    return was_read_remote(STM_SEGMENT->segment_base, obj);
 }
 
 bool _stm_was_written(object_t *obj)
diff --git a/c7/stmgc.h b/c7/stmgc.h
--- a/c7/stmgc.h
+++ b/c7/stmgc.h
@@ -545,6 +545,9 @@
 object_t *stm_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
 void stm_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
                          object_t *nvalue, stm_thread_local_t *);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+                        stm_hashtable_entry_t **results);
 extern uint32_t stm_hashtable_entry_userdata;
 void stm_hashtable_tracefn(stm_hashtable_t *, void (object_t **));
 
diff --git a/c7/test/support.py b/c7/test/support.py
--- a/c7/test/support.py
+++ b/c7/test/support.py
@@ -167,17 +167,23 @@
 void (*stmcb_finalizer)(object_t *);
 
 typedef struct stm_hashtable_s stm_hashtable_t;
+typedef ... stm_hashtable_entry_t;
 stm_hashtable_t *stm_hashtable_create(void);
 void stm_hashtable_free(stm_hashtable_t *);
 bool _check_hashtable_read(object_t *, stm_hashtable_t *, uintptr_t key);
 object_t *hashtable_read_result;
 bool _check_hashtable_write(object_t *, stm_hashtable_t *, uintptr_t key,
                             object_t *nvalue, stm_thread_local_t *tl);
+long stm_hashtable_length_upper_bound(stm_hashtable_t *);
+long stm_hashtable_list(object_t *, stm_hashtable_t *,
+                        stm_hashtable_entry_t **results);
 uint32_t stm_hashtable_entry_userdata;
 void stm_hashtable_tracefn(stm_hashtable_t *, void (object_t **));
 
 void _set_hashtable(object_t *obj, stm_hashtable_t *h);
 stm_hashtable_t *_get_hashtable(object_t *obj);
+uintptr_t _get_entry_index(stm_hashtable_entry_t *entry);
+object_t *_get_entry_object(stm_hashtable_entry_t *entry);
 """)
 
 
@@ -308,6 +314,18 @@
     return *(stm_hashtable_t *TLPREFIX *)field_addr;
 }
 
+uintptr_t _get_entry_index(stm_hashtable_entry_t *entry)
+{
+    stm_read((object_t *)entry);
+    return entry->index;
+}
+
+object_t *_get_entry_object(stm_hashtable_entry_t *entry)
+{
+    stm_read((object_t *)entry);
+    return entry->object;
+}
+
 void _set_ptr(object_t *obj, int n, object_t *v)
 {
     long nrefs = (long)((myobj_t*)obj)->type_id - 421420;
diff --git a/c7/test/test_hashtable.py b/c7/test/test_hashtable.py
--- a/c7/test/test_hashtable.py
+++ b/c7/test/test_hashtable.py
@@ -16,6 +16,24 @@
     if res:
         raise Conflict
 
+def ht_length_upper_bound(o):
+    h = get_hashtable(o)
+    return lib.stm_hashtable_length_upper_bound(h)
+
+def htitems(o):
+    h = get_hashtable(o)
+    upper_bound = lib.stm_hashtable_length_upper_bound(h)
+    entries = ffi.new("stm_hashtable_entry_t *[]", upper_bound)
+    count = lib.stm_hashtable_list(o, h, entries)
+    assert count <= upper_bound
+    return [(lib._get_entry_index(entries[i]),
+             lib._get_entry_object(entries[i])) for i in range(count)]
+
+def htlen(o):
+    h = get_hashtable(o)
+    count = lib.stm_hashtable_list(o, h, ffi.NULL)
+    return count
+
 
 class BaseTestHashtable(BaseTest):
 
@@ -236,6 +254,36 @@
         assert htget(h, 11) != ffi.NULL
         assert htget(h, 12) != ffi.NULL
 
+    def test_list_1(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        for i in range(32):
+            assert ht_length_upper_bound(h) == i
+            htset(h, 19 ^ i, stm_allocate(32), tl0)
+        assert ht_length_upper_bound(h) == 32
+
+    def test_list_2(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        expected = []
+        for i in range(29):
+            lp = stm_allocate(32)
+            htset(h, 19 ^ i, lp, tl0)
+            expected.append((19 ^ i, lp))
+        lst = htitems(h)
+        assert len(lst) == 29
+        assert sorted(lst) == sorted(expected)
+
+    def test_list_3(self):
+        self.start_transaction()
+        h = self.allocate_hashtable()
+        tl0 = self.tls[self.current_thread]
+        for i in range(29):
+            htset(h, 19 ^ i, stm_allocate(32), tl0)
+        assert htlen(h) == 29
+
 
 class TestRandomHashtable(BaseTestHashtable):
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to