Author: Remi Meier <remi.me...@gmail.com>
Branch: c8-reshare-pages
Changeset: r2073:e08f018e54b8
Date: 2017-06-14 15:32 +0200
http://bitbucket.org/pypy/stmgc/changeset/e08f018e54b8/

Log:    understand and document what this branch is doing

        A try to write down the logic of the page resharing approach while
        also improving the code.

diff --git a/c8/doc/page-resharing.md b/c8/doc/page-resharing.md
new file mode 100644
--- /dev/null
+++ b/c8/doc/page-resharing.md
@@ -0,0 +1,79 @@
+
+
+# Page Resharing #
+
+The idea of page resharing is that, after a while of running a multi-threaded
+program, all `S` segments have read most pages of the heap. Thus, nearly all of
+the heap memory is duplicated `S` times. While very wasteful, such duplication
+also negatively affects the performance of the validation operation. During
+validation, all committed changes need to be imported for all accessible pages
+in a segment. Thus, if all pages are accessible, all changes need to be copied
+in. Instead, it is probably better to sometimes mark pages inaccessible again if
+they are rarely accessed in that segment, and we therefore don't need to import
+changes for these pages; lessening the work done by the validation operation.
+
+
+## Situation without Resharing ##
+
+Pages can only be ACCESSIBLE (readable *and* writable) or INACCESSIBLE (neither)
+in a segment. Seg0's pages are always ACCESSIBLE.
+
+New (old) objects get allocated during minor GCs in pages of the current
+segment. On commit, these objects get copied to seg0 and to all segments in
+which that page is also ACCESSIBLE. 
+
+Whenever we access an obj (reading *or* writing) and the obj's page is not
+ACCESSIBLE yet, we get a signal and transition the page from INACCESSIBLE to
+ACCESSIBLE. Other segments are unaffected.
+
+
+## Situation with Resharing ##
+
+Pages can be NOACC, RO, ACC. 
+
+`RO` provides the revision of seg0. Whenever a TX makes a page `ACC` while there
+are `RO` pages around, make all `RO` pages `NOACC` in all segments. This eager
+approach means that we do not need all privatization-locks (write) in many
+places where we would otherwise need a `RO->NOACC` transition. Also, we expect
+the `RO` pages to go out-of-date when we commit anyway (soon).
+
+*INVARIANT*: whenever a segment has a `RO` page, that page has the same content
+as seg0's version of that page.
+
+*INVARIANT*: if there are `RO` pages around, no segment has the page `ACC`
+
+*PROPERTY I*: Once a page is `ACC`, it stays `ACC` until major GC; no `RO` can
+exist until major GC reshares (`ACC -> RO`) and makes seg0 up-to-date.
+
+*PROPERTY II*: Validation will never try to import into `RO` pages, since (I)
+guarantees that it wouldn't be a `RO` anymore if there was a change to import.
+
+
+In signal handler:
+
+    if read or write:
+      if is `RO`:
+        `RO -> ACC` (and `RO -> NOACC` for all others)
+      else if is `NOACC`:
+        if !is_write and no one has `ACC`:
+          `NOACC -> RO`
+        else:
+          `NOACC -> ACC`
+
+On validate: always imports into `ACC`, into `RO` would be a bug.
+
+During major GC:
+
+ 1. Validation of seg0: gets all changes; any `RO` views still around means that
+    there was *no change* in those pages, so the views stay valid.
+ 2. All other segments validate their `ACC` pages; again `RO` pages *cannot*
+    have changes that need importing.
+ 3. While tracing modified objs and overflow objs, remember pages with
+    modifications. These *must not* change from `ACC` to `RO`.
+ 4. Loop over some pages (resharing can be distributed over several major GCs),
+    and do `ACC -> RO` for all segments iff the previous step shows that to be
+    valid. After that, the INVARIANTs need to hold.
+ 
+
+
+
diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -67,8 +67,10 @@
         /* never import anything into READONLY pages */
         assert(get_page_status_in(my_segnum, current_page_num) != 
PAGE_READONLY);
 
-        if (pagenum == -1) {
-            if (get_page_status_in(my_segnum, current_page_num) != 
PAGE_ACCESSIBLE)
+        if (pagenum == -1UL) {
+            assert(IMPLY(my_segnum == 0,
+                         get_page_status_in(my_segnum, current_page_num) == 
PAGE_ACCESSIBLE));
+            if (get_page_status_in(my_segnum, current_page_num) == 
PAGE_NO_ACCESS)
                 continue;
         } else if (pagenum != current_page_num) {
             continue;
@@ -793,7 +795,7 @@
 }
 
 
-static void touch_all_pages_of_obj(object_t *obj, size_t obj_size)
+static void make_all_pages_of_obj_accessible(object_t *obj, size_t obj_size)
 {
     /* XXX: make this function not needed */
     int my_segnum = STM_SEGMENT->segment_num;
@@ -806,7 +808,7 @@
         end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
     }
 
-    dprintf(("touch_all_pages_of_obj(%p, %lu): %ld-%ld\n",
+    dprintf(("make_all_pages_of_obj_accessible(%p, %lu): %ld-%ld\n",
              obj, obj_size, first_page, end_page));
 
     acquire_privatization_lock(STM_SEGMENT->segment_num);
@@ -816,8 +818,8 @@
 
         if (get_page_status_in(my_segnum, page) != PAGE_ACCESSIBLE) {
             release_privatization_lock(STM_SEGMENT->segment_num);
-            /* emulate pagefault -> PAGE_ACCESSIBLE/READONLY: */
-            handle_segfault_in_page(page, false);
+            /* emulate pagefault -> PAGE_ACCESSIBLE: */
+            handle_segfault_in_page(page, true);
             volatile char *dummy = REAL_ADDRESS(STM_SEGMENT->segment_base, 
page * 4096UL);
             *dummy = *dummy;            /* force segfault (incl. writing) */
             acquire_privatization_lock(STM_SEGMENT->segment_num);
@@ -863,7 +865,8 @@
         realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
         obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
 
-        touch_all_pages_of_obj(obj, obj_size);
+        /* ACCESSIBLE, not READONLY since we need to write flags into the obj 
*/
+        make_all_pages_of_obj_accessible(obj, obj_size);
     }
 
     if (mark_card) {
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -302,7 +302,7 @@
 static stm_thread_local_t *abort_with_mutex_no_longjmp(void);
 static void abort_data_structures_from_segment_num(int segment_num);
 
-static void touch_all_pages_of_obj(object_t *obj, size_t obj_size);
+static void make_all_pages_of_obj_accessible(object_t *obj, size_t obj_size);
 
 static void synchronize_object_enqueue(object_t *obj);
 static void synchronize_objects_flush(void);
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -249,6 +249,7 @@
             count++;
         }
     }
+    _assert_page_status_invariants(pagenum);
 
     return count;
 }
@@ -278,7 +279,7 @@
             fprintf(stderr, "COLLECT_HINTS_ONLY\n");
 
             /* XXX: since major GC also makes sure that currently modified 
objs are
-               "modified recently", maybe don't need to do it in 
touch_all_pages_of_obj() */
+               "modified recently", maybe don't need to do it in 
make_all_pages_of_obj_accessible() */
             return;
         }
     }
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -671,7 +671,7 @@
     o->stm_flags = 0;
     /* make all pages of 'o' accessible as synchronize_obj_flush() in minor
        collections assumes all young objs are fully accessible. */
-    touch_all_pages_of_obj(o, size_rounded_up);
+    make_all_pages_of_obj_accessible(o, size_rounded_up);
 #endif
     return o;
 }
diff --git a/c8/stm/pages.h b/c8/stm/pages.h
--- a/c8/stm/pages.h
+++ b/c8/stm/pages.h
@@ -157,3 +157,41 @@
     ps->by_segment &= ~(0b11UL << seg_shift); /* clear */
     ps->by_segment |= status << seg_shift; /* set */
 }
+
+__attribute__((unused)) /* debug helper: print the status of page 'pagenum' in each segment to stderr */
+static void _debug_page_status(uintptr_t pagenum)
+{
+    for (long i = 0; i < NB_SEGMENTS; i++) { /* every segment, seg0 included */
+        uint8_t status = get_page_status_in(i, pagenum);
+        switch (status) { /* no default case: any other status value prints nothing */
+        case PAGE_NO_ACCESS:
+            fprintf(stderr, "in seg %ld: NO_ACC\n", i);
+            break;
+        case PAGE_READONLY:
+            fprintf(stderr, "in seg %ld: RO\n", i);
+            break;
+        case PAGE_ACCESSIBLE:
+            fprintf(stderr, "in seg %ld: ACC\n", i);
+            break;
+        }
+    }
+}
+
+__attribute__((unused)) /* debug check: 'pagenum' must not be RO in one segment while ACC in another */
+static void _assert_page_status_invariants(uintptr_t pagenum)
+{
+#ifndef NDEBUG
+    bool has_ro = false;
+    bool has_acc = false;
+    for (long i = 1; i < NB_SEGMENTS; i++) { /* starts at 1: seg0's status is not part of the check */
+        uint8_t status = get_page_status_in(i, pagenum);
+        has_ro |= (status == PAGE_READONLY);
+        has_acc |= (status == PAGE_ACCESSIBLE);
+    }
+
+    if (has_ro && has_acc) { /* RO and ACC coexist: dump all statuses, then fail the assert */
+        _debug_page_status(pagenum);
+        assert(false);
+    }
+#endif /* with NDEBUG defined the function body is empty */
+}
diff --git a/c8/stm/signal_handler.c b/c8/stm/signal_handler.c
--- a/c8/stm/signal_handler.c
+++ b/c8/stm/signal_handler.c
@@ -5,7 +5,6 @@
 
 
 
-
 static void setup_signal_handler(void)
 {
     struct sigaction act;
@@ -61,7 +60,7 @@
 }
 
 
-static void readonly_to_accessible(int my_segnum, uintptr_t pagenum)
+static void any_to_accessible(int my_segnum, uintptr_t pagenum)
 {
     /* make our page write-ready */
     page_mark_accessible(my_segnum, pagenum);
@@ -91,22 +90,55 @@
 
     long i;
     int my_segnum = STM_SEGMENT->segment_num;
+    uint8_t page_status = get_page_status_in(my_segnum, pagenum);
 
-    uint8_t page_status = get_page_status_in(my_segnum, pagenum);
     assert(page_status == PAGE_NO_ACCESS
            || page_status == PAGE_READONLY);
+    _assert_page_status_invariants(pagenum);
 
-
-    //is_write=false;
-    if (page_status == PAGE_READONLY || is_write) {
-        dprintf(("SHORTCUT\n"));
-        readonly_to_accessible(my_segnum, pagenum);
-    }
     if (page_status == PAGE_READONLY) {
+        /* RO -> ACC */
+        assert(is_write);       /* should only fail if linux kernel changed */
+        any_to_accessible(my_segnum, pagenum);
         ro_to_acc++;
 
+        /* if was RO, page already has the right contents */
+        _assert_page_status_invariants(pagenum);
         release_all_privatization_locks();
         return;
+
+    }
+
+    assert(page_status == PAGE_NO_ACCESS);
+
+    /* if this is just a read-access, try to get a RO view: */
+    if (!is_write) {
+        bool acc_exists = false;
+        for (i = 1; i < NB_SEGMENTS; i++) {
+            if (i == my_segnum)
+                continue;
+
+            if (get_page_status_in(i, pagenum) == PAGE_ACCESSIBLE) {
+                acc_exists = true;
+                break;
+            }
+        }
+
+        if (!acc_exists) {
+            /* if there is no ACC version around, it means noone ever had that 
page
+             * ACC since the last major GC -> seg0 has the most current 
revision and
+             * we can get a RO of that. (of course only if this is not a
+             * write-access anyway) */
+
+            /* this case could be avoided by making all NO_ACCESS to READONLY
+               when resharing pages (XXX: better?).
+               We may go from NO_ACCESS->READONLY->ACCESSIBLE */
+            dprintf((" > make a previously NO_ACCESS page READONLY\n"));
+            page_mark_readonly(my_segnum, pagenum);
+            _assert_page_status_invariants(pagenum);
+            release_all_privatization_locks();
+            return;
+        }
     }
 
     /* find a suitable page to copy from in other segments:
@@ -116,26 +148,23 @@
      * Note: simply finding the most recent revision would be a conservative 
strategy, but
      *       requires going back in time more often (see below)
      */
+
+    /* special case: if there are RO versions around, we want to copy from 
seg0,
+     * since we make RO -> NOACC below before we copy (which wouldn't work). */
     int copy_from_segnum = -1;
     uint64_t copy_from_rev = 0;
     uint64_t target_rev = STM_PSEGMENT->last_commit_log_entry->rev_num;
-    bool was_readonly = false;
     for (i = 1; i < NB_SEGMENTS; i++) {
         if (i == my_segnum)
             continue;
 
-        if (!was_readonly && get_page_status_in(i, pagenum) == PAGE_READONLY) {
-            was_readonly = true;
-            break;
-        }
-
         struct stm_commit_log_entry_s *log_entry;
         log_entry = get_priv_segment(i)->last_commit_log_entry;
 
         /* - if not found anything, initialise copy_from_rev
          * - else if target_rev is higher than everything we found, find 
newest among them
          * - else: find revision that is as close to target_rev as possible    
     */
-        bool accessible = get_page_status_in(i, pagenum) != PAGE_NO_ACCESS;
+        bool accessible = get_page_status_in(i, pagenum) == PAGE_ACCESSIBLE;
         bool uninit = copy_from_segnum == -1;
         bool find_most_recent = copy_from_rev < target_rev && 
log_entry->rev_num > copy_from_rev;
         bool find_closest = copy_from_rev >= target_rev && (
@@ -150,29 +179,19 @@
     }
     OPT_ASSERT(copy_from_segnum != my_segnum);
 
-    if (was_readonly) {
-        assert(page_status == PAGE_NO_ACCESS);
-        /* this case could be avoided by making all NO_ACCESS to READONLY
-           when resharing pages (XXX: better?).
-           We may go from NO_ACCESS->READONLY->ACCESSIBLE on write with
-           2 SIGSEGV in a row.*/
-        dprintf((" > make a previously NO_ACCESS page READONLY\n"));
-        page_mark_readonly(my_segnum, pagenum);
 
-        release_all_privatization_locks();
-        return;
-    }
-
-    /* make our page write-ready */
-    if (!is_write) // is_write -> already marked accessible above
-        page_mark_accessible(my_segnum, pagenum);
+    /* make our page write-ready and reconstruct contents */
+    any_to_accessible(my_segnum, pagenum);
+    _assert_page_status_invariants(pagenum);
 
     /* account for this page now: XXX */
     /* increment_total_allocated(4096); */
 
     if (copy_from_segnum == -1) {
         /* this page is only accessible in the sharing segment seg0 so far (new
-           allocation). We can thus simply mark it accessible here. */
+           allocation). Or it was only in RO pages, which are the same as seg0.
+           We can thus simply mark it accessible here w/o undoing any
+           modifications or going back in time (seg0 is up-to-date). */
         pagecopy(get_virtual_page(my_segnum, pagenum),
                  get_virtual_page(0, pagenum));
         release_all_privatization_locks();
diff --git a/c8/test/test_resharing.py b/c8/test/test_resharing.py
--- a/c8/test/test_resharing.py
+++ b/c8/test/test_resharing.py
@@ -118,6 +118,62 @@
         assert stm_get_page_status(p2) == PAGE_NO_ACCESS
 
 
+    def test_seg0_updated_on_majorgc(self):  # follows one page's status across threads and major GCs
+        self.start_transaction()
+        lp1 = stm_allocate(16)
+        stm_set_char(lp1, 'a')
+        self.push_roots([lp1,])
+        stm_minor_collect()
+        lp1, = self.pop_roots()
+        self.push_roots([lp1,])
+        self.commit_transaction()
+        p1 = stm_get_obj_pages(lp1)[0]
+
+        self.switch(1)
+        self.start_transaction()
+        # NOACC -> ACC
+        assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+        assert stm_get_char(lp1) == 'a'
+        assert stm_get_page_status(p1) == PAGE_ACCESSIBLE
+        stm_set_char(lp1, 'b')
+        self.commit_transaction()
+
+        self.switch(2)
+        self.start_transaction()
+        # NOACC -> ACC
+        assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+        assert stm_get_char(lp1) == 'b'
+        # stm_set_char(lp1, 'x')
+        assert stm_get_page_status(p1) == PAGE_ACCESSIBLE
+        # merging: unmodified ACC -> RO
+        stm_major_collect()
+        stm_major_collect()
+        stm_major_collect()
+        assert stm_get_page_status(p1) == PAGE_READONLY
+
+        self.switch(1)
+
+        self.start_transaction()
+        # RO stays RO
+        assert stm_get_page_status(p1) == PAGE_READONLY
+        assert stm_get_char(lp1) == 'b'
+        assert stm_get_page_status(p1) == PAGE_READONLY
+
+        self.switch(2)
+
+        # write makes RO -> ACC, all others RO->NO_ACC
+        assert stm_get_page_status(p1) == PAGE_READONLY
+        stm_set_char(lp1, 'x')
+        assert stm_get_page_status(p1) == PAGE_ACCESSIBLE
+
+        self.switch(1)
+
+        assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+        stm_major_collect()
+        stm_major_collect()
+        stm_major_collect()
+        assert stm_get_page_status(p1) == PAGE_NO_ACCESS  # major GCs leave the page NO_ACCESS here
+
 
 
     def test_resharing_more(self):
@@ -215,7 +271,7 @@
         # now wref is in NO_ACCESS page
         assert stm_get_page_status(page) == PAGE_NO_ACCESS
         assert stm_get_weakref(wref) == ffi.NULL
-        assert stm_get_page_status(page) == PAGE_ACCESSIBLE
+        assert stm_get_page_status(page) == PAGE_READONLY
 
     def test_weakref2(self):
         self.start_transaction()
@@ -244,4 +300,4 @@
         # now wref is in NO_ACCESS page
         assert stm_get_page_status(page) == PAGE_NO_ACCESS
         assert stm_get_weakref(wref) == ffi.NULL
-        assert stm_get_page_status(page) == PAGE_ACCESSIBLE
+        assert stm_get_page_status(page) == PAGE_READONLY
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to