Author: Remi Meier <[email protected]>
Branch: c8-reshare-pages
Changeset: r2073:e08f018e54b8
Date: 2017-06-14 15:32 +0200
http://bitbucket.org/pypy/stmgc/changeset/e08f018e54b8/
Log: understand and document what this branch is doing
A try to write down the logic of the page resharing approach while
also improving the code.
diff --git a/c8/doc/page-resharing.md b/c8/doc/page-resharing.md
new file mode 100644
--- /dev/null
+++ b/c8/doc/page-resharing.md
@@ -0,0 +1,79 @@
+
+
+# Page Resharing #
+
+The idea of page resharing is that, after a while of running a multi-threaded
+program, all `S` segments have read most pages of the heap. Thus, nearly all of
+the heap memory is duplicated `S` times. While very wasteful, such duplication
+also negatively affects the performance of the validation operation. During
+validation, all committed changes need to be imported for all accessible pages
+in a segment. Thus, if all pages are accessible, all changes need to be copied
+in. Instead, it is probably better to sometimes mark pages inaccessible again if
+they are rarely accessed in that segment, and we therefore don't need to import
+changes for these pages; lessening the work done by the validation operation.
+
+
+## Situation without Resharing ##
+
+Pages can only be ACCESSIBLE (readable *and* writable) or INACCESSIBLE (neither)
+in a segment. Seg0's pages are always ACCESSIBLE.
+
+New (old) objects get allocated during minor GCs in pages of the current
+segment. On commit, these objects get copied to seg0 and to all segments in
+which that page is also ACCESSIBLE.
+
+Whenever we access an obj (reading *or* writing) and the obj's page is not
+ACCESSIBLE yet, we get a signal and transition the page from INACCESSIBLE to
+ACCESSIBLE. Other segments are unaffected.
+
+
+## Situation with Resharing ##
+
+Pages can be NOACC, RO, ACC.
+
+`RO` provides the revision of seg0. Whenever a TX makes a page `ACC` while there
+are `RO` pages around, make all `RO` pages `NOACC` in all segments. This eager
+approach means that we do not need all privatization-locks (write) in many
+places where we would otherwise need a `RO->NOACC` transition. Also, we expect
+the `RO` pages to go out-of-date when we commit anyway (soon).
+
+*INVARIANT*: whenever a segment has a `RO` page, that page has the same content
+as seg0's version of that page.
+
+*INVARIANT*: if there are `RO` pages around, no segment other than seg0 has the page `ACC`.
+
+*PROPERTY I*: Once a page is `ACC`, it stays `ACC` until major GC; no `RO` can
+exist until major GC reshares (`ACC -> RO`) and makes seg0 up-to-date.
+
+*PROPERTY II*: Validation will never try to import into `RO` pages, since (I)
+guarantees that it wouldn't be a `RO` anymore if there was a change to import.
+
+
+In signal handler:
+
+    if read or write:
+        if is `RO`:
+            `RO -> ACC` (and `RO -> NOACC` for all others)
+        else if is `NOACC`:
+            if !is_write and no one has `ACC`:
+                `NOACC -> RO`
+            else:
+                `NOACC -> ACC`
+
+On validate: always import into `ACC` pages; importing into an `RO` page would be a bug.
+
+During major GC:
+
+ 1. Validation of seg0: gets all changes; any `RO` views still around means that
+ there was *no change* in those pages, so the views stay valid.
+ 2. All other segments validate their `ACC` pages; again `RO` pages *cannot*
+ have changes that need importing.
+ 3. While tracing modified objs and overflow objs, remember pages with
+ modifications. These *must not* change from `ACC` to `RO`.
+ 4. Loop over some pages (resharing can be distributed over several major GCs),
+ and do `ACC -> RO` for all segments iff the previous step shows that to be
+ valid. After that, the INVARIANTs need to hold.
+
+
+
+
diff --git a/c8/stm/core.c b/c8/stm/core.c
--- a/c8/stm/core.c
+++ b/c8/stm/core.c
@@ -67,8 +67,10 @@
/* never import anything into READONLY pages */
assert(get_page_status_in(my_segnum, current_page_num) !=
PAGE_READONLY);
- if (pagenum == -1) {
- if (get_page_status_in(my_segnum, current_page_num) !=
PAGE_ACCESSIBLE)
+ if (pagenum == -1UL) {
+ assert(IMPLY(my_segnum == 0,
+ get_page_status_in(my_segnum, current_page_num) ==
PAGE_ACCESSIBLE));
+ if (get_page_status_in(my_segnum, current_page_num) ==
PAGE_NO_ACCESS)
continue;
} else if (pagenum != current_page_num) {
continue;
@@ -793,7 +795,7 @@
}
-static void touch_all_pages_of_obj(object_t *obj, size_t obj_size)
+static void make_all_pages_of_obj_accessible(object_t *obj, size_t obj_size)
{
/* XXX: make this function not needed */
int my_segnum = STM_SEGMENT->segment_num;
@@ -806,7 +808,7 @@
end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL;
}
- dprintf(("touch_all_pages_of_obj(%p, %lu): %ld-%ld\n",
+ dprintf(("make_all_pages_of_obj_accessible(%p, %lu): %ld-%ld\n",
obj, obj_size, first_page, end_page));
acquire_privatization_lock(STM_SEGMENT->segment_num);
@@ -816,8 +818,8 @@
if (get_page_status_in(my_segnum, page) != PAGE_ACCESSIBLE) {
release_privatization_lock(STM_SEGMENT->segment_num);
- /* emulate pagefault -> PAGE_ACCESSIBLE/READONLY: */
- handle_segfault_in_page(page, false);
+ /* emulate pagefault -> PAGE_ACCESSIBLE: */
+ handle_segfault_in_page(page, true);
volatile char *dummy = REAL_ADDRESS(STM_SEGMENT->segment_base,
page * 4096UL);
*dummy = *dummy; /* force segfault (incl. writing) */
acquire_privatization_lock(STM_SEGMENT->segment_num);
@@ -863,7 +865,8 @@
realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj);
obj_size = stmcb_size_rounded_up((struct object_s *)realobj);
- touch_all_pages_of_obj(obj, obj_size);
+ /* ACCESSIBLE, not READONLY since we need to write flags into the obj
*/
+ make_all_pages_of_obj_accessible(obj, obj_size);
}
if (mark_card) {
diff --git a/c8/stm/core.h b/c8/stm/core.h
--- a/c8/stm/core.h
+++ b/c8/stm/core.h
@@ -302,7 +302,7 @@
static stm_thread_local_t *abort_with_mutex_no_longjmp(void);
static void abort_data_structures_from_segment_num(int segment_num);
-static void touch_all_pages_of_obj(object_t *obj, size_t obj_size);
+static void make_all_pages_of_obj_accessible(object_t *obj, size_t obj_size);
static void synchronize_object_enqueue(object_t *obj);
static void synchronize_objects_flush(void);
diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c
--- a/c8/stm/gcpage.c
+++ b/c8/stm/gcpage.c
@@ -249,6 +249,7 @@
count++;
}
}
+ _assert_page_status_invariants(pagenum);
return count;
}
@@ -278,7 +279,7 @@
fprintf(stderr, "COLLECT_HINTS_ONLY\n");
/* XXX: since major GC also makes sure that currently modified
objs are
- "modified recently", maybe don't need to do it in
touch_all_pages_of_obj() */
+ "modified recently", maybe don't need to do it in
make_all_pages_of_obj_accessible() */
return;
}
}
diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c
--- a/c8/stm/nursery.c
+++ b/c8/stm/nursery.c
@@ -671,7 +671,7 @@
o->stm_flags = 0;
/* make all pages of 'o' accessible as synchronize_obj_flush() in minor
collections assumes all young objs are fully accessible. */
- touch_all_pages_of_obj(o, size_rounded_up);
+ make_all_pages_of_obj_accessible(o, size_rounded_up);
#endif
return o;
}
diff --git a/c8/stm/pages.h b/c8/stm/pages.h
--- a/c8/stm/pages.h
+++ b/c8/stm/pages.h
@@ -157,3 +157,41 @@
ps->by_segment &= ~(0b11UL << seg_shift); /* clear */
ps->by_segment |= status << seg_shift; /* set */
}
+
+__attribute__((unused)) /* debug aid: may be unreferenced in some translation units */
+static void _debug_page_status(uintptr_t pagenum) /* print the status of page 'pagenum' in every segment to stderr */
+{
+ for (long i = 0; i < NB_SEGMENTS; i++) { /* starts at 0, so seg0 is listed as well */
+ uint8_t status = get_page_status_in(i, pagenum);
+ switch (status) { /* no default: any other status value prints nothing */
+ case PAGE_NO_ACCESS:
+ fprintf(stderr, "in seg %ld: NO_ACC\n", i);
+ break;
+ case PAGE_READONLY:
+ fprintf(stderr, "in seg %ld: RO\n", i);
+ break;
+ case PAGE_ACCESSIBLE:
+ fprintf(stderr, "in seg %ld: ACC\n", i);
+ break;
+ }
+ }
+}
+
+__attribute__((unused)) /* debug aid: may be unreferenced in some translation units */
+static void _assert_page_status_invariants(uintptr_t pagenum) /* check that 'pagenum' is not RO in one segment while ACC in another */
+{
+#ifndef NDEBUG /* with NDEBUG defined the whole body is compiled out */
+ bool has_ro = false;
+ bool has_acc = false;
+ for (long i = 1; i < NB_SEGMENTS; i++) { /* starts at 1: seg0 is not part of the check */
+ uint8_t status = get_page_status_in(i, pagenum);
+ has_ro |= (status == PAGE_READONLY);
+ has_acc |= (status == PAGE_ACCESSIBLE);
+ }
+
+ if (has_ro && has_acc) { /* RO and ACC views coexist: invariant violated */
+ _debug_page_status(pagenum); /* dump the per-segment states before failing */
+ assert(false);
+ }
+#endif
+}
diff --git a/c8/stm/signal_handler.c b/c8/stm/signal_handler.c
--- a/c8/stm/signal_handler.c
+++ b/c8/stm/signal_handler.c
@@ -5,7 +5,6 @@
-
static void setup_signal_handler(void)
{
struct sigaction act;
@@ -61,7 +60,7 @@
}
-static void readonly_to_accessible(int my_segnum, uintptr_t pagenum)
+static void any_to_accessible(int my_segnum, uintptr_t pagenum)
{
/* make our page write-ready */
page_mark_accessible(my_segnum, pagenum);
@@ -91,22 +90,55 @@
long i;
int my_segnum = STM_SEGMENT->segment_num;
+ uint8_t page_status = get_page_status_in(my_segnum, pagenum);
- uint8_t page_status = get_page_status_in(my_segnum, pagenum);
assert(page_status == PAGE_NO_ACCESS
|| page_status == PAGE_READONLY);
+ _assert_page_status_invariants(pagenum);
-
- //is_write=false;
- if (page_status == PAGE_READONLY || is_write) {
- dprintf(("SHORTCUT\n"));
- readonly_to_accessible(my_segnum, pagenum);
- }
if (page_status == PAGE_READONLY) {
+ /* RO -> ACC */
+ assert(is_write); /* should only fail if linux kernel changed */
+ any_to_accessible(my_segnum, pagenum);
ro_to_acc++;
+ /* if was RO, page already has the right contents */
+ _assert_page_status_invariants(pagenum);
release_all_privatization_locks();
return;
+
+ }
+
+ assert(page_status == PAGE_NO_ACCESS);
+
+ /* if this is just a read-access, try to get a RO view: */
+ if (!is_write) {
+ bool acc_exists = false;
+ for (i = 1; i < NB_SEGMENTS; i++) {
+ if (i == my_segnum)
+ continue;
+
+ if (get_page_status_in(i, pagenum) == PAGE_ACCESSIBLE) {
+ acc_exists = true;
+ break;
+ }
+ }
+
+ if (!acc_exists) {
+ /* if there is no ACC version around, it means noone ever had that
page
+ * ACC since the last major GC -> seg0 has the most current
revision and
+ * we can get a RO of that. (of course only if this is not a
+ * write-access anyway) */
+
+ /* this case could be avoided by making all NO_ACCESS to READONLY
+ when resharing pages (XXX: better?).
+ We may go from NO_ACCESS->READONLY->ACCESSIBLE */
+ dprintf((" > make a previously NO_ACCESS page READONLY\n"));
+ page_mark_readonly(my_segnum, pagenum);
+ _assert_page_status_invariants(pagenum);
+ release_all_privatization_locks();
+ return;
+ }
}
/* find a suitable page to copy from in other segments:
@@ -116,26 +148,23 @@
* Note: simply finding the most recent revision would be a conservative
strategy, but
* requires going back in time more often (see below)
*/
+
+ /* special case: if there are RO versions around, we want to copy from
seg0,
+ * since we make RO -> NOACC below before we copy (which wouldn't work). */
int copy_from_segnum = -1;
uint64_t copy_from_rev = 0;
uint64_t target_rev = STM_PSEGMENT->last_commit_log_entry->rev_num;
- bool was_readonly = false;
for (i = 1; i < NB_SEGMENTS; i++) {
if (i == my_segnum)
continue;
- if (!was_readonly && get_page_status_in(i, pagenum) == PAGE_READONLY) {
- was_readonly = true;
- break;
- }
-
struct stm_commit_log_entry_s *log_entry;
log_entry = get_priv_segment(i)->last_commit_log_entry;
/* - if not found anything, initialise copy_from_rev
* - else if target_rev is higher than everything we found, find
newest among them
* - else: find revision that is as close to target_rev as possible
*/
- bool accessible = get_page_status_in(i, pagenum) != PAGE_NO_ACCESS;
+ bool accessible = get_page_status_in(i, pagenum) == PAGE_ACCESSIBLE;
bool uninit = copy_from_segnum == -1;
bool find_most_recent = copy_from_rev < target_rev &&
log_entry->rev_num > copy_from_rev;
bool find_closest = copy_from_rev >= target_rev && (
@@ -150,29 +179,19 @@
}
OPT_ASSERT(copy_from_segnum != my_segnum);
- if (was_readonly) {
- assert(page_status == PAGE_NO_ACCESS);
- /* this case could be avoided by making all NO_ACCESS to READONLY
- when resharing pages (XXX: better?).
- We may go from NO_ACCESS->READONLY->ACCESSIBLE on write with
- 2 SIGSEGV in a row.*/
- dprintf((" > make a previously NO_ACCESS page READONLY\n"));
- page_mark_readonly(my_segnum, pagenum);
- release_all_privatization_locks();
- return;
- }
-
- /* make our page write-ready */
- if (!is_write) // is_write -> already marked accessible above
- page_mark_accessible(my_segnum, pagenum);
+ /* make our page write-ready and reconstruct contents */
+ any_to_accessible(my_segnum, pagenum);
+ _assert_page_status_invariants(pagenum);
/* account for this page now: XXX */
/* increment_total_allocated(4096); */
if (copy_from_segnum == -1) {
/* this page is only accessible in the sharing segment seg0 so far (new
- allocation). We can thus simply mark it accessible here. */
+ allocation). Or it was only in RO pages, which are the same as seg0.
+ We can thus simply mark it accessible here w/o undoing any
+ modifications or going back in time (seg0 is up-to-date). */
pagecopy(get_virtual_page(my_segnum, pagenum),
get_virtual_page(0, pagenum));
release_all_privatization_locks();
diff --git a/c8/test/test_resharing.py b/c8/test/test_resharing.py
--- a/c8/test/test_resharing.py
+++ b/c8/test/test_resharing.py
@@ -118,6 +118,62 @@
assert stm_get_page_status(p2) == PAGE_NO_ACCESS
+ def test_seg0_updated_on_majorgc(self):  # follows one page's status: commit, read+write elsewhere, reshare by major GC, re-privatise by a write
+ self.start_transaction()
+ lp1 = stm_allocate(16)
+ stm_set_char(lp1, 'a')
+ self.push_roots([lp1,])
+ stm_minor_collect()
+ lp1, = self.pop_roots()
+ self.push_roots([lp1,])
+ self.commit_transaction()
+ p1 = stm_get_obj_pages(lp1)[0]  # first page of lp1; all status asserts below are about this page
+
+ self.switch(1)
+ self.start_transaction()
+ # NOACC -> ACC
+ assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+ assert stm_get_char(lp1) == 'a'
+ assert stm_get_page_status(p1) == PAGE_ACCESSIBLE
+ stm_set_char(lp1, 'b')
+ self.commit_transaction()
+
+ self.switch(2)
+ self.start_transaction()
+ # NOACC -> ACC
+ assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+ assert stm_get_char(lp1) == 'b'  # sees the value committed after switch(1)
+ # stm_set_char(lp1, 'x')
+ assert stm_get_page_status(p1) == PAGE_ACCESSIBLE
+ # merging: unmodified ACC -> RO
+ stm_major_collect()  # three collections in a row; presumably because resharing may be spread over several major GCs
+ stm_major_collect()
+ stm_major_collect()
+ assert stm_get_page_status(p1) == PAGE_READONLY
+
+ self.switch(1)
+
+ self.start_transaction()
+ # RO stays RO
+ assert stm_get_page_status(p1) == PAGE_READONLY
+ assert stm_get_char(lp1) == 'b'  # a plain read must not change the page status
+ assert stm_get_page_status(p1) == PAGE_READONLY
+
+ self.switch(2)
+
+ # write makes RO -> ACC, all others RO->NO_ACC
+ assert stm_get_page_status(p1) == PAGE_READONLY
+ stm_set_char(lp1, 'x')
+ assert stm_get_page_status(p1) == PAGE_ACCESSIBLE
+
+ self.switch(1)
+
+ assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+ stm_major_collect()  # further major GCs must leave this page NO_ACCESS here (asserted below)
+ stm_major_collect()
+ stm_major_collect()
+ assert stm_get_page_status(p1) == PAGE_NO_ACCESS
+
def test_resharing_more(self):
@@ -215,7 +271,7 @@
# now wref is in NO_ACCESS page
assert stm_get_page_status(page) == PAGE_NO_ACCESS
assert stm_get_weakref(wref) == ffi.NULL
- assert stm_get_page_status(page) == PAGE_ACCESSIBLE
+ assert stm_get_page_status(page) == PAGE_READONLY
def test_weakref2(self):
self.start_transaction()
@@ -244,4 +300,4 @@
# now wref is in NO_ACCESS page
assert stm_get_page_status(page) == PAGE_NO_ACCESS
assert stm_get_weakref(wref) == ffi.NULL
- assert stm_get_page_status(page) == PAGE_ACCESSIBLE
+ assert stm_get_page_status(page) == PAGE_READONLY
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit