Author: Remi Meier <remi.me...@gmail.com> Branch: c8-reshare-pages Changeset: r2073:e08f018e54b8 Date: 2017-06-14 15:32 +0200 http://bitbucket.org/pypy/stmgc/changeset/e08f018e54b8/
Log: understand and document what this branch is doing An attempt to write down the logic of the page resharing approach while also improving the code. diff --git a/c8/doc/page-resharing.md b/c8/doc/page-resharing.md new file mode 100644 --- /dev/null +++ b/c8/doc/page-resharing.md @@ -0,0 +1,79 @@ + + +# Page Resharing # + +The idea of page resharing is that, after a while of running a multi-threaded +program, all `S` segments have read most pages of the heap. Thus, nearly all of +the heap memory is duplicated `S` times. While very wasteful, such duplication +also negatively affects the performance of the validation operation. During +validation, all committed changes need to be imported for all accessible pages +in a segment. Thus, if all pages are accessible, all changes need to be copied +in. Instead, it is probably better to sometimes mark pages inaccessible again if +they are rarely accessed in that segment, and we therefore don't need to import +changes for these pages, lessening the work done by the validation operation. + + +## Situation without Resharing ## + +Pages can only be ACCESSIBLE (readable *and* writable) or INACCESSIBLE (neither) +in a segment. Seg0's pages are always ACCESSIBLE. + +New (old) objects get allocated during minor GCs in pages of the current +segment. On commit, these objects get copied to seg0 and to all segments in +which that page is also ACCESSIBLE. + +Whenever we access an obj (reading *or* writing) and the obj's page is not +ACCESSIBLE yet, we get a signal and transition the page from INACCESSIBLE to +ACCESSIBLE. Other segments are unaffected. + + +## Situation with Resharing ## + +Pages can be NOACC, RO, or ACC. + +`RO` provides the revision of seg0. Whenever a TX makes a page `ACC` while there +are `RO` pages around, make all `RO` pages `NOACC` in all segments. This eager +approach means that we do not need all privatization-locks (write) in many +places where we would otherwise need a `RO->NOACC` transition. 
Also, we expect +the `RO` pages to go out-of-date when we commit anyway (soon). + +*INVARIANT*: whenever a segment has a `RO` page, that page has the same content +as seg0's version of that page. + +*INVARIANT*: if there are `RO` pages around, no segment has the page `ACC`. + +*PROPERTY I*: Once a page is `ACC`, it stays `ACC` until major GC; no `RO` can +exist until major GC reshares (`ACC -> RO`) and makes seg0 up-to-date. + +*PROPERTY II*: Validation will never try to import into `RO` pages, since (I) +guarantees that it wouldn't be a `RO` anymore if there was a change to import. + + +In signal handler: + + if read or write: + if is `RO`: + `RO -> ACC` (and `RO -> NOACC` for all others) + else if is `NOACC`: + if !is_write and no one has `ACC`: + `NOACC -> RO` + else: + `NOACC -> ACC` + +On validate: always imports into `ACC`, into `RO` would be a bug. + +During major GC: + + 1. Validation of seg0: gets all changes; any `RO` views still around mean that + there was *no change* in those pages, so the views stay valid. + 2. All other segments validate their `ACC` pages; again `RO` pages *cannot* + have changes that need importing. + 3. While tracing modified objs and overflow objs, remember pages with + modifications. These *must not* change from `ACC` to `RO`. + 4. Loop over some pages (resharing can be distributed over several major GCs), + and do `ACC -> RO` for all segments iff the previous step shows that to be + valid. After that, the INVARIANTs need to hold. 
+ + + + diff --git a/c8/stm/core.c b/c8/stm/core.c --- a/c8/stm/core.c +++ b/c8/stm/core.c @@ -67,8 +67,10 @@ /* never import anything into READONLY pages */ assert(get_page_status_in(my_segnum, current_page_num) != PAGE_READONLY); - if (pagenum == -1) { - if (get_page_status_in(my_segnum, current_page_num) != PAGE_ACCESSIBLE) + if (pagenum == -1UL) { + assert(IMPLY(my_segnum == 0, + get_page_status_in(my_segnum, current_page_num) == PAGE_ACCESSIBLE)); + if (get_page_status_in(my_segnum, current_page_num) == PAGE_NO_ACCESS) continue; } else if (pagenum != current_page_num) { continue; @@ -793,7 +795,7 @@ } -static void touch_all_pages_of_obj(object_t *obj, size_t obj_size) +static void make_all_pages_of_obj_accessible(object_t *obj, size_t obj_size) { /* XXX: make this function not needed */ int my_segnum = STM_SEGMENT->segment_num; @@ -806,7 +808,7 @@ end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL; } - dprintf(("touch_all_pages_of_obj(%p, %lu): %ld-%ld\n", + dprintf(("make_all_pages_of_obj_accessible(%p, %lu): %ld-%ld\n", obj, obj_size, first_page, end_page)); acquire_privatization_lock(STM_SEGMENT->segment_num); @@ -816,8 +818,8 @@ if (get_page_status_in(my_segnum, page) != PAGE_ACCESSIBLE) { release_privatization_lock(STM_SEGMENT->segment_num); - /* emulate pagefault -> PAGE_ACCESSIBLE/READONLY: */ - handle_segfault_in_page(page, false); + /* emulate pagefault -> PAGE_ACCESSIBLE: */ + handle_segfault_in_page(page, true); volatile char *dummy = REAL_ADDRESS(STM_SEGMENT->segment_base, page * 4096UL); *dummy = *dummy; /* force segfault (incl. 
writing) */ acquire_privatization_lock(STM_SEGMENT->segment_num); @@ -863,7 +865,8 @@ realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); obj_size = stmcb_size_rounded_up((struct object_s *)realobj); - touch_all_pages_of_obj(obj, obj_size); + /* ACCESSIBLE, not READONLY since we need to write flags into the obj */ + make_all_pages_of_obj_accessible(obj, obj_size); } if (mark_card) { diff --git a/c8/stm/core.h b/c8/stm/core.h --- a/c8/stm/core.h +++ b/c8/stm/core.h @@ -302,7 +302,7 @@ static stm_thread_local_t *abort_with_mutex_no_longjmp(void); static void abort_data_structures_from_segment_num(int segment_num); -static void touch_all_pages_of_obj(object_t *obj, size_t obj_size); +static void make_all_pages_of_obj_accessible(object_t *obj, size_t obj_size); static void synchronize_object_enqueue(object_t *obj); static void synchronize_objects_flush(void); diff --git a/c8/stm/gcpage.c b/c8/stm/gcpage.c --- a/c8/stm/gcpage.c +++ b/c8/stm/gcpage.c @@ -249,6 +249,7 @@ count++; } } + _assert_page_status_invariants(pagenum); return count; } @@ -278,7 +279,7 @@ fprintf(stderr, "COLLECT_HINTS_ONLY\n"); /* XXX: since major GC also makes sure that currently modified objs are - "modified recently", maybe don't need to do it in touch_all_pages_of_obj() */ + "modified recently", maybe don't need to do it in make_all_pages_of_obj_accessible() */ return; } } diff --git a/c8/stm/nursery.c b/c8/stm/nursery.c --- a/c8/stm/nursery.c +++ b/c8/stm/nursery.c @@ -671,7 +671,7 @@ o->stm_flags = 0; /* make all pages of 'o' accessible as synchronize_obj_flush() in minor collections assumes all young objs are fully accessible. 
*/ - touch_all_pages_of_obj(o, size_rounded_up); + make_all_pages_of_obj_accessible(o, size_rounded_up); #endif return o; } diff --git a/c8/stm/pages.h b/c8/stm/pages.h --- a/c8/stm/pages.h +++ b/c8/stm/pages.h @@ -157,3 +157,41 @@ ps->by_segment &= ~(0b11UL << seg_shift); /* clear */ ps->by_segment |= status << seg_shift; /* set */ } + +__attribute__((unused)) +static void _debug_page_status(uintptr_t pagenum) +{ + for (long i = 0; i < NB_SEGMENTS; i++) { + uint8_t status = get_page_status_in(i, pagenum); + switch (status) { + case PAGE_NO_ACCESS: + fprintf(stderr, "in seg %ld: NO_ACC\n", i); + break; + case PAGE_READONLY: + fprintf(stderr, "in seg %ld: RO\n", i); + break; + case PAGE_ACCESSIBLE: + fprintf(stderr, "in seg %ld: ACC\n", i); + break; + } + } +} + +__attribute__((unused)) +static void _assert_page_status_invariants(uintptr_t pagenum) +{ +#ifndef NDEBUG + bool has_ro = false; + bool has_acc = false; + for (long i = 1; i < NB_SEGMENTS; i++) { + uint8_t status = get_page_status_in(i, pagenum); + has_ro |= (status == PAGE_READONLY); + has_acc |= (status == PAGE_ACCESSIBLE); + } + + if (has_ro && has_acc) { + _debug_page_status(pagenum); + assert(false); + } +#endif +} diff --git a/c8/stm/signal_handler.c b/c8/stm/signal_handler.c --- a/c8/stm/signal_handler.c +++ b/c8/stm/signal_handler.c @@ -5,7 +5,6 @@ - static void setup_signal_handler(void) { struct sigaction act; @@ -61,7 +60,7 @@ } -static void readonly_to_accessible(int my_segnum, uintptr_t pagenum) +static void any_to_accessible(int my_segnum, uintptr_t pagenum) { /* make our page write-ready */ page_mark_accessible(my_segnum, pagenum); @@ -91,22 +90,55 @@ long i; int my_segnum = STM_SEGMENT->segment_num; + uint8_t page_status = get_page_status_in(my_segnum, pagenum); - uint8_t page_status = get_page_status_in(my_segnum, pagenum); assert(page_status == PAGE_NO_ACCESS || page_status == PAGE_READONLY); + _assert_page_status_invariants(pagenum); - - //is_write=false; - if (page_status == 
PAGE_READONLY || is_write) { - dprintf(("SHORTCUT\n")); - readonly_to_accessible(my_segnum, pagenum); - } if (page_status == PAGE_READONLY) { + /* RO -> ACC */ + assert(is_write); /* should only fail if linux kernel changed */ + any_to_accessible(my_segnum, pagenum); ro_to_acc++; + /* if was RO, page already has the right contents */ + _assert_page_status_invariants(pagenum); release_all_privatization_locks(); return; + + } + + assert(page_status == PAGE_NO_ACCESS); + + /* if this is just a read-access, try to get a RO view: */ + if (!is_write) { + bool acc_exists = false; + for (i = 1; i < NB_SEGMENTS; i++) { + if (i == my_segnum) + continue; + + if (get_page_status_in(i, pagenum) == PAGE_ACCESSIBLE) { + acc_exists = true; + break; + } + } + + if (!acc_exists) { + /* if there is no ACC version around, it means noone ever had that page + * ACC since the last major GC -> seg0 has the most current revision and + * we can get a RO of that. (of course only if this is not a + * write-access anyway) */ + + /* this case could be avoided by making all NO_ACCESS to READONLY + when resharing pages (XXX: better?). + We may go from NO_ACCESS->READONLY->ACCESSIBLE */ + dprintf((" > make a previously NO_ACCESS page READONLY\n")); + page_mark_readonly(my_segnum, pagenum); + _assert_page_status_invariants(pagenum); + release_all_privatization_locks(); + return; + } } /* find a suitable page to copy from in other segments: @@ -116,26 +148,23 @@ * Note: simply finding the most recent revision would be a conservative strategy, but * requires going back in time more often (see below) */ + + /* special case: if there are RO versions around, we want to copy from seg0, + * since we make RO -> NOACC below before we copy (which wouldn't work). 
*/ int copy_from_segnum = -1; uint64_t copy_from_rev = 0; uint64_t target_rev = STM_PSEGMENT->last_commit_log_entry->rev_num; - bool was_readonly = false; for (i = 1; i < NB_SEGMENTS; i++) { if (i == my_segnum) continue; - if (!was_readonly && get_page_status_in(i, pagenum) == PAGE_READONLY) { - was_readonly = true; - break; - } - struct stm_commit_log_entry_s *log_entry; log_entry = get_priv_segment(i)->last_commit_log_entry; /* - if not found anything, initialise copy_from_rev * - else if target_rev is higher than everything we found, find newest among them * - else: find revision that is as close to target_rev as possible */ - bool accessible = get_page_status_in(i, pagenum) != PAGE_NO_ACCESS; + bool accessible = get_page_status_in(i, pagenum) == PAGE_ACCESSIBLE; bool uninit = copy_from_segnum == -1; bool find_most_recent = copy_from_rev < target_rev && log_entry->rev_num > copy_from_rev; bool find_closest = copy_from_rev >= target_rev && ( @@ -150,29 +179,19 @@ } OPT_ASSERT(copy_from_segnum != my_segnum); - if (was_readonly) { - assert(page_status == PAGE_NO_ACCESS); - /* this case could be avoided by making all NO_ACCESS to READONLY - when resharing pages (XXX: better?). - We may go from NO_ACCESS->READONLY->ACCESSIBLE on write with - 2 SIGSEGV in a row.*/ - dprintf((" > make a previously NO_ACCESS page READONLY\n")); - page_mark_readonly(my_segnum, pagenum); - release_all_privatization_locks(); - return; - } - - /* make our page write-ready */ - if (!is_write) // is_write -> already marked accessible above - page_mark_accessible(my_segnum, pagenum); + /* make our page write-ready and reconstruct contents */ + any_to_accessible(my_segnum, pagenum); + _assert_page_status_invariants(pagenum); /* account for this page now: XXX */ /* increment_total_allocated(4096); */ if (copy_from_segnum == -1) { /* this page is only accessible in the sharing segment seg0 so far (new - allocation). We can thus simply mark it accessible here. */ + allocation). 
Or it was only in RO pages, which are the same as seg0. + We can thus simply mark it accessible here w/o undoing any + modifications or going back in time (seg0 is up-to-date). */ pagecopy(get_virtual_page(my_segnum, pagenum), get_virtual_page(0, pagenum)); release_all_privatization_locks(); diff --git a/c8/test/test_resharing.py b/c8/test/test_resharing.py --- a/c8/test/test_resharing.py +++ b/c8/test/test_resharing.py @@ -118,6 +118,62 @@ assert stm_get_page_status(p2) == PAGE_NO_ACCESS + def test_seg0_updated_on_majorgc(self): + self.start_transaction() + lp1 = stm_allocate(16) + stm_set_char(lp1, 'a') + self.push_roots([lp1,]) + stm_minor_collect() + lp1, = self.pop_roots() + self.push_roots([lp1,]) + self.commit_transaction() + p1 = stm_get_obj_pages(lp1)[0] + + self.switch(1) + self.start_transaction() + # NOACC -> ACC + assert stm_get_page_status(p1) == PAGE_NO_ACCESS + assert stm_get_char(lp1) == 'a' + assert stm_get_page_status(p1) == PAGE_ACCESSIBLE + stm_set_char(lp1, 'b') + self.commit_transaction() + + self.switch(2) + self.start_transaction() + # NOACC -> ACC + assert stm_get_page_status(p1) == PAGE_NO_ACCESS + assert stm_get_char(lp1) == 'b' + # stm_set_char(lp1, 'x') + assert stm_get_page_status(p1) == PAGE_ACCESSIBLE + # merging: unmodified ACC -> RO + stm_major_collect() + stm_major_collect() + stm_major_collect() + assert stm_get_page_status(p1) == PAGE_READONLY + + self.switch(1) + + self.start_transaction() + # RO stays RO + assert stm_get_page_status(p1) == PAGE_READONLY + assert stm_get_char(lp1) == 'b' + assert stm_get_page_status(p1) == PAGE_READONLY + + self.switch(2) + + # write makes RO -> ACC, all others RO->NO_ACC + assert stm_get_page_status(p1) == PAGE_READONLY + stm_set_char(lp1, 'x') + assert stm_get_page_status(p1) == PAGE_ACCESSIBLE + + self.switch(1) + + assert stm_get_page_status(p1) == PAGE_NO_ACCESS + stm_major_collect() + stm_major_collect() + stm_major_collect() + assert stm_get_page_status(p1) == PAGE_NO_ACCESS + def 
test_resharing_more(self): @@ -215,7 +271,7 @@ # now wref is in NO_ACCESS page assert stm_get_page_status(page) == PAGE_NO_ACCESS assert stm_get_weakref(wref) == ffi.NULL - assert stm_get_page_status(page) == PAGE_ACCESSIBLE + assert stm_get_page_status(page) == PAGE_READONLY def test_weakref2(self): self.start_transaction() @@ -244,4 +300,4 @@ # now wref is in NO_ACCESS page assert stm_get_page_status(page) == PAGE_NO_ACCESS assert stm_get_weakref(wref) == ffi.NULL - assert stm_get_page_status(page) == PAGE_ACCESSIBLE + assert stm_get_page_status(page) == PAGE_READONLY _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit