Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r1274:34c8006bc1e2 Date: 2014-08-09 14:26 +0200 http://bitbucket.org/pypy/stmgc/changeset/34c8006bc1e2/
Log: Merge the branch 'card-marking'. Adds card-marking support similar to PyPy's, i.e. it's optional for large enough arrays, whereas we still have the regular per-object flag for all other objects. It only have an effect for the GC: for STM, we always mark the whole array as read or written, never some random slice of it. diff --git a/c7/TODO b/c7/TODO --- a/c7/TODO +++ b/c7/TODO @@ -1,8 +1,6 @@ - use small uniform gcpages -- write barrier for big arrays - - finalizers - the highest_overflow_number can overflow after 2**30 non-collect-time @@ -16,3 +14,13 @@ the unused pages away --- or maybe use consecutive addresses from the lowest ones from segment N, instead of the page corresponding to the page number in segment 0 (possibly a bit messy) + +- possibly messy too, but think about not using N+1 segments but only N + +- use a call/cc-style variant of setjmp/longjmp to avoid inevitable + transactions when we need to return + +- kill "atomic" and use regular lock elision + +- increase the memory limit, currently 2.5GB; this requires, apparently, + more fighting against LLVM bugs diff --git a/c7/demo/demo2.c b/c7/demo/demo2.c --- a/c7/demo/demo2.c +++ b/c7/demo/demo2.c @@ -43,7 +43,16 @@ n = (struct node_s*)obj; visit((object_t **)&n->next); } - +void stmcb_get_card_base_itemsize(struct object_s *obj, + uintptr_t offset_itemsize[2]) +{ + abort(); +} +void stmcb_trace_cards(struct object_s *obj, void visit(object_t **), + uintptr_t start, uintptr_t stop) +{ + abort(); +} void stmcb_commit_soon() {} static void expand_marker(char *base, uintptr_t odd_number, @@ -294,7 +303,7 @@ unregister_thread_local(); - stm_teardown(); + //stm_teardown(); return 0; } diff --git a/c7/stm/contention.c b/c7/stm/contention.c --- a/c7/stm/contention.c +++ b/c7/stm/contention.c @@ -194,7 +194,7 @@ /* tell the other to commit ASAP, since it causes aborts */ signal_other_to_commit_soon(contmgr.other_pseg); - dprintf(("abort in contention\n")); + dprintf(("abort in contention: kind %d\n", kind)); STM_SEGMENT->nursery_end = abort_category; marker_contention(kind, false, other_segment_num, obj); abort_with_mutex(); diff --git a/c7/stm/core.c b/c7/stm/core.c --- a/c7/stm/core.c +++ b/c7/stm/core.c @@ -40,26 +40,67 @@ #endif } -void _stm_write_slowpath(object_t *obj) +__attribute__((always_inline)) +static void write_slowpath_overflow_obj(object_t *obj, bool mark_card) +{ + /* An overflow object is an object from the same transaction, but + outside the nursery. More precisely, it is no longer young, + i.e. it comes from before the most recent minor collection. + */ + assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL); + + assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); + if (!mark_card) { + /* The basic case, with no card marking. We append the object + into 'objects_pointing_to_nursery', and remove the flag so + that the write_slowpath will not be called again until the + next minor collection. */ + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* if we clear this flag, we also need to clear the cards */ + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_CLEAR, false); + } + obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET); + LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); + } + else { + /* Card marking. Don't remove GCFLAG_WRITE_BARRIER because we + need to come back to _stm_write_slowpath_card() for every + card to mark. Add GCFLAG_CARDS_SET. */ + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + obj->stm_flags |= GCFLAG_CARDS_SET; + assert(STM_PSEGMENT->old_objects_with_cards); + LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj); + } +} + +__attribute__((always_inline)) +static void write_slowpath_common(object_t *obj, bool mark_card) { assert(_seems_to_be_running_transaction()); assert(!_is_young(obj)); assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); - /* is this an object from the same transaction, outside the nursery? */ - if ((obj->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) == - STM_PSEGMENT->overflow_number) { + uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj); - dprintf_test(("write_slowpath %p -> ovf obj_to_nurs\n", obj)); - obj->stm_flags &= ~GCFLAG_WRITE_BARRIER; - assert(STM_PSEGMENT->objects_pointing_to_nursery != NULL); - LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); + if (IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)) { + assert(write_locks[base_lock_idx] == 0); + write_slowpath_overflow_obj(obj, mark_card); return; } + /* Else, it's an old object and we need to privatise it. + Do a read-barrier now. Note that this must occur before the + safepoints that may be issued in write_write_contention_management(). + */ + stm_read(obj); - /* do a read-barrier now. Note that this must occur before the - safepoints that may be issued in write_write_contention_management(). */ - stm_read(obj); + /* Take the segment's own lock number */ + uint8_t lock_num = STM_PSEGMENT->write_lock_num; + + /* If CARDS_SET, we entered here at least once already, so we + already own the write_lock */ + assert(IMPLY(obj->stm_flags & GCFLAG_CARDS_SET, + write_locks[base_lock_idx] == lock_num)); /* XXX XXX XXX make the logic of write-locking objects optional! */ @@ -68,16 +109,14 @@ 'modified_old_objects' (but, because it had GCFLAG_WRITE_BARRIER, not in 'objects_pointing_to_nursery'). We'll detect this case by finding that we already own the write-lock. */ - uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START; - uint8_t lock_num = STM_PSEGMENT->write_lock_num; - assert(lock_idx < sizeof(write_locks)); + retry: - if (write_locks[lock_idx] == 0) { + if (write_locks[base_lock_idx] == 0) { /* A lock to prevent reading garbage from lookup_other_thread_recorded_marker() */ acquire_marker_lock(STM_SEGMENT->segment_base); - if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[lock_idx], + if (UNLIKELY(!__sync_bool_compare_and_swap(&write_locks[base_lock_idx], 0, lock_num))) { release_marker_lock(STM_SEGMENT->segment_base); goto retry; @@ -119,16 +158,15 @@ realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); obj_size = stmcb_size_rounded_up((struct object_s *)realobj); - /* that's the page *following* the last page with the object */ - end_page = (((uintptr_t)obj) + obj_size + 4095) / 4096UL; + /* get the last page containing data from the object */ + end_page = (((uintptr_t)obj) + obj_size - 1) / 4096UL; - for (i = first_page; i < end_page; i++) { + for (i = first_page; i <= end_page; i++) { page_privatize(i); } } } - else if (write_locks[lock_idx] == lock_num) { - OPT_ASSERT(STM_PSEGMENT->objects_pointing_to_nursery != NULL); + else if (write_locks[base_lock_idx] == lock_num) { #ifdef STM_TESTS bool found = false; LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t *, @@ -139,17 +177,10 @@ else { /* call the contention manager, and then retry (unless we were aborted). */ - write_write_contention_management(lock_idx, obj); + write_write_contention_management(base_lock_idx, obj); goto retry; } - /* A common case for write_locks[] that was either 0 or lock_num: - we need to add the object to 'objects_pointing_to_nursery' - if there is such a list. */ - if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) { - dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj)); - LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); - } /* check that we really have a private page */ assert(is_private_page(STM_SEGMENT->segment_num, @@ -158,16 +189,121 @@ /* check that so far all copies of the object have the flag */ check_flag_write_barrier(obj); - /* remove GCFLAG_WRITE_BARRIER, but only if we succeeded in - getting the write-lock */ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); - obj->stm_flags &= ~GCFLAG_WRITE_BARRIER; + if (!mark_card) { + /* A common case for write_locks[] that was either 0 or lock_num: + we need to add the object to the appropriate list if there is one. + */ + if (STM_PSEGMENT->objects_pointing_to_nursery != NULL) { + dprintf_test(("write_slowpath %p -> old obj_to_nurs\n", obj)); + LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, obj); + } + + if (obj->stm_flags & GCFLAG_CARDS_SET) { + /* if we clear this flag, we have to tell sync_old_objs that + everything needs to be synced */ + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_MARKED_OLD, true); /* mark all */ + } + + /* remove GCFLAG_WRITE_BARRIER if we succeeded in getting the base + write-lock (not for card marking). */ + obj->stm_flags &= ~(GCFLAG_WRITE_BARRIER | GCFLAG_CARDS_SET); + } + else { + /* don't remove WRITE_BARRIER, but add CARDS_SET */ + obj->stm_flags |= GCFLAG_CARDS_SET; + assert(STM_PSEGMENT->old_objects_with_cards); + LIST_APPEND(STM_PSEGMENT->old_objects_with_cards, obj); + } /* for sanity, check again that all other segment copies of this object still have the flag (so privatization worked) */ check_flag_write_barrier(obj); } +void _stm_write_slowpath(object_t *obj) +{ + write_slowpath_common(obj, /*mark_card=*/false); +} + +static bool obj_should_use_cards(object_t *obj) +{ + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + return (size >= _STM_MIN_CARD_OBJ_SIZE); +} + +char _stm_write_slowpath_card_extra(object_t *obj) +{ + /* the PyPy JIT calls this function directly if it finds that an + array doesn't have the GCFLAG_CARDS_SET */ + bool mark_card = obj_should_use_cards(obj); + write_slowpath_common(obj, mark_card); + return mark_card; +} + +long _stm_write_slowpath_card_extra_base(void) +{ + /* for the PyPy JIT: _stm_write_slowpath_card_extra_base[obj >> 4] + is the byte that must be set to CARD_MARKED. The logic below + does the same, but more explicitly. */ + return (((long)write_locks) - WRITELOCK_START + 1) + + 0x4000000000000000L; // <- workaround for a clang bug :-( +} + +void _stm_write_slowpath_card(object_t *obj, uintptr_t index) +{ + /* If CARDS_SET is not set so far, issue a normal write barrier. + If the object is large enough, ask it to set up the object for + card marking instead. + */ + if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { + char mark_card = _stm_write_slowpath_card_extra(obj); + if (!mark_card) + return; + } + + dprintf_test(("write_slowpath_card %p -> index:%lu\n", + obj, index)); + + /* We reach this point if we have to mark the card. + */ + assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); + assert(obj->stm_flags & GCFLAG_CARDS_SET); + assert(!(obj->stm_flags & GCFLAG_SMALL_UNIFORM)); /* not supported/tested */ + +#ifndef NDEBUG + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + /* we need at least one lock in addition to the STM-reserved object + write-lock */ + assert(size >= 32); + /* the 'index' must be in range(length-of-obj), but we don't have + a direct way to know the length. We know that it is smaller + than the size in bytes. */ + assert(index < size); +#endif + + /* Write into the card's lock. This is used by the next minor + collection to know what parts of the big object may have changed. + We already own the object here or it is an overflow obj. */ + uintptr_t base_lock_idx = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_lock_idx = base_lock_idx + get_index_to_card_index(index); + write_locks[card_lock_idx] = CARD_MARKED; + + /* More debug checks */ + dprintf(("mark %p index %lu, card:%lu with %d\n", + obj, index, get_index_to_card_index(index), CARD_MARKED)); + assert(IMPLY(IS_OVERFLOW_OBJ(STM_PSEGMENT, obj), + write_locks[base_lock_idx] == 0)); + assert(IMPLY(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj), + write_locks[base_lock_idx] == STM_PSEGMENT->write_lock_num)); +} + static void reset_transaction_read_version(void) { /* force-reset all read markers to 0 */ @@ -284,6 +420,8 @@ ({ if (was_read_remote(remote_base, item, remote_version)) { /* A write-read conflict! */ + dprintf(("write-read conflict on %p, our seg: %d, other: %ld\n", + item, STM_SEGMENT->segment_num, i)); if (write_read_contention_management(i, item)) { /* If we reach this point, we didn't abort, but we had to wait for the other thread to commit. If we @@ -355,7 +493,214 @@ } } -static void synchronize_object_now(object_t *obj) +static void _page_wise_synchronize_object_now(object_t *obj) +{ + uintptr_t start = (uintptr_t)obj; + uintptr_t first_page = start / 4096UL; + + char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj); + assert(obj_size >= 16); + uintptr_t end = start + obj_size; + uintptr_t last_page = (end - 1) / 4096UL; + long i, myself = STM_SEGMENT->segment_num; + + for (; first_page <= last_page; first_page++) { + + uintptr_t copy_size; + if (first_page == last_page) { + /* this is the final fragment */ + copy_size = end - start; + } + else { + /* this is a non-final fragment, going up to the + page's end */ + copy_size = 4096 - (start & 4095); + } + /* double-check that the result fits in one page */ + assert(copy_size > 0); + assert(copy_size + (start & 4095) <= 4096); + + /* First copy the object into the shared page, if needed */ + char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start); + char *dst = REAL_ADDRESS(stm_object_pages, start); + if (is_private_page(myself, first_page)) { + if (copy_size == 4096) + pagecopy(dst, src); + else + memcpy(dst, src, copy_size); + } + else { + assert(memcmp(dst, src, copy_size) == 0); /* same page */ + } + + for (i = 1; i <= NB_SEGMENTS; i++) { + if (i == myself) + continue; + + /* src = REAL_ADDRESS(stm_object_pages, start); */ + dst = REAL_ADDRESS(get_segment_base(i), start); + if (is_private_page(i, first_page)) { + /* The page is a private page. We need to diffuse this + fragment of object from the shared page to this private + page. */ + if (copy_size == 4096) + pagecopy(dst, src); + else + memcpy(dst, src, copy_size); + } + else { + assert(!memcmp(dst, src, copy_size)); /* same page */ + } + } + + start = (start + 4096) & ~4095; + } +} + +static inline bool _has_private_page_in_range( + long seg_num, uintptr_t start, uintptr_t size) +{ + uintptr_t first_page = start / 4096UL; + uintptr_t last_page = (start + size) / 4096UL; + for (; first_page <= last_page; first_page++) + if (is_private_page(seg_num, first_page)) + return true; + return false; +} + +static void _card_wise_synchronize_object_now(object_t *obj) +{ + assert(obj_should_use_cards(obj)); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + assert(!IS_OVERFLOW_OBJ(STM_PSEGMENT, obj)); + + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t obj_size = stmcb_size_rounded_up(realobj); + assert(obj_size >= 32); + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(obj_size - 1); /* max valid index */ + long i, myself = STM_SEGMENT->segment_num; + + /* simple heuristic to check if probably the whole object is + marked anyway so we should do page-wise synchronize */ + if (write_locks[first_card_index + 1] == CARD_MARKED_OLD + && write_locks[first_card_index + last_card_index] == CARD_MARKED_OLD + && write_locks[first_card_index + (last_card_index >> 1) + 1] == CARD_MARKED_OLD) { + + dprintf(("card_wise_sync assumes %p,size:%lu is fully marked\n", obj, obj_size)); + _reset_object_cards(get_priv_segment(STM_SEGMENT->segment_num), + obj, CARD_CLEAR, false); + _page_wise_synchronize_object_now(obj); + return; + } + + dprintf(("card_wise_sync syncs %p,size:%lu card-wise\n", obj, obj_size)); + + /* Combine multiple marked cards and do a memcpy for them. We don't + try yet to use page_copy() or otherwise take into account privatization + of pages (except _has_private_page_in_range) */ + uintptr_t offset_itemsize[2]; + bool all_cards_were_cleared = true; + + uintptr_t start_card_index = -1; + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + uint8_t card_value = write_locks[card_lock_idx]; + + if (card_value == CARD_MARKED_OLD) { + write_locks[card_lock_idx] = CARD_CLEAR; + + if (start_card_index == -1) { /* first marked card */ + start_card_index = card_index; + /* start = (uintptr_t)obj + stmcb_index_to_byte_offset( */ + /* realobj, get_card_index_to_index(card_index)); */ + if (all_cards_were_cleared) { + all_cards_were_cleared = false; + stmcb_get_card_base_itemsize(realobj, offset_itemsize); + } + } + } + else { + OPT_ASSERT(card_value == CARD_CLEAR); + } + + if (start_card_index != -1 /* something to copy */ + && (card_value != CARD_MARKED_OLD /* found non-marked card */ + || card_index == last_card_index)) { /* this is the last card */ + /* do the copying: */ + uintptr_t start, copy_size; + uintptr_t next_card_offset; + uintptr_t start_card_offset; + uintptr_t next_card_index = card_index; + + if (card_value == CARD_MARKED_OLD) { + /* card_index is the last card of the object, but we need + to go one further to get the right offset */ + next_card_index++; + } + + start_card_offset = offset_itemsize[0] + + get_card_index_to_index(start_card_index) * offset_itemsize[1]; + + next_card_offset = offset_itemsize[0] + + get_card_index_to_index(next_card_index) * offset_itemsize[1]; + + if (next_card_offset > obj_size) + next_card_offset = obj_size; + + start = (uintptr_t)obj + start_card_offset; + copy_size = next_card_offset - start_card_offset; + OPT_ASSERT(copy_size > 0); + + /* dprintf(("copy %lu bytes\n", copy_size)); */ + + /* since we have marked cards, at least one page here must be private */ + assert(_has_private_page_in_range(myself, start, copy_size)); + + /* copy to shared segment: */ + char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start); + char *dst = REAL_ADDRESS(stm_object_pages, start); + memcpy(dst, src, copy_size); + + /* copy to other segments */ + for (i = 1; i <= NB_SEGMENTS; i++) { + if (i == myself) + continue; + if (!_has_private_page_in_range(i, start, copy_size)) + continue; + /* src = REAL_ADDRESS(stm_object_pages, start); */ + dst = REAL_ADDRESS(get_segment_base(i), start); + memcpy(dst, src, copy_size); + } + + start_card_index = -1; + } + + card_index++; + } + + if (all_cards_were_cleared) { + /* well, seems like we never called stm_write_card() on it, so actually + we need to fall back to synchronize the whole object */ + _page_wise_synchronize_object_now(obj); + return; + } + +#ifndef NDEBUG + char *src = REAL_ADDRESS(stm_object_pages, (uintptr_t)obj); + char *dst; + for (i = 1; i <= NB_SEGMENTS; i++) { + dst = REAL_ADDRESS(get_segment_base(i), (uintptr_t)obj); + assert(memcmp(dst, src, obj_size) == 0); + } +#endif +} + + +static void synchronize_object_now(object_t *obj, bool ignore_cards) { /* Copy around the version of 'obj' that lives in our own segment. It is first copied into the shared pages, and then into other @@ -367,72 +712,16 @@ assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); assert(STM_PSEGMENT->privatization_lock == 1); - uintptr_t start = (uintptr_t)obj; - uintptr_t first_page = start / 4096UL; + if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) { + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + abort();//XXX WRITE THE FAST CASE + } else if (ignore_cards || !obj_should_use_cards(obj)) { + _page_wise_synchronize_object_now(obj); + } else { + _card_wise_synchronize_object_now(obj); + } - if (obj->stm_flags & GCFLAG_SMALL_UNIFORM) { - abort();//XXX WRITE THE FAST CASE - } - else { - char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); - ssize_t obj_size = stmcb_size_rounded_up((struct object_s *)realobj); - assert(obj_size >= 16); - uintptr_t end = start + obj_size; - uintptr_t last_page = (end - 1) / 4096UL; - long i, myself = STM_SEGMENT->segment_num; - - for (; first_page <= last_page; first_page++) { - - uintptr_t copy_size; - if (first_page == last_page) { - /* this is the final fragment */ - copy_size = end - start; - } - else { - /* this is a non-final fragment, going up to the - page's end */ - copy_size = 4096 - (start & 4095); - } - /* double-check that the result fits in one page */ - assert(copy_size > 0); - assert(copy_size + (start & 4095) <= 4096); - - /* First copy the object into the shared page, if needed */ - char *src = REAL_ADDRESS(STM_SEGMENT->segment_base, start); - char *dst = REAL_ADDRESS(stm_object_pages, start); - if (is_private_page(myself, first_page)) { - if (copy_size == 4096) - pagecopy(dst, src); - else - memcpy(dst, src, copy_size); - } - else { - assert(memcmp(dst, src, copy_size) == 0); /* same page */ - } - - for (i = 1; i <= NB_SEGMENTS; i++) { - if (i == myself) - continue; - - src = REAL_ADDRESS(stm_object_pages, start); - dst = REAL_ADDRESS(get_segment_base(i), start); - if (is_private_page(i, first_page)) { - /* The page is a private page. We need to diffuse this - fragment of object from the shared page to this private - page. */ - if (copy_size == 4096) - pagecopy(dst, src); - else - memcpy(dst, src, copy_size); - } - else { - assert(!memcmp(dst, src, copy_size)); /* same page */ - } - } - - start = (start + 4096) & ~4095; - } - } + _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), obj); } static void push_overflow_objects_from_privatized_pages(void) @@ -442,7 +731,7 @@ acquire_privatization_lock(); LIST_FOREACH_R(STM_PSEGMENT->large_overflow_objects, object_t *, - synchronize_object_now(item)); + synchronize_object_now(item, true /*ignore_cards*/)); release_privatization_lock(); } @@ -466,7 +755,7 @@ /* copy the object to the shared page, and to the other private pages as needed */ - synchronize_object_now(item); + synchronize_object_now(item, false); /* don't ignore_cards */ })); release_privatization_lock(); @@ -483,7 +772,9 @@ STM_PSEGMENT->marker_inev[1] = 0; /* reset these lists to NULL for the next transaction */ + _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num)); LIST_FREE(STM_PSEGMENT->objects_pointing_to_nursery); + list_clear(STM_PSEGMENT->old_objects_with_cards); LIST_FREE(STM_PSEGMENT->large_overflow_objects); timing_end_transaction(attribute_to); @@ -534,6 +825,7 @@ /* synchronize modified old objects to other threads */ push_modified_to_other_segments(); + _verify_cards_cleared_in_all_lists(get_priv_segment(STM_SEGMENT->segment_num)); /* update 'overflow_number' if needed */ if (STM_PSEGMENT->overflow_number_has_been_used) { @@ -593,6 +885,9 @@ ssize_t size = stmcb_size_rounded_up((struct object_s *)src); memcpy(dst, src, size); + if (obj_should_use_cards(item)) + _reset_object_cards(pseg, item, CARD_CLEAR, false); + /* objects in 'modified_old_objects' usually have the WRITE_BARRIER flag, unless they have been modified recently. Ignore the old flag; after copying from the @@ -621,6 +916,10 @@ static void abort_data_structures_from_segment_num(int segment_num) { +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT /* This function clears the content of the given segment undergoing an abort. It is called from abort_with_mutex(), but also sometimes from other threads that figure out that this segment should abort. @@ -648,8 +947,27 @@ /* throw away the content of the nursery */ long bytes_in_nursery = throw_away_nursery(pseg); + /* modified_old_objects' cards get cleared in + reset_modified_from_other_segments. Objs in old_objs_with_cards but not + in modified_old_objs are overflow objects and handled here: */ + if (pseg->large_overflow_objects != NULL) { + /* some overflow objects may have cards when aborting, clear them too */ + LIST_FOREACH_R(pseg->large_overflow_objects, object_t * /*item*/, + { + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(pseg->pub.segment_base, item); + + if (realobj->stm_flags & GCFLAG_CARDS_SET) { + /* CARDS_SET is enough since other HAS_CARDS objs + are already cleared */ + _reset_object_cards(pseg, item, CARD_CLEAR, false); + } + }); + } + /* reset all the modified objects (incl. re-adding GCFLAG_WRITE_BARRIER) */ reset_modified_from_other_segments(segment_num); + _verify_cards_cleared_in_all_lists(pseg); /* reset the tl->shadowstack and thread_local_obj to their original value before the transaction start */ @@ -662,8 +980,11 @@ /* reset these lists to NULL too on abort */ LIST_FREE(pseg->objects_pointing_to_nursery); + list_clear(pseg->old_objects_with_cards); LIST_FREE(pseg->large_overflow_objects); list_clear(pseg->young_weakrefs); +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") } static void abort_with_mutex(void) diff --git a/c7/stm/core.h b/c7/stm/core.h --- a/c7/stm/core.h +++ b/c7/stm/core.h @@ -14,7 +14,7 @@ #endif -#define NB_PAGES (1500*256) // 1500MB +#define NB_PAGES (2500*256) // 2500MB #define NB_SEGMENTS STM_NB_SEGMENTS #define NB_SEGMENTS_MAX 240 /* don't increase NB_SEGMENTS past this */ #define MAP_PAGES_FLAGS (MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE) @@ -35,6 +35,8 @@ #define WRITELOCK_START ((END_NURSERY_PAGE * 4096UL) >> 4) #define WRITELOCK_END READMARKER_END +#define CARD_SIZE _STM_CARD_SIZE + enum /* stm_flags */ { /* This flag is set on non-nursery objects. It forces stm_write() to call _stm_write_slowpath(). @@ -54,6 +56,12 @@ after the object. */ GCFLAG_HAS_SHADOW = 0x04, + /* Set on objects that are large enough (_STM_MIN_CARD_OBJ_SIZE) + to have multiple cards (at least _STM_MIN_CARD_COUNT), and that + have at least one card marked. This flag implies + GCFLAG_WRITE_BARRIER. */ + GCFLAG_CARDS_SET = _STM_GCFLAG_CARDS_SET, + /* All remaining bits of the 32-bit 'stm_flags' field are taken by the "overflow number". This is a number that identifies the "overflow objects" from the current transaction among all old @@ -61,7 +69,7 @@ current transaction that have been flushed out of the nursery, which occurs if the same transaction allocates too many objects. */ - GCFLAG_OVERFLOW_NUMBER_bit0 = 0x8 /* must be last */ + GCFLAG_OVERFLOW_NUMBER_bit0 = 0x10 /* must be last */ }; @@ -96,6 +104,10 @@ understood as meaning implicitly "this is the same as 'modified_old_objects'". */ struct list_s *objects_pointing_to_nursery; + /* Like objects_pointing_to_nursery it holds the old objects that + we did a stm_write_card() on. Objects can be in both lists. + It is NULL iff objects_pointing_to_nursery is NULL. */ + struct list_s *old_objects_with_cards; /* List of all large, overflowed objects. Only non-NULL after the current transaction spanned a minor collection. */ @@ -212,9 +224,34 @@ static uint8_t write_locks[WRITELOCK_END - WRITELOCK_START]; +enum /* card values for write_locks */ { + CARD_CLEAR = 0, /* card not used at all */ + CARD_MARKED = _STM_CARD_MARKED, /* card marked for tracing in the next gc */ + CARD_MARKED_OLD = 101, /* card was marked before, but cleared + in a GC */ +}; + #define REAL_ADDRESS(segment_base, src) ((segment_base) + (uintptr_t)(src)) +#define IS_OVERFLOW_OBJ(pseg, obj) (((obj)->stm_flags & -GCFLAG_OVERFLOW_NUMBER_bit0) \ + == (pseg)->overflow_number) + +static inline uintptr_t get_index_to_card_index(uintptr_t index) { + return (index / CARD_SIZE) + 1; +} + +static inline uintptr_t get_card_index_to_index(uintptr_t card_index) { + return (card_index - 1) * CARD_SIZE; +} + + +static inline uintptr_t get_write_lock_idx(uintptr_t obj) { + uintptr_t res = (obj >> 4) - WRITELOCK_START; + assert(res < sizeof(write_locks)); + return res; +} + static inline char *get_segment_base(long segment_num) { return stm_object_pages + segment_num * (NB_PAGES * 4096UL); } @@ -257,7 +294,7 @@ } static void copy_object_to_shared(object_t *obj, int source_segment_num); -static void synchronize_object_now(object_t *obj); +static void synchronize_object_now(object_t *obj, bool ignore_cards); static inline void acquire_privatization_lock(void) { diff --git a/c7/stm/gcpage.c b/c7/stm/gcpage.c --- a/c7/stm/gcpage.c +++ b/c7/stm/gcpage.c @@ -166,7 +166,7 @@ static inline uintptr_t mark_loc(object_t *obj) { - uintptr_t lock_idx = (((uintptr_t)obj) >> 4) - WRITELOCK_START; + uintptr_t lock_idx = get_write_lock_idx((uintptr_t)obj); assert(lock_idx < sizeof(write_locks)); return lock_idx; } @@ -440,6 +440,11 @@ static void clean_up_segment_lists(void) { +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + long i; for (i = 1; i <= NB_SEGMENTS; i++) { struct stm_priv_segment_info_s *pseg = get_priv_segment(i); @@ -450,21 +455,54 @@ written to but don't actually point to the nursery. Clear it up and set GCFLAG_WRITE_BARRIER again on the objects. This is the case for transactions where - MINOR_NOTHING_TO_DO() == false + MINOR_NOTHING_TO_DO() == true but they still did write-barriers on objects */ lst = pseg->objects_pointing_to_nursery; if (lst != NULL) { - LIST_FOREACH_R(lst, uintptr_t /*item*/, + LIST_FOREACH_R(lst, object_t* /*item*/, ({ struct object_s *realobj = (struct object_s *) - REAL_ADDRESS(pseg->pub.segment_base, item); + REAL_ADDRESS(pseg->pub.segment_base, (uintptr_t)item); + assert(!(realobj->stm_flags & GCFLAG_WRITE_BARRIER)); + OPT_ASSERT(!(realobj->stm_flags & GCFLAG_CARDS_SET)); + realobj->stm_flags |= GCFLAG_WRITE_BARRIER; + + if (realobj->stm_flags & GCFLAG_CARDS_SET) { + /* we called a normal WB on this object, so all cards + need to be marked OLD */ + if (!IS_OVERFLOW_OBJ(pseg, realobj)) { + _reset_object_cards(pseg, item, CARD_MARKED_OLD, true); /* mark all */ + } else { + /* simply clear overflow */ + _reset_object_cards(pseg, item, CARD_CLEAR, false); + } + } })); list_clear(lst); + } else { + /* if here MINOR_NOTHING_TO_DO() was true before, it's like + we "didn't do a collection" at all. So nothing to do on + modified_old_objs. */ } + lst = pseg->old_objects_with_cards; + LIST_FOREACH_R(lst, object_t* /*item*/, + ({ + struct object_s *realobj = (struct object_s *) + REAL_ADDRESS(pseg->pub.segment_base, item); + OPT_ASSERT(realobj->stm_flags & GCFLAG_CARDS_SET); + OPT_ASSERT(realobj->stm_flags & GCFLAG_WRITE_BARRIER); + + /* clear cards if overflow, or mark marked cards as old otherwise */ + uint8_t mark_value = IS_OVERFLOW_OBJ(pseg, realobj) ? + CARD_CLEAR : CARD_MARKED_OLD; + _reset_object_cards(pseg, item, mark_value, false); + })); + list_clear(lst); + /* Remove from 'large_overflow_objects' all objects that die */ lst = pseg->large_overflow_objects; if (lst != NULL) { @@ -477,6 +515,8 @@ } } } +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") } static inline bool largemalloc_keep_object_at(char *data) @@ -503,6 +543,20 @@ _stm_largemalloc_sweep(); } +static void assert_cleared_locks(size_t n) +{ +#ifndef NDEBUG + size_t i; + uint8_t *s = write_locks; +# ifndef STM_TESTS + if (n > 5000) n = 5000; +# endif + for (i = 0; i < n; i++) + assert(s[i] == CARD_CLEAR || s[i] == CARD_MARKED + || s[i] == CARD_MARKED_OLD); +#endif +} + static void clean_write_locks(void) { /* the write_locks array, containing the visit marker during @@ -512,7 +566,7 @@ object_t *loc2 = (object_t *)(uninitialized_page_stop - stm_object_pages); uintptr_t lock2_idx = mark_loc(loc2 - 1) + 1; - assert_memset_zero(write_locks, lock2_idx); + assert_cleared_locks(lock2_idx); memset(write_locks + lock2_idx, 0, sizeof(write_locks) - lock2_idx); } diff --git a/c7/stm/misc.c b/c7/stm/misc.c --- a/c7/stm/misc.c +++ b/c7/stm/misc.c @@ -40,6 +40,12 @@ return (obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) == 0; } + +bool _stm_was_written_card(object_t *obj) +{ + return obj->stm_flags & _STM_GCFLAG_CARDS_SET; +} + #ifdef STM_TESTS uintptr_t _stm_get_private_page(uintptr_t pagenum) { @@ -61,6 +67,13 @@ return list_count(STM_PSEGMENT->objects_pointing_to_nursery); } +long _stm_count_old_objects_with_cards(void) +{ + if (STM_PSEGMENT->old_objects_with_cards == NULL) + return -1; + return list_count(STM_PSEGMENT->old_objects_with_cards); +} + object_t *_stm_enum_modified_old_objects(long index) { return (object_t *)list_item( @@ -73,6 +86,12 @@ STM_PSEGMENT->objects_pointing_to_nursery, index); } +object_t *_stm_enum_old_objects_with_cards(long index) +{ + return (object_t *)list_item( + STM_PSEGMENT->old_objects_with_cards, index); +} + uint64_t _stm_total_allocated(void) { return increment_total_allocated(0); diff --git a/c7/stm/nursery.c b/c7/stm/nursery.c --- a/c7/stm/nursery.c +++ b/c7/stm/nursery.c @@ -65,6 +65,8 @@ object_t *obj = *pobj; object_t *nobj; uintptr_t nobj_sync_now; + char *realobj; + size_t size; if (obj == NULL) return; @@ -75,8 +77,6 @@ to GCWORD_MOVED. In that case, the forwarding location, i.e. where the object moved to, is stored in the second word in 'obj'. */ object_t *TLPREFIX *pforwarded_array = (object_t *TLPREFIX *)obj; - char *realobj; - size_t size; if (obj->stm_flags & GCFLAG_HAS_SHADOW) { /* ^^ the single check above detects both already-moved objects @@ -149,6 +149,7 @@ /* Must trace the object later */ LIST_APPEND(STM_PSEGMENT->objects_pointing_to_nursery, nobj_sync_now); + _cards_cleared_in_object(get_priv_segment(STM_SEGMENT->segment_num), nobj); } static void collect_roots_in_nursery(void) @@ -183,23 +184,192 @@ minor_trace_if_young(&tl->thread_local_obj); } +static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj) +{ +#ifndef NDEBUG + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + if (size < _STM_MIN_CARD_OBJ_SIZE) + return; /* too small for cards */ + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */ + + OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS_MAX + || write_locks[first_card_index] == 255); /* see gcpage.c */ + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + assert(write_locks[card_lock_idx] == CARD_CLEAR); + card_index++; + } + + assert(!(realobj->stm_flags & GCFLAG_CARDS_SET)); +#endif +} + +static void _verify_cards_cleared_in_all_lists(struct stm_priv_segment_info_s *pseg) +{ +#ifndef NDEBUG + LIST_FOREACH_R( + pseg->modified_old_objects, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); + + if (pseg->large_overflow_objects) { + LIST_FOREACH_R( + pseg->large_overflow_objects, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); + } + if (pseg->objects_pointing_to_nursery) { + LIST_FOREACH_R( + pseg->objects_pointing_to_nursery, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); + } + LIST_FOREACH_R( + pseg->old_objects_with_cards, object_t * /*item*/, + _cards_cleared_in_object(pseg, item)); +#endif +} + +static void _reset_object_cards(struct stm_priv_segment_info_s *pseg, + object_t *obj, uint8_t mark_value, + bool mark_all) +{ +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(pseg->pub.segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + OPT_ASSERT(size >= _STM_MIN_CARD_OBJ_SIZE); + assert(IMPLY(mark_value == CARD_CLEAR, !mark_all)); /* not necessary */ + assert(IMPLY(mark_all, mark_value == CARD_MARKED_OLD)); /* set *all* to OLD */ + assert(IMPLY(IS_OVERFLOW_OBJ(pseg, realobj), + mark_value == CARD_CLEAR)); /* overflows are always CLEARed */ + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */ + + OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS + || write_locks[first_card_index] == 255); /* see gcpage.c */ + + dprintf(("mark cards of %p, size %lu with %d, all: %d\n", + obj, size, mark_value, mark_all)); + dprintf(("obj has %lu cards\n", last_card_index)); + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + + if (mark_all || write_locks[card_lock_idx] != CARD_CLEAR) { + /* dprintf(("mark card %lu,wl:%lu of %p with %d\n", */ + /* card_index, card_lock_idx, obj, mark_value)); */ + write_locks[card_lock_idx] = mark_value; + } + card_index++; + } + + realobj->stm_flags &= ~GCFLAG_CARDS_SET; + +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") +} + + +static void _trace_card_object(object_t *obj) +{ + assert(!_is_in_nursery(obj)); + assert(obj->stm_flags & GCFLAG_CARDS_SET); + assert(obj->stm_flags & GCFLAG_WRITE_BARRIER); + + dprintf(("_trace_card_object(%p)\n", obj)); + bool obj_is_overflow = IS_OVERFLOW_OBJ(STM_PSEGMENT, obj); + uint8_t mark_value = obj_is_overflow ? CARD_CLEAR : CARD_MARKED_OLD; + + struct object_s *realobj = (struct object_s *)REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + size_t size = stmcb_size_rounded_up(realobj); + + uintptr_t first_card_index = get_write_lock_idx((uintptr_t)obj); + uintptr_t card_index = 1; + uintptr_t last_card_index = get_index_to_card_index(size - 1); /* max valid index */ + + OPT_ASSERT(write_locks[first_card_index] <= NB_SEGMENTS_MAX + || write_locks[first_card_index] == 255); /* see gcpage.c */ + + /* XXX: merge ranges */ + while (card_index <= last_card_index) { + uintptr_t card_lock_idx = first_card_index + card_index; + if (write_locks[card_lock_idx] == CARD_MARKED) { + /* clear or set to old: */ + write_locks[card_lock_idx] = mark_value; + + uintptr_t start = get_card_index_to_index(card_index); + uintptr_t stop = get_card_index_to_index(card_index + 1); + + dprintf(("trace_cards on %p with start:%lu stop:%lu\n", + obj, start, stop)); + stmcb_trace_cards(realobj, &minor_trace_if_young, + start, stop); + + } + + /* all cards should be cleared on overflow objs */ + assert(IMPLY(obj_is_overflow, + write_locks[card_lock_idx] == CARD_CLEAR)); + + card_index++; + } + obj->stm_flags &= ~GCFLAG_CARDS_SET; +} + + + static inline void _collect_now(object_t *obj) { assert(!_is_young(obj)); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); - /* We must not have GCFLAG_WRITE_BARRIER so far. Add it now. */ - assert(!(obj->stm_flags & GCFLAG_WRITE_BARRIER)); - obj->stm_flags |= GCFLAG_WRITE_BARRIER; + dprintf(("_collect_now: %p\n", obj)); - /* Trace the 'obj' to replace pointers to nursery with pointers - outside the nursery, possibly forcing nursery objects out and - adding them to 'objects_pointing_to_nursery' as well. */ - char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); - stmcb_trace((struct object_s *)realobj, &minor_trace_if_young); + if (!(obj->stm_flags & GCFLAG_WRITE_BARRIER)) { + /* Trace the 'obj' to replace pointers to nursery with pointers + outside the nursery, possibly forcing nursery objects out and + adding them to 'objects_pointing_to_nursery' as well. */ + char *realobj = REAL_ADDRESS(STM_SEGMENT->segment_base, obj); + stmcb_trace((struct object_s *)realobj, &minor_trace_if_young); + + obj->stm_flags |= GCFLAG_WRITE_BARRIER; + } + /* else traced in collect_cardrefs_to_nursery if necessary */ +} + + +static void collect_cardrefs_to_nursery(void) +{ + dprintf(("collect_cardrefs_to_nursery\n")); + struct list_s *lst = STM_PSEGMENT->old_objects_with_cards; + + while (!list_is_empty(lst)) { + object_t *obj = (object_t*)list_pop_item(lst); + + assert(!_is_young(obj)); + + if (!(obj->stm_flags & GCFLAG_CARDS_SET)) { + /* sometimes we remove the CARDS_SET in the WB slowpath, see core.c */ + continue; + } + + /* traces cards, clears marked cards or marks them old if necessary */ + _trace_card_object(obj); + + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); + } } static void collect_oldrefs_to_nursery(void) { + dprintf(("collect_oldrefs_to_nursery\n")); struct list_s *lst = STM_PSEGMENT->objects_pointing_to_nursery; while (!list_is_empty(lst)) { @@ -207,6 +377,7 @@ object_t *obj = (object_t *)(obj_sync_now & ~FLAG_SYNC_LARGE); _collect_now(obj); + assert(!(obj->stm_flags & GCFLAG_CARDS_SET)); if (obj_sync_now & FLAG_SYNC_LARGE) { /* this was a large object. We must either synchronize the @@ -214,13 +385,15 @@ WRITE_BARRIER flag and traced into it to fix its content); or add the object to 'large_overflow_objects'. */ + struct stm_priv_segment_info_s *pseg = get_priv_segment(STM_SEGMENT->segment_num); if (STM_PSEGMENT->minor_collect_will_commit_now) { acquire_privatization_lock(); - synchronize_object_now(obj); + synchronize_object_now(obj, true); /* ignore cards! */ release_privatization_lock(); } else { LIST_APPEND(STM_PSEGMENT->large_overflow_objects, obj); } + _cards_cleared_in_object(pseg, obj); } /* the list could have moved while appending */ @@ -230,12 +403,15 @@ static void collect_modified_old_objects(void) { - LIST_FOREACH_R(STM_PSEGMENT->modified_old_objects, object_t * /*item*/, - _collect_now(item)); + dprintf(("collect_modified_old_objects\n")); + LIST_FOREACH_R( + STM_PSEGMENT->modified_old_objects, object_t * /*item*/, + _collect_now(item)); } static void collect_roots_from_markers(uintptr_t num_old) { + dprintf(("collect_roots_from_markers\n")); /* visit the marker objects */ struct list_s *mlst = STM_PSEGMENT->modified_old_objects_markers; STM_PSEGMENT->modified_old_objects_markers_num_old = list_count(mlst); @@ -254,6 +430,11 @@ static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg) { +#pragma push_macro("STM_PSEGMENT") +#pragma push_macro("STM_SEGMENT") +#undef STM_PSEGMENT +#undef STM_SEGMENT + dprintf(("throw_away_nursery\n")); /* reset the nursery by zeroing it */ size_t nursery_used; char *realnursery; @@ -279,11 +460,13 @@ wlog_t *item; TREE_LOOP_FORWARD(*pseg->young_outside_nursery, item) { - assert(!_is_in_nursery((object_t *)item->addr)); + object_t *obj = (object_t*)item->addr; + assert(!_is_in_nursery(obj)); + /* mark slot as unread (it can only have the read marker in this segment) */ ((struct stm_read_marker_s *) - (pseg->pub.segment_base + (item->addr >> 4)))->rm = 0; + (pseg->pub.segment_base + (((uintptr_t)obj) >> 4)))->rm = 0; _stm_large_free(stm_object_pages + item->addr); } TREE_LOOP_END; @@ -292,7 +475,10 @@ } tree_clear(pseg->nursery_objects_shadows); + return nursery_used; +#pragma pop_macro("STM_SEGMENT") +#pragma pop_macro("STM_PSEGMENT") } #define MINOR_NOTHING_TO_DO(pseg) \ @@ -331,6 +517,7 @@ if (!commit && STM_PSEGMENT->large_overflow_objects == NULL) STM_PSEGMENT->large_overflow_objects = list_create(); + /* All the objects we move out of the nursery become "overflow" objects. We use the list 'objects_pointing_to_nursery' to hold the ones we didn't trace so far. */ @@ -338,6 +525,11 @@ if (STM_PSEGMENT->objects_pointing_to_nursery == NULL) { STM_PSEGMENT->objects_pointing_to_nursery = list_create(); + /* collect objs with cards, adds to objects_pointing_to_nursery + and makes sure there are no objs with cards left in + modified_old_objs */ + collect_cardrefs_to_nursery(); + /* See the doc of 'objects_pointing_to_nursery': if it is NULL, then it is implicitly understood to be equal to 'modified_old_objects'. We could copy modified_old_objects @@ -347,6 +539,7 @@ num_old = 0; } else { + collect_cardrefs_to_nursery(); num_old = STM_PSEGMENT->modified_old_objects_markers_num_old; } @@ -355,6 +548,7 @@ collect_roots_in_nursery(); collect_oldrefs_to_nursery(); + assert(list_is_empty(STM_PSEGMENT->old_objects_with_cards)); /* now all surviving nursery objects have been moved out */ stm_move_young_weakrefs(); @@ -428,6 +622,7 @@ char *result = allocate_outside_nursery_large(size_rounded_up); object_t *o = (object_t *)(result - stm_object_pages); + tree_insert(STM_PSEGMENT->young_outside_nursery, (uintptr_t)o, 0); memset(REAL_ADDRESS(STM_SEGMENT->segment_base, o), 0, size_rounded_up); @@ -529,6 +724,7 @@ memcpy(realnobj, realobj, size); obj->stm_flags |= GCFLAG_HAS_SHADOW; + tree_insert(STM_PSEGMENT->nursery_objects_shadows, (uintptr_t)obj, (uintptr_t)nobj); return nobj; diff --git a/c7/stm/nursery.h b/c7/stm/nursery.h --- a/c7/stm/nursery.h +++ b/c7/stm/nursery.h @@ -6,6 +6,10 @@ static uint32_t highest_overflow_number; +static void _cards_cleared_in_object(struct stm_priv_segment_info_s *pseg, object_t *obj); +static void _reset_object_cards(struct stm_priv_segment_info_s *pseg, + object_t *obj, uint8_t mark_value, + bool mark_all); static void minor_collection(bool commit); static void check_nursery_at_transaction_start(void); static size_t throw_away_nursery(struct stm_priv_segment_info_s *pseg); diff --git a/c7/stm/setup.c b/c7/stm/setup.c --- a/c7/stm/setup.c +++ b/c7/stm/setup.c @@ -83,6 +83,7 @@ { /* Check that some values are acceptable */ assert(NB_SEGMENTS <= NB_SEGMENTS_MAX); + assert(CARD_SIZE >= 32 && CARD_SIZE % 16 == 0); assert(4096 <= ((uintptr_t)STM_SEGMENT)); assert((uintptr_t)STM_SEGMENT == (uintptr_t)STM_PSEGMENT); assert(((uintptr_t)STM_PSEGMENT) + sizeof(*STM_PSEGMENT) <= 8192); @@ -117,6 +118,7 @@ pr->pub.segment_num = i; pr->pub.segment_base = segment_base; pr->objects_pointing_to_nursery = NULL; + pr->old_objects_with_cards = list_create(); pr->large_overflow_objects = NULL; pr->modified_old_objects = list_create(); pr->modified_old_objects_markers = list_create(); @@ -156,6 +158,7 @@ for (i = 1; i <= NB_SEGMENTS; i++) { struct stm_priv_segment_info_s *pr = get_priv_segment(i); assert(pr->objects_pointing_to_nursery == NULL); + list_free(pr->old_objects_with_cards); assert(pr->large_overflow_objects == NULL); list_free(pr->modified_old_objects); list_free(pr->modified_old_objects_markers); diff --git a/c7/stmgc.h b/c7/stmgc.h --- a/c7/stmgc.h +++ b/c7/stmgc.h @@ -107,6 +107,10 @@ /* this should use llvm's coldcc calling convention, but it's not exposed to C code so far */ void _stm_write_slowpath(object_t *); +void _stm_write_slowpath_card(object_t *, uintptr_t); +char _stm_write_slowpath_card_extra(object_t *); +long _stm_write_slowpath_card_extra_base(void); +#define _STM_CARD_MARKED 100 object_t *_stm_allocate_slowpath(ssize_t); object_t *_stm_allocate_external(ssize_t); void _stm_become_inevitable(const char*); @@ -120,6 +124,7 @@ #include <stdbool.h> bool _stm_was_read(object_t *obj); bool _stm_was_written(object_t *obj); +bool _stm_was_written_card(object_t *obj); uintptr_t _stm_get_private_page(uintptr_t pagenum); bool _stm_in_transaction(stm_thread_local_t *tl); char *_stm_get_segment_base(long index); @@ -137,12 +142,18 @@ void _stm_set_nursery_free_count(uint64_t free_count); long _stm_count_modified_old_objects(void); long _stm_count_objects_pointing_to_nursery(void); +long _stm_count_old_objects_with_cards(void); object_t *_stm_enum_modified_old_objects(long index); object_t *_stm_enum_objects_pointing_to_nursery(long index); +object_t *_stm_enum_old_objects_with_cards(long index); uint64_t _stm_total_allocated(void); #endif #define _STM_GCFLAG_WRITE_BARRIER 0x01 +#define _STM_GCFLAG_CARDS_SET 0x08 +#define _STM_CARD_SIZE 32 /* must be >= 32 */ +#define _STM_MIN_CARD_COUNT 17 +#define _STM_MIN_CARD_OBJ_SIZE (_STM_CARD_SIZE * _STM_MIN_CARD_COUNT) #define _STM_NSE_SIGNAL_MAX _STM_TIME_N #define _STM_FAST_ALLOC (66*1024) @@ -213,6 +224,20 @@ _stm_write_slowpath(obj); } +/* The following is a GC-optimized barrier that works on the granularity + of CARD_SIZE. It can be used on any array object, but it is only + useful with those that were internally marked with GCFLAG_HAS_CARDS. + It has the same purpose as stm_write() for TM. + 'index' is the array-item-based position within the object, which + is measured in units returned by stmcb_get_card_base_itemsize(). +*/ +__attribute__((always_inline)) +static inline void stm_write_card(object_t *obj, uintptr_t index) +{ + if (UNLIKELY((obj->stm_flags & _STM_GCFLAG_WRITE_BARRIER) != 0)) + _stm_write_slowpath_card(obj, index); +} + /* Must be provided by the user of this library. The "size rounded up" must be a multiple of 8 and at least 16. "Tracing" an object means enumerating all GC references in it, @@ -223,6 +248,16 @@ */ extern ssize_t stmcb_size_rounded_up(struct object_s *); extern void stmcb_trace(struct object_s *, void (object_t **)); +/* a special trace-callback that is only called for the marked + ranges of indices (using stm_write_card(o, index)) */ +extern void stmcb_trace_cards(struct object_s *, void (object_t **), + uintptr_t start, uintptr_t stop); +/* this function will be called on objects that support cards. + It returns the base_offset (in bytes) inside the object from + where the indices start, and item_size (in bytes) for the size of + one item */ +extern void stmcb_get_card_base_itemsize(struct object_s *, + uintptr_t offset_itemsize[2]); extern void stmcb_commit_soon(void); @@ -248,6 +283,7 @@ return (object_t *)p; } + /* Allocate a weakref object. Weakref objects have a reference to an object at the byte-offset stmcb_size_rounded_up(obj) - sizeof(void*) diff --git a/c7/test/support.py b/c7/test/support.py --- a/c7/test/support.py +++ b/c7/test/support.py @@ -12,6 +12,7 @@ #define STM_NB_SEGMENTS ... #define _STM_FAST_ALLOC ... #define _STM_GCFLAG_WRITE_BARRIER ... +#define _STM_CARD_SIZE ... #define STM_STACK_MARKER_NEW ... #define STM_STACK_MARKER_OLD ... @@ -41,6 +42,9 @@ object_t *stm_allocate_weakref(ssize_t size_rounded_up); object_t *_stm_allocate_old(ssize_t size_rounded_up); +/*void stm_write_card(); use _checked_stm_write_card() instead */ + + void stm_setup(void); void stm_teardown(void); void stm_register_thread_local(stm_thread_local_t *tl); @@ -49,8 +53,10 @@ object_t *stm_setup_prebuilt_weakref(object_t *); bool _checked_stm_write(object_t *obj); +bool _checked_stm_write_card(object_t *obj, uintptr_t index); bool _stm_was_read(object_t *obj); bool _stm_was_written(object_t *obj); +bool _stm_was_written_card(object_t *obj); char *_stm_real_address(object_t *obj); char *_stm_get_segment_base(long index); bool _stm_in_transaction(stm_thread_local_t *tl); @@ -92,8 +98,11 @@ long _stm_count_modified_old_objects(void); long _stm_count_objects_pointing_to_nursery(void); +long _stm_count_old_objects_with_cards(void); object_t *_stm_enum_modified_old_objects(long index); object_t *_stm_enum_objects_pointing_to_nursery(long index); +object_t *_stm_enum_old_objects_with_cards(long index); + void stm_collect(long level); uint64_t _stm_total_allocated(void); @@ -181,6 +190,10 @@ CHECKED(stm_write(object)); } +bool _checked_stm_write_card(object_t *object, uintptr_t index) { + CHECKED(stm_write_card(object, index)); +} + bool _check_stop_safe_point(void) { CHECKED(_stm_stop_safe_point()); } @@ -287,6 +300,36 @@ } } +void stmcb_trace_cards(struct object_s *obj, void visit(object_t **), + uintptr_t start, uintptr_t stop) +{ + int i; + struct myobj_s *myobj = (struct myobj_s*)obj; + if (myobj->type_id < 421420) { + /* basic case: no references */ + return; + } + + for (i=start; (i < myobj->type_id - 421420) && (i < stop); i++) { + object_t **ref = ((object_t **)(myobj + 1)) + i; + visit(ref); + } +} + +void stmcb_get_card_base_itemsize(struct object_s *obj, + uintptr_t offset_itemsize[2]) +{ + struct myobj_s *myobj = (struct myobj_s*)obj; + if (myobj->type_id < 421420) { + offset_itemsize[0] = SIZEOF_MYOBJ; + offset_itemsize[1] = 1; + } + else { + offset_itemsize[0] = sizeof(struct myobj_s); + offset_itemsize[1] = sizeof(object_t *); + } +} + void stm_push_marker(stm_thread_local_t *tl, uintptr_t onum, object_t *ob) { STM_PUSH_MARKER(*tl, onum, ob); @@ -323,6 +366,7 @@ HDR = lib.SIZEOF_MYOBJ assert HDR == 8 GCFLAG_WRITE_BARRIER = lib._STM_GCFLAG_WRITE_BARRIER +CARD_SIZE = lib._STM_CARD_SIZE # 16b at least NB_SEGMENTS = lib.STM_NB_SEGMENTS FAST_ALLOC = lib._STM_FAST_ALLOC @@ -371,22 +415,28 @@ lib._set_type_id(o, tid) return o -def stm_set_ref(obj, idx, ref): - stm_write(obj) +def stm_set_ref(obj, idx, ref, use_cards=False): + if use_cards: + stm_write_card(obj, idx) + else: + stm_write(obj) lib._set_ptr(obj, idx, ref) def stm_get_ref(obj, idx): stm_read(obj) return lib._get_ptr(obj, idx) -def stm_set_char(obj, c, offset=HDR): - stm_write(obj) +def stm_set_char(obj, c, offset=HDR, use_cards=False): assert HDR <= offset < stm_get_obj_size(obj) + if use_cards: + stm_write_card(obj, offset - HDR) + else: + stm_write(obj) stm_get_real_address(obj)[offset] = c def stm_get_char(obj, offset=HDR): + assert HDR <= offset < stm_get_obj_size(obj) stm_read(obj) - assert HDR <= offset < stm_get_obj_size(obj) return stm_get_real_address(obj)[offset] def stm_get_real_address(obj): @@ -395,16 +445,24 @@ def stm_read(o): lib.stm_read(o) + def stm_write(o): if lib._checked_stm_write(o): raise Conflict() +def stm_write_card(o, index): + if lib._checked_stm_write_card(o, index): + raise Conflict() + def stm_was_read(o): return lib._stm_was_read(o) def stm_was_written(o): return lib._stm_was_written(o) +def stm_was_written_card(o): + return lib._stm_was_written_card(o) + def stm_start_safe_point(): lib._stm_start_safe_point() @@ -445,6 +503,14 @@ return None return map(lib._stm_enum_objects_pointing_to_nursery, range(count)) +def old_objects_with_cards(): + count = lib._stm_count_old_objects_with_cards() + if count < 0: + return None + return map(lib._stm_enum_old_objects_with_cards, range(count)) + + + SHADOWSTACK_LENGTH = 1000 _keepalive = weakref.WeakKeyDictionary() diff --git a/c7/test/test_card_marking.py b/c7/test/test_card_marking.py new file mode 100644 --- /dev/null +++ b/c7/test/test_card_marking.py @@ -0,0 +1,223 @@ +from support import * +import py + + +class TestBasic(BaseTest): + + def _collect(self, kind): + if kind == 0: + stm_minor_collect() + elif kind == 1: + stm_major_collect() + elif kind == 2: + self.switch(1) + self.start_transaction() + stm_major_collect() + self.abort_transaction() + self.switch(0) + + def test_simple(self): + o = stm_allocate_old_refs(1024) + self.start_transaction() + stm_read(o) + stm_write(o) + self.commit_transaction() + + def test_simple2(self): + o = stm_allocate_old_refs(1024) + self.start_transaction() + stm_write_card(o, 5) + assert not stm_was_written(o) # don't remove GCFLAG_WRITE_BARRIER + assert stm_was_written_card(o) + self.commit_transaction() + + @py.test.mark.parametrize("k", range(3)) + def test_overflow(self, k): + self.start_transaction() + o = stm_allocate_refs(1024) + + self.push_root(o) + self._collect(k) + o = self.pop_root() + + stm_write_card(o, 5) + + assert o in old_objects_with_cards() + assert o not in modified_old_objects() # overflow object + assert o not in objects_pointing_to_nursery() + # don't remove GCFLAG_WB + assert not stm_was_written(o) + stm_write(o) + assert stm_was_written(o) + self.commit_transaction() + + def test_nursery(self): + o = stm_allocate_old_refs(200) + self.start_transaction() + p = stm_allocate(64) + stm_set_ref(o, 199, p, True) + + # without a write-barrier: + lib._set_ptr(o, 0, ffi.cast("object_t*", -1)) + + self.push_root(o) + stm_minor_collect() + o = self.pop_root() + + lib._set_ptr(o, 0, ffi.NULL) + + pn = stm_get_ref(o, 199) + assert not is_in_nursery(pn) + assert pn != p + + assert not stm_was_written(o) + stm_write_card(o, 2) + assert stm_was_written_card(o) + + # card cleared after last collection, + # so no retrace of index 199: + + # without a write-barrier: + lib._set_ptr(o, 199, ffi.cast("object_t*", -1)) + self.push_root(o) + stm_minor_collect() + o = self.pop_root() + + def test_nursery2(self): + o = stm_allocate_old_refs(200) + self.start_transaction() + p = stm_allocate(64) + d = stm_allocate(64) + e = stm_allocate(64) + stm_set_ref(o, 199, p, True) + stm_set_ref(o, 1, d, False) + lib._set_ptr(o, 100, e) # no barrier + + self.push_root(o) + stm_minor_collect() + o = self.pop_root() + + # stm_write in stm_set_ref made it trace everything + assert not is_in_nursery(stm_get_ref(o, 199)) + assert not is_in_nursery(stm_get_ref(o, 1)) + assert not is_in_nursery(stm_get_ref(o, 100)) + + def test_nursery3(self): + o = stm_allocate_old_refs(2000) + self.start_transaction() + stm_minor_collect() + + p = stm_allocate(64) + d = stm_allocate(64) + stm_set_ref(o, 1999, p, True) + stm_set_ref(o, 1, d, True) + + lib._set_ptr(o, 1000, ffi.cast("object_t*", -1)) + + assert not stm_was_written(o) + assert stm_was_written_card(o) + + self.push_root(o) + stm_minor_collect() + o = self.pop_root() + + assert not is_in_nursery(stm_get_ref(o, 1999)) + assert not is_in_nursery(stm_get_ref(o, 1)) + + + def test_abort_cleanup(self): + o = stm_allocate_old_refs(200) + self.start_transaction() + stm_minor_collect() + + p = stm_allocate_refs(64) + d = stm_allocate(64) + e = stm_allocate(64) + stm_set_ref(o, 199, p, True) + stm_set_ref(o, 1, d, True) + stm_set_ref(p, 1, e) + + self.abort_transaction() + + assert not modified_old_objects() + assert not objects_pointing_to_nursery() + assert not old_objects_with_cards() + + self.start_transaction() + d = stm_allocate(64) + e = stm_allocate(64) + lib._set_ptr(o, 199, d) # no barrier + stm_set_ref(o, 1, e, True) # card barrier + + self.push_root(o) + stm_minor_collect() + o = self.pop_root() + + assert not is_in_nursery(stm_get_ref(o, 1)) + assert is_in_nursery(stm_get_ref(o, 199)) # not traced + + @py.test.mark.parametrize("k", range(3)) + def test_major_gc(self, k): + o = stm_allocate_old_refs(200) + self.start_transaction() + p = stm_allocate(64) + stm_set_ref(o, 0, p, True) + + self.push_root(o) + stm_major_collect() + o = self.pop_root() + + stm_set_ref(o, 1, ffi.NULL, True) + p = stm_get_ref(o, 0) + assert stm_was_written_card(o) + + self.push_root(o) + self._collect(k) + o = self.pop_root() + + assert not stm_was_written_card(o) + assert stm_get_ref(o, 0) == p + self.commit_transaction() + + def test_synchronize_objs(self): + o = stm_allocate_old(1000+20*CARD_SIZE) + + self.start_transaction() + stm_set_char(o, 'a', 1000, False) + self.commit_transaction() + + self.switch(1) + + self.start_transaction() + stm_set_char(o, 'b', 1001, False) + assert stm_get_char(o, 1000) == 'a' + self.commit_transaction() + + self.switch(0) + + self.start_transaction() + assert stm_get_char(o, 1001) == 'b' + + stm_set_char(o, 'c', 1000, True) + stm_set_char(o, 'c', 1000+CARD_SIZE, True) + stm_set_char(o, 'c', 1000+CARD_SIZE*2, True) + stm_set_char(o, 'c', 1000+CARD_SIZE*3, True) + + stm_set_char(o, 'd', 1000+CARD_SIZE*10, True) + + stm_set_char(o, 'e', 1000+CARD_SIZE*12, True) + self.commit_transaction() + + self.switch(1) + + self.start_transaction() + assert stm_get_char(o, 1000) == 'c' + assert stm_get_char(o, 1000+CARD_SIZE) == 'c' + assert stm_get_char(o, 1000+CARD_SIZE*2) == 'c' + assert stm_get_char(o, 1000+CARD_SIZE*3) == 'c' + + assert stm_get_char(o, 1000+CARD_SIZE*10) == 'd' + + assert stm_get_char(o, 1000+CARD_SIZE*12) == 'e' + + self.commit_transaction() diff --git a/c7/test/test_random.py b/c7/test/test_random.py --- a/c7/test/test_random.py +++ b/c7/test/test_random.py @@ -36,17 +36,22 @@ # we win but cannot wait in tests... raise WriteWriteConflictNotTestable - if our_trs.inevitable: + if our_trs.start_time >= other_trs.start_time: + abort_other = False + else: + abort_other = True + + if other_trs.check_must_abort(): + abort_other = True + elif our_trs.inevitable: + abort_other = True + elif other_trs.inevitable: + abort_other = False + + if not abort_other: + our_trs.set_must_abort(objs_in_conflict) + else: other_trs.set_must_abort(objs_in_conflict) - elif other_trs.start_time < our_trs.start_time: - pass - elif not other_trs.inevitable: - other_trs.set_must_abort(objs_in_conflict) - - if not other_trs.check_must_abort(): - our_trs.set_must_abort(objs_in_conflict) - elif wait: - assert not our_trs.inevitable class TransactionState(object): @@ -375,7 +380,7 @@ thread_state.register_root(r) def op_allocate_ref(ex, global_state, thread_state): - num = str(global_state.rnd.randrange(1, 100)) + num = str(global_state.rnd.randrange(1, 1000)) r = global_state.get_new_root_name(True, num) thread_state.push_roots(ex) ex.do('%s = stm_allocate_refs(%s)' % (r, num)) @@ -410,6 +415,7 @@ r = thread_state.get_random_root() trs = thread_state.transaction_state is_ref = global_state.has_ref_type(r) + try_cards = global_state.rnd.randrange(1, 100) > 5# and False # # check for possible write-write conflict: was_written = False @@ -438,13 +444,13 @@ thread_state.abort_transaction() offset = global_state.get_root_size(r) + " - 1" if is_ref: - ex.do(raising_call(aborts, "stm_set_ref", r, offset, v)) + ex.do(raising_call(aborts, "stm_set_ref", r, offset, v, try_cards)) if not aborts: - ex.do(raising_call(False, "stm_set_ref", r, "0", v)) + ex.do(raising_call(False, "stm_set_ref", r, "0", v, try_cards)) else: - ex.do(raising_call(aborts, "stm_set_char", r, repr(chr(v)), offset)) + ex.do(raising_call(aborts, "stm_set_char", r, repr(chr(v)), offset, try_cards)) if not aborts: - ex.do(raising_call(False, "stm_set_char", r, repr(chr(v)), "HDR")) + ex.do(raising_call(False, "stm_set_char", r, repr(chr(v)), "HDR", try_cards)) def op_read(ex, global_state, thread_state): r = thread_state.get_random_root() _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit