Author: Armin Rigo <ar...@tunes.org>
Branch: nogil-unsafe-2
Changeset: r92161:e40f8472eb81
Date: 2017-08-17 11:38 +0200
http://bitbucket.org/pypy/pypy/changeset/e40f8472eb81/
Log: Attempt to reduce false sharing between threads. Unclear results

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -280,7 +280,7 @@
         # "cache_line_min" is used to round the actual thread-local
         # blocks to a cache line, to avoid pointless cache conflicts.
         "tl_block_size": 131072,
-        "cache_line_min": 256,    # why not 64b?
+        "cache_line_min": 128,    # two cache lines on x86
         }
 
     def __init__(self, config,
@@ -313,6 +313,7 @@
         self.max_heap_size_already_raised = False
         self.max_delta = float(r_uint(-1))
         self.max_number_of_pinned_objects = 0    # computed later
+        self.collecting_roots_in_nursery = False
         #
         self.card_page_indices = card_page_indices
         if self.card_page_indices > 0:
@@ -1983,13 +1984,20 @@
         # see them.
         use_jit_frame_stoppers = not any_pinned_object_from_earlier
         #
+        self.collecting_roots_in_nursery = True
         self.root_walker.walk_roots(
             callback,     # stack roots
             callback,     # static in prebuilt non-gc
             None,         # static in prebuilt gc
             is_minor=use_jit_frame_stoppers)
+        self.collecting_roots_in_nursery = False
         debug_stop("gc-minor-walkroots")
 
+    def collected_roots_for_one_thread(self):
+        if self.collecting_roots_in_nursery:
+            self.collect_oldrefs_to_nursery()
+            self.ac.force_non_sharing_by_dummy_allocation(self.cache_line_min)
+
     def collect_cardrefs_to_nursery(self):
         size_gc_header = self.gcheaderbuilder.size_gc_header
         oldlist = self.old_objects_with_cards_set
diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py
--- a/rpython/memory/gc/minimarkpage.py
+++ b/rpython/memory/gc/minimarkpage.py
@@ -191,6 +191,30 @@
         return result
 
+    def force_non_sharing_by_dummy_allocation(self, alignment):
+        """Force a few bytes of memory to be lost, to ensure that
+        a CPU cache of size "alignment" would not cause false sharing
+        between objects allocated just before and objects allocated
+        just after the call to the present function.
+        """
+        size_class_max = self.small_request_threshold >> WORD_POWER_2
+        size_class = 1
+        while size_class <= size_class_max:
+            page = self.page_for_size[size_class]
+            if page != PAGE_NULL:
+                next_alloc = page.freeblock
+                allocation_start = llmemory.cast_ptr_to_adr(page) + self.hdrsize
+                if next_alloc != allocation_start:
+                    next_alloc = rffi.cast(lltype.Signed, next_alloc)
+                    rounded_up = (next_alloc + (alignment-1)) & ~(alignment-1)
+                    while next_alloc < rounded_up:
+                        self.malloc(size_class << WORD_POWER_2)
+                        if self.page_for_size[size_class] != page:
+                            break
+                        next_alloc = rffi.cast(lltype.Signed, page.freeblock)
+            size_class += 1
+
+
     def allocate_new_page(self, size_class):
         """Allocate and return a new page for the given size_class."""
         #
diff --git a/rpython/memory/gctransform/shadowstack.py b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -113,6 +113,7 @@
             debug_print("walk_stack", base, top)
             walk_stack_root(self.invoke_collect_stack_root, collect_stack_root,
                             None, base, top, is_minor=False)
+            self.gcdata.gc.collected_roots_for_one_thread()
         self._walk_thread_stack = walk_thread_stack
 
diff --git a/rpython/translator/c/src/threadlocal.c b/rpython/translator/c/src/threadlocal.c
--- a/rpython/translator/c/src/threadlocal.c
+++ b/rpython/translator/c/src/threadlocal.c
@@ -11,32 +11,36 @@
 #include "src/thread.h"
 
 
-/* this is a spin-lock that must be acquired around each doubly-linked-list
+/* this is a reentrant lock that must be acquired around each doubly-linked-list
    manipulation (because such manipulations can occur without the GIL) */
-static long pypy_threadlocal_lock = 0;
+static pthread_mutex_t _rpy_threadlocal_lock;
 
 static int check_valid(void);
 
-int _RPython_ThreadLocals_AcquireTimeout(int max_wait_iterations) {
-    while (1) {
-        long old_value = pypy_lock_test_and_set(&pypy_threadlocal_lock, 1);
-        if (old_value == 0)
-            break;
-        /* busy loop */
-        if (max_wait_iterations == 0)
-            return -1;
-        if (max_wait_iterations > 0)
-            --max_wait_iterations;
+static void do_check(int result)
+{
+    if (result != 0) {
+        fprintf(stderr, "threadlocal.c got an unexpected mutex error\n");
+        exit(1);
     }
+}
+
+static void init_lock(void)
+{
+    pthread_mutexattr_t attr;
+    do_check(pthread_mutexattr_init(&attr)
+             || pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)
+             || pthread_mutex_init(&_rpy_threadlocal_lock, &attr)
+             || pthread_mutexattr_destroy(&attr));
+}
+
+void _RPython_ThreadLocals_Acquire(void) {
+    do_check(pthread_mutex_lock(&_rpy_threadlocal_lock));
     assert(check_valid());
-    return 0;
-}
-void _RPython_ThreadLocals_Acquire(void) {
-    _RPython_ThreadLocals_AcquireTimeout(-1);
 }
 
 void _RPython_ThreadLocals_Release(void) {
     assert(check_valid());
-    pypy_lock_release(&pypy_threadlocal_lock);
+    do_check(pthread_mutex_unlock(&_rpy_threadlocal_lock));
 }
 
@@ -73,6 +77,7 @@
 {
     /* assume that at most one pypy_threadlocal_s survived, the current one */
     struct pypy_threadlocal_s *cur;
+    init_lock();
     cur = (struct pypy_threadlocal_s *)_RPy_ThreadLocals_Get();
     if (cur && cur->ready == 42) {
         cur->next = cur->prev = &linkedlist_head;
@@ -81,7 +86,6 @@
     else {
         linkedlist_head.next = linkedlist_head.prev = &linkedlist_head;
     }
-    _RPython_ThreadLocals_Release();
 }
 
@@ -188,7 +192,7 @@
        a non-null thread-local value).  This is needed even in the
       case where we use '__thread' below, for the destructor.
     */
-    assert(pypy_threadlocal_lock == 0);
+    init_lock();
 #ifdef _WIN32
     pypy_threadlocal_key = TlsAlloc();
     if (pypy_threadlocal_key == TLS_OUT_OF_INDEXES)
diff --git a/rpython/translator/c/src/threadlocal.h b/rpython/translator/c/src/threadlocal.h
--- a/rpython/translator/c/src/threadlocal.h
+++ b/rpython/translator/c/src/threadlocal.h
@@ -21,7 +21,6 @@
 
 RPY_EXTERN void _RPython_ThreadLocals_Acquire(void);
 RPY_EXTERN void _RPython_ThreadLocals_Release(void);
-RPY_EXTERN int _RPython_ThreadLocals_AcquireTimeout(int max_wait_iterations);
 
 /* Must acquire/release the thread-local lock around a series of calls
    to the following function */
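
For context on the log message: "false sharing" is when two threads repeatedly
write to distinct variables that happen to sit on the same CPU cache line, so
the line ping-pongs between the cores' caches even though no data is logically
shared. The stand-alone C program below (an illustration, not part of this
commit; the 64-byte line size and the iteration count are assumptions) makes
the effect measurable by timing two threads bumping counters packed into one
cache line against the same threads bumping counters padded onto separate
lines:

/* false-sharing demo: compile with  cc -O2 demo.c -lpthread  */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define LINE 64                     /* assumed cache-line size */
#define N    100000000L

/* each counter padded to its own cache line */
struct padded { volatile long value; char pad[LINE - sizeof(long)]; };
static struct padded separate[2];

/* two counters packed into the same cache line */
static volatile long shared_line[2];

static void *bump_separate(void *arg)
{
    long i, idx = (long)arg;
    for (i = 0; i < N; i++)
        separate[idx].value++;
    return NULL;
}

static void *bump_shared(void *arg)
{
    long i, idx = (long)arg;
    for (i = 0; i < N; i++)
        shared_line[idx]++;
    return NULL;
}

static double run(void *(*fn)(void *))
{
    pthread_t t[2];
    struct timespec t0, t1;
    long i;
    clock_gettime(CLOCK_MONOTONIC, &t0);
    for (i = 0; i < 2; i++)
        pthread_create(&t[i], NULL, fn, (void *)i);
    for (i = 0; i < 2; i++)
        pthread_join(t[i], NULL);
    clock_gettime(CLOCK_MONOTONIC, &t1);
    return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) * 1e-9;
}

int main(void)
{
    printf("same cache line:      %.2fs\n", run(bump_shared));
    printf("separate cache lines: %.2fs\n", run(bump_separate));
    return 0;
}

On typical x86 hardware the packed variant is often several times slower;
rounding each thread's nursery blocks to cache_line_min is meant to avoid
paying exactly this cost on GC-managed objects.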
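The core of force_non_sharing_by_dummy_allocation() is the usual
round-up-to-alignment formula, rounded_up = (next + (alignment-1)) &
~(alignment-1), followed by a loop that burns dummy allocations until the free
pointer reaches that boundary. Here is a toy single-bump-pointer rendition in
C; the real code instead walks every small-object size class and its current
page in minimarkpage.py, and all names and sizes below are invented for
illustration:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct bump { char *next; char *end; };

static void *bump_malloc(struct bump *b, size_t size)
{
    char *p = b->next;
    if ((size_t)(b->end - b->next) < size)
        return NULL;                /* toy allocator: no refill logic */
    b->next += size;
    return p;
}

/* same rounding as the patch:
   rounded_up = (next_alloc + (alignment-1)) & ~(alignment-1) */
static void force_non_sharing(struct bump *b, uintptr_t alignment)
{
    uintptr_t next = (uintptr_t)b->next;
    uintptr_t rounded_up = (next + (alignment - 1)) & ~(alignment - 1);
    while ((uintptr_t)b->next < rounded_up) {
        /* dummy allocation: these bytes are deliberately lost */
        if (bump_malloc(b, sizeof(long)) == NULL)
            break;                  /* arena exhausted: give up */
    }
}

int main(void)
{
    static _Alignas(128) char arena[4096];
    struct bump b = { arena, arena + sizeof(arena) };

    bump_malloc(&b, 24);            /* leaves the free pointer misaligned */
    force_non_sharing(&b, 128);     /* 128 == the new cache_line_min */
    assert(((uintptr_t)b.next & 127) == 0);
    return 0;
}

The bytes skipped between the old free pointer and the boundary are wasted on
purpose, which is cheap next to a cache line bouncing between cores.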
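Finally, on the C side the hand-rolled spin-lock around the thread-local
linked list is replaced by a recursive pthread mutex. PTHREAD_MUTEX_RECURSIVE
lets the thread that already holds the lock take it again instead of
deadlocking, which neither a plain mutex nor the old spin-lock would allow.
A minimal stand-alone demonstration of the pattern (not PyPy code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock;

static void inner(void)
{
    pthread_mutex_lock(&lock);      /* second acquisition, same thread */
    printf("re-entered safely\n");
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&lock, &attr);
    pthread_mutexattr_destroy(&attr);

    pthread_mutex_lock(&lock);      /* first acquisition */
    inner();                        /* would deadlock with a plain mutex */
    pthread_mutex_unlock(&lock);
    return 0;
}

The patch's do_check() wrapper plays the same role as checking each return
value here: the pthread_* calls report failure through their return code, not
through errno.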