Author: Armin Rigo <ar...@tunes.org> Branch: stm-gc Changeset: r54332:2abd27c473de Date: 2012-04-13 15:21 +0200 http://bitbucket.org/pypy/pypy/changeset/2abd27c473de/
Log: Intermediate check-in. diff --git a/pypy/rpython/memory/gc/stmgc.py b/pypy/rpython/memory/gc/stmgc.py --- a/pypy/rpython/memory/gc/stmgc.py +++ b/pypy/rpython/memory/gc/stmgc.py @@ -33,7 +33,10 @@ # - The LOCAL objects might be YOUNG or OLD depending on whether they # already survived a collection. YOUNG LOCAL objects are either in # the nursery or, if they are big, raw-malloced. OLD LOCAL objects -# are in the shared area. +# are in the shared area. Getting the write barrier right for both +# this and the general STM mechanisms is tricky, so for now this GC +# is not actually generational (slow when running long transactions +# or before running transactions at all). # GCFLAG_GLOBAL = first_gcflag << 0 # keep in sync with et.c GCFLAG_WAS_COPIED = first_gcflag << 1 # keep in sync with et.c @@ -65,7 +68,7 @@ TRANSLATION_PARAMS = { 'stm_operations': 'use_real_one', - 'nursery_size': 4*1024*1024, # 4 MB + 'nursery_size': 32*1024*1024, # 32 MB "page_size": 1024*WORD, # copied from minimark.py "arena_size": 65536*WORD, # copied from minimark.py @@ -120,32 +123,20 @@ self.main_thread_tls = StmGCTLS(self, in_main_thread=True) self.main_thread_tls.start_transaction() + @always_inline def get_tls(self): from pypy.rpython.memory.gc.stmtls import StmGCTLS tls = self.stm_operations.get_tls() return StmGCTLS.cast_address_to_tls_object(tls) + def enter_transactional_mode(self): + self.main_thread_tls.enter_transactional_mode() + + def leave_transactional_mode(self): + self.main_thread_tls.leave_transactional_mode() + # ---------- - @always_inline - def allocate_bump_pointer(self, size): - tls = self.collector.get_tls() - free = tls.nursery_free - top = tls.nursery_top - if (top - free) < llmemory.raw_malloc_usage(size): - free = self.local_collection(size) - tls.nursery_free = free + size - return free - - @dont_inline - def local_collection(self, size): - tls = self.collector.get_tls() - if not tls.nursery_free: - fatalerror("malloc in a non-main thread but outside a transaction") - #... - xxxxxxxxx - - def malloc_fixedsize_clear(self, typeid, size, needs_finalizer=False, is_finalizer_light=False, @@ -159,7 +150,7 @@ # Get the memory from the nursery. size_gc_header = self.gcheaderbuilder.size_gc_header totalsize = size_gc_header + size - result = self.allocate_bump_pointer(totalsize) + result = self.get_tls().allocate_bump_pointer(totalsize) # # Build the object. llarena.arena_reserve(result, totalsize) @@ -180,7 +171,7 @@ nonvarsize = size_gc_header + size totalsize = nonvarsize + itemsize * length totalsize = llarena.round_up_for_allocation(totalsize) - result = self.allocate_bump_pointer(totalsize) + result = self.get_tls().allocate_bump_pointer(totalsize) llarena.arena_reserve(result, totalsize) obj = result + size_gc_header self.init_gc_object(result, typeid, flags=0) @@ -315,7 +306,7 @@ def _stm_write_barrier_global(obj): if not stm_operations.in_transaction(): return obj - # we need to find of make a local copy + # we need to find or make a local copy hdr = self.header(obj) if hdr.tid & GCFLAG_WAS_COPIED == 0: # in this case, we are sure that we don't have a copy diff --git a/pypy/rpython/memory/gc/stmtls.py b/pypy/rpython/memory/gc/stmtls.py --- a/pypy/rpython/memory/gc/stmtls.py +++ b/pypy/rpython/memory/gc/stmtls.py @@ -3,9 +3,11 @@ from pypy.rpython.annlowlevel import cast_base_ptr_to_instance, base_ptr_lltype from pypy.rlib.objectmodel import we_are_translated, free_non_gc_object from pypy.rlib.rarithmetic import r_uint -from pypy.rlib.debug import ll_assert +from pypy.rlib.debug import ll_assert, debug_start, debug_stop, fatalerror from pypy.rpython.memory.gc.stmgc import WORD, NULL +from pypy.rpython.memory.gc.stmgc import always_inline, dont_inline +from pypy.rpython.memory.gc.stmgc import GCFLAG_GLOBAL class StmGCTLS(object): @@ -34,21 +36,18 @@ self.nursery_size = self.gc.nursery_size self.nursery_start = self._alloc_nursery(self.nursery_size) # - # --- the local raw-malloced objects, young and old - self.rawmalloced_young_objects = self.null_address_dict() - self.rawmalloced_old_objects = None - self.rawmalloced_total_size = r_uint(0) - # --- the local objects with weakrefs, young and old - self.young_objects_with_weakrefs = self.AddressStack() - self.old_objects_with_weakrefs = self.AddressStack() - # --- support for id and identityhash: maps nursery objects with - # GCFLAG_HAS_SHADOW to their future location at the next - # local collection - self.nursery_objects_shadows = self.AddressDict() + # --- the local raw-malloced objects (chained list via hdr.version) + self.rawmalloced_objects = NULL + # --- the local "normal" old objects (chained list via hdr.version) + self.old_objects = NULL + # --- the local objects with weakrefs (chained list via hdr.version) + #self.young_objects_with_weakrefs = NULL + #self.old_objects_with_weakrefs = NULL # self._register_with_C_code() def teardown_thread(self): + self._cleanup_state() self._unregister_with_C_code() self._free_nursery(self.nursery_start) free_non_gc_object(self) @@ -67,9 +66,9 @@ tls = cast_instance_to_base_ptr(self) tlsaddr = llmemory.cast_ptr_to_adr(tls) else: - n = 10000 + len(self.nontranslated_dict) + n = 10000 + len(StmGCTLS.nontranslated_dict) tlsaddr = rffi.cast(llmemory.Address, n) - self.nontranslated_dict[n] = self + StmGCTLS.nontranslated_dict[n] = self self.stm_operations.set_tls(tlsaddr, int(self.in_main_thread)) def _unregister_with_C_code(self): @@ -78,16 +77,40 @@ self.stm_operations.del_tls() @staticmethod - def cast_address_to_tls_object(self, tlsaddr): + @always_inline + def cast_address_to_tls_object(tlsaddr): if we_are_translated(): tls = llmemory.cast_adr_to_ptr(tlsaddr, base_ptr_lltype()) return cast_base_ptr_to_instance(tls) else: n = rffi.cast(lltype.Signed, tlsaddr) - return self.nontranslated_dict[n] + return StmGCTLS.nontranslated_dict[n] + + def _disable_mallocs(self): + ll_assert(bool(self.nursery_free), "disable_mallocs: already disabled") + self.nursery_pending_clear = self.nursery_free - self.nursery_start + self.nursery_free = NULL + self.nursery_top = NULL # ------------------------------------------------------------ + def enter_transactional_mode(self): + """Called on the main thread, just before spawning the other + threads.""" + self.local_collection() + if not self.local_nursery_is_empty(): + self.local_collection(run_finalizers=False) + self._promote_locals_to_globals() + self._disable_mallocs() + + def leave_transactional_mode(self): + """Restart using the main thread for mallocs.""" + if not we_are_translated(): + for key, value in StmGCTLS.nontranslated_dict.items(): + if value is not self: + del StmGCTLS.nontranslated_dict[key] + self.start_transaction() + def start_transaction(self): """Enter a thread: performs any pending cleanups, and set up a fresh state for allocating. Called at the start of @@ -96,29 +119,88 @@ # end_of_transaction_collection() are not balanced: if a # transaction is aborted, the latter might never be called. # Be ready here to clean up any state. + self._cleanup_state() if self.nursery_free: clear_size = self.nursery_free - self.nursery_start else: clear_size = self.nursery_pending_clear + self.nursery_pending_clear = 0 if clear_size > 0: llarena.arena_reset(self.nursery_start, clear_size, 2) - self.nursery_pending_clear = 0 - if self.rawmalloced_young_objects: - xxx - if self.rawmalloced_old_objects: - xxx self.nursery_free = self.nursery_start self.nursery_top = self.nursery_start + self.nursery_size + def local_nursery_is_empty(self): + ll_assert(self.nursery_free, "local_nursery_is_empty: gc not running") + return self.nursery_free == self.nursery_start + # ------------------------------------------------------------ - def local_collection(self): + def local_collection(self, run_finalizers=True): """Do a local collection. Finds all surviving young objects and make them old. Also looks for roots from the stack. The flag GCFLAG_WAS_COPIED is kept and the C tree is updated if the local young object moves. """ - xxx + # + debug_start("gc-local") + # + # First, find the roots that point to young objects. All nursery + # objects found are copied out of the nursery, and the occasional + # young raw-malloced object is flagged with GCFLAG_VISITED. + # Note that during this step, we ignore references to further + # young objects; only objects directly referenced by roots + # are copied out or flagged. They are also added to the list + # 'old_objects_pointing_to_young'. + self.collect_roots_in_nursery() + # + while True: + # If we are using card marking, do a partial trace of the arrays + # that are flagged with GCFLAG_CARDS_SET. + if self.card_page_indices > 0: + self.collect_cardrefs_to_nursery() + # + # Now trace objects from 'old_objects_pointing_to_young'. + # All nursery objects they reference are copied out of the + # nursery, and again added to 'old_objects_pointing_to_young'. + # All young raw-malloced object found are flagged GCFLAG_VISITED. + # We proceed until 'old_objects_pointing_to_young' is empty. + self.collect_oldrefs_to_nursery() + # + # We have to loop back if collect_oldrefs_to_nursery caused + # new objects to show up in old_objects_with_cards_set + if self.card_page_indices > 0: + if self.old_objects_with_cards_set.non_empty(): + continue + break + # + # Now all live nursery objects should be out. Update the young + # weakrefs' targets. + if self.young_objects_with_weakrefs.non_empty(): + self.invalidate_young_weakrefs() + if self.young_objects_with_light_finalizers.non_empty(): + self.deal_with_young_objects_with_finalizers() + # + # Clear this mapping. + if self.nursery_objects_shadows.length() > 0: + self.nursery_objects_shadows.clear() + # + # Walk the list of young raw-malloced objects, and either free + # them or make them old. + if self.young_rawmalloced_objects: + self.free_young_rawmalloced_objects() + # + # All live nursery objects are out, and the rest dies. Fill + # the whole nursery with zero and reset the current nursery pointer. + llarena.arena_reset(self.nursery, self.nursery_size, 2) + self.debug_rotate_nursery() + self.nursery_free = self.nursery + # + debug_print("minor collect, total memory used:", + self.get_total_memory_used()) + if self.DEBUG >= 2: + self.debug_check_consistency() # expensive! + debug_stop("gc-minor") def end_of_transaction_collection(self): """Do an end-of-transaction collection. Finds all surviving @@ -133,3 +215,49 @@ xxx # ------------------------------------------------------------ + + @always_inline + def allocate_bump_pointer(self, size): + free = self.nursery_free + top = self.nursery_top + if (top - free) < llmemory.raw_malloc_usage(size): + free = self.allocate_object_of_size(size) + self.nursery_free = free + size + return free + + @dont_inline + def allocate_object_of_size(self, size): + if not self.nursery_free: + fatalerror("malloc in a non-main thread but outside a transaction") + if size > self.nursery_size: + fatalerror("object too large to ever fit in the nursery") + while True: + self.local_collection() + free = self.nursery_free + top = self.nursery_top + if (top - free) < llmemory.raw_malloc_usage(size): + continue # try again + return free + + # ------------------------------------------------------------ + + def _promote_locals_to_globals(self): + ll_assert(self.local_nursery_is_empty(), "nursery must be empty [1]") + # + obj = self.old_objects + self.old_objects = NULL + while obj: + hdr = self.header(obj) + hdr.tid |= GCFLAG_GLOBAL + obj = hdr.version + # + obj = self.rawmalloced_objects + self.rawmalloced_objects = NULL + while obj: + hdr = self.header(obj) + hdr.tid |= GCFLAG_GLOBAL + obj = hdr.version + + def _cleanup_state(self): + if self.rawmalloced_objects: + xxx # free the rawmalloced_objects still around diff --git a/pypy/rpython/memory/gc/test/test_stmgc.py b/pypy/rpython/memory/gc/test/test_stmgc.py --- a/pypy/rpython/memory/gc/test/test_stmgc.py +++ b/pypy/rpython/memory/gc/test/test_stmgc.py @@ -133,7 +133,7 @@ return llmemory.offsetof(WR, 'wadr') -class TestBasic: +class StmGCTests: GCClass = StmGC def setup_method(self, meth): @@ -155,6 +155,7 @@ if key != 0: self.gc.stm_operations.threadnum = key self.gc.teardown_thread() + self.gc.stm_operations.threadnum = 0 # ---------- # test helpers @@ -187,14 +188,18 @@ meth = getattr(self.gc, 'read_int%d' % WORD) return meth(obj, offset) + +class TestBasic(StmGCTests): + def test_gc_creation_works(self): pass def test_allocate_bump_pointer(self): - a3 = self.gc.allocate_bump_pointer(3) - a4 = self.gc.allocate_bump_pointer(4) - a5 = self.gc.allocate_bump_pointer(5) - a6 = self.gc.allocate_bump_pointer(6) + tls = self.gc.main_thread_tls + a3 = tls.allocate_bump_pointer(3) + a4 = tls.allocate_bump_pointer(4) + a5 = tls.allocate_bump_pointer(5) + a6 = tls.allocate_bump_pointer(6) assert a4 - a3 == 3 assert a5 - a4 == 4 assert a6 - a5 == 5 @@ -210,7 +215,7 @@ def test_malloc_main_vs_thread(self): gcref = self.gc.malloc_fixedsize_clear(123, llmemory.sizeof(S)) obj = llmemory.cast_ptr_to_adr(gcref) - assert self.gc.header(obj).tid & GCFLAG_GLOBAL != 0 + assert self.gc.header(obj).tid & GCFLAG_GLOBAL == 0 # self.select_thread(1) gcref = self.gc.malloc_fixedsize_clear(123, llmemory.sizeof(S)) @@ -588,7 +593,3 @@ assert a == sr1_adr a = self.gc.stm_normalize_global(tr1_adr) assert a == sr1_adr - - def test_alloc_a_lot_from_main_thread(self): - for i in range(1000): - sr1, sr1_adr = self.malloc(SR) diff --git a/pypy/rpython/memory/gc/test/test_stmtls.py b/pypy/rpython/memory/gc/test/test_stmtls.py new file mode 100644 --- /dev/null +++ b/pypy/rpython/memory/gc/test/test_stmtls.py @@ -0,0 +1,50 @@ +import py +from pypy.rpython.lltypesystem import lltype, llmemory, llarena, llgroup, rffi +from pypy.rpython.memory.gc.stmtls import StmGCTLS, WORD +from pypy.rpython.memory.gc.test.test_stmgc import StmGCTests + + +S = lltype.GcStruct('S', ('a', lltype.Signed), ('b', lltype.Signed), + ('c', lltype.Signed)) + + +class TestStmGCTLS(StmGCTests): + current_stack = () + + def stack_add(self, p): + if self.current_stack == (): + self.current_stack = [] + self.current_stack.append(p) + + def stack_pop(self): + return self.current_stack.pop() + + # ---------- + + def test_creation_works(self): + pass + + def test_allocate_bump_pointer(self): + tls = self.gc.main_thread_tls + a3 = tls.allocate_bump_pointer(3) + a4 = tls.allocate_bump_pointer(4) + a5 = tls.allocate_bump_pointer(5) + a6 = tls.allocate_bump_pointer(6) + assert a4 - a3 == 3 + assert a5 - a4 == 4 + assert a6 - a5 == 5 + + def test_local_collection(self): + s1, _ = self.malloc(S); s1.a = 111 + s2, _ = self.malloc(S); s2.a = 222 + self.stack_add(s2) + self.gc.main_thread_tls.local_collection() + s3 = self.stack_pop() + assert s3.a == 222 + xxxx # raises... + s1.a + s2.a + + def test_alloc_a_lot(self): + for i in range(1000): + sr1, sr1_adr = self.malloc(SR) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit