Author: Stefan Beyer <h...@sbeyer.at> Branch: cpyext-gc-cycle Changeset: r97189:615c66be0a6a Date: 2019-08-14 17:43 +0200 http://bitbucket.org/pypy/pypy/changeset/615c66be0a6a/
Log: WIP: adapted incremental rrc to use snapshot (finalizers still missing) diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -2392,22 +2392,28 @@ self.more_objects_to_trace = swap self.visit_all_objects() + rrc_finished = False + if (not self.objects_to_trace.non_empty() and + not self.more_objects_to_trace.non_empty()): + # + # 'prebuilt_root_objects' might have grown since + # we scanned it in collect_roots() (rare case). Rescan. + self.collect_nonstack_roots() + self.visit_all_objects() + # + # If enabled, do a major collection step for rrc objects. + if self.rrc_enabled: + while not rrc_finished: # TODO: remove this line to do incremental collection + rrc_finished = self.rrc_gc.major_collection_trace_step() + else: + rrc_finished = True + # XXX A simplifying assumption that should be checked, # finalizers/weak references are rare and short which means that # they do not need a separate state and do not need to be # made incremental. # For now, the same applies to rawrefcount'ed objects. - if (not self.objects_to_trace.non_empty() and - not self.more_objects_to_trace.non_empty()): - # - # First, 'prebuilt_root_objects' might have grown since - # we scanned it in collect_roots() (rare case). Rescan. - self.collect_nonstack_roots() - self.visit_all_objects() - # - if self.rrc_enabled: - self.rrc_gc.major_collection_trace() - # + if rrc_finished: ll_assert(not (self.probably_young_objects_with_finalizers .non_empty()), "probably_young_objects_with_finalizers should be empty") @@ -2723,7 +2729,7 @@ hdr.tid |= GCFLAG_VISITED | GCFLAG_TRACK_YOUNG_PTRS if self.rrc_enabled and \ - self.rrc_gc.state == RawRefCountBaseGC.STATE_MARKING: + self.rrc_gc.state == RawRefCountBaseGC.STATE_GARBAGE_MARKING: hdr.tid |= GCFLAG_GARBAGE if self.has_gcptr(llop.extract_ushort(llgroup.HALFWORD, hdr.tid)): @@ -3155,7 +3161,7 @@ def rawrefcount_end_garbage(self): ll_assert(self.rrc_enabled, "rawrefcount.init not called") - self.rrc_gc.state = RawRefCountBaseGC.STATE_DEFAULT + self.rrc_gc.state = RawRefCountBaseGC.STATE_MARKING def rawrefcount_next_garbage_pypy(self): ll_assert(self.rrc_enabled, "rawrefcount.init not called") diff --git a/rpython/memory/gc/rrc/base.py b/rpython/memory/gc/rrc/base.py --- a/rpython/memory/gc/rrc/base.py +++ b/rpython/memory/gc/rrc/base.py @@ -21,26 +21,31 @@ return None class RawRefCountBaseGC(object): - # Default state, no rawrefcount specific code is executed during normal marking. + # Default state. STATE_DEFAULT = 0 + # Marking state. + STATE_MARKING = 1 + # Here cyclic garbage only reachable from legacy finalizers is marked. - STATE_MARKING = 1 + STATE_GARBAGE_MARKING = 2 # The state in which cyclic garbage with legacy finalizers is traced. # Do not mark objects during this state, because we remove the flag # during tracing and we do not want to trace those objects again. Also # during this phase no new objects can be marked, as we are only building # the list of cyclic garbage. - STATE_GARBAGE = 2 + STATE_GARBAGE = 3 _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True}) PYOBJ_SNAPSHOT_OBJ = lltype.Struct('PyObject_Snapshot', ('pyobj', llmemory.Address), ('refcnt', lltype.Signed), - ('refcnt_internal', lltype.Signed), + ('refcnt_external', lltype.Signed), ('refs_index', lltype.Signed), - ('refs_len', lltype.Signed)) + ('refs_len', lltype.Signed), + ('pypy_link', lltype.Signed)) + PYOBJ_SNAPSHOT_OBJ_PTR = lltype.Ptr(PYOBJ_SNAPSHOT_OBJ) PYOBJ_SNAPSHOT = lltype.Array(PYOBJ_SNAPSHOT_OBJ, hints={'nolength': True}) PYOBJ_HDR = lltype.Struct('GCHdr_PyObject', @@ -327,57 +332,8 @@ self._pyobj(pyobject).c_ob_refcnt = rc _free._always_inline_ = True - def major_collection_trace(self): - if not self.cycle_enabled: - self._debug_check_consistency(print_label="begin-mark") - - # First, untrack all tuples with only non-gc rrc objects and promote - # all other tuples to the pyobj_list - self._untrack_tuples() - - # Only trace and mark rawrefcounted object if we are not doing - # something special, like building gc.garbage. - if (self.state == self.STATE_DEFAULT and self.cycle_enabled): - merged_old_list = False - # check objects with finalizers from last collection cycle - if not self._gc_list_is_empty(self.pyobj_old_list): - merged_old_list = self._check_finalizer() - # collect all rawrefcounted roots - self._collect_roots(self.pyobj_list) # TODO: from snapshot - if merged_old_list: - # set all refcounts to zero for objects in dead list - # (might have been incremented) by fix_refcnt - gchdr = self.pyobj_dead_list.c_gc_next - while gchdr <> self.pyobj_dead_list: - gchdr.c_gc_refs = 0 - gchdr = gchdr.c_gc_next - self._debug_check_consistency(print_label="roots-marked") - # mark all objects reachable from rawrefcounted roots - self._mark_rawrefcount() # TODO: from snapshot - self._debug_check_consistency(print_label="before-fin") - self.state = self.STATE_MARKING - if self._find_garbage(): # handle legacy finalizers # TODO: from snapshot - self._mark_garbage() # TODO: from snapshot - self._debug_check_consistency(print_label="end-legacy-fin") - self.state = self.STATE_DEFAULT - found_finalizer = self._find_finalizer() # modern finalizers # TODO: from snapshot - if found_finalizer: - self._gc_list_move(self.pyobj_old_list, - self.pyobj_isolate_list) - use_cylicrc = not found_finalizer - self._debug_check_consistency(print_label="end-mark-cyclic") - else: - use_cylicrc = False # don't sweep any objects in cyclic isolates - - # now mark all pypy objects at the border, depending on the results - debug_print("use_cylicrc", use_cylicrc) - self.p_list_old.foreach(self._major_trace, use_cylicrc) - self._debug_check_consistency(print_label="end-mark") - - # fix refcnt back - self.refcnt_dict.foreach(self._fix_refcnt_back, None) # TODO: from snapshot? - self.refcnt_dict.delete() - self.refcnt_dict = self.gc.AddressDict() + def major_collection_trace_step(self): + return True def _fix_refcnt_back(self, pyobject, link, ignore): pyobj = self._pyobj(pyobject) @@ -385,9 +341,10 @@ pyobj.c_ob_refcnt = pyobj.c_ob_pypy_link pyobj.c_ob_pypy_link = link_int - def _major_trace(self, pyobject, use_cylicrefcnt): + def _major_trace(self, pyobject, flags): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT + (use_cylicrefcnt, use_dict) = flags # pyobj = self._pyobj(pyobject) cyclic_rc = -42 @@ -410,11 +367,15 @@ # force the corresponding object to be alive debug_print("pyobj stays alive", pyobj, "rc", rc, "cyclic_rc", cyclic_rc) - obj = self.refcnt_dict.get(pyobject) + if use_dict: + obj = self.refcnt_dict.get(pyobject) + else: + intobj = pyobj.c_ob_pypy_link + obj = llmemory.cast_int_to_adr(intobj) self.gc.objects_to_trace.append(obj) self.gc.visit_all_objects() - def _major_trace_nongc(self, pyobject, ignore): + def _major_trace_nongc(self, pyobject, use_dict): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT # @@ -433,9 +394,11 @@ else: # force the corresponding object to be alive debug_print("pyobj stays alive", pyobj, "rc", rc) - #intobj = pyobj.c_ob_pypy_link - #obj = llmemory.cast_int_to_adr(intobj) - obj = self.refcnt_dict.get(pyobject) + if use_dict: + obj = self.refcnt_dict.get(pyobject) + else: + intobj = pyobj.c_ob_pypy_link + obj = llmemory.cast_int_to_adr(intobj) self.gc.objects_to_trace.append(obj) self.gc.visit_all_objects() @@ -507,7 +470,7 @@ # For all non-gc pyobjects which have a refcount > 0, # mark all reachable objects on the pypy side - self.p_list_old.foreach(self._major_trace_nongc, None) + self.p_list_old.foreach(self._major_trace_nongc, True) # For every object in this set, if it is marked, add 1 as a real # refcount (p_list => pyobj stays alive if obj stays alive). diff --git a/rpython/memory/gc/rrc/incmark.py b/rpython/memory/gc/rrc/incmark.py --- a/rpython/memory/gc/rrc/incmark.py +++ b/rpython/memory/gc/rrc/incmark.py @@ -1,9 +1,157 @@ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.rtyper.lltypesystem import rffi from rpython.memory.gc.rrc.base import RawRefCountBaseGC +from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop class RawRefCountIncMarkGC(RawRefCountBaseGC): + def major_collection_trace_step(self): + if not self.cycle_enabled or self.state == self.STATE_GARBAGE: + self._debug_check_consistency(print_label="begin-mark") + self.p_list_old.foreach(self._major_trace, (False, False)) + self._debug_check_consistency(print_label="end-mark") + return True + + elif self.state == self.STATE_DEFAULT: + # First, untrack all tuples with only non-gc rrc objects and promote + # all other tuples to the pyobj_list + self._untrack_tuples() + + merged_old_list = False + # check objects with finalizers from last collection cycle + if not self._gc_list_is_empty(self.pyobj_old_list): + merged_old_list = self._check_finalizer() + + # For all non-gc pyobjects which have a refcount > 0, + # mark all reachable objects on the pypy side + self.p_list_old.foreach(self._major_trace_nongc, False) + + # Now take a snapshot + self._take_snapshot(self.pyobj_list) + + # collect all rawrefcounted roots + self._collect_roots(self.pyobj_list) + + if merged_old_list: + # set all refcounts to zero for objects in dead list + # (might have been incremented) by fix_refcnt + gchdr = self.pyobj_dead_list.c_gc_next + while gchdr <> self.pyobj_dead_list: + if (gchdr.c_gc_refs > 0 and gchdr.c_gc_refs != + self.RAWREFCOUNT_REFS_UNTRACKED): + pyobj = self.snapshot_objs[gchdr.c_gc_refs - 1] + pyobj.refcnt_external = 0 + gchdr = gchdr.c_gc_next + + self._debug_check_consistency(print_label="roots-marked") + self.state = self.STATE_MARKING + return False + + elif self.state == self.STATE_MARKING: + # mark all objects reachable from rawrefcounted roots + self._mark_rawrefcount() + + self._debug_check_consistency(print_label="before-fin") + self.state = self.STATE_GARBAGE_MARKING + return False + + elif self.state == self.STATE_GARBAGE_MARKING: + #if self._find_garbage(): # handle legacy finalizers # TODO: from snapshot + # self._mark_garbage() # TODO: from snapshot + # self._debug_check_consistency(print_label="end-legacy-fin") + self.state = self.STATE_DEFAULT + + # We are finished with marking, now finish things up + #found_finalizer = self._find_finalizer() # modern finalizers # TODO: from snapshot + #if found_finalizer: + # self._gc_list_move(self.pyobj_old_list, + # self.pyobj_isolate_list) + #use_cylicrc = not found_finalizer + use_cylicrc = True + + # now move all dead objs still in pyob_list to garbage + # dead -> pyobj_old_list + # live -> set cyclic refcount to > 0 + pygchdr = self.pyobj_list.c_gc_next + while pygchdr <> self.pyobj_list: + next_old = pygchdr.c_gc_next + snapobj = self.snapshot_objs[pygchdr.c_gc_refs - 1] + pygchdr.c_gc_refs = snapobj.refcnt_external + if snapobj.refcnt_external == 0: + # remove from old list + next = pygchdr.c_gc_next + next.c_gc_prev = pygchdr.c_gc_prev + pygchdr.c_gc_prev.c_gc_next = next + # add to new list (or not, if it is a tuple) + self._gc_list_add(self.pyobj_old_list, pygchdr) + pygchdr = next_old + + # now mark all pypy objects at the border, depending on the results + self._debug_check_consistency(print_label="end-mark-cyclic") + debug_print("use_cylicrc", use_cylicrc) + self.p_list_old.foreach(self._major_trace, (use_cylicrc, False)) + self._debug_check_consistency(print_label="end-mark") + self._discard_snapshot() + return True + + def _collect_roots(self, pygclist): + # Subtract all internal refcounts from the cyclic refcount + # of rawrefcounted objects + for i in range(0, self.total_objs): + obj = self.snapshot_objs[i] + for j in range(0, obj.refs_len): + addr = self.snapshot_refs[obj.refs_index + j] + obj_ref = llmemory.cast_adr_to_ptr(addr, + self.PYOBJ_SNAPSHOT_OBJ_PTR) + obj_ref.refcnt_external -= 1 + + # now all rawrefcounted roots or live border objects have a + # refcount > 0 + + def _mark_rawrefcount(self): + self._gc_list_init(self.pyobj_old_list) + # as long as new objects with cyclic a refcount > 0 or alive border + # objects are found, increment the refcount of all referenced objects + # of those newly found objects + found_alive = True + # + while found_alive: # TODO: working set to improve performance? + found_alive = False + for i in range(0, self.total_objs): + obj = self.snapshot_objs[i] + found_alive |= self._mark_rawrefcount_obj(obj) + # + # now all rawrefcounted objects, which are alive, have a cyclic + # refcount > 0 or are marked + + def _mark_rawrefcount_obj(self, snapobj): + if snapobj.refcnt == 0: # hack + return False + + alive = snapobj.refcnt_external > 0 + if snapobj.pypy_link <> 0: + intobj = snapobj.pypy_link + obj = llmemory.cast_int_to_adr(intobj) + if not alive and self.gc.header(obj).tid & ( + self.GCFLAG_VISITED | self.GCFLAG_NO_HEAP_PTRS): + alive = True + snapobj.refcnt_external += 1 + if alive: + # increment refcounts + for j in range(0, snapobj.refs_len): + addr = self.snapshot_refs[snapobj.refs_index + j] + obj_ref = llmemory.cast_adr_to_ptr(addr, + self.PYOBJ_SNAPSHOT_OBJ_PTR) + obj_ref.refcnt_external += 1 + # mark recursively, if it is a pypyobj + if snapobj.pypy_link <> 0: + self.gc.objects_to_trace.append(obj) + self.gc.visit_all_objects() + + # remove from old list, TODO: hack -> working set might be better + snapobj.refcnt = 0 + return alive + def _take_snapshot(self, pygclist): from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT @@ -26,6 +174,8 @@ self.snapshot_objs = lltype.malloc(self.PYOBJ_SNAPSHOT, total_objs, flavor='raw', track_allocation=False) + self.total_objs = total_objs + objs_index = 0 refs_index = 0 pygchdr = pygclist.c_gc_next @@ -36,17 +186,29 @@ refcnt -= REFCNT_FROM_PYPY_LIGHT elif refcnt >= REFCNT_FROM_PYPY: refcnt -= REFCNT_FROM_PYPY + if pyobj.c_ob_pypy_link != 0: + addr = llmemory.cast_int_to_adr(pyobj.c_ob_pypy_link) + if self.gc.header(addr).tid & (self.GCFLAG_VISITED | + self.GCFLAG_NO_HEAP_PTRS): + refcnt += 1 + pygchdr.c_gc_refs = objs_index + 1 obj = self.snapshot_objs[objs_index] obj.pyobj = llmemory.cast_ptr_to_adr(pyobj) - obj.refcnt = refcnt - obj.refcnt_internal = 0 + obj.refcnt = 1 + obj.refcnt_external = refcnt obj.refs_index = refs_index obj.refs_len = 0 + obj.pypy_link = pyobj.c_ob_pypy_link self.snapshot_curr = obj self._take_snapshot_traverse(pyobj) objs_index += 1 refs_index += obj.refs_len pygchdr = pygchdr.c_gc_next + for i in range(0, refs_index): + addr = self.snapshot_refs[i] + pyobj = llmemory.cast_adr_to_ptr(addr, self.PYOBJ_GC_HDR_PTR) + obj = self.snapshot_objs[pyobj.c_gc_refs - 1] + self.snapshot_refs[i] = llmemory.cast_ptr_to_adr(obj) def _take_snapshot_visit(pyobj, self_ptr): from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance @@ -62,7 +224,7 @@ pygchdr.c_gc_refs != self.RAWREFCOUNT_REFS_UNTRACKED: curr = self.snapshot_curr index = curr.refs_index + curr.refs_len - self.snapshot_refs[index] = llmemory.cast_ptr_to_adr(pyobj) + self.snapshot_refs[index] = llmemory.cast_ptr_to_adr(pygchdr) curr.refs_len += 1 def _take_snapshot_traverse(self, pyobj): diff --git a/rpython/memory/gc/rrc/mark.py b/rpython/memory/gc/rrc/mark.py --- a/rpython/memory/gc/rrc/mark.py +++ b/rpython/memory/gc/rrc/mark.py @@ -1,4 +1,62 @@ from rpython.memory.gc.rrc.base import RawRefCountBaseGC +from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop class RawRefCountMarkGC(RawRefCountBaseGC): - pass \ No newline at end of file + + def major_collection_trace_step(self): + if not self.cycle_enabled: + self._debug_check_consistency(print_label="begin-mark") + + if self.state == self.STATE_DEFAULT: + self.state = self.STATE_MARKING + + # First, untrack all tuples with only non-gc rrc objects and promote + # all other tuples to the pyobj_list + self._untrack_tuples() + + # Only trace and mark rawrefcounted object if we are not doing + # something special, like building gc.garbage. + if (self.state == self.STATE_MARKING and self.cycle_enabled): + merged_old_list = False + # check objects with finalizers from last collection cycle + if not self._gc_list_is_empty(self.pyobj_old_list): + merged_old_list = self._check_finalizer() + # collect all rawrefcounted roots + self._collect_roots(self.pyobj_list) + if merged_old_list: + # set all refcounts to zero for objects in dead list + # (might have been incremented) by fix_refcnt + gchdr = self.pyobj_dead_list.c_gc_next + while gchdr <> self.pyobj_dead_list: + gchdr.c_gc_refs = 0 + gchdr = gchdr.c_gc_next + self._debug_check_consistency(print_label="roots-marked") + # mark all objects reachable from rawrefcounted roots + self._mark_rawrefcount() + self._debug_check_consistency(print_label="before-fin") + self.state = self.STATE_GARBAGE_MARKING + if self._find_garbage(): # handle legacy finalizers + self._mark_garbage() + self._debug_check_consistency(print_label="end-legacy-fin") + self.state = self.STATE_MARKING + found_finalizer = self._find_finalizer() + if found_finalizer: + self._gc_list_move(self.pyobj_old_list, + self.pyobj_isolate_list) + use_cylicrc = not found_finalizer + self._debug_check_consistency(print_label="end-mark-cyclic") + else: + use_cylicrc = False # don't sweep any objects in cyclic isolates + + # now mark all pypy objects at the border, depending on the results + debug_print("use_cylicrc", use_cylicrc) + self.p_list_old.foreach(self._major_trace, (use_cylicrc, True)) + self._debug_check_consistency(print_label="end-mark") + + # fix refcnt back + self.refcnt_dict.foreach(self._fix_refcnt_back, None) + self.refcnt_dict.delete() + self.refcnt_dict = self.gc.AddressDict() + + self.state = self.STATE_DEFAULT + return True diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py --- a/rpython/memory/gc/test/test_rawrefcount.py +++ b/rpython/memory/gc/test/test_rawrefcount.py @@ -3,6 +3,7 @@ from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as IncMiniMark from rpython.memory.gc.rrc.base import RawRefCountBaseGC from rpython.memory.gc.rrc.mark import RawRefCountMarkGC +from rpython.memory.gc.rrc.incmark import RawRefCountIncMarkGC from rpython.memory.gc.test.test_direct import BaseDirectGCTest from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT @@ -25,7 +26,8 @@ class TestRawRefCount(BaseDirectGCTest): GCClass = IncMiniMark - RRCGCClass = RawRefCountMarkGC + RRCGCClass = RawRefCountIncMarkGC + #RRCGCClass = RawRefCountMarkGC def setup_method(self, method): BaseDirectGCTest.setup_method(self, method) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit