Author: Stefan Beyer <[email protected]>
Branch: cpyext-gc-cycle
Changeset: r97189:615c66be0a6a
Date: 2019-08-14 17:43 +0200
http://bitbucket.org/pypy/pypy/changeset/615c66be0a6a/
Log: WIP: adapted incremental rrc to use snapshot (finalizers still
missing)
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -2392,22 +2392,28 @@
self.more_objects_to_trace = swap
self.visit_all_objects()
+ rrc_finished = False
+ if (not self.objects_to_trace.non_empty() and
+ not self.more_objects_to_trace.non_empty()):
+ #
+ # 'prebuilt_root_objects' might have grown since
+ # we scanned it in collect_roots() (rare case). Rescan.
+ self.collect_nonstack_roots()
+ self.visit_all_objects()
+ #
+ # If enabled, do a major collection step for rrc objects.
+ if self.rrc_enabled:
+ while not rrc_finished: # TODO: remove this line to do
incremental collection
+ rrc_finished =
self.rrc_gc.major_collection_trace_step()
+ else:
+ rrc_finished = True
+
# XXX A simplifying assumption that should be checked,
# finalizers/weak references are rare and short which means that
# they do not need a separate state and do not need to be
# made incremental.
# For now, the same applies to rawrefcount'ed objects.
- if (not self.objects_to_trace.non_empty() and
- not self.more_objects_to_trace.non_empty()):
- #
- # First, 'prebuilt_root_objects' might have grown since
- # we scanned it in collect_roots() (rare case). Rescan.
- self.collect_nonstack_roots()
- self.visit_all_objects()
- #
- if self.rrc_enabled:
- self.rrc_gc.major_collection_trace()
- #
+ if rrc_finished:
ll_assert(not (self.probably_young_objects_with_finalizers
.non_empty()),
"probably_young_objects_with_finalizers should be empty")
@@ -2723,7 +2729,7 @@
hdr.tid |= GCFLAG_VISITED | GCFLAG_TRACK_YOUNG_PTRS
if self.rrc_enabled and \
- self.rrc_gc.state == RawRefCountBaseGC.STATE_MARKING:
+ self.rrc_gc.state == RawRefCountBaseGC.STATE_GARBAGE_MARKING:
hdr.tid |= GCFLAG_GARBAGE
if self.has_gcptr(llop.extract_ushort(llgroup.HALFWORD, hdr.tid)):
@@ -3155,7 +3161,7 @@
def rawrefcount_end_garbage(self):
ll_assert(self.rrc_enabled, "rawrefcount.init not called")
- self.rrc_gc.state = RawRefCountBaseGC.STATE_DEFAULT
+ self.rrc_gc.state = RawRefCountBaseGC.STATE_MARKING
def rawrefcount_next_garbage_pypy(self):
ll_assert(self.rrc_enabled, "rawrefcount.init not called")
diff --git a/rpython/memory/gc/rrc/base.py b/rpython/memory/gc/rrc/base.py
--- a/rpython/memory/gc/rrc/base.py
+++ b/rpython/memory/gc/rrc/base.py
@@ -21,26 +21,31 @@
return None
class RawRefCountBaseGC(object):
- # Default state, no rawrefcount specific code is executed during normal marking.
+ # Default state.
STATE_DEFAULT = 0
+ # Marking state.
+ STATE_MARKING = 1
+
# Here cyclic garbage only reachable from legacy finalizers is marked.
- STATE_MARKING = 1
+ STATE_GARBAGE_MARKING = 2
# The state in which cyclic garbage with legacy finalizers is traced.
# Do not mark objects during this state, because we remove the flag
# during tracing and we do not want to trace those objects again. Also
# during this phase no new objects can be marked, as we are only building
# the list of cyclic garbage.
- STATE_GARBAGE = 2
+ STATE_GARBAGE = 3
_ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True})
PYOBJ_SNAPSHOT_OBJ = lltype.Struct('PyObject_Snapshot',
('pyobj', llmemory.Address),
('refcnt', lltype.Signed),
- ('refcnt_internal', lltype.Signed),
+ ('refcnt_external', lltype.Signed),
('refs_index', lltype.Signed),
- ('refs_len', lltype.Signed))
+ ('refs_len', lltype.Signed),
+ ('pypy_link', lltype.Signed))
+ PYOBJ_SNAPSHOT_OBJ_PTR = lltype.Ptr(PYOBJ_SNAPSHOT_OBJ)
PYOBJ_SNAPSHOT = lltype.Array(PYOBJ_SNAPSHOT_OBJ,
hints={'nolength': True})
PYOBJ_HDR = lltype.Struct('GCHdr_PyObject',
@@ -327,57 +332,8 @@
self._pyobj(pyobject).c_ob_refcnt = rc
_free._always_inline_ = True
- def major_collection_trace(self):
- if not self.cycle_enabled:
- self._debug_check_consistency(print_label="begin-mark")
-
- # First, untrack all tuples with only non-gc rrc objects and promote
- # all other tuples to the pyobj_list
- self._untrack_tuples()
-
- # Only trace and mark rawrefcounted object if we are not doing
- # something special, like building gc.garbage.
- if (self.state == self.STATE_DEFAULT and self.cycle_enabled):
- merged_old_list = False
- # check objects with finalizers from last collection cycle
- if not self._gc_list_is_empty(self.pyobj_old_list):
- merged_old_list = self._check_finalizer()
- # collect all rawrefcounted roots
- self._collect_roots(self.pyobj_list) # TODO: from snapshot
- if merged_old_list:
- # set all refcounts to zero for objects in dead list
- # (might have been incremented) by fix_refcnt
- gchdr = self.pyobj_dead_list.c_gc_next
- while gchdr <> self.pyobj_dead_list:
- gchdr.c_gc_refs = 0
- gchdr = gchdr.c_gc_next
- self._debug_check_consistency(print_label="roots-marked")
- # mark all objects reachable from rawrefcounted roots
- self._mark_rawrefcount() # TODO: from snapshot
- self._debug_check_consistency(print_label="before-fin")
- self.state = self.STATE_MARKING
- if self._find_garbage(): # handle legacy finalizers # TODO: from
snapshot
- self._mark_garbage() # TODO: from snapshot
- self._debug_check_consistency(print_label="end-legacy-fin")
- self.state = self.STATE_DEFAULT
- found_finalizer = self._find_finalizer() # modern finalizers #
TODO: from snapshot
- if found_finalizer:
- self._gc_list_move(self.pyobj_old_list,
- self.pyobj_isolate_list)
- use_cylicrc = not found_finalizer
- self._debug_check_consistency(print_label="end-mark-cyclic")
- else:
- use_cylicrc = False # don't sweep any objects in cyclic isolates
-
- # now mark all pypy objects at the border, depending on the results
- debug_print("use_cylicrc", use_cylicrc)
- self.p_list_old.foreach(self._major_trace, use_cylicrc)
- self._debug_check_consistency(print_label="end-mark")
-
- # fix refcnt back
- self.refcnt_dict.foreach(self._fix_refcnt_back, None) # TODO: from
snapshot?
- self.refcnt_dict.delete()
- self.refcnt_dict = self.gc.AddressDict()
+ def major_collection_trace_step(self):
+ return True
def _fix_refcnt_back(self, pyobject, link, ignore):
pyobj = self._pyobj(pyobject)
@@ -385,9 +341,10 @@
pyobj.c_ob_refcnt = pyobj.c_ob_pypy_link
pyobj.c_ob_pypy_link = link_int
- def _major_trace(self, pyobject, use_cylicrefcnt):
+ def _major_trace(self, pyobject, flags):
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
+ (use_cylicrefcnt, use_dict) = flags
#
pyobj = self._pyobj(pyobject)
cyclic_rc = -42
@@ -410,11 +367,15 @@
# force the corresponding object to be alive
debug_print("pyobj stays alive", pyobj, "rc", rc, "cyclic_rc",
cyclic_rc)
- obj = self.refcnt_dict.get(pyobject)
+ if use_dict:
+ obj = self.refcnt_dict.get(pyobject)
+ else:
+ intobj = pyobj.c_ob_pypy_link
+ obj = llmemory.cast_int_to_adr(intobj)
self.gc.objects_to_trace.append(obj)
self.gc.visit_all_objects()
- def _major_trace_nongc(self, pyobject, ignore):
+ def _major_trace_nongc(self, pyobject, use_dict):
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
#
@@ -433,9 +394,11 @@
else:
# force the corresponding object to be alive
debug_print("pyobj stays alive", pyobj, "rc", rc)
- #intobj = pyobj.c_ob_pypy_link
- #obj = llmemory.cast_int_to_adr(intobj)
- obj = self.refcnt_dict.get(pyobject)
+ if use_dict:
+ obj = self.refcnt_dict.get(pyobject)
+ else:
+ intobj = pyobj.c_ob_pypy_link
+ obj = llmemory.cast_int_to_adr(intobj)
self.gc.objects_to_trace.append(obj)
self.gc.visit_all_objects()
@@ -507,7 +470,7 @@
# For all non-gc pyobjects which have a refcount > 0,
# mark all reachable objects on the pypy side
- self.p_list_old.foreach(self._major_trace_nongc, None)
+ self.p_list_old.foreach(self._major_trace_nongc, True)
# For every object in this set, if it is marked, add 1 as a real
# refcount (p_list => pyobj stays alive if obj stays alive).
diff --git a/rpython/memory/gc/rrc/incmark.py b/rpython/memory/gc/rrc/incmark.py
--- a/rpython/memory/gc/rrc/incmark.py
+++ b/rpython/memory/gc/rrc/incmark.py
@@ -1,9 +1,157 @@
from rpython.rtyper.lltypesystem import lltype, llmemory
from rpython.rtyper.lltypesystem import rffi
from rpython.memory.gc.rrc.base import RawRefCountBaseGC
+from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
class RawRefCountIncMarkGC(RawRefCountBaseGC):
+ def major_collection_trace_step(self):
+ if not self.cycle_enabled or self.state == self.STATE_GARBAGE:
+ self._debug_check_consistency(print_label="begin-mark")
+ self.p_list_old.foreach(self._major_trace, (False, False))
+ self._debug_check_consistency(print_label="end-mark")
+ return True
+
+ elif self.state == self.STATE_DEFAULT:
+ # First, untrack all tuples with only non-gc rrc objects and promote
+ # all other tuples to the pyobj_list
+ self._untrack_tuples()
+
+ merged_old_list = False
+ # check objects with finalizers from last collection cycle
+ if not self._gc_list_is_empty(self.pyobj_old_list):
+ merged_old_list = self._check_finalizer()
+
+ # For all non-gc pyobjects which have a refcount > 0,
+ # mark all reachable objects on the pypy side
+ self.p_list_old.foreach(self._major_trace_nongc, False)
+
+ # Now take a snapshot
+ self._take_snapshot(self.pyobj_list)
+
+ # collect all rawrefcounted roots
+ self._collect_roots(self.pyobj_list)
+
+ if merged_old_list:
+ # set all refcounts to zero for objects in dead list
+ # (might have been incremented) by fix_refcnt
+ gchdr = self.pyobj_dead_list.c_gc_next
+ while gchdr <> self.pyobj_dead_list:
+ if (gchdr.c_gc_refs > 0 and gchdr.c_gc_refs !=
+ self.RAWREFCOUNT_REFS_UNTRACKED):
+ pyobj = self.snapshot_objs[gchdr.c_gc_refs - 1]
+ pyobj.refcnt_external = 0
+ gchdr = gchdr.c_gc_next
+
+ self._debug_check_consistency(print_label="roots-marked")
+ self.state = self.STATE_MARKING
+ return False
+
+ elif self.state == self.STATE_MARKING:
+ # mark all objects reachable from rawrefcounted roots
+ self._mark_rawrefcount()
+
+ self._debug_check_consistency(print_label="before-fin")
+ self.state = self.STATE_GARBAGE_MARKING
+ return False
+
+ elif self.state == self.STATE_GARBAGE_MARKING:
+ #if self._find_garbage(): # handle legacy finalizers # TODO: from snapshot
+ # self._mark_garbage() # TODO: from snapshot
+ # self._debug_check_consistency(print_label="end-legacy-fin")
+ self.state = self.STATE_DEFAULT
+
+ # We are finished with marking, now finish things up
+ #found_finalizer = self._find_finalizer() # modern finalizers # TODO: from snapshot
+ #if found_finalizer:
+ # self._gc_list_move(self.pyobj_old_list,
+ # self.pyobj_isolate_list)
+ #use_cylicrc = not found_finalizer
+ use_cylicrc = True
+
+ # now move all dead objs still in pyobj_list to garbage
+ # dead -> pyobj_old_list
+ # live -> set cyclic refcount to > 0
+ pygchdr = self.pyobj_list.c_gc_next
+ while pygchdr <> self.pyobj_list:
+ next_old = pygchdr.c_gc_next
+ snapobj = self.snapshot_objs[pygchdr.c_gc_refs - 1]
+ pygchdr.c_gc_refs = snapobj.refcnt_external
+ if snapobj.refcnt_external == 0:
+ # remove from old list
+ next = pygchdr.c_gc_next
+ next.c_gc_prev = pygchdr.c_gc_prev
+ pygchdr.c_gc_prev.c_gc_next = next
+ # add to new list (or not, if it is a tuple)
+ self._gc_list_add(self.pyobj_old_list, pygchdr)
+ pygchdr = next_old
+
+ # now mark all pypy objects at the border, depending on the results
+ self._debug_check_consistency(print_label="end-mark-cyclic")
+ debug_print("use_cylicrc", use_cylicrc)
+ self.p_list_old.foreach(self._major_trace, (use_cylicrc, False))
+ self._debug_check_consistency(print_label="end-mark")
+ self._discard_snapshot()
+ return True
+
+ def _collect_roots(self, pygclist):
+ # Subtract all internal refcounts from the cyclic refcount
+ # of rawrefcounted objects
+ for i in range(0, self.total_objs):
+ obj = self.snapshot_objs[i]
+ for j in range(0, obj.refs_len):
+ addr = self.snapshot_refs[obj.refs_index + j]
+ obj_ref = llmemory.cast_adr_to_ptr(addr,
+ self.PYOBJ_SNAPSHOT_OBJ_PTR)
+ obj_ref.refcnt_external -= 1
+
+ # now all rawrefcounted roots or live border objects have a
+ # refcount > 0
+
+ def _mark_rawrefcount(self):
+ self._gc_list_init(self.pyobj_old_list)
+ # as long as new objects with a cyclic refcount > 0 or alive border
+ # objects are found, increment the refcount of all referenced objects
+ # of those newly found objects
+ found_alive = True
+ #
+ while found_alive: # TODO: working set to improve performance?
+ found_alive = False
+ for i in range(0, self.total_objs):
+ obj = self.snapshot_objs[i]
+ found_alive |= self._mark_rawrefcount_obj(obj)
+ #
+ # now all rawrefcounted objects, which are alive, have a cyclic
+ # refcount > 0 or are marked
+
+ def _mark_rawrefcount_obj(self, snapobj):
+ if snapobj.refcnt == 0: # hack
+ return False
+
+ alive = snapobj.refcnt_external > 0
+ if snapobj.pypy_link <> 0:
+ intobj = snapobj.pypy_link
+ obj = llmemory.cast_int_to_adr(intobj)
+ if not alive and self.gc.header(obj).tid & (
+ self.GCFLAG_VISITED | self.GCFLAG_NO_HEAP_PTRS):
+ alive = True
+ snapobj.refcnt_external += 1
+ if alive:
+ # increment refcounts
+ for j in range(0, snapobj.refs_len):
+ addr = self.snapshot_refs[snapobj.refs_index + j]
+ obj_ref = llmemory.cast_adr_to_ptr(addr,
+ self.PYOBJ_SNAPSHOT_OBJ_PTR)
+ obj_ref.refcnt_external += 1
+ # mark recursively, if it is a pypyobj
+ if snapobj.pypy_link <> 0:
+ self.gc.objects_to_trace.append(obj)
+ self.gc.visit_all_objects()
+
+ # remove from old list, TODO: hack -> working set might be better
+ snapobj.refcnt = 0
+ return alive
+
def _take_snapshot(self, pygclist):
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
@@ -26,6 +174,8 @@
self.snapshot_objs = lltype.malloc(self.PYOBJ_SNAPSHOT, total_objs,
flavor='raw',
track_allocation=False)
+ self.total_objs = total_objs
+
objs_index = 0
refs_index = 0
pygchdr = pygclist.c_gc_next
@@ -36,17 +186,29 @@
refcnt -= REFCNT_FROM_PYPY_LIGHT
elif refcnt >= REFCNT_FROM_PYPY:
refcnt -= REFCNT_FROM_PYPY
+ if pyobj.c_ob_pypy_link != 0:
+ addr = llmemory.cast_int_to_adr(pyobj.c_ob_pypy_link)
+ if self.gc.header(addr).tid & (self.GCFLAG_VISITED |
+ self.GCFLAG_NO_HEAP_PTRS):
+ refcnt += 1
+ pygchdr.c_gc_refs = objs_index + 1
obj = self.snapshot_objs[objs_index]
obj.pyobj = llmemory.cast_ptr_to_adr(pyobj)
- obj.refcnt = refcnt
- obj.refcnt_internal = 0
+ obj.refcnt = 1
+ obj.refcnt_external = refcnt
obj.refs_index = refs_index
obj.refs_len = 0
+ obj.pypy_link = pyobj.c_ob_pypy_link
self.snapshot_curr = obj
self._take_snapshot_traverse(pyobj)
objs_index += 1
refs_index += obj.refs_len
pygchdr = pygchdr.c_gc_next
+ for i in range(0, refs_index):
+ addr = self.snapshot_refs[i]
+ pyobj = llmemory.cast_adr_to_ptr(addr, self.PYOBJ_GC_HDR_PTR)
+ obj = self.snapshot_objs[pyobj.c_gc_refs - 1]
+ self.snapshot_refs[i] = llmemory.cast_ptr_to_adr(obj)
def _take_snapshot_visit(pyobj, self_ptr):
from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance
@@ -62,7 +224,7 @@
pygchdr.c_gc_refs != self.RAWREFCOUNT_REFS_UNTRACKED:
curr = self.snapshot_curr
index = curr.refs_index + curr.refs_len
- self.snapshot_refs[index] = llmemory.cast_ptr_to_adr(pyobj)
+ self.snapshot_refs[index] = llmemory.cast_ptr_to_adr(pygchdr)
curr.refs_len += 1
def _take_snapshot_traverse(self, pyobj):
diff --git a/rpython/memory/gc/rrc/mark.py b/rpython/memory/gc/rrc/mark.py
--- a/rpython/memory/gc/rrc/mark.py
+++ b/rpython/memory/gc/rrc/mark.py
@@ -1,4 +1,62 @@
from rpython.memory.gc.rrc.base import RawRefCountBaseGC
+from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
class RawRefCountMarkGC(RawRefCountBaseGC):
- pass
\ No newline at end of file
+
+ def major_collection_trace_step(self):
+ if not self.cycle_enabled:
+ self._debug_check_consistency(print_label="begin-mark")
+
+ if self.state == self.STATE_DEFAULT:
+ self.state = self.STATE_MARKING
+
+ # First, untrack all tuples with only non-gc rrc objects and promote
+ # all other tuples to the pyobj_list
+ self._untrack_tuples()
+
+ # Only trace and mark rawrefcounted object if we are not doing
+ # something special, like building gc.garbage.
+ if (self.state == self.STATE_MARKING and self.cycle_enabled):
+ merged_old_list = False
+ # check objects with finalizers from last collection cycle
+ if not self._gc_list_is_empty(self.pyobj_old_list):
+ merged_old_list = self._check_finalizer()
+ # collect all rawrefcounted roots
+ self._collect_roots(self.pyobj_list)
+ if merged_old_list:
+ # set all refcounts to zero for objects in dead list
+ # (might have been incremented) by fix_refcnt
+ gchdr = self.pyobj_dead_list.c_gc_next
+ while gchdr <> self.pyobj_dead_list:
+ gchdr.c_gc_refs = 0
+ gchdr = gchdr.c_gc_next
+ self._debug_check_consistency(print_label="roots-marked")
+ # mark all objects reachable from rawrefcounted roots
+ self._mark_rawrefcount()
+ self._debug_check_consistency(print_label="before-fin")
+ self.state = self.STATE_GARBAGE_MARKING
+ if self._find_garbage(): # handle legacy finalizers
+ self._mark_garbage()
+ self._debug_check_consistency(print_label="end-legacy-fin")
+ self.state = self.STATE_MARKING
+ found_finalizer = self._find_finalizer()
+ if found_finalizer:
+ self._gc_list_move(self.pyobj_old_list,
+ self.pyobj_isolate_list)
+ use_cylicrc = not found_finalizer
+ self._debug_check_consistency(print_label="end-mark-cyclic")
+ else:
+ use_cylicrc = False # don't sweep any objects in cyclic isolates
+
+ # now mark all pypy objects at the border, depending on the results
+ debug_print("use_cylicrc", use_cylicrc)
+ self.p_list_old.foreach(self._major_trace, (use_cylicrc, True))
+ self._debug_check_consistency(print_label="end-mark")
+
+ # fix refcnt back
+ self.refcnt_dict.foreach(self._fix_refcnt_back, None)
+ self.refcnt_dict.delete()
+ self.refcnt_dict = self.gc.AddressDict()
+
+ self.state = self.STATE_DEFAULT
+ return True
diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py
--- a/rpython/memory/gc/test/test_rawrefcount.py
+++ b/rpython/memory/gc/test/test_rawrefcount.py
@@ -3,6 +3,7 @@
from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as IncMiniMark
from rpython.memory.gc.rrc.base import RawRefCountBaseGC
from rpython.memory.gc.rrc.mark import RawRefCountMarkGC
+from rpython.memory.gc.rrc.incmark import RawRefCountIncMarkGC
from rpython.memory.gc.test.test_direct import BaseDirectGCTest
from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT
@@ -25,7 +26,8 @@
class TestRawRefCount(BaseDirectGCTest):
GCClass = IncMiniMark
- RRCGCClass = RawRefCountMarkGC
+ RRCGCClass = RawRefCountIncMarkGC
+ #RRCGCClass = RawRefCountMarkGC
def setup_method(self, method):
BaseDirectGCTest.setup_method(self, method)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit