Author: Stefan Beyer <h...@sbeyer.at>
Branch: cpyext-gc-cycle
Changeset: r97189:615c66be0a6a
Date: 2019-08-14 17:43 +0200
http://bitbucket.org/pypy/pypy/changeset/615c66be0a6a/

Log:    WIP: adapted incremental rrc to use snapshot (finalizers still
        missing)

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -2392,22 +2392,28 @@
                     self.more_objects_to_trace = swap
                     self.visit_all_objects()
 
+            rrc_finished = False
+            if (not self.objects_to_trace.non_empty() and
+                    not self.more_objects_to_trace.non_empty()):
+                #
+                # 'prebuilt_root_objects' might have grown since
+                # we scanned it in collect_roots() (rare case).  Rescan.
+                self.collect_nonstack_roots()
+                self.visit_all_objects()
+                #
+                # If enabled, do a major collection step for rrc objects.
+                if self.rrc_enabled:
+                    while not rrc_finished: # TODO: remove this line to do incremental collection
+                        rrc_finished = self.rrc_gc.major_collection_trace_step()
+                else:
+                    rrc_finished = True
+
             # XXX A simplifying assumption that should be checked,
             # finalizers/weak references are rare and short which means that
             # they do not need a separate state and do not need to be
             # made incremental.
             # For now, the same applies to rawrefcount'ed objects.
-            if (not self.objects_to_trace.non_empty() and
-                not self.more_objects_to_trace.non_empty()):
-                #
-                # First, 'prebuilt_root_objects' might have grown since
-                # we scanned it in collect_roots() (rare case).  Rescan.
-                self.collect_nonstack_roots()
-                self.visit_all_objects()
-                #
-                if self.rrc_enabled:
-                    self.rrc_gc.major_collection_trace()
-                #
+            if rrc_finished:
                 ll_assert(not (self.probably_young_objects_with_finalizers
                                .non_empty()),
                     "probably_young_objects_with_finalizers should be empty")
@@ -2723,7 +2729,7 @@
         hdr.tid |= GCFLAG_VISITED | GCFLAG_TRACK_YOUNG_PTRS
 
         if self.rrc_enabled and \
-                self.rrc_gc.state == RawRefCountBaseGC.STATE_MARKING:
+                self.rrc_gc.state == RawRefCountBaseGC.STATE_GARBAGE_MARKING:
             hdr.tid |= GCFLAG_GARBAGE
 
         if self.has_gcptr(llop.extract_ushort(llgroup.HALFWORD, hdr.tid)):
@@ -3155,7 +3161,7 @@
 
     def rawrefcount_end_garbage(self):
         ll_assert(self.rrc_enabled, "rawrefcount.init not called")
-        self.rrc_gc.state = RawRefCountBaseGC.STATE_DEFAULT
+        self.rrc_gc.state = RawRefCountBaseGC.STATE_MARKING
 
     def rawrefcount_next_garbage_pypy(self):
         ll_assert(self.rrc_enabled, "rawrefcount.init not called")
diff --git a/rpython/memory/gc/rrc/base.py b/rpython/memory/gc/rrc/base.py
--- a/rpython/memory/gc/rrc/base.py
+++ b/rpython/memory/gc/rrc/base.py
@@ -21,26 +21,31 @@
         return None
 
 class RawRefCountBaseGC(object):
-    # Default state, no rawrefcount specific code is executed during normal marking.
+    # Default state.
     STATE_DEFAULT = 0
 
+    # Marking state.
+    STATE_MARKING = 1
+
     # Here cyclic garbage only reachable from legacy finalizers is marked.
-    STATE_MARKING = 1
+    STATE_GARBAGE_MARKING = 2
 
     # The state in which cyclic garbage with legacy finalizers is traced.
     # Do not mark objects during this state, because we remove the flag
     # during tracing and we do not want to trace those objects again. Also
     # during this phase no new objects can be marked, as we are only building
     # the list of cyclic garbage.
-    STATE_GARBAGE = 2
+    STATE_GARBAGE = 3
 
     _ADDRARRAY = lltype.Array(llmemory.Address, hints={'nolength': True})
     PYOBJ_SNAPSHOT_OBJ = lltype.Struct('PyObject_Snapshot',
                                        ('pyobj', llmemory.Address),
                                        ('refcnt', lltype.Signed),
-                                       ('refcnt_internal', lltype.Signed),
+                                       ('refcnt_external', lltype.Signed),
                                        ('refs_index', lltype.Signed),
-                                       ('refs_len', lltype.Signed))
+                                       ('refs_len', lltype.Signed),
+                                       ('pypy_link', lltype.Signed))
+    PYOBJ_SNAPSHOT_OBJ_PTR = lltype.Ptr(PYOBJ_SNAPSHOT_OBJ)
     PYOBJ_SNAPSHOT = lltype.Array(PYOBJ_SNAPSHOT_OBJ,
                                   hints={'nolength': True})
     PYOBJ_HDR = lltype.Struct('GCHdr_PyObject',
@@ -327,57 +332,8 @@
             self._pyobj(pyobject).c_ob_refcnt = rc
     _free._always_inline_ = True
 
-    def major_collection_trace(self):
-        if not self.cycle_enabled:
-            self._debug_check_consistency(print_label="begin-mark")
-
-        # First, untrack all tuples with only non-gc rrc objects and promote
-        # all other tuples to the pyobj_list
-        self._untrack_tuples()
-
-        # Only trace and mark rawrefcounted object if we are not doing
-        # something special, like building gc.garbage.
-        if (self.state == self.STATE_DEFAULT and self.cycle_enabled):
-            merged_old_list = False
-            # check objects with finalizers from last collection cycle
-            if not self._gc_list_is_empty(self.pyobj_old_list):
-                merged_old_list = self._check_finalizer()
-            # collect all rawrefcounted roots
-            self._collect_roots(self.pyobj_list) # TODO: from snapshot
-            if merged_old_list:
-                # set all refcounts to zero for objects in dead list
-                # (might have been incremented) by fix_refcnt
-                gchdr = self.pyobj_dead_list.c_gc_next
-                while gchdr <> self.pyobj_dead_list:
-                    gchdr.c_gc_refs = 0
-                    gchdr = gchdr.c_gc_next
-            self._debug_check_consistency(print_label="roots-marked")
-            # mark all objects reachable from rawrefcounted roots
-            self._mark_rawrefcount() # TODO: from snapshot
-            self._debug_check_consistency(print_label="before-fin")
-            self.state = self.STATE_MARKING
-            if self._find_garbage(): # handle legacy finalizers # TODO: from snapshot
-                self._mark_garbage() # TODO: from snapshot
-                self._debug_check_consistency(print_label="end-legacy-fin")
-            self.state = self.STATE_DEFAULT
-            found_finalizer = self._find_finalizer() # modern finalizers # TODO: from snapshot
-            if found_finalizer:
-                self._gc_list_move(self.pyobj_old_list,
-                                   self.pyobj_isolate_list)
-            use_cylicrc = not found_finalizer
-            self._debug_check_consistency(print_label="end-mark-cyclic")
-        else:
-            use_cylicrc = False # don't sweep any objects in cyclic isolates
-
-        # now mark all pypy objects at the border, depending on the results
-        debug_print("use_cylicrc", use_cylicrc)
-        self.p_list_old.foreach(self._major_trace, use_cylicrc)
-        self._debug_check_consistency(print_label="end-mark")
-
-        # fix refcnt back
-        self.refcnt_dict.foreach(self._fix_refcnt_back, None) # TODO: from snapshot?
-        self.refcnt_dict.delete()
-        self.refcnt_dict = self.gc.AddressDict()
+    def major_collection_trace_step(self):
+        return True
 
     def _fix_refcnt_back(self, pyobject, link, ignore):
         pyobj = self._pyobj(pyobject)
@@ -385,9 +341,10 @@
         pyobj.c_ob_refcnt = pyobj.c_ob_pypy_link
         pyobj.c_ob_pypy_link = link_int
 
-    def _major_trace(self, pyobject, use_cylicrefcnt):
+    def _major_trace(self, pyobject, flags):
         from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
         from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
+        (use_cylicrefcnt, use_dict) = flags
         #
         pyobj = self._pyobj(pyobject)
         cyclic_rc = -42
@@ -410,11 +367,15 @@
             # force the corresponding object to be alive
             debug_print("pyobj stays alive", pyobj, "rc", rc, "cyclic_rc",
                         cyclic_rc)
-            obj = self.refcnt_dict.get(pyobject)
+            if use_dict:
+                obj = self.refcnt_dict.get(pyobject)
+            else:
+                intobj = pyobj.c_ob_pypy_link
+                obj = llmemory.cast_int_to_adr(intobj)
             self.gc.objects_to_trace.append(obj)
             self.gc.visit_all_objects()
 
-    def _major_trace_nongc(self, pyobject, ignore):
+    def _major_trace_nongc(self, pyobject, use_dict):
         from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
         from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
         #
@@ -433,9 +394,11 @@
         else:
             # force the corresponding object to be alive
             debug_print("pyobj stays alive", pyobj, "rc", rc)
-            #intobj = pyobj.c_ob_pypy_link
-            #obj = llmemory.cast_int_to_adr(intobj)
-            obj = self.refcnt_dict.get(pyobject)
+            if use_dict:
+                obj = self.refcnt_dict.get(pyobject)
+            else:
+                intobj = pyobj.c_ob_pypy_link
+                obj = llmemory.cast_int_to_adr(intobj)
             self.gc.objects_to_trace.append(obj)
             self.gc.visit_all_objects()
 
@@ -507,7 +470,7 @@
 
         # For all non-gc pyobjects which have a refcount > 0,
         # mark all reachable objects on the pypy side
-        self.p_list_old.foreach(self._major_trace_nongc, None)
+        self.p_list_old.foreach(self._major_trace_nongc, True)
 
         # For every object in this set, if it is marked, add 1 as a real
         # refcount (p_list => pyobj stays alive if obj stays alive).
diff --git a/rpython/memory/gc/rrc/incmark.py b/rpython/memory/gc/rrc/incmark.py
--- a/rpython/memory/gc/rrc/incmark.py
+++ b/rpython/memory/gc/rrc/incmark.py
@@ -1,9 +1,157 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory
 from rpython.rtyper.lltypesystem import rffi
 from rpython.memory.gc.rrc.base import RawRefCountBaseGC
+from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
 
 class RawRefCountIncMarkGC(RawRefCountBaseGC):
 
+    def major_collection_trace_step(self):
+        if not self.cycle_enabled or self.state == self.STATE_GARBAGE:
+            self._debug_check_consistency(print_label="begin-mark")
+            self.p_list_old.foreach(self._major_trace, (False, False))
+            self._debug_check_consistency(print_label="end-mark")
+            return True
+
+        elif self.state == self.STATE_DEFAULT:
+            # First, untrack all tuples with only non-gc rrc objects and promote
+            # all other tuples to the pyobj_list
+            self._untrack_tuples()
+
+            merged_old_list = False
+            # check objects with finalizers from last collection cycle
+            if not self._gc_list_is_empty(self.pyobj_old_list):
+                merged_old_list = self._check_finalizer()
+
+            # For all non-gc pyobjects which have a refcount > 0,
+            # mark all reachable objects on the pypy side
+            self.p_list_old.foreach(self._major_trace_nongc, False)
+
+            # Now take a snapshot
+            self._take_snapshot(self.pyobj_list)
+
+            # collect all rawrefcounted roots
+            self._collect_roots(self.pyobj_list)
+
+            if merged_old_list:
+                # set all refcounts to zero for objects in dead list
+                # (might have been incremented) by fix_refcnt
+                gchdr = self.pyobj_dead_list.c_gc_next
+                while gchdr <> self.pyobj_dead_list:
+                    if (gchdr.c_gc_refs > 0 and gchdr.c_gc_refs !=
+                            self.RAWREFCOUNT_REFS_UNTRACKED):
+                        pyobj = self.snapshot_objs[gchdr.c_gc_refs - 1]
+                        pyobj.refcnt_external = 0
+                    gchdr = gchdr.c_gc_next
+
+            self._debug_check_consistency(print_label="roots-marked")
+            self.state = self.STATE_MARKING
+            return False
+
+        elif self.state == self.STATE_MARKING:
+            # mark all objects reachable from rawrefcounted roots
+            self._mark_rawrefcount()
+
+            self._debug_check_consistency(print_label="before-fin")
+            self.state = self.STATE_GARBAGE_MARKING
+            return False
+
+        elif self.state == self.STATE_GARBAGE_MARKING:
+            #if self._find_garbage():  # handle legacy finalizers # TODO: from snapshot
+            #    self._mark_garbage()  # TODO: from snapshot
+            #    self._debug_check_consistency(print_label="end-legacy-fin")
+            self.state = self.STATE_DEFAULT
+
+        # We are finished with marking, now finish things up
+        #found_finalizer = self._find_finalizer()  # modern finalizers # TODO: from snapshot
+        #if found_finalizer:
+        #    self._gc_list_move(self.pyobj_old_list,
+        #                       self.pyobj_isolate_list)
+        #use_cylicrc = not found_finalizer
+        use_cylicrc = True
+
+        # now move all dead objs still in pyob_list to garbage
+        # dead -> pyobj_old_list
+        # live -> set cyclic refcount to > 0
+        pygchdr = self.pyobj_list.c_gc_next
+        while pygchdr <> self.pyobj_list:
+            next_old = pygchdr.c_gc_next
+            snapobj = self.snapshot_objs[pygchdr.c_gc_refs - 1]
+            pygchdr.c_gc_refs = snapobj.refcnt_external
+            if snapobj.refcnt_external == 0:
+                # remove from old list
+                next = pygchdr.c_gc_next
+                next.c_gc_prev = pygchdr.c_gc_prev
+                pygchdr.c_gc_prev.c_gc_next = next
+                # add to new list (or not, if it is a tuple)
+                self._gc_list_add(self.pyobj_old_list, pygchdr)
+            pygchdr = next_old
+
+        # now mark all pypy objects at the border, depending on the results
+        self._debug_check_consistency(print_label="end-mark-cyclic")
+        debug_print("use_cylicrc", use_cylicrc)
+        self.p_list_old.foreach(self._major_trace, (use_cylicrc, False))
+        self._debug_check_consistency(print_label="end-mark")
+        self._discard_snapshot()
+        return True
+
+    def _collect_roots(self, pygclist):
+        # Subtract all internal refcounts from the cyclic refcount
+        # of rawrefcounted objects
+        for i in range(0, self.total_objs):
+            obj = self.snapshot_objs[i]
+            for j in range(0, obj.refs_len):
+                addr = self.snapshot_refs[obj.refs_index + j]
+                obj_ref = llmemory.cast_adr_to_ptr(addr,
+                                                   self.PYOBJ_SNAPSHOT_OBJ_PTR)
+                obj_ref.refcnt_external -= 1
+
+        # now all rawrefcounted roots or live border objects have a
+        # refcount > 0
+
+    def _mark_rawrefcount(self):
+        self._gc_list_init(self.pyobj_old_list)
+        # as long as new objects with cyclic a refcount > 0 or alive border
+        # objects are found, increment the refcount of all referenced objects
+        # of those newly found objects
+        found_alive = True
+        #
+        while found_alive: # TODO: working set to improve performance?
+            found_alive = False
+            for i in range(0, self.total_objs):
+                obj = self.snapshot_objs[i]
+                found_alive |= self._mark_rawrefcount_obj(obj)
+        #
+        # now all rawrefcounted objects, which are alive, have a cyclic
+        # refcount > 0 or are marked
+
+    def _mark_rawrefcount_obj(self, snapobj):
+        if snapobj.refcnt == 0: # hack
+            return False
+
+        alive = snapobj.refcnt_external > 0
+        if snapobj.pypy_link <> 0:
+            intobj = snapobj.pypy_link
+            obj = llmemory.cast_int_to_adr(intobj)
+            if not alive and self.gc.header(obj).tid & (
+                    self.GCFLAG_VISITED | self.GCFLAG_NO_HEAP_PTRS):
+                alive = True
+                snapobj.refcnt_external += 1
+        if alive:
+            # increment refcounts
+            for j in range(0, snapobj.refs_len):
+                addr = self.snapshot_refs[snapobj.refs_index + j]
+                obj_ref = llmemory.cast_adr_to_ptr(addr,
+                                                   self.PYOBJ_SNAPSHOT_OBJ_PTR)
+                obj_ref.refcnt_external += 1
+            # mark recursively, if it is a pypyobj
+            if snapobj.pypy_link <> 0:
+                self.gc.objects_to_trace.append(obj)
+                self.gc.visit_all_objects()
+
+            # remove from old list, TODO: hack -> working set might be better
+            snapobj.refcnt = 0
+        return alive
+
     def _take_snapshot(self, pygclist):
         from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY
         from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY_LIGHT
@@ -26,6 +174,8 @@
         self.snapshot_objs = lltype.malloc(self.PYOBJ_SNAPSHOT, total_objs,
                                            flavor='raw',
                                            track_allocation=False)
+        self.total_objs = total_objs
+
         objs_index = 0
         refs_index = 0
         pygchdr = pygclist.c_gc_next
@@ -36,17 +186,29 @@
                 refcnt -= REFCNT_FROM_PYPY_LIGHT
             elif refcnt >= REFCNT_FROM_PYPY:
                 refcnt -= REFCNT_FROM_PYPY
+            if pyobj.c_ob_pypy_link != 0:
+                addr = llmemory.cast_int_to_adr(pyobj.c_ob_pypy_link)
+                if self.gc.header(addr).tid & (self.GCFLAG_VISITED |
+                                              self.GCFLAG_NO_HEAP_PTRS):
+                    refcnt += 1
+            pygchdr.c_gc_refs = objs_index + 1
             obj = self.snapshot_objs[objs_index]
             obj.pyobj = llmemory.cast_ptr_to_adr(pyobj)
-            obj.refcnt = refcnt
-            obj.refcnt_internal = 0
+            obj.refcnt = 1
+            obj.refcnt_external = refcnt
             obj.refs_index = refs_index
             obj.refs_len = 0
+            obj.pypy_link = pyobj.c_ob_pypy_link
             self.snapshot_curr = obj
             self._take_snapshot_traverse(pyobj)
             objs_index += 1
             refs_index += obj.refs_len
             pygchdr = pygchdr.c_gc_next
+        for i in range(0, refs_index):
+            addr = self.snapshot_refs[i]
+            pyobj = llmemory.cast_adr_to_ptr(addr, self.PYOBJ_GC_HDR_PTR)
+            obj = self.snapshot_objs[pyobj.c_gc_refs - 1]
+            self.snapshot_refs[i] = llmemory.cast_ptr_to_adr(obj)
 
     def _take_snapshot_visit(pyobj, self_ptr):
         from rpython.rtyper.annlowlevel import cast_adr_to_nongc_instance
@@ -62,7 +224,7 @@
                 pygchdr.c_gc_refs != self.RAWREFCOUNT_REFS_UNTRACKED:
             curr = self.snapshot_curr
             index = curr.refs_index + curr.refs_len
-            self.snapshot_refs[index] = llmemory.cast_ptr_to_adr(pyobj)
+            self.snapshot_refs[index] = llmemory.cast_ptr_to_adr(pygchdr)
             curr.refs_len += 1
 
     def _take_snapshot_traverse(self, pyobj):
diff --git a/rpython/memory/gc/rrc/mark.py b/rpython/memory/gc/rrc/mark.py
--- a/rpython/memory/gc/rrc/mark.py
+++ b/rpython/memory/gc/rrc/mark.py
@@ -1,4 +1,62 @@
 from rpython.memory.gc.rrc.base import RawRefCountBaseGC
+from rpython.rlib.debug import ll_assert, debug_print, debug_start, debug_stop
 
 class RawRefCountMarkGC(RawRefCountBaseGC):
-    pass
\ No newline at end of file
+
+    def major_collection_trace_step(self):
+        if not self.cycle_enabled:
+            self._debug_check_consistency(print_label="begin-mark")
+
+        if self.state == self.STATE_DEFAULT:
+            self.state = self.STATE_MARKING
+
+        # First, untrack all tuples with only non-gc rrc objects and promote
+        # all other tuples to the pyobj_list
+        self._untrack_tuples()
+
+        # Only trace and mark rawrefcounted object if we are not doing
+        # something special, like building gc.garbage.
+        if (self.state == self.STATE_MARKING and self.cycle_enabled):
+            merged_old_list = False
+            # check objects with finalizers from last collection cycle
+            if not self._gc_list_is_empty(self.pyobj_old_list):
+                merged_old_list = self._check_finalizer()
+            # collect all rawrefcounted roots
+            self._collect_roots(self.pyobj_list)
+            if merged_old_list:
+                # set all refcounts to zero for objects in dead list
+                # (might have been incremented) by fix_refcnt
+                gchdr = self.pyobj_dead_list.c_gc_next
+                while gchdr <> self.pyobj_dead_list:
+                    gchdr.c_gc_refs = 0
+                    gchdr = gchdr.c_gc_next
+            self._debug_check_consistency(print_label="roots-marked")
+            # mark all objects reachable from rawrefcounted roots
+            self._mark_rawrefcount()
+            self._debug_check_consistency(print_label="before-fin")
+            self.state = self.STATE_GARBAGE_MARKING
+            if self._find_garbage(): # handle legacy finalizers
+                self._mark_garbage()
+                self._debug_check_consistency(print_label="end-legacy-fin")
+            self.state = self.STATE_MARKING
+            found_finalizer = self._find_finalizer()
+            if found_finalizer:
+                self._gc_list_move(self.pyobj_old_list,
+                                   self.pyobj_isolate_list)
+            use_cylicrc = not found_finalizer
+            self._debug_check_consistency(print_label="end-mark-cyclic")
+        else:
+            use_cylicrc = False # don't sweep any objects in cyclic isolates
+
+        # now mark all pypy objects at the border, depending on the results
+        debug_print("use_cylicrc", use_cylicrc)
+        self.p_list_old.foreach(self._major_trace, (use_cylicrc, True))
+        self._debug_check_consistency(print_label="end-mark")
+
+        # fix refcnt back
+        self.refcnt_dict.foreach(self._fix_refcnt_back, None)
+        self.refcnt_dict.delete()
+        self.refcnt_dict = self.gc.AddressDict()
+
+        self.state = self.STATE_DEFAULT
+        return True
diff --git a/rpython/memory/gc/test/test_rawrefcount.py b/rpython/memory/gc/test/test_rawrefcount.py
--- a/rpython/memory/gc/test/test_rawrefcount.py
+++ b/rpython/memory/gc/test/test_rawrefcount.py
@@ -3,6 +3,7 @@
 from rpython.memory.gc.incminimark import IncrementalMiniMarkGC as IncMiniMark
 from rpython.memory.gc.rrc.base import RawRefCountBaseGC
 from rpython.memory.gc.rrc.mark import RawRefCountMarkGC
+from rpython.memory.gc.rrc.incmark import RawRefCountIncMarkGC
 from rpython.memory.gc.test.test_direct import BaseDirectGCTest
 from rpython.rlib.rawrefcount import REFCNT_FROM_PYPY, REFCNT_FROM_PYPY_LIGHT
 
@@ -25,7 +26,8 @@
 
 class TestRawRefCount(BaseDirectGCTest):
     GCClass = IncMiniMark
-    RRCGCClass = RawRefCountMarkGC
+    RRCGCClass = RawRefCountIncMarkGC
+    #RRCGCClass = RawRefCountMarkGC
 
     def setup_method(self, method):
         BaseDirectGCTest.setup_method(self, method)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to