Author: Armin Rigo <ar...@tunes.org>
Branch: 
Changeset: r83837:490058ea54e6
Date: 2016-04-24 10:52 +0200
http://bitbucket.org/pypy/pypy/changeset/490058ea54e6/

Log:    Issue #2226

        Another tweak in the incremental GC: this should ensure that
        progress in the major GC occurs quickly enough in all cases.

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -341,6 +341,20 @@
         self.prebuilt_root_objects = self.AddressStack()
         #
         self._init_writebarrier_logic()
+        #
+        # The size of all the objects turned from 'young' to 'old'
+        # since we started the last major collection cycle.  This is
+        # used to track progress of the incremental GC: normally, we
+        # run one major GC step after each minor collection, but if a
+        # lot of objects are made old, we need run two or more steps.
+        # Otherwise the risk is that we create old objects faster than
+        # we're collecting them.  The 'threshold' is incremented after
+        # each major GC step at a fixed rate; the idea is that as long
+        # as 'size_objects_made_old > threshold_objects_made_old' then
+        # we must do more major GC steps.  See major_collection_step()
+        # for more details.
+        self.size_objects_made_old = r_uint(0)
+        self.threshold_objects_made_old = r_uint(0)
 
 
     def setup(self):
@@ -464,7 +478,7 @@
                 self.gc_nursery_debug = True
             else:
                 self.gc_nursery_debug = False
-            self.minor_collection()    # to empty the nursery
+            self._minor_collection()    # to empty the nursery
             llarena.arena_free(self.nursery)
             self.nursery_size = newsize
             self.allocate_nursery()
@@ -509,8 +523,8 @@
         self.min_heap_size = max(self.min_heap_size, self.nursery_size *
                                               self.major_collection_threshold)
         # the following two values are usually equal, but during raw mallocs
-        # of arrays, next_major_collection_threshold is decremented to make
-        # the next major collection arrive earlier.
+        # with memory pressure accounting, next_major_collection_threshold
+        # is decremented to make the next major collection arrive earlier.
         # See translator/c/test/test_newgc, test_nongc_attached_to_gc
         self.next_major_collection_initial = self.min_heap_size
         self.next_major_collection_threshold = self.min_heap_size
@@ -700,21 +714,58 @@
     def collect(self, gen=2):
         """Do a minor (gen=0), start a major (gen=1), or do a full
         major (gen>=2) collection."""
-        if gen <= 1:
-            self.minor_collection()
-            if gen == 1 or (self.gc_state != STATE_SCANNING and gen != -1):
+        if gen < 0:
+            self._minor_collection()   # dangerous! no major GC cycle progress
+        elif gen <= 1:
+            self.minor_collection_with_major_progress()
+            if gen == 1 and self.gc_state == STATE_SCANNING:
                 self.major_collection_step()
         else:
             self.minor_and_major_collection()
         self.rrc_invoke_callback()
 
 
+    def minor_collection_with_major_progress(self, extrasize=0):
+        """To a minor collection.  Then, if there is already a major GC
+        in progress, run at least one major collection step.  If there is
+        no major GC but the threshold is reached, start a major GC.
+        """
+        self._minor_collection()
+
+        # If the gc_state is STATE_SCANNING, we're not in the middle
+        # of an incremental major collection.  In that case, wait
+        # until there is too much garbage before starting the next
+        # major collection.  But if we are in the middle of an
+        # incremental major collection, then always do (at least) one
+        # step now.
+        #
+        # Within a major collection cycle, every call to
+        # major_collection_step() increments
+        # 'threshold_objects_made_old' by nursery_size/2.
+
+        if self.gc_state != STATE_SCANNING or 
self.threshold_reached(extrasize):
+            self.major_collection_step(extrasize)
+
+            # See documentation in major_collection_step()
+            while self.gc_state != STATE_SCANNING:    # target (A1)
+                threshold = self.threshold_objects_made_old
+                if threshold >= r_uint(extrasize):
+                    threshold -= r_uint(extrasize)
+                    if self.size_objects_made_old <= threshold:   # target (A2)
+                        break
+
+                self._minor_collection()
+                self.major_collection_step(extrasize)
+
+        self.rrc_invoke_callback()
+
+
     def collect_and_reserve(self, totalsize):
         """To call when nursery_free overflows nursery_top.
         First check if pinned objects are in front of nursery_top. If so,
         jump over the pinned object and try again to reserve totalsize.
-        Otherwise do a minor collection, and possibly a major collection, and
-        finally reserve totalsize bytes.
+        Otherwise do a minor collection, and possibly some steps of a
+        major collection, and finally reserve totalsize bytes.
         """
 
         minor_collection_count = 0
@@ -757,47 +808,27 @@
                 self.nursery_top = self.nursery_barriers.popleft()
             else:
                 minor_collection_count += 1
-                self.minor_collection()
                 if minor_collection_count == 1:
+                    self.minor_collection_with_major_progress()
+                else:
+                    # Nursery too full again.  This is likely because of
+                    # execute_finalizers() or rrc_invoke_callback().
+                    # we need to fix it with another call to minor_collection()
+                    # ---this time only the minor part so that we are sure that
+                    # the nursery is empty (apart from pinned objects).
                     #
-                    # If the gc_state is STATE_SCANNING, we're not in
-                    # the middle of an incremental major collection.
-                    # In that case, wait until there is too much
-                    # garbage before starting the next major
-                    # collection.  But if we are in the middle of an
-                    # incremental major collection, then always do (at
-                    # least) one step now.
+                    # Note that this still works with the counters:
+                    # 'size_objects_made_old' will be increased by
+                    # the _minor_collection() below.  We don't
+                    # immediately restore the target invariant that
+                    # 'size_objects_made_old <= threshold_objects_made_old'.
+                    # But we will do it in the next call to
+                    # minor_collection_with_major_progress().
                     #
-                    # This will increment next_major_collection_threshold
-                    # by nursery_size//2.  If more than nursery_size//2
-                    # survives, then threshold_reached() might still be
-                    # true after that.  In that case we do a second step.
-                    # The goal is to avoid too high memory peaks if the
-                    # program allocates a lot of surviving objects.
-                    # 
-                    if (self.gc_state != STATE_SCANNING or
-                           self.threshold_reached()):
-
-                        self.major_collection_step()
-
-                        if (self.gc_state != STATE_SCANNING and
-                               self.threshold_reached()):  # ^^but only if 
still
-                            self.minor_collection()        # the same 
collection
-                            self.major_collection_step()
-                    #
-                    self.rrc_invoke_callback()
-                    #
-                    # The nursery might not be empty now, because of
-                    # execute_finalizers() or rrc_invoke_callback().
-                    # If it is almost full again,
-                    # we need to fix it with another call to 
minor_collection().
-                    if self.nursery_free + totalsize > self.nursery_top:
-                        self.minor_collection()
-                    #
-                else:
                     ll_assert(minor_collection_count == 2,
-                            "Seeing minor_collection() at least twice."
-                            "Too many pinned objects?")
+                              "Calling minor_collection() twice is not "
+                              "enough. Too many pinned objects?")
+                    self._minor_collection()
             #
             # Tried to do something about nursery_free overflowing
             # nursery_top before this point. Try to reserve totalsize now.
@@ -855,21 +886,9 @@
         # to major_collection_step().  If there is really no memory,
         # then when the major collection finishes it will raise
         # MemoryError.
-        #
-        # The logic is to first do a minor GC only, and check if that
-        # was enough to free a bunch of large young objects.  If it
-        # was, then we don't do any major collection step.
-        #
-        while self.threshold_reached(raw_malloc_usage(totalsize)):
-            self.minor_collection()
-            if self.threshold_reached(raw_malloc_usage(totalsize) +
-                                      self.nursery_size // 2):
-                self.major_collection_step(raw_malloc_usage(totalsize))
-            self.rrc_invoke_callback()
-            # note that this loop should not be infinite: when the
-            # last step of a major collection is done but
-            # threshold_reached(totalsize) is still true, then
-            # we should get a MemoryError from major_collection_step().
+        if self.threshold_reached(raw_malloc_usage(totalsize)):
+            self.minor_collection_with_major_progress(
+                raw_malloc_usage(totalsize) + self.nursery_size // 2)
         #
         # Check if the object would fit in the ArenaCollection.
         # Also, an object allocated from ArenaCollection must be old.
@@ -1547,7 +1566,7 @@
     # ----------
     # Nursery collection
 
-    def minor_collection(self):
+    def _minor_collection(self):
         """Perform a minor collection: find the objects from the nursery
         that remain alive and move them out."""
         #
@@ -1718,6 +1737,10 @@
         self.old_objects_pointing_to_pinned.foreach(
                 self._reset_flag_old_objects_pointing_to_pinned, None)
         #
+        # Accounting: 'nursery_surviving_size' is the size of objects
+        # from the nursery that we just moved out.
+        self.size_objects_made_old += r_uint(self.nursery_surviving_size)
+        #
         debug_print("minor collect, total memory used:",
                     self.get_total_memory_used())
         debug_print("number of pinned objects:",
@@ -1958,6 +1981,7 @@
             self.header(obj).tid &= ~GCFLAG_HAS_SHADOW
             #
             totalsize = size_gc_header + self.get_size(obj)
+            self.nursery_surviving_size += raw_malloc_usage(totalsize)
         #
         # Copy it.  Note that references to other objects in the
         # nursery are kept unchanged in this step.
@@ -2002,6 +2026,11 @@
             return
         hdr.tid |= GCFLAG_VISITED_RMY
         #
+        # Accounting
+        size_gc_header = self.gcheaderbuilder.size_gc_header
+        size = size_gc_header + self.get_size(obj)
+        self.size_objects_made_old += r_uint(raw_malloc_usage(size))
+        #
         # we just made 'obj' old, so we need to add it to the correct lists
         added_somewhere = False
         #
@@ -2084,14 +2113,14 @@
 
     def gc_step_until(self, state):
         while self.gc_state != state:
-            self.minor_collection()
+            self._minor_collection()
             self.major_collection_step()
 
     debug_gc_step_until = gc_step_until   # xxx
 
     def debug_gc_step(self, n=1):
         while n > 0:
-            self.minor_collection()
+            self._minor_collection()
             self.major_collection_step()
             n -= 1
 
@@ -2111,37 +2140,44 @@
         self.debug_check_consistency()
 
         #
+        # 'threshold_objects_made_old', is used inside comparisons
+        # with 'size_objects_made_old' to know when we must do
+        # several major GC steps (i.e. several consecurive calls
+        # to the present function).  Here is the target that
+        # we try to aim to: either (A1) or (A2)
+        #
+        #  (A1)  gc_state == STATE_SCANNING   (i.e. major GC cycle ended)
+        #  (A2)  size_objects_made_old <= threshold_objects_made_old
+        #
         # Every call to major_collection_step() adds nursery_size//2
-        # to the threshold.  It is reset at the end of this function
-        # when the major collection is fully finished.
-        #
+        # to 'threshold_objects_made_old'.
         # In the common case, this is larger than the size of all
         # objects that survive a minor collection.  After a few
         # minor collections (each followed by one call to
         # major_collection_step()) the threshold is much higher than
-        # the currently-in-use old memory.  Then threshold_reached()
-        # won't be true again until the major collection fully
-        # finishes, time passes, and it's time for the next major
-        # collection.
+        # the 'size_objects_made_old', making the target invariant (A2)
+        # true by a large margin.
         #
         # However there are less common cases:
         #
-        # * if more than half of the nursery consistently survives: we
-        #   call major_collection_step() twice after a minor
-        #   collection;
+        # * if more than half of the nursery consistently survives:
+        #   then we need two calls to major_collection_step() after
+        #   some minor collection;
         #
         # * or if we're allocating a large number of bytes in
-        #   external_malloc().  In that case, we are likely to reach
-        #   again the threshold_reached() case, and more major
-        #   collection steps will be done immediately until
-        #   threshold_reached() returns false.
+        #   external_malloc() and some of them survive the following
+        #   minor collection.  In that case, more than two major
+        #   collection steps must be done immediately, until we
+        #   restore the target invariant (A2).
         #
-        self.next_major_collection_threshold += self.nursery_size // 2
+        self.threshold_objects_made_old += r_uint(self.nursery_size // 2)
 
 
-        # XXX currently very coarse increments, get this working then split
-        # to smaller increments using stacks for resuming
         if self.gc_state == STATE_SCANNING:
+            # starting a major GC cycle: reset these two counters
+            self.size_objects_made_old = r_uint(0)
+            self.threshold_objects_made_old = r_uint(self.nursery_size // 2)
+
             self.objects_to_trace = self.AddressStack()
             self.collect_roots()
             self.gc_state = STATE_MARKING
diff --git a/rpython/memory/gc/test/test_object_pinning.py 
b/rpython/memory/gc/test/test_object_pinning.py
--- a/rpython/memory/gc/test/test_object_pinning.py
+++ b/rpython/memory/gc/test/test_object_pinning.py
@@ -19,6 +19,8 @@
         BaseDirectGCTest.setup_method(self, meth)
         max = getattr(meth, 'max_number_of_pinned_objects', 20)
         self.gc.max_number_of_pinned_objects = max
+        if not hasattr(self.gc, 'minor_collection'):
+            self.gc.minor_collection = self.gc._minor_collection
 
     def test_pin_can_move(self):
         # even a pinned object is considered to be movable. Only the caller
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to