Author: Armin Rigo <ar...@tunes.org> Branch: Changeset: r83837:490058ea54e6 Date: 2016-04-24 10:52 +0200 http://bitbucket.org/pypy/pypy/changeset/490058ea54e6/
Log: Issue #2226 Another tweak in the incremental GC: this should ensure that progress in the major GC occurs quickly enough in all cases. diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -341,6 +341,20 @@ self.prebuilt_root_objects = self.AddressStack() # self._init_writebarrier_logic() + # + # The size of all the objects turned from 'young' to 'old' + # since we started the last major collection cycle. This is + # used to track progress of the incremental GC: normally, we + # run one major GC step after each minor collection, but if a + # lot of objects are made old, we need run two or more steps. + # Otherwise the risk is that we create old objects faster than + # we're collecting them. The 'threshold' is incremented after + # each major GC step at a fixed rate; the idea is that as long + # as 'size_objects_made_old > threshold_objects_made_old' then + # we must do more major GC steps. See major_collection_step() + # for more details. + self.size_objects_made_old = r_uint(0) + self.threshold_objects_made_old = r_uint(0) def setup(self): @@ -464,7 +478,7 @@ self.gc_nursery_debug = True else: self.gc_nursery_debug = False - self.minor_collection() # to empty the nursery + self._minor_collection() # to empty the nursery llarena.arena_free(self.nursery) self.nursery_size = newsize self.allocate_nursery() @@ -509,8 +523,8 @@ self.min_heap_size = max(self.min_heap_size, self.nursery_size * self.major_collection_threshold) # the following two values are usually equal, but during raw mallocs - # of arrays, next_major_collection_threshold is decremented to make - # the next major collection arrive earlier. + # with memory pressure accounting, next_major_collection_threshold + # is decremented to make the next major collection arrive earlier. # See translator/c/test/test_newgc, test_nongc_attached_to_gc self.next_major_collection_initial = self.min_heap_size self.next_major_collection_threshold = self.min_heap_size @@ -700,21 +714,58 @@ def collect(self, gen=2): """Do a minor (gen=0), start a major (gen=1), or do a full major (gen>=2) collection.""" - if gen <= 1: - self.minor_collection() - if gen == 1 or (self.gc_state != STATE_SCANNING and gen != -1): + if gen < 0: + self._minor_collection() # dangerous! no major GC cycle progress + elif gen <= 1: + self.minor_collection_with_major_progress() + if gen == 1 and self.gc_state == STATE_SCANNING: self.major_collection_step() else: self.minor_and_major_collection() self.rrc_invoke_callback() + def minor_collection_with_major_progress(self, extrasize=0): + """To a minor collection. Then, if there is already a major GC + in progress, run at least one major collection step. If there is + no major GC but the threshold is reached, start a major GC. + """ + self._minor_collection() + + # If the gc_state is STATE_SCANNING, we're not in the middle + # of an incremental major collection. In that case, wait + # until there is too much garbage before starting the next + # major collection. But if we are in the middle of an + # incremental major collection, then always do (at least) one + # step now. + # + # Within a major collection cycle, every call to + # major_collection_step() increments + # 'threshold_objects_made_old' by nursery_size/2. + + if self.gc_state != STATE_SCANNING or self.threshold_reached(extrasize): + self.major_collection_step(extrasize) + + # See documentation in major_collection_step() + while self.gc_state != STATE_SCANNING: # target (A1) + threshold = self.threshold_objects_made_old + if threshold >= r_uint(extrasize): + threshold -= r_uint(extrasize) + if self.size_objects_made_old <= threshold: # target (A2) + break + + self._minor_collection() + self.major_collection_step(extrasize) + + self.rrc_invoke_callback() + + def collect_and_reserve(self, totalsize): """To call when nursery_free overflows nursery_top. First check if pinned objects are in front of nursery_top. If so, jump over the pinned object and try again to reserve totalsize. - Otherwise do a minor collection, and possibly a major collection, and - finally reserve totalsize bytes. + Otherwise do a minor collection, and possibly some steps of a + major collection, and finally reserve totalsize bytes. """ minor_collection_count = 0 @@ -757,47 +808,27 @@ self.nursery_top = self.nursery_barriers.popleft() else: minor_collection_count += 1 - self.minor_collection() if minor_collection_count == 1: + self.minor_collection_with_major_progress() + else: + # Nursery too full again. This is likely because of + # execute_finalizers() or rrc_invoke_callback(). + # we need to fix it with another call to minor_collection() + # ---this time only the minor part so that we are sure that + # the nursery is empty (apart from pinned objects). # - # If the gc_state is STATE_SCANNING, we're not in - # the middle of an incremental major collection. - # In that case, wait until there is too much - # garbage before starting the next major - # collection. But if we are in the middle of an - # incremental major collection, then always do (at - # least) one step now. + # Note that this still works with the counters: + # 'size_objects_made_old' will be increased by + # the _minor_collection() below. We don't + # immediately restore the target invariant that + # 'size_objects_made_old <= threshold_objects_made_old'. + # But we will do it in the next call to + # minor_collection_with_major_progress(). # - # This will increment next_major_collection_threshold - # by nursery_size//2. If more than nursery_size//2 - # survives, then threshold_reached() might still be - # true after that. In that case we do a second step. - # The goal is to avoid too high memory peaks if the - # program allocates a lot of surviving objects. - # - if (self.gc_state != STATE_SCANNING or - self.threshold_reached()): - - self.major_collection_step() - - if (self.gc_state != STATE_SCANNING and - self.threshold_reached()): # ^^but only if still - self.minor_collection() # the same collection - self.major_collection_step() - # - self.rrc_invoke_callback() - # - # The nursery might not be empty now, because of - # execute_finalizers() or rrc_invoke_callback(). - # If it is almost full again, - # we need to fix it with another call to minor_collection(). - if self.nursery_free + totalsize > self.nursery_top: - self.minor_collection() - # - else: ll_assert(minor_collection_count == 2, - "Seeing minor_collection() at least twice." - "Too many pinned objects?") + "Calling minor_collection() twice is not " + "enough. Too many pinned objects?") + self._minor_collection() # # Tried to do something about nursery_free overflowing # nursery_top before this point. Try to reserve totalsize now. @@ -855,21 +886,9 @@ # to major_collection_step(). If there is really no memory, # then when the major collection finishes it will raise # MemoryError. - # - # The logic is to first do a minor GC only, and check if that - # was enough to free a bunch of large young objects. If it - # was, then we don't do any major collection step. - # - while self.threshold_reached(raw_malloc_usage(totalsize)): - self.minor_collection() - if self.threshold_reached(raw_malloc_usage(totalsize) + - self.nursery_size // 2): - self.major_collection_step(raw_malloc_usage(totalsize)) - self.rrc_invoke_callback() - # note that this loop should not be infinite: when the - # last step of a major collection is done but - # threshold_reached(totalsize) is still true, then - # we should get a MemoryError from major_collection_step(). + if self.threshold_reached(raw_malloc_usage(totalsize)): + self.minor_collection_with_major_progress( + raw_malloc_usage(totalsize) + self.nursery_size // 2) # # Check if the object would fit in the ArenaCollection. # Also, an object allocated from ArenaCollection must be old. @@ -1547,7 +1566,7 @@ # ---------- # Nursery collection - def minor_collection(self): + def _minor_collection(self): """Perform a minor collection: find the objects from the nursery that remain alive and move them out.""" # @@ -1718,6 +1737,10 @@ self.old_objects_pointing_to_pinned.foreach( self._reset_flag_old_objects_pointing_to_pinned, None) # + # Accounting: 'nursery_surviving_size' is the size of objects + # from the nursery that we just moved out. + self.size_objects_made_old += r_uint(self.nursery_surviving_size) + # debug_print("minor collect, total memory used:", self.get_total_memory_used()) debug_print("number of pinned objects:", @@ -1958,6 +1981,7 @@ self.header(obj).tid &= ~GCFLAG_HAS_SHADOW # totalsize = size_gc_header + self.get_size(obj) + self.nursery_surviving_size += raw_malloc_usage(totalsize) # # Copy it. Note that references to other objects in the # nursery are kept unchanged in this step. @@ -2002,6 +2026,11 @@ return hdr.tid |= GCFLAG_VISITED_RMY # + # Accounting + size_gc_header = self.gcheaderbuilder.size_gc_header + size = size_gc_header + self.get_size(obj) + self.size_objects_made_old += r_uint(raw_malloc_usage(size)) + # # we just made 'obj' old, so we need to add it to the correct lists added_somewhere = False # @@ -2084,14 +2113,14 @@ def gc_step_until(self, state): while self.gc_state != state: - self.minor_collection() + self._minor_collection() self.major_collection_step() debug_gc_step_until = gc_step_until # xxx def debug_gc_step(self, n=1): while n > 0: - self.minor_collection() + self._minor_collection() self.major_collection_step() n -= 1 @@ -2111,37 +2140,44 @@ self.debug_check_consistency() # + # 'threshold_objects_made_old', is used inside comparisons + # with 'size_objects_made_old' to know when we must do + # several major GC steps (i.e. several consecurive calls + # to the present function). Here is the target that + # we try to aim to: either (A1) or (A2) + # + # (A1) gc_state == STATE_SCANNING (i.e. major GC cycle ended) + # (A2) size_objects_made_old <= threshold_objects_made_old + # # Every call to major_collection_step() adds nursery_size//2 - # to the threshold. It is reset at the end of this function - # when the major collection is fully finished. - # + # to 'threshold_objects_made_old'. # In the common case, this is larger than the size of all # objects that survive a minor collection. After a few # minor collections (each followed by one call to # major_collection_step()) the threshold is much higher than - # the currently-in-use old memory. Then threshold_reached() - # won't be true again until the major collection fully - # finishes, time passes, and it's time for the next major - # collection. + # the 'size_objects_made_old', making the target invariant (A2) + # true by a large margin. # # However there are less common cases: # - # * if more than half of the nursery consistently survives: we - # call major_collection_step() twice after a minor - # collection; + # * if more than half of the nursery consistently survives: + # then we need two calls to major_collection_step() after + # some minor collection; # # * or if we're allocating a large number of bytes in - # external_malloc(). In that case, we are likely to reach - # again the threshold_reached() case, and more major - # collection steps will be done immediately until - # threshold_reached() returns false. + # external_malloc() and some of them survive the following + # minor collection. In that case, more than two major + # collection steps must be done immediately, until we + # restore the target invariant (A2). # - self.next_major_collection_threshold += self.nursery_size // 2 + self.threshold_objects_made_old += r_uint(self.nursery_size // 2) - # XXX currently very coarse increments, get this working then split - # to smaller increments using stacks for resuming if self.gc_state == STATE_SCANNING: + # starting a major GC cycle: reset these two counters + self.size_objects_made_old = r_uint(0) + self.threshold_objects_made_old = r_uint(self.nursery_size // 2) + self.objects_to_trace = self.AddressStack() self.collect_roots() self.gc_state = STATE_MARKING diff --git a/rpython/memory/gc/test/test_object_pinning.py b/rpython/memory/gc/test/test_object_pinning.py --- a/rpython/memory/gc/test/test_object_pinning.py +++ b/rpython/memory/gc/test/test_object_pinning.py @@ -19,6 +19,8 @@ BaseDirectGCTest.setup_method(self, meth) max = getattr(meth, 'max_number_of_pinned_objects', 20) self.gc.max_number_of_pinned_objects = max + if not hasattr(self.gc, 'minor_collection'): + self.gc.minor_collection = self.gc._minor_collection def test_pin_can_move(self): # even a pinned object is considered to be movable. Only the caller _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit