Author: Armin Rigo <[email protected]>
Branch: gc-more-incremental
Changeset: r79705:264a78afb911
Date: 2015-09-19 12:14 +0200
http://bitbucket.org/pypy/pypy/changeset/264a78afb911/
Log: This is mostly an attempt to change the threshold in
external_malloc()
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -754,14 +754,30 @@
self.minor_collection()
if minor_collection_count == 1:
#
- # If the gc_state is not STATE_SCANNING, we're in the
middle of
- # an incremental major collection. In this case, always
progress
- # one step. If the gc_state is STATE_SCANNING, wait until
there
- # is too much garbage before starting the next major
collection.
+ # If the gc_state is STATE_SCANNING, we're not in
+ # the middle of an incremental major collection.
+ # In that case, wait until there is too much
+ # garbage before starting the next major
+ # collection. But if we are in the middle of an
+ # incremental major collection, then always do (at
+ # least) one step now.
+ #
+ # This will increment next_major_collection_threshold
+ # by nursery_size//2. If more than nursery_size//2
+ # survives, then threshold_reached() might still be
+ # true after that. In that case we do a second step.
+ # The goal is to avoid too high memory peaks if the
+ # program allocates a lot of surviving objects.
+ #
if (self.gc_state != STATE_SCANNING or
- self.get_total_memory_used() >
- self.next_major_collection_threshold):
+ self.threshold_reached()):
+
self.major_collection_step()
+
+ if (self.gc_state != STATE_SCANNING and
+ self.threshold_reached()): # ^^but only if
still
+ self.minor_collection() # the same
collection
+ self.major_collection_step()
#
# The nursery might not be empty now, because of
# execute_finalizers(). If it is almost full again,
@@ -825,17 +841,25 @@
raise MemoryError
#
# If somebody calls this function a lot, we must eventually
- # force a collection. XXX make this more incremental! For now
- # the logic is to first do a minor GC only, and check if that
- # was enough to free a bunch of large young objects. If not,
- # we do a complete major GC.
- if self.get_total_memory_free() < raw_malloc_usage(totalsize):
+ # force a collection. We use threshold_reached(), which might
+ # be true now but become false at some point after a few calls
+ # to major_collection_step(). If there is really no memory,
+ # then when the major collection finishes it will raise
+ # MemoryError.
+ #
+ # The logic is to first do a minor GC only, and check if that
+ # was enough to free a bunch of large young objects. If it
+ # was, then we don't do any major collection step.
+ #
+ while self.threshold_reached(raw_malloc_usage(totalsize)):
self.minor_collection()
- if self.get_total_memory_free() < (raw_malloc_usage(totalsize) +
- self.nursery_size // 2):
- self.gc_step_until(STATE_SWEEPING)
- self.gc_step_until(STATE_FINALIZING,
- raw_malloc_usage(totalsize))
+ if self.threshold_reached(raw_malloc_usage(totalsize) +
+ self.nursery_size // 2):
+ self.major_collection_step(raw_malloc_usage(totalsize))
+ # note that this loop should not be infinite: when the
+ # last step of a major collection is done but
+ # threshold_reached(totalsize) is still true, then
+ # we should get a MemoryError from major_collection_step().
#
# Check if the object would fit in the ArenaCollection.
if raw_malloc_usage(totalsize) <= self.small_request_threshold:
@@ -1112,9 +1136,9 @@
"""
return self.ac.total_memory_used + self.rawmalloced_total_size
- def get_total_memory_free(self):
+ def threshold_reached(self, extra=0):
return (self.next_major_collection_threshold -
- float(self.get_total_memory_used()))
+ float(self.get_total_memory_used())) < float(extra)
def card_marking_words_for_length(self, length):
# --- Unoptimized version:
@@ -2058,10 +2082,10 @@
self.gc_step_until(STATE_MARKING)
self.gc_step_until(STATE_SCANNING)
- def gc_step_until(self, state, reserving_size=0):
+ def gc_step_until(self, state):
while self.gc_state != state:
self.minor_collection()
- self.major_collection_step(reserving_size)
+ self.major_collection_step()
debug_gc_step_until = gc_step_until # xxx
@@ -2086,8 +2110,36 @@
pass
self.debug_check_consistency()
+ #
+ # Every call to major_collection_step() adds nursery_size//2
+ # to the threshold. It is reset at the end of this function
+ # when the major collection is fully finished.
+ #
+ # In the common case, this is larger than the size of all
+ # objects that survive a minor collection. After a few
+ # minor collections (each followed by one call to
+ # major_collection_step()) the threshold is much higher than
+ # the currently-in-use old memory. Then threshold_reached()
+ # won't be true again until the major collection fully
+ # finishes, time passes, and it's time for the next major
+ # collection.
+ #
+ # However there are less common cases:
+ #
+ # * if more than half of the nursery consistently survives: we
+ # call major_collection_step() twice after a minor
+ # collection;
+ #
+ # * or if we're allocating a large number of bytes in
+ # external_malloc(). In that case, we are likely to reach
+ # again the threshold_reached() case, and more major
+ # collection steps will be done immediately until
+ # threshold_reached() returns false.
+ #
+ self.next_major_collection_threshold += self.nursery_size // 2
- # XXX currently very course increments, get this working then split
+
+ # XXX currently very coarse increments, get this working then split
# to smaller increments using stacks for resuming
if self.gc_state == STATE_SCANNING:
self.objects_to_trace = self.AddressStack()
@@ -2201,8 +2253,7 @@
#
# Max heap size: gives an upper bound on the threshold. If we
# already have at least this much allocated, raise MemoryError.
- if bounded and (float(self.get_total_memory_used()) +
reserving_size >=
- self.next_major_collection_initial):
+ if bounded and self.threshold_reached(reserving_size):
#
# First raise MemoryError, giving the program a chance to
# quit cleanly. It might still allocate in the nursery,
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit