Author: Antonio Cuni <anto.c...@gmail.com> Branch: Changeset: r95514:947a6274ef5e Date: 2018-12-19 18:14 +0100 http://bitbucket.org/pypy/pypy/changeset/947a6274ef5e/
Log: Merge the gc-disable branch, which introduces two changes to the gc module: 1. gc.disable() actually disables the major collections of the GC (thus causing an apparent leak if you leave it disabled for too much) 2. gc.collect_step() lets you to manually run collection steps when the GC is disabled diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -394,8 +394,10 @@ * some functions and attributes of the ``gc`` module behave in a slightly different way: for example, ``gc.enable`` and - ``gc.disable`` are supported, but instead of enabling and disabling - the GC, they just enable and disable the execution of finalizers. + ``gc.disable`` are supported, but "enabling and disabling the GC" has + a different meaning in PyPy than in CPython. These functions + actually enable and disable the major collections and the + execution of finalizers. * PyPy prints a random line from past #pypy IRC topics at startup in interactive mode. In a released version, this behaviour is suppressed, but diff --git a/pypy/doc/gc_info.rst b/pypy/doc/gc_info.rst --- a/pypy/doc/gc_info.rst +++ b/pypy/doc/gc_info.rst @@ -22,8 +22,44 @@ larger. (A third category, the very large objects, are initially allocated outside the nursery and never move.) -Since Incminimark is an incremental GC, the major collection is incremental, -meaning there should not be any pauses longer than 1ms. +Since Incminimark is an incremental GC, the major collection is incremental: +the goal is not to have any pause longer than 1ms, but in practice it depends +on the size and characteristics of the heap: occasionally, there can be pauses +between 10-100ms. + + +Semi-manual GC management +-------------------------- + +If there are parts of the program where it is important to have a low latency, +you might want to control precisely when the GC runs, to avoid unexpected +pauses. Note that this has effect only on major collections, while minor +collections continue to work as usual. + +As explained above, a full major collection consists of ``N`` steps, where +``N`` depends on the size of the heap; generally speaking, it is not possible +to predict how many steps will be needed to complete a collection. + +``gc.enable()`` and ``gc.disable()`` control whether the GC runs collection +steps automatically. When the GC is disabled the memory usage will grow +indefinitely, unless you manually call ``gc.collect()`` and +``gc.collect_step()``. + +``gc.collect()`` runs a full major collection. + +``gc.collect_step()`` runs a single collection step. It returns an object of +type GcCollectStepStats_, the same which is passed to the corresponding `GC +Hooks`_. The following code is roughly equivalent to a ``gc.collect()``:: + + while True: + if gc.collect_step().major_is_done: + break + +For a real-world example of usage of this API, you can look at the 3rd-party +module `pypytools.gc.custom`_, which also provides a ``with customgc.nogc()`` +context manager to mark sections where the GC is forbidden. + +.. _`pypytools.gc.custom`: https://bitbucket.org/antocuni/pypytools/src/0273afc3e8bedf0eb1ef630c3bc69e8d9dd661fe/pypytools/gc/custom.py?at=default&fileviewer=file-view-default Fragmentation @@ -184,6 +220,8 @@ the number of pinned objects. +.. _GcCollectStepStats: + The attributes for ``GcCollectStepStats`` are: ``count``, ``duration``, ``duration_min``, ``duration_max`` @@ -192,10 +230,14 @@ ``oldstate``, ``newstate`` Integers which indicate the state of the GC before and after the step. +``major_is_done`` + Boolean which indicate whether this was the last step of the major + collection + The value of ``oldstate`` and ``newstate`` is one of these constants, defined inside ``gc.GcCollectStepStats``: ``STATE_SCANNING``, ``STATE_MARKING``, -``STATE_SWEEPING``, ``STATE_FINALIZING``. It is possible to get a string -representation of it by indexing the ``GC_STATS`` tuple. +``STATE_SWEEPING``, ``STATE_FINALIZING``, ``STATE_USERDEL``. It is possible +to get a string representation of it by indexing the ``GC_STATES`` tuple. The attributes for ``GcCollectStats`` are: diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -60,3 +60,10 @@ .. branch: cleanup-test_lib_pypy Update most test_lib_pypy/ tests and move them to extra_tests/. + +.. branch: gc-disable + +Make it possible to manually manage the GC by using a combination of +gc.disable() and gc.collect_step(). Make sure to write a proper release +announcement in which we explain that existing programs could leak memory if +they run for too much time between a gc.disable()/gc.enable() diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -4,6 +4,7 @@ class Module(MixedModule): interpleveldefs = { 'collect': 'interp_gc.collect', + 'collect_step': 'interp_gc.collect_step', 'enable': 'interp_gc.enable', 'disable': 'interp_gc.disable', 'isenabled': 'interp_gc.isenabled', diff --git a/pypy/module/gc/hook.py b/pypy/module/gc/hook.py --- a/pypy/module/gc/hook.py +++ b/pypy/module/gc/hook.py @@ -1,5 +1,6 @@ from rpython.memory.gc.hook import GcHooks -from rpython.memory.gc import incminimark +from rpython.memory.gc import incminimark +from rpython.rlib import rgc from rpython.rlib.nonconst import NonConstant from rpython.rlib.rarithmetic import r_uint, r_longlong, longlongmax from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -117,12 +118,24 @@ self.descr_set_on_gc_collect(space, space.w_None) -class GcMinorHookAction(AsyncAction): +class NoRecursiveAction(AsyncAction): + depth = 0 + + def perform(self, ec, frame): + if self.depth == 0: + try: + self.depth += 1 + return self._do_perform(ec, frame) + finally: + self.depth -= 1 + + +class GcMinorHookAction(NoRecursiveAction): total_memory_used = 0 pinned_objects = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -145,7 +158,7 @@ self.pinned_objects = NonConstant(-42) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcMinorStats( self.count, self.duration, @@ -157,12 +170,12 @@ self.space.call_function(self.w_callable, w_stats) -class GcCollectStepHookAction(AsyncAction): +class GcCollectStepHookAction(NoRecursiveAction): oldstate = 0 newstate = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -185,19 +198,20 @@ self.newstate = NonConstant(-42) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcCollectStepStats( self.count, self.duration, self.duration_min, self.duration_max, self.oldstate, - self.newstate) + self.newstate, + rgc.is_done__states(self.oldstate, self.newstate)) self.reset() self.space.call_function(self.w_callable, w_stats) -class GcCollectHookAction(AsyncAction): +class GcCollectHookAction(NoRecursiveAction): num_major_collects = 0 arenas_count_before = 0 arenas_count_after = 0 @@ -206,7 +220,7 @@ rawmalloc_bytes_after = 0 def __init__(self, space): - AsyncAction.__init__(self, space) + NoRecursiveAction.__init__(self, space) self.w_callable = space.w_None self.reset() @@ -227,7 +241,7 @@ self.rawmalloc_bytes_after = NonConstant(r_uint(42)) self.fire() - def perform(self, ec, frame): + def _do_perform(self, ec, frame): w_stats = W_GcCollectStats(self.count, self.num_major_collects, self.arenas_count_before, @@ -252,15 +266,32 @@ class W_GcCollectStepStats(W_Root): + # NOTE: this is specific to incminimark: if we want to integrate the + # applevel gc module with another gc, we probably need a more general + # approach to this. + # + # incminimark has 4 GC states: scanning, marking, sweeping and + # finalizing. However, from the user point of view, we have an additional + # "virtual" state: USERDEL, which represent when we run applevel + # finalizers after having completed a GC major collection. This state is + # never explicitly visible when using hooks, but it is used for the return + # value of gc.collect_step (see interp_gc.py) + STATE_SCANNING = incminimark.STATE_SCANNING + STATE_MARKING = incminimark.STATE_MARKING + STATE_SWEEPING = incminimark.STATE_SWEEPING + STATE_FINALIZING = incminimark.STATE_FINALIZING + STATE_USERDEL = incminimark.STATE_FINALIZING + 1 # used by StepCollector + GC_STATES = tuple(incminimark.GC_STATES + ['USERDEL']) def __init__(self, count, duration, duration_min, duration_max, - oldstate, newstate): + oldstate, newstate, major_is_done): self.count = count self.duration = duration self.duration_min = duration_min self.duration_max = duration_max self.oldstate = oldstate self.newstate = newstate + self.major_is_done = major_is_done class W_GcCollectStats(W_Root): @@ -320,11 +351,16 @@ W_GcCollectStepStats.typedef = TypeDef( "GcCollectStepStats", - STATE_SCANNING = incminimark.STATE_SCANNING, - STATE_MARKING = incminimark.STATE_MARKING, - STATE_SWEEPING = incminimark.STATE_SWEEPING, - STATE_FINALIZING = incminimark.STATE_FINALIZING, - GC_STATES = tuple(incminimark.GC_STATES), + STATE_SCANNING = W_GcCollectStepStats.STATE_SCANNING, + STATE_MARKING = W_GcCollectStepStats.STATE_MARKING, + STATE_SWEEPING = W_GcCollectStepStats.STATE_SWEEPING, + STATE_FINALIZING = W_GcCollectStepStats.STATE_FINALIZING, + STATE_USERDEL = W_GcCollectStepStats.STATE_USERDEL, + GC_STATES = tuple(W_GcCollectStepStats.GC_STATES), + major_is_done = interp_attrproperty( + "major_is_done", + cls=W_GcCollectStepStats, + wrapfn="newbool"), **wrap_many(W_GcCollectStepStats, ( "count", "duration", diff --git a/pypy/module/gc/interp_gc.py b/pypy/module/gc/interp_gc.py --- a/pypy/module/gc/interp_gc.py +++ b/pypy/module/gc/interp_gc.py @@ -1,6 +1,7 @@ from pypy.interpreter.gateway import unwrap_spec from pypy.interpreter.error import oefmt from rpython.rlib import rgc +from pypy.module.gc.hook import W_GcCollectStepStats @unwrap_spec(generation=int) @@ -16,7 +17,9 @@ cache.clear() rgc.collect() + _run_finalizers(space) +def _run_finalizers(space): # if we are running in gc.disable() mode but gc.collect() is called, # we should still call the finalizers now. We do this as an attempt # to get closer to CPython's behavior: in Py3.5 some tests @@ -39,18 +42,20 @@ return space.newint(0) def enable(space): - """Non-recursive version. Enable finalizers now. + """Non-recursive version. Enable major collections and finalizers. If they were already enabled, no-op. If they were disabled even several times, enable them anyway. """ + rgc.enable() if not space.user_del_action.enabled_at_app_level: space.user_del_action.enabled_at_app_level = True enable_finalizers(space) def disable(space): - """Non-recursive version. Disable finalizers now. Several calls - to this function are ignored. + """Non-recursive version. Disable major collections and finalizers. + Multiple calls to this function are ignored. """ + rgc.disable() if space.user_del_action.enabled_at_app_level: space.user_del_action.enabled_at_app_level = False disable_finalizers(space) @@ -77,6 +82,59 @@ if uda.pending_with_disabled_del is None: uda.pending_with_disabled_del = [] + +class StepCollector(object): + """ + Invoke rgc.collect_step() until we are done, then run the app-level + finalizers as a separate step + """ + + def __init__(self, space): + self.space = space + self.finalizing = False + + def do(self): + if self.finalizing: + self._run_finalizers() + self.finalizing = False + oldstate = W_GcCollectStepStats.STATE_USERDEL + newstate = W_GcCollectStepStats.STATE_SCANNING + major_is_done = True # now we are finally done + else: + states = self._collect_step() + oldstate = rgc.old_state(states) + newstate = rgc.new_state(states) + major_is_done = False # USERDEL still to do + if rgc.is_done(states): + newstate = W_GcCollectStepStats.STATE_USERDEL + self.finalizing = True + # + duration = -1 + return W_GcCollectStepStats( + count = 1, + duration = duration, + duration_min = duration, + duration_max = duration, + oldstate = oldstate, + newstate = newstate, + major_is_done = major_is_done) + + def _collect_step(self): + return rgc.collect_step() + + def _run_finalizers(self): + _run_finalizers(self.space) + +def collect_step(space): + """ + If the GC is incremental, run a single gc-collect-step. Return True when + the major collection is completed. + If the GC is not incremental, do a full collection and return True. + """ + sc = space.fromcache(StepCollector) + w_stats = sc.do() + return w_stats + # ____________________________________________________________ @unwrap_spec(filename='fsencode') diff --git a/pypy/module/gc/test/test_gc.py b/pypy/module/gc/test/test_gc.py --- a/pypy/module/gc/test/test_gc.py +++ b/pypy/module/gc/test/test_gc.py @@ -1,7 +1,20 @@ import py - +import pytest +from rpython.rlib import rgc +from pypy.interpreter.baseobjspace import ObjSpace +from pypy.interpreter.gateway import interp2app, unwrap_spec +from pypy.module.gc.interp_gc import StepCollector, W_GcCollectStepStats class AppTestGC(object): + + def setup_class(cls): + if cls.runappdirect: + pytest.skip("these tests cannot work with -A") + space = cls.space + def rgc_isenabled(space): + return space.newbool(rgc.isenabled()) + cls.w_rgc_isenabled = space.wrap(interp2app(rgc_isenabled)) + def test_collect(self): import gc gc.collect() # mostly a "does not crash" kind of test @@ -63,12 +76,16 @@ def test_enable(self): import gc assert gc.isenabled() + assert self.rgc_isenabled() gc.disable() assert not gc.isenabled() + assert not self.rgc_isenabled() gc.enable() assert gc.isenabled() + assert self.rgc_isenabled() gc.enable() assert gc.isenabled() + assert self.rgc_isenabled() def test_gc_collect_overrides_gc_disable(self): import gc @@ -83,6 +100,24 @@ assert deleted == [1] gc.enable() + def test_gc_collect_step(self): + import gc + + class X(object): + deleted = 0 + def __del__(self): + X.deleted += 1 + + gc.disable() + X(); X(); X(); + n = 0 + while True: + n += 1 + if gc.collect_step().major_is_done: + break + + assert n >= 2 # at least one step + 1 finalizing + assert X.deleted == 3 class AppTestGcDumpHeap(object): pytestmark = py.test.mark.xfail(run=False) @@ -156,3 +191,55 @@ gc.collect() # the classes C should all go away here for r in rlist: assert r() is None + + +def test_StepCollector(): + W = W_GcCollectStepStats + SCANNING = W.STATE_SCANNING + MARKING = W.STATE_MARKING + SWEEPING = W.STATE_SWEEPING + FINALIZING = W.STATE_FINALIZING + USERDEL = W.STATE_USERDEL + + class MyStepCollector(StepCollector): + my_steps = 0 + my_done = False + my_finalized = 0 + + def __init__(self): + StepCollector.__init__(self, space=None) + self._state_transitions = iter([ + (SCANNING, MARKING), + (MARKING, SWEEPING), + (SWEEPING, FINALIZING), + (FINALIZING, SCANNING)]) + + def _collect_step(self): + self.my_steps += 1 + try: + oldstate, newstate = next(self._state_transitions) + except StopIteration: + assert False, 'should not happen, did you call _collect_step too much?' + return rgc._encode_states(oldstate, newstate) + + def _run_finalizers(self): + self.my_finalized += 1 + + sc = MyStepCollector() + transitions = [] + while True: + result = sc.do() + transitions.append((result.oldstate, result.newstate, sc.my_finalized)) + if result.major_is_done: + break + + assert transitions == [ + (SCANNING, MARKING, False), + (MARKING, SWEEPING, False), + (SWEEPING, FINALIZING, False), + (FINALIZING, USERDEL, False), + (USERDEL, SCANNING, True) + ] + # there is one more transition than actual step, because + # FINALIZING->USERDEL is "virtual" + assert sc.my_steps == len(transitions) - 1 diff --git a/pypy/module/gc/test/test_hook.py b/pypy/module/gc/test/test_hook.py --- a/pypy/module/gc/test/test_hook.py +++ b/pypy/module/gc/test/test_hook.py @@ -69,26 +69,29 @@ def test_on_gc_collect_step(self): import gc + SCANNING = 0 + MARKING = 1 + SWEEPING = 2 + FINALIZING = 3 lst = [] def on_gc_collect_step(stats): lst.append((stats.count, stats.duration, stats.oldstate, - stats.newstate)) + stats.newstate, + stats.major_is_done)) gc.hooks.on_gc_collect_step = on_gc_collect_step - self.fire_gc_collect_step(10, 20, 30) - self.fire_gc_collect_step(40, 50, 60) + self.fire_gc_collect_step(10, SCANNING, MARKING) + self.fire_gc_collect_step(40, FINALIZING, SCANNING) assert lst == [ - (1, 10, 20, 30), - (1, 40, 50, 60), + (1, 10, SCANNING, MARKING, False), + (1, 40, FINALIZING, SCANNING, True), ] # gc.hooks.on_gc_collect_step = None - self.fire_gc_collect_step(70, 80, 90) # won't fire - assert lst == [ - (1, 10, 20, 30), - (1, 40, 50, 60), - ] + oldlst = lst[:] + self.fire_gc_collect_step(70, SCANNING, MARKING) # won't fire + assert lst == oldlst def test_on_gc_collect(self): import gc @@ -123,7 +126,8 @@ assert S.STATE_MARKING == 1 assert S.STATE_SWEEPING == 2 assert S.STATE_FINALIZING == 3 - assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', 'FINALIZING') + assert S.GC_STATES == ('SCANNING', 'MARKING', 'SWEEPING', + 'FINALIZING', 'USERDEL') def test_cumulative(self): import gc @@ -176,3 +180,22 @@ assert gc.hooks.on_gc_minor is None assert gc.hooks.on_gc_collect_step is None assert gc.hooks.on_gc_collect is None + + def test_no_recursive(self): + import gc + lst = [] + def on_gc_minor(stats): + lst.append((stats.count, + stats.duration, + stats.total_memory_used, + stats.pinned_objects)) + self.fire_gc_minor(1, 2, 3) # won't fire NOW + gc.hooks.on_gc_minor = on_gc_minor + self.fire_gc_minor(10, 20, 30) + self.fire_gc_minor(40, 50, 60) + # the duration for the 2nd call is 41, because it also counts the 1 + # which was fired recursively + assert lst == [ + (1, 10, 20, 30), + (2, 41, 50, 60), + ] diff --git a/rpython/memory/gc/base.py b/rpython/memory/gc/base.py --- a/rpython/memory/gc/base.py +++ b/rpython/memory/gc/base.py @@ -149,6 +149,20 @@ def get_size_incl_hash(self, obj): return self.get_size(obj) + # these can be overriden by subclasses, called by the GCTransformer + def enable(self): + pass + + def disable(self): + pass + + def isenabled(self): + return True + + def collect_step(self): + self.collect() + return True + def malloc(self, typeid, length=0, zero=False): """NOT_RPYTHON For testing. The interface used by the gctransformer is diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -379,6 +379,11 @@ self.total_gc_time = 0.0 self.gc_state = STATE_SCANNING + + # if the GC is disabled, it runs only minor collections; major + # collections need to be manually triggered by explicitly calling + # collect() + self.enabled = True # # Two lists of all objects with finalizers. Actually they are lists # of pairs (finalization_queue_nr, object). "probably young objects" @@ -514,6 +519,15 @@ bigobj = self.nonlarge_max + 1 self.max_number_of_pinned_objects = self.nursery_size / (bigobj * 2) + def enable(self): + self.enabled = True + + def disable(self): + self.enabled = False + + def isenabled(self): + return self.enabled + def _nursery_memory_size(self): extra = self.nonlarge_max + 1 return self.nursery_size + extra @@ -750,22 +764,53 @@ """Do a minor (gen=0), start a major (gen=1), or do a full major (gen>=2) collection.""" if gen < 0: - self._minor_collection() # dangerous! no major GC cycle progress - elif gen <= 1: - self.minor_collection_with_major_progress() - if gen == 1 and self.gc_state == STATE_SCANNING: + # Dangerous! this makes no progress on the major GC cycle. + # If called too often, the memory usage will keep increasing, + # because we'll never completely fill the nursery (and so + # never run anything about the major collection). + self._minor_collection() + elif gen == 0: + # This runs a minor collection. This is basically what occurs + # when the nursery is full. If a major collection is in + # progress, it also runs one more step of it. It might also + # decide to start a major collection just now, depending on + # current memory pressure. + self.minor_collection_with_major_progress(force_enabled=True) + elif gen == 1: + # This is like gen == 0, but if no major collection is running, + # then it forces one to start now. + self.minor_collection_with_major_progress(force_enabled=True) + if self.gc_state == STATE_SCANNING: self.major_collection_step() else: + # This does a complete minor and major collection. self.minor_and_major_collection() self.rrc_invoke_callback() + def collect_step(self): + """ + Do a single major collection step. Return True when the major collection + is completed. - def minor_collection_with_major_progress(self, extrasize=0): - """Do a minor collection. Then, if there is already a major GC - in progress, run at least one major collection step. If there is - no major GC but the threshold is reached, start a major GC. + This is meant to be used together with gc.disable(), to have a + fine-grained control on when the GC runs. + """ + old_state = self.gc_state + self._minor_collection() + self.major_collection_step() + self.rrc_invoke_callback() + return rgc._encode_states(old_state, self.gc_state) + + def minor_collection_with_major_progress(self, extrasize=0, + force_enabled=False): + """Do a minor collection. Then, if the GC is enabled and there + is already a major GC in progress, run at least one major collection + step. If there is no major GC but the threshold is reached, start a + major GC. """ self._minor_collection() + if not self.enabled and not force_enabled: + return # If the gc_state is STATE_SCANNING, we're not in the middle # of an incremental major collection. In that case, wait @@ -2428,25 +2473,6 @@ # We also need to reset the GCFLAG_VISITED on prebuilt GC objects. self.prebuilt_root_objects.foreach(self._reset_gcflag_visited, None) # - # Print statistics - debug_start("gc-collect-done") - debug_print("arenas: ", - self.stat_ac_arenas_count, " => ", - self.ac.arenas_count) - debug_print("bytes used in arenas: ", - self.ac.total_memory_used) - debug_print("bytes raw-malloced: ", - self.stat_rawmalloced_total_size, " => ", - self.rawmalloced_total_size) - debug_stop("gc-collect-done") - self.hooks.fire_gc_collect( - num_major_collects=self.num_major_collects, - arenas_count_before=self.stat_ac_arenas_count, - arenas_count_after=self.ac.arenas_count, - arenas_bytes=self.ac.total_memory_used, - rawmalloc_bytes_before=self.stat_rawmalloced_total_size, - rawmalloc_bytes_after=self.rawmalloced_total_size) - # # Set the threshold for the next major collection to be when we # have allocated 'major_collection_threshold' times more than # we currently have -- but no more than 'max_delta' more than @@ -2460,6 +2486,27 @@ total_memory_used + self.max_delta), reserving_size) # + # Print statistics + debug_start("gc-collect-done") + debug_print("arenas: ", + self.stat_ac_arenas_count, " => ", + self.ac.arenas_count) + debug_print("bytes used in arenas: ", + self.ac.total_memory_used) + debug_print("bytes raw-malloced: ", + self.stat_rawmalloced_total_size, " => ", + self.rawmalloced_total_size) + debug_print("next major collection threshold: ", + self.next_major_collection_threshold) + debug_stop("gc-collect-done") + self.hooks.fire_gc_collect( + num_major_collects=self.num_major_collects, + arenas_count_before=self.stat_ac_arenas_count, + arenas_count_after=self.ac.arenas_count, + arenas_bytes=self.ac.total_memory_used, + rawmalloc_bytes_before=self.stat_rawmalloced_total_size, + rawmalloc_bytes_after=self.rawmalloced_total_size) + # # Max heap size: gives an upper bound on the threshold. If we # already have at least this much allocated, raise MemoryError. if bounded and self.threshold_reached(reserving_size): diff --git a/rpython/memory/gc/test/test_direct.py b/rpython/memory/gc/test/test_direct.py --- a/rpython/memory/gc/test/test_direct.py +++ b/rpython/memory/gc/test/test_direct.py @@ -13,6 +13,7 @@ from rpython.memory.gc import minimark, incminimark from rpython.memory.gctypelayout import zero_gc_pointers_inside, zero_gc_pointers from rpython.rlib.debug import debug_print +from rpython.rlib.test.test_debug import debuglog import pdb WORD = LONG_BIT // 8 @@ -770,4 +771,76 @@ assert elem.prev == lltype.nullptr(S) assert elem.next == lltype.nullptr(S) - + def test_collect_0(self, debuglog): + self.gc.collect(1) # start a major + debuglog.reset() + self.gc.collect(0) # do ONLY a minor + assert debuglog.summary() == {'gc-minor': 1} + + def test_enable_disable(self, debuglog): + def large_malloc(): + # malloc an object which is large enough to trigger a major collection + threshold = self.gc.next_major_collection_threshold + self.malloc(VAR, int(threshold/8)) + summary = debuglog.summary() + debuglog.reset() + return summary + # + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-collect-step', 'gc-minor'] + # + self.gc.disable() + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-minor'] + # + self.gc.enable() + summary = large_malloc() + assert sorted(summary.keys()) == ['gc-collect-step', 'gc-minor'] + + def test_call_collect_when_disabled(self, debuglog): + # malloc an object and put it the old generation + s = self.malloc(S) + s.x = 42 + self.stackroots.append(s) + self.gc.collect() + s = self.stackroots.pop() + # + self.gc.disable() + self.gc.collect(1) # start a major collect + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + assert s.x == 42 # s is not freed yet + # + debuglog.reset() + self.gc.collect(1) # run one more step + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + assert s.x == 42 # s is not freed yet + # + debuglog.reset() + self.gc.collect() # finish the major collection + summary = debuglog.summary() + assert sorted(debuglog.summary()) == ['gc-collect-step', 'gc-minor'] + # s is freed + py.test.raises(RuntimeError, 's.x') + + def test_collect_step(self, debuglog): + from rpython.rlib import rgc + n = 0 + states = [] + while True: + debuglog.reset() + val = self.gc.collect_step() + states.append((rgc.old_state(val), rgc.new_state(val))) + summary = debuglog.summary() + assert summary == {'gc-minor': 1, 'gc-collect-step': 1} + if rgc.is_done(val): + break + n += 1 + if n == 100: + assert False, 'this looks like an endless loop' + # + assert states == [ + (incminimark.STATE_SCANNING, incminimark.STATE_MARKING), + (incminimark.STATE_MARKING, incminimark.STATE_SWEEPING), + (incminimark.STATE_SWEEPING, incminimark.STATE_FINALIZING), + (incminimark.STATE_FINALIZING, incminimark.STATE_SCANNING) + ] diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py --- a/rpython/memory/gctransform/framework.py +++ b/rpython/memory/gctransform/framework.py @@ -309,6 +309,12 @@ self.collect_ptr = getfn(GCClass.collect.im_func, [s_gc, annmodel.SomeInteger()], annmodel.s_None) + self.collect_step_ptr = getfn(GCClass.collect_step.im_func, [s_gc], + annmodel.SomeInteger()) + self.enable_ptr = getfn(GCClass.enable.im_func, [s_gc], annmodel.s_None) + self.disable_ptr = getfn(GCClass.disable.im_func, [s_gc], annmodel.s_None) + self.isenabled_ptr = getfn(GCClass.isenabled.im_func, [s_gc], + annmodel.s_Bool) self.can_move_ptr = getfn(GCClass.can_move.im_func, [s_gc, SomeAddress()], annmodel.SomeBool()) @@ -884,6 +890,28 @@ resultvar=op.result) self.pop_roots(hop, livevars) + def gct_gc__collect_step(self, hop): + op = hop.spaceop + livevars = self.push_roots(hop) + hop.genop("direct_call", [self.collect_step_ptr, self.c_const_gc], + resultvar=op.result) + self.pop_roots(hop, livevars) + + def gct_gc__enable(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.enable_ptr, self.c_const_gc], + resultvar=op.result) + + def gct_gc__disable(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.disable_ptr, self.c_const_gc], + resultvar=op.result) + + def gct_gc__isenabled(self, hop): + op = hop.spaceop + hop.genop("direct_call", [self.isenabled_ptr, self.c_const_gc], + resultvar=op.result) + def gct_gc_can_move(self, hop): op = hop.spaceop v_addr = hop.genop('cast_ptr_to_adr', diff --git a/rpython/rlib/debug.py b/rpython/rlib/debug.py --- a/rpython/rlib/debug.py +++ b/rpython/rlib/debug.py @@ -1,5 +1,6 @@ import sys import time +from collections import Counter from rpython.rlib.objectmodel import enforceargs from rpython.rtyper.extregistry import ExtRegistryEntry @@ -38,6 +39,23 @@ assert False, ("nesting error: no start corresponding to stop %r" % (category,)) + def reset(self): + # only for tests: empty the log + self[:] = [] + + def summary(self, flatten=False): + res = Counter() + def visit(lst): + for section, sublist in lst: + if section == 'debug_print': + continue + res[section] += 1 + if flatten: + visit(sublist) + # + visit(self) + return res + def __repr__(self): import pprint return pprint.pformat(list(self)) diff --git a/rpython/rlib/rgc.py b/rpython/rlib/rgc.py --- a/rpython/rlib/rgc.py +++ b/rpython/rlib/rgc.py @@ -13,6 +13,50 @@ # General GC features collect = gc.collect +enable = gc.enable +disable = gc.disable +isenabled = gc.isenabled + +def collect_step(): + """ + If the GC is incremental, run a single gc-collect-step. + + Return an integer which encodes the starting and ending GC state. Use + rgc.{old_state,new_state,is_done} to decode it. + + If the GC is not incremental, do a full collection and return a value on + which rgc.is_done() return True. + """ + gc.collect() + return _encode_states(1, 0) + +def _encode_states(oldstate, newstate): + return oldstate << 8 | newstate + +def old_state(states): + return (states & 0xFF00) >> 8 + +def new_state(states): + return states & 0xFF + +def is_done(states): + """ + Return True if the return value of collect_step signals the end of a major + collection + """ + old = old_state(states) + new = new_state(states) + return is_done__states(old, new) + +def is_done__states(oldstate, newstate): + "Like is_done, but takes oldstate and newstate explicitly" + # a collection is considered done when it ends up in the starting state + # (which is usually represented as 0). This logic works for incminimark, + # which is currently the only gc actually used and for which collect_step + # is implemented. In case we add more GC in the future, we might want to + # delegate this logic to the GC itself, but for now it is MUCH simpler to + # just write it in plain RPython. + return oldstate != 0 and newstate == 0 def set_max_heap_size(nbytes): """Limit the heap size to n bytes. @@ -131,6 +175,44 @@ args_v = hop.inputargs(lltype.Signed) return hop.genop('gc__collect', args_v, resulttype=hop.r_result) + +class EnableDisableEntry(ExtRegistryEntry): + _about_ = (gc.enable, gc.disable) + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.s_None + + def specialize_call(self, hop): + hop.exception_cannot_occur() + opname = self.instance.__name__ + return hop.genop('gc__%s' % opname, hop.args_v, resulttype=hop.r_result) + + +class IsEnabledEntry(ExtRegistryEntry): + _about_ = gc.isenabled + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.s_Bool + + def specialize_call(self, hop): + hop.exception_cannot_occur() + return hop.genop('gc__isenabled', hop.args_v, resulttype=hop.r_result) + + +class CollectStepEntry(ExtRegistryEntry): + _about_ = collect_step + + def compute_result_annotation(self): + from rpython.annotator import model as annmodel + return annmodel.SomeInteger() + + def specialize_call(self, hop): + hop.exception_cannot_occur() + return hop.genop('gc__collect_step', hop.args_v, resulttype=hop.r_result) + + class SetMaxHeapSizeEntry(ExtRegistryEntry): _about_ = set_max_heap_size diff --git a/rpython/rlib/test/test_debug.py b/rpython/rlib/test/test_debug.py --- a/rpython/rlib/test/test_debug.py +++ b/rpython/rlib/test/test_debug.py @@ -1,5 +1,5 @@ - import py +import pytest from rpython.rlib.debug import (check_annotation, make_sure_not_resized, debug_print, debug_start, debug_stop, have_debug_prints, debug_offset, debug_flush, @@ -10,6 +10,12 @@ from rpython.rlib import debug from rpython.rtyper.test.test_llinterp import interpret, gengraph +@pytest.fixture +def debuglog(monkeypatch): + dlog = debug.DebugLog() + monkeypatch.setattr(debug, '_log', dlog) + return dlog + def test_check_annotation(): class Error(Exception): pass @@ -94,7 +100,7 @@ py.test.raises(NotAListOfChars, "interpret(g, [3])") -def test_debug_print_start_stop(): +def test_debug_print_start_stop(debuglog): def f(x): debug_start("mycat") debug_print("foo", 2, "bar", x) @@ -103,22 +109,27 @@ debug_offset() # should not explode at least return have_debug_prints() - try: - debug._log = dlog = debug.DebugLog() - res = f(3) - assert res is True - finally: - debug._log = None - assert dlog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + res = f(3) + assert res is True + assert debuglog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + debuglog.reset() - try: - debug._log = dlog = debug.DebugLog() - res = interpret(f, [3]) - assert res is True - finally: - debug._log = None - assert dlog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] + res = interpret(f, [3]) + assert res is True + assert debuglog == [("mycat", [('debug_print', 'foo', 2, 'bar', 3)])] +def test_debuglog_summary(debuglog): + debug_start('foo') + debug_start('bar') # this is nested, so not counted in the summary by default + debug_stop('bar') + debug_stop('foo') + debug_start('foo') + debug_stop('foo') + debug_start('bar') + debug_stop('bar') + # + assert debuglog.summary() == {'foo': 2, 'bar': 1} + assert debuglog.summary(flatten=True) == {'foo': 2, 'bar': 2} def test_debug_start_stop_timestamp(): import time diff --git a/rpython/rlib/test/test_rgc.py b/rpython/rlib/test/test_rgc.py --- a/rpython/rlib/test/test_rgc.py +++ b/rpython/rlib/test/test_rgc.py @@ -39,6 +39,45 @@ assert res is None +def test_enable_disable(): + def f(): + gc.enable() + a = gc.isenabled() + gc.disable() + b = gc.isenabled() + return a and not b + + t, typer, graph = gengraph(f, []) + blockops = list(graph.iterblockops()) + opnames = [op.opname for block, op in blockops + if op.opname.startswith('gc__')] + assert opnames == ['gc__enable', 'gc__isenabled', + 'gc__disable', 'gc__isenabled'] + res = interpret(f, []) + assert res + +def test_collect_step(): + def f(): + return rgc.collect_step() + + assert f() + t, typer, graph = gengraph(f, []) + blockops = list(graph.iterblockops()) + opnames = [op.opname for block, op in blockops + if op.opname.startswith('gc__')] + assert opnames == ['gc__collect_step'] + res = interpret(f, []) + assert res + +def test__encode_states(): + val = rgc._encode_states(42, 43) + assert rgc.old_state(val) == 42 + assert rgc.new_state(val) == 43 + assert not rgc.is_done(val) + # + val = rgc.collect_step() + assert rgc.is_done(val) + def test_can_move(): T0 = lltype.GcStruct('T') T1 = lltype.GcArray(lltype.Float) diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py --- a/rpython/rtyper/llinterp.py +++ b/rpython/rtyper/llinterp.py @@ -819,6 +819,18 @@ def op_gc__collect(self, *gen): self.heap.collect(*gen) + def op_gc__collect_step(self): + return self.heap.collect_step() + + def op_gc__enable(self): + self.heap.enable() + + def op_gc__disable(self): + self.heap.disable() + + def op_gc__isenabled(self): + return self.heap.isenabled() + def op_gc_heap_stats(self): raise NotImplementedError diff --git a/rpython/rtyper/lltypesystem/llheap.py b/rpython/rtyper/lltypesystem/llheap.py --- a/rpython/rtyper/lltypesystem/llheap.py +++ b/rpython/rtyper/lltypesystem/llheap.py @@ -5,7 +5,7 @@ setfield = setattr from operator import setitem as setarrayitem -from rpython.rlib.rgc import can_move, collect, add_memory_pressure +from rpython.rlib.rgc import can_move, collect, enable, disable, isenabled, add_memory_pressure, collect_step def setinterior(toplevelcontainer, inneraddr, INNERTYPE, newvalue, offsets=None): diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py --- a/rpython/rtyper/lltypesystem/lloperation.py +++ b/rpython/rtyper/lltypesystem/lloperation.py @@ -456,6 +456,10 @@ # __________ GC operations __________ 'gc__collect': LLOp(canmallocgc=True), + 'gc__collect_step': LLOp(canmallocgc=True), + 'gc__enable': LLOp(), + 'gc__disable': LLOp(), + 'gc__isenabled': LLOp(), 'gc_free': LLOp(), 'gc_fetch_exception': LLOp(), 'gc_restore_exception': LLOp(), diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -1812,6 +1812,92 @@ res = self.run("ignore_finalizer") assert res == 1 # translated: x1 is removed from the list + def define_enable_disable(self): + class Counter(object): + val = 0 + counter = Counter() + class X(object): + def __del__(self): + counter.val += 1 + def f(should_disable): + x1 = X() + rgc.collect() # make x1 old + assert not rgc.can_move(x1) + x1 = None + # + if should_disable: + gc.disable() + assert not gc.isenabled() + # try to trigger a major collection + N = 100 # this should be enough, increase if not + lst = [] + for i in range(N): + lst.append(chr(i%256) * (1024*1024)) + #print i, counter.val + # + gc.enable() + assert gc.isenabled() + return counter.val + return f + + def test_enable_disable(self): + # first, run with normal gc. If the assert fails it means that in the + # loop we don't allocate enough mem to trigger a major collection. Try + # to increase N + deleted = self.run("enable_disable", 0) + assert deleted == 1, 'This should not fail, try to increment N' + # + # now, run with gc.disable: this should NOT free x1 + deleted = self.run("enable_disable", 1) + assert deleted == 0 + + def define_collect_step(self): + class Counter(object): + val = 0 + counter = Counter() + class X(object): + def __del__(self): + counter.val += 1 + def f(): + x1 = X() + rgc.collect() # make x1 old + assert not rgc.can_move(x1) + x1 = None + # + gc.disable() + n = 0 + states = [] + while True: + n += 1 + val = rgc.collect_step() + states.append((rgc.old_state(val), rgc.new_state(val))) + if rgc.is_done(val): + break + if n == 100: + print 'Endless loop!' + assert False, 'this looks like an endless loop' + + if n < 4: # we expect at least 4 steps + print 'Too few steps! n =', n + assert False + + # check that the state transitions are reasonable + first_state, _ = states[0] + for i, (old_state, new_state) in enumerate(states): + is_last = (i == len(states) - 1) + is_valid = False + if is_last: + assert old_state != new_state == first_state + else: + assert new_state == old_state or new_state == old_state+1 + + return counter.val + return f + + def test_collect_step(self): + deleted = self.run("collect_step") + assert deleted == 1 + def define_total_gc_time(cls): def f(): l = [] diff --git a/rpython/translator/goal/gcbench.py b/rpython/translator/goal/gcbench.py --- a/rpython/translator/goal/gcbench.py +++ b/rpython/translator/goal/gcbench.py @@ -44,8 +44,9 @@ # - Results are sensitive to locking cost, but we dont # check for proper locking import time +import gc -USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N]""" +USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N] [--gc=off|--gc=manual]""" ENABLE_THREADS = True @@ -173,6 +174,7 @@ depths = DEFAULT_DEPTHS threads = 0 repeatcount = 1 + gc_policy = 'on' for arg in argv[1:]: if arg.startswith('--threads='): arg = arg[len('--threads='):] @@ -189,13 +191,22 @@ depths = [int(s) for s in arg] except ValueError: return argerror() + elif arg.startswith('--gc=off'): + gc_policy = 'off' + elif arg.startswith('--gc=manual'): + gc_policy = 'manual' else: try: repeatcount = int(arg) except ValueError: return argerror() + # + if gc_policy == 'off' or gc_policy == 'manual': + gc.disable() for i in range(repeatcount): main(depths, threads) + if gc_policy == 'manual': + gc.collect(1) return 0 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit