Author: Maciej Fijalkowski <fij...@gmail.com> Branch: Changeset: r58786:0acb47dfb2f4 Date: 2012-11-07 18:45 +0100 http://bitbucket.org/pypy/pypy/changeset/0acb47dfb2f4/
Log: merge diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,6 +5,8 @@ .. this is the revision of the last merge from default to release-1.9.x .. startrev: 8d567513d04d +Fixed the performance of gc.get_referrers() + .. branch: default .. branch: app_main-refactor .. branch: win-ordinal diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py --- a/pypy/module/gc/referents.py +++ b/pypy/module/gc/referents.py @@ -45,6 +45,113 @@ return OperationError(space.w_NotImplementedError, space.wrap("operation not implemented by this GC")) +# ____________________________________________________________ + +def clear_gcflag_extra(fromlist): + pending = fromlist[:] + while pending: + gcref = pending.pop() + if rgc.get_gcflag_extra(gcref): + rgc.toggle_gcflag_extra(gcref) + pending.extend(rgc.get_rpy_referents(gcref)) + +def do_get_objects(): + roots = [gcref for gcref in rgc.get_rpy_roots() if gcref] + pending = roots[:] + result_w = [] + while pending: + gcref = pending.pop() + if not rgc.get_gcflag_extra(gcref): + rgc.toggle_gcflag_extra(gcref) + w_obj = try_cast_gcref_to_w_root(gcref) + if w_obj is not None: + result_w.append(w_obj) + pending.extend(rgc.get_rpy_referents(gcref)) + clear_gcflag_extra(roots) + return result_w + +# ____________________________________________________________ + +class PathEntry(object): + # PathEntries are nodes of a complete tree of all objects, but + # built lazily (there is only one branch alive at any time). + # Each node has a 'gcref' and the list of referents from this gcref. + def __init__(self, prev, gcref, referents): + self.prev = prev + self.gcref = gcref + self.referents = referents + self.remaining = len(referents) + + def get_most_recent_w_obj(self): + entry = self + while entry is not None: + if entry.gcref: + w_obj = try_cast_gcref_to_w_root(entry.gcref) + if w_obj is not None: + return w_obj + entry = entry.prev + return None + +def do_get_referrers(w_arg): + result_w = [] + gcarg = rgc.cast_instance_to_gcref(w_arg) + roots = [gcref for gcref in rgc.get_rpy_roots() if gcref] + head = PathEntry(None, rgc.NULL_GCREF, roots) + while True: + head.remaining -= 1 + if head.remaining >= 0: + gcref = head.referents[head.remaining] + if not rgc.get_gcflag_extra(gcref): + # not visited so far + if gcref == gcarg: + w_obj = head.get_most_recent_w_obj() + if w_obj is not None: + result_w.append(w_obj) # found! + rgc.toggle_gcflag_extra(gcref) # toggle twice + rgc.toggle_gcflag_extra(gcref) + head = PathEntry(head, gcref, rgc.get_rpy_referents(gcref)) + else: + # no more referents to visit + head = head.prev + if head is None: + break + # done. Clear flags carefully + rgc.toggle_gcflag_extra(gcarg) + clear_gcflag_extra(roots) + clear_gcflag_extra([gcarg]) + return result_w + +# ____________________________________________________________ + +def _list_w_obj_referents(gcref, result_w): + # Get all W_Root reachable directly from gcref, and add them to + # the list 'result_w'. + pending = [] # = list of all objects whose gcflag was toggled + i = 0 + gcrefparent = gcref + while True: + for gcref in rgc.get_rpy_referents(gcrefparent): + if rgc.get_gcflag_extra(gcref): + continue + rgc.toggle_gcflag_extra(gcref) + pending.append(gcref) + + while i < len(pending): + gcrefparent = pending[i] + i += 1 + w_obj = try_cast_gcref_to_w_root(gcrefparent) + if w_obj is not None: + result_w.append(w_obj) + else: + break # jump back to the start of the outermost loop + else: + break # done + + for gcref in pending: + rgc.toggle_gcflag_extra(gcref) # reset the gcflag_extra's + +# ____________________________________________________________ + def get_rpy_roots(space): lst = rgc.get_rpy_roots() if lst is None: @@ -79,93 +186,35 @@ raise missing_operation(space) return space.wrap(index) -def _list_w_obj_referents(gcref, result_w): - # Get all W_Root reachable directly from gcref, and add them to - # the list 'result_w'. The logic here is not robust against gc - # moves, and may return the same object several times. - seen = {} # map {current_addr: obj} - pending = [gcref] - i = 0 - while i < len(pending): - gcrefparent = pending[i] - i += 1 - for gcref in rgc.get_rpy_referents(gcrefparent): - key = rgc.cast_gcref_to_int(gcref) - if gcref == seen.get(key, rgc.NULL_GCREF): - continue # already in 'seen' - seen[key] = gcref - w_obj = try_cast_gcref_to_w_root(gcref) - if w_obj is not None: - result_w.append(w_obj) - else: - pending.append(gcref) - -def _get_objects_from_rpy(list_of_gcrefs): - # given a list of gcrefs that may or may not be W_Roots, build a list - # of W_Roots obtained by following references from there. - result_w = [] # <- list of W_Roots - for gcref in list_of_gcrefs: - if gcref: - w_obj = try_cast_gcref_to_w_root(gcref) - if w_obj is not None: - result_w.append(w_obj) - else: - _list_w_obj_referents(gcref, result_w) - return result_w - def get_objects(space): """Return a list of all app-level objects.""" - roots = rgc.get_rpy_roots() - pending_w = _get_objects_from_rpy(roots) - # continue by following every W_Root. Note that this will force a hash - # on every W_Root, which is kind of bad, but not on every RPython object, - # which is really good. - result_w = {} - while len(pending_w) > 0: - previous_w = pending_w - pending_w = [] - for w_obj in previous_w: - if w_obj not in result_w: - result_w[w_obj] = None - gcref = rgc.cast_instance_to_gcref(w_obj) - _list_w_obj_referents(gcref, pending_w) - return space.newlist(result_w.keys()) + if not rgc.has_gcflag_extra(): + raise missing_operation(space) + result_w = do_get_objects() + rgc.assert_no_more_gcflags() + return space.newlist(result_w) def get_referents(space, args_w): """Return a list of objects directly referred to by any of the arguments. - Approximative: follow references recursively until it finds - app-level objects. May return several times the same object, too.""" - result = [] + """ + if not rgc.has_gcflag_extra(): + raise missing_operation(space) + result_w = [] for w_obj in args_w: gcref = rgc.cast_instance_to_gcref(w_obj) - _list_w_obj_referents(gcref, result) - return space.newlist(result) + _list_w_obj_referents(gcref, result_w) + rgc.assert_no_more_gcflags() + return space.newlist(result_w) def get_referrers(space, args_w): """Return the list of objects that directly refer to any of objs.""" - roots = rgc.get_rpy_roots() - pending_w = _get_objects_from_rpy(roots) - arguments_w = {} - for w_obj in args_w: - arguments_w[w_obj] = None - # continue by following every W_Root. Same remark about hashes as - # in get_objects(). - result_w = {} - seen_w = {} - while len(pending_w) > 0: - previous_w = pending_w - pending_w = [] - for w_obj in previous_w: - if w_obj not in seen_w: - seen_w[w_obj] = None - gcref = rgc.cast_instance_to_gcref(w_obj) - referents_w = [] - _list_w_obj_referents(gcref, referents_w) - for w_subobj in referents_w: - if w_subobj in arguments_w: - result_w[w_obj] = None - pending_w += referents_w - return space.newlist(result_w.keys()) + if not rgc.has_gcflag_extra(): + raise missing_operation(space) + result_w = [] + for w_arg in args_w: + result_w += do_get_referrers(w_arg) + rgc.assert_no_more_gcflags() + return space.newlist(result_w) @unwrap_spec(fd=int) def _dump_rpy_heap(space, fd): diff --git a/pypy/module/gc/test/test_referents.py b/pypy/module/gc/test/test_referents.py --- a/pypy/module/gc/test/test_referents.py +++ b/pypy/module/gc/test/test_referents.py @@ -13,7 +13,8 @@ l4 = space.newlist([w(4)]) l2 = space.newlist([w(2)]) l7 = space.newlist([w(7)]) - cls.ALL_ROOTS = [l4, space.newlist([l2, l7]), RandomRPythonObject()] + cls.ALL_ROOTS = [l4, space.newlist([l2, l7]), RandomRPythonObject(), + space.newtuple([l7])] cls.w_ALL_ROOTS = cls.space.newlist(cls.ALL_ROOTS) rgc.get_rpy_roots = lambda: ( map(rgc._GcRef, cls.ALL_ROOTS) + [rgc.NULL_GCREF]*17) @@ -26,7 +27,7 @@ def test_get_objects(self): import gc lst = gc.get_objects() - i4, l27, ro = self.ALL_ROOTS + i4, l27, ro, rt = self.ALL_ROOTS i2, i7 = l27 found = 0 for x in lst: @@ -48,7 +49,8 @@ assert lst[0] == [4] assert lst[1] == [[2], [7]] assert type(lst[2]) is gc.GcRef - assert len(lst) == 3 + assert lst[3] == ([7],) + assert len(lst) == 4 def test_get_rpy_referents(self): import gc @@ -108,3 +110,9 @@ break # found else: assert 0, "the list [2, 7] is not found as gc.get_referrers(7)" + l7t = self.ALL_ROOTS[3] + for x in lst: + if x is l7t: + break # found + else: + assert 0, "the tuple (7,) is not found as gc.get_referrers(7)" diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py --- a/pypy/rlib/rgc.py +++ b/pypy/rlib/rgc.py @@ -308,6 +308,32 @@ "NOT_RPYTHON" raise NotImplementedError +def has_gcflag_extra(): + "NOT_RPYTHON" + return True +has_gcflag_extra._subopnum = 1 + +_gcflag_extras = [] + +def get_gcflag_extra(gcref): + "NOT_RPYTHON" + assert gcref # not NULL! + return gcref in _gcflag_extras # XXX slow +get_gcflag_extra._subopnum = 2 + +def toggle_gcflag_extra(gcref): + "NOT_RPYTHON" + assert gcref # not NULL! + try: + _gcflag_extras.remove(gcref) # XXX slow + except ValueError: + _gcflag_extras.append(gcref) +toggle_gcflag_extra._subopnum = 3 + +def assert_no_more_gcflags(): + if not we_are_translated(): + assert not _gcflag_extras + ARRAY_OF_CHAR = lltype.Array(lltype.Char) NULL_GCREF = lltype.nullptr(llmemory.GCREF.TO) @@ -476,5 +502,17 @@ hop.exception_is_here() return hop.genop('gc_typeids_z', [], resulttype = hop.r_result) +class Entry(ExtRegistryEntry): + _about_ = (has_gcflag_extra, get_gcflag_extra, toggle_gcflag_extra) + def compute_result_annotation(self, s_arg=None): + from pypy.annotation.model import s_Bool + return s_Bool + def specialize_call(self, hop): + subopnum = self.instance._subopnum + vlist = [hop.inputconst(lltype.Signed, subopnum)] + vlist += hop.inputargs(*hop.args_r) + hop.exception_cannot_occur() + return hop.genop('gc_gcflag_extra', vlist, resulttype = hop.r_result) + def lltype_is_gc(TP): return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc' diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py --- a/pypy/rpython/llinterp.py +++ b/pypy/rpython/llinterp.py @@ -921,6 +921,9 @@ def op_gc_typeids_z(self): raise NotImplementedError("gc_typeids_z") + def op_gc_gcflag_extra(self, subopnum, *args): + return self.heap.gcflag_extra(subopnum, *args) + def op_do_malloc_fixedsize_clear(self): raise NotImplementedError("do_malloc_fixedsize_clear") diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py --- a/pypy/rpython/lltypesystem/lloperation.py +++ b/pypy/rpython/lltypesystem/lloperation.py @@ -497,6 +497,7 @@ 'gc_is_rpy_instance' : LLOp(), 'gc_dump_rpy_heap' : LLOp(), 'gc_typeids_z' : LLOp(), + 'gc_gcflag_extra' : LLOp(), 'gc_add_memory_pressure': LLOp(), # ------- JIT & GC interaction, only for some GCs ---------- diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py --- a/pypy/rpython/memory/gc/minimark.py +++ b/pypy/rpython/memory/gc/minimark.py @@ -108,6 +108,9 @@ # collection. See pypy/doc/discussion/finalizer-order.txt GCFLAG_FINALIZATION_ORDERING = first_gcflag << 4 +# This flag is reserved for RPython. +GCFLAG_EXTRA = first_gcflag << 5 + # The following flag is set on externally raw_malloc'ed arrays of pointers. # They are allocated with some extra space in front of them for a bitfield, # one bit per 'card_page_indices' indices. @@ -116,7 +119,6 @@ # note that GCFLAG_CARDS_SET is the most significant bit of a byte: # this is required for the JIT (x86) -#GCFLAG_UNUSED = first_gcflag << 5 # this flag is free TID_MASK = (first_gcflag << 8) - 1 @@ -133,7 +135,7 @@ needs_write_barrier = True prebuilt_gc_objects_are_static_roots = False malloc_zero_filled = True # xxx experiment with False - gcflag_extra = GCFLAG_FINALIZATION_ORDERING + gcflag_extra = GCFLAG_EXTRA # All objects start with a HDR, i.e. with a field 'tid' which contains # a word. This word is divided in two halves: the lower half contains diff --git a/pypy/rpython/memory/gc/semispace.py b/pypy/rpython/memory/gc/semispace.py --- a/pypy/rpython/memory/gc/semispace.py +++ b/pypy/rpython/memory/gc/semispace.py @@ -33,6 +33,8 @@ # - we have our own extra field to store the hash GC_HASH_HASFIELD = _GCFLAG_HASH_BASE * 0x3 +GCFLAG_EXTRA = first_gcflag << 5 # for RPython abuse only + memoryError = MemoryError() @@ -41,8 +43,8 @@ inline_simple_malloc = True inline_simple_malloc_varsize = True malloc_zero_filled = True - first_unused_gcflag = first_gcflag << 5 - gcflag_extra = GCFLAG_FINALIZATION_ORDERING + first_unused_gcflag = first_gcflag << 6 + gcflag_extra = GCFLAG_EXTRA HDR = lltype.Struct('header', ('tid', lltype.Signed)) # XXX or rffi.INT? typeid_is_in_field = 'tid' diff --git a/pypy/rpython/memory/gcwrapper.py b/pypy/rpython/memory/gcwrapper.py --- a/pypy/rpython/memory/gcwrapper.py +++ b/pypy/rpython/memory/gcwrapper.py @@ -153,6 +153,20 @@ else: return True + def gcflag_extra(self, subopnum, *args): + if subopnum == 1: # has_gcflag_extra + assert len(args) == 0 + return self.gc.gcflag_extra != 0 + assert len(args) == 1 + addr = llmemory.cast_ptr_to_adr(args[0]) + hdr = self.gc.header(addr) + if subopnum == 3: # toggle_gcflag_extra + if hdr.tid & self.gc.gcflag_extra: + hdr.tid &= ~self.gc.gcflag_extra + else: + hdr.tid |= self.gc.gcflag_extra + return (hdr.tid & self.gc.gcflag_extra) != 0 + # ____________________________________________________________ class LLInterpRootWalker: diff --git a/pypy/rpython/memory/test/test_gc.py b/pypy/rpython/memory/test/test_gc.py --- a/pypy/rpython/memory/test/test_gc.py +++ b/pypy/rpython/memory/test/test_gc.py @@ -746,6 +746,30 @@ res = self.interpret(fn, []) assert res == ord('y') + def test_gcflag_extra(self): + class A: + pass + a1 = A() + def fn(): + a2 = A() + if not rgc.has_gcflag_extra(): + return # cannot test it then + assert rgc.get_gcflag_extra(a1) == False + assert rgc.get_gcflag_extra(a2) == False + rgc.toggle_gcflag_extra(a1) + assert rgc.get_gcflag_extra(a1) == True + assert rgc.get_gcflag_extra(a2) == False + rgc.toggle_gcflag_extra(a2) + assert rgc.get_gcflag_extra(a1) == True + assert rgc.get_gcflag_extra(a2) == True + rgc.toggle_gcflag_extra(a1) + assert rgc.get_gcflag_extra(a1) == False + assert rgc.get_gcflag_extra(a2) == True + rgc.toggle_gcflag_extra(a2) + assert rgc.get_gcflag_extra(a1) == False + assert rgc.get_gcflag_extra(a2) == False + self.interpret(fn, []) + from pypy.rlib.objectmodel import UnboxedValue class TaggedBase(object): diff --git a/pypy/translator/c/gc.py b/pypy/translator/c/gc.py --- a/pypy/translator/c/gc.py +++ b/pypy/translator/c/gc.py @@ -91,6 +91,11 @@ def OP_GC_STACK_BOTTOM(self, funcgen, op): return '' + def OP_GC_GCFLAG_EXTRA(self, funcgen, op): + return '%s = 0; /* gc_gcflag_extra%r */' % ( + funcgen.expr(op.result), + op.args[0]) + class RefcountingInfo: static_deallocator = None @@ -370,16 +375,23 @@ config = self.db.translator.config return config.translation.gcremovetypeptr + def header_type(self, extra='*'): + # Fish out the C name of the 'struct pypy_header0' + HDR = self.db.gctransformer.HDR + return self.db.gettype(HDR).replace('@', extra) + + def tid_fieldname(self, tid_field='tid'): + # Fish out the C name of the tid field. + HDR = self.db.gctransformer.HDR + hdr_node = self.db.gettypedefnode(HDR) + return hdr_node.c_struct_field_name(tid_field) + def OP_GC_GETTYPEPTR_GROUP(self, funcgen, op): # expands to a number of steps, as per rpython/lltypesystem/opimpl.py, # all implemented by a single call to a C macro. [v_obj, c_grpptr, c_skipoffset, c_vtableinfo] = op.args + tid_field = c_vtableinfo.value[2] typename = funcgen.db.gettype(op.result.concretetype) - tid_field = c_vtableinfo.value[2] - # Fish out the C name of the tid field. - HDR = self.db.gctransformer.HDR - hdr_node = self.db.gettypedefnode(HDR) - fieldname = hdr_node.c_struct_field_name(tid_field) return ( '%s = (%s)_OP_GET_NEXT_GROUP_MEMBER(%s, (pypy_halfword_t)%s->' '_gcheader.%s, %s);' @@ -387,12 +399,36 @@ cdecl(typename, ''), funcgen.expr(c_grpptr), funcgen.expr(v_obj), - fieldname, + self.tid_fieldname(tid_field), funcgen.expr(c_skipoffset))) def OP_GC_ASSUME_YOUNG_POINTERS(self, funcgen, op): raise Exception("the FramewokGCTransformer should handle this") + def OP_GC_GCFLAG_EXTRA(self, funcgen, op): + gcflag_extra = self.db.gctransformer.gcdata.gc.gcflag_extra + if gcflag_extra == 0: + return BasicGcPolicy.OP_GC_GCFLAG_EXTRA(self, funcgen, op) + subopnum = op.args[0].value + if subopnum == 1: + return '%s = 1; /* has_gcflag_extra */' % ( + funcgen.expr(op.result),) + hdrfield = '((%s)%s)->%s' % (self.header_type(), + funcgen.expr(op.args[1]), + self.tid_fieldname()) + parts = ['%s = (%s & %dL) != 0;' % (funcgen.expr(op.result), + hdrfield, + gcflag_extra)] + if subopnum == 2: # get_gcflag_extra + parts.append('/* get_gcflag_extra */') + elif subopnum == 3: # toggle_gcflag_extra + parts.insert(0, '%s ^= %dL;' % (hdrfield, + gcflag_extra)) + parts.append('/* toggle_gcflag_extra */') + else: + raise AssertionError(subopnum) + return ' '.join(parts) + class ShadowStackFrameworkGcPolicy(BasicFrameworkGcPolicy): def gettransformer(self): diff --git a/pypy/translator/c/test/test_newgc.py b/pypy/translator/c/test/test_newgc.py --- a/pypy/translator/c/test/test_newgc.py +++ b/pypy/translator/c/test/test_newgc.py @@ -1183,6 +1183,35 @@ assert data.startswith('member0') assert 'GcArray of * GcStruct S {' in data + def define_gcflag_extra(self): + class A: + pass + a1 = A() + def fn(): + a2 = A() + if not rgc.has_gcflag_extra(): + return 0 # cannot test it then + assert rgc.get_gcflag_extra(a1) == False + assert rgc.get_gcflag_extra(a2) == False + rgc.toggle_gcflag_extra(a1) + assert rgc.get_gcflag_extra(a1) == True + assert rgc.get_gcflag_extra(a2) == False + rgc.toggle_gcflag_extra(a2) + assert rgc.get_gcflag_extra(a1) == True + assert rgc.get_gcflag_extra(a2) == True + rgc.toggle_gcflag_extra(a1) + assert rgc.get_gcflag_extra(a1) == False + assert rgc.get_gcflag_extra(a2) == True + rgc.toggle_gcflag_extra(a2) + assert rgc.get_gcflag_extra(a1) == False + assert rgc.get_gcflag_extra(a2) == False + return 0 + return fn + + def test_gcflag_extra(self): + self.run("gcflag_extra") + + class TestSemiSpaceGC(TestUsingFramework, snippet.SemiSpaceGCTestDefines): gcpolicy = "semispace" should_be_moving = True _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit