Author: Maciej Fijalkowski <fij...@gmail.com>
Branch: 
Changeset: r58786:0acb47dfb2f4
Date: 2012-11-07 18:45 +0100
http://bitbucket.org/pypy/pypy/changeset/0acb47dfb2f4/

Log:    merge

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,8 @@
 .. this is the revision of the last merge from default to release-1.9.x
 .. startrev: 8d567513d04d
 
+Fixed the performance of gc.get_referrers()
+
 .. branch: default
 .. branch: app_main-refactor
 .. branch: win-ordinal
diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py
--- a/pypy/module/gc/referents.py
+++ b/pypy/module/gc/referents.py
@@ -45,6 +45,113 @@
     return OperationError(space.w_NotImplementedError,
                           space.wrap("operation not implemented by this GC"))
 
+# ____________________________________________________________
+
+def clear_gcflag_extra(fromlist):
+    pending = fromlist[:]
+    while pending:
+        gcref = pending.pop()
+        if rgc.get_gcflag_extra(gcref):
+            rgc.toggle_gcflag_extra(gcref)
+            pending.extend(rgc.get_rpy_referents(gcref))
+
+def do_get_objects():
+    roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
+    pending = roots[:]
+    result_w = []
+    while pending:
+        gcref = pending.pop()
+        if not rgc.get_gcflag_extra(gcref):
+            rgc.toggle_gcflag_extra(gcref)
+            w_obj = try_cast_gcref_to_w_root(gcref)
+            if w_obj is not None:
+                result_w.append(w_obj)
+            pending.extend(rgc.get_rpy_referents(gcref))
+    clear_gcflag_extra(roots)
+    return result_w
+
+# ____________________________________________________________
+
+class PathEntry(object):
+    # PathEntries are nodes of a complete tree of all objects, but
+    # built lazily (there is only one branch alive at any time).
+    # Each node has a 'gcref' and the list of referents from this gcref.
+    def __init__(self, prev, gcref, referents):
+        self.prev = prev
+        self.gcref = gcref
+        self.referents = referents
+        self.remaining = len(referents)
+
+    def get_most_recent_w_obj(self):
+        entry = self
+        while entry is not None:
+            if entry.gcref:
+                w_obj = try_cast_gcref_to_w_root(entry.gcref)
+                if w_obj is not None:
+                    return w_obj
+            entry = entry.prev
+        return None
+
+def do_get_referrers(w_arg):
+    result_w = []
+    gcarg = rgc.cast_instance_to_gcref(w_arg)
+    roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
+    head = PathEntry(None, rgc.NULL_GCREF, roots)
+    while True:
+        head.remaining -= 1
+        if head.remaining >= 0:
+            gcref = head.referents[head.remaining]
+            if not rgc.get_gcflag_extra(gcref):
+                # not visited so far
+                if gcref == gcarg:
+                    w_obj = head.get_most_recent_w_obj()
+                    if w_obj is not None:
+                        result_w.append(w_obj)   # found!
+                        rgc.toggle_gcflag_extra(gcref)  # toggle twice
+                rgc.toggle_gcflag_extra(gcref)
+                head = PathEntry(head, gcref, rgc.get_rpy_referents(gcref))
+        else:
+            # no more referents to visit
+            head = head.prev
+            if head is None:
+                break
+    # done.  Clear flags carefully
+    rgc.toggle_gcflag_extra(gcarg)
+    clear_gcflag_extra(roots)
+    clear_gcflag_extra([gcarg])
+    return result_w
+
+# ____________________________________________________________
+
+def _list_w_obj_referents(gcref, result_w):
+    # Get all W_Root reachable directly from gcref, and add them to
+    # the list 'result_w'.
+    pending = []    # = list of all objects whose gcflag was toggled
+    i = 0
+    gcrefparent = gcref
+    while True:
+        for gcref in rgc.get_rpy_referents(gcrefparent):
+            if rgc.get_gcflag_extra(gcref):
+                continue
+            rgc.toggle_gcflag_extra(gcref)
+            pending.append(gcref)
+
+        while i < len(pending):
+            gcrefparent = pending[i]
+            i += 1
+            w_obj = try_cast_gcref_to_w_root(gcrefparent)
+            if w_obj is not None:
+                result_w.append(w_obj)
+            else:
+                break   # jump back to the start of the outermost loop
+        else:
+            break   # done
+
+    for gcref in pending:
+        rgc.toggle_gcflag_extra(gcref)    # reset the gcflag_extra's
+
+# ____________________________________________________________
+
 def get_rpy_roots(space):
     lst = rgc.get_rpy_roots()
     if lst is None:
@@ -79,93 +186,35 @@
         raise missing_operation(space)
     return space.wrap(index)
 
-def _list_w_obj_referents(gcref, result_w):
-    # Get all W_Root reachable directly from gcref, and add them to
-    # the list 'result_w'.  The logic here is not robust against gc
-    # moves, and may return the same object several times.
-    seen = {}     # map {current_addr: obj}
-    pending = [gcref]
-    i = 0
-    while i < len(pending):
-        gcrefparent = pending[i]
-        i += 1
-        for gcref in rgc.get_rpy_referents(gcrefparent):
-            key = rgc.cast_gcref_to_int(gcref)
-            if gcref == seen.get(key, rgc.NULL_GCREF):
-                continue     # already in 'seen'
-            seen[key] = gcref
-            w_obj = try_cast_gcref_to_w_root(gcref)
-            if w_obj is not None:
-                result_w.append(w_obj)
-            else:
-                pending.append(gcref)
-
-def _get_objects_from_rpy(list_of_gcrefs):
-    # given a list of gcrefs that may or may not be W_Roots, build a list
-    # of W_Roots obtained by following references from there.
-    result_w = []   # <- list of W_Roots
-    for gcref in list_of_gcrefs:
-        if gcref:
-            w_obj = try_cast_gcref_to_w_root(gcref)
-            if w_obj is not None:
-                result_w.append(w_obj)
-            else:
-                _list_w_obj_referents(gcref, result_w)
-    return result_w
-
 def get_objects(space):
     """Return a list of all app-level objects."""
-    roots = rgc.get_rpy_roots()
-    pending_w = _get_objects_from_rpy(roots)
-    # continue by following every W_Root.  Note that this will force a hash
-    # on every W_Root, which is kind of bad, but not on every RPython object,
-    # which is really good.
-    result_w = {}
-    while len(pending_w) > 0:
-        previous_w = pending_w
-        pending_w = []
-        for w_obj in previous_w:
-            if w_obj not in result_w:
-                result_w[w_obj] = None
-                gcref = rgc.cast_instance_to_gcref(w_obj)
-                _list_w_obj_referents(gcref, pending_w)
-    return space.newlist(result_w.keys())
+    if not rgc.has_gcflag_extra():
+        raise missing_operation(space)
+    result_w = do_get_objects()
+    rgc.assert_no_more_gcflags()
+    return space.newlist(result_w)
 
 def get_referents(space, args_w):
     """Return a list of objects directly referred to by any of the arguments.
-    Approximative: follow references recursively until it finds
-    app-level objects.  May return several times the same object, too."""
-    result = []
+    """
+    if not rgc.has_gcflag_extra():
+        raise missing_operation(space)
+    result_w = []
     for w_obj in args_w:
         gcref = rgc.cast_instance_to_gcref(w_obj)
-        _list_w_obj_referents(gcref, result)
-    return space.newlist(result)
+        _list_w_obj_referents(gcref, result_w)
+    rgc.assert_no_more_gcflags()
+    return space.newlist(result_w)
 
 def get_referrers(space, args_w):
     """Return the list of objects that directly refer to any of objs."""
-    roots = rgc.get_rpy_roots()
-    pending_w = _get_objects_from_rpy(roots)
-    arguments_w = {}
-    for w_obj in args_w:
-        arguments_w[w_obj] = None
-    # continue by following every W_Root.  Same remark about hashes as
-    # in get_objects().
-    result_w = {}
-    seen_w = {}
-    while len(pending_w) > 0:
-        previous_w = pending_w
-        pending_w = []
-        for w_obj in previous_w:
-            if w_obj not in seen_w:
-                seen_w[w_obj] = None
-                gcref = rgc.cast_instance_to_gcref(w_obj)
-                referents_w = []
-                _list_w_obj_referents(gcref, referents_w)
-                for w_subobj in referents_w:
-                    if w_subobj in arguments_w:
-                        result_w[w_obj] = None
-                pending_w += referents_w
-    return space.newlist(result_w.keys())
+    if not rgc.has_gcflag_extra():
+        raise missing_operation(space)
+    result_w = []
+    for w_arg in args_w:
+        result_w += do_get_referrers(w_arg)
+    rgc.assert_no_more_gcflags()
+    return space.newlist(result_w)
 
 @unwrap_spec(fd=int)
 def _dump_rpy_heap(space, fd):
diff --git a/pypy/module/gc/test/test_referents.py 
b/pypy/module/gc/test/test_referents.py
--- a/pypy/module/gc/test/test_referents.py
+++ b/pypy/module/gc/test/test_referents.py
@@ -13,7 +13,8 @@
         l4 = space.newlist([w(4)])
         l2 = space.newlist([w(2)])
         l7 = space.newlist([w(7)])
-        cls.ALL_ROOTS = [l4, space.newlist([l2, l7]), RandomRPythonObject()]
+        cls.ALL_ROOTS = [l4, space.newlist([l2, l7]), RandomRPythonObject(),
+                         space.newtuple([l7])]
         cls.w_ALL_ROOTS = cls.space.newlist(cls.ALL_ROOTS)
         rgc.get_rpy_roots = lambda: (
             map(rgc._GcRef, cls.ALL_ROOTS) + [rgc.NULL_GCREF]*17)
@@ -26,7 +27,7 @@
     def test_get_objects(self):
         import gc
         lst = gc.get_objects()
-        i4, l27, ro = self.ALL_ROOTS
+        i4, l27, ro, rt = self.ALL_ROOTS
         i2, i7 = l27
         found = 0
         for x in lst:
@@ -48,7 +49,8 @@
             assert lst[0] == [4]
             assert lst[1] == [[2], [7]]
             assert type(lst[2]) is gc.GcRef
-            assert len(lst) == 3
+            assert lst[3] == ([7],)
+            assert len(lst) == 4
 
     def test_get_rpy_referents(self):
         import gc
@@ -108,3 +110,9 @@
                 break   # found
         else:
             assert 0, "the list [2, 7] is not found as gc.get_referrers(7)"
+        l7t = self.ALL_ROOTS[3]
+        for x in lst:
+            if x is l7t:
+                break   # found
+        else:
+            assert 0, "the tuple (7,) is not found as gc.get_referrers(7)"
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -308,6 +308,32 @@
     "NOT_RPYTHON"
     raise NotImplementedError
 
+def has_gcflag_extra():
+    "NOT_RPYTHON"
+    return True
+has_gcflag_extra._subopnum = 1
+
+_gcflag_extras = []
+
+def get_gcflag_extra(gcref):
+    "NOT_RPYTHON"
+    assert gcref   # not NULL!
+    return gcref in _gcflag_extras    # XXX slow
+get_gcflag_extra._subopnum = 2
+
+def toggle_gcflag_extra(gcref):
+    "NOT_RPYTHON"
+    assert gcref   # not NULL!
+    try:
+        _gcflag_extras.remove(gcref)  # XXX slow
+    except ValueError:
+        _gcflag_extras.append(gcref)
+toggle_gcflag_extra._subopnum = 3
+
+def assert_no_more_gcflags():
+    if not we_are_translated():
+        assert not _gcflag_extras
+
 ARRAY_OF_CHAR = lltype.Array(lltype.Char)
 NULL_GCREF = lltype.nullptr(llmemory.GCREF.TO)
 
@@ -476,5 +502,17 @@
         hop.exception_is_here()
         return hop.genop('gc_typeids_z', [], resulttype = hop.r_result)
 
+class Entry(ExtRegistryEntry):
+    _about_ = (has_gcflag_extra, get_gcflag_extra, toggle_gcflag_extra)
+    def compute_result_annotation(self, s_arg=None):
+        from pypy.annotation.model import s_Bool
+        return s_Bool
+    def specialize_call(self, hop):
+        subopnum = self.instance._subopnum
+        vlist = [hop.inputconst(lltype.Signed, subopnum)]
+        vlist += hop.inputargs(*hop.args_r)
+        hop.exception_cannot_occur()
+        return hop.genop('gc_gcflag_extra', vlist, resulttype = hop.r_result)
+
 def lltype_is_gc(TP):
     return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc'
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -921,6 +921,9 @@
     def op_gc_typeids_z(self):
         raise NotImplementedError("gc_typeids_z")
 
+    def op_gc_gcflag_extra(self, subopnum, *args):
+        return self.heap.gcflag_extra(subopnum, *args)
+
     def op_do_malloc_fixedsize_clear(self):
         raise NotImplementedError("do_malloc_fixedsize_clear")
 
diff --git a/pypy/rpython/lltypesystem/lloperation.py 
b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -497,6 +497,7 @@
     'gc_is_rpy_instance'  : LLOp(),
     'gc_dump_rpy_heap'    : LLOp(),
     'gc_typeids_z'        : LLOp(),
+    'gc_gcflag_extra'     : LLOp(),
     'gc_add_memory_pressure': LLOp(),
 
     # ------- JIT & GC interaction, only for some GCs ----------
diff --git a/pypy/rpython/memory/gc/minimark.py 
b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -108,6 +108,9 @@
 # collection.  See pypy/doc/discussion/finalizer-order.txt
 GCFLAG_FINALIZATION_ORDERING = first_gcflag << 4
 
+# This flag is reserved for RPython.
+GCFLAG_EXTRA        = first_gcflag << 5
+
 # The following flag is set on externally raw_malloc'ed arrays of pointers.
 # They are allocated with some extra space in front of them for a bitfield,
 # one bit per 'card_page_indices' indices.
@@ -116,7 +119,6 @@
 # note that GCFLAG_CARDS_SET is the most significant bit of a byte:
 # this is required for the JIT (x86)
 
-#GCFLAG_UNUSED      = first_gcflag << 5     # this flag is free
 TID_MASK            = (first_gcflag << 8) - 1
 
 
@@ -133,7 +135,7 @@
     needs_write_barrier = True
     prebuilt_gc_objects_are_static_roots = False
     malloc_zero_filled = True    # xxx experiment with False
-    gcflag_extra = GCFLAG_FINALIZATION_ORDERING
+    gcflag_extra = GCFLAG_EXTRA
 
     # All objects start with a HDR, i.e. with a field 'tid' which contains
     # a word.  This word is divided in two halves: the lower half contains
diff --git a/pypy/rpython/memory/gc/semispace.py 
b/pypy/rpython/memory/gc/semispace.py
--- a/pypy/rpython/memory/gc/semispace.py
+++ b/pypy/rpython/memory/gc/semispace.py
@@ -33,6 +33,8 @@
 #   - we have our own extra field to store the hash
 GC_HASH_HASFIELD   = _GCFLAG_HASH_BASE * 0x3
 
+GCFLAG_EXTRA = first_gcflag << 5    # for RPython abuse only
+
 memoryError = MemoryError()
 
 
@@ -41,8 +43,8 @@
     inline_simple_malloc = True
     inline_simple_malloc_varsize = True
     malloc_zero_filled = True
-    first_unused_gcflag = first_gcflag << 5
-    gcflag_extra = GCFLAG_FINALIZATION_ORDERING
+    first_unused_gcflag = first_gcflag << 6
+    gcflag_extra = GCFLAG_EXTRA
 
     HDR = lltype.Struct('header', ('tid', lltype.Signed))   # XXX or rffi.INT?
     typeid_is_in_field = 'tid'
diff --git a/pypy/rpython/memory/gcwrapper.py b/pypy/rpython/memory/gcwrapper.py
--- a/pypy/rpython/memory/gcwrapper.py
+++ b/pypy/rpython/memory/gcwrapper.py
@@ -153,6 +153,20 @@
         else:
             return True
 
+    def gcflag_extra(self, subopnum, *args):
+        if subopnum == 1:      # has_gcflag_extra
+            assert len(args) == 0
+            return self.gc.gcflag_extra != 0
+        assert len(args) == 1
+        addr = llmemory.cast_ptr_to_adr(args[0])
+        hdr = self.gc.header(addr)
+        if subopnum == 3:      # toggle_gcflag_extra
+            if hdr.tid & self.gc.gcflag_extra:
+                hdr.tid &= ~self.gc.gcflag_extra
+            else:
+                hdr.tid |= self.gc.gcflag_extra
+        return (hdr.tid & self.gc.gcflag_extra) != 0
+
 # ____________________________________________________________
 
 class LLInterpRootWalker:
diff --git a/pypy/rpython/memory/test/test_gc.py 
b/pypy/rpython/memory/test/test_gc.py
--- a/pypy/rpython/memory/test/test_gc.py
+++ b/pypy/rpython/memory/test/test_gc.py
@@ -746,6 +746,30 @@
         res = self.interpret(fn, [])
         assert res == ord('y')
 
+    def test_gcflag_extra(self):
+        class A:
+            pass
+        a1 = A()
+        def fn():
+            a2 = A()
+            if not rgc.has_gcflag_extra():
+                return     # cannot test it then
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == False
+            rgc.toggle_gcflag_extra(a1)
+            assert rgc.get_gcflag_extra(a1) == True
+            assert rgc.get_gcflag_extra(a2) == False
+            rgc.toggle_gcflag_extra(a2)
+            assert rgc.get_gcflag_extra(a1) == True
+            assert rgc.get_gcflag_extra(a2) == True
+            rgc.toggle_gcflag_extra(a1)
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == True
+            rgc.toggle_gcflag_extra(a2)
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == False
+        self.interpret(fn, [])
+
 from pypy.rlib.objectmodel import UnboxedValue
 
 class TaggedBase(object):
diff --git a/pypy/translator/c/gc.py b/pypy/translator/c/gc.py
--- a/pypy/translator/c/gc.py
+++ b/pypy/translator/c/gc.py
@@ -91,6 +91,11 @@
     def OP_GC_STACK_BOTTOM(self, funcgen, op):
         return ''
 
+    def OP_GC_GCFLAG_EXTRA(self, funcgen, op):
+        return '%s = 0;  /* gc_gcflag_extra%r */' % (
+            funcgen.expr(op.result),
+            op.args[0])
+
 
 class RefcountingInfo:
     static_deallocator = None
@@ -370,16 +375,23 @@
         config = self.db.translator.config
         return config.translation.gcremovetypeptr
 
+    def header_type(self, extra='*'):
+        # Fish out the C name of the 'struct pypy_header0'
+        HDR = self.db.gctransformer.HDR
+        return self.db.gettype(HDR).replace('@', extra)
+
+    def tid_fieldname(self, tid_field='tid'):
+        # Fish out the C name of the tid field.
+        HDR = self.db.gctransformer.HDR
+        hdr_node = self.db.gettypedefnode(HDR)
+        return hdr_node.c_struct_field_name(tid_field)
+
     def OP_GC_GETTYPEPTR_GROUP(self, funcgen, op):
         # expands to a number of steps, as per rpython/lltypesystem/opimpl.py,
         # all implemented by a single call to a C macro.
         [v_obj, c_grpptr, c_skipoffset, c_vtableinfo] = op.args
+        tid_field = c_vtableinfo.value[2]
         typename = funcgen.db.gettype(op.result.concretetype)
-        tid_field = c_vtableinfo.value[2]
-        # Fish out the C name of the tid field.
-        HDR = self.db.gctransformer.HDR
-        hdr_node = self.db.gettypedefnode(HDR)
-        fieldname = hdr_node.c_struct_field_name(tid_field)
         return (
         '%s = (%s)_OP_GET_NEXT_GROUP_MEMBER(%s, (pypy_halfword_t)%s->'
             '_gcheader.%s, %s);'
@@ -387,12 +399,36 @@
                cdecl(typename, ''),
                funcgen.expr(c_grpptr),
                funcgen.expr(v_obj),
-               fieldname,
+               self.tid_fieldname(tid_field),
                funcgen.expr(c_skipoffset)))
 
     def OP_GC_ASSUME_YOUNG_POINTERS(self, funcgen, op):
         raise Exception("the FramewokGCTransformer should handle this")
 
+    def OP_GC_GCFLAG_EXTRA(self, funcgen, op):
+        gcflag_extra = self.db.gctransformer.gcdata.gc.gcflag_extra
+        if gcflag_extra == 0:
+            return BasicGcPolicy.OP_GC_GCFLAG_EXTRA(self, funcgen, op)
+        subopnum = op.args[0].value
+        if subopnum == 1:
+            return '%s = 1;  /* has_gcflag_extra */' % (
+                funcgen.expr(op.result),)
+        hdrfield = '((%s)%s)->%s' % (self.header_type(),
+                                     funcgen.expr(op.args[1]),
+                                     self.tid_fieldname())
+        parts = ['%s = (%s & %dL) != 0;' % (funcgen.expr(op.result),
+                                            hdrfield,
+                                            gcflag_extra)]
+        if subopnum == 2:     # get_gcflag_extra
+            parts.append('/* get_gcflag_extra */')
+        elif subopnum == 3:     # toggle_gcflag_extra
+            parts.insert(0, '%s ^= %dL;' % (hdrfield,
+                                            gcflag_extra))
+            parts.append('/* toggle_gcflag_extra */')
+        else:
+            raise AssertionError(subopnum)
+        return ' '.join(parts)
+
 class ShadowStackFrameworkGcPolicy(BasicFrameworkGcPolicy):
 
     def gettransformer(self):
diff --git a/pypy/translator/c/test/test_newgc.py 
b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -1183,6 +1183,35 @@
         assert data.startswith('member0')
         assert 'GcArray of * GcStruct S {' in data
 
+    def define_gcflag_extra(self):
+        class A:
+            pass
+        a1 = A()
+        def fn():
+            a2 = A()
+            if not rgc.has_gcflag_extra():
+                return 0     # cannot test it then
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == False
+            rgc.toggle_gcflag_extra(a1)
+            assert rgc.get_gcflag_extra(a1) == True
+            assert rgc.get_gcflag_extra(a2) == False
+            rgc.toggle_gcflag_extra(a2)
+            assert rgc.get_gcflag_extra(a1) == True
+            assert rgc.get_gcflag_extra(a2) == True
+            rgc.toggle_gcflag_extra(a1)
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == True
+            rgc.toggle_gcflag_extra(a2)
+            assert rgc.get_gcflag_extra(a1) == False
+            assert rgc.get_gcflag_extra(a2) == False
+            return 0
+        return fn
+
+    def test_gcflag_extra(self):
+        self.run("gcflag_extra")
+
+
 class TestSemiSpaceGC(TestUsingFramework, snippet.SemiSpaceGCTestDefines):
     gcpolicy = "semispace"
     should_be_moving = True
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to