Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r75737:2f0b1cc091f5
Date: 2015-02-06 12:50 +0100
http://bitbucket.org/pypy/pypy/changeset/2f0b1cc091f5/

Log:    hg merge stackroot-speedup-2

        This is a quick hack to avoid walking the complete stack in every
        minor GC collection. It only works with asmgcc so far. We'll see
        if it shows some speed-ups. (Adapting shadowstack for it should not
        be hard.)

diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -35,7 +35,9 @@
     PASS_ON_MY_FRAME = 15
     JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
     # 'threadlocal_addr' is passed as 2nd argument on the stack,
-    # and it can be left here for when it is needed
+    # and it can be left here for when it is needed.  As an additional hack,
+    # with asmgcc, it is made odd-valued to mean "already seen this frame
+    # during the previous minor collection".
     THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
 else:
     # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
@@ -43,7 +45,9 @@
     PASS_ON_MY_FRAME = 12
     JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
     # 'threadlocal_addr' is passed as 2nd argument in %esi,
-    # and is moved into this frame location
+    # and is moved into this frame location.  As an additional hack,
+    # with asmgcc, it is made odd-valued to mean "already seen this frame
+    # during the previous minor collection".
     THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
 
 assert PASS_ON_MY_FRAME >= 12       # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py 
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1980,6 +1980,23 @@
 
     def _call_assembler_emit_call(self, addr, argloc, _):
         threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
+        if self._is_asmgcc():
+            # We need to remove the bit "already seen during the
+            # previous minor collection" instead of passing this
+            # value directly.
+            if IS_X86_64:
+                tmploc = esi    # already the correct place
+                if argloc is tmploc:
+                    self.mc.MOV_rr(esi.value, edi.value)
+                    argloc = edi
+            else:
+                tmploc = eax
+                if tmploc is argloc:
+                    tmploc = edx
+            self.mc.MOV(tmploc, threadlocal_loc)
+            self.mc.AND_ri(tmploc.value, ~1)
+            threadlocal_loc = tmploc
+        #
         self.simple_call(addr, [argloc, threadlocal_loc])
 
     def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
@@ -2355,6 +2372,8 @@
         assert self.cpu.translate_support_code
         assert isinstance(resloc, RegLoc)
         self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
+        if self._is_asmgcc():
+            self.mc.AND_ri(resloc.value, ~1)
         self.load_from_mem(resloc, addr_add_const(resloc, offset),
                            imm(size), imm(sign))
 
diff --git a/rpython/jit/backend/x86/callbuilder.py 
b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -167,6 +167,8 @@
                 self.tlofs_reg = r12
             self.mc.MOV_rs(self.tlofs_reg.value,
                            THREADLOCAL_OFS - self.current_esp)
+            if self.asm._is_asmgcc():
+                self.mc.AND_ri(self.tlofs_reg.value, ~1)
         return self.tlofs_reg
 
     def save_stack_position(self):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -375,6 +375,10 @@
         # the nursery.
         self.pinned_objects_in_nursery = 0
         #
+        # This flag is set if the previous minor collection found at least
+        # one pinned object alive.
+        self.any_pinned_object_kept = False
+        #
         # Keeps track of old objects pointing to pinned objects. These objects
         # must be traced every minor collection. Without tracing them the
         # referenced pinned object wouldn't be visited and therefore collected.
@@ -1489,7 +1493,9 @@
         # The following counter keeps track of alive and pinned young objects
         # inside the nursery. We reset it here and increace it in
         # '_trace_drag_out()'.
+        any_pinned_object_from_earlier = self.any_pinned_object_kept
         self.pinned_objects_in_nursery = 0
+        self.any_pinned_object_kept = False
         #
         # Before everything else, remove from 'old_objects_pointing_to_young'
         # the young arrays.
@@ -1513,7 +1519,7 @@
         # are copied out or flagged.  They are also added to the list
         # 'old_objects_pointing_to_young'.
         self.nursery_surviving_size = 0
-        self.collect_roots_in_nursery()
+        self.collect_roots_in_nursery(any_pinned_object_from_earlier)
         #
         # visit all objects that are known for pointing to pinned
         # objects. This way we populate 'surviving_pinned_objects'
@@ -1649,7 +1655,7 @@
     def _visit_old_objects_pointing_to_pinned(self, obj, ignore):
         self.trace(obj, self._trace_drag_out, obj)
 
-    def collect_roots_in_nursery(self):
+    def collect_roots_in_nursery(self, any_pinned_object_from_earlier):
         # we don't need to trace prebuilt GcStructs during a minor collect:
         # if a prebuilt GcStruct contains a pointer to a young object,
         # then the write_barrier must have ensured that the prebuilt
@@ -1659,10 +1665,19 @@
             callback = IncrementalMiniMarkGC._trace_drag_out1_marking_phase
         else:
             callback = IncrementalMiniMarkGC._trace_drag_out1
+        #
+        # Note a subtlety: if the nursery contains pinned objects "from
+        # earlier", i.e. created earlier than the previous minor
+        # collection, then we can't use the "is_minor=True" optimization.
+        # We really need to walk the complete stack to be sure we still
+        # see them.
+        use_jit_frame_stoppers = not any_pinned_object_from_earlier
+        #
         self.root_walker.walk_roots(
             callback,     # stack roots
             callback,     # static in prebuilt non-gc
-            None)         # static in prebuilt gc
+            None,         # static in prebuilt gc
+            is_minor=use_jit_frame_stoppers)
         debug_stop("gc-minor-walkroots")
 
     def collect_cardrefs_to_nursery(self):
@@ -1844,6 +1859,7 @@
             self.surviving_pinned_objects.append(
                 llarena.getfakearenaaddress(obj - size_gc_header))
             self.pinned_objects_in_nursery += 1
+            self.any_pinned_object_kept = True
             return
         else:
             # First visit to an object that has already a shadow.
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -1322,7 +1322,8 @@
         self.root_walker.walk_roots(
             MiniMarkGC._trace_drag_out1,  # stack roots
             MiniMarkGC._trace_drag_out1,  # static in prebuilt non-gc
-            None)                         # static in prebuilt gc
+            None,                         # static in prebuilt gc
+            is_minor=True)
         debug_stop("gc-minor-walkroots")
 
     def collect_cardrefs_to_nursery(self):
diff --git a/rpython/memory/gc/test/test_direct.py 
b/rpython/memory/gc/test/test_direct.py
--- a/rpython/memory/gc/test/test_direct.py
+++ b/rpython/memory/gc/test/test_direct.py
@@ -34,7 +34,8 @@
 
     def walk_roots(self, collect_stack_root,
                    collect_static_in_prebuilt_nongc,
-                   collect_static_in_prebuilt_gc):
+                   collect_static_in_prebuilt_gc,
+                   is_minor=False):
         gc = self.tester.gc
         layoutbuilder = self.tester.layoutbuilder
         if collect_static_in_prebuilt_gc:
diff --git a/rpython/memory/gctransform/asmgcroot.py 
b/rpython/memory/gctransform/asmgcroot.py
--- a/rpython/memory/gctransform/asmgcroot.py
+++ b/rpython/memory/gctransform/asmgcroot.py
@@ -340,9 +340,10 @@
         # called first, to initialize self.belongs_to_current_thread.
         assert not hasattr(self, 'gc_detach_callback_pieces_ptr')
 
-    def walk_stack_roots(self, collect_stack_root):
+    def walk_stack_roots(self, collect_stack_root, is_minor=False):
         gcdata = self.gcdata
         gcdata._gc_collect_stack_root = collect_stack_root
+        gcdata._gc_collect_is_minor = is_minor
         pypy_asm_stackwalk(llhelper(ASM_CALLBACK_PTR, self._asm_callback),
                            gcrootanchor)
 
@@ -468,6 +469,13 @@
             if gc.points_to_valid_gc_object(addr):
                 collect_stack_root(gc, addr)
         #
+        # small hack: the JIT reserves THREADLOCAL_OFS's last bit for
+        # us.  We use it to store an "already traced past this frame"
+        # flag.
+        if self._with_jit and self.gcdata._gc_collect_is_minor:
+            if self.mark_jit_frame_can_stop(callee):
+                return False
+        #
         # track where the caller_frame saved the registers from its own
         # caller
         #
@@ -548,6 +556,19 @@
         else:  # kind == LOC_EBP_MINUS:   at -N(%ebp)
             return ebp_in_caller - offset
 
+    def mark_jit_frame_can_stop(self, callee):
+        location = self._shape_decompressor.get_threadlocal_loc()
+        if location == LOC_NOWHERE:
+            return False
+        addr = self.getlocation(callee, llmemory.NULL, location)
+        #
+        x = addr.signed[0]
+        if x & 1:
+            return True            # this JIT stack frame is already marked!
+        else:
+            addr.signed[0] = x | 1    # otherwise, mark it but don't stop
+            return False
+
 
 LOC_REG       = 0
 LOC_ESP_PLUS  = 1
@@ -729,6 +750,17 @@
             llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index")
             return 0   # annotator fix
 
+    def get_threadlocal_loc(self):
+        index = self.jit_index
+        if index < 0:
+            return LOC_NOWHERE     # case "outside the jit"
+        else:
+            # case "in the jit"
+            from rpython.jit.backend.x86.arch import THREADLOCAL_OFS, WORD
+            return (LOC_ESP_PLUS |
+                    ((THREADLOCAL_OFS // WORD + self.extra_stack_depth) << 2))
+
+
 # ____________________________________________________________
 
 #
diff --git a/rpython/memory/gctransform/framework.py 
b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -1462,7 +1462,8 @@
 
     def walk_roots(self, collect_stack_root,
                    collect_static_in_prebuilt_nongc,
-                   collect_static_in_prebuilt_gc):
+                   collect_static_in_prebuilt_gc,
+                   is_minor=False):
         gcdata = self.gcdata
         gc = self.gc
         if collect_static_in_prebuilt_nongc:
@@ -1482,7 +1483,7 @@
                     collect_static_in_prebuilt_gc(gc, result)
                 addr += sizeofaddr
         if collect_stack_root:
-            self.walk_stack_roots(collect_stack_root)     # abstract
+            self.walk_stack_roots(collect_stack_root, is_minor)     # abstract
 
     def finished_minor_collection(self):
         func = self.finished_minor_collection_func
diff --git a/rpython/memory/gctransform/shadowstack.py 
b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -99,7 +99,7 @@
         self.shadow_stack_pool.initial_setup()
         BaseRootWalker.setup_root_walker(self)
 
-    def walk_stack_roots(self, collect_stack_root):
+    def walk_stack_roots(self, collect_stack_root, is_minor=False):
         gcdata = self.gcdata
         self.rootstackhook(collect_stack_root,
                            gcdata.root_stack_base, gcdata.root_stack_top)
diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py
--- a/rpython/memory/gcwrapper.py
+++ b/rpython/memory/gcwrapper.py
@@ -191,7 +191,8 @@
 
     def walk_roots(self, collect_stack_root,
                    collect_static_in_prebuilt_nongc,
-                   collect_static_in_prebuilt_gc):
+                   collect_static_in_prebuilt_gc,
+                   is_minor=False):
         gcheap = self.gcheap
         gc = gcheap.gc
         if collect_static_in_prebuilt_gc:
@@ -203,7 +204,7 @@
                 if self.gcheap.gc.points_to_valid_gc_object(addrofaddr):
                     collect_static_in_prebuilt_nongc(gc, addrofaddr)
         if collect_stack_root:
-            for addrofaddr in gcheap.llinterp.find_roots():
+            for addrofaddr in gcheap.llinterp.find_roots(is_minor):
                 if self.gcheap.gc.points_to_valid_gc_object(addrofaddr):
                     collect_stack_root(gc, addrofaddr)
 
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -146,13 +146,21 @@
                            }
             return self._tlobj
 
-    def find_roots(self):
+    def find_roots(self, is_minor=False):
         """Return a list of the addresses of the roots."""
         #log.findroots("starting")
         roots = []
-        for frame in self.frame_stack:
+        for frame in reversed(self.frame_stack):
             #log.findroots("graph", frame.graph.name)
             frame.find_roots(roots)
+            # If a call is done with 'is_minor=True', we can stop after the
+            # first frame in the stack that was already seen by the previous
+            # call with 'is_minor=True'.  (We still need to trace that frame,
+            # but not its callers.)
+            if is_minor:
+                if getattr(frame, '_find_roots_already_seen', False):
+                    break
+                frame._find_roots_already_seen = True
         return roots
 
     def find_exception(self, exc):
diff --git a/rpython/translator/c/gcc/test/test_asmgcroot.py 
b/rpython/translator/c/gcc/test/test_asmgcroot.py
--- a/rpython/translator/c/gcc/test/test_asmgcroot.py
+++ b/rpython/translator/c/gcc/test/test_asmgcroot.py
@@ -251,13 +251,17 @@
     def define_callback_with_collect(cls):
         return lambda: 0
 
-class TestAsmGCRootWithSemiSpaceGC_Shared(TestAsmGCRootWithSemiSpaceGC):
-    @classmethod
-    def make_config(cls):
-        config = TestAsmGCRootWithSemiSpaceGC.make_config()
-        config.translation.shared = True
-        return config
+#class TestAsmGCRootWithSemiSpaceGC_Shared(TestAsmGCRootWithSemiSpaceGC):
+#    @classmethod
+#    def make_config(cls):
+#        config = TestAsmGCRootWithSemiSpaceGC.make_config()
+#        config.translation.shared = True
+#        return config
 
 class TestAsmGCRootWithHybridTagged(AbstractTestAsmGCRoot,
                                     test_newgc.TestHybridTaggedPointers):
     pass
+
+class TestAsmGCRootWithIncrementalMinimark(AbstractTestAsmGCRoot,
+                                    test_newgc.TestIncrementalMiniMarkGC):
+    pass
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to