Author: Armin Rigo <[email protected]>
Branch:
Changeset: r75737:2f0b1cc091f5
Date: 2015-02-06 12:50 +0100
http://bitbucket.org/pypy/pypy/changeset/2f0b1cc091f5/
Log: hg merge stackroot-speedup-2
This is a quick hack to avoid walking the complete stack on every
minor collection. It only works with asmgcc so far; we'll see
whether it yields any speed-ups. (Adapting shadowstack to it should
not be hard.)
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -35,7 +35,9 @@
PASS_ON_MY_FRAME = 15
JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
# 'threadlocal_addr' is passed as 2nd argument on the stack,
- # and it can be left here for when it is needed
+ # and it can be left here for when it is needed. As an additional hack,
+ # with asmgcc, it is made odd-valued to mean "already seen this frame
+ # during the previous minor collection".
THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
else:
# rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
@@ -43,7 +45,9 @@
PASS_ON_MY_FRAME = 12
JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
# 'threadlocal_addr' is passed as 2nd argument in %esi,
- # and is moved into this frame location
+ # and is moved into this frame location. As an additional hack,
+ # with asmgcc, it is made odd-valued to mean "already seen this frame
+ # during the previous minor collection".
THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1980,6 +1980,23 @@
def _call_assembler_emit_call(self, addr, argloc, _):
threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
+ if self._is_asmgcc():
+ # We need to remove the bit "already seen during the
+ # previous minor collection" instead of passing this
+ # value directly.
+ if IS_X86_64:
+ tmploc = esi # already the correct place
+ if argloc is tmploc:
+ self.mc.MOV_rr(esi.value, edi.value)
+ argloc = edi
+ else:
+ tmploc = eax
+ if tmploc is argloc:
+ tmploc = edx
+ self.mc.MOV(tmploc, threadlocal_loc)
+ self.mc.AND_ri(tmploc.value, ~1)
+ threadlocal_loc = tmploc
+ #
self.simple_call(addr, [argloc, threadlocal_loc])
def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
@@ -2355,6 +2372,8 @@
assert self.cpu.translate_support_code
assert isinstance(resloc, RegLoc)
self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
+ if self._is_asmgcc():
+ self.mc.AND_ri(resloc.value, ~1)
self.load_from_mem(resloc, addr_add_const(resloc, offset),
imm(size), imm(sign))
diff --git a/rpython/jit/backend/x86/callbuilder.py
b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -167,6 +167,8 @@
self.tlofs_reg = r12
self.mc.MOV_rs(self.tlofs_reg.value,
THREADLOCAL_OFS - self.current_esp)
+ if self.asm._is_asmgcc():
+ self.mc.AND_ri(self.tlofs_reg.value, ~1)
return self.tlofs_reg
def save_stack_position(self):
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -375,6 +375,10 @@
# the nursery.
self.pinned_objects_in_nursery = 0
#
+ # This flag is set if the previous minor collection found at least
+ # one pinned object alive.
+ self.any_pinned_object_kept = False
+ #
# Keeps track of old objects pointing to pinned objects. These objects
# must be traced every minor collection. Without tracing them the
# referenced pinned object wouldn't be visited and therefore collected.
@@ -1489,7 +1493,9 @@
# The following counter keeps track of alive and pinned young objects
# inside the nursery. We reset it here and increase it in
# '_trace_drag_out()'.
+ any_pinned_object_from_earlier = self.any_pinned_object_kept
self.pinned_objects_in_nursery = 0
+ self.any_pinned_object_kept = False
#
# Before everything else, remove from 'old_objects_pointing_to_young'
# the young arrays.
@@ -1513,7 +1519,7 @@
# are copied out or flagged. They are also added to the list
# 'old_objects_pointing_to_young'.
self.nursery_surviving_size = 0
- self.collect_roots_in_nursery()
+ self.collect_roots_in_nursery(any_pinned_object_from_earlier)
#
# visit all objects that are known for pointing to pinned
# objects. This way we populate 'surviving_pinned_objects'
@@ -1649,7 +1655,7 @@
def _visit_old_objects_pointing_to_pinned(self, obj, ignore):
self.trace(obj, self._trace_drag_out, obj)
- def collect_roots_in_nursery(self):
+ def collect_roots_in_nursery(self, any_pinned_object_from_earlier):
# we don't need to trace prebuilt GcStructs during a minor collect:
# if a prebuilt GcStruct contains a pointer to a young object,
# then the write_barrier must have ensured that the prebuilt
@@ -1659,10 +1665,19 @@
callback = IncrementalMiniMarkGC._trace_drag_out1_marking_phase
else:
callback = IncrementalMiniMarkGC._trace_drag_out1
+ #
+ # Note a subtlety: if the nursery contains pinned objects "from
+ # earlier", i.e. created earlier than the previous minor
+ # collection, then we can't use the "is_minor=True" optimization.
+ # We really need to walk the complete stack to be sure we still
+ # see them.
+ use_jit_frame_stoppers = not any_pinned_object_from_earlier
+ #
self.root_walker.walk_roots(
callback, # stack roots
callback, # static in prebuilt non-gc
- None) # static in prebuilt gc
+ None, # static in prebuilt gc
+ is_minor=use_jit_frame_stoppers)
debug_stop("gc-minor-walkroots")
def collect_cardrefs_to_nursery(self):
@@ -1844,6 +1859,7 @@
self.surviving_pinned_objects.append(
llarena.getfakearenaaddress(obj - size_gc_header))
self.pinned_objects_in_nursery += 1
+ self.any_pinned_object_kept = True
return
else:
# First visit to an object that has already a shadow.
diff --git a/rpython/memory/gc/minimark.py b/rpython/memory/gc/minimark.py
--- a/rpython/memory/gc/minimark.py
+++ b/rpython/memory/gc/minimark.py
@@ -1322,7 +1322,8 @@
self.root_walker.walk_roots(
MiniMarkGC._trace_drag_out1, # stack roots
MiniMarkGC._trace_drag_out1, # static in prebuilt non-gc
- None) # static in prebuilt gc
+ None, # static in prebuilt gc
+ is_minor=True)
debug_stop("gc-minor-walkroots")
def collect_cardrefs_to_nursery(self):
diff --git a/rpython/memory/gc/test/test_direct.py
b/rpython/memory/gc/test/test_direct.py
--- a/rpython/memory/gc/test/test_direct.py
+++ b/rpython/memory/gc/test/test_direct.py
@@ -34,7 +34,8 @@
def walk_roots(self, collect_stack_root,
collect_static_in_prebuilt_nongc,
- collect_static_in_prebuilt_gc):
+ collect_static_in_prebuilt_gc,
+ is_minor=False):
gc = self.tester.gc
layoutbuilder = self.tester.layoutbuilder
if collect_static_in_prebuilt_gc:
diff --git a/rpython/memory/gctransform/asmgcroot.py
b/rpython/memory/gctransform/asmgcroot.py
--- a/rpython/memory/gctransform/asmgcroot.py
+++ b/rpython/memory/gctransform/asmgcroot.py
@@ -340,9 +340,10 @@
# called first, to initialize self.belongs_to_current_thread.
assert not hasattr(self, 'gc_detach_callback_pieces_ptr')
- def walk_stack_roots(self, collect_stack_root):
+ def walk_stack_roots(self, collect_stack_root, is_minor=False):
gcdata = self.gcdata
gcdata._gc_collect_stack_root = collect_stack_root
+ gcdata._gc_collect_is_minor = is_minor
pypy_asm_stackwalk(llhelper(ASM_CALLBACK_PTR, self._asm_callback),
gcrootanchor)
@@ -468,6 +469,13 @@
if gc.points_to_valid_gc_object(addr):
collect_stack_root(gc, addr)
#
+ # small hack: the JIT reserves THREADLOCAL_OFS's last bit for
+ # us. We use it to store an "already traced past this frame"
+ # flag.
+ if self._with_jit and self.gcdata._gc_collect_is_minor:
+ if self.mark_jit_frame_can_stop(callee):
+ return False
+ #
# track where the caller_frame saved the registers from its own
# caller
#
@@ -548,6 +556,19 @@
else: # kind == LOC_EBP_MINUS: at -N(%ebp)
return ebp_in_caller - offset
+ def mark_jit_frame_can_stop(self, callee):
+ location = self._shape_decompressor.get_threadlocal_loc()
+ if location == LOC_NOWHERE:
+ return False
+ addr = self.getlocation(callee, llmemory.NULL, location)
+ #
+ x = addr.signed[0]
+ if x & 1:
+ return True # this JIT stack frame is already marked!
+ else:
+ addr.signed[0] = x | 1 # otherwise, mark it but don't stop
+ return False
+
LOC_REG = 0
LOC_ESP_PLUS = 1
@@ -729,6 +750,17 @@
llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index")
return 0 # annotator fix
+ def get_threadlocal_loc(self):
+ index = self.jit_index
+ if index < 0:
+ return LOC_NOWHERE # case "outside the jit"
+ else:
+ # case "in the jit"
+ from rpython.jit.backend.x86.arch import THREADLOCAL_OFS, WORD
+ return (LOC_ESP_PLUS |
+ ((THREADLOCAL_OFS // WORD + self.extra_stack_depth) << 2))
+
+
# ____________________________________________________________
#
diff --git a/rpython/memory/gctransform/framework.py
b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -1462,7 +1462,8 @@
def walk_roots(self, collect_stack_root,
collect_static_in_prebuilt_nongc,
- collect_static_in_prebuilt_gc):
+ collect_static_in_prebuilt_gc,
+ is_minor=False):
gcdata = self.gcdata
gc = self.gc
if collect_static_in_prebuilt_nongc:
@@ -1482,7 +1483,7 @@
collect_static_in_prebuilt_gc(gc, result)
addr += sizeofaddr
if collect_stack_root:
- self.walk_stack_roots(collect_stack_root) # abstract
+ self.walk_stack_roots(collect_stack_root, is_minor) # abstract
def finished_minor_collection(self):
func = self.finished_minor_collection_func
diff --git a/rpython/memory/gctransform/shadowstack.py
b/rpython/memory/gctransform/shadowstack.py
--- a/rpython/memory/gctransform/shadowstack.py
+++ b/rpython/memory/gctransform/shadowstack.py
@@ -99,7 +99,7 @@
self.shadow_stack_pool.initial_setup()
BaseRootWalker.setup_root_walker(self)
- def walk_stack_roots(self, collect_stack_root):
+ def walk_stack_roots(self, collect_stack_root, is_minor=False):
gcdata = self.gcdata
self.rootstackhook(collect_stack_root,
gcdata.root_stack_base, gcdata.root_stack_top)
diff --git a/rpython/memory/gcwrapper.py b/rpython/memory/gcwrapper.py
--- a/rpython/memory/gcwrapper.py
+++ b/rpython/memory/gcwrapper.py
@@ -191,7 +191,8 @@
def walk_roots(self, collect_stack_root,
collect_static_in_prebuilt_nongc,
- collect_static_in_prebuilt_gc):
+ collect_static_in_prebuilt_gc,
+ is_minor=False):
gcheap = self.gcheap
gc = gcheap.gc
if collect_static_in_prebuilt_gc:
@@ -203,7 +204,7 @@
if self.gcheap.gc.points_to_valid_gc_object(addrofaddr):
collect_static_in_prebuilt_nongc(gc, addrofaddr)
if collect_stack_root:
- for addrofaddr in gcheap.llinterp.find_roots():
+ for addrofaddr in gcheap.llinterp.find_roots(is_minor):
if self.gcheap.gc.points_to_valid_gc_object(addrofaddr):
collect_stack_root(gc, addrofaddr)
diff --git a/rpython/rtyper/llinterp.py b/rpython/rtyper/llinterp.py
--- a/rpython/rtyper/llinterp.py
+++ b/rpython/rtyper/llinterp.py
@@ -146,13 +146,21 @@
}
return self._tlobj
- def find_roots(self):
+ def find_roots(self, is_minor=False):
"""Return a list of the addresses of the roots."""
#log.findroots("starting")
roots = []
- for frame in self.frame_stack:
+ for frame in reversed(self.frame_stack):
#log.findroots("graph", frame.graph.name)
frame.find_roots(roots)
+ # If a call is done with 'is_minor=True', we can stop after the
+ # first frame in the stack that was already seen by the previous
+ # call with 'is_minor=True'. (We still need to trace that frame,
+ # but not its callers.)
+ if is_minor:
+ if getattr(frame, '_find_roots_already_seen', False):
+ break
+ frame._find_roots_already_seen = True
return roots
def find_exception(self, exc):
diff --git a/rpython/translator/c/gcc/test/test_asmgcroot.py
b/rpython/translator/c/gcc/test/test_asmgcroot.py
--- a/rpython/translator/c/gcc/test/test_asmgcroot.py
+++ b/rpython/translator/c/gcc/test/test_asmgcroot.py
@@ -251,13 +251,17 @@
def define_callback_with_collect(cls):
return lambda: 0
-class TestAsmGCRootWithSemiSpaceGC_Shared(TestAsmGCRootWithSemiSpaceGC):
- @classmethod
- def make_config(cls):
- config = TestAsmGCRootWithSemiSpaceGC.make_config()
- config.translation.shared = True
- return config
+#class TestAsmGCRootWithSemiSpaceGC_Shared(TestAsmGCRootWithSemiSpaceGC):
+# @classmethod
+# def make_config(cls):
+# config = TestAsmGCRootWithSemiSpaceGC.make_config()
+# config.translation.shared = True
+# return config
class TestAsmGCRootWithHybridTagged(AbstractTestAsmGCRoot,
test_newgc.TestHybridTaggedPointers):
pass
+
+class TestAsmGCRootWithIncrementalMinimark(AbstractTestAsmGCRoot,
+ test_newgc.TestIncrementalMiniMarkGC):
+ pass
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit