Author: Armin Rigo <ar...@tunes.org>
Branch: stackroot-speedup-2
Changeset: r75710:1c9c0cc23c80
Date: 2015-02-04 19:01 +0100
http://bitbucket.org/pypy/pypy/changeset/1c9c0cc23c80/
Log: Untested code
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -35,7 +35,9 @@
     PASS_ON_MY_FRAME = 15
     JITFRAME_FIXED_SIZE = 6 + 8 * 2 # 6 GPR + 8 XMM * 2 WORDS/float
     # 'threadlocal_addr' is passed as 2nd argument on the stack,
-    # and it can be left here for when it is needed
+    # and it can be left here for when it is needed. As an additional hack,
+    # with asmgcc, it is made odd-valued to mean "already seen this frame
+    # during the previous minor collection".
     THREADLOCAL_OFS = (FRAME_FIXED_SIZE + 2) * WORD
 else:
     # rbp + rbx + r12 + r13 + r14 + r15 + threadlocal + 12 extra words = 19
@@ -43,7 +45,9 @@
     PASS_ON_MY_FRAME = 12
     JITFRAME_FIXED_SIZE = 28 # 13 GPR + 15 XMM
     # 'threadlocal_addr' is passed as 2nd argument in %esi,
-    # and is moved into this frame location
+    # and is moved into this frame location. As an additional hack,
+    # with asmgcc, it is made odd-valued to mean "already seen this frame
+    # during the previous minor collection".
     THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
 assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1980,6 +1980,23 @@
     def _call_assembler_emit_call(self, addr, argloc, _):
         threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT)
+        if self._is_asmgcc():
+            # We need to remove the bit "already seen during the
+            # previous minor collection" instead of passing this
+            # value directly.
+            if IS_X86_64:
+                tmploc = esi # already the correct place
+                if argloc is tmploc:
+                    self.mc.MOV_rr(esi.value, edi.value)
+                    argloc = edi
+            else:
+                tmploc = eax
+                if tmploc is argloc:
+                    tmploc = edx
+            self.mc.MOV(tmploc, threadlocal_ofs)
+            self.mc.AND_ri(tmploc.value, ~1)
+            threadlocal_ofs = tmploc
+        #
         self.simple_call(addr, [argloc, threadlocal_loc])
     def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc):
@@ -2355,6 +2372,8 @@
         assert self.cpu.translate_support_code
         assert isinstance(resloc, RegLoc)
         self.mc.MOV_rs(resloc.value, THREADLOCAL_OFS)
+        if self._is_asmgcc():
+            self.mc.AND_ri(resloc.value, ~1)
         self.load_from_mem(resloc, addr_add_const(resloc, offset), imm(size), imm(sign))
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -167,6 +167,8 @@
                 self.tlofs_reg = r12
             self.mc.MOV_rs(self.tlofs_reg.value, THREADLOCAL_OFS - self.current_esp)
+            if self.asm._is_asmgcc():
+                self.mc.AND_ri(self.tlofs_reg.value, ~1)
         return self.tlofs_reg
     def save_stack_position(self):
diff --git a/rpython/memory/gctransform/asmgcroot.py b/rpython/memory/gctransform/asmgcroot.py
--- a/rpython/memory/gctransform/asmgcroot.py
+++ b/rpython/memory/gctransform/asmgcroot.py
@@ -343,6 +343,7 @@
     def walk_stack_roots(self, collect_stack_root, is_minor=False):
         gcdata = self.gcdata
         gcdata._gc_collect_stack_root = collect_stack_root
+        gcdata._gc_collect_is_minor = is_minor
         pypy_asm_stackwalk(llhelper(ASM_CALLBACK_PTR, self._asm_callback), gcrootanchor)
@@ -477,6 +478,14 @@
             addr = self.getlocation(callee, ebp_in_caller, location)
             caller.regs_stored_at[reg] = addr
             reg -= 1
+        #
+        # small hack: the JIT reserves THREADLOCAL_OFS's last bit for
+        # us. We use it to store an "already traced past this frame"
+        # flag.
+        if self._with_jit:
+            is_minor = self.gcdata._gc_collect_is_minor
+            if self.mark_jit_frame_can_stop(callee, is_minor):
+                return False
         location = self._shape_decompressor.next()
         caller.frame_address = self.getlocation(callee, ebp_in_caller,
@@ -548,6 +557,23 @@
         else: # kind == LOC_EBP_MINUS: at -N(%ebp)
             return ebp_in_caller - offset
+    def mark_jit_frame_can_stop(self, callee, is_minor):
+        location = self._shape_decompressor.get_threadlocal_loc()
+        if location == LOC_NOWHERE:
+            return False
+        addr = self.getlocation(callee, llmemory.NULL, location)
+        #
+        x = addr.signed[0]
+        if is_minor:
+            if x & 1:
+                return True # this JIT stack frame is already marked!
+            else:
+                addr.signed[0] = x | 1 # otherwise, mark it but don't stop
+                return False
+        else:
+            addr.signed[0] = x & ~1 # 'is_minor' is False, remove the marks
+            return False
+
 LOC_REG = 0
 LOC_ESP_PLUS = 1
@@ -729,6 +755,19 @@
             llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index")
         return 0 # annotator fix
+    def get_threadlocal_loc(self):
+        index = self.jit_index
+        if index < 0:
+            return LOC_NOWHERE # case "outside the jit"
+        else:
+            # case "in the jit"
+            from rpython.jit.backend.x86.arch import THREADLOCAL_OFS
+            from rpython.jit.backend.x86.arch import PASS_ON_MY_FRAME
+            stack_depth = PASS_ON_MY_FRAME + self.extra_stack_depth
+            return (LOC_ESP_PLUS |
+                    ((THREADLOCAL_OFS // WORD + self.extra_stack_depth) << 2))
+
+
 # ____________________________________________________________
 #
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit