Author: Armin Rigo <ar...@tunes.org> Branch: stmgc-c7-rewindjmp Changeset: r72868:8ff5b23d8b84 Date: 2014-08-18 10:41 +0200 http://bitbucket.org/pypy/pypy/changeset/8ff5b23d8b84/
Log: in-progress diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py --- a/rpython/jit/backend/x86/arch.py +++ b/rpython/jit/backend/x86/arch.py @@ -16,7 +16,7 @@ # +--------------------+ <== aligned to 16 bytes # | return address | # +--------------------+ ------------------------. -# | resume buf (if STM)| STM_FRAME_FIXED_SIZE | +# | rewind_jmp_buf(STM)| STM_FRAME_FIXED_SIZE | # +--------------------+ ----------------------. | # | saved regs | FRAME_FIXED_SIZE | | # +--------------------+ --------------------. | | @@ -46,18 +46,9 @@ assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3 -# The STM resume buffer (on x86-64) is four words wide. Actually, clang -# uses three words (see test_stm.py): rbp, rip, rsp. But the value of -# rbp is not interesting for the JIT-generated machine code. So the -# STM_JMPBUF_OFS is the offset from the stack top to the start of the -# buffer, with only words at offset +1 and +2 in this buffer being -# meaningful. We use ebp, i.e. the word at offset +0, to store the -# resume counter. 
- -STM_RESUME_BUF_WORDS = 4 -STM_FRAME_FIXED_SIZE = FRAME_FIXED_SIZE + STM_RESUME_BUF_WORDS -STM_JMPBUF_OFS = WORD * FRAME_FIXED_SIZE -STM_JMPBUF_OFS_RBP = STM_JMPBUF_OFS + 0 * WORD -STM_JMPBUF_OFS_RIP = STM_JMPBUF_OFS + 1 * WORD -STM_JMPBUF_OFS_RSP = STM_JMPBUF_OFS + 2 * WORD -STM_OLD_SHADOWSTACK = STM_JMPBUF_OFS + 3 * WORD +# The STM rewind_jmp_buf (on x86-64) is two words wide: +STM_REWIND_JMP_BUF_WORDS = 2 +STM_FRAME_FIXED_SIZE = FRAME_FIXED_SIZE + STM_REWIND_JMP_BUF_WORDS +STM_JMPBUF_OFS = WORD * FRAME_FIXED_SIZE +STM_SHADOWSTACK_BASE_OFS = STM_JMPBUF_OFS + 0 * WORD +STM_PREV_OFS = STM_JMPBUF_OFS + 1 * WORD diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -19,8 +19,7 @@ from rpython.jit.backend.x86.arch import ( FRAME_FIXED_SIZE, WORD, IS_X86_64, JITFRAME_FIXED_SIZE, IS_X86_32, PASS_ON_MY_FRAME, STM_FRAME_FIXED_SIZE, STM_JMPBUF_OFS, - STM_JMPBUF_OFS_RIP, STM_JMPBUF_OFS_RSP, STM_JMPBUF_OFS_RBP, - STM_OLD_SHADOWSTACK) + STM_SHADOWSTACK_BASE_OFS, STM_PREV_OFS) from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi, r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, @@ -886,45 +885,83 @@ gcrootmap = self.cpu.gc_ll_descr.gcrootmap return self.heap_tl(gcrootmap.get_root_stack_top_addr()) + def heap_rjthread(self): + """STM: Return an AddressLoc for '&stm_thread_local.rjthread'.""" + return self.heap_tl(rstm.adr_rjthread) + + def heap_rjthread_head(self): + """STM: Return an AddressLoc for '&stm_thread_local.rjthread.head'.""" + return self.heap_tl(rstm.adr_rjthread_head) + + def heap_rjthread_moved_off_base(self): + """STM: AddressLoc for '&stm_thread_local.rjthread.moved_off_base'.""" + return self.heap_tl(rstm.adr_rjthread_moved_off_base) + def _call_header_shadowstack(self): # put the frame in ebp on the shadowstack for the GC to 
find # (ebp is a writeable object and does not need a write-barrier # again (ensured by the code calling the loop)) - self.mc.MOV(ebx, self.heap_shadowstack_top()) + mc = self.mc + mc.MOV(ebx, self.heap_shadowstack_top()) + mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), ebp.value) + # MOV [ebx], ebp if self.cpu.gc_ll_descr.stm: - self.mc.MOV_mi((self.SEGMENT_NO, ebx.value, 0), - rstm.stm_stack_marker_new) # MOV [ebx], MARKER_NEW - self.mc.MOV_mr((self.SEGMENT_NO, ebx.value, WORD), - ebp.value) # MOV [ebx+WORD], ebp - self.mc.MOV_sr(STM_OLD_SHADOWSTACK, ebx.value) - # MOV [esp+xx], ebx - self.mc.ADD_ri(ebx.value, 2 * WORD) + # inlining stm_rewind_jmp_enterframe() + r11v = X86_64_SCRATCH_REG.value + rjh = self.heap_rjthread_head() + mc.ADD_ri8(ebx.value, 1) # ADD ebx, 1 + mc.MOV_rm(r11v, rjh) # MOV r11, [rjthread.head] + mc.MOV_sr(STM_SHADOWSTACK_BASE_OFS, ebx.value) + # MOV [esp+ssbase], ebx + mc.ADD_ri8(ebx.value, WORD-1) # ADD ebx, 7 + mc.MOV_sr(STM_PREV_OFS, r11v) # MOV [esp+prev], r11 + mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx + mc.LEA_rs(r11v, STM_JMPBUF_OFS) # LEA r11, [esp+bufofs] + mc.MOV_mr(rjh, r11v) # MOV [rjthread.head], r11 + # else: - self.mc.MOV_mr((self.SEGMENT_NO, ebx.value, 0), - ebp.value) # MOV [ebx], ebp - self.mc.ADD_ri(ebx.value, WORD) - self.mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx + mc.ADD_ri(ebx.value, WORD) # ADD ebx, WORD + mc.MOV(self.heap_shadowstack_top(), ebx) # MOV [rootstacktop], ebx def _call_footer_shadowstack(self): + mc = self.mc if self.cpu.gc_ll_descr.stm: # STM: in the rare case where we need realloc_frame, the new # frame is pushed on top of the old one. It's even possible # that this occurs more than once. So we have to restore # the old shadowstack by looking up its original saved value. - self.mc.MOV_rs(ecx.value, STM_OLD_SHADOWSTACK) - self.mc.MOV(self.heap_shadowstack_top(), ecx) + # The rest of this is inlining stm_rewind_jmp_leaveframe(). 
+ r11v = X86_64_SCRATCH_REG.value + rjh = self.heap_rjthread_head() + rjmovd_o_b = self.heap_rjthread_moved_off_base() + adr_rjthread_moved_off_base + mc.MOV_rs(r11v, STM_SHADOWSTACK_BASE_OFS) # MOV r11, [esp+ssbase] + mc.MOV_rs(ebx.value, STM_PREV_OFS) # MOV ebx, [esp+prev] + mc.MOV(self.heap_shadowstack_top(), r11v) # MOV [rootstacktop], r11 + mc.LEA_rs(r11v, STM_JMPBUF_OFS) # LEA r11, [esp+bufofs] + mc.MOV_mr(rjh, ebx.value) # MOV [rjthread.head], ebx + mc.CMP_rm(r11v, rjmovd_o_b) # CMP r11, [rjth.movd_o_b] + mc.J_il8(rx86.Conditions['NE'], 0) # JNE label_below + jne_location = mc.get_relative_pos() + # + mc.CALL(imm(rstm.adr_pypy__rewind_jmp_copy_stack_slice)) + # + # patch the JNE above + offset = mc.get_relative_pos() - jne_location + assert 0 < offset <= 127 + mc.overwrite(jne_location-1, chr(offset)) else: # SUB [rootstacktop], WORD gcrootmap = self.cpu.gc_ll_descr.gcrootmap rst = gcrootmap.get_root_stack_top_addr() if rx86.fits_in_32bits(rst): # SUB [rootstacktop], WORD - self.mc.SUB_ji8((self.SEGMENT_NO, rst), WORD) + mc.SUB_ji8((self.SEGMENT_NO, rst), WORD) else: # MOV ebx, rootstacktop # SUB [ebx], WORD - self.mc.MOV_ri(ebx.value, rst) - self.mc.SUB_mi8((self.SEGMENT_NO, ebx.value, 0), WORD) + mc.MOV_ri(ebx.value, rst) + mc.SUB_mi8((self.SEGMENT_NO, ebx.value, 0), WORD) def redirect_call_assembler(self, oldlooptoken, newlooptoken): # some minimal sanity checking diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -1292,9 +1292,6 @@ need_lower_byte=True) self.perform(op, [], resloc) - def XXXconsider_stm_transaction_break(self, op, guard_op): - self.perform_with_guard(op, guard_op, [], None) - def consider_jump(self, op): assembler = self.assembler assert self.jump_target_descr is None diff --git a/rpython/rlib/rstm.py b/rpython/rlib/rstm.py --- a/rpython/rlib/rstm.py +++ b/rpython/rlib/rstm.py @@ -13,12 +13,16 @@ TID = rffi.UINT 
tid_offset = CFlexSymbolic('offsetof(struct rpyobj_s, tid)') stm_nb_segments = CFlexSymbolic('STM_NB_SEGMENTS') -stm_stack_marker_new = CFlexSymbolic('STM_STACK_MARKER_NEW') -stm_stack_marker_old = CFlexSymbolic('STM_STACK_MARKER_OLD') adr_nursery_free = CFlexSymbolic('((long)&STM_SEGMENT->nursery_current)') adr_nursery_top = CFlexSymbolic('((long)&STM_SEGMENT->nursery_end)') adr_pypy_stm_nursery_low_fill_mark = ( CFlexSymbolic('((long)&pypy_stm_nursery_low_fill_mark)')) +adr_rjthread = ( + CFlexSymbolic('((long)&stm_thread_local.rjthread)')) +adr_rjthread_head = ( + CFlexSymbolic('((long)&stm_thread_local.rjthread.head)')) +adr_rjthread_moved_off_base = ( + CFlexSymbolic('((long)&stm_thread_local.rjthread.moved_off_base)')) adr_transaction_read_version = ( CFlexSymbolic('((long)&STM_SEGMENT->transaction_read_version)')) adr_jmpbuf_ptr = ( @@ -39,6 +43,8 @@ CFlexSymbolic('((long)&stm_commit_transaction)')) adr_pypy_stm_start_transaction = ( CFlexSymbolic('((long)&pypy_stm_start_transaction)')) +adr_pypy__rewind_jmp_copy_stack_slice = ( + CFlexSymbolic('((long)&pypy__rewind_jmp_copy_stack_slice)')) def rewind_jmp_frame(): diff --git a/rpython/translator/stm/src_stm/stmgcintf.h b/rpython/translator/stm/src_stm/stmgcintf.h --- a/rpython/translator/stm/src_stm/stmgcintf.h +++ b/rpython/translator/stm/src_stm/stmgcintf.h @@ -116,5 +116,10 @@ /* NB. this logic is hard-coded in jit/backend/x86/assembler.py too */ } +static void pypy__rewind_jmp_copy_stack_slice(void) +{ + _rewind_jmp_copy_stack_slice(&stm_thread_local.rjthread); +} + #endif /* _RPY_STMGCINTF_H */ _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit