Author: Maciej Fijalkowski <fij...@gmail.com> Branch: jitframe-on-heap Changeset: r61497:cd6fe2babfd9 Date: 2013-02-20 18:52 +0100 http://bitbucket.org/pypy/pypy/changeset/cd6fe2babfd9/
Log: (arigo, fijal, alex lurking) Implement asmgcc for this branch diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -64,7 +64,7 @@ self._build_wb_slowpath(True) self._build_wb_slowpath(False, for_frame=True) # only one of those - self._build_stack_check_failure() + self.build_frame_realloc_slowpath() if self.cpu.supports_floats: self._build_failure_recovery(False, withfloats=True) self._build_failure_recovery(True, withfloats=True) diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py --- a/rpython/jit/backend/llsupport/gc.py +++ b/rpython/jit/backend/llsupport/gc.py @@ -120,7 +120,7 @@ descrs = JitFrameDescrs() descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME) for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr', - 'jf_frame_info', 'jf_gcmap']: + 'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth']: setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name)) descrs.jfi_frame_size = cpu.fielddescrof(jitframe.JITFRAMEINFO, 'jfi_frame_size') @@ -373,7 +373,6 @@ translator = self.translator self.layoutbuilder = framework.TransformerLayoutBuilder(translator) self.layoutbuilder.delay_encoding() - # XXX this can probably die horrible death translator._jit2gc = {'layoutbuilder': self.layoutbuilder} def _setup_gcclass(self): @@ -391,6 +390,8 @@ def _setup_tid(self): self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid') + frame_tid = self.layoutbuilder.get_type_id(jitframe.JITFRAME) + self.translator._jit2gc['frame_tid'] = frame_tid def _setup_write_barrier(self): self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType( diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py --- a/rpython/jit/backend/llsupport/jitframe.py +++ b/rpython/jit/backend/llsupport/jitframe.py @@ -55,6 +55,8 @@ ('jf_force_descr', llmemory.GCREF), # a map of GC pointers ('jf_gcmap', lltype.Ptr(GCMAP)), + # how much we decrease stack pointer. Used around calls and malloc slowpath + ('jf_extra_stack_depth', lltype.Signed), # For the front-end: a GCREF for the savedata ('jf_savedata', llmemory.GCREF), # For GUARD_(NO)_EXCEPTION and GUARD_NOT_FORCED: the exception we @@ -84,6 +86,7 @@ LENGTHOFS = llmemory.arraylengthoffset(JITFRAME.jf_frame) SIGN_SIZE = llmemory.sizeof(lltype.Signed) UNSIGN_SIZE = llmemory.sizeof(lltype.Unsigned) +STACK_DEPTH_OFS = getofs('jf_extra_stack_depth') def jitframe_trace(obj_addr, prev): if prev == llmemory.NULL: diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -131,7 +131,7 @@ self.float_const_neg_addr = float_constants self.float_const_abs_addr = float_constants + 16 - def _build_stack_check_failure(self): + def build_frame_realloc_slowpath(self): mc = codebuf.MachineCodeBlockWrapper() self._push_all_regs_to_frame(mc, [], self.cpu.supports_floats) # this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame @@ -153,12 +153,15 @@ mc.MOV_sr(0, ebp.value) # align + extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth') + mc.MOV_bi(extra_ofs, align * WORD) self._store_and_reset_exception(mc, None, ebx, ecx) mc.CALL(imm(self.cpu.realloc_frame)) self._restore_exception(mc, None, ebx, ecx) mc.ADD_ri(esp.value, (align - 1) * WORD) mc.MOV_rr(ebp.value, eax.value) + mc.MOV_bi(extra_ofs, 0) gcrootmap = self.cpu.gc_ll_descr.gcrootmap @@ -169,7 +172,7 @@ mc.MOV_bi(gcmap_ofs, 0) self._pop_all_regs_from_frame(mc, [], self.cpu.supports_floats) mc.RET() - self._stack_check_failure = mc.materialize(self.cpu.asmmemmgr, []) + self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, []) def _build_malloc_slowpath(self): """ While arriving on slowpath, we have a gcpattern on stack, @@ -182,8 +185,6 @@ mc.MOV_rs(ecx.value, WORD) mc.MOV_br(ofs, ecx.value) addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr() - # XXX investigate if we need to save callee-saved registers - # on the frame mc.SUB_rr(edi.value, eax.value) # compute the size we want # the arg is already in edi mc.SUB_ri(esp.value, 16 - WORD) @@ -194,6 +195,8 @@ elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'): # for tests only mc.MOV_rr(esi.value, ebp.value) + extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth') + mc.MOV_bi(extra_ofs, 16) mc.CALL(imm(addr)) mc.ADD_ri(esp.value, 16 - WORD) mc.TEST_rr(eax.value, eax.value) @@ -202,6 +205,7 @@ # nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr() self._reload_frame_if_necessary(mc) + mc.MOV_bi(extra_ofs, 0) self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats) mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI # clear the gc pattern @@ -643,7 +647,7 @@ mc.MOV_si(WORD, expected_size) ofs2 = mc.get_relative_pos() - 4 self.push_gcmap(mc, gcmap, mov=True) - mc.CALL(imm(self._stack_check_failure)) + mc.CALL(imm(self._frame_realloc_slowpath)) # patch the JG above offset = mc.get_relative_pos() - jg_location assert 0 < offset <= 127 @@ -815,10 +819,6 @@ return rst def _call_header_shadowstack(self, gcrootmap): - # we don't *really* have to do it, since we have the frame - # being referenced by the caller. However, we still do it - # to provide a place where we can read the frame from, in case - # we need to reload it after a collection rst = self._load_shadowstack_top_in_ebx(self.mc, gcrootmap) self.mc.MOV_mr((ebx.value, 0), ebp.value) # MOV [ebx], ebp self.mc.ADD_ri(ebx.value, WORD) @@ -1091,6 +1091,9 @@ stack_depth = align_stack_words(stack_depth) align = (stack_depth - PASS_ON_MY_FRAME) self.mc.SUB_ri(esp.value, align * WORD) + if can_collect: + ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth') + self.mc.MOV_bi(ofs, align * WORD) else: align = 0 p = 0 @@ -1121,7 +1124,9 @@ self.mc.CALL(x) if can_collect: self._reload_frame_if_necessary(self.mc) - if can_collect: + if align: + ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth') + self.mc.MOV_bi(ofs, 0) self.pop_gcmap(self.mc) # if callconv != FFI_DEFAULT_ABI: @@ -1161,6 +1166,9 @@ if stack_depth > PASS_ON_MY_FRAME: stack_depth = align_stack_words(stack_depth) align = (stack_depth - PASS_ON_MY_FRAME) + if can_collect: + ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth') + self.mc.MOV_bi(ofs, align * WORD) self.mc.SUB_ri(esp.value, align * WORD) for i in range(start, len(arglocs)): loc = arglocs[i] @@ -1221,6 +1229,9 @@ self.mc.CALL(x) if can_collect: self._reload_frame_if_necessary(self.mc) + if align: + ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth') + self.mc.MOV_bi(ofs, 0) if align: self.mc.ADD_ri(esp.value, align * WORD) if can_collect: diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -723,7 +723,8 @@ # - at least the non-callee-saved registers # # - for shadowstack, we assume that any call can collect, and we - # save also the callee-saved registers that contain GC pointers. + # save also the callee-saved registers that contain GC pointers + # XXX for asmgcc too for now. # # - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs # anyway, in case we need to do cpu.force(). The issue is that @@ -734,7 +735,8 @@ self.xrm.before_call(force_store, save_all_regs=save_all_regs) if not save_all_regs: gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap - if gcrootmap and gcrootmap.is_shadow_stack: + # we save all the registers for shadowstack and asmgcc for now + if gcrootmap: # and gcrootmap.is_shadow_stack: save_all_regs = 2 self.rm.before_call(force_store, save_all_regs=save_all_regs) if op.result is not None: diff --git a/rpython/rtyper/memory/gctransform/asmgcroot.py b/rpython/rtyper/memory/gctransform/asmgcroot.py --- a/rpython/rtyper/memory/gctransform/asmgcroot.py +++ b/rpython/rtyper/memory/gctransform/asmgcroot.py @@ -135,15 +135,10 @@ self.walk_stack_from() self._asm_callback = _asm_callback self._shape_decompressor = ShapeDecompressor() - if hasattr(gctransformer.translator, '_jit2gc'): + self._with_jit = hasattr(gctransformer.translator, '_jit2gc') + if self._with_jit: jit2gc = gctransformer.translator._jit2gc - self._extra_gcmapstart = jit2gc['gcmapstart'] - self._extra_gcmapend = jit2gc['gcmapend'] - self._extra_mark_sorted = jit2gc['gcmarksorted'] - else: - self._extra_gcmapstart = lambda: llmemory.NULL - self._extra_gcmapend = lambda: llmemory.NULL - self._extra_mark_sorted = lambda: True + self.frame_tid = jit2gc['frame_tid'] def need_stacklet_support(self, gctransformer, getfn): # stacklet support: BIG HACK for rlib.rstacklet @@ -359,12 +354,12 @@ # try to locate the caller function based on retaddr. # set up self._shape_decompressor. # - self.locate_caller_based_on_retaddr(retaddr) + ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0] + self.locate_caller_based_on_retaddr(retaddr, ebp_in_caller) # # found! Enumerate the GC roots in the caller frame # collect_stack_root = self.gcdata._gc_collect_stack_root - ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0] gc = self.gc while True: location = self._shape_decompressor.next() @@ -391,46 +386,40 @@ # of the entry point, stop walking" return caller.frame_address != llmemory.NULL - def locate_caller_based_on_retaddr(self, retaddr): + def locate_caller_based_on_retaddr(self, retaddr, ebp_in_caller): gcmapstart = llop.gc_asmgcroot_static(llmemory.Address, 0) gcmapend = llop.gc_asmgcroot_static(llmemory.Address, 1) item = search_in_gcmap(gcmapstart, gcmapend, retaddr) if item: self._shape_decompressor.setpos(item.signed[1]) return - gcmapstart2 = self._extra_gcmapstart() - gcmapend2 = self._extra_gcmapend() - if gcmapstart2 != gcmapend2: - # we have a non-empty JIT-produced table to look in - item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr) + + if not self._shape_decompressor.sorted: + # the item may have been not found because the main array was + # not sorted. Sort it and try again. + win32_follow_gcmap_jmp(gcmapstart, gcmapend) + sort_gcmap(gcmapstart, gcmapend) + self._shape_decompressor.sorted = True + item = search_in_gcmap(gcmapstart, gcmapend, retaddr) if item: - self._shape_decompressor.setaddr(item) + self._shape_decompressor.setpos(item.signed[1]) return - # maybe the JIT-produced table is not sorted? - was_already_sorted = self._extra_mark_sorted() - if not was_already_sorted: - sort_gcmap(gcmapstart2, gcmapend2) - item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr) - if item: - self._shape_decompressor.setaddr(item) - return - # there is a rare risk that the array contains *two* entries - # with the same key, one of which is dead (null value), and we - # found the dead one above. Solve this case by replacing all - # dead keys with nulls, sorting again, and then trying again. - replace_dead_entries_with_nulls(gcmapstart2, gcmapend2) - sort_gcmap(gcmapstart2, gcmapend2) - item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr) - if item: - self._shape_decompressor.setaddr(item) - return - # the item may have been not found because the main array was - # not sorted. Sort it and try again. - win32_follow_gcmap_jmp(gcmapstart, gcmapend) - sort_gcmap(gcmapstart, gcmapend) - item = search_in_gcmap(gcmapstart, gcmapend, retaddr) - if item: - self._shape_decompressor.setpos(item.signed[1]) + + if self._with_jit: + # item not found. We assume that it's a JIT-generated + # location -- but we check for consistency that ebp points + # to a JITFRAME object. + from rpython.jit.backend.llsupport.jitframe import STACK_DEPTH_OFS + + tid = self.gc.get_type_id(ebp_in_caller) + ll_assert(rffi.cast(lltype.Signed, tid) == + rffi.cast(lltype.Signed, self.frame_tid), + "found a stack frame that does not belong " + "anywhere I know, bug in asmgcc") + # fish the depth + extra_stack_depth = (ebp_in_caller + STACK_DEPTH_OFS).signed[0] + extra_stack_depth //= rffi.sizeof(lltype.Signed) + self._shape_decompressor.setjitframe(extra_stack_depth) return llop.debug_fatalerror(lltype.Void, "cannot find gc roots!") @@ -561,27 +550,83 @@ class ShapeDecompressor: _alloc_flavor_ = "raw" + sorted = False + def setpos(self, pos): if pos < 0: pos = ~ pos # can ignore this "range" marker here gccallshapes = llop.gc_asmgcroot_static(llmemory.Address, 2) self.addr = gccallshapes + pos - def setaddr(self, addr): - self.addr = addr + def setjitframe(self, extra_stack_depth): + self.addr = llmemory.NULL + self.jit_index = 0 + self.extra_stack_depth = extra_stack_depth def next(self): - value = 0 addr = self.addr - while True: - b = ord(addr.char[0]) - addr += 1 - value += b - if b < 0x80: - break - value = (value - 0x80) << 7 - self.addr = addr - return value + if addr: + # case "outside the jit" + value = 0 + while True: + b = ord(addr.char[0]) + addr += 1 + value += b + if b < 0x80: + break + value = (value - 0x80) << 7 + self.addr = addr + return value + else: + # case "in the jit" + from rpython.jit.backend.x86.arch import FRAME_FIXED_SIZE + from rpython.jit.backend.x86.arch import PASS_ON_MY_FRAME + index = self.jit_index + self.jit_index = index + 1 + if index == 0: + # the jitframe is an object in EBP + return LOC_REG | ((INDEX_OF_EBP + 1) << 2) + if index == 1: + return 0 + # the remaining returned values should be: + # saved %rbp + # saved %r15 or on 32bit: + # saved %r14 saved %ebp + # saved %r13 saved %edi + # saved %r12 saved %esi + # saved %rbx saved %ebx + # return addr return addr + if IS_64_BITS: + stack_depth = PASS_ON_MY_FRAME + self.extra_stack_depth + if index == 2: # rbp + return LOC_ESP_PLUS | (stack_depth << 2) + if index == 3: # r15 + return LOC_ESP_PLUS | ((stack_depth + 5) << 2) + if index == 4: # r14 + return LOC_ESP_PLUS | ((stack_depth + 4) << 2) + if index == 5: # r13 + return LOC_ESP_PLUS | ((stack_depth + 3) << 2) + if index == 6: # r12 + return LOC_ESP_PLUS | ((stack_depth + 2) << 2) + if index == 7: # rbx + return LOC_ESP_PLUS | ((stack_depth + 1) << 2) + if index == 8: # return addr + return (LOC_ESP_PLUS | + ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2)) + else: + if index == 2: # ebp + return LOC_ESP_PLUS | (stack_depth << 2) + if index == 3: # edi + return LOC_ESP_PLUS | ((stack_depth + 3) << 2) + if index == 4: # esi + return LOC_ESP_PLUS | ((stack_depth + 2) << 2) + if index == 5: # ebx + return LOC_ESP_PLUS | ((stack_depth + 1) << 2) + if index == 6: # return addr + return (LOC_ESP_PLUS | + ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2)) + llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index") + return 0 # annotator fix # ____________________________________________________________ _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit