Author: Maciej Fijalkowski <[email protected]>
Branch: jitframe-on-heap
Changeset: r61497:cd6fe2babfd9
Date: 2013-02-20 18:52 +0100
http://bitbucket.org/pypy/pypy/changeset/cd6fe2babfd9/
Log: (arigo, fijal, alex lurking) Implement asmgcc for this branch
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -64,7 +64,7 @@
self._build_wb_slowpath(True)
self._build_wb_slowpath(False, for_frame=True)
# only one of those
- self._build_stack_check_failure()
+ self.build_frame_realloc_slowpath()
if self.cpu.supports_floats:
self._build_failure_recovery(False, withfloats=True)
self._build_failure_recovery(True, withfloats=True)
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -120,7 +120,7 @@
descrs = JitFrameDescrs()
descrs.arraydescr = cpu.arraydescrof(jitframe.JITFRAME)
for name in ['jf_descr', 'jf_guard_exc', 'jf_force_descr',
- 'jf_frame_info', 'jf_gcmap']:
+ 'jf_frame_info', 'jf_gcmap', 'jf_extra_stack_depth']:
setattr(descrs, name, cpu.fielddescrof(jitframe.JITFRAME, name))
descrs.jfi_frame_size = cpu.fielddescrof(jitframe.JITFRAMEINFO,
'jfi_frame_size')
@@ -373,7 +373,6 @@
translator = self.translator
self.layoutbuilder = framework.TransformerLayoutBuilder(translator)
self.layoutbuilder.delay_encoding()
- # XXX this can probably die horrible death
translator._jit2gc = {'layoutbuilder': self.layoutbuilder}
def _setup_gcclass(self):
@@ -391,6 +390,8 @@
def _setup_tid(self):
self.fielddescr_tid = get_field_descr(self, self.GCClass.HDR, 'tid')
+ frame_tid = self.layoutbuilder.get_type_id(jitframe.JITFRAME)
+ self.translator._jit2gc['frame_tid'] = frame_tid
def _setup_write_barrier(self):
self.WB_FUNCPTR = lltype.Ptr(lltype.FuncType(
diff --git a/rpython/jit/backend/llsupport/jitframe.py b/rpython/jit/backend/llsupport/jitframe.py
--- a/rpython/jit/backend/llsupport/jitframe.py
+++ b/rpython/jit/backend/llsupport/jitframe.py
@@ -55,6 +55,8 @@
('jf_force_descr', llmemory.GCREF),
# a map of GC pointers
('jf_gcmap', lltype.Ptr(GCMAP)),
+ # how much we decrease stack pointer. Used around calls and malloc slowpath
+ ('jf_extra_stack_depth', lltype.Signed),
# For the front-end: a GCREF for the savedata
('jf_savedata', llmemory.GCREF),
# For GUARD_(NO)_EXCEPTION and GUARD_NOT_FORCED: the exception we
@@ -84,6 +86,7 @@
LENGTHOFS = llmemory.arraylengthoffset(JITFRAME.jf_frame)
SIGN_SIZE = llmemory.sizeof(lltype.Signed)
UNSIGN_SIZE = llmemory.sizeof(lltype.Unsigned)
+STACK_DEPTH_OFS = getofs('jf_extra_stack_depth')
def jitframe_trace(obj_addr, prev):
if prev == llmemory.NULL:
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -131,7 +131,7 @@
self.float_const_neg_addr = float_constants
self.float_const_abs_addr = float_constants + 16
- def _build_stack_check_failure(self):
+ def build_frame_realloc_slowpath(self):
mc = codebuf.MachineCodeBlockWrapper()
self._push_all_regs_to_frame(mc, [], self.cpu.supports_floats)
# this is the gcmap stored by push_gcmap(mov=True) in _check_stack_frame
@@ -153,12 +153,15 @@
mc.MOV_sr(0, ebp.value)
# align
+ extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+ mc.MOV_bi(extra_ofs, align * WORD)
self._store_and_reset_exception(mc, None, ebx, ecx)
mc.CALL(imm(self.cpu.realloc_frame))
self._restore_exception(mc, None, ebx, ecx)
mc.ADD_ri(esp.value, (align - 1) * WORD)
mc.MOV_rr(ebp.value, eax.value)
+ mc.MOV_bi(extra_ofs, 0)
gcrootmap = self.cpu.gc_ll_descr.gcrootmap
@@ -169,7 +172,7 @@
mc.MOV_bi(gcmap_ofs, 0)
self._pop_all_regs_from_frame(mc, [], self.cpu.supports_floats)
mc.RET()
- self._stack_check_failure = mc.materialize(self.cpu.asmmemmgr, [])
+ self._frame_realloc_slowpath = mc.materialize(self.cpu.asmmemmgr, [])
def _build_malloc_slowpath(self):
""" While arriving on slowpath, we have a gcpattern on stack,
@@ -182,8 +185,6 @@
mc.MOV_rs(ecx.value, WORD)
mc.MOV_br(ofs, ecx.value)
addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
- # XXX investigate if we need to save callee-saved registers
- # on the frame
mc.SUB_rr(edi.value, eax.value) # compute the size we want
# the arg is already in edi
mc.SUB_ri(esp.value, 16 - WORD)
@@ -194,6 +195,8 @@
elif hasattr(self.cpu.gc_ll_descr, 'passes_frame'):
# for tests only
mc.MOV_rr(esi.value, ebp.value)
+ extra_ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+ mc.MOV_bi(extra_ofs, 16)
mc.CALL(imm(addr))
mc.ADD_ri(esp.value, 16 - WORD)
mc.TEST_rr(eax.value, eax.value)
@@ -202,6 +205,7 @@
#
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
self._reload_frame_if_necessary(mc)
+ mc.MOV_bi(extra_ofs, 0)
self._pop_all_regs_from_frame(mc, [eax, edi], self.cpu.supports_floats)
mc.MOV(edi, heap(nursery_free_adr)) # load this in EDI
# clear the gc pattern
@@ -643,7 +647,7 @@
mc.MOV_si(WORD, expected_size)
ofs2 = mc.get_relative_pos() - 4
self.push_gcmap(mc, gcmap, mov=True)
- mc.CALL(imm(self._stack_check_failure))
+ mc.CALL(imm(self._frame_realloc_slowpath))
# patch the JG above
offset = mc.get_relative_pos() - jg_location
assert 0 < offset <= 127
@@ -815,10 +819,6 @@
return rst
def _call_header_shadowstack(self, gcrootmap):
- # we don't *really* have to do it, since we have the frame
- # being referenced by the caller. However, we still do it
- # to provide a place where we can read the frame from, in case
- # we need to reload it after a collection
rst = self._load_shadowstack_top_in_ebx(self.mc, gcrootmap)
self.mc.MOV_mr((ebx.value, 0), ebp.value) # MOV [ebx], ebp
self.mc.ADD_ri(ebx.value, WORD)
@@ -1091,6 +1091,9 @@
stack_depth = align_stack_words(stack_depth)
align = (stack_depth - PASS_ON_MY_FRAME)
self.mc.SUB_ri(esp.value, align * WORD)
+ if can_collect:
+ ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+ self.mc.MOV_bi(ofs, align * WORD)
else:
align = 0
p = 0
@@ -1121,7 +1124,9 @@
self.mc.CALL(x)
if can_collect:
self._reload_frame_if_necessary(self.mc)
- if can_collect:
+ if align:
+ ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+ self.mc.MOV_bi(ofs, 0)
self.pop_gcmap(self.mc)
#
if callconv != FFI_DEFAULT_ABI:
@@ -1161,6 +1166,9 @@
if stack_depth > PASS_ON_MY_FRAME:
stack_depth = align_stack_words(stack_depth)
align = (stack_depth - PASS_ON_MY_FRAME)
+ if can_collect:
+ ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+ self.mc.MOV_bi(ofs, align * WORD)
self.mc.SUB_ri(esp.value, align * WORD)
for i in range(start, len(arglocs)):
loc = arglocs[i]
@@ -1221,6 +1229,9 @@
self.mc.CALL(x)
if can_collect:
self._reload_frame_if_necessary(self.mc)
+ if align:
+ ofs = self.cpu.get_ofs_of_frame_field('jf_extra_stack_depth')
+ self.mc.MOV_bi(ofs, 0)
if align:
self.mc.ADD_ri(esp.value, align * WORD)
if can_collect:
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -723,7 +723,8 @@
# - at least the non-callee-saved registers
#
# - for shadowstack, we assume that any call can collect, and we
- # save also the callee-saved registers that contain GC pointers.
+ # save also the callee-saved registers that contain GC pointers
+ # XXX for asmgcc too for now.
#
# - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
# anyway, in case we need to do cpu.force(). The issue is that
@@ -734,7 +735,8 @@
self.xrm.before_call(force_store, save_all_regs=save_all_regs)
if not save_all_regs:
gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
- if gcrootmap and gcrootmap.is_shadow_stack:
+ # we save all the registers for shadowstack and asmgcc for now
+ if gcrootmap: # and gcrootmap.is_shadow_stack:
save_all_regs = 2
self.rm.before_call(force_store, save_all_regs=save_all_regs)
if op.result is not None:
diff --git a/rpython/rtyper/memory/gctransform/asmgcroot.py b/rpython/rtyper/memory/gctransform/asmgcroot.py
--- a/rpython/rtyper/memory/gctransform/asmgcroot.py
+++ b/rpython/rtyper/memory/gctransform/asmgcroot.py
@@ -135,15 +135,10 @@
self.walk_stack_from()
self._asm_callback = _asm_callback
self._shape_decompressor = ShapeDecompressor()
- if hasattr(gctransformer.translator, '_jit2gc'):
+ self._with_jit = hasattr(gctransformer.translator, '_jit2gc')
+ if self._with_jit:
jit2gc = gctransformer.translator._jit2gc
- self._extra_gcmapstart = jit2gc['gcmapstart']
- self._extra_gcmapend = jit2gc['gcmapend']
- self._extra_mark_sorted = jit2gc['gcmarksorted']
- else:
- self._extra_gcmapstart = lambda: llmemory.NULL
- self._extra_gcmapend = lambda: llmemory.NULL
- self._extra_mark_sorted = lambda: True
+ self.frame_tid = jit2gc['frame_tid']
def need_stacklet_support(self, gctransformer, getfn):
# stacklet support: BIG HACK for rlib.rstacklet
@@ -359,12 +354,12 @@
# try to locate the caller function based on retaddr.
# set up self._shape_decompressor.
#
- self.locate_caller_based_on_retaddr(retaddr)
+ ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0]
+ self.locate_caller_based_on_retaddr(retaddr, ebp_in_caller)
#
# found! Enumerate the GC roots in the caller frame
#
collect_stack_root = self.gcdata._gc_collect_stack_root
- ebp_in_caller = callee.regs_stored_at[INDEX_OF_EBP].address[0]
gc = self.gc
while True:
location = self._shape_decompressor.next()
@@ -391,46 +386,40 @@
# of the entry point, stop walking"
return caller.frame_address != llmemory.NULL
- def locate_caller_based_on_retaddr(self, retaddr):
+ def locate_caller_based_on_retaddr(self, retaddr, ebp_in_caller):
gcmapstart = llop.gc_asmgcroot_static(llmemory.Address, 0)
gcmapend = llop.gc_asmgcroot_static(llmemory.Address, 1)
item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
if item:
self._shape_decompressor.setpos(item.signed[1])
return
- gcmapstart2 = self._extra_gcmapstart()
- gcmapend2 = self._extra_gcmapend()
- if gcmapstart2 != gcmapend2:
- # we have a non-empty JIT-produced table to look in
- item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr)
+
+ if not self._shape_decompressor.sorted:
+ # the item may have been not found because the main array was
+ # not sorted. Sort it and try again.
+ win32_follow_gcmap_jmp(gcmapstart, gcmapend)
+ sort_gcmap(gcmapstart, gcmapend)
+ self._shape_decompressor.sorted = True
+ item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
if item:
- self._shape_decompressor.setaddr(item)
+ self._shape_decompressor.setpos(item.signed[1])
return
- # maybe the JIT-produced table is not sorted?
- was_already_sorted = self._extra_mark_sorted()
- if not was_already_sorted:
- sort_gcmap(gcmapstart2, gcmapend2)
- item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr)
- if item:
- self._shape_decompressor.setaddr(item)
- return
- # there is a rare risk that the array contains *two* entries
- # with the same key, one of which is dead (null value), and we
- # found the dead one above. Solve this case by replacing all
- # dead keys with nulls, sorting again, and then trying again.
- replace_dead_entries_with_nulls(gcmapstart2, gcmapend2)
- sort_gcmap(gcmapstart2, gcmapend2)
- item = search_in_gcmap2(gcmapstart2, gcmapend2, retaddr)
- if item:
- self._shape_decompressor.setaddr(item)
- return
- # the item may have been not found because the main array was
- # not sorted. Sort it and try again.
- win32_follow_gcmap_jmp(gcmapstart, gcmapend)
- sort_gcmap(gcmapstart, gcmapend)
- item = search_in_gcmap(gcmapstart, gcmapend, retaddr)
- if item:
- self._shape_decompressor.setpos(item.signed[1])
+
+ if self._with_jit:
+ # item not found. We assume that it's a JIT-generated
+ # location -- but we check for consistency that ebp points
+ # to a JITFRAME object.
+ from rpython.jit.backend.llsupport.jitframe import STACK_DEPTH_OFS
+
+ tid = self.gc.get_type_id(ebp_in_caller)
+ ll_assert(rffi.cast(lltype.Signed, tid) ==
+ rffi.cast(lltype.Signed, self.frame_tid),
+ "found a stack frame that does not belong "
+ "anywhere I know, bug in asmgcc")
+ # fish the depth
+ extra_stack_depth = (ebp_in_caller + STACK_DEPTH_OFS).signed[0]
+ extra_stack_depth //= rffi.sizeof(lltype.Signed)
+ self._shape_decompressor.setjitframe(extra_stack_depth)
return
llop.debug_fatalerror(lltype.Void, "cannot find gc roots!")
@@ -561,27 +550,83 @@
class ShapeDecompressor:
_alloc_flavor_ = "raw"
+ sorted = False
+
def setpos(self, pos):
if pos < 0:
pos = ~ pos # can ignore this "range" marker here
gccallshapes = llop.gc_asmgcroot_static(llmemory.Address, 2)
self.addr = gccallshapes + pos
- def setaddr(self, addr):
- self.addr = addr
+ def setjitframe(self, extra_stack_depth):
+ self.addr = llmemory.NULL
+ self.jit_index = 0
+ self.extra_stack_depth = extra_stack_depth
def next(self):
- value = 0
addr = self.addr
- while True:
- b = ord(addr.char[0])
- addr += 1
- value += b
- if b < 0x80:
- break
- value = (value - 0x80) << 7
- self.addr = addr
- return value
+ if addr:
+ # case "outside the jit"
+ value = 0
+ while True:
+ b = ord(addr.char[0])
+ addr += 1
+ value += b
+ if b < 0x80:
+ break
+ value = (value - 0x80) << 7
+ self.addr = addr
+ return value
+ else:
+ # case "in the jit"
+ from rpython.jit.backend.x86.arch import FRAME_FIXED_SIZE
+ from rpython.jit.backend.x86.arch import PASS_ON_MY_FRAME
+ index = self.jit_index
+ self.jit_index = index + 1
+ if index == 0:
+ # the jitframe is an object in EBP
+ return LOC_REG | ((INDEX_OF_EBP + 1) << 2)
+ if index == 1:
+ return 0
+ # the remaining returned values should be:
+ # saved %rbp
+ # saved %r15 or on 32bit:
+ # saved %r14 saved %ebp
+ # saved %r13 saved %edi
+ # saved %r12 saved %esi
+ # saved %rbx saved %ebx
+ # return addr return addr
+ if IS_64_BITS:
+ stack_depth = PASS_ON_MY_FRAME + self.extra_stack_depth
+ if index == 2: # rbp
+ return LOC_ESP_PLUS | (stack_depth << 2)
+ if index == 3: # r15
+ return LOC_ESP_PLUS | ((stack_depth + 5) << 2)
+ if index == 4: # r14
+ return LOC_ESP_PLUS | ((stack_depth + 4) << 2)
+ if index == 5: # r13
+ return LOC_ESP_PLUS | ((stack_depth + 3) << 2)
+ if index == 6: # r12
+ return LOC_ESP_PLUS | ((stack_depth + 2) << 2)
+ if index == 7: # rbx
+ return LOC_ESP_PLUS | ((stack_depth + 1) << 2)
+ if index == 8: # return addr
+ return (LOC_ESP_PLUS |
+ ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2))
+ else:
+ if index == 2: # ebp
+ return LOC_ESP_PLUS | (stack_depth << 2)
+ if index == 3: # edi
+ return LOC_ESP_PLUS | ((stack_depth + 3) << 2)
+ if index == 4: # esi
+ return LOC_ESP_PLUS | ((stack_depth + 2) << 2)
+ if index == 5: # ebx
+ return LOC_ESP_PLUS | ((stack_depth + 1) << 2)
+ if index == 6: # return addr
+ return (LOC_ESP_PLUS |
+ ((FRAME_FIXED_SIZE + self.extra_stack_depth) << 2))
+ llop.debug_fatalerror(lltype.Void, "asmgcroot: invalid index")
+ return 0 # annotator fix
# ____________________________________________________________
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit