Author: Armin Rigo <[email protected]>
Branch: vmprof
Changeset: r75316:73f3e2793377
Date: 2015-01-13 17:43 +0100
http://bitbucket.org/pypy/pypy/changeset/73f3e2793377/
Log: (fijal, arigo)
Work towards maintaining the current frame's stack depth everywhere
systematically.
diff --git a/rpython/jit/backend/x86/arch.py b/rpython/jit/backend/x86/arch.py
--- a/rpython/jit/backend/x86/arch.py
+++ b/rpython/jit/backend/x86/arch.py
@@ -47,3 +47,6 @@
THREADLOCAL_OFS = (FRAME_FIXED_SIZE - 1) * WORD
assert PASS_ON_MY_FRAME >= 12 # asmgcc needs at least JIT_USE_WORDS + 3
+
+# return address, followed by FRAME_FIXED_SIZE words
+DEFAULT_FRAME_BYTES = (1 + FRAME_FIXED_SIZE) * WORD
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -18,7 +18,8 @@
from rpython.jit.backend.llsupport.regalloc import (get_scale,
valid_addressing_size)
from rpython.jit.backend.x86.arch import (FRAME_FIXED_SIZE, WORD, IS_X86_64,
JITFRAME_FIXED_SIZE, IS_X86_32,
- PASS_ON_MY_FRAME, THREADLOCAL_OFS)
+ PASS_ON_MY_FRAME, THREADLOCAL_OFS,
+ DEFAULT_FRAME_BYTES)
from rpython.jit.backend.x86.regloc import (eax, ecx, edx, ebx, esp, ebp, esi,
xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, r8, r9, r10, r11, edi,
r12, r13, r14, r15, X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG,
@@ -266,6 +267,10 @@
# the correct "ret" arg
offset = mc.get_relative_pos() - jz_location
mc.overwrite32(jz_location-4, offset)
+ # From now on this function is basically "merged" with
+ # its caller and so contains DEFAULT_FRAME_BYTES bytes
+ # plus my own return address, which we'll ignore next
+ mc.force_frame_size(DEFAULT_FRAME_BYTES + WORD)
mc.ADD_ri(esp.value, WORD)
mc.JMP(imm(self.propagate_exception_path))
#
@@ -277,6 +282,7 @@
return # not supported (for tests, or non-translated)
#
self.mc = codebuf.MachineCodeBlockWrapper()
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
#
# read and reset the current exception
@@ -298,22 +304,14 @@
if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
return # no stack check (for tests, or non-translated)
#
- # make a "function" that is called immediately at the start of
- # an assembler function. In particular, the stack looks like:
- #
- # | ... | <-- aligned to a multiple of 16
- # | retaddr of caller |
- # | my own retaddr | <-- esp
- # +---------------------+
- #
+ # make a regular function that is called from a point near the start
+ # of an assembler function (after it adjusts the stack and saves
+ # registers).
mc = codebuf.MachineCodeBlockWrapper()
#
if IS_X86_64:
- # on the x86_64, we have to save all the registers that may
- # have been used to pass arguments. Note that we pass only
- # one argument, that is the frame
mc.MOV_rr(edi.value, esp.value)
- mc.SUB_ri(esp.value, WORD)
+ mc.SUB_ri(esp.value, WORD) # alignment
#
if IS_X86_32:
mc.SUB_ri(esp.value, 2*WORD) # alignment
@@ -338,7 +336,10 @@
offset = mc.get_relative_pos() - jnz_location
assert 0 < offset <= 127
mc.overwrite(jnz_location-1, chr(offset))
- # adjust the esp to point back to the previous return
+ # From now on this function is basically "merged" with
+ # its caller and so contains DEFAULT_FRAME_BYTES bytes
+ # plus my own return address, which we'll ignore next
+ mc.force_frame_size(DEFAULT_FRAME_BYTES + WORD)
mc.ADD_ri(esp.value, WORD)
mc.JMP(imm(self.propagate_exception_path))
#
@@ -416,6 +417,8 @@
mc.LEA_rs(esp.value, 2 * WORD)
self._pop_all_regs_from_frame(mc, [], withfloats, callee_only=True)
mc.RET16_i(WORD)
+ # Note that wb_slowpath[0..3] end with a RET16_i, which must be
+ # taken care of in the caller by stack_frame_size_delta(-WORD)
else:
if IS_X86_32:
mc.MOV_rs(edx.value, 4 * WORD)
@@ -521,6 +524,7 @@
assert len(set(inputargs)) == len(inputargs)
self.setup(original_loop_token)
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
descr_number = compute_unique_id(faildescr)
if log:
operations = self._inject_debugging_code(faildescr, operations,
@@ -693,6 +697,7 @@
# place, but clobber the recovery stub with a jump to the real
# target.
mc = codebuf.MachineCodeBlockWrapper()
+ mc.force_frame_size(DEFAULT_FRAME_BYTES)
if rx86.fits_in_32bits(offset):
mc.writeimm32(offset)
mc.copy_to_raw_memory(adr_jump_offset)
@@ -1763,6 +1768,7 @@
def generate_propagate_error_64(self):
assert WORD == 8
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
startpos = self.mc.get_relative_pos()
self.mc.JMP(imm(self.propagate_exception_path))
return startpos
@@ -1770,6 +1776,7 @@
def generate_quick_failure(self, guardtok):
""" Gather information about failure
"""
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
startpos = self.mc.get_relative_pos()
fail_descr, target = self.store_info_on_descr(startpos, guardtok)
self.mc.PUSH(imm(fail_descr))
@@ -1845,6 +1852,9 @@
def _build_failure_recovery(self, exc, withfloats=False):
mc = codebuf.MachineCodeBlockWrapper()
+ # this is jumped to, from a stack that has DEFAULT_FRAME_BYTES
+ # followed by 2 extra words just pushed
+ mc.force_frame_size(DEFAULT_FRAME_BYTES + 2 * WORD)
self.mc = mc
self._push_all_regs_to_frame(mc, [], withfloats)
@@ -1916,6 +1926,7 @@
self.mc.J_il(rx86.Conditions[condition], 0)
else:
self.mc.JMP_l(0)
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4
self.pending_guard_tokens.append(guard_token)
@@ -2006,6 +2017,7 @@
offset = jmp_location - je_location
assert 0 < offset <= 127
self.mc.overwrite(je_location - 1, chr(offset))
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
#
return jmp_location
@@ -2090,6 +2102,8 @@
if is_frame and align_stack:
mc.SUB_ri(esp.value, 16 - WORD) # erase the return address
mc.CALL(imm(self.wb_slowpath[helper_num]))
+ if not is_frame:
+ mc.stack_frame_size_delta(-WORD)
if is_frame and align_stack:
mc.ADD_ri(esp.value, 16 - WORD) # erase the return address
@@ -2326,6 +2340,7 @@
offset = self.mc.get_relative_pos() - jmp_adr1
assert 0 < offset <= 127
self.mc.overwrite(jmp_adr1-1, chr(offset))
+ self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
# write down the tid, but not if it's the result of the CALL
self.mc.MOV(mem(eax, 0), imm(arraydescr.tid))
# while we're at it, this line is not needed if we've done the CALL
diff --git a/rpython/jit/backend/x86/callbuilder.py b/rpython/jit/backend/x86/callbuilder.py
--- a/rpython/jit/backend/x86/callbuilder.py
+++ b/rpython/jit/backend/x86/callbuilder.py
@@ -38,7 +38,7 @@
if not self.fnloc_is_immediate:
self.fnloc = None
self.arglocs = arglocs + [fnloc]
- self.current_esp = 0 # 0 or (usually) negative, counted in bytes
+ self.start_frame_size = self.mc._frame_size
def select_call_release_gil_mode(self):
AbstractCallBuilder.select_call_release_gil_mode(self)
@@ -50,13 +50,15 @@
def subtract_esp_aligned(self, count):
if count > 0:
align = align_stack_words(count)
- self.current_esp -= align * WORD
self.mc.SUB_ri(esp.value, align * WORD)
+ def get_current_esp(self):
+ return self.start_frame_size - self.mc._frame_size
+
def restore_stack_pointer(self, target_esp=0):
- if self.current_esp != target_esp:
- self.mc.ADD_ri(esp.value, target_esp - self.current_esp)
- self.current_esp = target_esp
+ current_esp = self.get_current_esp()
+ if current_esp != target_esp:
+ self.mc.ADD_ri(esp.value, target_esp - current_esp)
def load_result(self):
"""Overridden in CallBuilder32 and CallBuilder64"""
@@ -79,9 +81,10 @@
# after the rearrangements done just before, ignoring the return
# value eax, if necessary
assert not self.is_call_release_gil
- self.change_extra_stack_depth = (self.current_esp != 0)
+ current_esp = self.get_current_esp()
+ self.change_extra_stack_depth = (current_esp != 0)
if self.change_extra_stack_depth:
- self.asm.set_extra_stack_depth(self.mc, -self.current_esp)
+ self.asm.set_extra_stack_depth(self.mc, -current_esp)
noregs = self.asm.cpu.gc_ll_descr.is_shadow_stack()
gcmap = self.asm._regalloc.get_gcmap([eax], noregs=noregs)
self.asm.push_gcmap(self.mc, gcmap, store=True)
@@ -122,7 +125,7 @@
# and 5/7 words as described for asmgcroot.ASM_FRAMEDATA, for a
# total size of JIT_USE_WORDS. This structure is found at
# [ESP+css].
- css = -self.current_esp + (
+ css = -self.get_current_esp() + (
WORD * (PASS_ON_MY_FRAME - asmgcroot.JIT_USE_WORDS))
assert css >= 2 * WORD
# Save ebp
@@ -307,7 +310,10 @@
else:
self.mc.CALL(self.fnloc)
if self.callconv != FFI_DEFAULT_ABI:
- self.current_esp += self._fix_stdcall(self.callconv)
+ # in the STDCALL ABI, the CALL above has an effect on
+ # the stack depth. Adjust 'mc._frame_size'.
+ delta = self._fix_stdcall(self.callconv)
+ self.mc.stack_frame_size_delta(-delta)
def _fix_stdcall(self, callconv):
from rpython.rlib.clibffi import FFI_STDCALL
diff --git a/rpython/jit/backend/x86/codebuf.py b/rpython/jit/backend/x86/codebuf.py
--- a/rpython/jit/backend/x86/codebuf.py
+++ b/rpython/jit/backend/x86/codebuf.py
@@ -22,6 +22,7 @@
LocationCodeBuilder,
codebuilder_cls):
def __init__(self):
+ codebuilder_cls.__init__(self)
self.init_block_builder()
# a list of relative positions; for each position p, the bytes
# at [p-4:p] encode an absolute address that will need to be
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -12,7 +12,7 @@
valid_addressing_size)
from rpython.jit.backend.x86 import rx86
from rpython.jit.backend.x86.arch import (WORD, JITFRAME_FIXED_SIZE, IS_X86_32,
- IS_X86_64)
+ IS_X86_64, DEFAULT_FRAME_BYTES)
from rpython.jit.backend.x86.jump import remap_frame_layout_mixed
from rpython.jit.backend.x86.regloc import (FrameLoc, RegLoc, ConstFloatLoc,
FloatImmedLoc, ImmedLoc, imm, imm0, imm1, ecx, eax, edx, ebx, esi, edi,
@@ -314,6 +314,7 @@
while i < len(operations):
op = operations[i]
self.assembler.mc.mark_op(op)
+ assert self.assembler.mc._frame_size == DEFAULT_FRAME_BYTES
self.rm.position = i
self.xrm.position = i
if op.has_no_side_effect() and op.result not in self.longevity:
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -447,6 +447,9 @@
class AbstractX86CodeBuilder(object):
"""Abstract base class."""
+ def __init__(self):
+ self.force_frame_size(self.WORD)
+
def writechar(self, char):
raise NotImplementedError
@@ -464,6 +467,19 @@
self.writechar(chr((imm >> 16) & 0xFF))
self.writechar(chr((imm >> 24) & 0xFF))
+ def force_frame_size(self, frame_size):
+ self._frame_size = frame_size
+
+ def stack_frame_size_delta(self, delta):
+ "Called when we generate an instruction that changes the value of ESP"
+ self._frame_size += delta
+ assert self._frame_size >= self.WORD
+
+ def check_stack_size_at_ret(self):
+ assert self._frame_size == self.WORD
+ if not we_are_translated():
+ self._frame_size = None
+
# ------------------------------ MOV ------------------------------
MOV_ri = insn(register(1), '\xB8', immediate(2))
@@ -474,14 +490,24 @@
INC_m = insn(rex_w, '\xFF', orbyte(0), mem_reg_plus_const(1))
INC_j = insn(rex_w, '\xFF', orbyte(0), abs_(1))
- ADD_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_ = common_modes(0)
+ AD1_ri,ADD_rr,ADD_rb,_,_,ADD_rm,ADD_rj,_,_ = common_modes(0)
OR_ri, OR_rr, OR_rb, _,_,OR_rm, OR_rj, _,_ = common_modes(1)
AND_ri,AND_rr,AND_rb,_,_,AND_rm,AND_rj,_,_ = common_modes(4)
- SUB_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8 = common_modes(5)
+ SU1_ri,SUB_rr,SUB_rb,_,_,SUB_rm,SUB_rj,SUB_ji8,SUB_mi8 = common_modes(5)
SBB_ri,SBB_rr,SBB_rb,_,_,SBB_rm,SBB_rj,_,_ = common_modes(3)
XOR_ri,XOR_rr,XOR_rb,_,_,XOR_rm,XOR_rj,_,_ = common_modes(6)
CMP_ri,CMP_rr,CMP_rb,CMP_bi,CMP_br,CMP_rm,CMP_rj,_,_ = common_modes(7)
+ def ADD_ri(self, reg, immed):
+ self.AD1_ri(reg, immed)
+ if reg == R.esp:
+ self.stack_frame_size_delta(-immed)
+
+ def SUB_ri(self, reg, immed):
+ self.SU1_ri(reg, immed)
+ if reg == R.esp:
+ self.stack_frame_size_delta(+immed)
+
CMP_mi8 = insn(rex_w, '\x83', orbyte(7<<3), mem_reg_plus_const(1),
immediate(2, 'b'))
CMP_mi32 = insn(rex_w, '\x81', orbyte(7<<3), mem_reg_plus_const(1),
immediate(2))
CMP_mi = select_8_or_32_bit_immed(CMP_mi8, CMP_mi32)
@@ -531,29 +557,60 @@
# ------------------------------ Misc stuff ------------------------------
NOP = insn('\x90')
- RET = insn('\xC3')
- RET16_i = insn('\xC2', immediate(1, 'h'))
+ RE1 = insn('\xC3')
+ RE116_i = insn('\xC2', immediate(1, 'h'))
- PUSH_r = insn(rex_nw, register(1), '\x50')
- PUSH_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
- PUSH_i8 = insn('\x6A', immediate(1, 'b'))
- PUSH_i32 = insn('\x68', immediate(1, 'i'))
- def PUSH_i(mc, immed):
+ def RET(self):
+ self.check_stack_size_at_ret()
+ self.RE1()
+
+ def RET16_i(self, immed):
+ self.check_stack_size_at_ret()
+ self.RE116_i(immed)
+
+ PUS1_r = insn(rex_nw, register(1), '\x50')
+ PUS1_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
+ PUS1_i8 = insn('\x6A', immediate(1, 'b'))
+ PUS1_i32 = insn('\x68', immediate(1, 'i'))
+
+ def PUSH_r(self, reg):
+ self.PUS1_r(reg)
+ self.stack_frame_size_delta(+self.WORD)
+
+ def PUSH_b(self, ofs):
+ self.PUS1_b(ofs)
+ self.stack_frame_size_delta(+self.WORD)
+
+ def PUSH_i(self, immed):
if single_byte(immed):
- mc.PUSH_i8(immed)
+ self.PUS1_i8(immed)
else:
- mc.PUSH_i32(immed)
+ self.PUS1_i32(immed)
+ self.stack_frame_size_delta(+self.WORD)
- POP_r = insn(rex_nw, register(1), '\x58')
- POP_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
+ PO1_r = insn(rex_nw, register(1), '\x58')
+ PO1_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
+
+ def POP_r(self, reg):
+ self.PO1_r(reg)
+ self.stack_frame_size_delta(-self.WORD)
+
+ def POP_b(self, ofs):
+ self.PO1_b(ofs)
+ self.stack_frame_size_delta(-self.WORD)
LEA_rb = insn(rex_w, '\x8D', register(1,8), stack_bp(2))
- LEA_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
+ LE1_rs = insn(rex_w, '\x8D', register(1,8), stack_sp(2))
LEA32_rb = insn(rex_w, '\x8D', register(1,8),stack_bp(2,force_32bits=True))
LEA_ra = insn(rex_w, '\x8D', register(1, 8),
mem_reg_plus_scaled_reg_plus_const(2))
LEA_rm = insn(rex_w, '\x8D', register(1, 8), mem_reg_plus_const(2))
LEA_rj = insn(rex_w, '\x8D', register(1, 8), abs_(2))
+ def LEA_rs(self, reg, ofs):
+ self.LE1_rs(reg, ofs)
+ if reg == R.esp:
+ self.stack_frame_size_delta(-ofs)
+
CALL_l = insn('\xE8', relative(1))
CALL_r = insn(rex_nw, '\xFF', register(1), chr(0xC0 | (2<<3)))
CALL_b = insn('\xFF', orbyte(2<<3), stack_bp(1))
@@ -564,15 +621,30 @@
# register-register exchange.
XCHG_rr = insn(rex_w, '\x87', register(1), register(2,8), '\xC0')
- JMP_l = insn('\xE9', relative(1))
- JMP_r = insn(rex_nw, '\xFF', orbyte(4<<3), register(1), '\xC0')
+ JM1_l = insn('\xE9', relative(1))
+ JM1_r = insn(rex_nw, '\xFF', orbyte(4<<3), register(1), '\xC0')
# FIXME: J_il8 and JMP_l8 assume the caller will do the appropriate
# calculation to find the displacement, but J_il does it for the caller.
# We need to be consistent.
- JMP_l8 = insn('\xEB', immediate(1, 'b'))
+ JM1_l8 = insn('\xEB', immediate(1, 'b'))
J_il8 = insn(immediate(1, 'o'), '\x70', immediate(2, 'b'))
J_il = insn('\x0F', immediate(1,'o'), '\x80', relative(2))
+ def JMP_l(self, rel):
+ self.JM1_l(rel)
+ if not we_are_translated():
+ self._frame_size = None
+
+ def JMP_r(self, reg):
+ self.JM1_r(reg)
+ if not we_are_translated():
+ self._frame_size = None
+
+ def JMP_l8(self, rel):
+ self.JM1_l8(rel)
+ if not we_are_translated():
+ self._frame_size = None
+
SET_ir = insn(rex_fw, '\x0F', immediate(1,'o'),'\x90', byte_register(2),
'\xC0')
# The 64-bit version of this, CQO, is defined in X86_64_CodeBuilder
diff --git a/rpython/jit/backend/x86/test/test_callbuilder.py b/rpython/jit/backend/x86/test/test_callbuilder.py
--- a/rpython/jit/backend/x86/test/test_callbuilder.py
+++ b/rpython/jit/backend/x86/test/test_callbuilder.py
@@ -3,7 +3,8 @@
class FakeAssembler:
- mc = None
+ class mc:
+ _frame_size = 42
class _regalloc:
class rm:
free_regs = [ebx]
diff --git a/rpython/jit/backend/x86/test/test_rx86.py b/rpython/jit/backend/x86/test/test_rx86.py
--- a/rpython/jit/backend/x86/test/test_rx86.py
+++ b/rpython/jit/backend/x86/test/test_rx86.py
@@ -5,6 +5,7 @@
class CodeBuilderMixin(object):
def __init__(self):
self.buffer = []
+ super(CodeBuilderMixin, self).__init__()
def writechar(self, c):
assert isinstance(c, str) and len(c) == 1
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -33,6 +33,12 @@
def done(self):
assert len(self.expected) == self.index
+ def stack_frame_size_delta(self, delta):
+ pass # ignored
+
+ def check_stack_size_at_ret(self):
+ pass # ignored
+
def hexdump(s):
return ' '.join(["%02X" % ord(c) for c in s])
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit