Author: Maciej Fijalkowski <[email protected]>
Branch: jitframe-on-heap
Changeset: r59988:1dc6fdd4000d
Date: 2013-01-12 16:59 +0200
http://bitbucket.org/pypy/pypy/changeset/1dc6fdd4000d/
Log: sort out the indexing and compress the frame
diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py
--- a/pypy/jit/backend/x86/arch.py
+++ b/pypy/jit/backend/x86/arch.py
@@ -49,15 +49,17 @@
SAVED_REGISTERS = 1 # range(1, 5)
MY_COPY_OF_REGS = 5 # range(5, 9)
XXX
+ JITFRAME_FIXED_SIZE = 29 # 13 GPR + 16 XMM
+ # reg, we don't save it
else:
# rbp + rbx + r12 + r13 + r14 + r15 + 11 extra words + force_index = 18
FRAME_FIXED_SIZE = 18 # 18 aligned to 16 bytes = 2 * WORD
FORCE_INDEX_OFS = 0
SAVED_REGISTERS = 1 # range(1, 7)
MY_COPY_OF_REGS = 7 # range(7, 18)
- JITFRAME_FIXED_SIZE = 32 # 1 for the number and 32 for all the registers,
- # but they're never used together
-
+ JITFRAME_FIXED_SIZE = 29 # 13 GPR + 16 XMM
+ # reg, we don't save it
+
# "My copy of regs" has room for almost all registers, apart from eax and edx
# which are used in the malloc itself. They are:
# ecx, ebx, esi, edi [32 and 64 bits]
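For reference, the new fixed size falls straight out of the register counts. A minimal sketch of the arithmetic, assuming the x86-64 register set the backend allocates from (rsp, rbp and the r11 scratch register never land in the frame):

    # Sketch only: frame-size arithmetic, assuming 16 GPRs minus rsp, rbp
    # and the r11 scratch register, plus all 16 XMM registers.
    GPRS_SAVED = 16 - 3          # rsp, rbp, r11 are not stored in the frame
    XMM_SAVED = 16               # xmm0..xmm15
    assert GPRS_SAVED + XMM_SAVED == 29   # JITFRAME_FIXED_SIZE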
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -563,17 +563,14 @@
assert len(set(inputargs)) == len(inputargs)
descr_number = self.cpu.get_fail_descr_number(faildescr)
- failure_recovery = self._find_failure_recovery_bytecode(faildescr)
self.setup(original_loop_token)
if log:
operations = self._inject_debugging_code(faildescr, operations,
'b', descr_number)
- arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
- if not we_are_translated():
- assert ([loc.assembler() for loc in arglocs] ==
- [loc.assembler() for loc in faildescr._x86_debug_faillocs])
+ descr = self.cpu.get_fail_descr_from_number(descr_number)
+ arglocs = self.rebuild_faillocs_from_descr(descr)
regalloc = RegAlloc(self, self.cpu.translate_support_code)
startpos = self.mc.get_relative_pos()
operations = regalloc.prepare_bridge(inputargs, arglocs,
@@ -687,24 +684,7 @@
struct.number = compute_unique_id(token)
self.loop_run_counters.append(struct)
return struct
-
- def _find_failure_recovery_bytecode(self, faildescr):
- adr_jump_offset = faildescr._x86_adr_jump_offset
- if adr_jump_offset == 0:
- # This case should be prevented by the logic in compile.py:
- # look for CNT_BUSY_FLAG, which disables tracing from a guard
- # when another tracing from the same guard is already in progress.
- raise BridgeAlreadyCompiled
- # follow the JMP/Jcond
- p = rffi.cast(rffi.INTP, adr_jump_offset)
- adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0])
- # skip the CALL
- if WORD == 4:
- adr_target += 5 # CALL imm
- else:
- adr_target += 13 # MOV r11, imm-as-8-bytes; CALL *r11 xxxxxxxxxx
- return adr_target
-
+
def patch_jump_for_descr(self, faildescr, adr_new_target):
adr_jump_offset = faildescr._x86_adr_jump_offset
assert adr_jump_offset != 0
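For the record, the 5- and 13-byte skips in the deleted helper came from the x86 encodings of the call sequences it stepped over; a quick sanity check of that arithmetic (standard instruction sizes, not something stated in the diff itself):

    # Sketch only: where the removed 5/13-byte offsets came from.
    CALL_REL32 = 1 + 4            # E8 opcode + 32-bit displacement
    MOV_R11_IMM64 = 2 + 8         # REX.W prefix + B8+r opcode, then imm64
    CALL_R11 = 3                  # REX.B prefix + FF /2 ModRM
    assert CALL_REL32 == 5                     # 32-bit case
    assert MOV_R11_IMM64 + CALL_R11 == 13      # 64-bit case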
@@ -1846,7 +1826,6 @@
XXX
mc.writechar(chr(self.CODE_FORCED))
positions = [0] * len(guardtok.fail_locs)
- assert IS_X86_64
for i, loc in enumerate(guardtok.fail_locs):
if loc is None:
positions[i] = -1
@@ -1854,12 +1833,29 @@
xxx
else:
assert isinstance(loc, RegLoc)
- positions[i] = (loc.value + loc.is_xmm * 16) * WORD
+ v = (gpr_reg_mgr_cls.all_reg_indexes[loc.value] +
+ loc.is_xmm * len(gpr_reg_mgr_cls.all_regs))
+ positions[i] = v * WORD
guardtok.faildescr.rd_locs = positions
# write fail_index too
# for testing the decoding, write a final byte 0xCC
return startpos
+ def rebuild_faillocs_from_descr(self, descr):
+ locs = []
+ for pos in descr.rd_locs:
+ if pos == -1:
+ pass
+ elif pos < self.cpu.NUM_REGS * WORD:
+ locs.append(gpr_reg_mgr_cls.all_regs[pos // WORD])
+ elif pos < self.cpu.NUM_REGS * 2 * WORD:
+ locs.append(xmm_reg_mgr_cls.xrm.all_regs[pos // WORD])
+ else:
+ i = pos // WORD - 2 * self.cpu.NUM_REGS
+ tp = xxx
+ locs.append(StackLoc(i, pos, tp))
+ return locs
+
def setup_failure_recovery(self):
self.failure_recovery_code = [0, 0, 0, 0]
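The interesting part is that rd_locs now stores plain byte offsets into the jitframe: register locations go through all_reg_indexes (GPRs first, then the XMM block), and stack slots follow the fixed register area. A self-contained sketch of the intended round trip, with the boundary arithmetic simplified to the 13-GPR/16-XMM layout (encode_reg and decode_pos are illustrative stand-ins, not backend functions):

    # Sketch only: the encode/decode round trip for rd_locs, assuming
    # WORD == 8, 13 saved GPRs and 16 XMM registers.
    WORD = 8
    NUM_GPR = 13
    NUM_XMM = 16

    def encode_reg(dense_index, is_xmm):
        # mirrors: v = all_reg_indexes[loc.value] + loc.is_xmm * len(all_regs)
        return (dense_index + (NUM_GPR if is_xmm else 0)) * WORD

    def decode_pos(pos):
        # registers first, then the XMM block, then stack slots
        if pos < NUM_GPR * WORD:
            return ('gpr', pos // WORD)
        elif pos < (NUM_GPR + NUM_XMM) * WORD:
            return ('xmm', pos // WORD - NUM_GPR)
        return ('stack', pos // WORD - (NUM_GPR + NUM_XMM))

    assert decode_pos(encode_reg(3, False)) == ('gpr', 3)
    assert decode_pos(encode_reg(5, True)) == ('xmm', 5)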
@@ -1868,8 +1864,8 @@
self.mc = mc
# Push all general purpose registers
- for gpr in range(self.cpu.NUM_REGS):
- mc.MOV_br(gpr * WORD, gpr)
+ for i, gpr in enumerate(gpr_reg_mgr_cls.all_regs):
+ mc.MOV_br(i * WORD, gpr.value)
if exc:
# We might have an exception pending. Load it into ebx
@@ -1879,8 +1875,9 @@
mc.MOV(heap(self.cpu.pos_exc_value()), imm0)
if withfloats:
+ ofs = len(gpr_reg_mgr_cls.all_regs)
for i in range(self.cpu.NUM_REGS):
- mc.MOVSD_bx((16 + i) * WORD, i)
+ mc.MOVSD_bx((ofs + i) * WORD, i)
if exc:
# save ebx into 'jf_guard_exc'
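Together with the arch.py change, the failure-recovery prologue now fills the frame densely: GPRs in slots 0..12 in all_regs order, XMM registers right after at ofs = len(all_regs). A small sketch of the resulting layout, with register names taken from the x86-64 all_regs list in regalloc.py below:

    # Sketch only: the frame slot layout the new prologue produces,
    # assuming the 13-entry x86-64 all_regs list and 16 XMM registers.
    gprs = ['ecx', 'eax', 'edx', 'ebx', 'esi', 'edi',
            'r8', 'r9', 'r10', 'r12', 'r13', 'r14', 'r15']
    layout = {}
    for i, name in enumerate(gprs):        # mc.MOV_br(i * WORD, gpr.value)
        layout[i] = name
    ofs = len(gprs)
    for i in range(16):                    # mc.MOVSD_bx((ofs + i) * WORD, i)
        layout[ofs + i] = 'xmm%d' % i
    assert len(layout) == 29               # matches JITFRAME_FIXED_SIZE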
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -64,6 +64,7 @@
class X86_64_RegisterManager(X86RegisterManager):
# r11 omitted because it's used as scratch
all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15]
+
no_lower_byte_regs = []
save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10]
@@ -153,6 +154,9 @@
else:
raise AssertionError("Word size should be 4 or 8")
+gpr_reg_mgr_cls.all_reg_indexes = [-1] * WORD * 2 # eh, happens to be true
+for _i, _reg in enumerate(gpr_reg_mgr_cls.all_regs):
+ gpr_reg_mgr_cls.all_reg_indexes[_reg.value] = _i
class RegAlloc(object):
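all_reg_indexes is the inverse of all_regs: it maps a register's hardware number (reg.value) to its dense position in the list, so frame slots stay contiguous even though rsp, rbp and r11 are skipped. A toy version using the standard x86-64 register numbers (Reg stands in for the backend's RegLoc):

    # Sketch only: the inverse lookup table, with the usual x86-64 register
    # numbers (eax=0, ecx=1, edx=2, ebx=3, esi=6, edi=7, r8..r15=8..15).
    class Reg(object):
        def __init__(self, value):
            self.value = value

    WORD = 8
    all_regs = [Reg(v) for v in (1, 0, 2, 3, 6, 7, 8, 9, 10, 12, 13, 14, 15)]
    all_reg_indexes = [-1] * WORD * 2       # 16 entries, one per hardware reg
    for _i, _reg in enumerate(all_regs):
        all_reg_indexes[_reg.value] = _i

    assert all_reg_indexes[1] == 0          # ecx is all_regs[0]
    assert all_reg_indexes[11] == -1        # r11 (scratch) is never saved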
@@ -501,7 +505,8 @@
else:
locs = [imm(fail_no)]
self.Perform(op, locs, None)
- self.possibly_free_var(op.getarg(0))
+ if op.numargs() == 1:
+ self.possibly_free_var(op.getarg(0))
def consider_guard_no_exception(self, op):
self.perform_guard(op, [], None)