Author: Maciej Fijalkowski <fij...@gmail.com> Branch: jitframe-on-heap Changeset: r59988:1dc6fdd4000d Date: 2013-01-12 16:59 +0200 http://bitbucket.org/pypy/pypy/changeset/1dc6fdd4000d/
Log: sort out the indexing and compress the frame diff --git a/pypy/jit/backend/x86/arch.py b/pypy/jit/backend/x86/arch.py --- a/pypy/jit/backend/x86/arch.py +++ b/pypy/jit/backend/x86/arch.py @@ -49,15 +49,17 @@ SAVED_REGISTERS = 1 # range(1, 5) MY_COPY_OF_REGS = 5 # range(5, 9) XXX + JITFRAME_FIXED_SIZE = 29 # 13 GPR + 16 XMM + # reg, we don't save it else: # rbp + rbx + r12 + r13 + r14 + r15 + 11 extra words + force_index = 18 FRAME_FIXED_SIZE = 18 # 18 aligned to 16 bytes = 2 * WORD FORCE_INDEX_OFS = 0 SAVED_REGISTERS = 1 # range(1, 7) MY_COPY_OF_REGS = 7 # range(7, 18) - JITFRAME_FIXED_SIZE = 32 # 1 for the number and 32 for all the registers, - # but they're never used together - + JITFRAME_FIXED_SIZE = 29 # 13 GPR + 16 XMM + # reg, we don't save it + # "My copy of regs" has room for almost all registers, apart from eax and edx # which are used in the malloc itself. They are: # ecx, ebx, esi, edi [32 and 64 bits] diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py --- a/pypy/jit/backend/x86/assembler.py +++ b/pypy/jit/backend/x86/assembler.py @@ -563,17 +563,14 @@ assert len(set(inputargs)) == len(inputargs) descr_number = self.cpu.get_fail_descr_number(faildescr) - failure_recovery = self._find_failure_recovery_bytecode(faildescr) self.setup(original_loop_token) if log: operations = self._inject_debugging_code(faildescr, operations, 'b', descr_number) - arglocs = self.rebuild_faillocs_from_descr(failure_recovery) - if not we_are_translated(): - assert ([loc.assembler() for loc in arglocs] == - [loc.assembler() for loc in faildescr._x86_debug_faillocs]) + descr = self.cpu.get_fail_descr_from_number(descr_number) + arglocs = self.rebuild_faillocs_from_descr(descr) regalloc = RegAlloc(self, self.cpu.translate_support_code) startpos = self.mc.get_relative_pos() operations = regalloc.prepare_bridge(inputargs, arglocs, @@ -687,24 +684,7 @@ struct.number = compute_unique_id(token) self.loop_run_counters.append(struct) return struct - - def _find_failure_recovery_bytecode(self, faildescr): - adr_jump_offset = faildescr._x86_adr_jump_offset - if adr_jump_offset == 0: - # This case should be prevented by the logic in compile.py: - # look for CNT_BUSY_FLAG, which disables tracing from a guard - # when another tracing from the same guard is already in progress. - raise BridgeAlreadyCompiled - # follow the JMP/Jcond - p = rffi.cast(rffi.INTP, adr_jump_offset) - adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0]) - # skip the CALL - if WORD == 4: - adr_target += 5 # CALL imm - else: - adr_target += 13 # MOV r11, imm-as-8-bytes; CALL *r11 xxxxxxxxxx - return adr_target - + def patch_jump_for_descr(self, faildescr, adr_new_target): adr_jump_offset = faildescr._x86_adr_jump_offset assert adr_jump_offset != 0 @@ -1846,7 +1826,6 @@ XXX mc.writechar(chr(self.CODE_FORCED)) positions = [0] * len(guardtok.fail_locs) - assert IS_X86_64 for i, loc in enumerate(guardtok.fail_locs): if loc is None: positions[i] = -1 @@ -1854,12 +1833,29 @@ xxx else: assert isinstance(loc, RegLoc) - positions[i] = (loc.value + loc.is_xmm * 16) * WORD + v = (gpr_reg_mgr_cls.all_reg_indexes[loc.value] + + loc.is_xmm * len(gpr_reg_mgr_cls.all_regs)) + positions[i] = v * WORD guardtok.faildescr.rd_locs = positions # write fail_index too # for testing the decoding, write a final byte 0xCC return startpos + def rebuild_faillocs_from_descr(self, descr): + locs = [] + for pos in descr.rd_locs: + if pos == -1: + pass + elif pos < self.cpu.NUM_REGS * WORD: + locs.append(gpr_reg_mgr_cls.all_regs[pos // WORD]) + elif pos < self.cpu.NUM_REGS * 2 * WORD: + locs.append(xmm_reg_mgr_cls.xrm.all_regs[pos // WORD]) + else: + i = pos // WORD - 2 * self.cpu.NUM_REGS + tp = xxx + locs.append(StackLoc(i, pos, tp)) + return locs + def setup_failure_recovery(self): self.failure_recovery_code = [0, 0, 0, 0] @@ -1868,8 +1864,8 @@ self.mc = mc # Push all general purpose registers - for gpr in range(self.cpu.NUM_REGS): - mc.MOV_br(gpr * WORD, gpr) + for i, gpr in enumerate(gpr_reg_mgr_cls.all_regs): + mc.MOV_br(i * WORD, gpr.value) if exc: # We might have an exception pending. Load it into ebx @@ -1879,8 +1875,9 @@ mc.MOV(heap(self.cpu.pos_exc_value()), imm0) if withfloats: + ofs = len(gpr_reg_mgr_cls.all_regs) for i in range(self.cpu.NUM_REGS): - mc.MOVSD_bx((16 + i) * WORD, i) + mc.MOVSD_bx((ofs + i) * WORD, i) if exc: # save ebx into 'jf_guard_exc' diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py --- a/pypy/jit/backend/x86/regalloc.py +++ b/pypy/jit/backend/x86/regalloc.py @@ -64,6 +64,7 @@ class X86_64_RegisterManager(X86RegisterManager): # r11 omitted because it's used as scratch all_regs = [ecx, eax, edx, ebx, esi, edi, r8, r9, r10, r12, r13, r14, r15] + no_lower_byte_regs = [] save_around_call_regs = [eax, ecx, edx, esi, edi, r8, r9, r10] @@ -153,6 +154,9 @@ else: raise AssertionError("Word size should be 4 or 8") +gpr_reg_mgr_cls.all_reg_indexes = [-1] * WORD * 2 # eh, happens to be true +for _i, _reg in enumerate(gpr_reg_mgr_cls.all_regs): + gpr_reg_mgr_cls.all_reg_indexes[_reg.value] = _i class RegAlloc(object): @@ -501,7 +505,8 @@ else: locs = [imm(fail_no)] self.Perform(op, locs, None) - self.possibly_free_var(op.getarg(0)) + if op.numargs() == 1: + self.possibly_free_var(op.getarg(0)) def consider_guard_no_exception(self, op): self.perform_guard(op, [], None) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit