Author: Armin Rigo <ar...@tunes.org> Branch: guard-compatible Changeset: r84685:14806afb0d3d Date: 2016-05-25 15:59 +0200 http://bitbucket.org/pypy/pypy/changeset/14806afb0d3d/
Log: arm: first draft diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -28,6 +28,7 @@ from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref from rpython.rtyper.lltypesystem import lltype, rffi from rpython.jit.backend.arm import callbuilder +from rpython.jit.backend.arm import guard_compat from rpython.rtyper.lltypesystem.lloperation import llop class AssemblerARM(ResOpAssembler): @@ -479,6 +480,9 @@ rawstart = mc.materialize(self.cpu, []) self.failure_recovery_code[exc + 2 * withfloats] = rawstart + def _build_guard_compat_slowpath(self): + guard_compat.build_once(self) + def generate_quick_failure(self, guardtok): startpos = self.mc.currpos() faildescrindex, target = self.store_info_on_descr(startpos, guardtok) @@ -759,11 +763,16 @@ def patch_gcref_table(self, looptoken, rawstart): # the gc table is at the start of the machine code. Fill it now + self.gc_table_addr = rawstart tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart, self._allgcrefs) gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken) gcreftracers.append(tracer) # keepalive self.teardown_gcrefs_list() + self.gc_table_tracer = tracer + + def _addr_from_gc_table(self, index): + return self.gc_table_addr + index * WORD def load_from_gc_table(self, regnum, index): """emits either: @@ -985,6 +994,13 @@ assert isinstance(descr, AbstractFailDescr) failure_recovery_pos = block_start + tok.pos_recovery_stub descr.adr_jump_offset = failure_recovery_pos + # + if tok.guard_compatible(): + guard_compat.patch_guard_compatible(tok, block_start, + self._addr_from_gc_table, + self.gc_table_tracer) + continue + # relative_offset = tok.pos_recovery_stub - tok.offset guard_pos = block_start + tok.offset if not tok.guard_not_invalidated(): @@ -1037,6 +1053,9 @@ return fcond def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc): + if isinstance(faildescr, 
guard_compat.GuardCompatibleDescr): + guard_compat.invalidate_cache(faildescr) + return b = InstrBuilder(self.cpu.cpuinfo.arch_version) patch_addr = faildescr.adr_jump_offset assert patch_addr != 0 diff --git a/rpython/jit/backend/ppc/guard_compat.py b/rpython/jit/backend/arm/guard_compat.py copy from rpython/jit/backend/ppc/guard_compat.py copy to rpython/jit/backend/arm/guard_compat.py --- a/rpython/jit/backend/ppc/guard_compat.py +++ b/rpython/jit/backend/arm/guard_compat.py @@ -1,8 +1,6 @@ from rpython.rtyper.annlowlevel import llhelper -import rpython.jit.backend.ppc.register as r -from rpython.jit.backend.ppc.arch import WORD, PARAM_SAVE_AREA_OFFSET -from rpython.jit.backend.ppc.codebuilder import PPCBuilder, OverwritingBuilder - +from rpython.jit.backend.arm import conditions as c +from rpython.jit.backend.arm import registers as r from rpython.jit.backend.llsupport.guard_compat import * from rpython.jit.backend.llsupport.guard_compat import _real_number @@ -10,146 +8,129 @@ # See comments in ../x86/guard_compat.py. -MANAGED_REGS_WITHOUT_R7_AND_R10 = list(r.MANAGED_REGS) -MANAGED_REGS_WITHOUT_R7_AND_R10.remove(r.r7) -MANAGED_REGS_WITHOUT_R7_AND_R10.remove(r.r10) - - def build_once(assembler): """Generate the 'search_tree' block of code""" - # called with r2 containing the BACKEND_CHOICES object, - # and r0 containing the actual value of the guard + # Called with lr containing the BACKEND_CHOICES object, and r1 + # containing the actual value of the guard. The old value of r1 + # is pushed on the stack. Additionally, r0 and lr are already + # pushed on the stack as well (the same values as the one passed + # in). 
mc = PPCBuilder() - r0 = r.SCRATCH - r2 = r.SCRATCH2 - r3 = r.r3 - r4 = r.r4 - r5 = r.r5 - r7 = r.r7 - r10 = r.r10 - - # save the values of r7 and r10 in the jitframe - assembler._push_core_regs_to_jitframe(mc, [r7, r10]) - - # save the original value of r2 for later - mc.std(r2.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) + r0 = r.r0 + r1 = r.r1 + lr = r.lr + ip = r.ip ofs1 = _real_number(BCLIST + BCLISTLENGTHOFS) ofs2 = _real_number(BCLIST + BCLISTITEMSOFS) - assert ofs2 - 8 == ofs1 - mc.ldu(r10.value, r2.value, ofs1) # ldu r10, [r2 + bc_list.length] - mc.sldi(r10.value, r10.value, 3) # sldi r10, r10, 3 - # in the sequel, "r2 + 8" is a pointer to the leftmost array item of + mc.LDR_ri(r0.value, lr.value, ofs1) # LDR r0, [lr + bc_list.length] + mc.ADD_ri(lr.value, lr.value, imm=ofs2 - WORD) # ADD lr, lr, $items - 4 + # ^^^ NB. this could be done with a single LDR in "pre-indexed" mode + mc.LSL_ri(r0.value, r0.value, 2) # LSL r0, r0, 2 + # in the sequel, "lr + 4" is a pointer to the leftmost array item of # the range still under consideration. The length of this range, - # which is always a power-of-two-minus-1, is equal to "r10 / 8". - b_location = mc.get_relative_pos() - mc.trap() # b loop + # which is always a power-of-two-minus-1, is equal to "r0 / 4". 
+ b_location = mc.currpos() + mc.BKPT() # B loop right_label = mc.get_relative_pos() - mc.add(r2.value, r2.value, r10.value) # add r2, r2, r10 - mc.addi(r2.value, r2.value, WORD) # addi r2, r2, 8 + mc.ADD_rr(lr.value, lr.value, r0.value)# ADD lr, lr, r0 + mc.ADD_ri(lr.value, lr.value, WORD) # ADD lr, lr, 4 left_label = mc.get_relative_pos() - mc.rldicrx(r10.value, r10.value, 63, 60)# rldicrx r10, r10, 63, 60 - # ^^ note: this does r10 = (r10 >> 1) & ~7, and sets the "EQ" flag - # if the result is equal to zero + mc.LSR_ri(r0.value, r0.value, 1) # LSR r0, r0, 1 + mc.SUBS_ri(r0.value, r0.value, 4) # SUBS r0, r0, 4 beq_location = mc.get_relative_pos() - mc.trap() # beq not_found + mc.trap() # BEQ not_found # loop: - pmc = OverwritingBuilder(mc, b_location, 1) - pmc.b(mc.currpos() - b_location) # jump here unconditionally - pmc.overwrite() - mc.ldx(r7.value, r2.value, r10.value) # ldx r7, [r2 + r10] - mc.cmp_op(0, r0.value, r7.value, - signed=False) # cmp r0, r7 - mc.bgt(right_label - mc.currpos()) # bgt right_label - mc.bne(left_label - mc.currpos()) # bne left_label + pmc = OverwritingBuilder(mc, b_location, WORD) + pmc.B_offs(mc.currpos(), c.AL) + mc.LDR_rr(ip.value, lr.value, r0.value)# LDR ip, [lr + r0] + mc.CMP_rr(r1.value, ip.value) # CMP r1, ip + mc.B_offs(right_label - mc.currpos(), c.GT) # BGT right_label + mc.B_offs(left_label - mc.currpos(), c.NE) # BNE left_label - # found: - mc.add(r2.value, r2.value, r10.value) # add r2, r2, r10 - mc.ld(r10.value, r2.value, 8) # ld r10, [r2 + 8] + # found: + mc.ADD_rr(ip.value, lr.value, r0.value)# ADD ip, lr, r0 + mc.LDR_ri(ip.value, ip.value, WORD) # LDR ip, [ip + 4] - # restore the value of r2 from the stack - mc.ld(r2.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) # ld r2, [sp + ..] 
+ mc.POP([lr.value]) # POP {lr} ofs = _real_number(BCMOSTRECENT) - mc.std(r0.value, r2.value, ofs) # std r0, [r2 + bc_most_recent] - mc.std(r10.value, r2.value, ofs + WORD) # std r0, [r2 + bc_most_recent + 8] - mc.mtctr(r10.value) + mc.STR(r1.value, lr.value, ofs) # STR r1, [lr + bc_most_recent] + mc.STR(ip.value, lr.value, ofs + WORD) # STR ip, [lr + bc_most_recent + 4] - # restore the values of r7 and r10 from the jitframe - assembler._pop_core_regs_from_jitframe(mc, [r7, r10]) - - mc.bctr() # jump to the old r10 + mc.POP([r0.value, r1.value]) # POP {r0, r1} + mc.BX(ip.value) # BX ip # ---------- # not_found: pmc = OverwritingBuilder(mc, beq_location, 1) - pmc.beq(mc.currpos() - beq_location) # jump here if r10 < 8 + pmc.B(mc.currpos() - beq_location, cond.EQ) # jump here if r0 is now 0 pmc.overwrite() - # save all other registers to the jitframe SPP, in addition to - # r7 and r10 which have already been saved - assembler._push_core_regs_to_jitframe(mc, MANAGED_REGS_WITHOUT_R7_AND_R10) - assembler._push_fp_regs_to_jitframe(mc) + # save all registers to the jitframe, except r0 and r1 + assembler._push_all_regs_to_jitframe(mc, [r0, r1], withfloats=True) - # arg #1 (r3): the BACKEND_CHOICES objects, from the original value of r2 - # arg #2 (r4): the actual value of the guard, from r0 - # arg #3 (r5): the jitframe - mc.ld(r3.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) # ld r3, [sp + ..]
- mc.mr(r4.value, r0.value) - mc.mr(r5.value, r.SPP.value) + # pop the three values from the stack: + # r2 = saved value originally in r0 + # r3 = saved value originally in r1 + # lr = BACKEND_CHOICES object + mc.POP([r2.value, r3.value, lr.value]) + + # save r2 and r3 into the jitframe, at locations for r0 and r1 + assert r.all_regs[0] is r0 + assert r.all_regs[1] is r1 + base_ofs = assembler.cpu.get_baseofs_of_frame_field() + assembler.store_reg(mc, r2, r.fp, base_ofs + 0 * WORD) + assembler.store_reg(mc, r3, r.fp, base_ofs + 1 * WORD) + + # arg #1 (r0): the BACKEND_CHOICES object, from the original value of lr + # arg #2 (r1): the actual value of the guard, already in r1 + # arg #3 (r2): the jitframe + mc.MOV_rr(r0.value, lr.value) + mc.MOV_rr(r2.value, r.fp.value) invoke_find_compatible = make_invoke_find_compatible(assembler.cpu) llfunc = llhelper(INVOKE_FIND_COMPATIBLE_FUNC, invoke_find_compatible) llfunc = assembler.cpu.cast_ptr_to_int(llfunc) - mc.load_imm(mc.RAW_CALL_REG, llfunc) - mc.raw_call() # mtctr / bctrl + mc.BL(llfunc) assembler._reload_frame_if_necessary(mc) - mc.mtctr(r3.value) # mtctr r3 + mc.MOV_rr(lr.value, r0.value) # restore the registers that the CALL has clobbered, plus the ones # containing GC pointers that may have moved. That means we just # restore them all.
- assembler._pop_core_regs_from_jitframe(mc) - assembler._pop_fp_regs_from_jitframe(mc) + assembler._pop_all_regs_from_jitframe(mc, [], withfloats=True) - mc.bctr() # jump to the old r3 + mc.BX(lr.value) # jump to the return value above assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, []) - #print hex(assembler.guard_compat_search_tree) - #raw_input('press enter...') + print hex(assembler.guard_compat_search_tree) + raw_input('press enter...') def generate_guard_compatible(assembler, guard_token, l0, bindex): mc = assembler.mc - r0 = r.SCRATCH - r2 = r.SCRATCH2 + ip = r.ip + lr = r.lr + r4 = r.r4 - assembler._load_from_gc_table(r2, r2, bindex) # ld r2, [gc tbl at bindex] + assembler.load_from_gc_table(lr.value, bindex) # LDR lr, [gctbl at bindex] ofs = _real_number(BCMOSTRECENT) - mc.ld(r0.value, r2.value, ofs) # ld r0, [r2 + bc_most_recent] - mc.cmp_op(0, l0.value, r0.value) # cmp l0, r0 + mc.LDR_ri(ip.value, lr.value, ofs) # LDR ip, [lr + bc_most_recent] + mc.CMP_rr(l0.value, ip.value) # CMP l0, ip - bne_location = mc.get_relative_pos() - mc.trap() # patched later to a 'bc' + mc.LDR_ri(ip.value, lr.value, # LDR.EQ ip, [lr + most_recent + 8] + ofs + WORD, cond=c.EQ) + mc.BR(ip.value, cond=c.EQ) # BR.EQ ip - mc.ld(r2.value, r2.value, ofs + WORD) # ld r2, [r2 + bc_most_recent + 8] - mc.mtctr(r2.value) - mc.bctr() # jump to r2 - - # slowpath: - pmc = OverwritingBuilder(mc, bne_location, 1) - pmc.bne(mc.currpos() - bne_location) # jump here if l0 != r0 - pmc.overwrite() - - mc.load_imm(r0, assembler.guard_compat_search_tree) - mc.mtctr(r0.value) - mc.mr(r0.value, l0.value) - mc.bctr() + mc.PUSH([r0.value, r1.value, lr.value]) # PUSH {r0, r1, lr} + mc.MOV_rr(r1.value, l0.value) # MOV r1, l0 + mc.BL(assembler.guard_compat_search_tree) # MOVW/MOVT ip, BLX ip # abuse this field to store the 'sequel' relative offset guard_token.pos_jump_offset = mc.get_relative_pos() diff --git a/rpython/jit/backend/arm/opassembler.py 
b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -17,6 +17,7 @@ from rpython.jit.backend.arm.jump import remap_frame_layout from rpython.jit.backend.arm.regalloc import TempVar from rpython.jit.backend.arm.locations import imm, RawSPStackLocation +from rpython.jit.backend.arm import guard_compat from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.gcmap import allocate_gcmap from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler @@ -190,8 +191,9 @@ fcond=fcond) return token - def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False): - if is_guard_not_invalidated: + def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False, + is_guard_compatible=False): + if is_guard_not_invalidated or is_guard_compatible: fcond = c.cond_none else: fcond = self.guard_success_cc @@ -204,20 +206,22 @@ # For all guards that are not GUARD_NOT_INVALIDATED we emit a # breakpoint to ensure the location is patched correctly. In the case # of GUARD_NOT_INVALIDATED we use just a NOP, because it is only - # eventually patched at a later point. - if is_guard_not_invalidated: - self.mc.NOP() - else: - self.mc.BKPT() - return c.AL + # eventually patched at a later point. For GUARD_COMPATIBLE, we + # use a completely different mechanism. 
+ if not is_guard_compatible: + if is_guard_not_invalidated: + self.mc.NOP() + else: + self.mc.BKPT() + return token def emit_op_guard_true(self, op, arglocs, regalloc, fcond): - fcond = self._emit_guard(op, arglocs) + self._emit_guard(op, arglocs) return fcond def emit_op_guard_false(self, op, arglocs, regalloc, fcond): self.guard_success_cc = c.get_opposite_of(self.guard_success_cc) - fcond = self._emit_guard(op, arglocs) + self._emit_guard(op, arglocs) return fcond def emit_op_guard_value(self, op, arglocs, regalloc, fcond): @@ -235,9 +239,16 @@ self.mc.VCMP(l0.value, l1.value) self.mc.VMRS(cond=fcond) self.guard_success_cc = c.EQ - fcond = self._emit_guard(op, failargs) + self._emit_guard(op, failargs) return fcond + def emit_op_guard_compatible(self, op, arglocs, regalloc, fcond): + l0 = arglocs[0] + assert l0.is_core_reg() + bindex = op.getarg(1).getint() + token = self._emit_guard(op, arglocs[1:], is_guard_compatible=True) + guard_compat.generate_guard_compatible(self, token, l0, bindex) + emit_op_guard_nonnull = emit_op_guard_true emit_op_guard_isnull = emit_op_guard_false @@ -348,7 +359,8 @@ return fcond def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond): - return self._emit_guard(op, locs, is_guard_not_invalidated=True) + self._emit_guard(op, locs, is_guard_not_invalidated=True) + return fcond def emit_op_label(self, op, arglocs, regalloc, fcond): self._check_frame_depth_debug(self.mc) @@ -487,7 +499,7 @@ self.mc.LDR_ri(loc.value, loc.value) self.mc.CMP_ri(loc.value, 0) self.guard_success_cc = c.EQ - fcond = self._emit_guard(op, failargs) + self._emit_guard(op, failargs) # If the previous operation was a COND_CALL, overwrite its conditional # jump to jump over this GUARD_NO_EXCEPTION as well, if we can if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL: diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ 
-665,7 +665,18 @@ else: l1 = self.convert_to_imm(a1) arglocs = self._prepare_guard(op, [l0, l1]) - self.possibly_free_vars(op.getarglist()) + self.possibly_free_vars(boxes) + self.possibly_free_vars(op.getfailargs()) + return arglocs + + def prepare_op_guard_compatible(self, op, fcond): + op.getdescr().make_a_counter_per_value(op, -1) # -1 not used here + args = op.getarglist() + assert args[0].type == REF # only supported case for now + assert isinstance(args[1], ConstInt) # by rewrite.py + x = self.make_sure_var_in_reg(args[0], args) + arglocs = self._prepare_guard(op, [x]) + self.possibly_free_vars(args) self.possibly_free_vars(op.getfailargs()) return arglocs diff --git a/rpython/jit/backend/arm/registers.py b/rpython/jit/backend/arm/registers.py --- a/rpython/jit/backend/arm/registers.py +++ b/rpython/jit/backend/arm/registers.py @@ -18,9 +18,9 @@ # aliases for registers fp = r11 -ip = r12 +ip = r12 # ip is used as a general scratch register sp = r13 -lr = r14 +lr = r14 # lr is used as a general scratch register pc = r15 vfp_ip = d15 svfp_ip = s31 diff --git a/rpython/jit/backend/ppc/test/test_compatible.py b/rpython/jit/backend/arm/test/test_compatible.py copy from rpython/jit/backend/ppc/test/test_compatible.py copy to rpython/jit/backend/arm/test/test_compatible.py --- a/rpython/jit/backend/ppc/test/test_compatible.py +++ b/rpython/jit/backend/arm/test/test_compatible.py @@ -1,6 +1,6 @@ -from rpython.jit.backend.ppc.test.support import JitPPCMixin +from rpython.jit.backend.arm.test.support import JitARMMixin from rpython.jit.metainterp.test import test_compatible -class TestCompatible(JitPPCMixin, test_compatible.TestCompatible): +class TestCompatible(JitARMMixin, test_compatible.TestCompatible): pass _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit