Author: Armin Rigo <[email protected]>
Branch: guard-compatible
Changeset: r84685:14806afb0d3d
Date: 2016-05-25 15:59 +0200
http://bitbucket.org/pypy/pypy/changeset/14806afb0d3d/
Log: arm: first draft
diff --git a/rpython/jit/backend/arm/assembler.py
b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -28,6 +28,7 @@
from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.jit.backend.arm import callbuilder
+from rpython.jit.backend.arm import guard_compat
from rpython.rtyper.lltypesystem.lloperation import llop
class AssemblerARM(ResOpAssembler):
@@ -479,6 +480,9 @@
rawstart = mc.materialize(self.cpu, [])
self.failure_recovery_code[exc + 2 * withfloats] = rawstart
+ def _build_guard_compat_slowpath(self):
+ guard_compat.build_once(self)
+
def generate_quick_failure(self, guardtok):
startpos = self.mc.currpos()
faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
@@ -759,11 +763,16 @@
def patch_gcref_table(self, looptoken, rawstart):
# the gc table is at the start of the machine code. Fill it now
+ self.gc_table_addr = rawstart
tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
self._allgcrefs)
gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
gcreftracers.append(tracer) # keepalive
self.teardown_gcrefs_list()
+ self.gc_table_tracer = tracer
+
+ def _addr_from_gc_table(self, index):
+ return self.gc_table_addr + index * WORD
def load_from_gc_table(self, regnum, index):
"""emits either:
@@ -985,6 +994,13 @@
assert isinstance(descr, AbstractFailDescr)
failure_recovery_pos = block_start + tok.pos_recovery_stub
descr.adr_jump_offset = failure_recovery_pos
+ #
+ if tok.guard_compatible():
+ guard_compat.patch_guard_compatible(tok, block_start,
+ self._addr_from_gc_table,
+ self.gc_table_tracer)
+ continue
+ #
relative_offset = tok.pos_recovery_stub - tok.offset
guard_pos = block_start + tok.offset
if not tok.guard_not_invalidated():
@@ -1037,6 +1053,9 @@
return fcond
def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc):
+ if isinstance(faildescr, guard_compat.GuardCompatibleDescr):
+ guard_compat.invalidate_cache(faildescr)
+ return
b = InstrBuilder(self.cpu.cpuinfo.arch_version)
patch_addr = faildescr.adr_jump_offset
assert patch_addr != 0
diff --git a/rpython/jit/backend/ppc/guard_compat.py
b/rpython/jit/backend/arm/guard_compat.py
copy from rpython/jit/backend/ppc/guard_compat.py
copy to rpython/jit/backend/arm/guard_compat.py
--- a/rpython/jit/backend/ppc/guard_compat.py
+++ b/rpython/jit/backend/arm/guard_compat.py
@@ -1,8 +1,6 @@
from rpython.rtyper.annlowlevel import llhelper
-import rpython.jit.backend.ppc.register as r
-from rpython.jit.backend.ppc.arch import WORD, PARAM_SAVE_AREA_OFFSET
-from rpython.jit.backend.ppc.codebuilder import PPCBuilder, OverwritingBuilder
-
+from rpython.jit.backend.arm import conditions as c
+from rpython.jit.backend.arm import registers as r
from rpython.jit.backend.llsupport.guard_compat import *
from rpython.jit.backend.llsupport.guard_compat import _real_number
@@ -10,146 +8,129 @@
# See comments in ../x86/guard_compat.py.
-MANAGED_REGS_WITHOUT_R7_AND_R10 = list(r.MANAGED_REGS)
-MANAGED_REGS_WITHOUT_R7_AND_R10.remove(r.r7)
-MANAGED_REGS_WITHOUT_R7_AND_R10.remove(r.r10)
-
-
def build_once(assembler):
"""Generate the 'search_tree' block of code"""
- # called with r2 containing the BACKEND_CHOICES object,
- # and r0 containing the actual value of the guard
+ # Called with lr containing the BACKEND_CHOICES object, and r1
+ # containing the actual value of the guard. The old value of r1
+ # is pushed on the stack. Additionally, r0 and lr are already
+ # pushed on the stack as well (the same values as the ones passed
+ # in).
mc = PPCBuilder()
- r0 = r.SCRATCH
- r2 = r.SCRATCH2
- r3 = r.r3
- r4 = r.r4
- r5 = r.r5
- r7 = r.r7
- r10 = r.r10
-
- # save the values of r7 and r10 in the jitframe
- assembler._push_core_regs_to_jitframe(mc, [r7, r10])
-
- # save the original value of r2 for later
- mc.std(r2.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
+ r0 = r.r0
+ r1 = r.r1
+ lr = r.lr
+ ip = r.ip
ofs1 = _real_number(BCLIST + BCLISTLENGTHOFS)
ofs2 = _real_number(BCLIST + BCLISTITEMSOFS)
- assert ofs2 - 8 == ofs1
- mc.ldu(r10.value, r2.value, ofs1) # ldu r10, [r2 + bc_list.length]
- mc.sldi(r10.value, r10.value, 3) # sldi r10, r10, 3
- # in the sequel, "r2 + 8" is a pointer to the leftmost array item of
+ mc.LDR_ri(r0.value, lr.value, ofs1) # LDR r0, [lr + bc_list.length]
+ mc.ADD_ri(lr.value, lr.value, imm=ofs2 - WORD) # ADD lr, lr, $items - 4
+ # ^^^ NB. this could be done with a single LDR in "pre-indexed" mode
+ mc.LSL_ri(r0.value, r0.value, 2) # LSL r0, r0, 2
+ # in the sequel, "lr + 4" is a pointer to the leftmost array item of
# the range still under consideration. The length of this range,
- # which is always a power-of-two-minus-1, is equal to "r10 / 8".
- b_location = mc.get_relative_pos()
- mc.trap() # b loop
+ # which is always a power-of-two-minus-1, is equal to "r0 / 4".
+ b_location = mc.currpos()
+ mc.BKPT() # B loop
right_label = mc.get_relative_pos()
- mc.add(r2.value, r2.value, r10.value) # add r2, r2, r10
- mc.addi(r2.value, r2.value, WORD) # addi r2, r2, 8
+ mc.ADD_rr(lr.value, lr.value, r0.value)# ADD lr, lr, r0
+ mc.ADD_ri(lr.value, lr.value, WORD) # ADD lr, lr, 4
left_label = mc.get_relative_pos()
- mc.rldicrx(r10.value, r10.value, 63, 60)# rldicrx r10, r10, 63, 60
- # ^^ note: this does r10 = (r10 >> 1) & ~7, and sets the "EQ" flag
- # if the result is equal to zero
+ mc.LSR_ri(r0.value, r0.value, 1) # LSR r0, r0, 1
+ mc.SUBS_ri(r0.value, r0.value, 4) # SUBS r0, r0, 4
beq_location = mc.get_relative_pos()
- mc.trap() # beq not_found
+ mc.trap() # BEQ not_found
# loop:
- pmc = OverwritingBuilder(mc, b_location, 1)
- pmc.b(mc.currpos() - b_location) # jump here unconditionally
- pmc.overwrite()
- mc.ldx(r7.value, r2.value, r10.value) # ldx r7, [r2 + r10]
- mc.cmp_op(0, r0.value, r7.value,
- signed=False) # cmp r0, r7
- mc.bgt(right_label - mc.currpos()) # bgt right_label
- mc.bne(left_label - mc.currpos()) # bne left_label
+ pmc = OverwritingBuilder(mc, b_location, WORD)
+ pmc.B_offs(mc.currpos(), c.AL)
+ mc.LDR_rr(ip.value, lr.value, r0.value)# LDR ip, [lr + r0]
+ mc.CMP_rr(r1.value, ip.value) # CMP r1, ip
+ mc.B_offs(right_label - mc.currpos(), c.GT) # BGT right_label
+ mc.B_offs(left_label - mc.currpos(), c.NE) # BNE left_label
- # found:
- mc.add(r2.value, r2.value, r10.value) # add r2, r2, r10
- mc.ld(r10.value, r2.value, 8) # ld r10, [r2 + 8]
+ # found:
+ mc.ADD_rr(ip.value, lr.value, r0.value)# ADD ip, lr, r0
+ mc.LDR_ri(ip.value, ip.value, WORD) # LDR ip, [ip + 4]
- # restore the value of r2 from the stack
- mc.ld(r2.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) # ld r2, [sp + ..]
+ mc.POP([lr.value]) # POP {lr}
ofs = _real_number(BCMOSTRECENT)
- mc.std(r0.value, r2.value, ofs) # std r0, [r2 + bc_most_recent]
- mc.std(r10.value, r2.value, ofs + WORD) # std r0, [r2 + bc_most_recent + 8]
- mc.mtctr(r10.value)
+ mc.STR(r1.value, lr.value, ofs) # STR r1, [lr + bc_most_recent]
+ mc.STR(ip.value, lr.value, ofs + WORD) # STR ip, [lr + bc_most_recent + 4]
- # restore the values of r7 and r10 from the jitframe
- assembler._pop_core_regs_from_jitframe(mc, [r7, r10])
-
- mc.bctr() # jump to the old r10
+ mc.POP([r0.value, r1.value]) # POP {r0, r1}
+ mc.BX(ip.value) # BX ip
# ----------
# not_found:
pmc = OverwritingBuilder(mc, beq_location, 1)
- pmc.beq(mc.currpos() - beq_location) # jump here if r10 < 8
+ pmc.B(mc.currpos() - beq_location, c.EQ) # jump here if r0 is now 0
pmc.overwrite()
- # save all other registers to the jitframe SPP, in addition to
- # r7 and r10 which have already been saved
- assembler._push_core_regs_to_jitframe(mc, MANAGED_REGS_WITHOUT_R7_AND_R10)
- assembler._push_fp_regs_to_jitframe(mc)
+ # save all registers to the jitframe, except r0 and r1
+ assembler._push_all_regs_to_jitframe(mc, [r0, r1], withfloats=True)
- # arg #1 (r3): the BACKEND_CHOICES objects, from the original value of r2
- # arg #2 (r4): the actual value of the guard, from r0
- # arg #3 (r5): the jitframe
- mc.ld(r3.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) # ld r3, [sp + ..]
- mc.mr(r4.value, r0.value)
- mc.mr(r5.value, r.SPP.value)
+ # pop the three values from the stack:
+ # r2 = saved value originally in r0
+ # r3 = saved value originally in r1
+ # lr = BACKEND_CHOICES object
+ mc.POP([r2.value, r3.value, lr.value])
+
+ # save r2 and r3 into the jitframe, at locations for r0 and r1
+ assert r.all_regs[0] is r0
+ assert r.all_regs[1] is r1
+ base_ofs = assembler.cpu.get_baseofs_of_frame_field()
+ assembler.store_reg(mc, r2, r.fp, base_ofs + 0 * WORD)
+ assembler.store_reg(mc, r3, r.fp, base_ofs + 1 * WORD)
+
+ # arg #1 (r0): the BACKEND_CHOICES object, from the original value of lr
+ # arg #2 (r1): the actual value of the guard, already in r1
+ # arg #3 (r2): the jitframe
+ mc.MOV_rr(r0.value, lr.value)
+ mc.MOV_rr(r2.value, r.fp.value)
invoke_find_compatible = make_invoke_find_compatible(assembler.cpu)
llfunc = llhelper(INVOKE_FIND_COMPATIBLE_FUNC, invoke_find_compatible)
llfunc = assembler.cpu.cast_ptr_to_int(llfunc)
- mc.load_imm(mc.RAW_CALL_REG, llfunc)
- mc.raw_call() # mtctr / bctrl
+ mc.BL(llfunc)
assembler._reload_frame_if_necessary(mc)
- mc.mtctr(r3.value) # mtctr r3
+ mc.MOV_rr(lr.value, r0.value)
# restore the registers that the CALL has clobbered, plus the ones
# containing GC pointers that may have moved. That means we just
# restore them all.
- assembler._pop_core_regs_from_jitframe(mc)
- assembler._pop_fp_regs_from_jitframe(mc)
+ assembler._pop_all_regs_from_jitframe(mc, [], withfloats=True)
- mc.bctr() # jump to the old r3
+ mc.BX(lr.value) # jump to the return value above
assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, [])
- #print hex(assembler.guard_compat_search_tree)
- #raw_input('press enter...')
+ print hex(assembler.guard_compat_search_tree)
+ raw_input('press enter...')
def generate_guard_compatible(assembler, guard_token, l0, bindex):
mc = assembler.mc
- r0 = r.SCRATCH
- r2 = r.SCRATCH2
+ ip = r.ip
+ lr = r.lr
+ r4 = r.r4
- assembler._load_from_gc_table(r2, r2, bindex) # ld r2, [gc tbl at bindex]
+ assembler.load_from_gc_table(lr.value, bindex) # LDR lr, [gctbl at bindex]
ofs = _real_number(BCMOSTRECENT)
- mc.ld(r0.value, r2.value, ofs) # ld r0, [r2 + bc_most_recent]
- mc.cmp_op(0, l0.value, r0.value) # cmp l0, r0
+ mc.LDR_ri(ip.value, lr.value, ofs) # LDR ip, [lr + bc_most_recent]
+ mc.CMP_rr(l0.value, ip.value) # CMP l0, ip
- bne_location = mc.get_relative_pos()
- mc.trap() # patched later to a 'bc'
+ mc.LDR_ri(ip.value, lr.value, # LDR.EQ ip, [lr + most_recent + 8]
+ ofs + WORD, cond=c.EQ)
+ mc.BR(ip.value, cond=c.EQ) # BR.EQ ip
- mc.ld(r2.value, r2.value, ofs + WORD) # ld r2, [r2 + bc_most_recent + 8]
- mc.mtctr(r2.value)
- mc.bctr() # jump to r2
-
- # slowpath:
- pmc = OverwritingBuilder(mc, bne_location, 1)
- pmc.bne(mc.currpos() - bne_location) # jump here if l0 != r0
- pmc.overwrite()
-
- mc.load_imm(r0, assembler.guard_compat_search_tree)
- mc.mtctr(r0.value)
- mc.mr(r0.value, l0.value)
- mc.bctr()
+ mc.PUSH([r0.value, r1.value, lr.value]) # PUSH {r0, r1, lr}
+ mc.MOV_rr(r1.value, l0.value) # MOV r1, l0
+ mc.BL(assembler.guard_compat_search_tree) # MOVW/MOVT ip, BLX ip
# abuse this field to store the 'sequel' relative offset
guard_token.pos_jump_offset = mc.get_relative_pos()
diff --git a/rpython/jit/backend/arm/opassembler.py
b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -17,6 +17,7 @@
from rpython.jit.backend.arm.jump import remap_frame_layout
from rpython.jit.backend.arm.regalloc import TempVar
from rpython.jit.backend.arm.locations import imm, RawSPStackLocation
+from rpython.jit.backend.arm import guard_compat
from rpython.jit.backend.llsupport import symbolic
from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler
@@ -190,8 +191,9 @@
fcond=fcond)
return token
- def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False):
- if is_guard_not_invalidated:
+ def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False,
+ is_guard_compatible=False):
+ if is_guard_not_invalidated or is_guard_compatible:
fcond = c.cond_none
else:
fcond = self.guard_success_cc
@@ -204,20 +206,22 @@
# For all guards that are not GUARD_NOT_INVALIDATED we emit a
# breakpoint to ensure the location is patched correctly. In the case
# of GUARD_NOT_INVALIDATED we use just a NOP, because it is only
- # eventually patched at a later point.
- if is_guard_not_invalidated:
- self.mc.NOP()
- else:
- self.mc.BKPT()
- return c.AL
+ # eventually patched at a later point. For GUARD_COMPATIBLE, we
+ # use a completely different mechanism.
+ if not is_guard_compatible:
+ if is_guard_not_invalidated:
+ self.mc.NOP()
+ else:
+ self.mc.BKPT()
+ return token
def emit_op_guard_true(self, op, arglocs, regalloc, fcond):
- fcond = self._emit_guard(op, arglocs)
+ self._emit_guard(op, arglocs)
return fcond
def emit_op_guard_false(self, op, arglocs, regalloc, fcond):
self.guard_success_cc = c.get_opposite_of(self.guard_success_cc)
- fcond = self._emit_guard(op, arglocs)
+ self._emit_guard(op, arglocs)
return fcond
def emit_op_guard_value(self, op, arglocs, regalloc, fcond):
@@ -235,9 +239,16 @@
self.mc.VCMP(l0.value, l1.value)
self.mc.VMRS(cond=fcond)
self.guard_success_cc = c.EQ
- fcond = self._emit_guard(op, failargs)
+ self._emit_guard(op, failargs)
return fcond
+ def emit_op_guard_compatible(self, op, arglocs, regalloc, fcond):
+ l0 = arglocs[0]
+ assert l0.is_core_reg()
+ bindex = op.getarg(1).getint()
+ token = self._emit_guard(op, arglocs[1:], is_guard_compatible=True)
+ guard_compat.generate_guard_compatible(self, token, l0, bindex)
+
emit_op_guard_nonnull = emit_op_guard_true
emit_op_guard_isnull = emit_op_guard_false
@@ -348,7 +359,8 @@
return fcond
def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond):
- return self._emit_guard(op, locs, is_guard_not_invalidated=True)
+ self._emit_guard(op, locs, is_guard_not_invalidated=True)
+ return fcond
def emit_op_label(self, op, arglocs, regalloc, fcond):
self._check_frame_depth_debug(self.mc)
@@ -487,7 +499,7 @@
self.mc.LDR_ri(loc.value, loc.value)
self.mc.CMP_ri(loc.value, 0)
self.guard_success_cc = c.EQ
- fcond = self._emit_guard(op, failargs)
+ self._emit_guard(op, failargs)
# If the previous operation was a COND_CALL, overwrite its conditional
# jump to jump over this GUARD_NO_EXCEPTION as well, if we can
if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL:
diff --git a/rpython/jit/backend/arm/regalloc.py
b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -665,7 +665,18 @@
else:
l1 = self.convert_to_imm(a1)
arglocs = self._prepare_guard(op, [l0, l1])
- self.possibly_free_vars(op.getarglist())
+ self.possibly_free_vars(boxes)
+ self.possibly_free_vars(op.getfailargs())
+ return arglocs
+
+ def prepare_op_guard_compatible(self, op, fcond):
+ op.getdescr().make_a_counter_per_value(op, -1) # -1 not used here
+ args = op.getarglist()
+ assert args[0].type == REF # only supported case for now
+ assert isinstance(args[1], ConstInt) # by rewrite.py
+ x = self.make_sure_var_in_reg(args[0], args)
+ arglocs = self._prepare_guard(op, [x])
+ self.possibly_free_vars(args)
self.possibly_free_vars(op.getfailargs())
return arglocs
diff --git a/rpython/jit/backend/arm/registers.py
b/rpython/jit/backend/arm/registers.py
--- a/rpython/jit/backend/arm/registers.py
+++ b/rpython/jit/backend/arm/registers.py
@@ -18,9 +18,9 @@
# aliases for registers
fp = r11
-ip = r12
+ip = r12 # ip is used as a general scratch register
sp = r13
-lr = r14
+lr = r14 # lr is used as a general scratch register
pc = r15
vfp_ip = d15
svfp_ip = s31
diff --git a/rpython/jit/backend/ppc/test/test_compatible.py
b/rpython/jit/backend/arm/test/test_compatible.py
copy from rpython/jit/backend/ppc/test/test_compatible.py
copy to rpython/jit/backend/arm/test/test_compatible.py
--- a/rpython/jit/backend/ppc/test/test_compatible.py
+++ b/rpython/jit/backend/arm/test/test_compatible.py
@@ -1,6 +1,6 @@
-from rpython.jit.backend.ppc.test.support import JitPPCMixin
+from rpython.jit.backend.arm.test.support import JitARMMixin
from rpython.jit.metainterp.test import test_compatible
-class TestCompatible(JitPPCMixin, test_compatible.TestCompatible):
+class TestCompatible(JitARMMixin, test_compatible.TestCompatible):
pass
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit