Author: Armin Rigo <ar...@tunes.org> Branch: guard-compatible Changeset: r84685:14806afb0d3d Date: 2016-05-25 15:59 +0200 http://bitbucket.org/pypy/pypy/changeset/14806afb0d3d/
Log: arm: first draft diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -28,6 +28,7 @@ from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref from rpython.rtyper.lltypesystem import lltype, rffi from rpython.jit.backend.arm import callbuilder +from rpython.jit.backend.arm import guard_compat from rpython.rtyper.lltypesystem.lloperation import llop class AssemblerARM(ResOpAssembler): @@ -479,6 +480,9 @@ rawstart = mc.materialize(self.cpu, []) self.failure_recovery_code[exc + 2 * withfloats] = rawstart + def _build_guard_compat_slowpath(self): + guard_compat.build_once(self) + def generate_quick_failure(self, guardtok): startpos = self.mc.currpos() faildescrindex, target = self.store_info_on_descr(startpos, guardtok) @@ -759,11 +763,16 @@ def patch_gcref_table(self, looptoken, rawstart): # the gc table is at the start of the machine code. Fill it now + self.gc_table_addr = rawstart tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart, self._allgcrefs) gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken) gcreftracers.append(tracer) # keepalive self.teardown_gcrefs_list() + self.gc_table_tracer = tracer + + def _addr_from_gc_table(self, index): + return self.gc_table_addr + index * WORD def load_from_gc_table(self, regnum, index): """emits either: @@ -985,6 +994,13 @@ assert isinstance(descr, AbstractFailDescr) failure_recovery_pos = block_start + tok.pos_recovery_stub descr.adr_jump_offset = failure_recovery_pos + # + if tok.guard_compatible(): + guard_compat.patch_guard_compatible(tok, block_start, + self._addr_from_gc_table, + self.gc_table_tracer) + continue + # relative_offset = tok.pos_recovery_stub - tok.offset guard_pos = block_start + tok.offset if not tok.guard_not_invalidated(): @@ -1037,6 +1053,9 @@ return fcond def patch_trace(self, faildescr, looptoken, bridge_addr, regalloc): + if isinstance(faildescr, 
guard_compat.GuardCompatibleDescr): + guard_compat.invalidate_cache(faildescr) + return b = InstrBuilder(self.cpu.cpuinfo.arch_version) patch_addr = faildescr.adr_jump_offset assert patch_addr != 0 diff --git a/rpython/jit/backend/ppc/guard_compat.py b/rpython/jit/backend/arm/guard_compat.py copy from rpython/jit/backend/ppc/guard_compat.py copy to rpython/jit/backend/arm/guard_compat.py --- a/rpython/jit/backend/ppc/guard_compat.py +++ b/rpython/jit/backend/arm/guard_compat.py @@ -1,8 +1,6 @@ from rpython.rtyper.annlowlevel import llhelper -import rpython.jit.backend.ppc.register as r -from rpython.jit.backend.ppc.arch import WORD, PARAM_SAVE_AREA_OFFSET -from rpython.jit.backend.ppc.codebuilder import PPCBuilder, OverwritingBuilder - +from rpython.jit.backend.arm import conditions as c +from rpython.jit.backend.arm import registers as r from rpython.jit.backend.llsupport.guard_compat import * from rpython.jit.backend.llsupport.guard_compat import _real_number @@ -10,146 +8,129 @@ # See comments in ../x86/guard_compat.py. -MANAGED_REGS_WITHOUT_R7_AND_R10 = list(r.MANAGED_REGS) -MANAGED_REGS_WITHOUT_R7_AND_R10.remove(r.r7) -MANAGED_REGS_WITHOUT_R7_AND_R10.remove(r.r10) - - def build_once(assembler): """Generate the 'search_tree' block of code""" - # called with r2 containing the BACKEND_CHOICES object, - # and r0 containing the actual value of the guard + # Called with lr containing the BACKEND_CHOICES object, and r1 + # containing the actual value of the guard. The old value of r1 + # is pushed on the stack. Additionally, r0 and lr are already + # pushed on the stack as well (the same values as the one passed + # in). 
mc = PPCBuilder() - r0 = r.SCRATCH - r2 = r.SCRATCH2 - r3 = r.r3 - r4 = r.r4 - r5 = r.r5 - r7 = r.r7 - r10 = r.r10 - - # save the values of r7 and r10 in the jitframe - assembler._push_core_regs_to_jitframe(mc, [r7, r10]) - - # save the original value of r2 for later - mc.std(r2.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) + r0 = r.r0 + r1 = r.r1 + lr = r.lr + ip = r.ip ofs1 = _real_number(BCLIST + BCLISTLENGTHOFS) ofs2 = _real_number(BCLIST + BCLISTITEMSOFS) - assert ofs2 - 8 == ofs1 - mc.ldu(r10.value, r2.value, ofs1) # ldu r10, [r2 + bc_list.length] - mc.sldi(r10.value, r10.value, 3) # sldi r10, r10, 3 - # in the sequel, "r2 + 8" is a pointer to the leftmost array item of + mc.LDR_ri(r0.value, lr.value, ofs1) # LDR r0, [lr + bc_list.length] + mc.ADD_ri(lr.value, lr.value, imm=ofs2 - WORD) # ADD lr, lr, $items - 4 + # ^^^ NB. this could be done with a single LDR in "pre-indexed" mode + mc.LSL_ri(r0.value, r0.value, 2) # LSL r0, r0, 2 + # in the sequel, "lr + 4" is a pointer to the leftmost array item of # the range still under consideration. The length of this range, - # which is always a power-of-two-minus-1, is equal to "r10 / 8". - b_location = mc.get_relative_pos() - mc.trap() # b loop + # which is always a power-of-two-minus-1, is equal to "r0 / 4". 
+ b_location = mc.currpos() + mc.BKPT() # B loop right_label = mc.get_relative_pos() - mc.add(r2.value, r2.value, r10.value) # add r2, r2, r10 - mc.addi(r2.value, r2.value, WORD) # addi r2, r2, 8 + mc.ADD_rr(lr.value, lr.value, r0.value)# ADD lr, lr, r0 + mc.ADD_ri(lr.value, lr.value, WORD) # ADD lr, lr, 4 left_label = mc.get_relative_pos() - mc.rldicrx(r10.value, r10.value, 63, 60)# rldicrx r10, r10, 63, 60 - # ^^ note: this does r10 = (r10 >> 1) & ~7, and sets the "EQ" flag - # if the result is equal to zero + mc.LSR_ri(r0.value, r0.value, 1) # LSR r0, r0, 1 + mc.SUBS_ri(r0.value, r0.value, 4) # SUBS r0, r0, 4 beq_location = mc.get_relative_pos() - mc.trap() # beq not_found + mc.trap() # BEQ not_found # loop: - pmc = OverwritingBuilder(mc, b_location, 1) - pmc.b(mc.currpos() - b_location) # jump here unconditionally - pmc.overwrite() - mc.ldx(r7.value, r2.value, r10.value) # ldx r7, [r2 + r10] - mc.cmp_op(0, r0.value, r7.value, - signed=False) # cmp r0, r7 - mc.bgt(right_label - mc.currpos()) # bgt right_label - mc.bne(left_label - mc.currpos()) # bne left_label + pmc = OverwritingBuilder(mc, b_location, WORD) + pmc.B_offs(mc.currpos(), c.AL) + mc.LDR_rr(ip.value, lr.value, r0.value)# LDR ip, [lr + r0] + mc.CMP_rr(r1.value, ip.value) # CMP r1, ip + mc.B_offs(right_label - mc.currpos(), c.GT) # BGT right_label + mc.B_offs(left_label - mc.currpos(), c.NE) # BNE left_label - # found: - mc.add(r2.value, r2.value, r10.value) # add r2, r2, r10 - mc.ld(r10.value, r2.value, 8) # ld r10, [r2 + 8] + # found: + mc.ADD_rr(ip.value, lr.value, r0.value)# ADD ip, lr, r0 + mc.LDR_ri(ip.value, ip.value, WORD) # LDR ip, [ip + 4] - # restore the value of r2 from the stack - mc.ld(r2.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) # ld r2, [sp + ..] 
+ mc.POP([lr.value]) # POP {lr} ofs = _real_number(BCMOSTRECENT) - mc.std(r0.value, r2.value, ofs) # std r0, [r2 + bc_most_recent] - mc.std(r10.value, r2.value, ofs + WORD) # std r0, [r2 + bc_most_recent + 8] - mc.mtctr(r10.value) + mc.STR(r1.value, lr.value, ofs) # STR r1, [lr + bc_most_recent] + mc.STR(ip.value, lr.value, ofs + WORD) # STR ip, [lr + bc_most_recent + 4] - # restore the values of r7 and r10 from the jitframe - assembler._pop_core_regs_from_jitframe(mc, [r7, r10]) - - mc.bctr() # jump to the old r10 + mc.POP([r0.value, r1.value]) # POP {r0, r1} + mc.BX(ip.value) # BX ip # ---------- # not_found: pmc = OverwritingBuilder(mc, beq_location, 1) - pmc.beq(mc.currpos() - beq_location) # jump here if r10 < 8 + pmc.B(mc.currpos() - beq_location, cond.EQ) # jump here if r0 is now 0 pmc.overwrite() - # save all other registers to the jitframe SPP, in addition to - # r7 and r10 which have already been saved - assembler._push_core_regs_to_jitframe(mc, MANAGED_REGS_WITHOUT_R7_AND_R10) - assembler._push_fp_regs_to_jitframe(mc) + # save all registers to the jitframe, except r0 and r1 + assembler._push_all_regs_to_jitframe(mc, [r0, r1], withfloats=True) - # arg #1 (r3): the BACKEND_CHOICES objects, from the original value of r2 - # arg #2 (r4): the actual value of the guard, from r0 - # arg #3 (r5): the jitframe - mc.ld(r3.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) # ld r3, [sp + ..]
- mc.mr(r4.value, r0.value) - mc.mr(r5.value, r.SPP.value) + # pop the three values from the stack: + # r2 = saved value originally in r0 + # r3 = saved value originally in r1 + # lr = BACKEND_CHOICES object + mc.POP([r2.value, r3.value, lr.value]) + + # save r2 and r3 into the jitframe, at locations for r0 and r1 + assert r.all_regs[0] is r0 + assert r.all_regs[1] is r1 + base_ofs = assembler.cpu.get_baseofs_of_frame_field() + assembler.store_reg(mc, r2, r.fp, base_ofs + 0 * WORD) + assembler.store_reg(mc, r3, r.fp, base_ofs + 1 * WORD) + + # arg #1 (r0): the BACKEND_CHOICES object, from the original value of lr + # arg #2 (r1): the actual value of the guard, already in r1 + # arg #3 (r2): the jitframe + mc.MOV_rr(r0.value, lr.value) + mc.MOV_rr(r2.value, r.fp.value) invoke_find_compatible = make_invoke_find_compatible(assembler.cpu) llfunc = llhelper(INVOKE_FIND_COMPATIBLE_FUNC, invoke_find_compatible) llfunc = assembler.cpu.cast_ptr_to_int(llfunc) - mc.load_imm(mc.RAW_CALL_REG, llfunc) - mc.raw_call() # mtctr / bctrl + mc.BL(llfunc) assembler._reload_frame_if_necessary(mc) - mc.mtctr(r3.value) # mtctr r3 + mc.MOV_rr(lr.value, r0.value) # restore the registers that the CALL has clobbered, plus the ones # containing GC pointers that may have moved. That means we just # restore them all.
- assembler._pop_core_regs_from_jitframe(mc) - assembler._pop_fp_regs_from_jitframe(mc) + assembler._pop_all_regs_from_jitframe(mc, [], withfloats=True) - mc.bctr() # jump to the old r3 + mc.BX(lr.value) # jump to the return value above assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, []) - #print hex(assembler.guard_compat_search_tree) - #raw_input('press enter...') + print hex(assembler.guard_compat_search_tree) + raw_input('press enter...') def generate_guard_compatible(assembler, guard_token, l0, bindex): mc = assembler.mc - r0 = r.SCRATCH - r2 = r.SCRATCH2 + ip = r.ip + lr = r.lr + r4 = r.r4 - assembler._load_from_gc_table(r2, r2, bindex) # ld r2, [gc tbl at bindex] + assembler.load_from_gc_table(lr.value, bindex) # LDR lr, [gctbl at bindex] ofs = _real_number(BCMOSTRECENT) - mc.ld(r0.value, r2.value, ofs) # ld r0, [r2 + bc_most_recent] - mc.cmp_op(0, l0.value, r0.value) # cmp l0, r0 + mc.LDR_ri(ip.value, lr.value, ofs) # LDR ip, [lr + bc_most_recent] + mc.CMP_rr(l0.value, ip.value) # CMP l0, ip - bne_location = mc.get_relative_pos() - mc.trap() # patched later to a 'bc' + mc.LDR_ri(ip.value, lr.value, # LDR.EQ ip, [lr + most_recent + 8] + ofs + WORD, cond=c.EQ) + mc.BR(ip.value, cond=c.EQ) # BR.EQ ip - mc.ld(r2.value, r2.value, ofs + WORD) # ld r2, [r2 + bc_most_recent + 8] - mc.mtctr(r2.value) - mc.bctr() # jump to r2 - - # slowpath: - pmc = OverwritingBuilder(mc, bne_location, 1) - pmc.bne(mc.currpos() - bne_location) # jump here if l0 != r0 - pmc.overwrite() - - mc.load_imm(r0, assembler.guard_compat_search_tree) - mc.mtctr(r0.value) - mc.mr(r0.value, l0.value) - mc.bctr() + mc.PUSH([r0.value, r1.value, lr.value]) # PUSH {r0, r1, lr} + mc.MOV_rr(r1.value, l0.value) # MOV r1, l0 + mc.BL(assembler.guard_compat_search_tree) # MOVW/MOVT ip, BLX ip # abuse this field to store the 'sequel' relative offset guard_token.pos_jump_offset = mc.get_relative_pos() diff --git a/rpython/jit/backend/arm/opassembler.py 
b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -17,6 +17,7 @@ from rpython.jit.backend.arm.jump import remap_frame_layout from rpython.jit.backend.arm.regalloc import TempVar from rpython.jit.backend.arm.locations import imm, RawSPStackLocation +from rpython.jit.backend.arm import guard_compat from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.gcmap import allocate_gcmap from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler @@ -190,8 +191,9 @@ fcond=fcond) return token - def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False): - if is_guard_not_invalidated: + def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False, + is_guard_compatible=False): + if is_guard_not_invalidated or is_guard_compatible: fcond = c.cond_none else: fcond = self.guard_success_cc @@ -204,20 +206,22 @@ # For all guards that are not GUARD_NOT_INVALIDATED we emit a # breakpoint to ensure the location is patched correctly. In the case # of GUARD_NOT_INVALIDATED we use just a NOP, because it is only - # eventually patched at a later point. - if is_guard_not_invalidated: - self.mc.NOP() - else: - self.mc.BKPT() - return c.AL + # eventually patched at a later point. For GUARD_COMPATIBLE, we + # use a completely different mechanism. 
+ if not is_guard_compatible: + if is_guard_not_invalidated: + self.mc.NOP() + else: + self.mc.BKPT() + return token def emit_op_guard_true(self, op, arglocs, regalloc, fcond): - fcond = self._emit_guard(op, arglocs) + self._emit_guard(op, arglocs) return fcond def emit_op_guard_false(self, op, arglocs, regalloc, fcond): self.guard_success_cc = c.get_opposite_of(self.guard_success_cc) - fcond = self._emit_guard(op, arglocs) + self._emit_guard(op, arglocs) return fcond def emit_op_guard_value(self, op, arglocs, regalloc, fcond): @@ -235,9 +239,16 @@ self.mc.VCMP(l0.value, l1.value) self.mc.VMRS(cond=fcond) self.guard_success_cc = c.EQ - fcond = self._emit_guard(op, failargs) + self._emit_guard(op, failargs) return fcond + def emit_op_guard_compatible(self, op, arglocs, regalloc, fcond): + l0 = arglocs[0] + assert l0.is_core_reg() + bindex = op.getarg(1).getint() + token = self._emit_guard(op, arglocs[1:], is_guard_compatible=True) + guard_compat.generate_guard_compatible(self, token, l0, bindex) + emit_op_guard_nonnull = emit_op_guard_true emit_op_guard_isnull = emit_op_guard_false @@ -348,7 +359,8 @@ return fcond def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond): - return self._emit_guard(op, locs, is_guard_not_invalidated=True) + self._emit_guard(op, locs, is_guard_not_invalidated=True) + return fcond def emit_op_label(self, op, arglocs, regalloc, fcond): self._check_frame_depth_debug(self.mc) @@ -487,7 +499,7 @@ self.mc.LDR_ri(loc.value, loc.value) self.mc.CMP_ri(loc.value, 0) self.guard_success_cc = c.EQ - fcond = self._emit_guard(op, failargs) + self._emit_guard(op, failargs) # If the previous operation was a COND_CALL, overwrite its conditional # jump to jump over this GUARD_NO_EXCEPTION as well, if we can if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL: diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ 
-665,7 +665,18 @@ else: l1 = self.convert_to_imm(a1) arglocs = self._prepare_guard(op, [l0, l1]) - self.possibly_free_vars(op.getarglist()) + self.possibly_free_vars(boxes) + self.possibly_free_vars(op.getfailargs()) + return arglocs + + def prepare_op_guard_compatible(self, op, fcond): + op.getdescr().make_a_counter_per_value(op, -1) # -1 not used here + args = op.getarglist() + assert args[0].type == REF # only supported case for now + assert isinstance(args[1], ConstInt) # by rewrite.py + x = self.make_sure_var_in_reg(args[0], args) + arglocs = self._prepare_guard(op, [x]) + self.possibly_free_vars(args) self.possibly_free_vars(op.getfailargs()) return arglocs diff --git a/rpython/jit/backend/arm/registers.py b/rpython/jit/backend/arm/registers.py --- a/rpython/jit/backend/arm/registers.py +++ b/rpython/jit/backend/arm/registers.py @@ -18,9 +18,9 @@ # aliases for registers fp = r11 -ip = r12 +ip = r12 # ip is used as a general scratch register sp = r13 -lr = r14 +lr = r14 # lr is used as a general scratch register pc = r15 vfp_ip = d15 svfp_ip = s31 diff --git a/rpython/jit/backend/ppc/test/test_compatible.py b/rpython/jit/backend/arm/test/test_compatible.py copy from rpython/jit/backend/ppc/test/test_compatible.py copy to rpython/jit/backend/arm/test/test_compatible.py --- a/rpython/jit/backend/ppc/test/test_compatible.py +++ b/rpython/jit/backend/arm/test/test_compatible.py @@ -1,6 +1,6 @@ -from rpython.jit.backend.ppc.test.support import JitPPCMixin +from rpython.jit.backend.arm.test.support import JitARMMixin from rpython.jit.metainterp.test import test_compatible -class TestCompatible(JitPPCMixin, test_compatible.TestCompatible): +class TestCompatible(JitARMMixin, test_compatible.TestCompatible): pass _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit