Author: edelsohn
Branch: ppc-jit-backend
Changeset: r56860:6b8cbecfe63f
Date: 2012-08-25 23:17 -0400
http://bitbucket.org/pypy/pypy/changeset/6b8cbecfe63f/

Log:    update some arm files for comparison.

diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -59,6 +59,7 @@
         self._exit_code_addr = 0
         self.current_clt = None
         self.malloc_slowpath = 0
+        self.wb_slowpath = [0, 0, 0, 0]
         self._regalloc = None
         self.datablockwrapper = None
         self.propagate_exception_path = 0
@@ -107,6 +108,11 @@
         # Addresses of functions called by new_xxx operations
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
+        self._build_wb_slowpath(False)
+        self._build_wb_slowpath(True)
+        if self.cpu.supports_floats:
+            self._build_wb_slowpath(False, withfloats=True)
+            self._build_wb_slowpath(True, withfloats=True)
         self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
@@ -286,6 +292,45 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
+    def _build_wb_slowpath(self, withcards, withfloats=False):
+        descr = self.cpu.gc_ll_descr.write_barrier_descr
+        if descr is None:
+            return
+        if not withcards:
+            func = descr.get_write_barrier_fn(self.cpu)
+        else:
+            if descr.jit_wb_cards_set == 0:
+                return
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            if func == 0:
+                return
+        #
+        # This builds a helper function called from the slow path of
+        # write barriers.  It must save all registers, and optionally
+        # all vfp registers.  It takes a single argument which is in r0.
+        # It must keep stack alignment accordingly.
+        mc = ARMv7Builder()
+        #
+        if withfloats:
+            floats = r.caller_vfp_resp
+        else:
+            floats = []
+        with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
+            mc.BL(func)
+        #
+        if withcards:
+            # A final TEST8 before the RET, for the caller.  Careful to
+            # not follow this instruction with another one that changes
+            # the status of the CPU flags!
+            mc.LDRB_ri(r.ip.value, r.r0.value,
+                                    imm=descr.jit_wb_if_flag_byteofs)
+            mc.TST_ri(r.ip.value, imm=0x80)
+        #
+        mc.MOV_rr(r.pc.value, r.lr.value)
+        #
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
     def setup_failure_recovery(self):
 
         @rgc.no_collect
@@ -429,11 +474,14 @@
 
     def _build_malloc_slowpath(self):
         mc = ARMv7Builder()
-        assert self.cpu.supports_floats
+        if self.cpu.supports_floats:
+            vfp_regs = r.all_vfp_regs
+        else:
+            vfp_regs = []
         # We need to push two registers here because we are going to make a
         # call an therefore the stack needs to be 8-byte aligned
         mc.PUSH([r.ip.value, r.lr.value])
-        with saved_registers(mc, [], r.all_vfp_regs):
+        with saved_registers(mc, [], vfp_regs):
             # At this point we know that the values we need to compute the size
             # are stored in r0 and r1.
             mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value)
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -2,7 +2,7 @@
 from pypy.jit.backend.arm import conditions as c
 from pypy.jit.backend.arm import registers as r
 from pypy.jit.backend.arm import shift
-from pypy.jit.backend.arm.arch import WORD
+from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD
 
 from pypy.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
                                                 gen_emit_op_unary_cmp,
@@ -370,31 +370,69 @@
 
     def _emit_call(self, force_index, adr, arglocs, fcond=c.AL, 
                                                  resloc=None, result_info=(-1,-1)):
+        if self.cpu.use_hf_abi:
+            stack_args, adr = self._setup_call_hf(force_index, adr, arglocs, fcond, resloc, result_info)
+        else:
+            stack_args, adr = self._setup_call_sf(force_index, adr, arglocs, fcond, resloc, result_info)
+
+        #the actual call
+        #self.mc.BKPT()
+        if adr.is_imm():
+            self.mc.BL(adr.value)
+        elif adr.is_stack():
+            self.mov_loc_loc(adr, r.ip)
+            adr = r.ip
+        else:
+            assert adr.is_reg()
+        if adr.is_reg():
+            self.mc.BLX(adr.value)
+        self.mark_gc_roots(force_index)
+        self._restore_sp(stack_args, fcond)
+
+        # ensure the result is wellformed and stored in the correct location
+        if resloc is not None:
+            if resloc.is_vfp_reg() and not self.cpu.use_hf_abi:
+                # move result to the allocated register
+                self.mov_to_vfp_loc(r.r0, r.r1, resloc)
+            elif resloc.is_reg() and result_info != (-1, -1):
+                self._ensure_result_bit_extension(resloc, result_info[0],
+                                                          result_info[1])
+        return fcond
+
+    def _restore_sp(self, stack_args, fcond):
+        # readjust the sp in case we passed some args on the stack
+        if len(stack_args) > 0:
+            n = 0
+            for arg in stack_args:
+                if arg is None or arg.type != FLOAT:
+                    n += WORD
+                else:
+                    n += DOUBLE_WORD
+            self._adjust_sp(-n, fcond=fcond)
+            assert n % 8 == 0 # sanity check
+
+    def _collect_stack_args_sf(self, arglocs):
         n_args = len(arglocs)
         reg_args = count_reg_args(arglocs)
         # all arguments past the 4th go on the stack
-        n = 0   # used to count the number of words pushed on the stack, so we
-                #can later modify the SP back to its original value
+        # first we need to prepare the list so it stays aligned
+        stack_args = []
+        count = 0
         if n_args > reg_args:
-            # first we need to prepare the list so it stays aligned
-            stack_args = []
-            count = 0
             for i in range(reg_args, n_args):
                 arg = arglocs[i]
                 if arg.type != FLOAT:
                     count += 1
-                    n += WORD
                 else:
-                    n += 2 * WORD
                     if count % 2 != 0:
                         stack_args.append(None)
-                        n += WORD
                         count = 0
                 stack_args.append(arg)
             if count % 2 != 0:
-                n += WORD
                 stack_args.append(None)
+        return stack_args
 
+    def _push_stack_args(self, stack_args):
             #then we push every thing on the stack
             for i in range(len(stack_args) - 1, -1, -1):
                 arg = stack_args[i]
@@ -402,6 +440,13 @@
                     self.mc.PUSH([r.ip.value])
                 else:
                     self.regalloc_push(arg)
+
+    def _setup_call_sf(self, force_index, adr, arglocs, fcond=c.AL, 
+                                                 resloc=None, result_info=(-1,-1)):
+        n_args = len(arglocs)
+        reg_args = count_reg_args(arglocs)
+        stack_args = self._collect_stack_args_sf(arglocs)
+        self._push_stack_args(stack_args)
         # collect variables that need to go in registers and the registers they
         # will be stored in
         num = 0
@@ -440,32 +485,55 @@
 
         for loc, reg in float_locs:
             self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1])
+        return stack_args, adr
 
-        #the actual call
-        if adr.is_imm():
-            self.mc.BL(adr.value)
-        elif adr.is_stack():
-            self.mov_loc_loc(adr, r.ip)
-            adr = r.ip
-        else:
-            assert adr.is_reg()
-        if adr.is_reg():
-            self.mc.BLX(adr.value)
-        self.mark_gc_roots(force_index)
-        # readjust the sp in case we passed some args on the stack
-        if n > 0:
-            self._adjust_sp(-n, fcond=fcond)
 
-        # ensure the result is wellformed and stored in the correct location
-        if resloc is not None:
-            if resloc.is_vfp_reg():
-                # move result to the allocated register
-                self.mov_to_vfp_loc(r.r0, r.r1, resloc)
-            elif result_info != (-1, -1):
-                self._ensure_result_bit_extension(resloc, result_info[0],
-                                                          result_info[1])
+    def _setup_call_hf(self, force_index, adr, arglocs, fcond=c.AL, 
+                                                 resloc=None, result_info=(-1,-1)):
+        n_reg_args = n_vfp_args = 0
+        non_float_locs = []
+        non_float_regs = []
+        float_locs = []
+        float_regs = []
+        stack_args = []
+        count = 0                      # stack alignment counter
+        for arg in arglocs:
+            if arg.type != FLOAT:
+                if len(non_float_regs) < len(r.argument_regs):
+                    reg = r.argument_regs[len(non_float_regs)]
+                    non_float_locs.append(arg)
+                    non_float_regs.append(reg)
+                else: # non-float argument that needs to go on the stack 
+                    count += 1
+                    stack_args.append(arg)
+            else:
+                if len(float_regs) < len(r.vfp_argument_regs): 
+                    reg = r.vfp_argument_regs[len(float_regs)]
+                    float_locs.append(arg)
+                    float_regs.append(reg)
+                else: # float argument that needs to go on the stack
+                    if count % 2 != 0:
+                        stack_args.append(None)
+                        count = 0
+                    stack_args.append(arg)
+        # align the stack
+        if count % 2 != 0:
+            stack_args.append(None)
+        self._push_stack_args(stack_args)
+        # Check that the address of the function we want to call is not
+        # currently stored in one of the registers used to pass the arguments.
+        # If this happens to be the case we remap the register to r4 and use r4
+        # to call the function
+        if adr in non_float_regs:
+            non_float_locs.append(adr)
+            non_float_regs.append(r.r4)
+            adr = r.r4
+        # remap values stored in core registers
+        remap_frame_layout(self, non_float_locs, non_float_regs, r.ip)
+        # remap values stored in vfp registers
+        remap_frame_layout(self, float_locs, float_regs, r.vfp_ip)
 
-        return fcond
+        return stack_args, adr
 
     def emit_op_same_as(self, op, arglocs, regalloc, fcond):
         argloc, resloc = arglocs
@@ -506,32 +574,30 @@
 
     def emit_op_cond_call_gc_wb(self, op, arglocs, regalloc, fcond):
         # Write code equivalent to write_barrier() in the GC: it checks
-        # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The two arguments
-        # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
-        # registers that need to be saved and restored across the call.
+        # a flag in the object at arglocs[0], and if set, it calls a
+        # helper piece of assembler.  The latter saves registers as needed
+        # and call the function jit_remember_young_pointer() from the GC.
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
-
+        #
         opnum = op.getopnum()
-        if opnum == rop.COND_CALL_GC_WB:
-            N = 2
-            addr = descr.get_write_barrier_fn(self.cpu)
-            card_marking = False
-        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
-            N = 3
-            addr = descr.get_write_barrier_from_array_fn(self.cpu)
-            assert addr != 0
-            card_marking = descr.jit_wb_cards_set != 0
-        else:
-            raise AssertionError(opnum)
+        card_marking = False
+        mask = descr.jit_wb_if_flag_singlebyte
+        if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+            # assumptions the rest of the function depends on:
+            assert (descr.jit_wb_cards_set_byteofs ==
+                    descr.jit_wb_if_flag_byteofs)
+            assert descr.jit_wb_cards_set_singlebyte == -0x80
+            card_marking = True
+            mask = descr.jit_wb_if_flag_singlebyte | -0x80
+        #
         loc_base = arglocs[0]
-        assert check_imm_arg(descr.jit_wb_if_flag_byteofs)
-        assert check_imm_arg(descr.jit_wb_if_flag_singlebyte)
-        self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_if_flag_byteofs)
-        self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_if_flag_singlebyte)
+        self.mc.LDRB_ri(r.ip.value, loc_base.value,
+                                    imm=descr.jit_wb_if_flag_byteofs)
+        mask &= 0xFF
+        self.mc.TST_ri(r.ip.value, imm=mask)
 
         jz_location = self.mc.currpos()
         self.mc.BKPT()
@@ -539,68 +605,80 @@
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            assert check_imm_arg(descr.jit_wb_cards_set_byteofs)
-            assert check_imm_arg(descr.jit_wb_cards_set_singlebyte)
-            self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_cards_set_byteofs)
-            self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_cards_set_singlebyte)
-            #
-            jnz_location = self.mc.currpos()
+            # GCFLAG_CARDS_SET is in this byte at 0x80
+            self.mc.TST_ri(r.ip.value, imm=0x80)
+
+            js_location = self.mc.currpos() # 
+            self.mc.BKPT()
+        else:
+            js_location = 0
+
+        # Write only a CALL to the helper prepared in advance, passing it as
+        # argument the address of the structure we are writing into
+        # (the first argument to COND_CALL_GC_WB).
+        helper_num = card_marking
+        if self._regalloc.vfprm.reg_bindings:
+            helper_num += 2
+        if self.wb_slowpath[helper_num] == 0:    # tests only
+            assert not we_are_translated()
+            self.cpu.gc_ll_descr.write_barrier_descr = descr
+            self._build_wb_slowpath(card_marking,
+                                    bool(self._regalloc.vfprm.reg_bindings))
+            assert self.wb_slowpath[helper_num] != 0
+        #
+        if loc_base is not r.r0:
+            # push two registers to keep stack aligned
+            self.mc.PUSH([r.r0.value, loc_base.value])
+            remap_frame_layout(self, [loc_base], [r.r0], r.ip)
+        self.mc.BL(self.wb_slowpath[helper_num])
+        if loc_base is not r.r0:
+            self.mc.POP([r.r0.value, loc_base.value])
+
+        if card_marking:
+            # The helper ends again with a check of the flag in the object.  So
+            # here, we can simply write again a conditional jump, which will be
+            # taken if GCFLAG_CARDS_SET is still not set.
+            jns_location = self.mc.currpos()
             self.mc.BKPT()
             #
-        else:
-            jnz_location = 0
-
-        # the following is supposed to be the slow path, so whenever possible
-        # we choose the most compact encoding over the most efficient one.
-        with saved_registers(self.mc, r.caller_resp):
-            if N == 2:
-                callargs = [r.r0, r.r1]
-            else:
-                callargs = [r.r0, r.r1, r.r2]
-            remap_frame_layout(self, arglocs, callargs, r.ip)
-            func = rffi.cast(lltype.Signed, addr)
-            # misaligned stack in the call, but it's ok because the write
-            # barrier is not going to call anything more.
-            self.mc.BL(func)
-
-        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
-        # be done in the CALL above with just four instructions, so here
-        # is an inline copy of them
-        if card_marking:
-            jmp_location = self.mc.get_relative_pos()
-            self.mc.BKPT()  # jump to the exit, patched later
-            # patch the JNZ above
+            # patch the JS above
             offset = self.mc.currpos()
-            pmc = OverwritingBuilder(self.mc, jnz_location, WORD)
-            pmc.B_offs(offset, c.NE)
+            pmc = OverwritingBuilder(self.mc, js_location, WORD)
+            pmc.B_offs(offset, c.NE) # We want to jump if the z flag is not set
             #
+            # case GCFLAG_CARDS_SET: emit a few instructions to do
+            # directly the card flag setting
             loc_index = arglocs[1]
             assert loc_index.is_reg()
-            tmp1 = arglocs[-2]
-            tmp2 = arglocs[-1]
-            #byteofs
-            s = 3 + descr.jit_wb_card_page_shift
-            self.mc.MVN_rr(r.lr.value, loc_index.value,
-                                imm=s, shifttype=shift.LSR)
-            # byte_index
-            self.mc.MOV_ri(r.ip.value, imm=7)
-            self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
-                    imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+            # must save the register loc_index before it is mutated
+            self.mc.PUSH([loc_index.value])
+            tmp1 = loc_index
+            tmp2 = arglocs[2] 
+            # lr = byteofs
+            s = 3 + descr.jit_wb_card_page_shift
+            self.mc.MVN_rr(r.lr.value, loc_index.value,
+                        imm=s, shifttype=shift.LSR)
+            
+            # tmp1 = byte_index
+            self.mc.MOV_ri(r.ip.value, imm=7)
+            self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
+                imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+            
+            # set the bit
+            self.mc.MOV_ri(tmp2.value, imm=1)
+            self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
+                                tmp1.value, shifttype=shift.LSL)
+            self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            # done
+            self.mc.POP([loc_index.value])
+            #
+            #
+            # patch the JNS above
+            offset = self.mc.currpos()
+            pmc = OverwritingBuilder(self.mc, jns_location, WORD)
+            pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set
 
-            # set the bit
-            self.mc.MOV_ri(tmp2.value, imm=1)
-            self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
-            self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
-                                    tmp1.value, shifttype=shift.LSL)
-            self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
-            # done
-
-            # patch the JMP above
-            offset = self.mc.currpos()
-            pmc = OverwritingBuilder(self.mc, jmp_location, WORD)
-            pmc.B_offs(offset)
-        #
-        # patch the JZ above
         offset = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, jz_location, WORD)
         pmc.B_offs(offset, c.EQ)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to