Author: David Schneider <[email protected]>
Branch: arm-backend-2
Changeset: r56299:fc35e288761e
Date: 2012-07-20 18:33 +0000
http://bitbucket.org/pypy/pypy/changeset/fc35e288761e/

Log:    implement modified version of cond_call_gc_wb

diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py
--- a/pypy/jit/backend/arm/assembler.py
+++ b/pypy/jit/backend/arm/assembler.py
@@ -59,6 +59,7 @@
         self._exit_code_addr = 0
         self.current_clt = None
         self.malloc_slowpath = 0
+        self.wb_slowpath = [0, 0, 0, 0]
         self._regalloc = None
         self.datablockwrapper = None
         self.propagate_exception_path = 0
@@ -107,6 +108,11 @@
         # Addresses of functions called by new_xxx operations
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
+        self._build_wb_slowpath(False)
+        self._build_wb_slowpath(True)
+        if self.cpu.supports_floats:
+            self._build_wb_slowpath(False, withfloats=True)
+            self._build_wb_slowpath(True, withfloats=True)
         self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
@@ -286,6 +292,46 @@
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
         self.stack_check_slowpath = rawstart
 
+    def _build_wb_slowpath(self, withcards, withfloats=False):
+        descr = self.cpu.gc_ll_descr.write_barrier_descr
+        if descr is None:
+            return
+        if not withcards:
+            func = descr.get_write_barrier_fn(self.cpu)
+        else:
+            if descr.jit_wb_cards_set == 0:
+                return
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            if func == 0:
+                return
+        #
+        # This builds a helper function called from the slow path of
+        # write barriers.  It must save all registers, and optionally
+        # all vfp registers.  It takes a single argument which is in r0.
+        # It must keep stack alignment accordingly.
+        mc = ARMv7Builder()
+        # the vfp registers are only saved by the 'withfloats' variants
+        if withfloats:
+            floats = r.caller_vfp_resp
+        else:
+            floats = []
+        with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats):
+            mc.BL(func)
+        #
+        if withcards:
+            # A final TST before returning, for the caller.  Careful to
+            # not follow this instruction with another one that changes
+            # the status of the CPU flags!
+            mc.LDRB_ri(r.ip.value, r.r0.value,
+                                    imm=descr.jit_wb_if_flag_byteofs)
+            mc.TST_ri(r.ip.value, imm=0x80)
+        #
+        # return to the caller by copying lr into pc
+        mc.MOV_rr(r.pc.value, r.lr.value)
+        # helper slot: withcards selects bit 0, withfloats selects bit 1
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
     def setup_failure_recovery(self):
 
         @rgc.no_collect
diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py
--- a/pypy/jit/backend/arm/opassembler.py
+++ b/pypy/jit/backend/arm/opassembler.py
@@ -506,32 +506,30 @@
 
     def emit_op_cond_call_gc_wb(self, op, arglocs, regalloc, fcond):
         # Write code equivalent to write_barrier() in the GC: it checks
-        # a flag in the object at arglocs[0], and if set, it calls the
-        # function remember_young_pointer() from the GC.  The two arguments
-        # to the call are in arglocs[:2].  The rest, arglocs[2:], contains
-        # registers that need to be saved and restored across the call.
+        # a flag in the object at arglocs[0], and if set, it calls a
+        # helper piece of assembler.  The latter saves registers as needed
+        # and calls the function jit_remember_young_pointer() from the GC.
         descr = op.getdescr()
         if we_are_translated():
             cls = self.cpu.gc_ll_descr.has_write_barrier_class()
             assert cls is not None and isinstance(descr, cls)
-
+        #
         opnum = op.getopnum()
-        if opnum == rop.COND_CALL_GC_WB:
-            N = 2
-            addr = descr.get_write_barrier_fn(self.cpu)
-            card_marking = False
-        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
-            N = 3
-            addr = descr.get_write_barrier_from_array_fn(self.cpu)
-            assert addr != 0
-            card_marking = descr.jit_wb_cards_set != 0
-        else:
-            raise AssertionError(opnum)
+        card_marking = False
+        mask = descr.jit_wb_if_flag_singlebyte
+        if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
+            # assumptions the rest of the function depends on:
+            assert (descr.jit_wb_cards_set_byteofs ==
+                    descr.jit_wb_if_flag_byteofs)
+            assert descr.jit_wb_cards_set_singlebyte == -0x80
+            card_marking = True
+            mask = descr.jit_wb_if_flag_singlebyte | -0x80
+        #
         loc_base = arglocs[0]
-        assert check_imm_arg(descr.jit_wb_if_flag_byteofs)
-        assert check_imm_arg(descr.jit_wb_if_flag_singlebyte)
-        self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_if_flag_byteofs)
-        self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_if_flag_singlebyte)
+        self.mc.LDRB_ri(r.ip.value, loc_base.value,
+                                    imm=descr.jit_wb_if_flag_byteofs)
+        mask &= 0xFF
+        self.mc.TST_ri(r.ip.value, imm=mask)
 
         jz_location = self.mc.currpos()
         self.mc.BKPT()
@@ -539,68 +537,80 @@
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            assert check_imm_arg(descr.jit_wb_cards_set_byteofs)
-            assert check_imm_arg(descr.jit_wb_cards_set_singlebyte)
-            self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_cards_set_byteofs)
-            self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_cards_set_singlebyte)
-            #
-            jnz_location = self.mc.currpos()
+            # GCFLAG_CARDS_SET is in this byte at 0x80
+            self.mc.TST_ri(r.ip.value, imm=0x80)
+
+            js_location = self.mc.currpos()
+            self.mc.BKPT()
+        else:
+            js_location = 0
+
+        # Write only a CALL to the helper prepared in advance, passing it as
+        # argument the address of the structure we are writing into
+        # (the first argument to COND_CALL_GC_WB).
+        helper_num = card_marking
+        if self._regalloc.vfprm.reg_bindings:
+            helper_num += 2
+        if self.wb_slowpath[helper_num] == 0:    # tests only
+            assert not we_are_translated()
+            self.cpu.gc_ll_descr.write_barrier_descr = descr
+            self._build_wb_slowpath(card_marking,
+                                    bool(self._regalloc.vfprm.reg_bindings))
+            assert self.wb_slowpath[helper_num] != 0
+        #
+        if loc_base is not r.r0:
+            # push two registers to keep stack aligned
+            self.mc.PUSH([r.r0.value, loc_base.value])
+            remap_frame_layout(self, [loc_base], [r.r0], r.ip)
+        self.mc.BL(self.wb_slowpath[helper_num])
+        if loc_base is not r.r0:
+            self.mc.POP([r.r0.value, loc_base.value])
+
+        if card_marking:
+            # The helper ends again with a check of the flag in the object.  So
+            # here, we can simply write again a conditional jump, which will be
+            # taken if GCFLAG_CARDS_SET is still not set.
+            jns_location = self.mc.currpos()
             self.mc.BKPT()
             #
-        else:
-            jnz_location = 0
-
-        # the following is supposed to be the slow path, so whenever possible
-        # we choose the most compact encoding over the most efficient one.
-        with saved_registers(self.mc, r.caller_resp):
-            if N == 2:
-                callargs = [r.r0, r.r1]
-            else:
-                callargs = [r.r0, r.r1, r.r2]
-            remap_frame_layout(self, arglocs, callargs, r.ip)
-            func = rffi.cast(lltype.Signed, addr)
-            # misaligned stack in the call, but it's ok because the write
-            # barrier is not going to call anything more.
-            self.mc.BL(func)
-
-        # if GCFLAG_CARDS_SET, then we can do the whole thing that would
-        # be done in the CALL above with just four instructions, so here
-        # is an inline copy of them
-        if card_marking:
-            jmp_location = self.mc.get_relative_pos()
-            self.mc.BKPT()  # jump to the exit, patched later
-            # patch the JNZ above
+            # patch the JS above
             offset = self.mc.currpos()
-            pmc = OverwritingBuilder(self.mc, jnz_location, WORD)
-            pmc.B_offs(offset, c.NE)
+            pmc = OverwritingBuilder(self.mc, js_location, WORD)
+            pmc.B_offs(offset, c.NE) # We want to jump if the z flag is not set
             #
+            # case GCFLAG_CARDS_SET: emit a few instructions to do
+            # directly the card flag setting
             loc_index = arglocs[1]
             assert loc_index.is_reg()
-            tmp1 = arglocs[-2]
-            tmp2 = arglocs[-1]
-            #byteofs
-            s = 3 + descr.jit_wb_card_page_shift
-            self.mc.MVN_rr(r.lr.value, loc_index.value,
-                                imm=s, shifttype=shift.LSR)
-            # byte_index
-            self.mc.MOV_ri(r.ip.value, imm=7)
-            self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
-                    imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+            # must save the register loc_index before it is mutated
+            self.mc.PUSH([loc_index.value])
+            tmp1 = loc_index
+            tmp2 = arglocs[-1]  # the scratch register appended by the regalloc
+            # lr = byteofs
+            s = 3 + descr.jit_wb_card_page_shift
+            self.mc.MVN_rr(r.lr.value, loc_index.value,
+                        imm=s, shifttype=shift.LSR)
+
+            # tmp1 = byte_index
+            self.mc.MOV_ri(r.ip.value, imm=7)
+            self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value,
+                imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR)
+
+            # set the bit
+            self.mc.MOV_ri(tmp2.value, imm=1)
+            self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
+                                tmp1.value, shifttype=shift.LSL)
+            self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
+            # done
+            self.mc.POP([loc_index.value])
+            #
+            #
+            # patch the JNS above
+            offset = self.mc.currpos()
+            pmc = OverwritingBuilder(self.mc, jns_location, WORD)
+            pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set
 
-            # set the bit
-            self.mc.MOV_ri(tmp2.value, imm=1)
-            self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value)
-            self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value,
-                                    tmp1.value, shifttype=shift.LSL)
-            self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value)
-            # done
-
-            # patch the JMP above
-            offset = self.mc.currpos()
-            pmc = OverwritingBuilder(self.mc, jmp_location, WORD)
-            pmc.B_offs(offset)
-        #
-        # patch the JZ above
         offset = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, jz_location, WORD)
         pmc.B_offs(offset, c.EQ)
diff --git a/pypy/jit/backend/arm/regalloc.py b/pypy/jit/backend/arm/regalloc.py
--- a/pypy/jit/backend/arm/regalloc.py
+++ b/pypy/jit/backend/arm/regalloc.py
@@ -1045,27 +1045,23 @@
 
     def prepare_op_cond_call_gc_wb(self, op, fcond):
         assert op.result is None
-        N = op.numargs()
         # we force all arguments in a reg because it will be needed anyway by
         # the following setfield_gc or setarrayitem_gc. It avoids loading it
         # twice from the memory.
-        arglocs = []
+        N = op.numargs()
         args = op.getarglist()
-        for i in range(N):
-            loc = self._ensure_value_is_boxed(op.getarg(i), args)
-            arglocs.append(loc)
-        card_marking = False
-        if op.getopnum() == rop.COND_CALL_GC_WB_ARRAY:
-            descr = op.getdescr()
-            if we_are_translated():
-                cls = self.cpu.gc_ll_descr.has_write_barrier_class()
-                assert cls is not None and isinstance(descr, cls)
-            card_marking = descr.jit_wb_cards_set != 0
-        if card_marking:  # allocate scratch registers
-            tmp1 = self.get_scratch_reg(INT)
-            tmp2 = self.get_scratch_reg(INT)
-            arglocs.append(tmp1)
-            arglocs.append(tmp2)
+        arglocs = [self._ensure_value_is_boxed(op.getarg(i), args)
+                        for i in range(N)]
+        descr = op.getdescr()
+        if(op.getopnum() == rop.COND_CALL_GC_WB_ARRAY 
+            and descr.jit_wb_cards_set != 0):
+            # card marking: both flags must share a byte, cards-set flag at 0x80
+            assert (descr.jit_wb_cards_set_byteofs ==
+                    descr.jit_wb_if_flag_byteofs)
+            assert descr.jit_wb_cards_set_singlebyte == -0x80
+            # one scratch register, passed as the last entry of arglocs
+            tmp = self.get_scratch_reg(INT)
+            arglocs.append(tmp)
         return arglocs
 
     prepare_op_cond_call_gc_wb_array = prepare_op_cond_call_gc_wb
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to