Author: David Schneider <david.schnei...@picle.org>
Branch: ppc-jit-backend
Changeset: r56429:069eb5ce9bf0
Date: 2012-07-24 09:50 -0700
http://bitbucket.org/pypy/pypy/changeset/069eb5ce9bf0/

Log:    (edelsohn, bivab) implement new version of cond_call_gc

diff --git a/pypy/jit/backend/ppc/opassembler.py b/pypy/jit/backend/ppc/opassembler.py
--- a/pypy/jit/backend/ppc/opassembler.py
+++ b/pypy/jit/backend/ppc/opassembler.py
@@ -1000,26 +1000,23 @@
 
         opnum = op.getopnum()
         card_marking = False
+        mask = descr.jit_wb_if_flag_singlebyte
         if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0:
-            N = 3
-            addr = descr.get_write_barrier_from_array_fn(self.cpu)
-            assert addr != 0
+            # assumptions the rest of the function depends on:
             assert (descr.jit_wb_cards_set_byteofs ==
                     descr.jit_wb_if_flag_byteofs)
             assert descr.jit_wb_cards_set_singlebyte == -0x80
             card_marking = True
-        else:
-            N = 2
-            addr = descr.get_write_barrier_fn(self.cpu)
+            mask = descr.jit_wb_if_flag_singlebyte | -0x80
+        #
         loc_base = arglocs[0]
         assert _check_imm_arg(descr.jit_wb_if_flag_byteofs)
         with scratch_reg(self.mc):
             self.mc.lbz(r.SCRATCH.value, loc_base.value,
                         descr.jit_wb_if_flag_byteofs)
-
             # test whether this bit is set
-            self.mc.andix(r.SCRATCH.value, r.SCRATCH.value,
-                          descr.jit_wb_if_flag_singlebyte)
+            mask &= 0xFF
+            self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, mask)
 
         jz_location = self.mc.currpos()
         self.mc.nop()
@@ -1027,57 +1024,65 @@
         # for cond_call_gc_wb_array, also add another fast path:
         # if GCFLAG_CARDS_SET, then we can just set one bit and be done
         if card_marking:
-            assert _check_imm_arg(descr.jit_wb_cards_set_byteofs)
-            assert descr.jit_wb_cards_set_singlebyte == -0x80
             with scratch_reg(self.mc):
                 self.mc.lbz(r.SCRATCH.value, loc_base.value,
                             descr.jit_wb_if_flag_byteofs)
+                self.mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
 
                 # test whether this bit is set
-                self.mc.andix(r.SCRATCH.value, r.SCRATCH.value,
-                              descr.jit_wb_cards_set_singlebyte)
+                self.mc.cmpwi(0, r.SCRATCH.value, 0)
 
-                jnz_location = self.mc.currpos()
+                js_location = self.mc.currpos()
                 self.mc.nop()
+                #self.mc.trap()
         else:
-            jnz_location = 0
+            js_location = 0
 
-        # the following is supposed to be the slow path, so whenever possible
-        # we choose the most compact encoding over the most efficient one.
-        with Saved_Volatiles(self.mc):
-            if N == 2:
-                callargs = [r.r3, r.r4]
-            else:
-                callargs = [r.r3, r.r4, r.r5]
-            remap_frame_layout(self, arglocs, callargs, r.SCRATCH)
-            func = rffi.cast(lltype.Signed, addr)
-            #
-            # misaligned stack in the call, but it's ok because the write
-            # barrier is not going to call anything more.  
-            self.mc.call(func)
+        # Write only a CALL to the helper prepared in advance, passing it as
+        # argument the address of the structure we are writing into
+        # (the first argument to COND_CALL_GC_WB).
+        helper_num = card_marking
+
+        if self._regalloc.fprm.reg_bindings:
+            helper_num += 2
+        if self.wb_slowpath[helper_num] == 0:    # tests only
+            assert not we_are_translated()
+            self.cpu.gc_ll_descr.write_barrier_descr = descr
+            self._build_wb_slowpath(card_marking,
+                                    bool(self._regalloc.fprm.reg_bindings))
+            assert self.wb_slowpath[helper_num] != 0
+        #
+        if loc_base is not r.r3:
+            remap_frame_layout(self, [loc_base], [r.r3], r.SCRATCH)
+        addr = self.wb_slowpath[helper_num]
+        func = rffi.cast(lltype.Signed, addr)
+        self.mc.bl_abs(func)
 
         # if GCFLAG_CARDS_SET, then we can do the whole thing that would
         # be done in the CALL above with just four instructions, so here
         # is an inline copy of them
         if card_marking:
             with scratch_reg(self.mc):
-                jmp_location = self.mc.currpos()
+                jns_location = self.mc.currpos()
                 self.mc.nop()  # jump to the exit, patched later
-                # patch the JNZ above
+                # patch the JS above
                 offset = self.mc.currpos()
-                pmc = OverwritingBuilder(self.mc, jnz_location, 1)
-                pmc.bc(12, 2, offset - jnz_location)     # jump on equality
+                pmc = OverwritingBuilder(self.mc, js_location, 1)
+                # Jump if JS comparison is less than (bit set)
+                pmc.bc(12, 0, offset - js_location)
                 pmc.overwrite()
                 #
+                # case GCFLAG_CARDS_SET: emit a few instructions to do
+                # directly the card flag setting
                 loc_index = arglocs[1]
                 assert loc_index.is_reg()
-                tmp1 = arglocs[-2]
-                tmp2 = arglocs[-1]
+                tmp1 = loc_index
+                tmp2 = arglocs[-2]
                 #byteofs
                 s = 3 + descr.jit_wb_card_page_shift
 
-                # use r20 as temporary register, save it in FORCE INDEX slot
-                temp_reg = r.r20
+                # use r11 as temporary register, save it in FORCE INDEX slot
+                temp_reg = r.r11
                 self.mc.store(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS)
 
                 self.mc.srli_op(temp_reg.value, loc_index.value, s)
@@ -1097,24 +1102,21 @@
                 self.mc.stbx(r.SCRATCH.value, loc_base.value, temp_reg.value)
                 # done
 
-                # restore temporary register r20
+                # restore temporary register r11
                 self.mc.load(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS)
 
-                # patch the JMP above
+                # patch the JNS above
                 offset = self.mc.currpos()
-                pmc = OverwritingBuilder(self.mc, jmp_location, 1)
-                pmc.b(offset - jmp_location)
+                pmc = OverwritingBuilder(self.mc, jns_location, 1)
+                # Jump if JNS comparison is not less than (bit not set)
+                pmc.bc(4, 0, offset - jns_location)
                 pmc.overwrite()
 
         # patch the JZ above
-        offset = self.mc.currpos() - jz_location
+        offset = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, jz_location, 1)
-        # We want to jump if the compared bits are not equal.
-        # This corresponds to the x86 backend, which uses
-        # the TEST operation. Hence, on first sight, it might
-        # seem that we use the wrong condition here. This is
-        # because TEST results in a 1 if the operands are different.
-        pmc.bc(4, 2, offset)
+        # Jump if JZ comparison is zero (CMP 0 is equal)
+        pmc.bc(12, 2, offset - jz_location)
         pmc.overwrite()
 
     emit_cond_call_gc_wb_array = emit_cond_call_gc_wb
diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -89,11 +89,14 @@
                                                             failargs_limit)
         self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit)
         self.mc = None
-        self.datablockwrapper = None
         self.memcpy_addr = 0
+        self.pending_guards = None
         self.fail_boxes_count = 0
         self.current_clt = None
+        self.malloc_slowpath = 0
+        self.wb_slowpath = [0, 0, 0, 0]
         self._regalloc = None
+        self.datablockwrapper = None
         self.max_stack_params = 0
         self.propagate_exception_path = 0
         self.stack_check_slowpath = 0
@@ -497,6 +500,61 @@
             self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
         self.stack_check_slowpath = rawstart
 
+    def _build_wb_slowpath(self, withcards, withfloats=False):
+        descr = self.cpu.gc_ll_descr.write_barrier_descr
+        if descr is None:
+            return
+        if not withcards:
+            func = descr.get_write_barrier_fn(self.cpu)
+        else:
+            if descr.jit_wb_cards_set == 0:
+                return
+            func = descr.get_write_barrier_from_array_fn(self.cpu)
+            if func == 0:
+                return
+        #
+        # This builds a helper function called from the slow path of
+        # write barriers.  It must save all registers, and optionally
+        # all fp registers.
+        mc = PPCBuilder()
+        #
+        frame_size = ((len(r.VOLATILES) + len(r.VOLATILES_FLOAT)
+                      + BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD)
+        mc.make_function_prologue(frame_size)
+        for i in range(len(r.VOLATILES)):
+                       mc.store(r.VOLATILES[i].value, r.SP.value,
+                              (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+        if self.cpu.supports_floats:
+            for i in range(len(r.VOLATILES_FLOAT)):
+                           mc.stfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
+                                  (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+
+        mc.call(rffi.cast(lltype.Signed, func))
+        if self.cpu.supports_floats:
+            for i in range(len(r.VOLATILES_FLOAT)):
+                           mc.lfd(r.VOLATILES_FLOAT[i].value, r.SP.value,
+                                  (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+        for i in range(len(r.VOLATILES)):
+                       mc.load(r.VOLATILES[i].value, r.SP.value,
+                              (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD)
+        mc.restore_LR_from_caller_frame(frame_size)
+        #
+        if withcards:
+            # A final compare before the RET, for the caller.  Careful to
+            # not follow this instruction with another one that changes
+            # the status of the CPU flags!
+            mc.lbz(r.SCRATCH.value, r.r3.value,
+                   descr.jit_wb_if_flag_byteofs)
+            mc.extsb(r.SCRATCH.value, r.SCRATCH.value)
+            mc.cmpwi(0, r.SCRATCH.value, 0)
+        #
+        mc.addi(r.SP.value, r.SP.value, frame_size)
+        mc.blr()
+        #
+        mc.prepare_insts_blocks()
+        rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        self.wb_slowpath[withcards + 2 * withfloats] = rawstart
+
     def _build_propagate_exception_path(self):
         if self.cpu.propagate_exception_v < 0:
             return
@@ -662,6 +720,11 @@
     def setup_once(self):
         gc_ll_descr = self.cpu.gc_ll_descr
         gc_ll_descr.initialize()
+        self._build_wb_slowpath(False)
+        self._build_wb_slowpath(True)
+        if self.cpu.supports_floats:
+            self._build_wb_slowpath(False, withfloats=True)
+            self._build_wb_slowpath(True, withfloats=True)
         self._build_propagate_exception_path()
         if gc_ll_descr.get_malloc_slowpath_addr is not None:
             self._build_malloc_slowpath()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to