Author: hager <sven.ha...@uni-duesseldorf.de>
Branch: ppc-jit-backend
Changeset: r52973:52ece45399fc
Date: 2012-02-27 20:00 +0100
http://bitbucket.org/pypy/pypy/changeset/52ece45399fc/

Log:    merge

diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -523,7 +523,7 @@
         return []
 
     def add_frame_offset(self, shape, offset):
-        assert offset != 0
+        assert offset & 3 == 0
         shape.append(offset)
 
     def add_callee_save_reg(self, shape, register):
diff --git a/pypy/jit/backend/ppc/codebuilder.py b/pypy/jit/backend/ppc/codebuilder.py
--- a/pypy/jit/backend/ppc/codebuilder.py
+++ b/pypy/jit/backend/ppc/codebuilder.py
@@ -962,6 +962,11 @@
         PPCAssembler.__init__(self)
         self.init_block_builder()
         self.r0_in_use = r0_in_use
+        self.ops_offset = {}
+
+    def mark_op(self, op):
+        pos = self.get_relative_pos()
+        self.ops_offset[op] = pos
 
     def check(self, desc, v, *args):
         desc.__get__(self)(*args)
@@ -994,13 +999,12 @@
             self.ldx(rD.value, 0, rD.value)
 
     def store_reg(self, source_reg, addr):
-        self.alloc_scratch_reg()
-        self.load_imm(r.SCRATCH, addr)
-        if IS_PPC_32:
-            self.stwx(source_reg.value, 0, r.SCRATCH.value)
-        else:
-            self.stdx(source_reg.value, 0, r.SCRATCH.value)
-        self.free_scratch_reg()
+        with scratch_reg(self):
+            self.load_imm(r.SCRATCH, addr)
+            if IS_PPC_32:
+                self.stwx(source_reg.value, 0, r.SCRATCH.value)
+            else:
+                self.stdx(source_reg.value, 0, r.SCRATCH.value)
 
     def b_offset(self, target):
         curpos = self.currpos()
@@ -1020,17 +1024,15 @@
         BI = condition[0]
         BO = condition[1]
 
-        self.alloc_scratch_reg()
-        self.load_imm(r.SCRATCH, addr)
-        self.mtctr(r.SCRATCH.value)
-        self.free_scratch_reg()
+        with scratch_reg(self):
+            self.load_imm(r.SCRATCH, addr)
+            self.mtctr(r.SCRATCH.value)
         self.bcctr(BO, BI)
 
     def b_abs(self, address, trap=False):
-        self.alloc_scratch_reg()
-        self.load_imm(r.SCRATCH, address)
-        self.mtctr(r.SCRATCH.value)
-        self.free_scratch_reg()
+        with scratch_reg(self):
+            self.load_imm(r.SCRATCH, address)
+            self.mtctr(r.SCRATCH.value)
         if trap:
             self.trap()
         self.bctr()
@@ -1044,17 +1046,16 @@
     def call(self, address):
         """ do a call to an absolute address
         """
-        self.alloc_scratch_reg()
-        if IS_PPC_32:
-            self.load_imm(r.SCRATCH, address)
-        else:
-            self.store(r.TOC.value, r.SP.value, 5 * WORD)
-            self.load_imm(r.r11, address)
-            self.load(r.SCRATCH.value, r.r11.value, 0)
-            self.load(r.r2.value, r.r11.value, WORD)
-            self.load(r.r11.value, r.r11.value, 2 * WORD)
-        self.mtctr(r.SCRATCH.value)
-        self.free_scratch_reg()
+        with scratch_reg(self):
+            if IS_PPC_32:
+                self.load_imm(r.SCRATCH, address)
+            else:
+                self.store(r.TOC.value, r.SP.value, 5 * WORD)
+                self.load_imm(r.r11, address)
+                self.load(r.SCRATCH.value, r.r11.value, 0)
+                self.load(r.r2.value, r.r11.value, WORD)
+                self.load(r.r11.value, r.r11.value, 2 * WORD)
+            self.mtctr(r.SCRATCH.value)
         self.bctrl()
 
         if IS_PPC_64:
diff --git a/pypy/jit/backend/ppc/helper/regalloc.py b/pypy/jit/backend/ppc/helper/regalloc.py
--- a/pypy/jit/backend/ppc/helper/regalloc.py
+++ b/pypy/jit/backend/ppc/helper/regalloc.py
@@ -76,7 +76,7 @@
 
 def prepare_binary_int_op():
     def f(self, op):
-        boxes = list(op.getarglist())
+        boxes = op.getarglist()
         b0, b1 = boxes
 
         reg1 = self._ensure_value_is_boxed(b0, forbidden_vars=boxes)
diff --git a/pypy/jit/backend/ppc/opassembler.py b/pypy/jit/backend/ppc/opassembler.py
--- a/pypy/jit/backend/ppc/opassembler.py
+++ b/pypy/jit/backend/ppc/opassembler.py
@@ -12,7 +12,8 @@
 from pypy.jit.backend.ppc.helper.assembler import (count_reg_args,
                                                           Saved_Volatiles)
 from pypy.jit.backend.ppc.jump import remap_frame_layout
-from pypy.jit.backend.ppc.codebuilder import OverwritingBuilder
+from pypy.jit.backend.ppc.codebuilder import (OverwritingBuilder, scratch_reg,
+                                              PPCBuilder)
 from pypy.jit.backend.ppc.regalloc import TempPtr, TempInt
 from pypy.jit.backend.llsupport import symbolic
 from pypy.rpython.lltypesystem import rstr, rffi, lltype
@@ -210,12 +211,11 @@
     #        instead of XER could be more efficient
     def _emit_ovf_guard(self, op, arglocs, cond):
         # move content of XER to GPR
-        self.mc.alloc_scratch_reg()
-        self.mc.mfspr(r.SCRATCH.value, 1)
-        # shift and mask to get comparison result
-        self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0)
-        self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.mfspr(r.SCRATCH.value, 1)
+            # shift and mask to get comparison result
+            self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0)
+            self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
         self._emit_guard(op, arglocs, cond)
 
     def emit_guard_no_overflow(self, op, arglocs, regalloc):
@@ -244,14 +244,13 @@
     def _cmp_guard_class(self, op, locs, regalloc):
         offset = locs[2]
         if offset is not None:
-            self.mc.alloc_scratch_reg()
-            if offset.is_imm():
-                self.mc.load(r.SCRATCH.value, locs[0].value, offset.value)
-            else:
-                assert offset.is_reg()
-                self.mc.loadx(r.SCRATCH.value, locs[0].value, offset.value)
-            self.mc.cmp_op(0, r.SCRATCH.value, locs[1].value)
-            self.mc.free_scratch_reg()
+            with scratch_reg(self.mc):
+                if offset.is_imm():
+                    self.mc.load(r.SCRATCH.value, locs[0].value, offset.value)
+                else:
+                    assert offset.is_reg()
+                    self.mc.loadx(r.SCRATCH.value, locs[0].value, offset.value)
+                self.mc.cmp_op(0, r.SCRATCH.value, locs[1].value)
         else:
             assert 0, "not implemented yet"
         self._emit_guard(op, locs[3:], c.NE)
@@ -288,10 +287,9 @@
                     adr = self.fail_boxes_int.get_addr_for_num(i)
                 else:
                     assert 0
-                self.mc.alloc_scratch_reg()
-                self.mc.load_imm(r.SCRATCH, adr)
-                self.mc.storex(loc.value, 0, r.SCRATCH.value)
-                self.mc.free_scratch_reg()
+                with scratch_reg(self.mc):
+                    self.mc.load_imm(r.SCRATCH, adr)
+                    self.mc.storex(loc.value, 0, r.SCRATCH.value)
             elif loc.is_vfp_reg():
                 assert box.type == FLOAT
                 assert 0, "not implemented yet"
@@ -305,13 +303,12 @@
                         adr = self.fail_boxes_int.get_addr_for_num(i)
                     else:
                         assert 0
-                    self.mc.alloc_scratch_reg()
-                    self.mov_loc_loc(loc, r.SCRATCH)
-                    # store content of r5 temporary in ENCODING AREA
-                    self.mc.store(r.r5.value, r.SPP.value, 0)
-                    self.mc.load_imm(r.r5, adr)
-                    self.mc.store(r.SCRATCH.value, r.r5.value, 0)
-                    self.mc.free_scratch_reg()
+                    with scratch_reg(self.mc):
+                        self.mov_loc_loc(loc, r.SCRATCH)
+                        # store content of r5 temporary in ENCODING AREA
+                        self.mc.store(r.r5.value, r.SPP.value, 0)
+                        self.mc.load_imm(r.r5, adr)
+                        self.mc.store(r.SCRATCH.value, r.r5.value, 0)
                     # restore r5
                     self.mc.load(r.r5.value, r.SPP.value, 0)
             else:
@@ -362,10 +359,9 @@
         failargs = arglocs[5:]
         self.mc.load_imm(loc1, pos_exception.value)
 
-        self.mc.alloc_scratch_reg()
-        self.mc.load(r.SCRATCH.value, loc1.value, 0)
-        self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.load(r.SCRATCH.value, loc1.value, 0)
+            self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
 
         self._emit_guard(op, failargs, c.NE, save_exc=True)
         self.mc.load_imm(loc, pos_exc_value.value)
@@ -373,11 +369,10 @@
         if resloc:
             self.mc.load(resloc.value, loc.value, 0)
 
-        self.mc.alloc_scratch_reg()
-        self.mc.load_imm(r.SCRATCH, 0)
-        self.mc.store(r.SCRATCH.value, loc.value, 0)
-        self.mc.store(r.SCRATCH.value, loc1.value, 0)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.load_imm(r.SCRATCH, 0)
+            self.mc.store(r.SCRATCH.value, loc.value, 0)
+            self.mc.store(r.SCRATCH.value, loc1.value, 0)
 
     def emit_call(self, op, args, regalloc, force_index=-1):
         adr = args[0].value
@@ -426,13 +421,12 @@
             param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS)
                     * WORD) # space for first 8 parameters
 
-        self.mc.alloc_scratch_reg()
-        for i, arg in enumerate(stack_args):
-            offset = param_offset + i * WORD
-            if arg is not None:
-                self.regalloc_mov(regalloc.loc(arg), r.SCRATCH)
-            self.mc.store(r.SCRATCH.value, r.SP.value, offset)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            for i, arg in enumerate(stack_args):
+                offset = param_offset + i * WORD
+                if arg is not None:
+                    self.regalloc_mov(regalloc.loc(arg), r.SCRATCH)
+                self.mc.store(r.SCRATCH.value, r.SP.value, offset)
 
         # collect variables that need to go in registers
         # and the registers they will be stored in 
@@ -542,31 +536,31 @@
     def emit_getinteriorfield_gc(self, op, arglocs, regalloc):
         (base_loc, index_loc, res_loc,
             ofs_loc, ofs, itemsize, fieldsize) = arglocs
-        self.mc.alloc_scratch_reg()
-        self.mc.load_imm(r.SCRATCH, itemsize.value)
-        self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value)
-        if ofs.value > 0:
-            if ofs_loc.is_imm():
-                self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
+        with scratch_reg(self.mc):
+            self.mc.load_imm(r.SCRATCH, itemsize.value)
+            self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value)
+            if ofs.value > 0:
+                if ofs_loc.is_imm():
+                    self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
+                else:
+                    self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
+
+            if fieldsize.value == 8:
+                self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
+            elif fieldsize.value == 4:
+                self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value)
+            elif fieldsize.value == 2:
+                self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value)
+            elif fieldsize.value == 1:
+                self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value)
             else:
-                self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
-
-        if fieldsize.value == 8:
-            self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
-        elif fieldsize.value == 4:
-            self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value)
-        elif fieldsize.value == 2:
-            self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value)
-        elif fieldsize.value == 1:
-            self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value)
-        else:
-            assert 0
-        self.mc.free_scratch_reg()
+                assert 0
 
         #XXX Hack, Hack, Hack
         if not we_are_translated():
             signed = op.getdescr().fielddescr.is_field_signed()
             self._ensure_result_bit_extension(res_loc, fieldsize.value, signed)
+    emit_getinteriorfield_raw = emit_getinteriorfield_gc
 
     def emit_setinteriorfield_gc(self, op, arglocs, regalloc):
         (base_loc, index_loc, value_loc,
@@ -588,7 +582,7 @@
             self.mc.stbx(value_loc.value, base_loc.value, r.SCRATCH.value)
         else:
             assert 0
-
+    emit_setinteriorfield_raw = emit_setinteriorfield_gc
 
 class ArrayOpAssembler(object):
     
@@ -752,13 +746,12 @@
             bytes_loc = regalloc.force_allocate_reg(bytes_box, forbidden_vars)
             scale = self._get_unicode_item_scale()
             assert length_loc.is_reg()
-            self.mc.alloc_scratch_reg()
-            self.mc.load_imm(r.SCRATCH, 1 << scale)
-            if IS_PPC_32:
-                self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value)
-            else:
-                self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value)
-            self.mc.free_scratch_reg()
+            with scratch_reg(self.mc):
+                self.mc.load_imm(r.SCRATCH, 1 << scale)
+                if IS_PPC_32:
+                    self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value)
+                else:
+                    self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value)
             length_box = bytes_box
             length_loc = bytes_loc
         # call memcpy()
@@ -873,15 +866,15 @@
     def set_vtable(self, box, vtable):
         if self.cpu.vtable_offset is not None:
             adr = rffi.cast(lltype.Signed, vtable)
-            self.mc.alloc_scratch_reg()
-            self.mc.load_imm(r.SCRATCH, adr)
-            self.mc.store(r.SCRATCH.value, r.RES.value, self.cpu.vtable_offset)
-            self.mc.free_scratch_reg()
+            with scratch_reg(self.mc):
+                self.mc.load_imm(r.SCRATCH, adr)
+                self.mc.store(r.SCRATCH.value, r.RES.value, self.cpu.vtable_offset)
 
     def emit_debug_merge_point(self, op, arglocs, regalloc):
         pass
 
     emit_jit_debug = emit_debug_merge_point
+    emit_keepalive = emit_debug_merge_point
 
     def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
         # Write code equivalent to write_barrier() in the GC: it checks
@@ -906,26 +899,25 @@
             raise AssertionError(opnum)
         loc_base = arglocs[0]
 
-        self.mc.alloc_scratch_reg()
-        self.mc.load(r.SCRATCH.value, loc_base.value, 0)
+        with scratch_reg(self.mc):
+            self.mc.load(r.SCRATCH.value, loc_base.value, 0)
 
-        # get the position of the bit we want to test
-        bitpos = descr.jit_wb_if_flag_bitpos
+            # get the position of the bit we want to test
+            bitpos = descr.jit_wb_if_flag_bitpos
 
-        if IS_PPC_32:
-            # put this bit to the rightmost bitposition of r0
-            if bitpos > 0:
-                self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value,
-                               32 - bitpos, 31, 31)
-            # test whether this bit is set
-            self.mc.cmpwi(0, r.SCRATCH.value, 1)
-        else:
-            if bitpos > 0:
-                self.mc.rldicl(r.SCRATCH.value, r.SCRATCH.value,
-                               64 - bitpos, 63)
-            # test whether this bit is set
-            self.mc.cmpdi(0, r.SCRATCH.value, 1)
-        self.mc.free_scratch_reg()
+            if IS_PPC_32:
+                # put this bit to the rightmost bitposition of r0
+                if bitpos > 0:
+                    self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value,
+                                   32 - bitpos, 31, 31)
+                # test whether this bit is set
+                self.mc.cmpwi(0, r.SCRATCH.value, 1)
+            else:
+                if bitpos > 0:
+                    self.mc.rldicl(r.SCRATCH.value, r.SCRATCH.value,
+                                   64 - bitpos, 63)
+                # test whether this bit is set
+                self.mc.cmpdi(0, r.SCRATCH.value, 1)
 
         jz_location = self.mc.currpos()
         self.mc.nop()
@@ -947,7 +939,7 @@
         # patch the JZ above
         offset = self.mc.currpos() - jz_location
         pmc = OverwritingBuilder(self.mc, jz_location, 1)
-        pmc.bc(4, 2, offset) # jump if the two values are equal
+        pmc.bc(12, 2, offset) # jump if the two values are equal
         pmc.overwrite()
 
     emit_cond_call_gc_wb_array = emit_cond_call_gc_wb
@@ -989,10 +981,9 @@
         # check value
         resloc = regalloc.try_allocate_reg(resbox)
         assert resloc is r.RES
-        self.mc.alloc_scratch_reg()
-        self.mc.load_imm(r.SCRATCH, value)
-        self.mc.cmp_op(0, resloc.value, r.SCRATCH.value)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.load_imm(r.SCRATCH, value)
+            self.mc.cmp_op(0, resloc.value, r.SCRATCH.value)
         regalloc.possibly_free_var(resbox)
 
         fast_jmp_pos = self.mc.currpos()
@@ -1035,11 +1026,10 @@
             assert isinstance(fielddescr, FieldDescr)
             ofs = fielddescr.offset
             resloc = regalloc.force_allocate_reg(resbox)
-            self.mc.alloc_scratch_reg()
-            self.mov_loc_loc(arglocs[1], r.SCRATCH)
-            self.mc.li(resloc.value, 0)
-            self.mc.storex(resloc.value, 0, r.SCRATCH.value)
-            self.mc.free_scratch_reg()
+            with scratch_reg(self.mc):
+                self.mov_loc_loc(arglocs[1], r.SCRATCH)
+                self.mc.li(resloc.value, 0)
+                self.mc.storex(resloc.value, 0, r.SCRATCH.value)
             regalloc.possibly_free_var(resbox)
 
         if op.result is not None:
@@ -1055,13 +1045,12 @@
                 raise AssertionError(kind)
             resloc = regalloc.force_allocate_reg(op.result)
             regalloc.possibly_free_var(resbox)
-            self.mc.alloc_scratch_reg()
-            self.mc.load_imm(r.SCRATCH, adr)
-            if op.result.type == FLOAT:
-                assert 0, "not implemented yet"
-            else:
-                self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
-            self.mc.free_scratch_reg()
+            with scratch_reg(self.mc):
+                self.mc.load_imm(r.SCRATCH, adr)
+                if op.result.type == FLOAT:
+                    assert 0, "not implemented yet"
+                else:
+                    self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
 
         # merge point
         offset = self.mc.currpos() - jmp_pos
@@ -1070,10 +1059,9 @@
             pmc.b(offset)
             pmc.overwrite()
 
-        self.mc.alloc_scratch_reg()
-        self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
-        self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
+            self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
 
         self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
 
@@ -1102,10 +1090,9 @@
 
     def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
         ENCODING_AREA = len(r.MANAGED_REGS) * WORD
-        self.mc.alloc_scratch_reg()
-        self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
-        self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
+            self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
         self._emit_guard(guard_op, arglocs, c.LT, save_exc=True)
 
     emit_guard_call_release_gil = emit_guard_call_may_force
diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -3,7 +3,7 @@
 from pypy.jit.backend.ppc.ppc_form import PPCForm as Form
 from pypy.jit.backend.ppc.ppc_field import ppc_fields
 from pypy.jit.backend.ppc.regalloc import (TempInt, PPCFrameManager,
-                                                  Regalloc)
+                                                  Regalloc, PPCRegisterManager)
 from pypy.jit.backend.ppc.assembler import Assembler
 from pypy.jit.backend.ppc.opassembler import OpAssembler
 from pypy.jit.backend.ppc.symbol_lookup import lookup
@@ -37,15 +37,23 @@
 from pypy.jit.metainterp.history import (BoxInt, ConstInt, ConstPtr,
                                          ConstFloat, Box, INT, REF, FLOAT)
 from pypy.jit.backend.x86.support import values_array
+from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
+                             have_debug_prints)
 from pypy.rlib import rgc
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.jit.backend.ppc.locations import StackLocation, get_spp_offset
+from pypy.rlib.jit import AsmInfo
 
 memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
                                        rffi.SIZE_T], lltype.Void,
                             sandboxsafe=True, _nowrapper=True)
+
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('type', lltype.Char),  # 'b'ridge, 'l'abel or
+                                                      # 'e'ntry point
+                              ('number', lltype.Signed))
 def hi(w):
     return w >> 16
 
@@ -85,6 +93,7 @@
     EMPTY_LOC = '\xFE'
     END_OF_LOCS = '\xFF'
 
+    FORCE_INDEX_AREA            = len(r.MANAGED_REGS) * WORD
     ENCODING_AREA               = len(r.MANAGED_REGS) * WORD
     OFFSET_SPP_TO_GPR_SAVE_AREA = (FORCE_INDEX + FLOAT_INT_CONVERSION
                                    + ENCODING_AREA)
@@ -108,6 +117,12 @@
         self.max_stack_params = 0
         self.propagate_exception_path = 0
         self.setup_failure_recovery()
+        self._debug = False
+        self.loop_run_counters = []
+        self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
+
+    def set_debug(self, v):
+        self._debug = v
 
     def _save_nonvolatiles(self):
         """ save nonvolatile GPRs in GPR SAVE AREA 
@@ -298,24 +313,64 @@
 
     def _build_malloc_slowpath(self):
         mc = PPCBuilder()
-        with Saved_Volatiles(mc):
-            # Values to compute size stored in r3 and r4
-            mc.subf(r.r3.value, r.r3.value, r.r4.value)
-            addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
-            mc.call(addr)
+        if IS_PPC_64:
+            for _ in range(6):
+                mc.write32(0)
+        frame_size = (# add space for floats later
+                    + BACKCHAIN_SIZE * WORD)
+        if IS_PPC_32:
+            mc.stwu(r.SP.value, r.SP.value, -frame_size)
+            mc.mflr(r.SCRATCH.value)
+            mc.stw(r.SCRATCH.value, r.SP.value, frame_size + WORD) 
+        else:
+            mc.stdu(r.SP.value, r.SP.value, -frame_size)
+            mc.mflr(r.SCRATCH.value)
+            mc.std(r.SCRATCH.value, r.SP.value, frame_size + 2 * WORD)
+        # managed volatiles are saved below
+        if self.cpu.supports_floats:
+            assert 0, "make sure to save floats here"
+        # Values to compute size stored in r3 and r4
+        mc.subf(r.r3.value, r.r3.value, r.r4.value)
+        addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+        for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+            mc.store(reg.value, r.SPP.value, ofs)
+        mc.call(addr)
+        for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+            mc.load(reg.value, r.SPP.value, ofs)
 
         mc.cmp_op(0, r.r3.value, 0, imm=True)
         jmp_pos = mc.currpos()
         mc.nop()
+
         nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
         mc.load_imm(r.r4, nursery_free_adr)
         mc.load(r.r4.value, r.r4.value, 0)
+ 
+        if IS_PPC_32:
+            ofs = WORD
+        else:
+            ofs = WORD * 2
+        mc.load(r.SCRATCH.value, r.SP.value, frame_size + ofs) 
+        mc.mtlr(r.SCRATCH.value)
+        mc.addi(r.SP.value, r.SP.value, frame_size)
+        mc.blr()
 
+        # if r3 == 0 we skip the return above and jump to the exception path
+        offset = mc.currpos() - jmp_pos
         pmc = OverwritingBuilder(mc, jmp_pos, 1)
-        pmc.bc(4, 2, jmp_pos) # jump if the two values are equal
+        pmc.bc(12, 2, offset) 
         pmc.overwrite()
+        # restore the frame before leaving
+        mc.load(r.SCRATCH.value, r.SP.value, frame_size + ofs) 
+        mc.mtlr(r.SCRATCH.value)
+        mc.addi(r.SP.value, r.SP.value, frame_size)
         mc.b_abs(self.propagate_exception_path)
+
+
+        mc.prepare_insts_blocks()
         rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+        if IS_PPC_64:
+            self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
         self.malloc_slowpath = rawstart
 
     def _build_propagate_exception_path(self):
@@ -362,8 +417,8 @@
         addr = rffi.cast(lltype.Signed, decode_func_addr)
 
         # load parameters into parameter registers
-        mc.load(r.r3.value, r.SPP.value, self.ENCODING_AREA)     # address of state encoding 
-        mc.mr(r.r4.value, r.SPP.value)         # load spilling pointer
+        mc.load(r.r3.value, r.SPP.value, self.FORCE_INDEX_AREA)    # address of state encoding 
+        mc.mr(r.r4.value, r.SPP.value)                             # load spilling pointer
         #
         # call decoding function
         mc.call(addr)
@@ -430,6 +485,23 @@
         self.exit_code_adr = self._gen_exit_path()
         self._leave_jitted_hook_save_exc = self._gen_leave_jitted_hook_code(True)
         self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False)
+        debug_start('jit-backend-counts')
+        self.set_debug(have_debug_prints())
+        debug_stop('jit-backend-counts')
+
+    def finish_once(self):
+        if self._debug:
+            debug_start('jit-backend-counts')
+            for i in range(len(self.loop_run_counters)):
+                struct = self.loop_run_counters[i]
+                if struct.type == 'l':
+                    prefix = 'TargetToken(%d)' % struct.number
+                elif struct.type == 'b':
+                    prefix = 'bridge ' + str(struct.number)
+                else:
+                    prefix = 'entry ' + str(struct.number)
+                debug_print(prefix + ':' + str(struct.i))
+            debug_stop('jit-backend-counts')
 
     @staticmethod
     def _release_gil_shadowstack():
@@ -475,6 +547,7 @@
         looptoken._ppc_loop_code = start_pos
         clt.frame_depth = clt.param_depth = -1
         spilling_area, param_depth = self._assemble(operations, regalloc)
+        size_excluding_failure_stuff = self.mc.get_relative_pos()
         clt.frame_depth = spilling_area
         clt.param_depth = param_depth
      
@@ -502,8 +575,12 @@
             print 'Loop', inputargs, operations
             self.mc._dump_trace(loop_start, 'loop_%s.asm' % self.cpu.total_compiled_loops)
             print 'Done assembling loop with token %r' % looptoken
+        ops_offset = self.mc.ops_offset
         self._teardown()
 
+        # XXX 3rd arg may not be correct yet
+        return AsmInfo(ops_offset, real_start, size_excluding_failure_stuff)
+
     def _assemble(self, operations, regalloc):
         regalloc.compute_hint_frame_locations(operations)
         self._walk_operations(operations, regalloc)
@@ -531,7 +608,9 @@
 
         sp_patch_location = self._prepare_sp_patch_position()
 
+        startpos = self.mc.get_relative_pos()
         spilling_area, param_depth = self._assemble(operations, regalloc)
+        codeendpos = self.mc.get_relative_pos()
 
         self.write_pending_failure_recoveries()
 
@@ -553,8 +632,12 @@
             print 'Loop', inputargs, operations
             self.mc._dump_trace(rawstart, 'bridge_%s.asm' % self.cpu.total_compiled_loops)
             print 'Done assembling bridge with token %r' % looptoken
+
+        ops_offset = self.mc.ops_offset
         self._teardown()
 
+        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+
     def _patch_sp_offset(self, sp_patch_location, rawstart):
         mc = PPCBuilder()
         frame_depth = self.compute_frame_depth(self.current_clt.frame_depth,
@@ -828,11 +911,10 @@
                 return
             # move immediate value to memory
             elif loc.is_stack():
-                self.mc.alloc_scratch_reg()
-                offset = loc.value
-                self.mc.load_imm(r.SCRATCH, value)
-                self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
-                self.mc.free_scratch_reg()
+                with scratch_reg(self.mc):
+                    offset = loc.value
+                    self.mc.load_imm(r.SCRATCH, value)
+                    self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
                 return
             assert 0, "not supported location"
         elif prev_loc.is_stack():
@@ -845,10 +927,9 @@
             # move in memory
             elif loc.is_stack():
                 target_offset = loc.value
-                self.mc.alloc_scratch_reg()
-                self.mc.load(r.SCRATCH.value, r.SPP.value, offset)
-                self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset)
-                self.mc.free_scratch_reg()
+                with scratch_reg(self.mc):
+                    self.mc.load(r.SCRATCH.value, r.SPP.value, offset)
+                    self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset)
                 return
             assert 0, "not supported location"
         elif prev_loc.is_reg():
@@ -883,10 +964,7 @@
         elif loc.is_reg():
             self.mc.addi(r.SP.value, r.SP.value, -WORD) # decrease stack pointer
             # push value
-            if IS_PPC_32:
-                self.mc.stw(loc.value, r.SP.value, 0)
-            else:
-                self.mc.std(loc.value, r.SP.value, 0)
+            self.mc.store(loc.value, r.SP.value, 0)
         elif loc.is_imm():
             assert 0, "not implemented yet"
         elif loc.is_imm_float():
@@ -946,17 +1024,17 @@
     def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
         assert size & (WORD-1) == 0     # must be correctly aligned
 
-        self.mc.load_imm(r.RES.value, nursery_free_adr)
+        self.mc.load_imm(r.RES, nursery_free_adr)
         self.mc.load(r.RES.value, r.RES.value, 0)
 
         if _check_imm_arg(size):
             self.mc.addi(r.r4.value, r.RES.value, size)
         else:
-            self.mc.load_imm(r.r4.value, size)
+            self.mc.load_imm(r.r4, size)
             self.mc.add(r.r4.value, r.RES.value, r.r4.value)
 
         with scratch_reg(self.mc):
-            self.mc.gen_load_int(r.SCRATCH.value, nursery_top_adr)
+            self.mc.load_imm(r.SCRATCH, nursery_top_adr)
             self.mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value)
 
         self.mc.cmp_op(0, r.r4.value, r.SCRATCH.value, signed=False)
@@ -977,10 +1055,11 @@
         offset = self.mc.currpos() - fast_jmp_pos
         pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1)
         pmc.bc(4, 1, offset) # jump if LE (not GT)
+        pmc.overwrite()
         
         with scratch_reg(self.mc):
-            self.mc.load_imm(r.SCRATCH.value, nursery_free_adr)
-            self.mc.storex(r.r1.value, 0, r.SCRATCH.value)
+            self.mc.load_imm(r.SCRATCH, nursery_free_adr)
+            self.mc.storex(r.r4.value, 0, r.SCRATCH.value)
 
     def mark_gc_roots(self, force_index, use_copy_area=False):
         if force_index < 0:
@@ -1010,10 +1089,9 @@
             return 0
 
     def _write_fail_index(self, fail_index):
-        self.mc.alloc_scratch_reg()
-        self.mc.load_imm(r.SCRATCH, fail_index)
-        self.mc.store(r.SCRATCH.value, r.SPP.value, self.ENCODING_AREA)
-        self.mc.free_scratch_reg()
+        with scratch_reg(self.mc):
+            self.mc.load_imm(r.SCRATCH, fail_index)
+            self.mc.store(r.SCRATCH.value, r.SPP.value, self.FORCE_INDEX_AREA)
             
     def load(self, loc, value):
         assert loc.is_reg() and value.is_imm()
diff --git a/pypy/jit/backend/ppc/regalloc.py b/pypy/jit/backend/ppc/regalloc.py
--- a/pypy/jit/backend/ppc/regalloc.py
+++ b/pypy/jit/backend/ppc/regalloc.py
@@ -50,37 +50,33 @@
     save_around_call_regs = r.VOLATILES
 
     REGLOC_TO_COPY_AREA_OFS = {
-        r.r0:   MY_COPY_OF_REGS + 0 * WORD,
-        r.r2:   MY_COPY_OF_REGS + 1 * WORD,
-        r.r3:   MY_COPY_OF_REGS + 2 * WORD,
-        r.r4:   MY_COPY_OF_REGS + 3 * WORD,
-        r.r5:   MY_COPY_OF_REGS + 4 * WORD,
-        r.r6:   MY_COPY_OF_REGS + 5 * WORD,
-        r.r7:   MY_COPY_OF_REGS + 6 * WORD,
-        r.r8:   MY_COPY_OF_REGS + 7 * WORD,
-        r.r9:   MY_COPY_OF_REGS + 8 * WORD,
-        r.r10:  MY_COPY_OF_REGS + 9 * WORD,
-        r.r11:  MY_COPY_OF_REGS + 10 * WORD,
-        r.r12:  MY_COPY_OF_REGS + 11 * WORD,
-        r.r13:  MY_COPY_OF_REGS + 12 * WORD,
-        r.r14:  MY_COPY_OF_REGS + 13 * WORD,
-        r.r15:  MY_COPY_OF_REGS + 14 * WORD,
-        r.r16:  MY_COPY_OF_REGS + 15 * WORD,
-        r.r17:  MY_COPY_OF_REGS + 16 * WORD,
-        r.r18:  MY_COPY_OF_REGS + 17 * WORD,
-        r.r19:  MY_COPY_OF_REGS + 18 * WORD,
-        r.r20:  MY_COPY_OF_REGS + 19 * WORD,
-        r.r21:  MY_COPY_OF_REGS + 20 * WORD,
-        r.r22:  MY_COPY_OF_REGS + 21 * WORD,
-        r.r23:  MY_COPY_OF_REGS + 22 * WORD,
-        r.r24:  MY_COPY_OF_REGS + 23 * WORD,
-        r.r25:  MY_COPY_OF_REGS + 24 * WORD,
-        r.r26:  MY_COPY_OF_REGS + 25 * WORD,
-        r.r27:  MY_COPY_OF_REGS + 26 * WORD,
-        r.r28:  MY_COPY_OF_REGS + 27 * WORD,
-        r.r29:  MY_COPY_OF_REGS + 28 * WORD,
-        r.r30:  MY_COPY_OF_REGS + 29 * WORD,
-        r.r31:  MY_COPY_OF_REGS + 30 * WORD,
+        r.r3:   MY_COPY_OF_REGS + 0 * WORD,
+        r.r4:   MY_COPY_OF_REGS + 1 * WORD,
+        r.r5:   MY_COPY_OF_REGS + 2 * WORD,
+        r.r6:   MY_COPY_OF_REGS + 3 * WORD,
+        r.r7:   MY_COPY_OF_REGS + 4 * WORD,
+        r.r8:   MY_COPY_OF_REGS + 5 * WORD,
+        r.r9:   MY_COPY_OF_REGS + 6 * WORD,
+        r.r10:  MY_COPY_OF_REGS + 7 * WORD,
+        r.r11:  MY_COPY_OF_REGS + 8 * WORD,
+        r.r12:  MY_COPY_OF_REGS + 9 * WORD,
+        r.r14:  MY_COPY_OF_REGS + 10 * WORD,
+        r.r15:  MY_COPY_OF_REGS + 11 * WORD,
+        r.r16:  MY_COPY_OF_REGS + 12 * WORD,
+        r.r17:  MY_COPY_OF_REGS + 13 * WORD,
+        r.r18:  MY_COPY_OF_REGS + 14 * WORD,
+        r.r19:  MY_COPY_OF_REGS + 15 * WORD,
+        r.r20:  MY_COPY_OF_REGS + 16 * WORD,
+        r.r21:  MY_COPY_OF_REGS + 17 * WORD,
+        r.r22:  MY_COPY_OF_REGS + 18 * WORD,
+        r.r23:  MY_COPY_OF_REGS + 19 * WORD,
+        r.r24:  MY_COPY_OF_REGS + 20 * WORD,
+        r.r25:  MY_COPY_OF_REGS + 21 * WORD,
+        r.r26:  MY_COPY_OF_REGS + 22 * WORD,
+        r.r27:  MY_COPY_OF_REGS + 23 * WORD,
+        r.r28:  MY_COPY_OF_REGS + 24 * WORD,
+        r.r29:  MY_COPY_OF_REGS + 25 * WORD,
+        r.r30:  MY_COPY_OF_REGS + 26 * WORD,
     }
 
     def __init__(self, longevity, frame_manager=None, assembler=None):
@@ -177,7 +173,7 @@
     def prepare_loop(self, inputargs, operations):
         self._prepare(inputargs, operations)
         self._set_initial_bindings(inputargs)
-        self.possibly_free_vars(list(inputargs))
+        self.possibly_free_vars(inputargs)
 
     def prepare_bridge(self, inputargs, arglocs, ops):
         self._prepare(inputargs, ops)
@@ -425,7 +421,7 @@
     prepare_guard_not_invalidated = prepare_guard_no_overflow
 
     def prepare_guard_exception(self, op):
-        boxes = list(op.getarglist())
+        boxes = op.getarglist()
         arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint()))
         loc = self._ensure_value_is_boxed(arg0)
         loc1 = self.get_scratch_reg(INT, boxes)
@@ -447,7 +443,7 @@
         return arglocs
 
     def prepare_guard_value(self, op):
-        boxes = list(op.getarglist())
+        boxes = op.getarglist()
         a0, a1 = boxes
         l0 = self._ensure_value_is_boxed(a0, boxes)
         l1 = self._ensure_value_is_boxed(a1, boxes)
@@ -459,7 +455,7 @@
 
     def prepare_guard_class(self, op):
         assert isinstance(op.getarg(0), Box)
-        boxes = list(op.getarglist())
+        boxes = op.getarglist()
         x = self._ensure_value_is_boxed(boxes[0], boxes)
         y = self.get_scratch_reg(REF, forbidden_vars=boxes)
         y_val = rffi.cast(lltype.Signed, op.getarg(1).getint())
@@ -559,7 +555,7 @@
         return []
 
     def prepare_setfield_gc(self, op):
-        boxes = list(op.getarglist())
+        boxes = op.getarglist()
         a0, a1 = boxes
         ofs, size, sign = unpack_fielddescr(op.getdescr())
         base_loc = self._ensure_value_is_boxed(a0, boxes)
@@ -608,6 +604,7 @@
         self.possibly_free_var(op.result)
         return [base_loc, index_loc, result_loc, ofs_loc, imm(ofs),
                                     imm(itemsize), imm(fieldsize)]
+    prepare_getinteriorfield_raw = prepare_getinteriorfield_gc
 
     def prepare_setinteriorfield_gc(self, op):
         t = unpack_interiorfielddescr(op.getdescr())
@@ -622,6 +619,7 @@
             ofs_loc = self._ensure_value_is_boxed(ConstInt(ofs), args)
         return [base_loc, index_loc, value_loc, ofs_loc, imm(ofs),
                                         imm(itemsize), imm(fieldsize)]
+    prepare_setinteriorfield_raw = prepare_setinteriorfield_gc
 
     def prepare_arraylen_gc(self, op):
         arraydescr = op.getdescr()
@@ -811,6 +809,7 @@
 
     prepare_debug_merge_point = void
     prepare_jit_debug = void
+    prepare_keepalive = void
 
     def prepare_cond_call_gc_wb(self, op):
         assert op.result is None
diff --git a/pypy/jit/backend/ppc/register.py b/pypy/jit/backend/ppc/register.py
--- a/pypy/jit/backend/ppc/register.py
+++ b/pypy/jit/backend/ppc/register.py
@@ -14,7 +14,8 @@
 
 NONVOLATILES        = [r14, r15, r16, r17, r18, r19, r20, r21, r22, r23,
                     r24, r25, r26, r27, r28, r29, r30, r31]
-VOLATILES           = [r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13]
+VOLATILES           = [r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12]
+# volatile r2 is persisted around calls and r13 can be ignored
 
 NONVOLATILES_FLOAT  = [f14, f15, f16, f17, f18, f19, f20, f21, f22, f23,
                     f24, f25, f26, f27, f28, f29, f30, f31]
diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py
--- a/pypy/jit/backend/ppc/runner.py
+++ b/pypy/jit/backend/ppc/runner.py
@@ -32,7 +32,7 @@
             gcdescr.force_index_ofs = FORCE_INDEX_OFS
             # XXX for now the ppc backend does not support the gcremovetypeptr
             # translation option
-            assert gcdescr.config.translation.gcremovetypeptr is False
+            # assert gcdescr.config.translation.gcremovetypeptr is False
         AbstractLLCPU.__init__(self, rtyper, stats, opts,
                                translate_support_code, gcdescr)
 
diff --git a/pypy/jit/backend/ppc/test/test_ztranslation.py b/pypy/jit/backend/ppc/test/test_ztranslation.py
--- a/pypy/jit/backend/ppc/test/test_ztranslation.py
+++ b/pypy/jit/backend/ppc/test/test_ztranslation.py
@@ -18,8 +18,9 @@
     def _check_cbuilder(self, cbuilder):
         # We assume here that we have sse2.  If not, the CPUClass
         # needs to be changed to CPU386_NO_SSE2, but well.
-        assert '-msse2' in cbuilder.eci.compile_extra
-        assert '-mfpmath=sse' in cbuilder.eci.compile_extra
+        #assert '-msse2' in cbuilder.eci.compile_extra
+        #assert '-mfpmath=sse' in cbuilder.eci.compile_extra
+        pass
 
     def test_stuff_translates(self):
         # this is a basic test that tries to hit a number of features and their
@@ -176,7 +177,7 @@
     def _get_TranslationContext(self):
         t = TranslationContext()
         t.config.translation.gc = DEFL_GC   # 'hybrid' or 'minimark'
-        t.config.translation.gcrootfinder = 'asmgcc'
+        t.config.translation.gcrootfinder = 'shadowstack'
         t.config.translation.list_comprehension_operations = True
         t.config.translation.gcremovetypeptr = True
         return t
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1677,6 +1677,7 @@
         c_box = self.alloc_string("hi there").constbox()
         c_nest = ConstInt(0)
         self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest], 'void')
+        self.execute_operation(rop.KEEPALIVE, [c_box], 'void')
         self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
                                                c_nest, c_nest], 'void')
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to