Author: hager <[email protected]>
Branch: ppc-jit-backend
Changeset: r52973:52ece45399fc
Date: 2012-02-27 20:00 +0100
http://bitbucket.org/pypy/pypy/changeset/52ece45399fc/
Log: merge
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -523,7 +523,7 @@
return []
def add_frame_offset(self, shape, offset):
- assert offset != 0
+ assert offset & 3 == 0
shape.append(offset)
def add_callee_save_reg(self, shape, register):
diff --git a/pypy/jit/backend/ppc/codebuilder.py b/pypy/jit/backend/ppc/codebuilder.py
--- a/pypy/jit/backend/ppc/codebuilder.py
+++ b/pypy/jit/backend/ppc/codebuilder.py
@@ -962,6 +962,11 @@
PPCAssembler.__init__(self)
self.init_block_builder()
self.r0_in_use = r0_in_use
+ self.ops_offset = {}
+
+ def mark_op(self, op):
+ pos = self.get_relative_pos()
+ self.ops_offset[op] = pos
def check(self, desc, v, *args):
desc.__get__(self)(*args)
@@ -994,13 +999,12 @@
self.ldx(rD.value, 0, rD.value)
def store_reg(self, source_reg, addr):
- self.alloc_scratch_reg()
- self.load_imm(r.SCRATCH, addr)
- if IS_PPC_32:
- self.stwx(source_reg.value, 0, r.SCRATCH.value)
- else:
- self.stdx(source_reg.value, 0, r.SCRATCH.value)
- self.free_scratch_reg()
+ with scratch_reg(self):
+ self.load_imm(r.SCRATCH, addr)
+ if IS_PPC_32:
+ self.stwx(source_reg.value, 0, r.SCRATCH.value)
+ else:
+ self.stdx(source_reg.value, 0, r.SCRATCH.value)
def b_offset(self, target):
curpos = self.currpos()
@@ -1020,17 +1024,15 @@
BI = condition[0]
BO = condition[1]
- self.alloc_scratch_reg()
- self.load_imm(r.SCRATCH, addr)
- self.mtctr(r.SCRATCH.value)
- self.free_scratch_reg()
+ with scratch_reg(self):
+ self.load_imm(r.SCRATCH, addr)
+ self.mtctr(r.SCRATCH.value)
self.bcctr(BO, BI)
def b_abs(self, address, trap=False):
- self.alloc_scratch_reg()
- self.load_imm(r.SCRATCH, address)
- self.mtctr(r.SCRATCH.value)
- self.free_scratch_reg()
+ with scratch_reg(self):
+ self.load_imm(r.SCRATCH, address)
+ self.mtctr(r.SCRATCH.value)
if trap:
self.trap()
self.bctr()
@@ -1044,17 +1046,16 @@
def call(self, address):
""" do a call to an absolute address
"""
- self.alloc_scratch_reg()
- if IS_PPC_32:
- self.load_imm(r.SCRATCH, address)
- else:
- self.store(r.TOC.value, r.SP.value, 5 * WORD)
- self.load_imm(r.r11, address)
- self.load(r.SCRATCH.value, r.r11.value, 0)
- self.load(r.r2.value, r.r11.value, WORD)
- self.load(r.r11.value, r.r11.value, 2 * WORD)
- self.mtctr(r.SCRATCH.value)
- self.free_scratch_reg()
+ with scratch_reg(self):
+ if IS_PPC_32:
+ self.load_imm(r.SCRATCH, address)
+ else:
+ self.store(r.TOC.value, r.SP.value, 5 * WORD)
+ self.load_imm(r.r11, address)
+ self.load(r.SCRATCH.value, r.r11.value, 0)
+ self.load(r.r2.value, r.r11.value, WORD)
+ self.load(r.r11.value, r.r11.value, 2 * WORD)
+ self.mtctr(r.SCRATCH.value)
self.bctrl()
if IS_PPC_64:
diff --git a/pypy/jit/backend/ppc/helper/regalloc.py b/pypy/jit/backend/ppc/helper/regalloc.py
--- a/pypy/jit/backend/ppc/helper/regalloc.py
+++ b/pypy/jit/backend/ppc/helper/regalloc.py
@@ -76,7 +76,7 @@
def prepare_binary_int_op():
def f(self, op):
- boxes = list(op.getarglist())
+ boxes = op.getarglist()
b0, b1 = boxes
reg1 = self._ensure_value_is_boxed(b0, forbidden_vars=boxes)
diff --git a/pypy/jit/backend/ppc/opassembler.py b/pypy/jit/backend/ppc/opassembler.py
--- a/pypy/jit/backend/ppc/opassembler.py
+++ b/pypy/jit/backend/ppc/opassembler.py
@@ -12,7 +12,8 @@
from pypy.jit.backend.ppc.helper.assembler import (count_reg_args,
Saved_Volatiles)
from pypy.jit.backend.ppc.jump import remap_frame_layout
-from pypy.jit.backend.ppc.codebuilder import OverwritingBuilder
+from pypy.jit.backend.ppc.codebuilder import (OverwritingBuilder, scratch_reg,
+ PPCBuilder)
from pypy.jit.backend.ppc.regalloc import TempPtr, TempInt
from pypy.jit.backend.llsupport import symbolic
from pypy.rpython.lltypesystem import rstr, rffi, lltype
@@ -210,12 +211,11 @@
# instead of XER could be more efficient
def _emit_ovf_guard(self, op, arglocs, cond):
# move content of XER to GPR
- self.mc.alloc_scratch_reg()
- self.mc.mfspr(r.SCRATCH.value, 1)
- # shift and mask to get comparison result
- self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.mfspr(r.SCRATCH.value, 1)
+ # shift and mask to get comparison result
+ self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0)
+ self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
self._emit_guard(op, arglocs, cond)
def emit_guard_no_overflow(self, op, arglocs, regalloc):
@@ -244,14 +244,13 @@
def _cmp_guard_class(self, op, locs, regalloc):
offset = locs[2]
if offset is not None:
- self.mc.alloc_scratch_reg()
- if offset.is_imm():
- self.mc.load(r.SCRATCH.value, locs[0].value, offset.value)
- else:
- assert offset.is_reg()
- self.mc.loadx(r.SCRATCH.value, locs[0].value, offset.value)
- self.mc.cmp_op(0, r.SCRATCH.value, locs[1].value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ if offset.is_imm():
+ self.mc.load(r.SCRATCH.value, locs[0].value, offset.value)
+ else:
+ assert offset.is_reg()
+ self.mc.loadx(r.SCRATCH.value, locs[0].value, offset.value)
+ self.mc.cmp_op(0, r.SCRATCH.value, locs[1].value)
else:
assert 0, "not implemented yet"
self._emit_guard(op, locs[3:], c.NE)
@@ -288,10 +287,9 @@
adr = self.fail_boxes_int.get_addr_for_num(i)
else:
assert 0
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, adr)
- self.mc.storex(loc.value, 0, r.SCRATCH.value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, adr)
+ self.mc.storex(loc.value, 0, r.SCRATCH.value)
elif loc.is_vfp_reg():
assert box.type == FLOAT
assert 0, "not implemented yet"
@@ -305,13 +303,12 @@
adr = self.fail_boxes_int.get_addr_for_num(i)
else:
assert 0
- self.mc.alloc_scratch_reg()
- self.mov_loc_loc(loc, r.SCRATCH)
- # store content of r5 temporary in ENCODING AREA
- self.mc.store(r.r5.value, r.SPP.value, 0)
- self.mc.load_imm(r.r5, adr)
- self.mc.store(r.SCRATCH.value, r.r5.value, 0)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mov_loc_loc(loc, r.SCRATCH)
+ # store content of r5 temporary in ENCODING AREA
+ self.mc.store(r.r5.value, r.SPP.value, 0)
+ self.mc.load_imm(r.r5, adr)
+ self.mc.store(r.SCRATCH.value, r.r5.value, 0)
# restore r5
self.mc.load(r.r5.value, r.SPP.value, 0)
else:
@@ -362,10 +359,9 @@
failargs = arglocs[5:]
self.mc.load_imm(loc1, pos_exception.value)
- self.mc.alloc_scratch_reg()
- self.mc.load(r.SCRATCH.value, loc1.value, 0)
- self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load(r.SCRATCH.value, loc1.value, 0)
+ self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
self._emit_guard(op, failargs, c.NE, save_exc=True)
self.mc.load_imm(loc, pos_exc_value.value)
@@ -373,11 +369,10 @@
if resloc:
self.mc.load(resloc.value, loc.value, 0)
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, 0)
- self.mc.store(r.SCRATCH.value, loc.value, 0)
- self.mc.store(r.SCRATCH.value, loc1.value, 0)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, 0)
+ self.mc.store(r.SCRATCH.value, loc.value, 0)
+ self.mc.store(r.SCRATCH.value, loc1.value, 0)
def emit_call(self, op, args, regalloc, force_index=-1):
adr = args[0].value
@@ -426,13 +421,12 @@
param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS)
* WORD) # space for first 8 parameters
- self.mc.alloc_scratch_reg()
- for i, arg in enumerate(stack_args):
- offset = param_offset + i * WORD
- if arg is not None:
- self.regalloc_mov(regalloc.loc(arg), r.SCRATCH)
- self.mc.store(r.SCRATCH.value, r.SP.value, offset)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ for i, arg in enumerate(stack_args):
+ offset = param_offset + i * WORD
+ if arg is not None:
+ self.regalloc_mov(regalloc.loc(arg), r.SCRATCH)
+ self.mc.store(r.SCRATCH.value, r.SP.value, offset)
# collect variables that need to go in registers
# and the registers they will be stored in
@@ -542,31 +536,31 @@
def emit_getinteriorfield_gc(self, op, arglocs, regalloc):
(base_loc, index_loc, res_loc,
ofs_loc, ofs, itemsize, fieldsize) = arglocs
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, itemsize.value)
- self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value)
- if ofs.value > 0:
- if ofs_loc.is_imm():
- self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, itemsize.value)
+ self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value)
+ if ofs.value > 0:
+ if ofs_loc.is_imm():
+ self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
+ else:
+ self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
+
+ if fieldsize.value == 8:
+ self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
+ elif fieldsize.value == 4:
+ self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value)
+ elif fieldsize.value == 2:
+ self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value)
+ elif fieldsize.value == 1:
+ self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value)
else:
- self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value)
-
- if fieldsize.value == 8:
- self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 4:
- self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 2:
- self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value)
- elif fieldsize.value == 1:
- self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value)
- else:
- assert 0
- self.mc.free_scratch_reg()
+ assert 0
#XXX Hack, Hack, Hack
if not we_are_translated():
signed = op.getdescr().fielddescr.is_field_signed()
self._ensure_result_bit_extension(res_loc, fieldsize.value, signed)
+ emit_getinteriorfield_raw = emit_getinteriorfield_gc
def emit_setinteriorfield_gc(self, op, arglocs, regalloc):
(base_loc, index_loc, value_loc,
@@ -588,7 +582,7 @@
self.mc.stbx(value_loc.value, base_loc.value, r.SCRATCH.value)
else:
assert 0
-
+ emit_setinteriorfield_raw = emit_setinteriorfield_gc
class ArrayOpAssembler(object):
@@ -752,13 +746,12 @@
bytes_loc = regalloc.force_allocate_reg(bytes_box, forbidden_vars)
scale = self._get_unicode_item_scale()
assert length_loc.is_reg()
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, 1 << scale)
- if IS_PPC_32:
- self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value)
- else:
- self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, 1 << scale)
+ if IS_PPC_32:
+ self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value)
+ else:
+ self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value)
length_box = bytes_box
length_loc = bytes_loc
# call memcpy()
@@ -873,15 +866,15 @@
def set_vtable(self, box, vtable):
if self.cpu.vtable_offset is not None:
adr = rffi.cast(lltype.Signed, vtable)
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, adr)
- self.mc.store(r.SCRATCH.value, r.RES.value, self.cpu.vtable_offset)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, adr)
+ self.mc.store(r.SCRATCH.value, r.RES.value, self.cpu.vtable_offset)
def emit_debug_merge_point(self, op, arglocs, regalloc):
pass
emit_jit_debug = emit_debug_merge_point
+ emit_keepalive = emit_debug_merge_point
def emit_cond_call_gc_wb(self, op, arglocs, regalloc):
# Write code equivalent to write_barrier() in the GC: it checks
@@ -906,26 +899,25 @@
raise AssertionError(opnum)
loc_base = arglocs[0]
- self.mc.alloc_scratch_reg()
- self.mc.load(r.SCRATCH.value, loc_base.value, 0)
+ with scratch_reg(self.mc):
+ self.mc.load(r.SCRATCH.value, loc_base.value, 0)
- # get the position of the bit we want to test
- bitpos = descr.jit_wb_if_flag_bitpos
+ # get the position of the bit we want to test
+ bitpos = descr.jit_wb_if_flag_bitpos
- if IS_PPC_32:
- # put this bit to the rightmost bitposition of r0
- if bitpos > 0:
- self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value,
- 32 - bitpos, 31, 31)
- # test whether this bit is set
- self.mc.cmpwi(0, r.SCRATCH.value, 1)
- else:
- if bitpos > 0:
- self.mc.rldicl(r.SCRATCH.value, r.SCRATCH.value,
- 64 - bitpos, 63)
- # test whether this bit is set
- self.mc.cmpdi(0, r.SCRATCH.value, 1)
- self.mc.free_scratch_reg()
+ if IS_PPC_32:
+ # put this bit to the rightmost bitposition of r0
+ if bitpos > 0:
+ self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value,
+ 32 - bitpos, 31, 31)
+ # test whether this bit is set
+ self.mc.cmpwi(0, r.SCRATCH.value, 1)
+ else:
+ if bitpos > 0:
+ self.mc.rldicl(r.SCRATCH.value, r.SCRATCH.value,
+ 64 - bitpos, 63)
+ # test whether this bit is set
+ self.mc.cmpdi(0, r.SCRATCH.value, 1)
jz_location = self.mc.currpos()
self.mc.nop()
@@ -947,7 +939,7 @@
# patch the JZ above
offset = self.mc.currpos() - jz_location
pmc = OverwritingBuilder(self.mc, jz_location, 1)
- pmc.bc(4, 2, offset) # jump if the two values are equal
+ pmc.bc(12, 2, offset) # jump if the two values are equal
pmc.overwrite()
emit_cond_call_gc_wb_array = emit_cond_call_gc_wb
@@ -989,10 +981,9 @@
# check value
resloc = regalloc.try_allocate_reg(resbox)
assert resloc is r.RES
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, value)
- self.mc.cmp_op(0, resloc.value, r.SCRATCH.value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, value)
+ self.mc.cmp_op(0, resloc.value, r.SCRATCH.value)
regalloc.possibly_free_var(resbox)
fast_jmp_pos = self.mc.currpos()
@@ -1035,11 +1026,10 @@
assert isinstance(fielddescr, FieldDescr)
ofs = fielddescr.offset
resloc = regalloc.force_allocate_reg(resbox)
- self.mc.alloc_scratch_reg()
- self.mov_loc_loc(arglocs[1], r.SCRATCH)
- self.mc.li(resloc.value, 0)
- self.mc.storex(resloc.value, 0, r.SCRATCH.value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mov_loc_loc(arglocs[1], r.SCRATCH)
+ self.mc.li(resloc.value, 0)
+ self.mc.storex(resloc.value, 0, r.SCRATCH.value)
regalloc.possibly_free_var(resbox)
if op.result is not None:
@@ -1055,13 +1045,12 @@
raise AssertionError(kind)
resloc = regalloc.force_allocate_reg(op.result)
regalloc.possibly_free_var(resbox)
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, adr)
- if op.result.type == FLOAT:
- assert 0, "not implemented yet"
- else:
- self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, adr)
+ if op.result.type == FLOAT:
+ assert 0, "not implemented yet"
+ else:
+ self.mc.loadx(resloc.value, 0, r.SCRATCH.value)
# merge point
offset = self.mc.currpos() - jmp_pos
@@ -1070,10 +1059,9 @@
pmc.b(offset)
pmc.overwrite()
- self.mc.alloc_scratch_reg()
- self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load(r.SCRATCH.value, r.SPP.value, 0)
+ self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT)
@@ -1102,10 +1090,9 @@
def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc):
ENCODING_AREA = len(r.MANAGED_REGS) * WORD
- self.mc.alloc_scratch_reg()
- self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
- self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA)
+ self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
self._emit_guard(guard_op, arglocs, c.LT, save_exc=True)
emit_guard_call_release_gil = emit_guard_call_may_force
diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py
--- a/pypy/jit/backend/ppc/ppc_assembler.py
+++ b/pypy/jit/backend/ppc/ppc_assembler.py
@@ -3,7 +3,7 @@
from pypy.jit.backend.ppc.ppc_form import PPCForm as Form
from pypy.jit.backend.ppc.ppc_field import ppc_fields
from pypy.jit.backend.ppc.regalloc import (TempInt, PPCFrameManager,
- Regalloc)
+ Regalloc, PPCRegisterManager)
from pypy.jit.backend.ppc.assembler import Assembler
from pypy.jit.backend.ppc.opassembler import OpAssembler
from pypy.jit.backend.ppc.symbol_lookup import lookup
@@ -37,15 +37,23 @@
from pypy.jit.metainterp.history import (BoxInt, ConstInt, ConstPtr,
ConstFloat, Box, INT, REF, FLOAT)
from pypy.jit.backend.x86.support import values_array
+from pypy.rlib.debug import (debug_print, debug_start, debug_stop,
+ have_debug_prints)
from pypy.rlib import rgc
from pypy.rpython.annlowlevel import llhelper
from pypy.rlib.objectmodel import we_are_translated
from pypy.rpython.lltypesystem.lloperation import llop
from pypy.jit.backend.ppc.locations import StackLocation, get_spp_offset
+from pypy.rlib.jit import AsmInfo
memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address,
rffi.SIZE_T], lltype.Void,
sandboxsafe=True, _nowrapper=True)
+
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+ ('type', lltype.Char), # 'b'ridge, 'l'abel or
+ # 'e'ntry point
+ ('number', lltype.Signed))
def hi(w):
return w >> 16
@@ -85,6 +93,7 @@
EMPTY_LOC = '\xFE'
END_OF_LOCS = '\xFF'
+ FORCE_INDEX_AREA = len(r.MANAGED_REGS) * WORD
ENCODING_AREA = len(r.MANAGED_REGS) * WORD
OFFSET_SPP_TO_GPR_SAVE_AREA = (FORCE_INDEX + FLOAT_INT_CONVERSION
+ ENCODING_AREA)
@@ -108,6 +117,12 @@
self.max_stack_params = 0
self.propagate_exception_path = 0
self.setup_failure_recovery()
+ self._debug = False
+ self.loop_run_counters = []
+ self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i')
+
+ def set_debug(self, v):
+ self._debug = v
def _save_nonvolatiles(self):
""" save nonvolatile GPRs in GPR SAVE AREA
@@ -298,24 +313,64 @@
def _build_malloc_slowpath(self):
mc = PPCBuilder()
- with Saved_Volatiles(mc):
- # Values to compute size stored in r3 and r4
- mc.subf(r.r3.value, r.r3.value, r.r4.value)
- addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
- mc.call(addr)
+ if IS_PPC_64:
+ for _ in range(6):
+ mc.write32(0)
+ frame_size = (# add space for floats later
+ + BACKCHAIN_SIZE * WORD)
+ if IS_PPC_32:
+ mc.stwu(r.SP.value, r.SP.value, -frame_size)
+ mc.mflr(r.SCRATCH.value)
+ mc.stw(r.SCRATCH.value, r.SP.value, frame_size + WORD)
+ else:
+ mc.stdu(r.SP.value, r.SP.value, -frame_size)
+ mc.mflr(r.SCRATCH.value)
+ mc.std(r.SCRATCH.value, r.SP.value, frame_size + 2 * WORD)
+ # managed volatiles are saved below
+ if self.cpu.supports_floats:
+ assert 0, "make sure to save floats here"
+ # Values to compute size stored in r3 and r4
+ mc.subf(r.r3.value, r.r3.value, r.r4.value)
+ addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr()
+ for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+ mc.store(reg.value, r.SPP.value, ofs)
+ mc.call(addr)
+ for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items():
+ mc.load(reg.value, r.SPP.value, ofs)
mc.cmp_op(0, r.r3.value, 0, imm=True)
jmp_pos = mc.currpos()
mc.nop()
+
nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr()
mc.load_imm(r.r4, nursery_free_adr)
mc.load(r.r4.value, r.r4.value, 0)
+
+ if IS_PPC_32:
+ ofs = WORD
+ else:
+ ofs = WORD * 2
+ mc.load(r.SCRATCH.value, r.SP.value, frame_size + ofs)
+ mc.mtlr(r.SCRATCH.value)
+ mc.addi(r.SP.value, r.SP.value, frame_size)
+ mc.blr()
+ # if r3 == 0 we skip the return above and jump to the exception path
+ offset = mc.currpos() - jmp_pos
pmc = OverwritingBuilder(mc, jmp_pos, 1)
- pmc.bc(4, 2, jmp_pos) # jump if the two values are equal
+ pmc.bc(12, 2, offset)
pmc.overwrite()
+ # restore the frame before leaving
+ mc.load(r.SCRATCH.value, r.SP.value, frame_size + ofs)
+ mc.mtlr(r.SCRATCH.value)
+ mc.addi(r.SP.value, r.SP.value, frame_size)
mc.b_abs(self.propagate_exception_path)
+
+
+ mc.prepare_insts_blocks()
rawstart = mc.materialize(self.cpu.asmmemmgr, [])
+ if IS_PPC_64:
+ self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
self.malloc_slowpath = rawstart
def _build_propagate_exception_path(self):
@@ -362,8 +417,8 @@
addr = rffi.cast(lltype.Signed, decode_func_addr)
# load parameters into parameter registers
- mc.load(r.r3.value, r.SPP.value, self.ENCODING_AREA) # address of state encoding
- mc.mr(r.r4.value, r.SPP.value) # load spilling pointer
+ mc.load(r.r3.value, r.SPP.value, self.FORCE_INDEX_AREA) # address of state encoding
+ mc.mr(r.r4.value, r.SPP.value) # load spilling pointer
#
# call decoding function
mc.call(addr)
@@ -430,6 +485,23 @@
self.exit_code_adr = self._gen_exit_path()
self._leave_jitted_hook_save_exc = self._gen_leave_jitted_hook_code(True)
self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False)
+ debug_start('jit-backend-counts')
+ self.set_debug(have_debug_prints())
+ debug_stop('jit-backend-counts')
+
+ def finish_once(self):
+ if self._debug:
+ debug_start('jit-backend-counts')
+ for i in range(len(self.loop_run_counters)):
+ struct = self.loop_run_counters[i]
+ if struct.type == 'l':
+ prefix = 'TargetToken(%d)' % struct.number
+ elif struct.type == 'b':
+ prefix = 'bridge ' + str(struct.number)
+ else:
+ prefix = 'entry ' + str(struct.number)
+ debug_print(prefix + ':' + str(struct.i))
+ debug_stop('jit-backend-counts')
@staticmethod
def _release_gil_shadowstack():
@@ -475,6 +547,7 @@
looptoken._ppc_loop_code = start_pos
clt.frame_depth = clt.param_depth = -1
spilling_area, param_depth = self._assemble(operations, regalloc)
+ size_excluding_failure_stuff = self.mc.get_relative_pos()
clt.frame_depth = spilling_area
clt.param_depth = param_depth
@@ -502,8 +575,12 @@
print 'Loop', inputargs, operations
self.mc._dump_trace(loop_start, 'loop_%s.asm' % self.cpu.total_compiled_loops)
print 'Done assembling loop with token %r' % looptoken
+ ops_offset = self.mc.ops_offset
self._teardown()
+ # XXX 3rd arg may not be correct yet
+ return AsmInfo(ops_offset, real_start, size_excluding_failure_stuff)
+
def _assemble(self, operations, regalloc):
regalloc.compute_hint_frame_locations(operations)
self._walk_operations(operations, regalloc)
@@ -531,7 +608,9 @@
sp_patch_location = self._prepare_sp_patch_position()
+ startpos = self.mc.get_relative_pos()
spilling_area, param_depth = self._assemble(operations, regalloc)
+ codeendpos = self.mc.get_relative_pos()
self.write_pending_failure_recoveries()
@@ -553,8 +632,12 @@
print 'Loop', inputargs, operations
self.mc._dump_trace(rawstart, 'bridge_%s.asm' % self.cpu.total_compiled_loops)
print 'Done assembling bridge with token %r' % looptoken
+
+ ops_offset = self.mc.ops_offset
self._teardown()
+ return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+
def _patch_sp_offset(self, sp_patch_location, rawstart):
mc = PPCBuilder()
frame_depth = self.compute_frame_depth(self.current_clt.frame_depth,
@@ -828,11 +911,10 @@
return
# move immediate value to memory
elif loc.is_stack():
- self.mc.alloc_scratch_reg()
- offset = loc.value
- self.mc.load_imm(r.SCRATCH, value)
- self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ offset = loc.value
+ self.mc.load_imm(r.SCRATCH, value)
+ self.mc.store(r.SCRATCH.value, r.SPP.value, offset)
return
assert 0, "not supported location"
elif prev_loc.is_stack():
@@ -845,10 +927,9 @@
# move in memory
elif loc.is_stack():
target_offset = loc.value
- self.mc.alloc_scratch_reg()
- self.mc.load(r.SCRATCH.value, r.SPP.value, offset)
- self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load(r.SCRATCH.value, r.SPP.value, offset)
+ self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset)
return
assert 0, "not supported location"
elif prev_loc.is_reg():
@@ -883,10 +964,7 @@
elif loc.is_reg():
self.mc.addi(r.SP.value, r.SP.value, -WORD) # decrease stack pointer
# push value
- if IS_PPC_32:
- self.mc.stw(loc.value, r.SP.value, 0)
- else:
- self.mc.std(loc.value, r.SP.value, 0)
+ self.mc.store(loc.value, r.SP.value, 0)
elif loc.is_imm():
assert 0, "not implemented yet"
elif loc.is_imm_float():
@@ -946,17 +1024,17 @@
def malloc_cond(self, nursery_free_adr, nursery_top_adr, size):
assert size & (WORD-1) == 0 # must be correctly aligned
- self.mc.load_imm(r.RES.value, nursery_free_adr)
+ self.mc.load_imm(r.RES, nursery_free_adr)
self.mc.load(r.RES.value, r.RES.value, 0)
if _check_imm_arg(size):
self.mc.addi(r.r4.value, r.RES.value, size)
else:
- self.mc.load_imm(r.r4.value, size)
+ self.mc.load_imm(r.r4, size)
self.mc.add(r.r4.value, r.RES.value, r.r4.value)
with scratch_reg(self.mc):
- self.mc.gen_load_int(r.SCRATCH.value, nursery_top_adr)
+ self.mc.load_imm(r.SCRATCH, nursery_top_adr)
self.mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value)
self.mc.cmp_op(0, r.r4.value, r.SCRATCH.value, signed=False)
@@ -977,10 +1055,11 @@
offset = self.mc.currpos() - fast_jmp_pos
pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1)
pmc.bc(4, 1, offset) # jump if LE (not GT)
+ pmc.overwrite()
with scratch_reg(self.mc):
- self.mc.load_imm(r.SCRATCH.value, nursery_free_adr)
- self.mc.storex(r.r1.value, 0, r.SCRATCH.value)
+ self.mc.load_imm(r.SCRATCH, nursery_free_adr)
+ self.mc.storex(r.r4.value, 0, r.SCRATCH.value)
def mark_gc_roots(self, force_index, use_copy_area=False):
if force_index < 0:
@@ -1010,10 +1089,9 @@
return 0
def _write_fail_index(self, fail_index):
- self.mc.alloc_scratch_reg()
- self.mc.load_imm(r.SCRATCH, fail_index)
- self.mc.store(r.SCRATCH.value, r.SPP.value, self.ENCODING_AREA)
- self.mc.free_scratch_reg()
+ with scratch_reg(self.mc):
+ self.mc.load_imm(r.SCRATCH, fail_index)
+ self.mc.store(r.SCRATCH.value, r.SPP.value, self.FORCE_INDEX_AREA)
def load(self, loc, value):
assert loc.is_reg() and value.is_imm()
diff --git a/pypy/jit/backend/ppc/regalloc.py b/pypy/jit/backend/ppc/regalloc.py
--- a/pypy/jit/backend/ppc/regalloc.py
+++ b/pypy/jit/backend/ppc/regalloc.py
@@ -50,37 +50,33 @@
save_around_call_regs = r.VOLATILES
REGLOC_TO_COPY_AREA_OFS = {
- r.r0: MY_COPY_OF_REGS + 0 * WORD,
- r.r2: MY_COPY_OF_REGS + 1 * WORD,
- r.r3: MY_COPY_OF_REGS + 2 * WORD,
- r.r4: MY_COPY_OF_REGS + 3 * WORD,
- r.r5: MY_COPY_OF_REGS + 4 * WORD,
- r.r6: MY_COPY_OF_REGS + 5 * WORD,
- r.r7: MY_COPY_OF_REGS + 6 * WORD,
- r.r8: MY_COPY_OF_REGS + 7 * WORD,
- r.r9: MY_COPY_OF_REGS + 8 * WORD,
- r.r10: MY_COPY_OF_REGS + 9 * WORD,
- r.r11: MY_COPY_OF_REGS + 10 * WORD,
- r.r12: MY_COPY_OF_REGS + 11 * WORD,
- r.r13: MY_COPY_OF_REGS + 12 * WORD,
- r.r14: MY_COPY_OF_REGS + 13 * WORD,
- r.r15: MY_COPY_OF_REGS + 14 * WORD,
- r.r16: MY_COPY_OF_REGS + 15 * WORD,
- r.r17: MY_COPY_OF_REGS + 16 * WORD,
- r.r18: MY_COPY_OF_REGS + 17 * WORD,
- r.r19: MY_COPY_OF_REGS + 18 * WORD,
- r.r20: MY_COPY_OF_REGS + 19 * WORD,
- r.r21: MY_COPY_OF_REGS + 20 * WORD,
- r.r22: MY_COPY_OF_REGS + 21 * WORD,
- r.r23: MY_COPY_OF_REGS + 22 * WORD,
- r.r24: MY_COPY_OF_REGS + 23 * WORD,
- r.r25: MY_COPY_OF_REGS + 24 * WORD,
- r.r26: MY_COPY_OF_REGS + 25 * WORD,
- r.r27: MY_COPY_OF_REGS + 26 * WORD,
- r.r28: MY_COPY_OF_REGS + 27 * WORD,
- r.r29: MY_COPY_OF_REGS + 28 * WORD,
- r.r30: MY_COPY_OF_REGS + 29 * WORD,
- r.r31: MY_COPY_OF_REGS + 30 * WORD,
+ r.r3: MY_COPY_OF_REGS + 0 * WORD,
+ r.r4: MY_COPY_OF_REGS + 1 * WORD,
+ r.r5: MY_COPY_OF_REGS + 2 * WORD,
+ r.r6: MY_COPY_OF_REGS + 3 * WORD,
+ r.r7: MY_COPY_OF_REGS + 4 * WORD,
+ r.r8: MY_COPY_OF_REGS + 5 * WORD,
+ r.r9: MY_COPY_OF_REGS + 6 * WORD,
+ r.r10: MY_COPY_OF_REGS + 7 * WORD,
+ r.r11: MY_COPY_OF_REGS + 8 * WORD,
+ r.r12: MY_COPY_OF_REGS + 9 * WORD,
+ r.r14: MY_COPY_OF_REGS + 10 * WORD,
+ r.r15: MY_COPY_OF_REGS + 11 * WORD,
+ r.r16: MY_COPY_OF_REGS + 12 * WORD,
+ r.r17: MY_COPY_OF_REGS + 13 * WORD,
+ r.r18: MY_COPY_OF_REGS + 14 * WORD,
+ r.r19: MY_COPY_OF_REGS + 15 * WORD,
+ r.r20: MY_COPY_OF_REGS + 16 * WORD,
+ r.r21: MY_COPY_OF_REGS + 17 * WORD,
+ r.r22: MY_COPY_OF_REGS + 18 * WORD,
+ r.r23: MY_COPY_OF_REGS + 19 * WORD,
+ r.r24: MY_COPY_OF_REGS + 20 * WORD,
+ r.r25: MY_COPY_OF_REGS + 21 * WORD,
+ r.r26: MY_COPY_OF_REGS + 22 * WORD,
+ r.r27: MY_COPY_OF_REGS + 23 * WORD,
+ r.r28: MY_COPY_OF_REGS + 24 * WORD,
+ r.r29: MY_COPY_OF_REGS + 25 * WORD,
+ r.r30: MY_COPY_OF_REGS + 26 * WORD,
}
def __init__(self, longevity, frame_manager=None, assembler=None):
@@ -177,7 +173,7 @@
def prepare_loop(self, inputargs, operations):
self._prepare(inputargs, operations)
self._set_initial_bindings(inputargs)
- self.possibly_free_vars(list(inputargs))
+ self.possibly_free_vars(inputargs)
def prepare_bridge(self, inputargs, arglocs, ops):
self._prepare(inputargs, ops)
@@ -425,7 +421,7 @@
prepare_guard_not_invalidated = prepare_guard_no_overflow
def prepare_guard_exception(self, op):
- boxes = list(op.getarglist())
+ boxes = op.getarglist()
arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint()))
loc = self._ensure_value_is_boxed(arg0)
loc1 = self.get_scratch_reg(INT, boxes)
@@ -447,7 +443,7 @@
return arglocs
def prepare_guard_value(self, op):
- boxes = list(op.getarglist())
+ boxes = op.getarglist()
a0, a1 = boxes
l0 = self._ensure_value_is_boxed(a0, boxes)
l1 = self._ensure_value_is_boxed(a1, boxes)
@@ -459,7 +455,7 @@
def prepare_guard_class(self, op):
assert isinstance(op.getarg(0), Box)
- boxes = list(op.getarglist())
+ boxes = op.getarglist()
x = self._ensure_value_is_boxed(boxes[0], boxes)
y = self.get_scratch_reg(REF, forbidden_vars=boxes)
y_val = rffi.cast(lltype.Signed, op.getarg(1).getint())
@@ -559,7 +555,7 @@
return []
def prepare_setfield_gc(self, op):
- boxes = list(op.getarglist())
+ boxes = op.getarglist()
a0, a1 = boxes
ofs, size, sign = unpack_fielddescr(op.getdescr())
base_loc = self._ensure_value_is_boxed(a0, boxes)
@@ -608,6 +604,7 @@
self.possibly_free_var(op.result)
return [base_loc, index_loc, result_loc, ofs_loc, imm(ofs),
imm(itemsize), imm(fieldsize)]
+ prepare_getinteriorfield_raw = prepare_getinteriorfield_gc
def prepare_setinteriorfield_gc(self, op):
t = unpack_interiorfielddescr(op.getdescr())
@@ -622,6 +619,7 @@
ofs_loc = self._ensure_value_is_boxed(ConstInt(ofs), args)
return [base_loc, index_loc, value_loc, ofs_loc, imm(ofs),
imm(itemsize), imm(fieldsize)]
+ prepare_setinteriorfield_raw = prepare_setinteriorfield_gc
def prepare_arraylen_gc(self, op):
arraydescr = op.getdescr()
@@ -811,6 +809,7 @@
prepare_debug_merge_point = void
prepare_jit_debug = void
+ prepare_keepalive = void
def prepare_cond_call_gc_wb(self, op):
assert op.result is None
diff --git a/pypy/jit/backend/ppc/register.py b/pypy/jit/backend/ppc/register.py
--- a/pypy/jit/backend/ppc/register.py
+++ b/pypy/jit/backend/ppc/register.py
@@ -14,7 +14,8 @@
NONVOLATILES = [r14, r15, r16, r17, r18, r19, r20, r21, r22, r23,
r24, r25, r26, r27, r28, r29, r30, r31]
-VOLATILES = [r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13]
+VOLATILES = [r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12]
+# volatile r2 is persisted around calls and r13 can be ignored
NONVOLATILES_FLOAT = [f14, f15, f16, f17, f18, f19, f20, f21, f22, f23,
f24, f25, f26, f27, f28, f29, f30, f31]
diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py
--- a/pypy/jit/backend/ppc/runner.py
+++ b/pypy/jit/backend/ppc/runner.py
@@ -32,7 +32,7 @@
gcdescr.force_index_ofs = FORCE_INDEX_OFS
# XXX for now the ppc backend does not support the gcremovetypeptr
# translation option
- assert gcdescr.config.translation.gcremovetypeptr is False
+ # assert gcdescr.config.translation.gcremovetypeptr is False
AbstractLLCPU.__init__(self, rtyper, stats, opts,
translate_support_code, gcdescr)
diff --git a/pypy/jit/backend/ppc/test/test_ztranslation.py b/pypy/jit/backend/ppc/test/test_ztranslation.py
--- a/pypy/jit/backend/ppc/test/test_ztranslation.py
+++ b/pypy/jit/backend/ppc/test/test_ztranslation.py
@@ -18,8 +18,9 @@
def _check_cbuilder(self, cbuilder):
# We assume here that we have sse2. If not, the CPUClass
# needs to be changed to CPU386_NO_SSE2, but well.
- assert '-msse2' in cbuilder.eci.compile_extra
- assert '-mfpmath=sse' in cbuilder.eci.compile_extra
+ #assert '-msse2' in cbuilder.eci.compile_extra
+ #assert '-mfpmath=sse' in cbuilder.eci.compile_extra
+ pass
def test_stuff_translates(self):
# this is a basic test that tries to hit a number of features and their
@@ -176,7 +177,7 @@
def _get_TranslationContext(self):
t = TranslationContext()
t.config.translation.gc = DEFL_GC # 'hybrid' or 'minimark'
- t.config.translation.gcrootfinder = 'asmgcc'
+ t.config.translation.gcrootfinder = 'shadowstack'
t.config.translation.list_comprehension_operations = True
t.config.translation.gcremovetypeptr = True
return t
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -1677,6 +1677,7 @@
c_box = self.alloc_string("hi there").constbox()
c_nest = ConstInt(0)
self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest], 'void')
+ self.execute_operation(rop.KEEPALIVE, [c_box], 'void')
self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest,
c_nest, c_nest], 'void')
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit