Author: hager <sven.ha...@uni-duesseldorf.de> Branch: ppc-jit-backend Changeset: r52973:52ece45399fc Date: 2012-02-27 20:00 +0100 http://bitbucket.org/pypy/pypy/changeset/52ece45399fc/
Log: merge diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py --- a/pypy/jit/backend/llsupport/gc.py +++ b/pypy/jit/backend/llsupport/gc.py @@ -523,7 +523,7 @@ return [] def add_frame_offset(self, shape, offset): - assert offset != 0 + assert offset & 3 == 0 shape.append(offset) def add_callee_save_reg(self, shape, register): diff --git a/pypy/jit/backend/ppc/codebuilder.py b/pypy/jit/backend/ppc/codebuilder.py --- a/pypy/jit/backend/ppc/codebuilder.py +++ b/pypy/jit/backend/ppc/codebuilder.py @@ -962,6 +962,11 @@ PPCAssembler.__init__(self) self.init_block_builder() self.r0_in_use = r0_in_use + self.ops_offset = {} + + def mark_op(self, op): + pos = self.get_relative_pos() + self.ops_offset[op] = pos def check(self, desc, v, *args): desc.__get__(self)(*args) @@ -994,13 +999,12 @@ self.ldx(rD.value, 0, rD.value) def store_reg(self, source_reg, addr): - self.alloc_scratch_reg() - self.load_imm(r.SCRATCH, addr) - if IS_PPC_32: - self.stwx(source_reg.value, 0, r.SCRATCH.value) - else: - self.stdx(source_reg.value, 0, r.SCRATCH.value) - self.free_scratch_reg() + with scratch_reg(self): + self.load_imm(r.SCRATCH, addr) + if IS_PPC_32: + self.stwx(source_reg.value, 0, r.SCRATCH.value) + else: + self.stdx(source_reg.value, 0, r.SCRATCH.value) def b_offset(self, target): curpos = self.currpos() @@ -1020,17 +1024,15 @@ BI = condition[0] BO = condition[1] - self.alloc_scratch_reg() - self.load_imm(r.SCRATCH, addr) - self.mtctr(r.SCRATCH.value) - self.free_scratch_reg() + with scratch_reg(self): + self.load_imm(r.SCRATCH, addr) + self.mtctr(r.SCRATCH.value) self.bcctr(BO, BI) def b_abs(self, address, trap=False): - self.alloc_scratch_reg() - self.load_imm(r.SCRATCH, address) - self.mtctr(r.SCRATCH.value) - self.free_scratch_reg() + with scratch_reg(self): + self.load_imm(r.SCRATCH, address) + self.mtctr(r.SCRATCH.value) if trap: self.trap() self.bctr() @@ -1044,17 +1046,16 @@ def call(self, address): """ do a call to an absolute address """ - self.alloc_scratch_reg() - if IS_PPC_32: - self.load_imm(r.SCRATCH, address) - else: - self.store(r.TOC.value, r.SP.value, 5 * WORD) - self.load_imm(r.r11, address) - self.load(r.SCRATCH.value, r.r11.value, 0) - self.load(r.r2.value, r.r11.value, WORD) - self.load(r.r11.value, r.r11.value, 2 * WORD) - self.mtctr(r.SCRATCH.value) - self.free_scratch_reg() + with scratch_reg(self): + if IS_PPC_32: + self.load_imm(r.SCRATCH, address) + else: + self.store(r.TOC.value, r.SP.value, 5 * WORD) + self.load_imm(r.r11, address) + self.load(r.SCRATCH.value, r.r11.value, 0) + self.load(r.r2.value, r.r11.value, WORD) + self.load(r.r11.value, r.r11.value, 2 * WORD) + self.mtctr(r.SCRATCH.value) self.bctrl() if IS_PPC_64: diff --git a/pypy/jit/backend/ppc/helper/regalloc.py b/pypy/jit/backend/ppc/helper/regalloc.py --- a/pypy/jit/backend/ppc/helper/regalloc.py +++ b/pypy/jit/backend/ppc/helper/regalloc.py @@ -76,7 +76,7 @@ def prepare_binary_int_op(): def f(self, op): - boxes = list(op.getarglist()) + boxes = op.getarglist() b0, b1 = boxes reg1 = self._ensure_value_is_boxed(b0, forbidden_vars=boxes) diff --git a/pypy/jit/backend/ppc/opassembler.py b/pypy/jit/backend/ppc/opassembler.py --- a/pypy/jit/backend/ppc/opassembler.py +++ b/pypy/jit/backend/ppc/opassembler.py @@ -12,7 +12,8 @@ from pypy.jit.backend.ppc.helper.assembler import (count_reg_args, Saved_Volatiles) from pypy.jit.backend.ppc.jump import remap_frame_layout -from pypy.jit.backend.ppc.codebuilder import OverwritingBuilder +from pypy.jit.backend.ppc.codebuilder import (OverwritingBuilder, scratch_reg, + PPCBuilder) from pypy.jit.backend.ppc.regalloc import TempPtr, TempInt from pypy.jit.backend.llsupport import symbolic from pypy.rpython.lltypesystem import rstr, rffi, lltype @@ -210,12 +211,11 @@ # instead of XER could be more efficient def _emit_ovf_guard(self, op, arglocs, cond): # move content of XER to GPR - self.mc.alloc_scratch_reg() - self.mc.mfspr(r.SCRATCH.value, 1) - # shift and mask to get comparison result - self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0) - self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.mfspr(r.SCRATCH.value, 1) + # shift and mask to get comparison result + self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, 1, 0, 0) + self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) self._emit_guard(op, arglocs, cond) def emit_guard_no_overflow(self, op, arglocs, regalloc): @@ -244,14 +244,13 @@ def _cmp_guard_class(self, op, locs, regalloc): offset = locs[2] if offset is not None: - self.mc.alloc_scratch_reg() - if offset.is_imm(): - self.mc.load(r.SCRATCH.value, locs[0].value, offset.value) - else: - assert offset.is_reg() - self.mc.loadx(r.SCRATCH.value, locs[0].value, offset.value) - self.mc.cmp_op(0, r.SCRATCH.value, locs[1].value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + if offset.is_imm(): + self.mc.load(r.SCRATCH.value, locs[0].value, offset.value) + else: + assert offset.is_reg() + self.mc.loadx(r.SCRATCH.value, locs[0].value, offset.value) + self.mc.cmp_op(0, r.SCRATCH.value, locs[1].value) else: assert 0, "not implemented yet" self._emit_guard(op, locs[3:], c.NE) @@ -288,10 +287,9 @@ adr = self.fail_boxes_int.get_addr_for_num(i) else: assert 0 - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, adr) - self.mc.storex(loc.value, 0, r.SCRATCH.value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, adr) + self.mc.storex(loc.value, 0, r.SCRATCH.value) elif loc.is_vfp_reg(): assert box.type == FLOAT assert 0, "not implemented yet" @@ -305,13 +303,12 @@ adr = self.fail_boxes_int.get_addr_for_num(i) else: assert 0 - self.mc.alloc_scratch_reg() - self.mov_loc_loc(loc, r.SCRATCH) - # store content of r5 temporary in ENCODING AREA - self.mc.store(r.r5.value, r.SPP.value, 0) - self.mc.load_imm(r.r5, adr) - self.mc.store(r.SCRATCH.value, r.r5.value, 0) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mov_loc_loc(loc, r.SCRATCH) + # store content of r5 temporary in ENCODING AREA + self.mc.store(r.r5.value, r.SPP.value, 0) + self.mc.load_imm(r.r5, adr) + self.mc.store(r.SCRATCH.value, r.r5.value, 0) # restore r5 self.mc.load(r.r5.value, r.SPP.value, 0) else: @@ -362,10 +359,9 @@ failargs = arglocs[5:] self.mc.load_imm(loc1, pos_exception.value) - self.mc.alloc_scratch_reg() - self.mc.load(r.SCRATCH.value, loc1.value, 0) - self.mc.cmp_op(0, r.SCRATCH.value, loc.value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load(r.SCRATCH.value, loc1.value, 0) + self.mc.cmp_op(0, r.SCRATCH.value, loc.value) self._emit_guard(op, failargs, c.NE, save_exc=True) self.mc.load_imm(loc, pos_exc_value.value) @@ -373,11 +369,10 @@ if resloc: self.mc.load(resloc.value, loc.value, 0) - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, 0) - self.mc.store(r.SCRATCH.value, loc.value, 0) - self.mc.store(r.SCRATCH.value, loc1.value, 0) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, 0) + self.mc.store(r.SCRATCH.value, loc.value, 0) + self.mc.store(r.SCRATCH.value, loc1.value, 0) def emit_call(self, op, args, regalloc, force_index=-1): adr = args[0].value @@ -426,13 +421,12 @@ param_offset = ((BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD) # space for first 8 parameters - self.mc.alloc_scratch_reg() - for i, arg in enumerate(stack_args): - offset = param_offset + i * WORD - if arg is not None: - self.regalloc_mov(regalloc.loc(arg), r.SCRATCH) - self.mc.store(r.SCRATCH.value, r.SP.value, offset) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + for i, arg in enumerate(stack_args): + offset = param_offset + i * WORD + if arg is not None: + self.regalloc_mov(regalloc.loc(arg), r.SCRATCH) + self.mc.store(r.SCRATCH.value, r.SP.value, offset) # collect variables that need to go in registers # and the registers they will be stored in @@ -542,31 +536,31 @@ def emit_getinteriorfield_gc(self, op, arglocs, regalloc): (base_loc, index_loc, res_loc, ofs_loc, ofs, itemsize, fieldsize) = arglocs - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, itemsize.value) - self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value) - if ofs.value > 0: - if ofs_loc.is_imm(): - self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value) + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, itemsize.value) + self.mc.mullw(r.SCRATCH.value, index_loc.value, r.SCRATCH.value) + if ofs.value > 0: + if ofs_loc.is_imm(): + self.mc.addic(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value) + else: + self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value) + + if fieldsize.value == 8: + self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value) + elif fieldsize.value == 4: + self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value) + elif fieldsize.value == 2: + self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value) + elif fieldsize.value == 1: + self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value) else: - self.mc.add(r.SCRATCH.value, r.SCRATCH.value, ofs_loc.value) - - if fieldsize.value == 8: - self.mc.ldx(res_loc.value, base_loc.value, r.SCRATCH.value) - elif fieldsize.value == 4: - self.mc.lwzx(res_loc.value, base_loc.value, r.SCRATCH.value) - elif fieldsize.value == 2: - self.mc.lhzx(res_loc.value, base_loc.value, r.SCRATCH.value) - elif fieldsize.value == 1: - self.mc.lbzx(res_loc.value, base_loc.value, r.SCRATCH.value) - else: - assert 0 - self.mc.free_scratch_reg() + assert 0 #XXX Hack, Hack, Hack if not we_are_translated(): signed = op.getdescr().fielddescr.is_field_signed() self._ensure_result_bit_extension(res_loc, fieldsize.value, signed) + emit_getinteriorfield_raw = emit_getinteriorfield_gc def emit_setinteriorfield_gc(self, op, arglocs, regalloc): (base_loc, index_loc, value_loc, @@ -588,7 +582,7 @@ self.mc.stbx(value_loc.value, base_loc.value, r.SCRATCH.value) else: assert 0 - + emit_setinteriorfield_raw = emit_setinteriorfield_gc class ArrayOpAssembler(object): @@ -752,13 +746,12 @@ bytes_loc = regalloc.force_allocate_reg(bytes_box, forbidden_vars) scale = self._get_unicode_item_scale() assert length_loc.is_reg() - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, 1 << scale) - if IS_PPC_32: - self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value) - else: - self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, 1 << scale) + if IS_PPC_32: + self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value) + else: + self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value) length_box = bytes_box length_loc = bytes_loc # call memcpy() @@ -873,15 +866,15 @@ def set_vtable(self, box, vtable): if self.cpu.vtable_offset is not None: adr = rffi.cast(lltype.Signed, vtable) - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, adr) - self.mc.store(r.SCRATCH.value, r.RES.value, self.cpu.vtable_offset) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, adr) + self.mc.store(r.SCRATCH.value, r.RES.value, self.cpu.vtable_offset) def emit_debug_merge_point(self, op, arglocs, regalloc): pass emit_jit_debug = emit_debug_merge_point + emit_keepalive = emit_debug_merge_point def emit_cond_call_gc_wb(self, op, arglocs, regalloc): # Write code equivalent to write_barrier() in the GC: it checks @@ -906,26 +899,25 @@ raise AssertionError(opnum) loc_base = arglocs[0] - self.mc.alloc_scratch_reg() - self.mc.load(r.SCRATCH.value, loc_base.value, 0) + with scratch_reg(self.mc): + self.mc.load(r.SCRATCH.value, loc_base.value, 0) - # get the position of the bit we want to test - bitpos = descr.jit_wb_if_flag_bitpos + # get the position of the bit we want to test + bitpos = descr.jit_wb_if_flag_bitpos - if IS_PPC_32: - # put this bit to the rightmost bitposition of r0 - if bitpos > 0: - self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, - 32 - bitpos, 31, 31) - # test whether this bit is set - self.mc.cmpwi(0, r.SCRATCH.value, 1) - else: - if bitpos > 0: - self.mc.rldicl(r.SCRATCH.value, r.SCRATCH.value, - 64 - bitpos, 63) - # test whether this bit is set - self.mc.cmpdi(0, r.SCRATCH.value, 1) - self.mc.free_scratch_reg() + if IS_PPC_32: + # put this bit to the rightmost bitposition of r0 + if bitpos > 0: + self.mc.rlwinm(r.SCRATCH.value, r.SCRATCH.value, + 32 - bitpos, 31, 31) + # test whether this bit is set + self.mc.cmpwi(0, r.SCRATCH.value, 1) + else: + if bitpos > 0: + self.mc.rldicl(r.SCRATCH.value, r.SCRATCH.value, + 64 - bitpos, 63) + # test whether this bit is set + self.mc.cmpdi(0, r.SCRATCH.value, 1) jz_location = self.mc.currpos() self.mc.nop() @@ -947,7 +939,7 @@ # patch the JZ above offset = self.mc.currpos() - jz_location pmc = OverwritingBuilder(self.mc, jz_location, 1) - pmc.bc(4, 2, offset) # jump if the two values are equal + pmc.bc(12, 2, offset) # jump if the two values are equal pmc.overwrite() emit_cond_call_gc_wb_array = emit_cond_call_gc_wb @@ -989,10 +981,9 @@ # check value resloc = regalloc.try_allocate_reg(resbox) assert resloc is r.RES - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, value) - self.mc.cmp_op(0, resloc.value, r.SCRATCH.value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, value) + self.mc.cmp_op(0, resloc.value, r.SCRATCH.value) regalloc.possibly_free_var(resbox) fast_jmp_pos = self.mc.currpos() @@ -1035,11 +1026,10 @@ assert isinstance(fielddescr, FieldDescr) ofs = fielddescr.offset resloc = regalloc.force_allocate_reg(resbox) - self.mc.alloc_scratch_reg() - self.mov_loc_loc(arglocs[1], r.SCRATCH) - self.mc.li(resloc.value, 0) - self.mc.storex(resloc.value, 0, r.SCRATCH.value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mov_loc_loc(arglocs[1], r.SCRATCH) + self.mc.li(resloc.value, 0) + self.mc.storex(resloc.value, 0, r.SCRATCH.value) regalloc.possibly_free_var(resbox) if op.result is not None: @@ -1055,13 +1045,12 @@ raise AssertionError(kind) resloc = regalloc.force_allocate_reg(op.result) regalloc.possibly_free_var(resbox) - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, adr) - if op.result.type == FLOAT: - assert 0, "not implemented yet" - else: - self.mc.loadx(resloc.value, 0, r.SCRATCH.value) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, adr) + if op.result.type == FLOAT: + assert 0, "not implemented yet" + else: + self.mc.loadx(resloc.value, 0, r.SCRATCH.value) # merge point offset = self.mc.currpos() - jmp_pos @@ -1070,10 +1059,9 @@ pmc.b(offset) pmc.overwrite() - self.mc.alloc_scratch_reg() - self.mc.load(r.SCRATCH.value, r.SPP.value, 0) - self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load(r.SCRATCH.value, r.SPP.value, 0) + self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT) @@ -1102,10 +1090,9 @@ def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc): ENCODING_AREA = len(r.MANAGED_REGS) * WORD - self.mc.alloc_scratch_reg() - self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA) - self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load(r.SCRATCH.value, r.SPP.value, ENCODING_AREA) + self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) self._emit_guard(guard_op, arglocs, c.LT, save_exc=True) emit_guard_call_release_gil = emit_guard_call_may_force diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py --- a/pypy/jit/backend/ppc/ppc_assembler.py +++ b/pypy/jit/backend/ppc/ppc_assembler.py @@ -3,7 +3,7 @@ from pypy.jit.backend.ppc.ppc_form import PPCForm as Form from pypy.jit.backend.ppc.ppc_field import ppc_fields from pypy.jit.backend.ppc.regalloc import (TempInt, PPCFrameManager, - Regalloc) + Regalloc, PPCRegisterManager) from pypy.jit.backend.ppc.assembler import Assembler from pypy.jit.backend.ppc.opassembler import OpAssembler from pypy.jit.backend.ppc.symbol_lookup import lookup @@ -37,15 +37,23 @@ from pypy.jit.metainterp.history import (BoxInt, ConstInt, ConstPtr, ConstFloat, Box, INT, REF, FLOAT) from pypy.jit.backend.x86.support import values_array +from pypy.rlib.debug import (debug_print, debug_start, debug_stop, + have_debug_prints) from pypy.rlib import rgc from pypy.rpython.annlowlevel import llhelper from pypy.rlib.objectmodel import we_are_translated from pypy.rpython.lltypesystem.lloperation import llop from pypy.jit.backend.ppc.locations import StackLocation, get_spp_offset +from pypy.rlib.jit import AsmInfo memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address, rffi.SIZE_T], lltype.Void, sandboxsafe=True, _nowrapper=True) + +DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed), + ('type', lltype.Char), # 'b'ridge, 'l'abel or + # 'e'ntry point + ('number', lltype.Signed)) def hi(w): return w >> 16 @@ -85,6 +93,7 @@ EMPTY_LOC = '\xFE' END_OF_LOCS = '\xFF' + FORCE_INDEX_AREA = len(r.MANAGED_REGS) * WORD ENCODING_AREA = len(r.MANAGED_REGS) * WORD OFFSET_SPP_TO_GPR_SAVE_AREA = (FORCE_INDEX + FLOAT_INT_CONVERSION + ENCODING_AREA) @@ -108,6 +117,12 @@ self.max_stack_params = 0 self.propagate_exception_path = 0 self.setup_failure_recovery() + self._debug = False + self.loop_run_counters = [] + self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i') + + def set_debug(self, v): + self._debug = v def _save_nonvolatiles(self): """ save nonvolatile GPRs in GPR SAVE AREA @@ -298,24 +313,64 @@ def _build_malloc_slowpath(self): mc = PPCBuilder() - with Saved_Volatiles(mc): - # Values to compute size stored in r3 and r4 - mc.subf(r.r3.value, r.r3.value, r.r4.value) - addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr() - mc.call(addr) + if IS_PPC_64: + for _ in range(6): + mc.write32(0) + frame_size = (# add space for floats later + + BACKCHAIN_SIZE * WORD) + if IS_PPC_32: + mc.stwu(r.SP.value, r.SP.value, -frame_size) + mc.mflr(r.SCRATCH.value) + mc.stw(r.SCRATCH.value, r.SP.value, frame_size + WORD) + else: + mc.stdu(r.SP.value, r.SP.value, -frame_size) + mc.mflr(r.SCRATCH.value) + mc.std(r.SCRATCH.value, r.SP.value, frame_size + 2 * WORD) + # managed volatiles are saved below + if self.cpu.supports_floats: + assert 0, "make sure to save floats here" + # Values to compute size stored in r3 and r4 + mc.subf(r.r3.value, r.r3.value, r.r4.value) + addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr() + for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items(): + mc.store(reg.value, r.SPP.value, ofs) + mc.call(addr) + for reg, ofs in PPCRegisterManager.REGLOC_TO_COPY_AREA_OFS.items(): + mc.load(reg.value, r.SPP.value, ofs) mc.cmp_op(0, r.r3.value, 0, imm=True) jmp_pos = mc.currpos() mc.nop() + nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr() mc.load_imm(r.r4, nursery_free_adr) mc.load(r.r4.value, r.r4.value, 0) + + if IS_PPC_32: + ofs = WORD + else: + ofs = WORD * 2 + mc.load(r.SCRATCH.value, r.SP.value, frame_size + ofs) + mc.mtlr(r.SCRATCH.value) + mc.addi(r.SP.value, r.SP.value, frame_size) + mc.blr() + # if r3 == 0 we skip the return above and jump to the exception path + offset = mc.currpos() - jmp_pos pmc = OverwritingBuilder(mc, jmp_pos, 1) - pmc.bc(4, 2, jmp_pos) # jump if the two values are equal + pmc.bc(12, 2, offset) pmc.overwrite() + # restore the frame before leaving + mc.load(r.SCRATCH.value, r.SP.value, frame_size + ofs) + mc.mtlr(r.SCRATCH.value) + mc.addi(r.SP.value, r.SP.value, frame_size) mc.b_abs(self.propagate_exception_path) + + + mc.prepare_insts_blocks() rawstart = mc.materialize(self.cpu.asmmemmgr, []) + if IS_PPC_64: + self.write_64_bit_func_descr(rawstart, rawstart+3*WORD) self.malloc_slowpath = rawstart def _build_propagate_exception_path(self): @@ -362,8 +417,8 @@ addr = rffi.cast(lltype.Signed, decode_func_addr) # load parameters into parameter registers - mc.load(r.r3.value, r.SPP.value, self.ENCODING_AREA) # address of state encoding - mc.mr(r.r4.value, r.SPP.value) # load spilling pointer + mc.load(r.r3.value, r.SPP.value, self.FORCE_INDEX_AREA) # address of state encoding + mc.mr(r.r4.value, r.SPP.value) # load spilling pointer # # call decoding function mc.call(addr) @@ -430,6 +485,23 @@ self.exit_code_adr = self._gen_exit_path() self._leave_jitted_hook_save_exc = self._gen_leave_jitted_hook_code(True) self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False) + debug_start('jit-backend-counts') + self.set_debug(have_debug_prints()) + debug_stop('jit-backend-counts') + + def finish_once(self): + if self._debug: + debug_start('jit-backend-counts') + for i in range(len(self.loop_run_counters)): + struct = self.loop_run_counters[i] + if struct.type == 'l': + prefix = 'TargetToken(%d)' % struct.number + elif struct.type == 'b': + prefix = 'bridge ' + str(struct.number) + else: + prefix = 'entry ' + str(struct.number) + debug_print(prefix + ':' + str(struct.i)) + debug_stop('jit-backend-counts') @staticmethod def _release_gil_shadowstack(): @@ -475,6 +547,7 @@ looptoken._ppc_loop_code = start_pos clt.frame_depth = clt.param_depth = -1 spilling_area, param_depth = self._assemble(operations, regalloc) + size_excluding_failure_stuff = self.mc.get_relative_pos() clt.frame_depth = spilling_area clt.param_depth = param_depth @@ -502,8 +575,12 @@ print 'Loop', inputargs, operations self.mc._dump_trace(loop_start, 'loop_%s.asm' % self.cpu.total_compiled_loops) print 'Done assembling loop with token %r' % looptoken + ops_offset = self.mc.ops_offset self._teardown() + # XXX 3rd arg may not be correct yet + return AsmInfo(ops_offset, real_start, size_excluding_failure_stuff) + def _assemble(self, operations, regalloc): regalloc.compute_hint_frame_locations(operations) self._walk_operations(operations, regalloc) @@ -531,7 +608,9 @@ sp_patch_location = self._prepare_sp_patch_position() + startpos = self.mc.get_relative_pos() spilling_area, param_depth = self._assemble(operations, regalloc) + codeendpos = self.mc.get_relative_pos() self.write_pending_failure_recoveries() @@ -553,8 +632,12 @@ print 'Loop', inputargs, operations self.mc._dump_trace(rawstart, 'bridge_%s.asm' % self.cpu.total_compiled_loops) print 'Done assembling bridge with token %r' % looptoken + + ops_offset = self.mc.ops_offset self._teardown() + return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos) + def _patch_sp_offset(self, sp_patch_location, rawstart): mc = PPCBuilder() frame_depth = self.compute_frame_depth(self.current_clt.frame_depth, @@ -828,11 +911,10 @@ return # move immediate value to memory elif loc.is_stack(): - self.mc.alloc_scratch_reg() - offset = loc.value - self.mc.load_imm(r.SCRATCH, value) - self.mc.store(r.SCRATCH.value, r.SPP.value, offset) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + offset = loc.value + self.mc.load_imm(r.SCRATCH, value) + self.mc.store(r.SCRATCH.value, r.SPP.value, offset) return assert 0, "not supported location" elif prev_loc.is_stack(): @@ -845,10 +927,9 @@ # move in memory elif loc.is_stack(): target_offset = loc.value - self.mc.alloc_scratch_reg() - self.mc.load(r.SCRATCH.value, r.SPP.value, offset) - self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load(r.SCRATCH.value, r.SPP.value, offset) + self.mc.store(r.SCRATCH.value, r.SPP.value, target_offset) return assert 0, "not supported location" elif prev_loc.is_reg(): @@ -883,10 +964,7 @@ elif loc.is_reg(): self.mc.addi(r.SP.value, r.SP.value, -WORD) # decrease stack pointer # push value - if IS_PPC_32: - self.mc.stw(loc.value, r.SP.value, 0) - else: - self.mc.std(loc.value, r.SP.value, 0) + self.mc.store(loc.value, r.SP.value, 0) elif loc.is_imm(): assert 0, "not implemented yet" elif loc.is_imm_float(): @@ -946,17 +1024,17 @@ def malloc_cond(self, nursery_free_adr, nursery_top_adr, size): assert size & (WORD-1) == 0 # must be correctly aligned - self.mc.load_imm(r.RES.value, nursery_free_adr) + self.mc.load_imm(r.RES, nursery_free_adr) self.mc.load(r.RES.value, r.RES.value, 0) if _check_imm_arg(size): self.mc.addi(r.r4.value, r.RES.value, size) else: - self.mc.load_imm(r.r4.value, size) + self.mc.load_imm(r.r4, size) self.mc.add(r.r4.value, r.RES.value, r.r4.value) with scratch_reg(self.mc): - self.mc.gen_load_int(r.SCRATCH.value, nursery_top_adr) + self.mc.load_imm(r.SCRATCH, nursery_top_adr) self.mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value) self.mc.cmp_op(0, r.r4.value, r.SCRATCH.value, signed=False) @@ -977,10 +1055,11 @@ offset = self.mc.currpos() - fast_jmp_pos pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1) pmc.bc(4, 1, offset) # jump if LE (not GT) + pmc.overwrite() with scratch_reg(self.mc): - self.mc.load_imm(r.SCRATCH.value, nursery_free_adr) - self.mc.storex(r.r1.value, 0, r.SCRATCH.value) + self.mc.load_imm(r.SCRATCH, nursery_free_adr) + self.mc.storex(r.r4.value, 0, r.SCRATCH.value) def mark_gc_roots(self, force_index, use_copy_area=False): if force_index < 0: @@ -1010,10 +1089,9 @@ return 0 def _write_fail_index(self, fail_index): - self.mc.alloc_scratch_reg() - self.mc.load_imm(r.SCRATCH, fail_index) - self.mc.store(r.SCRATCH.value, r.SPP.value, self.ENCODING_AREA) - self.mc.free_scratch_reg() + with scratch_reg(self.mc): + self.mc.load_imm(r.SCRATCH, fail_index) + self.mc.store(r.SCRATCH.value, r.SPP.value, self.FORCE_INDEX_AREA) def load(self, loc, value): assert loc.is_reg() and value.is_imm() diff --git a/pypy/jit/backend/ppc/regalloc.py b/pypy/jit/backend/ppc/regalloc.py --- a/pypy/jit/backend/ppc/regalloc.py +++ b/pypy/jit/backend/ppc/regalloc.py @@ -50,37 +50,33 @@ save_around_call_regs = r.VOLATILES REGLOC_TO_COPY_AREA_OFS = { - r.r0: MY_COPY_OF_REGS + 0 * WORD, - r.r2: MY_COPY_OF_REGS + 1 * WORD, - r.r3: MY_COPY_OF_REGS + 2 * WORD, - r.r4: MY_COPY_OF_REGS + 3 * WORD, - r.r5: MY_COPY_OF_REGS + 4 * WORD, - r.r6: MY_COPY_OF_REGS + 5 * WORD, - r.r7: MY_COPY_OF_REGS + 6 * WORD, - r.r8: MY_COPY_OF_REGS + 7 * WORD, - r.r9: MY_COPY_OF_REGS + 8 * WORD, - r.r10: MY_COPY_OF_REGS + 9 * WORD, - r.r11: MY_COPY_OF_REGS + 10 * WORD, - r.r12: MY_COPY_OF_REGS + 11 * WORD, - r.r13: MY_COPY_OF_REGS + 12 * WORD, - r.r14: MY_COPY_OF_REGS + 13 * WORD, - r.r15: MY_COPY_OF_REGS + 14 * WORD, - r.r16: MY_COPY_OF_REGS + 15 * WORD, - r.r17: MY_COPY_OF_REGS + 16 * WORD, - r.r18: MY_COPY_OF_REGS + 17 * WORD, - r.r19: MY_COPY_OF_REGS + 18 * WORD, - r.r20: MY_COPY_OF_REGS + 19 * WORD, - r.r21: MY_COPY_OF_REGS + 20 * WORD, - r.r22: MY_COPY_OF_REGS + 21 * WORD, - r.r23: MY_COPY_OF_REGS + 22 * WORD, - r.r24: MY_COPY_OF_REGS + 23 * WORD, - r.r25: MY_COPY_OF_REGS + 24 * WORD, - r.r26: MY_COPY_OF_REGS + 25 * WORD, - r.r27: MY_COPY_OF_REGS + 26 * WORD, - r.r28: MY_COPY_OF_REGS + 27 * WORD, - r.r29: MY_COPY_OF_REGS + 28 * WORD, - r.r30: MY_COPY_OF_REGS + 29 * WORD, - r.r31: MY_COPY_OF_REGS + 30 * WORD, + r.r3: MY_COPY_OF_REGS + 0 * WORD, + r.r4: MY_COPY_OF_REGS + 1 * WORD, + r.r5: MY_COPY_OF_REGS + 2 * WORD, + r.r6: MY_COPY_OF_REGS + 3 * WORD, + r.r7: MY_COPY_OF_REGS + 4 * WORD, + r.r8: MY_COPY_OF_REGS + 5 * WORD, + r.r9: MY_COPY_OF_REGS + 6 * WORD, + r.r10: MY_COPY_OF_REGS + 7 * WORD, + r.r11: MY_COPY_OF_REGS + 8 * WORD, + r.r12: MY_COPY_OF_REGS + 9 * WORD, + r.r14: MY_COPY_OF_REGS + 10 * WORD, + r.r15: MY_COPY_OF_REGS + 11 * WORD, + r.r16: MY_COPY_OF_REGS + 12 * WORD, + r.r17: MY_COPY_OF_REGS + 13 * WORD, + r.r18: MY_COPY_OF_REGS + 14 * WORD, + r.r19: MY_COPY_OF_REGS + 15 * WORD, + r.r20: MY_COPY_OF_REGS + 16 * WORD, + r.r21: MY_COPY_OF_REGS + 17 * WORD, + r.r22: MY_COPY_OF_REGS + 18 * WORD, + r.r23: MY_COPY_OF_REGS + 19 * WORD, + r.r24: MY_COPY_OF_REGS + 20 * WORD, + r.r25: MY_COPY_OF_REGS + 21 * WORD, + r.r26: MY_COPY_OF_REGS + 22 * WORD, + r.r27: MY_COPY_OF_REGS + 23 * WORD, + r.r28: MY_COPY_OF_REGS + 24 * WORD, + r.r29: MY_COPY_OF_REGS + 25 * WORD, + r.r30: MY_COPY_OF_REGS + 26 * WORD, } def __init__(self, longevity, frame_manager=None, assembler=None): @@ -177,7 +173,7 @@ def prepare_loop(self, inputargs, operations): self._prepare(inputargs, operations) self._set_initial_bindings(inputargs) - self.possibly_free_vars(list(inputargs)) + self.possibly_free_vars(inputargs) def prepare_bridge(self, inputargs, arglocs, ops): self._prepare(inputargs, ops) @@ -425,7 +421,7 @@ prepare_guard_not_invalidated = prepare_guard_no_overflow def prepare_guard_exception(self, op): - boxes = list(op.getarglist()) + boxes = op.getarglist() arg0 = ConstInt(rffi.cast(lltype.Signed, op.getarg(0).getint())) loc = self._ensure_value_is_boxed(arg0) loc1 = self.get_scratch_reg(INT, boxes) @@ -447,7 +443,7 @@ return arglocs def prepare_guard_value(self, op): - boxes = list(op.getarglist()) + boxes = op.getarglist() a0, a1 = boxes l0 = self._ensure_value_is_boxed(a0, boxes) l1 = self._ensure_value_is_boxed(a1, boxes) @@ -459,7 +455,7 @@ def prepare_guard_class(self, op): assert isinstance(op.getarg(0), Box) - boxes = list(op.getarglist()) + boxes = op.getarglist() x = self._ensure_value_is_boxed(boxes[0], boxes) y = self.get_scratch_reg(REF, forbidden_vars=boxes) y_val = rffi.cast(lltype.Signed, op.getarg(1).getint()) @@ -559,7 +555,7 @@ return [] def prepare_setfield_gc(self, op): - boxes = list(op.getarglist()) + boxes = op.getarglist() a0, a1 = boxes ofs, size, sign = unpack_fielddescr(op.getdescr()) base_loc = self._ensure_value_is_boxed(a0, boxes) @@ -608,6 +604,7 @@ self.possibly_free_var(op.result) return [base_loc, index_loc, result_loc, ofs_loc, imm(ofs), imm(itemsize), imm(fieldsize)] + prepare_getinteriorfield_raw = prepare_getinteriorfield_gc def prepare_setinteriorfield_gc(self, op): t = unpack_interiorfielddescr(op.getdescr()) @@ -622,6 +619,7 @@ ofs_loc = self._ensure_value_is_boxed(ConstInt(ofs), args) return [base_loc, index_loc, value_loc, ofs_loc, imm(ofs), imm(itemsize), imm(fieldsize)] + prepare_setinteriorfield_raw = prepare_setinteriorfield_gc def prepare_arraylen_gc(self, op): arraydescr = op.getdescr() @@ -811,6 +809,7 @@ prepare_debug_merge_point = void prepare_jit_debug = void + prepare_keepalive = void def prepare_cond_call_gc_wb(self, op): assert op.result is None diff --git a/pypy/jit/backend/ppc/register.py b/pypy/jit/backend/ppc/register.py --- a/pypy/jit/backend/ppc/register.py +++ b/pypy/jit/backend/ppc/register.py @@ -14,7 +14,8 @@ NONVOLATILES = [r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31] -VOLATILES = [r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13] +VOLATILES = [r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12] +# volatile r2 is persisted around calls and r13 can be ignored NONVOLATILES_FLOAT = [f14, f15, f16, f17, f18, f19, f20, f21, f22, f23, f24, f25, f26, f27, f28, f29, f30, f31] diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py --- a/pypy/jit/backend/ppc/runner.py +++ b/pypy/jit/backend/ppc/runner.py @@ -32,7 +32,7 @@ gcdescr.force_index_ofs = FORCE_INDEX_OFS # XXX for now the ppc backend does not support the gcremovetypeptr # translation option - assert gcdescr.config.translation.gcremovetypeptr is False + # assert gcdescr.config.translation.gcremovetypeptr is False AbstractLLCPU.__init__(self, rtyper, stats, opts, translate_support_code, gcdescr) diff --git a/pypy/jit/backend/ppc/test/test_ztranslation.py b/pypy/jit/backend/ppc/test/test_ztranslation.py --- a/pypy/jit/backend/ppc/test/test_ztranslation.py +++ b/pypy/jit/backend/ppc/test/test_ztranslation.py @@ -18,8 +18,9 @@ def _check_cbuilder(self, cbuilder): # We assume here that we have sse2. If not, the CPUClass # needs to be changed to CPU386_NO_SSE2, but well. - assert '-msse2' in cbuilder.eci.compile_extra - assert '-mfpmath=sse' in cbuilder.eci.compile_extra + #assert '-msse2' in cbuilder.eci.compile_extra + #assert '-mfpmath=sse' in cbuilder.eci.compile_extra + pass def test_stuff_translates(self): # this is a basic test that tries to hit a number of features and their @@ -176,7 +177,7 @@ def _get_TranslationContext(self): t = TranslationContext() t.config.translation.gc = DEFL_GC # 'hybrid' or 'minimark' - t.config.translation.gcrootfinder = 'asmgcc' + t.config.translation.gcrootfinder = 'shadowstack' t.config.translation.list_comprehension_operations = True t.config.translation.gcremovetypeptr = True return t diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py --- a/pypy/jit/backend/test/runner_test.py +++ b/pypy/jit/backend/test/runner_test.py @@ -1677,6 +1677,7 @@ c_box = self.alloc_string("hi there").constbox() c_nest = ConstInt(0) self.execute_operation(rop.DEBUG_MERGE_POINT, [c_box, c_nest], 'void') + self.execute_operation(rop.KEEPALIVE, [c_box], 'void') self.execute_operation(rop.JIT_DEBUG, [c_box, c_nest, c_nest, c_nest, c_nest], 'void') _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit