Author: Armin Rigo <ar...@tunes.org> Branch: jitframe-on-heap Changeset: r60659:8df690cd860f Date: 2013-01-29 10:35 +0100 http://bitbucket.org/pypy/pypy/changeset/8df690cd860f/
Log: merge heads diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -34,7 +34,7 @@ if self.sthread is not None: raise geterror(self.space, "continulet already __init__ialized") sthread = build_sthread(self.space) - workaround_disable_jit(sthread) + #workaround_disable_jit(sthread) # # hackish: build the frame "by hand", passing it the correct arguments space = self.space @@ -77,7 +77,7 @@ global_state.clear() raise geterror(self.space, "continulet already finished") self.check_sthread() - workaround_disable_jit(self.sthread) + #workaround_disable_jit(self.sthread) # global_state.origin = self if to is None: diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py --- a/pypy/module/pypyjit/policy.py +++ b/pypy/module/pypyjit/policy.py @@ -106,7 +106,7 @@ 'posix', '_socket', '_sre', '_lsprof', '_weakref', '__pypy__', 'cStringIO', '_collections', 'struct', 'mmap', 'marshal', '_codecs', 'rctime', 'cppyy', - '_cffi_backend', 'pyexpat']: + '_cffi_backend', 'pyexpat', '_continuation']: if modname == 'pypyjit' and 'interp_resop' in rest: return False return True diff --git a/rpython/jit/backend/arm/arch.py b/rpython/jit/backend/arm/arch.py --- a/rpython/jit/backend/arm/arch.py +++ b/rpython/jit/backend/arm/arch.py @@ -1,7 +1,3 @@ -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib.rarithmetic import r_uint - - FUNC_ALIGN = 8 WORD = 4 DOUBLE_WORD = 8 @@ -14,54 +10,13 @@ PC_OFFSET = 8 FORCE_INDEX_OFS = 0 -from rpython.translator.tool.cbuild import ExternalCompilationInfo -eci = ExternalCompilationInfo(post_include_bits=[""" -static int pypy__arm_int_div(int a, int b) { - return a/b; -} -static unsigned int pypy__arm_uint_div(unsigned int a, unsigned int b) { - return a/b; -} -static int pypy__arm_int_mod(int a, int b) { - return a % b; -} -"""]) +# The stack contains the force_index and the, callee saved registers and +# ABI required information +# All the rest of the data is in a GC-managed variable-size "frame". +# This jitframe object's address is always stored in the register FP +# A jitframe is a jit.backend.llsupport.llmodel.JITFRAME = GcArray(Signed). +# Stack frame fixed area +# Currently only the force_index +FRAME_FIXED_SIZE = 1 +JITFRAME_FIXED_SIZE = 16 + 16 * 2 # 16 GPR + 16 VFP Regs (64bit) - -def arm_int_div_emulator(a, b): - return int(a / float(b)) -arm_int_div_sign = lltype.Ptr( - lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed)) -arm_int_div = rffi.llexternal( - "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed, - _callable=arm_int_div_emulator, - compilation_info=eci, - _nowrapper=True, elidable_function=True) - - -def arm_uint_div_emulator(a, b): - return r_uint(a) / r_uint(b) -arm_uint_div_sign = lltype.Ptr( - lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned)) -arm_uint_div = rffi.llexternal( - "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned, - _callable=arm_uint_div_emulator, - compilation_info=eci, - _nowrapper=True, elidable_function=True) - - -def arm_int_mod_emulator(a, b): - sign = 1 - if a < 0: - a = -1 * a - sign = -1 - if b < 0: - b = -1 * b - res = a % b - return sign * res -arm_int_mod_sign = arm_int_div_sign -arm_int_mod = rffi.llexternal( - "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed, - _callable=arm_int_mod_emulator, - compilation_info=eci, - _nowrapper=True, elidable_function=True) diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -5,7 +5,8 @@ from rpython.jit.backend.arm import conditions as c from rpython.jit.backend.arm import registers as r from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, FUNC_ALIGN, \ - N_REGISTERS_SAVED_BY_MALLOC + N_REGISTERS_SAVED_BY_MALLOC, \ + JITFRAME_FIXED_SIZE, FRAME_FIXED_SIZE from rpython.jit.backend.arm.codebuilder import ARMv7Builder, OverwritingBuilder from rpython.jit.backend.arm.locations import get_fp_offset from rpython.jit.backend.arm.regalloc import (Regalloc, ARMFrameManager, @@ -21,7 +22,7 @@ from rpython.jit.metainterp.resoperation import rop, ResOperation from rpython.rlib import rgc from rpython.rlib.objectmodel import we_are_translated, specialize -from rpython.rtyper.annlowlevel import llhelper +from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref from rpython.rtyper.lltypesystem import lltype, rffi, llmemory from rpython.rtyper.lltypesystem.lloperation import llop from rpython.jit.backend.arm.opassembler import ResOpAssembler @@ -29,9 +30,9 @@ have_debug_prints, fatalerror) from rpython.rlib.jit import AsmInfo from rpython.rlib.objectmodel import compute_unique_id +from rpython.rlib.rarithmetic import intmask, r_uint -# XXX Move to llsupport -from rpython.jit.backend.x86.support import memcpy_fn +from rpython.jit.backend.arm.support import memcpy_fn DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed), ('type', lltype.Char), # 'b'ridge, 'l'abel or @@ -41,8 +42,6 @@ class AssemblerARM(ResOpAssembler): - STACK_FIXED_AREA = -1 - debug = True def __init__(self, cpu, translate_support_code=False): @@ -59,33 +58,19 @@ self.datablockwrapper = None self.propagate_exception_path = 0 self.stack_check_slowpath = 0 - self._compute_stack_size() self._debug = False self.loop_run_counters = [] self.debug_counter_descr = cpu.fielddescrof(DEBUG_COUNTER, 'i') - self.force_token_to_dead_frame = {} # XXX temporary hack + self.gcrootmap_retaddr_forced = 0 def set_debug(self, v): r = self._debug self._debug = v return r - def _compute_stack_size(self): - self.STACK_FIXED_AREA = len(r.callee_saved_registers) * WORD - self.STACK_FIXED_AREA += WORD # FORCE_TOKEN - self.STACK_FIXED_AREA += N_REGISTERS_SAVED_BY_MALLOC * WORD - if self.cpu.supports_floats: - self.STACK_FIXED_AREA += (len(r.callee_saved_vfp_registers) - * DOUBLE_WORD) - if self.STACK_FIXED_AREA % 8 != 0: - self.STACK_FIXED_AREA += WORD # Stack alignment - assert self.STACK_FIXED_AREA % 8 == 0 - - def setup(self, looptoken, operations): + def setup(self, looptoken): + assert self.memcpy_addr != 0, 'setup_once() not called?' self.current_clt = looptoken.compiled_loop_token - operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu, - operations, self.current_clt.allgcrefs) - assert self.memcpy_addr != 0, 'setup_once() not called?' self.mc = ARMv7Builder() self.pending_guards = [] assert self.datablockwrapper is None @@ -93,7 +78,6 @@ self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, allblocks) self.target_tokens_currently_compiling = {} - return operations def teardown(self): self.current_clt = None @@ -106,10 +90,11 @@ # Addresses of functions called by new_xxx operations gc_ll_descr = self.cpu.gc_ll_descr gc_ll_descr.initialize() + self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn) + self._build_failure_recovery(exc=True, withfloats=False) + self._build_failure_recovery(exc=False, withfloats=False) self._build_wb_slowpath(False) self._build_wb_slowpath(True) - self._build_failure_recovery(exc=True, withfloats=False) - self._build_failure_recovery(exc=False, withfloats=False) if self.cpu.supports_floats: self._build_wb_slowpath(False, withfloats=True) self._build_wb_slowpath(True, withfloats=True) @@ -121,7 +106,6 @@ self._build_stack_check_slowpath() if gc_ll_descr.gcrootmap and gc_ll_descr.gcrootmap.is_shadow_stack: self._build_release_gil(gc_ll_descr.gcrootmap) - self.memcpy_addr = self.cpu.cast_ptr_to_int(memcpy_fn) if not self._debug: # if self._debug is already set it means that someone called @@ -130,6 +114,9 @@ debug_start('jit-backend-counts') self.set_debug(have_debug_prints()) debug_stop('jit-backend-counts') + # when finishing, we only have one value at [0], the rest dies + self.gcmap_for_finish = lltype.malloc(jitframe.GCMAP, 1, zero=True) + self.gcmap_for_finish[0] = r_uint(1) def finish_once(self): if self._debug: @@ -218,18 +205,50 @@ self.reacqgil_addr = rffi.cast(lltype.Signed, reacqgil_func) def _build_propagate_exception_path(self): - if self.cpu.propagate_exception_v < 0: + if not self.cpu.propagate_exception_descr: return # not supported (for tests, or non-translated) # mc = ARMv7Builder() # - # Call the helper, which will return a dead frame object with - # the correct exception set, or MemoryError by default - # XXX make sure we return the correct value here + # read and reset the current exception addr = rffi.cast(lltype.Signed, self.cpu.get_propagate_exception()) mc.BL(addr) self.gen_func_epilog(mc=mc) self.propagate_exception_path = mc.materialize(self.cpu.asmmemmgr, []) + # + self._store_and_reset_exception(r.r0) + ofs = self.cpu.get_ofs_of_frame_field('jf_guard_exc') + # make sure ofs fits into a register + assert check_imm_arg(ofs) + self.mc.STR_ri(r.r0.value, r.fp.value, imm=ofs) + propagate_exception_descr = rffi.cast(lltype.Signed, + cast_instance_to_gcref(self.cpu.propagate_exception_descr)) + ofs = self.cpu.get_ofs_of_frame_field('jf_descr') + # make sure ofs fits into a register + assert check_imm_arg(ofs) + self.mc.BKPT() + #base_ofs = self.cpu.get_baseofs_of_frame_field() + #self.mc.MOV_bi(ofs, propagate_exception_descr) + #self.mc.LEA_rb(eax.value, -base_ofs) + # + self._call_footer() + rawstart = self.mc.materialize(self.cpu.asmmemmgr, []) + self.propagate_exception_path = rawstart + self.mc = None + + def _store_and_reset_exception(self, resloc=None): + assert resloc is not r.ip + if resloc is not None: + self.mc.gen_load_int(resloc.value, self.cpu.pos_exc_value()) + self.mc.LDR_ri(resloc.value, resloc.value) + self.mc.MOV(resloc, heap(self.cpu.pos_exc_value())) + + with saved_registers(self.mc, [r.r0]): + self.mc.gen_load_int(r.r0.value, self.cpu.pos_exc_value()) + self.mc.gen_load_int(r.ip.value, 0) + self.mc.STR_ri(r.ip.value, r.r0.value) + self.mc.gen_load_int(r.r0.value, self.cpu.pos_exception()) + self.mc.STR_ri(r.ip.value, r.r0.value) def _build_stack_check_slowpath(self): _, _, slowpathaddr = self.cpu.insert_stack_check() @@ -558,7 +577,7 @@ # We might have an exception pending. Load it into r4 # (this is a register saved across calls) mc.gen_load_int(r.r5.value, self.cpu.pos_exc_value()) - mc.LDR_ri(r.r4.value, self.cpu.pos_exc_value()) + mc.LDR_ri(r.r4.value, r.r5.value) # clear the exc flags mc.gen_load_int(r.r6.value, 0) mc.STR_ri(r.r6.value, r.r5.value) @@ -661,37 +680,35 @@ self.mc.writechar(chr(0)) def gen_func_epilog(self, mc=None, cond=c.AL): - stack_size = self.STACK_FIXED_AREA - stack_size -= len(r.callee_saved_registers) * WORD - if self.cpu.supports_floats: - stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD - gcrootmap = self.cpu.gc_ll_descr.gcrootmap if mc is None: mc = self.mc if gcrootmap and gcrootmap.is_shadow_stack: self.gen_footer_shadowstack(gcrootmap, mc) - mc.MOV_rr(r.sp.value, r.fp.value, cond=cond) - mc.ADD_ri(r.sp.value, r.sp.value, stack_size, cond=cond) + mc.ADD_ri(r.sp.value, r.sp.value, WORD, cond=cond) # for the force index if self.cpu.supports_floats: mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers], cond=cond) mc.POP([reg.value for reg in r.callee_restored_registers], cond=cond) + mc.BKPT() def gen_func_prolog(self): - stack_size = self.STACK_FIXED_AREA - stack_size -= len(r.callee_saved_registers) * WORD + stack_size = FRAME_FIXED_SIZE * WORD + stack_size += len(r.callee_saved_registers) * WORD if self.cpu.supports_floats: - stack_size -= len(r.callee_saved_vfp_registers) * 2 * WORD + stack_size += len(r.callee_saved_vfp_registers) * 2 * WORD self.mc.PUSH([reg.value for reg in r.callee_saved_registers]) if self.cpu.supports_floats: self.mc.VPUSH([reg.value for reg in r.callee_saved_vfp_registers]) - # here we modify the stack pointer to leave room for the 9 registers - # that are going to be saved here around malloc calls and one word to - # store the force index - self.mc.SUB_ri(r.sp.value, r.sp.value, stack_size) - self.mc.MOV_rr(r.fp.value, r.sp.value) + self.mc.SUB_ri(r.sp.value, r.sp.value, WORD) # for the force index + assert stack_size % 8 == 0 # ensure we keep alignment + + # set fp to point to the JITFRAME + ofs + ofs = self.cpu.get_baseofs_of_frame_field() + assert check_imm_arg(ofs) + self.mc.ADD_ri(r.fp.value, r.r0.value, imm=ofs) + # gcrootmap = self.cpu.gc_ll_descr.gcrootmap if gcrootmap and gcrootmap.is_shadow_stack: self.gen_shadowstack_header(gcrootmap) @@ -754,7 +771,9 @@ # cpu interface def assemble_loop(self, loopname, inputargs, operations, looptoken, log): clt = CompiledLoopToken(self.cpu, looptoken.number) + clt.frame_info = lltype.malloc(jitframe.JITFRAMEINFO) clt.allgcrefs = [] + clt.frame_info.jfi_frame_depth = 0 # for now looptoken.compiled_loop_token = clt clt._debug_nbargs = len(inputargs) @@ -762,38 +781,40 @@ # Arguments should be unique assert len(set(inputargs)) == len(inputargs) - operations = self.setup(looptoken, operations) - if log: + self.setup(looptoken) + if False and log: operations = self._inject_debugging_code(looptoken, operations, 'e', looptoken.number) self._call_header_with_stack_check() - sp_patch_location = self._prepare_sp_patch_position() + #sp_patch_location = self._prepare_sp_patch_position() - regalloc = Regalloc(assembler=self, frame_manager=ARMFrameManager()) - regalloc.prepare_loop(inputargs, operations) + regalloc = Regalloc(assembler=self) + operations = regalloc.prepare_loop(inputargs, operations, looptoken, + clt.allgcrefs) + rgc._make_sure_does_not_move(lltype.cast_opaque_ptr(llmemory.GCREF, + clt.frame_info)) loop_head = self.mc.get_relative_pos() looptoken._arm_loop_code = loop_head # - clt.frame_depth = -1 - frame_depth = self._assemble(operations, regalloc) - clt.frame_depth = frame_depth + frame_depth = self._assemble(regalloc, inputargs, operations) + self.update_frame_depth(frame_depth + JITFRAME_FIXED_SIZE) # size_excluding_failure_stuff = self.mc.get_relative_pos() - self._patch_sp_offset(sp_patch_location, frame_depth) + #self._patch_sp_offset(sp_patch_location, frame_depth) self.write_pending_failure_recoveries() rawstart = self.materialize_loop(looptoken) - looptoken._arm_func_addr = rawstart + looptoken._function_addr = looptoken._arm_func_addr = rawstart self.process_pending_guards(rawstart) self.fixup_target_tokens(rawstart) if log and not we_are_translated(): self.mc._dump_trace(rawstart, - 'loop_%s.asm' % self.cpu.total_compiled_loops) + 'loop.asm') ops_offset = self.mc.ops_offset self.teardown() @@ -809,18 +830,20 @@ return AsmInfo(ops_offset, rawstart + loop_head, size_excluding_failure_stuff - loop_head) - def _assemble(self, operations, regalloc): + def _assemble(self, regalloc, inputargs, operations): regalloc.compute_hint_frame_locations(operations) - self._walk_operations(operations, regalloc) - frame_depth = regalloc.frame_manager.get_frame_depth() + self._walk_operations(inputargs, operations, regalloc) + frame_depth = regalloc.get_final_frame_depth() jump_target_descr = regalloc.jump_target_descr if jump_target_descr is not None: - frame_depth = max(frame_depth, - jump_target_descr._arm_clt.frame_depth) + tgt_depth = jump_target_descr._arm_clt.frame_info.jfi_frame_depth + target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE + frame_depth = max(frame_depth, target_frame_depth) return frame_depth def assemble_bridge(self, faildescr, inputargs, operations, original_loop_token, log): + assert 0 operations = self.setup(original_loop_token, operations) descr_number = self.cpu.get_fail_descr_number(faildescr) if log: @@ -899,6 +922,17 @@ return self.mc.materialize(self.cpu.asmmemmgr, allblocks, self.cpu.gc_ll_descr.gcrootmap) + def update_frame_depth(self, frame_depth): + self.current_clt.frame_info.jfi_frame_depth = frame_depth + new_jumping_to = [] + for wref in self.current_clt.jumping_to: + clt = wref() + if clt is not None: + clt.frame_info.jfi_frame_depth = max(frame_depth, + clt.frame_info.jfi_frame_depth) + new_jumping_to.append(weakref.ref(clt)) + self.current_clt.jumping_to = new_jumping_to + def write_pending_failure_recoveries(self): for tok in self.pending_guards: #generate the exit stub and the encoded representation @@ -972,7 +1006,7 @@ else: cb.SUB_rr(r.sp.value, base_reg.value, r.ip.value, cond=fcond) - def _walk_operations(self, operations, regalloc): + def _walk_operations(self, inputargs, operations, regalloc): fcond = c.AL self._regalloc = regalloc while regalloc.position() < len(operations) - 1: @@ -1141,10 +1175,10 @@ if not check_imm_arg(offset, size=0xFFF): self.mc.PUSH([r.lr.value], cond=cond) pushed = True - self.mc.gen_load_int(r.lr.value, -offset, cond=cond) + self.mc.gen_load_int(r.lr.value, offset, cond=cond) self.mc.LDR_rr(loc.value, r.fp.value, r.lr.value, cond=cond) else: - self.mc.LDR_ri(loc.value, r.fp.value, imm=-offset, cond=cond) + self.mc.LDR_ri(loc.value, r.fp.value, imm=offset, cond=cond) if pushed: self.mc.POP([r.lr.value], cond=cond) elif loc.is_vfp_reg(): @@ -1364,6 +1398,26 @@ else: return 0 + def push_gcmap(self, mc, gcmap, push=False, mov=False, store=False): + gcmapref = lltype.cast_opaque_ptr(llmemory.GCREF, gcmap) + # keep the ref alive + self.current_clt.allgcrefs.append(gcmapref) + rgc._make_sure_does_not_move(gcmapref) + pass + #if push: + # mc.PUSH(imm(rffi.cast(lltype.Signed, gcmapref))) + #elif mov: + # mc.MOV(RawEspLoc(0, REF), + # imm(rffi.cast(lltype.Signed, gcmapref))) + #else: + # assert store + # ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap') + # mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed, gcmapref))) + + def pop_gcmap(self, mc): + ofs = self.cpu.get_ofs_of_frame_field('jf_gcmap') + mc.MOV_bi(ofs, 0) + def not_implemented(msg): os.write(2, '[ARM/asm] %s\n' % msg) diff --git a/rpython/jit/backend/arm/codebuilder.py b/rpython/jit/backend/arm/codebuilder.py --- a/rpython/jit/backend/arm/codebuilder.py +++ b/rpython/jit/backend/arm/codebuilder.py @@ -1,6 +1,6 @@ -from rpython.jit.backend.arm import arch from rpython.jit.backend.arm import conditions as cond from rpython.jit.backend.arm import registers as reg +from rpython.jit.backend.arm import support from rpython.jit.backend.arm.arch import (WORD, FUNC_ALIGN) from rpython.jit.backend.arm.instruction_builder import define_instructions from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin @@ -17,7 +17,7 @@ def binary_helper_call(name): - function = getattr(arch, 'arm_%s' % name) + function = getattr(support, 'arm_%s' % name) def f(self, c=cond.AL): """Generates a call to a helper function, takes its diff --git a/rpython/jit/backend/arm/locations.py b/rpython/jit/backend/arm/locations.py --- a/rpython/jit/backend/arm/locations.py +++ b/rpython/jit/backend/arm/locations.py @@ -1,5 +1,5 @@ from rpython.jit.metainterp.history import INT, FLOAT -from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD +from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE class AssemblerLocation(object): @@ -136,9 +136,5 @@ return ImmLocation(i) -def get_fp_offset(i): - if i >= 0: - # Take the FORCE_TOKEN into account - return (1 + i) * WORD - else: - return i * WORD +def get_fp_offset(position): + return WORD * (position + JITFRAME_FIXED_SIZE) diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -317,9 +317,27 @@ return fcond def emit_op_finish(self, op, arglocs, regalloc, fcond): - [argloc] = arglocs - if argloc is not r.r0: #XXX verify this - self.mov_loc_loc(argloc, r.r0, fcond) + base_ofs = self.cpu.get_baseofs_of_frame_field() - WORD + if len(arglocs) == 2: + [return_val, fail_descr_loc] = arglocs + if op.getarg(0).type == FLOAT and not IS_X86_64: + XXX + size = WORD * 2 + else: + size = WORD + self.mc.STR_ri(return_val.value, r.fp.value)#, imm=-base_ofs) + #self.save_into_mem(raw_stack(0), return_val, imm(size)) + else: + [fail_descr_loc] = arglocs + ofs = self.cpu.get_ofs_of_frame_field('jf_descr') + base_ofs = self.cpu.get_baseofs_of_frame_field() + + self.mc.gen_load_int(r.ip.value, fail_descr_loc.value) + # XXX self.mov(fail_descr_loc, RawStackLoc(ofs)) + self.mc.STR_ri(r.ip.value, r.fp.value, imm=ofs) + gcmap = self.gcmap_for_finish + self.push_gcmap(self.mc, gcmap, store=True) + self.mc.SUB_ri(r.r0.value, r.fp.value, base_ofs) # exit function self.gen_func_epilog() return fcond diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -1,3 +1,5 @@ +from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref +from rpython.rlib import rgc from rpython.jit.backend.llsupport.regalloc import FrameManager, \ RegisterManager, TempBox, compute_vars_longevity from rpython.jit.backend.arm import registers as r @@ -180,10 +182,10 @@ class Regalloc(object): - def __init__(self, frame_manager=None, assembler=None): + def __init__(self, assembler=None): self.cpu = assembler.cpu self.assembler = assembler - self.frame_manager = frame_manager + self.frame_manager = None self.jump_target_descr = None self.final_jump_op = None @@ -282,7 +284,12 @@ assert isinstance(value, ConstFloat) return self.vfprm.convert_to_imm(value) - def _prepare(self, inputargs, operations): + def _prepare(self, inputargs, operations, allgcrefs): + self.frame_manager = self.fm = ARMFrameManager() + cpu = self.assembler.cpu + operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations, + allgcrefs) + # compute longevity of variables longevity, last_real_usage = compute_vars_longevity( inputargs, operations) self.longevity = longevity @@ -291,92 +298,27 @@ asm = self.assembler self.vfprm = VFPRegisterManager(longevity, fm, asm) self.rm = CoreRegisterManager(longevity, fm, asm) + return operations - def prepare_loop(self, inputargs, operations): - self._prepare(inputargs, operations) + def prepare_loop(self, inputargs, operations, looptoken, allgcrefs): + operations = self._prepare(inputargs, operations, allgcrefs) self._set_initial_bindings(inputargs) - self.possibly_free_vars(inputargs) + self.possibly_free_vars(list(inputargs)) + return operations def prepare_bridge(self, inputargs, arglocs, ops): self._prepare(inputargs, ops) self._update_bindings(arglocs, inputargs) + def get_final_frame_depth(self): + return self.frame_manager.get_frame_depth() + def _set_initial_bindings(self, inputargs): - # The first inputargs are passed in registers r0-r3 - # we relly on the soft-float calling convention so we need to move - # float params to the coprocessor. - if self.cpu.use_hf_abi: - self._set_initial_bindings_hf(inputargs) - else: - self._set_initial_bindings_sf(inputargs) - - def _set_initial_bindings_sf(self, inputargs): - - arg_index = 0 - count = 0 - n_register_args = len(r.argument_regs) - cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD) + # the input args are passed in the jitframe for box in inputargs: assert isinstance(box, Box) - # handle inputargs in argument registers - if box.type == FLOAT and arg_index % 2 != 0: - arg_index += 1 # align argument index for float passed - # in register - if arg_index < n_register_args: - if box.type == FLOAT: - loc = r.argument_regs[arg_index] - loc2 = r.argument_regs[arg_index + 1] - vfpreg = self.try_allocate_reg(box) - # move soft-float argument to vfp - self.assembler.mov_to_vfp_loc(loc, loc2, vfpreg) - arg_index += 2 # this argument used two argument registers - else: - loc = r.argument_regs[arg_index] - self.try_allocate_reg(box, selected_reg=loc) - arg_index += 1 - else: - # treat stack args as stack locations with a negative offset - if box.type == FLOAT: - cur_frame_pos -= 2 - if count % 2 != 0: # Stack argument alignment - cur_frame_pos -= 1 - count = 0 - else: - cur_frame_pos -= 1 - count += 1 - loc = self.frame_manager.frame_pos(cur_frame_pos, box.type) - self.frame_manager.set_binding(box, loc) - - def _set_initial_bindings_hf(self, inputargs): - - arg_index = vfp_arg_index = 0 - count = 0 - n_reg_args = len(r.argument_regs) - n_vfp_reg_args = len(r.vfp_argument_regs) - cur_frame_pos = 1 - (self.assembler.STACK_FIXED_AREA // WORD) - for box in inputargs: - assert isinstance(box, Box) - # handle inputargs in argument registers - if box.type != FLOAT and arg_index < n_reg_args: - reg = r.argument_regs[arg_index] - self.try_allocate_reg(box, selected_reg=reg) - arg_index += 1 - elif box.type == FLOAT and vfp_arg_index < n_vfp_reg_args: - reg = r.vfp_argument_regs[vfp_arg_index] - self.try_allocate_reg(box, selected_reg=reg) - vfp_arg_index += 1 - else: - # treat stack args as stack locations with a negative offset - if box.type == FLOAT: - cur_frame_pos -= 2 - if count % 2 != 0: # Stack argument alignment - cur_frame_pos -= 1 - count = 0 - else: - cur_frame_pos -= 1 - count += 1 - loc = self.frame_manager.frame_pos(cur_frame_pos, box.type) - self.frame_manager.set_binding(box, loc) + assert box.type != FLOAT + self.fm.get_new_loc(box) def _update_bindings(self, locs, inputargs): used = {} @@ -644,9 +586,19 @@ return args def prepare_op_finish(self, op, fcond): - loc = self.loc(op.getarg(0)) - self.possibly_free_var(op.getarg(0)) - return [loc] + # the frame is in fp, but we have to point where in the frame is + # the potential argument to FINISH + descr = op.getdescr() + fail_descr = cast_instance_to_gcref(descr) + # we know it does not move, but well + rgc._make_sure_does_not_move(fail_descr) + fail_descr = rffi.cast(lltype.Signed, fail_descr) + if op.numargs() == 1: + loc = self.make_sure_var_in_reg(op.getarg(0)) + locs = [loc, imm(fail_descr)] + else: + locs = [imm(fail_descr)] + return locs def prepare_op_guard_true(self, op, fcond): l0 = self.make_sure_var_in_reg(op.getarg(0)) diff --git a/rpython/jit/backend/arm/runner.py b/rpython/jit/backend/arm/runner.py --- a/rpython/jit/backend/arm/runner.py +++ b/rpython/jit/backend/arm/runner.py @@ -1,12 +1,18 @@ +from rpython.jit.backend.arm.arch import JITFRAME_FIXED_SIZE from rpython.jit.backend.arm.assembler import AssemblerARM from rpython.jit.backend.arm.registers import all_regs, all_vfp_regs +from rpython.jit.backend.llsupport import jitframe +from rpython.jit.backend.llsupport.symbolic import WORD from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU +from rpython.jit.metainterp import history +from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER +from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.llinterp import LLInterpreter from rpython.rtyper.lltypesystem import lltype, rffi, llmemory -from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER -from rpython.jit.backend.arm.arch import FORCE_INDEX_OFS +jitframe.STATICSIZE = JITFRAME_FIXED_SIZE + class AbstractARMCPU(AbstractLLCPU): supports_floats = True @@ -18,14 +24,9 @@ def __init__(self, rtyper, stats, opts=None, translate_support_code=False, gcdescr=None): - if gcdescr is not None: - gcdescr.force_index_ofs = FORCE_INDEX_OFS AbstractLLCPU.__init__(self, rtyper, stats, opts, translate_support_code, gcdescr) - from rpython.jit.backend.llsupport import jitframe - self.deadframe_size_max = llmemory.sizeof(jitframe.DEADFRAME, - self.get_failargs_limit()) def set_debug(self, flag): return self.assembler.set_debug(flag) @@ -64,7 +65,11 @@ setitem(index, null) def make_execute_token(self, *ARGS): - FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, llmemory.GCREF)) + FUNCPTR = lltype.Ptr(lltype.FuncType([llmemory.GCREF], + llmemory.GCREF)) + + lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)] + kinds = unrolling_iterable(lst) def execute_token(executable_token, *args): clt = executable_token.compiled_loop_token @@ -74,18 +79,32 @@ assert addr % 8 == 0 func = rffi.cast(FUNCPTR, addr) #llop.debug_print(lltype.Void, ">>>> Entering", addr) + frame_info = clt.frame_info + frame = self.gc_ll_descr.malloc_jitframe(frame_info) + ll_frame = lltype.cast_opaque_ptr(llmemory.GCREF, frame) prev_interpreter = None # help flow space if not self.translate_support_code: prev_interpreter = LLInterpreter.current_interpreter LLInterpreter.current_interpreter = self.debug_ll_interpreter try: - deadframe = func(*args) + num = JITFRAME_FIXED_SIZE * WORD + for i, kind in kinds: + arg = args[i] + if kind == history.INT: + self.set_int_value(ll_frame, num, arg) + elif kind == history.FLOAT: + self.set_float_value(ll_frame, num, arg) + num += WORD # on ARM(32 bit) a FLOAT needs two words + else: + assert kind == history.REF + self.set_ref_value(ll_frame, num, arg) + num += WORD + ll_frame = func(ll_frame) finally: if not self.translate_support_code: LLInterpreter.current_interpreter = prev_interpreter #llop.debug_print(lltype.Void, "<<<< Back") - self.gc_set_extra_threshold() - return deadframe + return ll_frame return execute_token def cast_ptr_to_int(x): diff --git a/rpython/jit/backend/arm/support.py b/rpython/jit/backend/arm/support.py new file mode 100644 --- /dev/null +++ b/rpython/jit/backend/arm/support.py @@ -0,0 +1,61 @@ +from rpython.rtyper.lltypesystem import lltype, rffi, llmemory +from rpython.rlib.rarithmetic import r_uint +from rpython.translator.tool.cbuild import ExternalCompilationInfo + +eci = ExternalCompilationInfo(post_include_bits=[""" +static int pypy__arm_int_div(int a, int b) { + return a/b; +} +static unsigned int pypy__arm_uint_div(unsigned int a, unsigned int b) { + return a/b; +} +static int pypy__arm_int_mod(int a, int b) { + return a % b; +} +"""]) + + +def arm_int_div_emulator(a, b): + return int(a / float(b)) +arm_int_div_sign = lltype.Ptr( + lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed)) +arm_int_div = rffi.llexternal( + "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed, + _callable=arm_int_div_emulator, + compilation_info=eci, + _nowrapper=True, elidable_function=True) + + +def arm_uint_div_emulator(a, b): + return r_uint(a) / r_uint(b) +arm_uint_div_sign = lltype.Ptr( + lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned)) +arm_uint_div = rffi.llexternal( + "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned, + _callable=arm_uint_div_emulator, + compilation_info=eci, + _nowrapper=True, elidable_function=True) + + +def arm_int_mod_emulator(a, b): + sign = 1 + if a < 0: + a = -1 * a + sign = -1 + if b < 0: + b = -1 * b + res = a % b + return sign * res +arm_int_mod_sign = arm_int_div_sign +arm_int_mod = rffi.llexternal( + "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed, + _callable=arm_int_mod_emulator, + compilation_info=eci, + _nowrapper=True, elidable_function=True) +# ____________________________________________________________ + +memcpy_fn = rffi.llexternal('memcpy', [llmemory.Address, llmemory.Address, + rffi.SIZE_T], lltype.Void, + sandboxsafe=True, _nowrapper=True) + +# ____________________________________________________________ diff --git a/rpython/jit/backend/arm/test/test_assembler.py b/rpython/jit/backend/arm/test/test_assembler.py --- a/rpython/jit/backend/arm/test/test_assembler.py +++ b/rpython/jit/backend/arm/test/test_assembler.py @@ -1,6 +1,6 @@ from rpython.jit.backend.arm import conditions as c from rpython.jit.backend.arm import registers as r -from rpython.jit.backend.arm.arch import arm_int_div +from rpython.jit.backend.arm.support import arm_int_div from rpython.jit.backend.arm.assembler import AssemblerARM from rpython.jit.backend.arm.locations import imm from rpython.jit.backend.arm.test.support import run_asm diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py --- a/rpython/jit/backend/llsupport/llmodel.py +++ b/rpython/jit/backend/llsupport/llmodel.py @@ -336,6 +336,40 @@ # ____________________________________________________________ + def set_int_value(self, newframe, index, value): + """ Note that we keep index multiplied by WORD here mostly + for completeness with get_int_value and friends + """ + descr = self.gc_ll_descr.getframedescrs(self).arraydescr + ofs = self.unpack_arraydescr(descr) + self.write_int_at_mem(newframe, ofs + index, WORD, 1, value) + + def set_ref_value(self, newframe, index, value): + descr = self.gc_ll_descr.getframedescrs(self).arraydescr + ofs = self.unpack_arraydescr(descr) + self.write_ref_at_mem(newframe, ofs + index, value) + + def set_float_value(self, newframe, index, value): + descr = self.gc_ll_descr.getframedescrs(self).arraydescr + ofs = self.unpack_arraydescr(descr) + self.write_float_at_mem(newframe, ofs + index, value) + + @specialize.arg(1) + def get_ofs_of_frame_field(self, name): + descrs = self.gc_ll_descr.getframedescrs(self) + if name.startswith('jfi_'): + base_ofs = 0 # not relative to frame + else: + base_ofs = self.unpack_arraydescr(descrs.arraydescr) + ofs = self.unpack_fielddescr(getattr(descrs, name)) + return ofs - base_ofs + + def get_baseofs_of_frame_field(self): + descrs = self.gc_ll_descr.getframedescrs(self) + base_ofs = self.unpack_arraydescr(descrs.arraydescr) + return base_ofs + # ____________________________________________________________ + def bh_arraylen_gc(self, array, arraydescr): assert isinstance(arraydescr, ArrayDescr) diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py --- a/rpython/jit/backend/model.py +++ b/rpython/jit/backend/model.py @@ -1,5 +1,4 @@ from rpython.rlib.debug import debug_start, debug_print, debug_stop -from rpython.jit.metainterp import compile from rpython.rtyper.lltypesystem import lltype class CPUTotalTracker(object): @@ -23,7 +22,6 @@ propagate_exception_descr = None def __init__(self): - self.__dict__.update(compile.make_done_loop_tokens()) self.tracker = CPUTotalTracker() def _freeze_(self): diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -129,6 +129,8 @@ assert fail.identifier == 1 def test_compile_linear_float_loop(self): + if not self.cpu.supports_floats: + py.test.skip("requires floats") i0 = BoxFloat() i1 = BoxFloat() operations = [ @@ -2787,6 +2789,7 @@ looptoken = JitCellToken() looptoken.outermost_jitdriver_sd = FakeJitDriverSD() finish_descr = loop.operations[-1].getdescr() + self.cpu.done_with_this_frame_descr_int = BasicFinalDescr() self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) ARGS = [lltype.Signed] * 10 RES = lltype.Signed @@ -2858,6 +2861,7 @@ finish_descr = loop.operations[-1].getdescr() looptoken = JitCellToken() looptoken.outermost_jitdriver_sd = FakeJitDriverSD() + self.cpu.done_with_this_frame_descr_float = BasicFinalDescr() self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) args = [longlong.getfloatstorage(1.2), longlong.getfloatstorage(2.3)] @@ -2950,6 +2954,7 @@ loop = parse(ops) looptoken = JitCellToken() looptoken.outermost_jitdriver_sd = FakeJitDriverSD() + self.cpu.done_with_this_frame_descr_float = BasicFinalDescr() self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) finish_descr = loop.operations[-1].getdescr() args = [longlong.getfloatstorage(1.25), diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -2271,7 +2271,9 @@ else: raise AssertionError(kind) - value = rffi.cast(lltype.Signed, cast_instance_to_gcref(value)) + gcref = cast_instance_to_gcref(value) + rgc._make_sure_does_not_move(gcref) + value = rffi.cast(lltype.Signed, gcref) base_ofs = self.cpu.get_baseofs_of_frame_field() ofs = self.cpu.get_ofs_of_frame_field('jf_descr') self.mc.CMP_mi((eax.value, base_ofs + ofs), value) @@ -2302,8 +2304,7 @@ fielddescr = jd.vable_token_descr assert isinstance(fielddescr, FieldDescr) vtoken_ofs = fielddescr.offset - vable_ofs = (jd.index_of_virtualizable + JITFRAME_FIXED_SIZE) * WORD - self.mc.MOV_rm(edx.value, (eax.value, vable_ofs)) + self.mc.MOV(edx, vloc) # we know vloc is on the current frame self.mc.MOV_mi((edx.value, vtoken_ofs), 0) # in the line above, TOKEN_NONE = 0 # diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py --- a/rpython/jit/backend/x86/runner.py +++ b/rpython/jit/backend/x86/runner.py @@ -107,7 +107,7 @@ lst = [(i, history.getkind(ARG)[0]) for i, ARG in enumerate(ARGS)] kinds = unrolling_iterable(lst) - + def execute_token(executable_token, *args): clt = executable_token.compiled_loop_token assert len(args) == clt._debug_nbargs @@ -160,7 +160,7 @@ def invalidate_loop(self, looptoken): from rpython.jit.backend.x86 import codebuf - + for addr, tgt in looptoken.compiled_loop_token.invalidate_positions: mc = codebuf.MachineCodeBlockWrapper() mc.JMP_l(tgt) @@ -178,38 +178,6 @@ l[i].counter = ll_s.i return l - def set_int_value(self, newframe, index, value): - """ Note that we keep index multiplied by WORD here mostly - for completeness with get_int_value and friends - """ - descr = self.gc_ll_descr.getframedescrs(self).arraydescr - ofs = self.unpack_arraydescr(descr) - self.write_int_at_mem(newframe, ofs + index, WORD, 1, value) - - def set_ref_value(self, newframe, index, value): - descr = self.gc_ll_descr.getframedescrs(self).arraydescr - ofs = self.unpack_arraydescr(descr) - self.write_ref_at_mem(newframe, ofs + index, value) - - def set_float_value(self, newframe, index, value): - descr = self.gc_ll_descr.getframedescrs(self).arraydescr - ofs = self.unpack_arraydescr(descr) - self.write_float_at_mem(newframe, ofs + index, value) - - @specialize.arg(1) - def get_ofs_of_frame_field(self, name): - descrs = self.gc_ll_descr.getframedescrs(self) - if name.startswith('jfi_'): - base_ofs = 0 # not relative to frame - else: - base_ofs = self.unpack_arraydescr(descrs.arraydescr) - ofs = self.unpack_fielddescr(getattr(descrs, name)) - return ofs - base_ofs - - def get_baseofs_of_frame_field(self): - descrs = self.gc_ll_descr.getframedescrs(self) - base_ofs = self.unpack_arraydescr(descrs.arraydescr) - return base_ofs class CPU386(AbstractX86CPU): backend_name = 'x86' diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -1420,6 +1420,11 @@ self._addr2name_values = [] self.__dict__.update(compile.make_done_loop_tokens()) + for val in ['int', 'float', 'ref', 'void']: + fullname = 'done_with_this_frame_descr_' + val + setattr(self.cpu, fullname, getattr(self, fullname)) + d = self.exit_frame_with_exception_descr_ref + self.cpu.exit_frame_with_exception_descr_ref = d def _freeze_(self): return True diff --git a/rpython/rtyper/memory/gctransform/shadowstack.py b/rpython/rtyper/memory/gctransform/shadowstack.py --- a/rpython/rtyper/memory/gctransform/shadowstack.py +++ b/rpython/rtyper/memory/gctransform/shadowstack.py @@ -268,12 +268,6 @@ self.gc_start_fresh_new_state_ptr = getfn(gc_start_fresh_new_state, [], annmodel.s_None, inline=True) - # fish... - translator = gctransformer.translator - if hasattr(translator, '_jit2gc'): - from rpython.rlib._rffi_stacklet import _translate_pointer - root_iterator = translator._jit2gc['root_iterator'] - root_iterator.translateptr = _translate_pointer # ____________________________________________________________ @@ -366,23 +360,18 @@ def get_root_iterator(gctransformer): if hasattr(gctransformer, '_root_iterator'): return gctransformer._root_iterator # if already built - translator = gctransformer.translator - if (hasattr(translator, '_jit2gc') and - 'root_iterator' in translator._jit2gc): - result = translator._jit2gc['root_iterator'] - else: - class RootIterator(object): - def _freeze_(self): - return True - def setcontext(self, context): - pass - def nextleft(self, gc, start, addr): - while addr != start: - addr -= sizeofaddr - if gc.points_to_valid_gc_object(addr): - return addr - return llmemory.NULL - result = RootIterator() + class RootIterator(object): + def _freeze_(self): + return True + def setcontext(self, context): + pass + def nextleft(self, gc, start, addr): + while addr != start: + addr -= sizeofaddr + if gc.points_to_valid_gc_object(addr): + return addr + return llmemory.NULL + result = RootIterator() gctransformer._root_iterator = result return result _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit