Author: Maciej Fijalkowski <fij...@gmail.com> Branch: arm64 Changeset: r95918:dba5b910fb11 Date: 2019-02-09 14:26 +0000 http://bitbucket.org/pypy/pypy/changeset/dba5b910fb11/
Log: (arigo, fijal, rodolph, bivab) fight until the test nearly passes diff --git a/rpython/jit/backend/aarch64/assembler.py b/rpython/jit/backend/aarch64/assembler.py --- a/rpython/jit/backend/aarch64/assembler.py +++ b/rpython/jit/backend/aarch64/assembler.py @@ -4,9 +4,9 @@ #from rpython.jit.backend.arm.locations import imm, StackLocation, get_fp_offset #from rpython.jit.backend.arm.helper.regalloc import VMEM_imm_size from rpython.jit.backend.aarch64.opassembler import ResOpAssembler -from rpython.jit.backend.aarch64.regalloc import Regalloc +from rpython.jit.backend.aarch64.regalloc import (Regalloc, # CoreRegisterManager, check_imm_arg, VFPRegisterManager, -# operations as regalloc_operations) + operations as regalloc_operations) #from rpython.jit.backend.arm import callbuilder from rpython.jit.backend.aarch64 import registers as r from rpython.jit.backend.llsupport import jitframe @@ -30,6 +30,7 @@ def assemble_loop(self, jd_id, unique_id, logger, loopname, inputargs, operations, looptoken, log): clt = CompiledLoopToken(self.cpu, looptoken.number) + clt._debug_nbargs = len(inputargs) looptoken.compiled_loop_token = clt if not we_are_translated(): @@ -127,6 +128,12 @@ self.target_tokens_currently_compiling = {} self.frame_depth_to_patch = [] + def teardown(self): + self.current_clt = None + self._regalloc = None + self.mc = None + self.pending_guards = None + def _build_failure_recovery(self, exc, withfloats=False): pass # XXX @@ -148,9 +155,38 @@ def _check_frame_depth_debug(self, mc): pass + def update_frame_depth(self, frame_depth): + baseofs = self.cpu.get_baseofs_of_frame_field() + self.current_clt.frame_info.update_frame_depth(baseofs, frame_depth) + + def write_pending_failure_recoveries(self): + pass # XXX + def reserve_gcref_table(self, allgcrefs): pass + def materialize_loop(self, looptoken): + self.datablockwrapper.done() # finish using cpu.asmmemmgr + self.datablockwrapper = None + allblocks = self.get_asmmemmgr_blocks(looptoken) + size = 
self.mc.get_relative_pos() + res = self.mc.materialize(self.cpu, allblocks, + self.cpu.gc_ll_descr.gcrootmap) + #self.cpu.codemap.register_codemap( + # self.codemap.get_final_bytecode(res, size)) + return res + + def patch_gcref_table(self, looptoken, rawstart): + pass + + def process_pending_guards(self, rawstart): + pass + + def fixup_target_tokens(self, rawstart): + for targettoken in self.target_tokens_currently_compiling: + targettoken._ll_loop_code += rawstart + self.target_tokens_currently_compiling = None + def _call_header_with_stack_check(self): self._call_header() if self.stack_check_slowpath == 0: @@ -192,3 +228,160 @@ gcrootmap = self.cpu.gc_ll_descr.gcrootmap if gcrootmap and gcrootmap.is_shadow_stack: self.gen_shadowstack_header(gcrootmap) + + def _assemble(self, regalloc, inputargs, operations): + #self.guard_success_cc = c.cond_none + regalloc.compute_hint_frame_locations(operations) + self._walk_operations(inputargs, operations, regalloc) + #assert self.guard_success_cc == c.cond_none + frame_depth = regalloc.get_final_frame_depth() + jump_target_descr = regalloc.jump_target_descr + if jump_target_descr is not None: + tgt_depth = jump_target_descr._arm_clt.frame_info.jfi_frame_depth + target_frame_depth = tgt_depth - JITFRAME_FIXED_SIZE + frame_depth = max(frame_depth, target_frame_depth) + return frame_depth + + def _walk_operations(self, inputargs, operations, regalloc): + self._regalloc = regalloc + regalloc.operations = operations + while regalloc.position() < len(operations) - 1: + regalloc.next_instruction() + i = regalloc.position() + op = operations[i] + self.mc.mark_op(op) + opnum = op.getopnum() + if rop.has_no_side_effect(opnum) and op not in regalloc.longevity: + regalloc.possibly_free_vars_for_op(op) + elif not we_are_translated() and op.getopnum() == rop.FORCE_SPILL: + regalloc.prepare_force_spill(op) + else: + arglocs = regalloc_operations[opnum](regalloc, op) + if arglocs is not None: + asm_operations[opnum](self, op, arglocs) + if 
rop.is_guard(opnum): + regalloc.possibly_free_vars(op.getfailargs()) + if op.type != 'v': + regalloc.possibly_free_var(op) + regalloc.possibly_free_vars_for_op(op) + regalloc.free_temp_vars() + regalloc._check_invariants() + if not we_are_translated(): + self.mc.BRK() + self.mc.mark_op(None) # end of the loop + regalloc.operations = None + + # regalloc support + def load(self, loc, value): + """load an immediate value into a register""" + assert (loc.is_core_reg() and value.is_imm() + or loc.is_vfp_reg() and value.is_imm_float()) + if value.is_imm(): + self.mc.gen_load_int(loc.value, value.getint()) + elif value.is_imm_float(): + self.mc.gen_load_int(r.ip.value, value.getint()) + self.mc.VLDR(loc.value, r.ip.value) + + def _mov_stack_to_loc(self, prev_loc, loc): + offset = prev_loc.value + if loc.is_core_reg(): + assert prev_loc.type != FLOAT, 'trying to load from an \ + incompatible location into a core register' + # unspill a core register + assert 0 <= offset <= (1<<15) - 1 + self.mc.LDR_ri(loc.value, r.fp.value, offset) + return + xxx + # elif loc.is_vfp_reg(): + # assert prev_loc.type == FLOAT, 'trying to load from an \ + # incompatible location into a float register' + # # load spilled value into vfp reg + # is_imm = check_imm_arg(offset) + # helper, save = self.get_tmp_reg() + # save_helper = not is_imm and save + # elif loc.is_raw_sp(): + # assert (loc.type == prev_loc.type == FLOAT + # or (loc.type != FLOAT and prev_loc.type != FLOAT)) + # tmp = loc + # if loc.is_float(): + # loc = r.vfp_ip + # else: + # loc, save_helper = self.get_tmp_reg() + # assert not save_helper + # helper, save_helper = self.get_tmp_reg([loc]) + # assert not save_helper + # else: + # assert 0, 'unsupported case' + + # if save_helper: + # self.mc.PUSH([helper.value], cond=cond) + # self.load_reg(self.mc, loc, r.fp, offset, cond=cond, helper=helper) + # if save_helper: + # self.mc.POP([helper.value], cond=cond) + + def regalloc_mov(self, prev_loc, loc): + """Moves a value from a 
previous location to some other location""" + if prev_loc.is_imm(): + return self._mov_imm_to_loc(prev_loc, loc) + elif prev_loc.is_core_reg(): + self._mov_reg_to_loc(prev_loc, loc) + elif prev_loc.is_stack(): + self._mov_stack_to_loc(prev_loc, loc) + elif prev_loc.is_imm_float(): + self._mov_imm_float_to_loc(prev_loc, loc) + elif prev_loc.is_vfp_reg(): + self._mov_vfp_reg_to_loc(prev_loc, loc) + elif prev_loc.is_raw_sp(): + self._mov_raw_sp_to_loc(prev_loc, loc) + else: + assert 0, 'unsupported case' + mov_loc_loc = regalloc_mov + + def gen_func_epilog(self, mc=None): + gcrootmap = self.cpu.gc_ll_descr.gcrootmap + if mc is None: + mc = self.mc + if gcrootmap and gcrootmap.is_shadow_stack: + self.gen_footer_shadowstack(gcrootmap, mc) + if self.cpu.supports_floats: + XXX + # mc.VPOP([reg.value for reg in r.callee_saved_vfp_registers]) + + # pop all callee saved registers + + stack_size = (len(r.callee_saved_registers) + 2) * WORD + for i in range(0, len(r.callee_saved_registers), 2): + mc.LDP_rri(r.callee_saved_registers[i].value, + r.callee_saved_registers[i + 1].value, + r.sp.value, + (i + 2) * WORD) + mc.LDP_rr_postindex(r.fp.value, r.lr.value, r.sp.value, stack_size) + + mc.RET_r(r.lr.value) + + + +def not_implemented(msg): + msg = '[ARM/asm] %s\n' % msg + if we_are_translated(): + llop.debug_print(lltype.Void, msg) + raise NotImplementedError(msg) + + +def notimplemented_op(self, op, arglocs, regalloc): + print "[ARM/asm] %s not implemented" % op.getopname() + raise NotImplementedError(op) + + +asm_operations = [notimplemented_op] * (rop._LAST + 1) +asm_extra_operations = {} + +for name, value in ResOpAssembler.__dict__.iteritems(): + if name.startswith('emit_opx_'): + opname = name[len('emit_opx_'):] + num = getattr(EffectInfo, 'OS_' + opname.upper()) + asm_extra_operations[num] = value + elif name.startswith('emit_op_'): + opname = name[len('emit_op_'):] + num = getattr(rop, opname.upper()) + asm_operations[num] = value diff --git 
a/rpython/jit/backend/aarch64/codebuilder.py b/rpython/jit/backend/aarch64/codebuilder.py --- a/rpython/jit/backend/aarch64/codebuilder.py +++ b/rpython/jit/backend/aarch64/codebuilder.py @@ -1,7 +1,11 @@ +from rpython.rlib.objectmodel import we_are_translated from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin from rpython.jit.backend.aarch64.locations import RegisterLocation from rpython.jit.backend.aarch64 import registers as r +from rpython.rlib.rarithmetic import intmask +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.tool.udir import udir class AbstractAarch64Builder(object): @@ -28,6 +32,9 @@ self.write32((base << 22) | ((0x7F & (offset >> 3)) << 15) | (reg2 << 10) | (rn << 5) | reg1) + def MOV_r_u16(self, rd, immed, shift): # u16 is an unsigned 16-bit + self.MOVK_r_u16(rd, immed, shift) + def MOV_rr(self, rd, rn): self.ORR_rr(rd, r.xzr.value, rn) @@ -36,12 +43,68 @@ self.write32((base << 21) | (rm << 16) | (rn << 5) | rd) + def MOVK_r_u16(self, rd, immed, shift): + base = 0b111100101 + assert 0 <= immed < 1 << 16 + assert shift in (0, 16, 32, 48) + self.write32((base << 23) | (shift >> 4 << 21) | (immed << 5) | rd) + + def MOVN_r_u16(self, rd, immed): + base = 0b10010010100 + assert 0 <= immed < 1 << 16 + self.write32((base << 21) | (immed << 5) | rd) + def ADD_ri(self, rd, rn, constant): base = 0b1001000100 assert 0 <= constant < 4096 self.write32((base << 22) | (constant << 10) | (rn << 5) | rd) + def LDP_rri(self, reg1, reg2, rn, offset): + base = 0b1010100101 + assert -512 <= offset < 512 + assert offset & 0x7 == 0 + self.write32((base << 22) | ((0x7F & (offset >> 3)) << 15) | + (reg2 << 10) | (rn << 5) | reg1) + + def LDP_rr_postindex(self, reg1, reg2, rn, offset): + base = 0b1010100011 + assert -512 <= offset < 512 + assert offset & 0x7 == 0 + self.write32((base << 22) | ((0x7F & (offset >> 3)) << 15) | + (reg2 << 10) | (rn << 5) | reg1) + + def LDR_ri(self, rt, rn, immed): + base = 0b1111100101 + assert 0 <= 
immed <= 1<<15 + assert immed & 0x7 == 0 + immed >>= 3 + self.write32((base << 22) | (immed << 10) | (rn << 5) | rt) + + def ADD_rr(self, rd, rn, rm): + base = 0b10001011000 + self.write32((base << 21) | (rm << 16) | (rn << 5) | (rd)) + + def BRK(self): + self.write32(0b11010100001 << 21) + + def gen_load_int(self, r, value): + """r is the register number, value is the value to be loaded to the + register""" + shift = 0 + if value < 0: + value = ~value + nxt = intmask(value & 0xFFFF) + self.MOVN_r_u16(r, nxt) + value >>= 16 + shift += 16 + while value: + nxt = intmask(value & 0xFFFF) + self.MOV_r_u16(r, nxt, shift) + value >>= 16 + shift += 16 + + class InstrBuilder(BlockBuilderMixin, AbstractAarch64Builder): def __init__(self, arch_version=7): diff --git a/rpython/jit/backend/aarch64/locations.py b/rpython/jit/backend/aarch64/locations.py --- a/rpython/jit/backend/aarch64/locations.py +++ b/rpython/jit/backend/aarch64/locations.py @@ -69,6 +69,21 @@ def is_float(self): return True +class ImmLocation(AssemblerLocation): + _immutable_ = True + + def __init__(self, value): + self.value = value + + def getint(self): + return self.value + + def __repr__(self): + return "imm(%d)" % (self.value) + + def is_imm(self): + return True + class StackLocation(AssemblerLocation): _immutable_ = True diff --git a/rpython/jit/backend/aarch64/opassembler.py b/rpython/jit/backend/aarch64/opassembler.py --- a/rpython/jit/backend/aarch64/opassembler.py +++ b/rpython/jit/backend/aarch64/opassembler.py @@ -1,5 +1,35 @@ +from rpython.jit.backend.aarch64 import registers as r from rpython.jit.backend.llsupport.assembler import GuardToken, BaseAssembler class ResOpAssembler(BaseAssembler): - pass + def emit_op_int_add(self, op, arglocs): + return self.int_add_impl(op, arglocs) + + emit_op_nursery_ptr_increment = emit_op_int_add + + def int_add_impl(self, op, arglocs, ovfcheck=False): + l0, l1, res = arglocs + if ovfcheck: + XXX + s = 1 + else: + s = 0 + if l0.is_imm(): + 
self.mc.ADD_ri(res.value, l1.value, imm=l0.value, s=s) + elif l1.is_imm(): + self.mc.ADD_ri(res.value, l0.value, imm=l1.value, s=s) + else: + self.mc.ADD_rr(res.value, l0.value, l1.value) + + def emit_op_increment_debug_counter(self, op, arglocs): + return # XXXX + base_loc, value_loc = arglocs + self.mc.LDR_ri(value_loc.value, base_loc.value, 0) + self.mc.ADD_ri(value_loc.value, value_loc.value, 1) + self.mc.STR_ri(value_loc.value, base_loc.value, 0) + + def emit_op_finish(self, op, arglocs): + self.mc.MOV_rr(r.x0.value, r.fp.value) + # exit function + self.gen_func_epilog() diff --git a/rpython/jit/backend/aarch64/regalloc.py b/rpython/jit/backend/aarch64/regalloc.py --- a/rpython/jit/backend/aarch64/regalloc.py +++ b/rpython/jit/backend/aarch64/regalloc.py @@ -6,9 +6,34 @@ ConstPtr, INT, REF, FLOAT) from rpython.jit.metainterp.history import TargetToken +from rpython.jit.metainterp.resoperation import rop from rpython.jit.backend.llsupport.regalloc import FrameManager, \ RegisterManager, TempVar, compute_vars_longevity, BaseRegalloc, \ get_scale +from rpython.rtyper.lltypesystem import lltype, rffi, rstr, llmemory +from rpython.jit.backend.aarch64 import registers as r + + +class TempInt(TempVar): + type = INT + + def __repr__(self): + return "<TempInt at %s>" % (id(self),) + + +class TempPtr(TempVar): + type = REF + + def __repr__(self): + return "<TempPtr at %s>" % (id(self),) + + +class TempFloat(TempVar): + type = FLOAT + + def __repr__(self): + return "<TempFloat at %s>" % (id(self),) + class ARMFrameManager(FrameManager): @@ -168,3 +193,152 @@ for var in vars: if var is not None: # xxx kludgy self.possibly_free_var(var) + + def get_scratch_reg(self, type, forbidden_vars=[], selected_reg=None): + if type == FLOAT: + return self.vfprm.get_scratch_reg(type, forbidden_vars, + selected_reg) + else: + return self.rm.get_scratch_reg(type, forbidden_vars, selected_reg) + + def get_free_reg(self): + return self.rm.get_free_reg() + + def free_temp_vars(self): + 
self.rm.free_temp_vars() + self.vfprm.free_temp_vars() + + def make_sure_var_in_reg(self, var, forbidden_vars=[], + selected_reg=None, need_lower_byte=False): + if var.type == FLOAT: + return self.vfprm.make_sure_var_in_reg(var, forbidden_vars, + selected_reg, need_lower_byte) + else: + return self.rm.make_sure_var_in_reg(var, forbidden_vars, + selected_reg, need_lower_byte) + + def convert_to_imm(self, value): + if isinstance(value, ConstInt): + return self.rm.convert_to_imm(value) + else: + assert isinstance(value, ConstFloat) + return self.vfprm.convert_to_imm(value) + + def compute_hint_frame_locations(self, operations): + # optimization only: fill in the 'hint_frame_locations' dictionary + # of rm and xrm based on the JUMP at the end of the loop, by looking + # at where we would like the boxes to be after the jump. + op = operations[-1] + if op.getopnum() != rop.JUMP: + return + self.final_jump_op = op + descr = op.getdescr() + assert isinstance(descr, TargetToken) + if descr._ll_loop_code != 0: + # if the target LABEL was already compiled, i.e. if it belongs + # to some already-compiled piece of code + self._compute_hint_frame_locations_from_descr(descr) + #else: + # The loop ends in a JUMP going back to a LABEL in the same loop. + # We cannot fill 'hint_frame_locations' immediately, but we can + # wait until the corresponding prepare_op_label() to know where + # we would like the boxes to be after the jump. 
+ + def _compute_hint_frame_locations_from_descr(self, descr): + arglocs = self.assembler.target_arglocs(descr) + jump_op = self.final_jump_op + assert len(arglocs) == jump_op.numargs() + for i in range(jump_op.numargs()): + box = jump_op.getarg(i) + if not isinstance(box, Const): + loc = arglocs[i] + if loc is not None and loc.is_stack(): + self.frame_manager.hint_frame_pos[box] = ( + self.fm.get_loc_index(loc)) + + def position(self): + return self.rm.position + + def next_instruction(self): + self.rm.next_instruction() + self.vfprm.next_instruction() + + def prepare_op_increment_debug_counter(self, op): + boxes = op.getarglist() + a0, = boxes + base_loc = self.make_sure_var_in_reg(a0, boxes) + value_loc = self.get_scratch_reg(INT, boxes) + self.free_temp_vars() + return [base_loc, value_loc] + + def _prepare_op_int_add(self, op, fcond): + XXX + boxes = op.getarglist() + a0, a1 = boxes + imm_a0 = check_imm_box(a0) + imm_a1 = check_imm_box(a1) + if not imm_a0 and imm_a1: + l0 = self.make_sure_var_in_reg(a0, boxes) + l1 = self.convert_to_imm(a1) + elif imm_a0 and not imm_a1: + l0 = self.convert_to_imm(a0) + l1 = self.make_sure_var_in_reg(a1, boxes) + else: + l0 = self.make_sure_var_in_reg(a0, boxes) + l1 = self.make_sure_var_in_reg(a1, boxes) + return [l0, l1] + + def prepare_op_int_add(self, op): + arg0 = op.getarg(0) + arg1 = op.getarg(1) + # XXX support immediates + l0 = self.make_sure_var_in_reg(arg0, op.getarglist()) + l1 = self.make_sure_var_in_reg(arg1, op.getarglist()) + self.possibly_free_vars_for_op(op) + res = self.force_allocate_reg(op) + return [l0, l1, res] + + def prepare_op_finish(self, op): + # the frame is in fp, but we have to point where in the frame is + # the potential argument to FINISH + if op.numargs() == 1: + loc = self.make_sure_var_in_reg(op.getarg(0)) + locs = [loc] + else: + locs = [] + return locs + + prepare_op_nursery_ptr_increment = prepare_op_int_add + + def force_allocate_reg(self, var, forbidden_vars=[], selected_reg=None): + if 
var.type == FLOAT: + return self.vfprm.force_allocate_reg(var, forbidden_vars, + selected_reg) + else: + return self.rm.force_allocate_reg(var, forbidden_vars, + selected_reg) + + def _check_invariants(self): + self.rm._check_invariants() + self.vfprm._check_invariants() + + def get_final_frame_depth(self): + return self.frame_manager.get_frame_depth() + + +def notimplemented(self, op): + print "[ARM64/regalloc] %s not implemented" % op.getopname() + raise NotImplementedError(op) + + +operations = [notimplemented] * (rop._LAST + 1) + + +for key, value in rop.__dict__.items(): + key = key.lower() + if key.startswith('_'): + continue + methname = 'prepare_op_%s' % key + if hasattr(Regalloc, methname): + func = getattr(Regalloc, methname).im_func + operations[value] = func diff --git a/rpython/jit/backend/aarch64/registers.py b/rpython/jit/backend/aarch64/registers.py --- a/rpython/jit/backend/aarch64/registers.py +++ b/rpython/jit/backend/aarch64/registers.py @@ -15,6 +15,9 @@ lr = x30 fp = x29 + +# scratch registers that we use internally; we neither save them +# nor use them for regalloc ip1 = x17 ip0 = x16 diff --git a/rpython/jit/backend/aarch64/test/test_instr_builder.py b/rpython/jit/backend/aarch64/test/test_instr_builder.py --- a/rpython/jit/backend/aarch64/test/test_instr_builder.py +++ b/rpython/jit/backend/aarch64/test/test_instr_builder.py @@ -45,3 +45,24 @@ cb = CodeBuilder() cb.MOV_rr(r1.value, r2.value) assert cb.hexdump() == assemble("MOV %r, %r" % (r1, r2)) + + @settings(max_examples=20) + @given(r1=st.sampled_from(r.registers), + immed=st.integers(min_value=0, max_value=(1<<16) - 1)) + def test_MOVN(self, r1, immed): + cb = CodeBuilder() + cb.MOVN_r_u16(r1.value, immed) + assert cb.hexdump() == assemble("MOV %r, %d" % (r1, ~immed)) + + @settings(max_examples=20) + @given(r1=st.sampled_from(r.registers), + immed=st.integers(min_value=0, max_value=(1<<16) - 1), + shift=st.sampled_from([0, 16, 32, 48])) + def test_MOV_r_u16(self, r1, immed, shift): + 
cb = CodeBuilder() + cb.MOV_r_u16(r1.value, immed, shift) + if shift == 0: + assert cb.hexdump() == assemble("MOVK %r, %d" % (r1, immed)) + else: + assert cb.hexdump() == assemble("MOVK %r, %d, lsl %d" % (r1, immed, shift)) + _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit