Author: edelsohn Branch: ppc-jit-backend Changeset: r56860:6b8cbecfe63f Date: 2012-08-25 23:17 -0400 http://bitbucket.org/pypy/pypy/changeset/6b8cbecfe63f/
Log: update some arm files for comparison. diff --git a/pypy/jit/backend/arm/assembler.py b/pypy/jit/backend/arm/assembler.py --- a/pypy/jit/backend/arm/assembler.py +++ b/pypy/jit/backend/arm/assembler.py @@ -59,6 +59,7 @@ self._exit_code_addr = 0 self.current_clt = None self.malloc_slowpath = 0 + self.wb_slowpath = [0, 0, 0, 0] self._regalloc = None self.datablockwrapper = None self.propagate_exception_path = 0 @@ -107,6 +108,11 @@ # Addresses of functions called by new_xxx operations gc_ll_descr = self.cpu.gc_ll_descr gc_ll_descr.initialize() + self._build_wb_slowpath(False) + self._build_wb_slowpath(True) + if self.cpu.supports_floats: + self._build_wb_slowpath(False, withfloats=True) + self._build_wb_slowpath(True, withfloats=True) self._build_propagate_exception_path() if gc_ll_descr.get_malloc_slowpath_addr is not None: self._build_malloc_slowpath() @@ -286,6 +292,45 @@ rawstart = mc.materialize(self.cpu.asmmemmgr, []) self.stack_check_slowpath = rawstart + def _build_wb_slowpath(self, withcards, withfloats=False): + descr = self.cpu.gc_ll_descr.write_barrier_descr + if descr is None: + return + if not withcards: + func = descr.get_write_barrier_fn(self.cpu) + else: + if descr.jit_wb_cards_set == 0: + return + func = descr.get_write_barrier_from_array_fn(self.cpu) + if func == 0: + return + # + # This builds a helper function called from the slow path of + # write barriers. It must save all registers, and optionally + # all vfp registers. It takes a single argument which is in r0. + # It must keep stack alignment accordingly. + mc = ARMv7Builder() + # + if withfloats: + floats = r.caller_vfp_resp + else: + floats = [] + with saved_registers(mc, r.caller_resp + [r.ip, r.lr], floats): + mc.BL(func) + # + if withcards: + # A final TEST8 before the RET, for the caller. Careful to + # not follow this instruction with another one that changes + # the status of the CPU flags! + mc.LDRB_ri(r.ip.value, r.r0.value, + imm=descr.jit_wb_if_flag_byteofs) + mc.TST_ri(r.ip.value, imm=0x80) + # + mc.MOV_rr(r.pc.value, r.lr.value) + # + rawstart = mc.materialize(self.cpu.asmmemmgr, []) + self.wb_slowpath[withcards + 2 * withfloats] = rawstart + def setup_failure_recovery(self): @rgc.no_collect @@ -429,11 +474,14 @@ def _build_malloc_slowpath(self): mc = ARMv7Builder() - assert self.cpu.supports_floats + if self.cpu.supports_floats: + vfp_regs = r.all_vfp_regs + else: + vfp_regs = [] # We need to push two registers here because we are going to make a # call an therefore the stack needs to be 8-byte aligned mc.PUSH([r.ip.value, r.lr.value]) - with saved_registers(mc, [], r.all_vfp_regs): + with saved_registers(mc, [], vfp_regs): # At this point we know that the values we need to compute the size # are stored in r0 and r1. mc.SUB_rr(r.r0.value, r.r1.value, r.r0.value) diff --git a/pypy/jit/backend/arm/opassembler.py b/pypy/jit/backend/arm/opassembler.py --- a/pypy/jit/backend/arm/opassembler.py +++ b/pypy/jit/backend/arm/opassembler.py @@ -2,7 +2,7 @@ from pypy.jit.backend.arm import conditions as c from pypy.jit.backend.arm import registers as r from pypy.jit.backend.arm import shift -from pypy.jit.backend.arm.arch import WORD +from pypy.jit.backend.arm.arch import WORD, DOUBLE_WORD from pypy.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call, gen_emit_op_unary_cmp, @@ -370,31 +370,69 @@ def _emit_call(self, force_index, adr, arglocs, fcond=c.AL, resloc=None, result_info=(-1,-1)): + if self.cpu.use_hf_abi: + stack_args, adr = self._setup_call_hf(force_index, adr, arglocs, fcond, resloc, result_info) + else: + stack_args, adr = self._setup_call_sf(force_index, adr, arglocs, fcond, resloc, result_info) + + #the actual call + #self.mc.BKPT() + if adr.is_imm(): + self.mc.BL(adr.value) + elif adr.is_stack(): + self.mov_loc_loc(adr, r.ip) + adr = r.ip + else: + assert adr.is_reg() + if adr.is_reg(): + self.mc.BLX(adr.value) + self.mark_gc_roots(force_index) + self._restore_sp(stack_args, fcond) + + # ensure the result is wellformed and stored in the correct location + if resloc is not None: + if resloc.is_vfp_reg() and not self.cpu.use_hf_abi: + # move result to the allocated register + self.mov_to_vfp_loc(r.r0, r.r1, resloc) + elif resloc.is_reg() and result_info != (-1, -1): + self._ensure_result_bit_extension(resloc, result_info[0], + result_info[1]) + return fcond + + def _restore_sp(self, stack_args, fcond): + # readjust the sp in case we passed some args on the stack + if len(stack_args) > 0: + n = 0 + for arg in stack_args: + if arg is None or arg.type != FLOAT: + n += WORD + else: + n += DOUBLE_WORD + self._adjust_sp(-n, fcond=fcond) + assert n % 8 == 0 # sanity check + + def _collect_stack_args_sf(self, arglocs): n_args = len(arglocs) reg_args = count_reg_args(arglocs) # all arguments past the 4th go on the stack - n = 0 # used to count the number of words pushed on the stack, so we - #can later modify the SP back to its original value + # first we need to prepare the list so it stays aligned + stack_args = [] + count = 0 if n_args > reg_args: - # first we need to prepare the list so it stays aligned - stack_args = [] - count = 0 for i in range(reg_args, n_args): arg = arglocs[i] if arg.type != FLOAT: count += 1 - n += WORD else: - n += 2 * WORD if count % 2 != 0: stack_args.append(None) - n += WORD count = 0 stack_args.append(arg) if count % 2 != 0: - n += WORD stack_args.append(None) + return stack_args + def _push_stack_args(self, stack_args): #then we push every thing on the stack for i in range(len(stack_args) - 1, -1, -1): arg = stack_args[i] @@ -402,6 +440,13 @@ self.mc.PUSH([r.ip.value]) else: self.regalloc_push(arg) + + def _setup_call_sf(self, force_index, adr, arglocs, fcond=c.AL, + resloc=None, result_info=(-1,-1)): + n_args = len(arglocs) + reg_args = count_reg_args(arglocs) + stack_args = self._collect_stack_args_sf(arglocs) + self._push_stack_args(stack_args) # collect variables that need to go in registers and the registers they # will be stored in num = 0 @@ -440,32 +485,55 @@ for loc, reg in float_locs: self.mov_from_vfp_loc(loc, reg, r.all_regs[reg.value + 1]) + return stack_args, adr - #the actual call - if adr.is_imm(): - self.mc.BL(adr.value) - elif adr.is_stack(): - self.mov_loc_loc(adr, r.ip) - adr = r.ip - else: - assert adr.is_reg() - if adr.is_reg(): - self.mc.BLX(adr.value) - self.mark_gc_roots(force_index) - # readjust the sp in case we passed some args on the stack - if n > 0: - self._adjust_sp(-n, fcond=fcond) - # ensure the result is wellformed and stored in the correct location - if resloc is not None: - if resloc.is_vfp_reg(): - # move result to the allocated register - self.mov_to_vfp_loc(r.r0, r.r1, resloc) - elif result_info != (-1, -1): - self._ensure_result_bit_extension(resloc, result_info[0], - result_info[1]) + def _setup_call_hf(self, force_index, adr, arglocs, fcond=c.AL, + resloc=None, result_info=(-1,-1)): + n_reg_args = n_vfp_args = 0 + non_float_locs = [] + non_float_regs = [] + float_locs = [] + float_regs = [] + stack_args = [] + count = 0 # stack alignment counter + for arg in arglocs: + if arg.type != FLOAT: + if len(non_float_regs) < len(r.argument_regs): + reg = r.argument_regs[len(non_float_regs)] + non_float_locs.append(arg) + non_float_regs.append(reg) + else: # non-float argument that needs to go on the stack + count += 1 + stack_args.append(arg) + else: + if len(float_regs) < len(r.vfp_argument_regs): + reg = r.vfp_argument_regs[len(float_regs)] + float_locs.append(arg) + float_regs.append(reg) + else: # float argument that needs to go on the stack + if count % 2 != 0: + stack_args.append(None) + count = 0 + stack_args.append(arg) + # align the stack + if count % 2 != 0: + stack_args.append(None) + self._push_stack_args(stack_args) + # Check that the address of the function we want to call is not + # currently stored in one of the registers used to pass the arguments. + # If this happens to be the case we remap the register to r4 and use r4 + # to call the function + if adr in non_float_regs: + non_float_locs.append(adr) + non_float_regs.append(r.r4) + adr = r.r4 + # remap values stored in core registers + remap_frame_layout(self, non_float_locs, non_float_regs, r.ip) + # remap values stored in vfp registers + remap_frame_layout(self, float_locs, float_regs, r.vfp_ip) - return fcond + return stack_args, adr def emit_op_same_as(self, op, arglocs, regalloc, fcond): argloc, resloc = arglocs @@ -506,32 +574,30 @@ def emit_op_cond_call_gc_wb(self, op, arglocs, regalloc, fcond): # Write code equivalent to write_barrier() in the GC: it checks - # a flag in the object at arglocs[0], and if set, it calls the - # function remember_young_pointer() from the GC. The two arguments - # to the call are in arglocs[:2]. The rest, arglocs[2:], contains - # registers that need to be saved and restored across the call. + # a flag in the object at arglocs[0], and if set, it calls a + # helper piece of assembler. The latter saves registers as needed + # and call the function jit_remember_young_pointer() from the GC. descr = op.getdescr() if we_are_translated(): cls = self.cpu.gc_ll_descr.has_write_barrier_class() assert cls is not None and isinstance(descr, cls) - + # opnum = op.getopnum() - if opnum == rop.COND_CALL_GC_WB: - N = 2 - addr = descr.get_write_barrier_fn(self.cpu) - card_marking = False - elif opnum == rop.COND_CALL_GC_WB_ARRAY: - N = 3 - addr = descr.get_write_barrier_from_array_fn(self.cpu) - assert addr != 0 - card_marking = descr.jit_wb_cards_set != 0 - else: - raise AssertionError(opnum) + card_marking = False + mask = descr.jit_wb_if_flag_singlebyte + if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0: + # assumptions the rest of the function depends on: + assert (descr.jit_wb_cards_set_byteofs == + descr.jit_wb_if_flag_byteofs) + assert descr.jit_wb_cards_set_singlebyte == -0x80 + card_marking = True + mask = descr.jit_wb_if_flag_singlebyte | -0x80 + # loc_base = arglocs[0] - assert check_imm_arg(descr.jit_wb_if_flag_byteofs) - assert check_imm_arg(descr.jit_wb_if_flag_singlebyte) - self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_if_flag_byteofs) - self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_if_flag_singlebyte) + self.mc.LDRB_ri(r.ip.value, loc_base.value, + imm=descr.jit_wb_if_flag_byteofs) + mask &= 0xFF + self.mc.TST_ri(r.ip.value, imm=mask) jz_location = self.mc.currpos() self.mc.BKPT() @@ -539,68 +605,80 @@ # for cond_call_gc_wb_array, also add another fast path: # if GCFLAG_CARDS_SET, then we can just set one bit and be done if card_marking: - assert check_imm_arg(descr.jit_wb_cards_set_byteofs) - assert check_imm_arg(descr.jit_wb_cards_set_singlebyte) - self.mc.LDRB_ri(r.ip.value, loc_base.value, imm=descr.jit_wb_cards_set_byteofs) - self.mc.TST_ri(r.ip.value, imm=descr.jit_wb_cards_set_singlebyte) - # - jnz_location = self.mc.currpos() + # GCFLAG_CARDS_SET is in this byte at 0x80 + self.mc.TST_ri(r.ip.value, imm=0x80) + + js_location = self.mc.currpos() # + self.mc.BKPT() + else: + js_location = 0 + + # Write only a CALL to the helper prepared in advance, passing it as + # argument the address of the structure we are writing into + # (the first argument to COND_CALL_GC_WB). + helper_num = card_marking + if self._regalloc.vfprm.reg_bindings: + helper_num += 2 + if self.wb_slowpath[helper_num] == 0: # tests only + assert not we_are_translated() + self.cpu.gc_ll_descr.write_barrier_descr = descr + self._build_wb_slowpath(card_marking, + bool(self._regalloc.vfprm.reg_bindings)) + assert self.wb_slowpath[helper_num] != 0 + # + if loc_base is not r.r0: + # push two registers to keep stack aligned + self.mc.PUSH([r.r0.value, loc_base.value]) + remap_frame_layout(self, [loc_base], [r.r0], r.ip) + self.mc.BL(self.wb_slowpath[helper_num]) + if loc_base is not r.r0: + self.mc.POP([r.r0.value, loc_base.value]) + + if card_marking: + # The helper ends again with a check of the flag in the object. So + # here, we can simply write again a conditional jump, which will be + # taken if GCFLAG_CARDS_SET is still not set. + jns_location = self.mc.currpos() self.mc.BKPT() # - else: - jnz_location = 0 - - # the following is supposed to be the slow path, so whenever possible - # we choose the most compact encoding over the most efficient one. - with saved_registers(self.mc, r.caller_resp): - if N == 2: - callargs = [r.r0, r.r1] - else: - callargs = [r.r0, r.r1, r.r2] - remap_frame_layout(self, arglocs, callargs, r.ip) - func = rffi.cast(lltype.Signed, addr) - # misaligned stack in the call, but it's ok because the write - # barrier is not going to call anything more. - self.mc.BL(func) - - # if GCFLAG_CARDS_SET, then we can do the whole thing that would - # be done in the CALL above with just four instructions, so here - # is an inline copy of them - if card_marking: - jmp_location = self.mc.get_relative_pos() - self.mc.BKPT() # jump to the exit, patched later - # patch the JNZ above + # patch the JS above offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, jnz_location, WORD) - pmc.B_offs(offset, c.NE) + pmc = OverwritingBuilder(self.mc, js_location, WORD) + pmc.B_offs(offset, c.NE) # We want to jump if the z flag is not set # + # case GCFLAG_CARDS_SET: emit a few instructions to do + # directly the card flag setting loc_index = arglocs[1] assert loc_index.is_reg() - tmp1 = arglocs[-2] - tmp2 = arglocs[-1] - #byteofs - s = 3 + descr.jit_wb_card_page_shift - self.mc.MVN_rr(r.lr.value, loc_index.value, - imm=s, shifttype=shift.LSR) - # byte_index - self.mc.MOV_ri(r.ip.value, imm=7) - self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value, - imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR) + # must save the register loc_index before it is mutated + self.mc.PUSH([loc_index.value]) + tmp1 = loc_index + tmp2 = arglocs[2] + # lr = byteofs + s = 3 + descr.jit_wb_card_page_shift + self.mc.MVN_rr(r.lr.value, loc_index.value, + imm=s, shifttype=shift.LSR) + + # tmp1 = byte_index + self.mc.MOV_ri(r.ip.value, imm=7) + self.mc.AND_rr(tmp1.value, r.ip.value, loc_index.value, + imm=descr.jit_wb_card_page_shift, shifttype=shift.LSR) + + # set the bit + self.mc.MOV_ri(tmp2.value, imm=1) + self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value) + self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value, + tmp1.value, shifttype=shift.LSL) + self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value) + # done + self.mc.POP([loc_index.value]) + # + # + # patch the JNS above + offset = self.mc.currpos() + pmc = OverwritingBuilder(self.mc, jns_location, WORD) + pmc.B_offs(offset, c.EQ) # We want to jump if the z flag is set - # set the bit - self.mc.MOV_ri(tmp2.value, imm=1) - self.mc.LDRB_rr(r.ip.value, loc_base.value, r.lr.value) - self.mc.ORR_rr_sr(r.ip.value, r.ip.value, tmp2.value, - tmp1.value, shifttype=shift.LSL) - self.mc.STRB_rr(r.ip.value, loc_base.value, r.lr.value) - # done - - # patch the JMP above - offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, jmp_location, WORD) - pmc.B_offs(offset) - # - # patch the JZ above offset = self.mc.currpos() pmc = OverwritingBuilder(self.mc, jz_location, WORD) pmc.B_offs(offset, c.EQ) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit