Author: hager <sven.ha...@uni-duesseldorf.de> Branch: ppc-jit-backend Changeset: r51007:634bd8357b6e Date: 2012-01-04 14:02 +0100 http://bitbucket.org/pypy/pypy/changeset/634bd8357b6e/
Log: (bivab, hager): adjusted code to meet the latest refactoring, made first test pass diff --git a/pypy/jit/backend/ppc/ppcgen/codebuilder.py b/pypy/jit/backend/ppc/ppcgen/codebuilder.py --- a/pypy/jit/backend/ppc/ppcgen/codebuilder.py +++ b/pypy/jit/backend/ppc/ppcgen/codebuilder.py @@ -997,11 +997,11 @@ self.stdx(source_reg.value, 0, r.SCRATCH.value) self.free_scratch_reg() - def b_offset(self, offset): + def b_offset(self, target): curpos = self.currpos() - target_ofs = offset - curpos - assert target_ofs < (1 << 24) - self.b(target_ofs) + offset = target - curpos + assert offset < (1 << 24) + self.b(offset) def b_cond_offset(self, offset, condition): BI = condition[0] diff --git a/pypy/jit/backend/ppc/ppcgen/opassembler.py b/pypy/jit/backend/ppc/ppcgen/opassembler.py --- a/pypy/jit/backend/ppc/ppcgen/opassembler.py +++ b/pypy/jit/backend/ppc/ppcgen/opassembler.py @@ -7,7 +7,7 @@ MAX_REG_PARAMS) from pypy.jit.metainterp.history import (JitCellToken, TargetToken, - AbstractFailDescr, FLOAT, INT) + AbstractFailDescr, FLOAT, INT, REF) from pypy.rlib.objectmodel import we_are_translated from pypy.jit.backend.ppc.ppcgen.helper.assembler import (count_reg_args, Saved_Volatiles) @@ -273,7 +273,52 @@ _mixin_ = True def emit_finish(self, op, arglocs, regalloc): - self.gen_exit_stub(op.getdescr(), op.getarglist(), arglocs) + for i in range(len(arglocs) - 1): + loc = arglocs[i] + box = op.getarg(i) + if loc is None: + continue + if loc.is_reg(): + if box.type == REF: + adr = self.fail_boxes_ptr.get_addr_for_num(i) + elif box.type == INT: + adr = self.fail_boxes_int.get_addr_for_num(i) + else: + assert 0 + self.mc.alloc_scratch_reg(adr) + self.mc.storex(loc.value, 0, r.SCRATCH.value) + self.mc.free_scratch_reg() + elif loc.is_vfp_reg(): + assert box.type == FLOAT + assert 0, "not implemented yet" + elif loc.is_stack() or loc.is_imm() or loc.is_imm_float(): + if box.type == FLOAT: + assert 0, "not implemented yet" + elif box.type == REF or box.type == INT: + if box.type == 
REF: + adr = self.fail_boxes_ptr.get_addr_for_num(i) + elif box.type == INT: + adr = self.fail_boxes_int.get_addr_for_num(i) + else: + assert 0 + self.mc.alloc_scratch_reg() + self.mov_loc_loc(loc, r.SCRATCH) + # store content of r5 temporary in ENCODING AREA + self.mc.store(r.r5.value, r.SPP.value, 0) + self.mc.load_imm(r.r5, adr) + self.mc.store(r.SCRATCH.value, r.r5.value, 0) + self.mc.free_scratch_reg() + # restore r5 + self.mc.load(r.r5.value, r.SPP.value, 0) + else: + assert 0 + # note: no exception should currently be set in llop.get_exception_addr + # even if this finish may be an exit_frame_with_exception (in this case + # the exception instance is in arglocs[0]). + addr = self.cpu.get_on_leave_jitted_int(save_exception=False) + self.mc.call(addr) + self.mc.load_imm(r.RES, arglocs[-1].value) + self._gen_epilogue(self.mc) def emit_jump(self, op, arglocs, regalloc): descr = op.getdescr() diff --git a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py --- a/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py +++ b/pypy/jit/backend/ppc/ppcgen/ppc_assembler.py @@ -88,6 +88,10 @@ OFFSET_SPP_TO_OLD_BACKCHAIN = (OFFSET_SPP_TO_GPR_SAVE_AREA + GPR_SAVE_AREA + FPR_SAVE_AREA) + OFFSET_STACK_ARGS = OFFSET_SPP_TO_OLD_BACKCHAIN + BACKCHAIN_SIZE * WORD + if IS_PPC_64: + OFFSET_STACK_ARGS += MAX_REG_PARAMS * WORD + def __init__(self, cpu, failargs_limit=1000): self.cpu = cpu self.fail_boxes_int = values_array(lltype.Signed, failargs_limit) @@ -118,12 +122,6 @@ mc.load(reg.value, spp_reg.value, self.OFFSET_SPP_TO_GPR_SAVE_AREA + WORD * i) - def _make_prologue(self, target_pos, frame_depth): - self._make_frame(frame_depth) - curpos = self.mc.currpos() - offset = target_pos - curpos - self.mc.b(offset) - # The code generated here allocates a new stackframe # and is the first machine code to be executed. 
def _make_frame(self, frame_depth): @@ -143,7 +141,10 @@ # compute spilling pointer (SPP) self.mc.addi(r.SPP.value, r.SP.value, frame_depth - self.OFFSET_SPP_TO_OLD_BACKCHAIN) + + # save nonvolatile registers self._save_nonvolatiles() + # save r31, use r30 as scratch register # this is safe because r30 has been saved already assert NONVOLATILES[-1] == r.SPP @@ -180,6 +181,7 @@ regs = rffi.cast(rffi.CCHARP, spp_loc) i = -1 fail_index = -1 + import pdb; pdb.set_trace() while(True): i += 1 fail_index += 1 @@ -347,117 +349,23 @@ reg = r.MANAGED_REGS[i] mc.store(reg.value, r.SPP.value, i * WORD) - # Load parameters from fail args into locations (stack or registers) - def gen_bootstrap_code(self, nonfloatlocs, inputargs): - for i in range(len(nonfloatlocs)): - loc = nonfloatlocs[i] - arg = inputargs[i] - assert arg.type != FLOAT - if arg.type == INT: - addr = self.fail_boxes_int.get_addr_for_num(i) - elif arg.type == REF: - addr = self.fail_boxes_ptr.get_addr_for_num(i) - else: - assert 0, "%s not supported" % arg.type - if loc.is_reg(): - reg = loc - else: - reg = r.SCRATCH - self.mc.load_from_addr(reg, addr) - if loc.is_stack(): - self.regalloc_mov(r.SCRATCH, loc) - - def gen_direct_bootstrap_code(self, loophead, looptoken, inputargs, frame_depth): - self._make_frame(frame_depth) - nonfloatlocs = looptoken._ppc_arglocs[0] - - reg_args = count_reg_args(inputargs) - - stack_locs = len(inputargs) - reg_args - - selected_reg = 0 - count = 0 - nonfloat_args = [] - nonfloat_regs = [] - # load reg args - for i in range(reg_args): - arg = inputargs[i] - if arg.type == FLOAT and count % 2 != 0: - assert 0, "not implemented yet" - reg = r.PARAM_REGS[selected_reg] - - if arg.type == FLOAT: - assert 0, "not implemented yet" - else: - nonfloat_args.append(reg) - nonfloat_regs.append(nonfloatlocs[i]) - - if arg.type == FLOAT: - assert 0, "not implemented yet" - else: - selected_reg += 1 - count += 1 - - # remap values stored in core registers - self.mc.alloc_scratch_reg() - 
remap_frame_layout(self, nonfloat_args, nonfloat_regs, r.SCRATCH) - self.mc.free_scratch_reg() - - # load values passed on the stack to the corresponding locations - if IS_PPC_32: - stack_position = self.OFFSET_SPP_TO_OLD_BACKCHAIN\ - + BACKCHAIN_SIZE * WORD - else: - stack_position = self.OFFSET_SPP_TO_OLD_BACKCHAIN\ - + (BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD - - count = 0 - for i in range(reg_args, len(inputargs)): - arg = inputargs[i] - if arg.type == FLOAT: - assert 0, "not implemented yet" - else: - loc = nonfloatlocs[i] - if loc.is_reg(): - self.mc.load(loc.value, r.SPP.value, stack_position) - count += 1 - elif loc.is_vfp_reg(): - assert 0, "not implemented yet" - elif loc.is_stack(): - if loc.type == FLOAT: - assert 0, "not implemented yet" - elif loc.type == INT or loc.type == REF: - count += 1 - self.mc.alloc_scratch_reg() - self.mc.load(r.SCRATCH.value, r.SPP.value, stack_position) - self.mov_loc_loc(r.SCRATCH, loc) - self.mc.free_scratch_reg() - else: - assert 0, 'invalid location' - else: - assert 0, 'invalid location' - if loc.type == FLOAT: - assert 0, "not implemented yet" - else: - size = 1 - stack_position += size * WORD - - #sp_patch_location = self._prepare_sp_patch_position() + def gen_bootstrap_code(self, loophead, spilling_area): + self._make_frame(spilling_area) self.mc.b_offset(loophead) - #self._patch_sp_offset(sp_patch_location, looptoken._ppc_frame_depth) def setup(self, looptoken, operations): - assert self.memcpy_addr != 0 self.current_clt = looptoken.compiled_loop_token operations = self.cpu.gc_ll_descr.rewrite_assembler(self.cpu, operations, self.current_clt.allgcrefs) + assert self.memcpy_addr != 0 self.mc = PPCBuilder() self.pending_guards = [] allblocks = self.get_asmmemmgr_blocks(looptoken) self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, allblocks) - self.stack_in_use = False self.max_stack_params = 0 + self.target_tokens_currently_compiling = {} + return operations def setup_once(self): gc_ll_descr = 
self.cpu.gc_ll_descr @@ -470,62 +378,62 @@ self._leave_jitted_hook = self._gen_leave_jitted_hook_code(False) def assemble_loop(self, inputargs, operations, looptoken, log): - clt = CompiledLoopToken(self.cpu, looptoken.number) clt.allgcrefs = [] looptoken.compiled_loop_token = clt + clt._debug_nbargs = len(inputargs) - self.setup(looptoken, operations) + if not we_are_translated(): + assert len(set(inputargs)) == len(inputargs) + + operations = self.setup(looptoken, operations) self.startpos = self.mc.currpos() longevity = compute_vars_longevity(inputargs, operations) regalloc = Regalloc(longevity, assembler=self, frame_manager=PPCFrameManager()) - nonfloatlocs = regalloc.prepare_loop(inputargs, operations, looptoken) + regalloc.prepare_loop(inputargs, operations) regalloc_head = self.mc.currpos() - self.gen_bootstrap_code(nonfloatlocs, inputargs) - - loophead = self.mc.currpos() # address of actual loop - looptoken._ppc_loop_code = loophead - looptoken._ppc_arglocs = [nonfloatlocs] - looptoken._ppc_bootstrap_code = 0 - - self._walk_operations(operations, regalloc) start_pos = self.mc.currpos() - self.framesize = frame_depth = self.compute_frame_depth(regalloc) - looptoken._ppc_frame_manager_depth = regalloc.frame_manager.frame_depth - self._make_prologue(regalloc_head, frame_depth) + clt.frame_depth = -1 + spilling_area = self._assemble(operations, regalloc) + clt.frame_depth = spilling_area direct_bootstrap_code = self.mc.currpos() - self.gen_direct_bootstrap_code(loophead, looptoken, inputargs, frame_depth) + frame_depth = self.compute_frame_depth(spilling_area) + self.gen_bootstrap_code(start_pos, frame_depth) self.write_pending_failure_recoveries() if IS_PPC_64: - fdescrs = self.gen_64_bit_func_descrs() - loop_start = self.materialize_loop(looptoken, False) - looptoken._ppc_bootstrap_code = loop_start + fdescr = self.gen_64_bit_func_descr() + + # write instructions to memory + loop_start = self.materialize_loop(looptoken, True) real_start = loop_start + 
direct_bootstrap_code if IS_PPC_32: - looptoken._ppc_direct_bootstrap_code = real_start + looptoken._ppc_func_addr = real_start else: - self.write_64_bit_func_descr(fdescrs[0], real_start) - looptoken._ppc_direct_bootstrap_code = fdescrs[0] + self.write_64_bit_func_descr(fdescr, real_start) + looptoken._ppc_func_addr = fdescr - real_start = loop_start + start_pos - if IS_PPC_32: - looptoken.ppc_code = real_start - else: - self.write_64_bit_func_descr(fdescrs[1], real_start) - looptoken.ppc_code = fdescrs[1] self.process_pending_guards(loop_start) if not we_are_translated(): print 'Loop', inputargs, operations self.mc._dump_trace(loop_start, 'loop_%s.asm' % self.cpu.total_compiled_loops) print 'Done assembling loop with token %r' % looptoken + self._teardown() - self._teardown() + def _assemble(self, operations, regalloc): + regalloc.compute_hint_frame_locations(operations) + self._walk_operations(operations, regalloc) + frame_depth = regalloc.frame_manager.get_frame_depth() + jump_target_descr = regalloc.jump_target_descr + if jump_target_descr is not None: + frame_depth = max(frame_depth, + jump_target_descr._ppc_clt.frame_depth) + return frame_depth def assemble_bridge(self, faildescr, inputargs, operations, looptoken, log): self.setup(looptoken, operations) @@ -598,7 +506,6 @@ i += 1 mem[j] = chr(0xFF) - n = self.cpu.get_fail_descr_number(descr) encode32(mem, j+1, n) return memaddr @@ -671,9 +578,7 @@ return True def gen_64_bit_func_descrs(self): - d0 = self.datablockwrapper.malloc_aligned(3*WORD, alignment=1) - d1 = self.datablockwrapper.malloc_aligned(3*WORD, alignment=1) - return [d0, d1] + return self.datablockwrapper.malloc_aligned(3*WORD, alignment=1) def write_64_bit_func_descr(self, descr, start_addr): data = rffi.cast(rffi.CArrayPtr(lltype.Signed), descr) @@ -681,11 +586,11 @@ data[1] = 0 data[2] = 0 - def compute_frame_depth(self, regalloc): + def compute_frame_depth(self, spilling_area): PARAMETER_AREA = self.max_stack_params * WORD if IS_PPC_64: 
PARAMETER_AREA += MAX_REG_PARAMS * WORD - SPILLING_AREA = regalloc.frame_manager.frame_depth * WORD + SPILLING_AREA = spilling_area * WORD frame_depth = ( GPR_SAVE_AREA + FPR_SAVE_AREA diff --git a/pypy/jit/backend/ppc/ppcgen/regalloc.py b/pypy/jit/backend/ppc/ppcgen/regalloc.py --- a/pypy/jit/backend/ppc/ppcgen/regalloc.py +++ b/pypy/jit/backend/ppc/ppcgen/regalloc.py @@ -98,11 +98,13 @@ class PPCFrameManager(FrameManager): def __init__(self): FrameManager.__init__(self) - self.frame_depth = 0 + self.used = [] @staticmethod def frame_pos(loc, type): num_words = PPCFrameManager.frame_size(type) + if type == FLOAT: + assert 0, "not implemented yet" return locations.StackLocation(loc, num_words=num_words, type=type) @staticmethod @@ -112,31 +114,63 @@ return 1 class Regalloc(object): + def __init__(self, longevity, frame_manager=None, assembler=None): self.cpu = assembler.cpu - self.longevity = longevity self.frame_manager = frame_manager self.assembler = assembler self.rm = PPCRegisterManager(longevity, frame_manager, assembler) + self.jump_target_descr = None - def prepare_loop(self, inputargs, operations, looptoken): - loop_consts = compute_loop_consts(inputargs, operations[-1], looptoken) - inputlen = len(inputargs) - nonfloatlocs = [None] * len(inputargs) - for i in range(inputlen): - arg = inputargs[i] - assert not isinstance(arg, Const) - if arg not in loop_consts and self.longevity[arg][1] > -1: - self.try_allocate_reg(arg) - loc = self.loc(arg) - nonfloatlocs[i] = loc - self.possibly_free_vars(inputargs) - return nonfloatlocs + def _prepare(self, inputargs, operations): + longevity, last_real_usage = compute_vars_longevity( + inputargs, operations) + self.longevity = longevity + self.last_real_usage = last_real_usage + fm = self.frame_manager + asm = self.assembler + self.rm = PPCRegisterManager(longevity, fm, asm) + + def prepare_loop(self, inputargs, operations): + self._prepare(inputargs, operations) + self._set_initial_bindings(inputargs) + 
self.possibly_free_vars(list(inputargs)) + + def prepare_bridge(self, inputargs, arglocs, ops): + self._prepare(inputargs, ops) + self._update_bindings(arglocs, inputargs) + + def _set_initial_bindings(self, inputargs): + arg_index = 0 + count = 0 + n_register_args = len(r.PARAM_REGS) + cur_frame_pos = -self.assembler.OFFSET_STACK_ARGS // WORD + 1 + for box in inputargs: + assert isinstance(box, Box) + # handle inputargs in argument registers + if box.type == FLOAT and arg_index % 2 != 0: + assert 0, "not implemented yet" + if arg_index < n_register_args: + if box.type == FLOAT: + assert 0, "not implemented yet" + else: + loc = r.PARAM_REGS[arg_index] + self.try_allocate_reg(box, selected_reg=loc) + arg_index += 1 + else: + # treat stack args as stack locations with a negative offset + if box.type == FLOAT: + assert 0, "not implemented yet" + else: + cur_frame_pos -= 1 + count += 1 + loc = self.frame_manager.frame_pos(cur_frame_pos, box.type) + self.frame_manager.set_binding(box, loc) def update_bindings(self, locs, frame_depth, inputargs): used = {} i = 0 - self.frame_manager.frame_depth = frame_depth + #self.frame_manager.frame_depth = frame_depth for loc in locs: arg = inputargs[i] i += 1 @@ -296,20 +330,20 @@ prepare_int_is_zero = prepare_unary_cmp() def prepare_finish(self, op): - args = [locations.imm(self.frame_manager.frame_depth)] + args = [None] * (op.numargs() + 1) for i in range(op.numargs()): arg = op.getarg(i) if arg: - args.append(self.loc(arg)) + args[i] = self.loc(arg) self.possibly_free_var(arg) - else: - args.append(None) + n = self.cpu.get_fail_descr_number(op.getdescr()) + args[-1] = imm(n) return args def _prepare_guard(self, op, args=None): if args is None: args = [] - args.append(imm(self.frame_manager.frame_depth)) + args.append(imm(len(self.frame_manager.used))) for arg in op.getfailargs(): if arg: args.append(self.loc(arg)) @@ -405,6 +439,65 @@ prepare_guard_nonnull_class = prepare_guard_class + def compute_hint_frame_locations(self, 
operations): + # optimization only: fill in the 'hint_frame_locations' dictionary + # of rm and xrm based on the JUMP at the end of the loop, by looking + # at where we would like the boxes to be after the jump. + op = operations[-1] + if op.getopnum() != rop.JUMP: + return + self.final_jump_op = op + descr = op.getdescr() + assert isinstance(descr, TargetToken) + if descr._ppc_loop_code != 0: + # if the target LABEL was already compiled, i.e. if it belongs + # to some already-compiled piece of code + self._compute_hint_frame_locations_from_descr(descr) + #else: + # The loop ends in a JUMP going back to a LABEL in the same loop. + # We cannot fill 'hint_frame_locations' immediately, but we can + # wait until the corresponding prepare_op_label() to know where + # we would like the boxes to be after the jump. + + def _compute_hint_frame_locations_from_descr(self, descr): + arglocs = self.assembler.target_arglocs(descr) + jump_op = self.final_jump_op + assert len(arglocs) == jump_op.numargs() + for i in range(jump_op.numargs()): + box = jump_op.getarg(i) + if isinstance(box, Box): + loc = arglocs[i] + if loc is not None and loc.is_stack(): + self.frame_manager.hint_frame_locations[box] = loc + + def prepare_op_jump(self, op): + descr = op.getdescr() + assert isinstance(descr, TargetToken) + self.jump_target_descr = descr + arglocs = self.assembler.target_arglocs(descr) + + # get temporary locs + tmploc = r.SCRATCH + + # Part about non-floats + src_locations1 = [] + dst_locations1 = [] + + # Build the two lists + for i in range(op.numargs()): + box = op.getarg(i) + src_loc = self.loc(box) + dst_loc = arglocs[i] + if box.type != FLOAT: + src_locations1.append(src_loc) + dst_locations1.append(dst_loc) + else: + assert 0, "not implemented yet" + + remap_frame_layout(self.assembler, src_locations1, + dst_locations1, tmploc) + return [] + def prepare_guard_call_release_gil(self, op, guard_op): # first, close the stack in the sense of the asmgcc GC root tracker gcrootmap
= self.cpu.gc_ll_descr.gcrootmap diff --git a/pypy/jit/backend/ppc/runner.py b/pypy/jit/backend/ppc/runner.py --- a/pypy/jit/backend/ppc/runner.py +++ b/pypy/jit/backend/ppc/runner.py @@ -43,7 +43,6 @@ self.asm.setup_once() def compile_loop(self, inputargs, operations, looptoken, log=False): - self.saved_descr = {} self.asm.assemble_loop(inputargs, operations, looptoken, log) def compile_bridge(self, faildescr, inputargs, operations, @@ -66,24 +65,26 @@ self.asm.fail_boxes_ptr.setitem(index, null) # executes the stored machine code in the token - def execute_token(self, looptoken): - addr = looptoken.ppc_code - func = rffi.cast(lltype.Ptr(self.BOOTSTRAP_TP), addr) - fail_index = self._execute_call(func) - return self.get_fail_descr_from_number(fail_index) + def make_execute_token(self, *ARGS): + FUNCPTR = lltype.Ptr(lltype.FuncType(ARGS, lltype.Signed)) - def _execute_call(self, func): - prev_interpreter = None - if not self.translate_support_code: - prev_interpreter = LLInterpreter.current_interpreter - LLInterpreter.current_interpreter = self.debug_ll_interpreter - res = 0 - try: - res = func() - finally: + def execute_token(executable_token, *args): + clt = executable_token.compiled_loop_token + assert len(args) == clt._debug_nbargs + # + addr = executable_token._ppc_func_addr + func = rffi.cast(FUNCPTR, addr) + prev_interpreter = None # help flow space if not self.translate_support_code: - LLInterpreter.current_interpreter = prev_interpreter - return res + prev_interpreter = LLInterpreter.current_interpreter + LLInterpreter.current_interpreter = self.debug_ll_interpreter + try: + fail_index = func(*args) + finally: + if not self.translate_support_code: + LLInterpreter.current_interpreter = prev_interpreter + return self.get_fail_descr_from_number(fail_index) + return execute_token @staticmethod def cast_ptr_to_int(x): _______________________________________________ pypy-commit mailing list pypy-commit@python.org 
http://mail.python.org/mailman/listinfo/pypy-commit