Author: David Schneider <david.schnei...@picle.org>
Branch: ppc-jit-backend
Changeset: r56429:069eb5ce9bf0
Date: 2012-07-24 09:50 -0700
http://bitbucket.org/pypy/pypy/changeset/069eb5ce9bf0/
Log: (edelsohn, bivab) implement new version of cond_call_gc diff --git a/pypy/jit/backend/ppc/opassembler.py b/pypy/jit/backend/ppc/opassembler.py --- a/pypy/jit/backend/ppc/opassembler.py +++ b/pypy/jit/backend/ppc/opassembler.py @@ -1000,26 +1000,23 @@ opnum = op.getopnum() card_marking = False + mask = descr.jit_wb_if_flag_singlebyte if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0: - N = 3 - addr = descr.get_write_barrier_from_array_fn(self.cpu) - assert addr != 0 + # assumptions the rest of the function depends on: assert (descr.jit_wb_cards_set_byteofs == descr.jit_wb_if_flag_byteofs) assert descr.jit_wb_cards_set_singlebyte == -0x80 card_marking = True - else: - N = 2 - addr = descr.get_write_barrier_fn(self.cpu) + mask = descr.jit_wb_if_flag_singlebyte | -0x80 + # loc_base = arglocs[0] assert _check_imm_arg(descr.jit_wb_if_flag_byteofs) with scratch_reg(self.mc): self.mc.lbz(r.SCRATCH.value, loc_base.value, descr.jit_wb_if_flag_byteofs) - # test whether this bit is set - self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, - descr.jit_wb_if_flag_singlebyte) + mask &= 0xFF + self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, mask) jz_location = self.mc.currpos() self.mc.nop() @@ -1027,57 +1024,65 @@ # for cond_call_gc_wb_array, also add another fast path: # if GCFLAG_CARDS_SET, then we can just set one bit and be done if card_marking: - assert _check_imm_arg(descr.jit_wb_cards_set_byteofs) - assert descr.jit_wb_cards_set_singlebyte == -0x80 with scratch_reg(self.mc): self.mc.lbz(r.SCRATCH.value, loc_base.value, descr.jit_wb_if_flag_byteofs) + self.mc.extsb(r.SCRATCH.value, r.SCRATCH.value) # test whether this bit is set - self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, - descr.jit_wb_cards_set_singlebyte) + self.mc.cmpwi(0, r.SCRATCH.value, 0) - jnz_location = self.mc.currpos() + js_location = self.mc.currpos() self.mc.nop() + #self.mc.trap() else: - jnz_location = 0 + js_location = 0 - # the following is supposed to be the slow path, so 
whenever possible - # we choose the most compact encoding over the most efficient one. - with Saved_Volatiles(self.mc): - if N == 2: - callargs = [r.r3, r.r4] - else: - callargs = [r.r3, r.r4, r.r5] - remap_frame_layout(self, arglocs, callargs, r.SCRATCH) - func = rffi.cast(lltype.Signed, addr) - # - # misaligned stack in the call, but it's ok because the write - # barrier is not going to call anything more. - self.mc.call(func) + # Write only a CALL to the helper prepared in advance, passing it as + # argument the address of the structure we are writing into + # (the first argument to COND_CALL_GC_WB). + helper_num = card_marking + + if self._regalloc.fprm.reg_bindings: + helper_num += 2 + if self.wb_slowpath[helper_num] == 0: # tests only + assert not we_are_translated() + self.cpu.gc_ll_descr.write_barrier_descr = descr + self._build_wb_slowpath(card_marking, + bool(self._regalloc.fprm.reg_bindings)) + assert self.wb_slowpath[helper_num] != 0 + # + if loc_base is not r.r3: + remap_frame_layout(self, [loc_base], [r.r3], r.SCRATCH) + addr = self.wb_slowpath[helper_num] + func = rffi.cast(lltype.Signed, addr) + self.mc.bl_abs(func) # if GCFLAG_CARDS_SET, then we can do the whole thing that would # be done in the CALL above with just four instructions, so here # is an inline copy of them if card_marking: with scratch_reg(self.mc): - jmp_location = self.mc.currpos() + jns_location = self.mc.currpos() self.mc.nop() # jump to the exit, patched later - # patch the JNZ above + # patch the JS above offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, jnz_location, 1) - pmc.bc(12, 2, offset - jnz_location) # jump on equality + pmc = OverwritingBuilder(self.mc, js_location, 1) + # Jump if JS comparison is less than (bit set) + pmc.bc(12, 0, offset - js_location) pmc.overwrite() # + # case GCFLAG_CARDS_SET: emit a few instructions to do + # directly the card flag setting loc_index = arglocs[1] assert loc_index.is_reg() - tmp1 = arglocs[-2] - tmp2 = arglocs[-1] + 
tmp1 = loc_index + tmp2 = arglocs[-2] #byteofs s = 3 + descr.jit_wb_card_page_shift - # use r20 as temporary register, save it in FORCE INDEX slot - temp_reg = r.r20 + # use r11 as temporary register, save it in FORCE INDEX slot + temp_reg = r.r11 self.mc.store(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS) self.mc.srli_op(temp_reg.value, loc_index.value, s) @@ -1097,24 +1102,21 @@ self.mc.stbx(r.SCRATCH.value, loc_base.value, temp_reg.value) # done - # restore temporary register r20 + # restore temporary register r11 self.mc.load(temp_reg.value, r.SPP.value, FORCE_INDEX_OFS) - # patch the JMP above + # patch the JNS above offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, jmp_location, 1) - pmc.b(offset - jmp_location) + pmc = OverwritingBuilder(self.mc, jns_location, 1) + # Jump if JNS comparison is not less than (bit not set) + pmc.bc(4, 0, offset - jns_location) pmc.overwrite() # patch the JZ above - offset = self.mc.currpos() - jz_location + offset = self.mc.currpos() pmc = OverwritingBuilder(self.mc, jz_location, 1) - # We want to jump if the compared bits are not equal. - # This corresponds to the x86 backend, which uses - # the TEST operation. Hence, on first sight, it might - # seem that we use the wrong condition here. This is - # because TEST results in a 1 if the operands are different. 
- pmc.bc(4, 2, offset) + # Jump if JZ comparison is zero (CMP 0 is equal) + pmc.bc(12, 2, offset - jz_location) pmc.overwrite() emit_cond_call_gc_wb_array = emit_cond_call_gc_wb diff --git a/pypy/jit/backend/ppc/ppc_assembler.py b/pypy/jit/backend/ppc/ppc_assembler.py --- a/pypy/jit/backend/ppc/ppc_assembler.py +++ b/pypy/jit/backend/ppc/ppc_assembler.py @@ -89,11 +89,14 @@ failargs_limit) self.fail_boxes_ptr = values_array(llmemory.GCREF, failargs_limit) self.mc = None - self.datablockwrapper = None self.memcpy_addr = 0 + self.pending_guards = None self.fail_boxes_count = 0 self.current_clt = None + self.malloc_slowpath = 0 + self.wb_slowpath = [0, 0, 0, 0] self._regalloc = None + self.datablockwrapper = None self.max_stack_params = 0 self.propagate_exception_path = 0 self.stack_check_slowpath = 0 @@ -497,6 +500,61 @@ self.write_64_bit_func_descr(rawstart, rawstart+3*WORD) self.stack_check_slowpath = rawstart + def _build_wb_slowpath(self, withcards, withfloats=False): + descr = self.cpu.gc_ll_descr.write_barrier_descr + if descr is None: + return + if not withcards: + func = descr.get_write_barrier_fn(self.cpu) + else: + if descr.jit_wb_cards_set == 0: + return + func = descr.get_write_barrier_from_array_fn(self.cpu) + if func == 0: + return + # + # This builds a helper function called from the slow path of + # write barriers. It must save all registers, and optionally + # all fp registers. 
+ mc = PPCBuilder() + # + frame_size = ((len(r.VOLATILES) + len(r.VOLATILES_FLOAT) + + BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD) + mc.make_function_prologue(frame_size) + for i in range(len(r.VOLATILES)): + mc.store(r.VOLATILES[i].value, r.SP.value, + (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) + if self.cpu.supports_floats: + for i in range(len(r.VOLATILES_FLOAT)): + mc.stfd(r.VOLATILES_FLOAT[i].value, r.SP.value, + (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) + + mc.call(rffi.cast(lltype.Signed, func)) + if self.cpu.supports_floats: + for i in range(len(r.VOLATILES_FLOAT)): + mc.lfd(r.VOLATILES_FLOAT[i].value, r.SP.value, + (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) + for i in range(len(r.VOLATILES)): + mc.load(r.VOLATILES[i].value, r.SP.value, + (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) + mc.restore_LR_from_caller_frame(frame_size) + # + if withcards: + # A final compare before the RET, for the caller. Careful to + # not follow this instruction with another one that changes + # the status of the CPU flags! 
+ mc.lbz(r.SCRATCH.value, r.r3.value, + descr.jit_wb_if_flag_byteofs) + mc.extsb(r.SCRATCH.value, r.SCRATCH.value) + mc.cmpwi(0, r.SCRATCH.value, 0) + # + mc.addi(r.SP.value, r.SP.value, frame_size) + mc.blr() + # + mc.prepare_insts_blocks() + rawstart = mc.materialize(self.cpu.asmmemmgr, []) + self.wb_slowpath[withcards + 2 * withfloats] = rawstart + def _build_propagate_exception_path(self): if self.cpu.propagate_exception_v < 0: return @@ -662,6 +720,11 @@ def setup_once(self): gc_ll_descr = self.cpu.gc_ll_descr gc_ll_descr.initialize() + self._build_wb_slowpath(False) + self._build_wb_slowpath(True) + if self.cpu.supports_floats: + self._build_wb_slowpath(False, withfloats=True) + self._build_wb_slowpath(True, withfloats=True) self._build_propagate_exception_path() if gc_ll_descr.get_malloc_slowpath_addr is not None: self._build_malloc_slowpath() _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit