Author: Richard Plangger <planri...@gmail.com>
Branch: s390x-backend
Changeset: r81410:4d4c6bd91480
Date: 2015-12-21 11:12 +0100
http://bitbucket.org/pypy/pypy/changeset/4d4c6bd91480/
Log:	implemented release gil halfway; lock release and reacquire
	solved (the former uses a serialization point to make the store
	visible to other CPUs, the latter uses compare-and-swap to set
	the lock to 1)

diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -171,6 +171,9 @@
         # save the information
         mc.STG(r.r14, l.addr(14*WORD, r.SP)) # save the link
 
+        RCS2 = r.r10
+        RCS3 = r.r12
+
         LOCAL_VARS_OFFSET = 0
         extra_stack_size = LOCAL_VARS_OFFSET + 4 * WORD + 8
         extra_stack_size = (extra_stack_size + 15) & ~15
@@ -183,29 +186,24 @@
             # need to save many registers: the registers that are anyway
             # destroyed by the call can be ignored (VOLATILES), and the
            # non-volatile registers won't be changed here. It only needs
-            # to save r.RCS1 (used below), r1 and f0 (possible results of
-            # the call), and two more non-volatile registers (used to store
+            # to save r2 and f0 (possible results of the call),
+            # and two more non-volatile registers (used to store
             # the RPython exception that occurred in the CALL, if any).
             #
             # We need to increase our stack frame size a bit to store them.
             #
-            self.mc.TRAP2()
-            #self.mc.LGR(r.SCRATCH, l.addr(0,r.SP)) # SP back chain
-            #self.mc.STG(r.SCRATCH, l.addr(-extra_stack_size, r.SP.value))
-            #self.mc.STG(r.RCS1.value, r.SP.value, LOCAL_VARS_OFFSET + 0 * WORD)
-            #self.mc.STG(r.RCS2.value, r.SP.value, LOCAL_VARS_OFFSET + 1 * WORD)
-            #self.mc.STG(r.RCS3.value, r.SP.value, LOCAL_VARS_OFFSET + 2 * WORD)
-            #self.mc.STG(r.r2.value, r.SP.value, LOCAL_VARS_OFFSET + 3 * WORD)
-            #self.mc.STD(r.f1.value, r.SP.value, LOCAL_VARS_OFFSET + 4 * WORD)
+            self._push_all_regs_to_frame(mc, withfloats, callee_only=True)
+            mc.STMG(r.r10, r.r12, l.addr(10*WORD, r.SP))
+            mc.STG(r.r2, l.addr(2*WORD, r.SP))
+            mc.STD(r.f0, l.addr(3*WORD, r.SP)) # slot of r3 is not used here
             saved_regs = None
             saved_fp_regs = None
-
         else:
             # push all volatile registers, push RCS1, and sometimes push RCS2
             if withcards:
-                saved_regs = r.VOLATILES # + [r.RCS1, r.RCS2]
+                saved_regs = r.VOLATILES + [RCS2]
             else:
-                saved_regs = r.VOLATILES # + [r.RCS1]
+                saved_regs = r.VOLATILES
             if withfloats:
                 saved_fp_regs = r.MANAGED_FP_REGS
             else:
@@ -221,16 +219,10 @@
             # of _reload_frame_if_necessary)
             # This trashes r0 and r2, which is fine in this case
             assert argument_loc is not r.r0
-            xxx
-            #self._store_and_reset_exception(mc, r.RCS2, r.RCS3)
+            self._store_and_reset_exception(mc, RCS2, RCS3)
 
         if withcards:
-            xxx
-            #kmc.mr(r.RCS2.value, argument_loc.value)
-            #
-            # Save the lr into r.RCS1
-            #mc.mflr(r.RCS1.value)
-            #
+            mc.LGR(RCS2, argument_loc)
         func = rffi.cast(lltype.Signed, func)
         # Note: if not 'for_frame', argument_loc is r0, which must carefully
         # not be overwritten above
@@ -242,32 +234,25 @@
         mc.AGHI(r.SP, l.imm(STD_FRAME_SIZE_IN_BYTES))
 
         if for_frame:
-            xxx
-            self._restore_exception(mc, r.RCS2, r.RCS3)
+            self._restore_exception(mc, RCS2, RCS3)
 
         if withcards:
            # A final andix before the blr, for the caller.  Careful to
            # not follow this instruction with another one that changes
            # the status of cr0!
            card_marking_mask = descr.jit_wb_cards_set_singlebyte
-            mc.trap()
-            #mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs)
-            #mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF)
+            mc.LLGC(RCS2, l.addr(descr.jit_wb_if_flag_byteofs, RCS2))
+            mc.NILL(RCS2, l.imm(card_marking_mask & 0xFF))
 
         if for_frame:
-            self.mc.trap()
-            #self.mc.ld(r.RCS1.value, r.SP.value, LOCAL_VARS_OFFSET + 0 * WORD)
-            #self.mc.ld(r.RCS2.value, r.SP.value, LOCAL_VARS_OFFSET + 1 * WORD)
-            #self.mc.ld(r.RCS3.value, r.SP.value, LOCAL_VARS_OFFSET + 2 * WORD)
-            #self.mc.ld(r.r3.value, r.SP.value, LOCAL_VARS_OFFSET + 3 * WORD)
-            #self.mc.lfd(r.f1.value, r.SP.value, LOCAL_VARS_OFFSET + 4 * WORD)
-            #self.mc.addi(r.SP.value, r.SP.value, extra_stack_size)
-
+            mc.LMG(r.r10, r.r12, l.addr(10*WORD, r.SP))
+            mc.LG(r.r2, l.addr(2*WORD, r.SP))
+            mc.LD(r.f0, l.addr(3*WORD, r.SP)) # slot of r3 is not used here
         else:
             self._pop_core_regs_from_jitframe(mc, saved_regs)
             self._pop_fp_regs_from_jitframe(mc, saved_fp_regs)
 
-        mc.LG(r.r14, l.addr(14*WORD, r.SP)) # restore the link
+        mc.LG(r.RETURN, l.addr(14*WORD, r.SP)) # restore the link
         mc.BCR(c.ANY, r.RETURN)
 
         self.mc = old_mc
@@ -897,6 +882,37 @@
         self.mc.LMG(r.r6, r.r15, l.addr(6*WORD, r.SP))
         self.jmpto(r.r14)
 
+    def _push_all_regs_to_stack(self, mc, withfloats, callee_only=False):
+        base_ofs = 2*WORD
+        if callee_only:
+            regs = ZARCHRegisterManager.save_around_call_regs
+        else:
+            regs = r.registers[2:]
+        mc.STMG(regs[0], regs[1], l.addr(base_ofs, r.SP))
+        if withfloats:
+            xxx
+
+    def _push_all_regs_to_frame(self, mc, ignored_regs, withfloats, callee_only=False):
+        # Push all general purpose registers
+        base_ofs = self.cpu.get_baseofs_of_frame_field()
+        if callee_only:
+            regs = gpr_reg_mgr_cls.save_around_call_regs
+        else:
+            regs = gpr_reg_mgr_cls.all_regs
+        for gpr in regs:
+            if gpr not in ignored_regs:
+                v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value]
+                mc.MOV_br(v * WORD + base_ofs, gpr.value)
+        if withfloats:
+            if IS_X86_64:
+                coeff = 1
+            else:
+                coeff = 2
+            # Push all XMM regs
+            ofs = len(gpr_reg_mgr_cls.all_regs)
+            for i in range(len(xmm_reg_mgr_cls.all_regs)):
+                mc.MOVSD_bx((ofs + i * coeff) * WORD + base_ofs, i)
+
     def _push_core_regs_to_jitframe(self, mc, includes=r.registers):
         if len(includes) == 0:
             return
diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py
--- a/rpython/jit/backend/zarch/callbuilder.py
+++ b/rpython/jit/backend/zarch/callbuilder.py
@@ -14,8 +14,9 @@
     GPR_ARGS = [r.r2, r.r3, r.r4, r.r5, r.r6]
     FPR_ARGS = [r.f0, r.f2, r.f4, r.f6]
 
-    #RFASTGILPTR = r.RCS2
-    #RSHADOWOLD = r.RCS3
+    RSHADOWOLD = r.r9
+    RSHADOWPTR = r.r10
+    RFASTGILPTR = r.r12
 
     def __init__(self, assembler, fnloc, arglocs, resloc):
         AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
@@ -148,6 +149,12 @@
 
     def call_releasegil_addr_and_move_real_arguments(self, fastgil):
         assert self.is_call_release_gil
+        RSHADOWOLD = self.RSHADOWOLD
+        RSHADOWPTR = self.RSHADOWPTR
+        RFASTGILPTR = self.RFASTGILPTR
+        #
+        # assumes RSHADOWOLD to be r9, stores all up to r15
+        self.mc.STMG(RSHADOWOLD, r.r15, l.addr(9 * WORD, r.SP))
         #
         # Save this thread's shadowstack pointer into r29, for later comparison
         gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
@@ -155,13 +162,13 @@
         if gcrootmap.is_shadow_stack:
             rst = gcrootmap.get_root_stack_top_addr()
             self.mc.load_imm(RSHADOWPTR, rst)
-            self.mc.load(RSHADOWOLD.value, RSHADOWPTR.value, 0)
+            self.mc.LGR(RSHADOWOLD, RSHADOWPTR)
         #
         # change 'rpy_fastgil' to 0 (it should be non-zero right now)
         self.mc.load_imm(RFASTGILPTR, fastgil)
-        self.mc.li(r.r0.value, 0)
-        self.mc.lwsync()
-        self.mc.std(r.r0.value, RFASTGILPTR.value, 0)
+        self.mc.LGHI(r.SCRATCH, l.imm(0))
+        self.mc.STG(r.SCRATCH, l.addr(0, RFASTGILPTR))
+        self.mc.sync() # renders the store visible to other CPUs
         #
         if not we_are_translated():        # for testing: we should not access
             self.mc.AGHI(r.SPP, l.imm(1))  # r31 any more
@@ -169,21 +176,22 @@
 
     def move_real_result_and_call_reacqgil_addr(self, fastgil):
         from rpython.jit.backend.zarch.codebuilder import InstrBuilder
-        xxx
 
         # try to reacquire the lock.  The following registers are still
         # valid from before the call:
-        RSHADOWPTR = self.RSHADOWPTR    # r30: &root_stack_top
-        RFASTGILPTR = self.RFASTGILPTR  # r29: &fastgil
-        RSHADOWOLD = self.RSHADOWOLD    # r28: previous val of root_stack_top
+        RSHADOWPTR = self.RSHADOWPTR    # r10: &root_stack_top
+        RFASTGILPTR = self.RFASTGILPTR  # r12: &fastgil
+        RSHADOWOLD = self.RSHADOWOLD    # r9: previous val of root_stack_top
 
-        # Equivalent of 'r10 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
-        self.mc.li(r.r9.value, 1)
+        # Equivalent of 'r14 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
+        self.mc.LGHI(r.r11, l.imm(1))
+        self.mc.LGHI(r.r14, l.imm(0))
         retry_label = self.mc.currpos()
-        self.mc.ldarx(r.r10.value, 0, RFASTGILPTR.value)  # load the lock value
-        self.mc.stdcxx(r.r9.value, 0, RFASTGILPTR.value)  # try to claim lock
-        self.mc.bc(6, 2, retry_label - self.mc.currpos()) # retry if failed
-        self.mc.isync()
+        # compare and swap: only succeeds if the contents of the
+        # lock are equal to r14 (= 0)
+        self.mc.CSG(r.r14, r.r11, l.addr(RFASTGILPTR))    # try to claim lock
+        self.mc.BRC(c.EQ, l.imm(retry_label - self.mc.currpos())) # retry if failed
+        #self.mc.sync()
 
         self.mc.cmpdi(0, r.r10.value, 0)
         b1_location = self.mc.currpos()
@@ -244,7 +252,6 @@
 
     def write_real_errno(self, save_err):
-        xxx
         if save_err & rffi.RFFI_READSAVED_ERRNO:
             # Just before a call, read '*_errno' and write it into the
             # real 'errno'.  A lot of registers are free here, notably
@@ -254,19 +261,19 @@
             else:
                 rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
             p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
-            self.mc.ld(r.r11.value, r.SP.value,
-                       THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp)
-            self.mc.lwz(r.r0.value, r.r11.value, rpy_errno)
-            self.mc.ld(r.r11.value, r.r11.value, p_errno)
-            self.mc.stw(r.r0.value, r.r11.value, 0)
+            self.mc.LG(r.r11,
+                l.addr(THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp, r.SP))
+            self.mc.LGH(r.SCRATCH2, l.addr(rpy_errno, r.r11))
+            self.mc.LG(r.r11, l.addr(p_errno, r.r11))
+            self.mc.STHY(r.SCRATCH2, l.addr(0,r.r11))
         elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
             # Same, but write zero.
             p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
-            self.mc.ld(r.r11.value, r.SP.value,
-                       THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp)
-            self.mc.ld(r.r11.value, r.r11.value, p_errno)
-            self.mc.li(r.r0.value, 0)
-            self.mc.stw(r.r0.value, r.r11.value, 0)
+            self.mc.LG(r.r11,
+                l.addr(THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp, r.SP))
+            self.mc.LG(r.r11, l.addr(p_errno, r.r11))
+            self.mc.LGHI(r.SCRATCH, 0)
+            self.mc.STHY(r.SCRATCH, l.addr(0,r.r11))
 
     def read_real_errno(self, save_err):
         if save_err & rffi.RFFI_SAVE_ERRNO:
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -174,6 +174,10 @@
             self.LGFI(dest_reg, l.imm(word & 0xFFFFffff))
             self.IIHF(dest_reg, l.imm((word >> 32) & 0xFFFFffff))
 
+    def sync(self):
+        # see the serialization section of the zarch manual!
+        self.BCR_rr(0xf,0)
+
     def raw_call(self, call_reg=r.RETURN):
         """Emit a call to the address stored in the register 'call_reg',
         which must be either RAW_CALL_REG or r12.  This is a regular C
diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -41,7 +41,9 @@
 
     # rotating
     # rotate, then insert selected bits
-    'RISBGN':  ('rie_f',   ['\xEC','\x59']),
+    # on the VM the miscellaneous-instruction-extensions
+    # facility does not seem to be installed, sad but true...
+    # 'RISBGN':  ('rie_f',   ['\xEC','\x59']),
 
     # invert & negative & absolute
     'LPGR':    ('rre',   ['\xB9','\x00']),
@@ -107,6 +109,9 @@
     'XI':      ('si',    ['\x97']),
     'XIY':     ('siy',   ['\xEB','\x57']),
 
+    'XILF':    ('ril',   ['\xC0','\x06']),
+    'XIHF':    ('ril',   ['\xC0','\x07']),
+
     # OR immediate
     'OIHH':    ('ri_u',  ['\xA5', '\x08']),
     'OIHL':    ('ri_u',  ['\xA5', '\x09']),
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -230,8 +230,8 @@
 
         if is_call_release_gil:
             saveerrloc = arglocs[1]
-            assert saveerrloc.is_in_pool()
-            cb.emit_call_release_gil(saveerrloc)
+            assert saveerrloc.is_imm()
+            cb.emit_call_release_gil(saveerrloc.value)
         else:
             cb.emit()
@@ -490,12 +490,15 @@
         # compute in r2 the index of the bit inside the byte:
         #    (index >> card_page_shift) & 7
         # 0x80 sets zero flag. will store 0 into all selected bits
-        mc.RISBGN(r.SCRATCH2, loc_index, l.imm(3), l.imm(0x80 | 63), l.imm(61))
+        # cannot be used on the VM
+        # mc.RISBGN(r.SCRATCH, loc_index, l.imm(3), l.imm(0x80 | 63), l.imm(61))
+        mc.SLAG(r.SCRATCH, loc_index, l.addr(3))
+        mc.NILL(r.SCRATCH, l.imm(0xff))
         #mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61)
 
         # set r2 to 1 << r2
-        mc.LGHI(r.SCRATCH, l.imm(1))
-        mc.SLAG(r.SCRATCH2, r.SCRATCH, l.addr(0,r.SCRATCH2))
+        mc.LGHI(r.SCRATCH2, l.imm(1))
+        mc.SLAG(r.SCRATCH, r.SCRATCH2, l.addr(0,r.SCRATCH))
 
         # set this bit inside the byte of interest
         addr = l.addr(0, loc_base, tmp_loc)
diff --git a/rpython/jit/backend/zarch/pool.py b/rpython/jit/backend/zarch/pool.py
--- a/rpython/jit/backend/zarch/pool.py
+++ b/rpython/jit/backend/zarch/pool.py
@@ -65,6 +65,12 @@
                           rop.GC_LOAD_INDEXED_R, rop.GC_LOAD_INDEXED_I,):
             return
+        elif op.is_call_release_gil():
+            for arg in op.getarglist()[1:]:
+                if arg.is_constant():
+                    self.offset_map[arg] = self.size
+                    self.reserve_literal(8)
+            return
         for arg in op.getarglist():
             if arg.is_constant():
                 self.offset_map[arg] = self.size
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -859,14 +859,27 @@
     prepare_call_may_force_f = _prepare_call_may_force
     prepare_call_may_force_n = _prepare_call_may_force
 
+    def _prepare_call_release_gil(self, op):
+        save_all_regs = False
+        errno_box = op.getarg(0)
+        assert isinstance(errno_box, ConstInt)
+        args = [None, l.imm(errno_box.value)]
+        for i in range(1, op.numargs()):
+            args.append(self.loc(op.getarg(i)))
+        self._spill_before_call(save_all_regs)
+        if op.type != VOID:
+            resloc = self.after_call(op)
+            args[0] = resloc
+        return args
+
+    prepare_call_release_gil_i = _prepare_call_release_gil
+    prepare_call_release_gil_f = _prepare_call_release_gil
+    prepare_call_release_gil_n = _prepare_call_release_gil
+
     def prepare_force_token(self, op):
         res_loc = self.force_allocate_reg(op)
         return [res_loc]
 
-    prepare_call_release_gil_i = _prepare_call_may_force
-    prepare_call_release_gil_f = _prepare_call_may_force
-    prepare_call_release_gil_n = _prepare_call_may_force
-
     def _prepare_call_assembler(self, op):
         locs = self.locs_for_call_assembler(op)
         self._spill_before_call(save_all_regs=True)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit
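Postscript for readers following along: the locking protocol the log message describes (release by a plain store made visible through a serialization point, reacquire by a compare-and-swap retry loop) can be modeled in a few lines of plain Python. This is a minimal sketch only, not backend code: the FastGil class and its method names are hypothetical, and the threading.Lock merely stands in for the hardware atomicity that the real code gets from the BCR 15,0 serialization point and the CSG instruction.

    import threading

    class FastGil(object):
        # Hypothetical model of 'rpy_fastgil': a single word that is 0
        # while the GIL is free and non-zero while some thread holds it.
        def __init__(self):
            self._atomic = threading.Lock()  # stands in for hardware atomicity
            self.value = 1                   # start out held

        def release(self):
            # Release path: the backend stores 0 with STG and then emits
            # BCR 15,0 as a serialization point so that the store becomes
            # visible to the other CPUs.  In Python a plain store suffices.
            self.value = 0

        def compare_and_swap(self, expected, new):
            # Models CSG: atomically replace the word with 'new' only if
            # it currently equals 'expected'; return the old value.
            with self._atomic:
                old = self.value
                if old == expected:
                    self.value = new
                return old

        def reacquire(self):
            # Models the retry loop around CSG in
            # move_real_result_and_call_reacqgil_addr: spin until the word
            # was 0 and this thread managed to change it to 1.
            while self.compare_and_swap(0, 1) != 0:
                pass

With two threads, the holder calls release() while a waiter spins in reacquire(); the backend emits the analogous store-then-serialize and CAS-retry instruction sequences around a call that releases the GIL.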