Author: Richard Plangger <[email protected]>
Branch: s390x-backend
Changeset: r81410:4d4c6bd91480
Date: 2015-12-21 11:12 +0100
http://bitbucket.org/pypy/pypy/changeset/4d4c6bd91480/
Log:	implemented release gil halfway; lock release and reacquire solved
	(the former uses a serialization point to make the store visible to
	other CPUs, the latter uses compare-and-swap to store 1 into the lock)
diff --git a/rpython/jit/backend/zarch/assembler.py
b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -171,6 +171,9 @@
# save the information
mc.STG(r.r14, l.addr(14*WORD, r.SP)) # save the link
+ RCS2 = r.r10
+ RCS3 = r.r12
+
LOCAL_VARS_OFFSET = 0
extra_stack_size = LOCAL_VARS_OFFSET + 4 * WORD + 8
extra_stack_size = (extra_stack_size + 15) & ~15
@@ -183,29 +186,24 @@
# need to save many registers: the registers that are anyway
# destroyed by the call can be ignored (VOLATILES), and the
# non-volatile registers won't be changed here. It only needs
- # to save r.RCS1 (used below), r1 and f0 (possible results of
- # the call), and two more non-volatile registers (used to store
+ # to save r2 and f0 (possible results of the call),
+ # and two more non-volatile registers (used to store
# the RPython exception that occurred in the CALL, if any).
#
# We need to increase our stack frame size a bit to store them.
#
- self.mc.TRAP2()
- #self.mc.LGR(r.SCRATCH, l.addr(0,r.SP)) # SP back chain
- #self.mc.STG(r.SCRATCH, l.addr(-extra_stack_size, r.SP.value))
- #self.mc.STG(r.RCS1.value, r.SP.value, LOCAL_VARS_OFFSET + 0 *
WORD)
- #self.mc.STG(r.RCS2.value, r.SP.value, LOCAL_VARS_OFFSET + 1 *
WORD)
- #self.mc.STG(r.RCS3.value, r.SP.value, LOCAL_VARS_OFFSET + 2 *
WORD)
- #self.mc.STG(r.r2.value, r.SP.value, LOCAL_VARS_OFFSET + 3 *
WORD)
- #self.mc.STD(r.f1.value, r.SP.value, LOCAL_VARS_OFFSET + 4 *
WORD)
+ self._push_all_regs_to_frame(mc, withfloats, callee_only=True)
+ mc.STMG(r.r10, r.r12, l.addr(10*WORD, r.SP))
+ mc.STG(r.r2, l.addr(2*WORD, r.SP))
+ mc.STD(r.f0, l.addr(3*WORD, r.SP)) # slot of r3 is not used here
saved_regs = None
saved_fp_regs = None
-
else:
# push all volatile registers, push RCS1, and sometimes push RCS2
if withcards:
- saved_regs = r.VOLATILES # + [r.RCS1, r.RCS2]
+ saved_regs = r.VOLATILES + [RCS2]
else:
- saved_regs = r.VOLATILES # + [r.RCS1]
+ saved_regs = r.VOLATILES
if withfloats:
saved_fp_regs = r.MANAGED_FP_REGS
else:
@@ -221,16 +219,10 @@
# of _reload_frame_if_necessary)
# This trashes r0 and r2, which is fine in this case
assert argument_loc is not r.r0
- xxx
- #self._store_and_reset_exception(mc, r.RCS2, r.RCS3)
+ self._store_and_reset_exception(mc, RCS2, RCS3)
if withcards:
- xxx
- #kmc.mr(r.RCS2.value, argument_loc.value)
- #
- # Save the lr into r.RCS1
- #mc.mflr(r.RCS1.value)
- #
+ mc.LGR(RCS2, argument_loc)
func = rffi.cast(lltype.Signed, func)
# Note: if not 'for_frame', argument_loc is r0, which must carefully
# not be overwritten above
@@ -242,32 +234,25 @@
mc.AGHI(r.SP, l.imm(STD_FRAME_SIZE_IN_BYTES))
if for_frame:
- xxx
- self._restore_exception(mc, r.RCS2, r.RCS3)
+ self._restore_exception(mc, RCS2, RCS3)
if withcards:
# A final andix before the blr, for the caller. Careful to
# not follow this instruction with another one that changes
# the status of cr0!
card_marking_mask = descr.jit_wb_cards_set_singlebyte
- mc.trap()
- #mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs)
- #mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF)
+ mc.LLGC(RCS2, l.addr(descr.jit_wb_if_flag_byteofs, RCS2))
+ mc.NILL(RCS2, l.imm(card_marking_mask & 0xFF))
if for_frame:
- self.mc.trap()
- #self.mc.ld(r.RCS1.value, r.SP.value, LOCAL_VARS_OFFSET + 0 * WORD)
- #self.mc.ld(r.RCS2.value, r.SP.value, LOCAL_VARS_OFFSET + 1 * WORD)
- #self.mc.ld(r.RCS3.value, r.SP.value, LOCAL_VARS_OFFSET + 2 * WORD)
- #self.mc.ld(r.r3.value, r.SP.value, LOCAL_VARS_OFFSET + 3 * WORD)
- #self.mc.lfd(r.f1.value, r.SP.value, LOCAL_VARS_OFFSET + 4 * WORD)
- #self.mc.addi(r.SP.value, r.SP.value, extra_stack_size)
-
+ mc.LMG(r.r10, r.r12, l.addr(10*WORD, r.SP))
+ mc.LG(r.r2, l.addr(2*WORD, r.SP))
+ mc.LD(r.f0, l.addr(3*WORD, r.SP)) # slot of r3 is not used here
else:
self._pop_core_regs_from_jitframe(mc, saved_regs)
self._pop_fp_regs_from_jitframe(mc, saved_fp_regs)
- mc.LG(r.r14, l.addr(14*WORD, r.SP)) # restore the link
+ mc.LG(r.RETURN, l.addr(14*WORD, r.SP)) # restore the link
mc.BCR(c.ANY, r.RETURN)
self.mc = old_mc
@@ -897,6 +882,37 @@
self.mc.LMG(r.r6, r.r15, l.addr(6*WORD, r.SP))
self.jmpto(r.r14)
+ def _push_all_regs_to_stack(self, mc, withfloats, callee_only=False):
+ base_ofs = 2*WORD
+ if callee_only:
+ regs = ZARCHRegisterManager.save_around_call_regs
+ else:
+ regs = r.registers[2:]
+ mc.STMG(regs[0], regs[1], l.addr(base_ofs, r.SP))
+ if withfloats:
+ xxx
+
+ def _push_all_regs_to_frame(self, mc, ignored_regs, withfloats,
callee_only=False):
+ # Push all general purpose registers
+ base_ofs = self.cpu.get_baseofs_of_frame_field()
+ if callee_only:
+ regs = gpr_reg_mgr_cls.save_around_call_regs
+ else:
+ regs = gpr_reg_mgr_cls.all_regs
+ for gpr in regs:
+ if gpr not in ignored_regs:
+ v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value]
+ mc.MOV_br(v * WORD + base_ofs, gpr.value)
+ if withfloats:
+ if IS_X86_64:
+ coeff = 1
+ else:
+ coeff = 2
+ # Push all XMM regs
+ ofs = len(gpr_reg_mgr_cls.all_regs)
+ for i in range(len(xmm_reg_mgr_cls.all_regs)):
+ mc.MOVSD_bx((ofs + i * coeff) * WORD + base_ofs, i)
+
def _push_core_regs_to_jitframe(self, mc, includes=r.registers):
if len(includes) == 0:
return
diff --git a/rpython/jit/backend/zarch/callbuilder.py
b/rpython/jit/backend/zarch/callbuilder.py
--- a/rpython/jit/backend/zarch/callbuilder.py
+++ b/rpython/jit/backend/zarch/callbuilder.py
@@ -14,8 +14,9 @@
GPR_ARGS = [r.r2, r.r3, r.r4, r.r5, r.r6]
FPR_ARGS = [r.f0, r.f2, r.f4, r.f6]
- #RFASTGILPTR = r.RCS2
- #RSHADOWOLD = r.RCS3
+ RSHADOWOLD = r.r9
+ RSHADOWPTR = r.r10
+ RFASTGILPTR = r.r12
def __init__(self, assembler, fnloc, arglocs, resloc):
AbstractCallBuilder.__init__(self, assembler, fnloc, arglocs,
@@ -148,6 +149,12 @@
def call_releasegil_addr_and_move_real_arguments(self, fastgil):
assert self.is_call_release_gil
+ RSHADOWOLD = self.RSHADOWOLD
+ RSHADOWPTR = self.RSHADOWPTR
+ RFASTGILPTR = self.RFASTGILPTR
+ #
+ # assumes RSHADOWOLD to be r9, stores all up to r15
+ self.mc.STMG(RSHADOWOLD, r.r15, l.addr(9 * WORD, r.SP))
#
# Save this thread's shadowstack pointer into r29, for later comparison
gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
@@ -155,13 +162,13 @@
if gcrootmap.is_shadow_stack:
rst = gcrootmap.get_root_stack_top_addr()
self.mc.load_imm(RSHADOWPTR, rst)
- self.mc.load(RSHADOWOLD.value, RSHADOWPTR.value, 0)
+ self.mc.LGR(RSHADOWOLD, RSHADOWPTR)
#
# change 'rpy_fastgil' to 0 (it should be non-zero right now)
self.mc.load_imm(RFASTGILPTR, fastgil)
- self.mc.li(r.r0.value, 0)
- self.mc.lwsync()
- self.mc.std(r.r0.value, RFASTGILPTR.value, 0)
+ self.mc.LGHI(r.SCRATCH, l.imm(0))
+ self.mc.STG(r.SCRATCH, l.addr(0, RFASTGILPTR))
+ self.mc.sync() # renders the store visible to other cpus
#
if not we_are_translated(): # for testing: we should not access
self.mc.AGHI(r.SPP, l.imm(1)) # r31 any more
@@ -169,21 +176,22 @@
def move_real_result_and_call_reacqgil_addr(self, fastgil):
from rpython.jit.backend.zarch.codebuilder import InstrBuilder
- xxx
# try to reacquire the lock. The following registers are still
# valid from before the call:
- RSHADOWPTR = self.RSHADOWPTR # r30: &root_stack_top
- RFASTGILPTR = self.RFASTGILPTR # r29: &fastgil
- RSHADOWOLD = self.RSHADOWOLD # r28: previous val of root_stack_top
+ RSHADOWPTR = self.RSHADOWPTR # r10: &root_stack_top
+ RFASTGILPTR = self.RFASTGILPTR # r12: &fastgil
+ RSHADOWOLD = self.RSHADOWOLD # r9: previous val of root_stack_top
- # Equivalent of 'r10 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
- self.mc.li(r.r9.value, 1)
+ # Equivalent of 'r14 = __sync_lock_test_and_set(&rpy_fastgil, 1);'
+ self.mc.LGHI(r.r11, l.imm(1))
+ self.mc.LGHI(r.r14, l.imm(0))
retry_label = self.mc.currpos()
- self.mc.ldarx(r.r10.value, 0, RFASTGILPTR.value) # load the lock value
- self.mc.stdcxx(r.r9.value, 0, RFASTGILPTR.value) # try to claim lock
- self.mc.bc(6, 2, retry_label - self.mc.currpos()) # retry if failed
- self.mc.isync()
+ # compare and swap; only succeeds if the contents of the
+ # lock are equal to r14 (= 0)
+ self.mc.CSG(r.r14, r.r11, l.addr(RFASTGILPTR)) # try to claim lock
+ self.mc.BRC(c.EQ, l.imm(retry_label - self.mc.currpos())) # retry if
failed
+ #self.mc.sync()
self.mc.cmpdi(0, r.r10.value, 0)
b1_location = self.mc.currpos()
@@ -244,7 +252,6 @@
def write_real_errno(self, save_err):
- xxx
if save_err & rffi.RFFI_READSAVED_ERRNO:
# Just before a call, read '*_errno' and write it into the
# real 'errno'. A lot of registers are free here, notably
@@ -254,19 +261,19 @@
else:
rpy_errno = llerrno.get_rpy_errno_offset(self.asm.cpu)
p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
- self.mc.ld(r.r11.value, r.SP.value,
- THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp)
- self.mc.lwz(r.r0.value, r.r11.value, rpy_errno)
- self.mc.ld(r.r11.value, r.r11.value, p_errno)
- self.mc.stw(r.r0.value, r.r11.value, 0)
+ self.mc.LG(r.r11,
+ l.addr(THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp,
r.SP))
+ self.mc.LGH(r.SCRATCH2, l.addr(rpy_errno, r.r11))
+ self.mc.LG(r.r11, l.addr(p_errno, r.r11))
+ self.mc.STHY(r.SCRATCH2, l.addr(0,r.r11))
elif save_err & rffi.RFFI_ZERO_ERRNO_BEFORE:
# Same, but write zero.
p_errno = llerrno.get_p_errno_offset(self.asm.cpu)
- self.mc.ld(r.r11.value, r.SP.value,
- THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp)
- self.mc.ld(r.r11.value, r.r11.value, p_errno)
- self.mc.li(r.r0.value, 0)
- self.mc.stw(r.r0.value, r.r11.value, 0)
+ self.mc.LG(r.r11,
+ l.addr(THREADLOCAL_ADDR_OFFSET + self.subtracted_to_sp,
r.SP))
+ self.mc.LG(r.r11, l.addr(p_errno, r.r11))
+ self.mc.LGHI(r.SCRATCH, 0)
+ self.mc.STHY(r.SCRATCH, l.addr(0,r.r11))
def read_real_errno(self, save_err):
if save_err & rffi.RFFI_SAVE_ERRNO:
diff --git a/rpython/jit/backend/zarch/codebuilder.py
b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -174,6 +174,10 @@
self.LGFI(dest_reg, l.imm(word & 0xFFFFffff))
self.IIHF(dest_reg, l.imm((word >> 32) & 0xFFFFffff))
+ def sync(self):
+ # BCR 15,0 acts as a serialization point (see the
+ # z/Architecture Principles of Operation, "CPU Serialization")
+ self.BCR_rr(0xf,0)
+
def raw_call(self, call_reg=r.RETURN):
"""Emit a call to the address stored in the register 'call_reg',
which must be either RAW_CALL_REG or r12. This is a regular C
diff --git a/rpython/jit/backend/zarch/instructions.py
b/rpython/jit/backend/zarch/instructions.py
--- a/rpython/jit/backend/zarch/instructions.py
+++ b/rpython/jit/backend/zarch/instructions.py
@@ -41,7 +41,9 @@
# rotating
# rotate, then insert selected bits
- 'RISBGN': ('rie_f', ['\xEC','\x59']),
+ # on the VM the miscellaneous-instruction-extensions facility
+ # does not seem to be installed, sad but true...
+ # 'RISBGN': ('rie_f', ['\xEC','\x59']),
# invert & negative & absolute
'LPGR': ('rre', ['\xB9','\x00']),
@@ -107,6 +109,9 @@
'XI': ('si', ['\x97']),
'XIY': ('siy', ['\xEB','\x57']),
+ 'XILF': ('ril', ['\xC0','\x06']),
+ 'XIHF': ('ril', ['\xC0','\x07']),
+
# OR immediate
'OIHH': ('ri_u', ['\xA5', '\x08']),
'OIHL': ('ri_u', ['\xA5', '\x09']),
diff --git a/rpython/jit/backend/zarch/opassembler.py
b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -230,8 +230,8 @@
if is_call_release_gil:
saveerrloc = arglocs[1]
- assert saveerrloc.is_in_pool()
- cb.emit_call_release_gil(saveerrloc)
+ assert saveerrloc.is_imm()
+ cb.emit_call_release_gil(saveerrloc.value)
else:
cb.emit()
@@ -490,12 +490,15 @@
# compute in r2 the index of the bit inside the byte:
# (index >> card_page_shift) & 7
# 0x80 sets zero flag. will store 0 into all selected bits
- mc.RISBGN(r.SCRATCH2, loc_index, l.imm(3), l.imm(0x80 | 63),
l.imm(61))
+ # cannot be used on the VM
+ # mc.RISBGN(r.SCRATCH, loc_index, l.imm(3), l.imm(0x80 | 63),
l.imm(61))
+ mc.SLAG(r.SCRATCH, loc_index, l.addr(3))
+ mc.NILL(r.SCRATCH, l.imm(0xff))
#mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61)
# set r2 to 1 << r2
- mc.LGHI(r.SCRATCH, l.imm(1))
- mc.SLAG(r.SCRATCH2, r.SCRATCH, l.addr(0,r.SCRATCH2))
+ mc.LGHI(r.SCRATCH2, l.imm(1))
+ mc.SLAG(r.SCRATCH, r.SCRATCH2, l.addr(0,r.SCRATCH))
# set this bit inside the byte of interest
addr = l.addr(0, loc_base, tmp_loc)
diff --git a/rpython/jit/backend/zarch/pool.py
b/rpython/jit/backend/zarch/pool.py
--- a/rpython/jit/backend/zarch/pool.py
+++ b/rpython/jit/backend/zarch/pool.py
@@ -65,6 +65,12 @@
rop.GC_LOAD_INDEXED_R,
rop.GC_LOAD_INDEXED_I,):
return
+ elif op.is_call_release_gil():
+ for arg in op.getarglist()[1:]:
+ if arg.is_constant():
+ self.offset_map[arg] = self.size
+ self.reserve_literal(8)
+ return
for arg in op.getarglist():
if arg.is_constant():
self.offset_map[arg] = self.size
diff --git a/rpython/jit/backend/zarch/regalloc.py
b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -859,14 +859,27 @@
prepare_call_may_force_f = _prepare_call_may_force
prepare_call_may_force_n = _prepare_call_may_force
+ def _prepare_call_release_gil(self, op):
+ save_all_regs = False
+ errno_box = op.getarg(0)
+ assert isinstance(errno_box, ConstInt)
+ args = [None, l.imm(errno_box.value)]
+ for i in range(1,op.numargs()):
+ args.append(self.loc(op.getarg(i)))
+ self._spill_before_call(save_all_regs)
+ if op.type != VOID:
+ resloc = self.after_call(op)
+ args[0] = resloc
+ return args
+
+ prepare_call_release_gil_i = _prepare_call_release_gil
+ prepare_call_release_gil_f = _prepare_call_release_gil
+ prepare_call_release_gil_n = _prepare_call_release_gil
+
def prepare_force_token(self, op):
res_loc = self.force_allocate_reg(op)
return [res_loc]
- prepare_call_release_gil_i = _prepare_call_may_force
- prepare_call_release_gil_f = _prepare_call_may_force
- prepare_call_release_gil_n = _prepare_call_may_force
-
def _prepare_call_assembler(self, op):
locs = self.locs_for_call_assembler(op)
self._spill_before_call(save_all_regs=True)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit