Author: Richard Plangger <planri...@gmail.com>
Branch: s390x-backend
Changeset: r81670:6c8cea4f1f86
Date: 2016-01-11 13:52 +0100
http://bitbucket.org/pypy/pypy/changeset/6c8cea4f1f86/
Log: push pop from jitframe optimization (using store/load multiple instruction on s390x) + test diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py --- a/rpython/jit/backend/zarch/assembler.py +++ b/rpython/jit/backend/zarch/assembler.py @@ -512,7 +512,7 @@ # registers). mc = InstrBuilder() # - # mc.STG(r.r14, l.addr(14*WORD, r.SP)) + mc._push_core_regs_to_jitframe([r.r14]) # store the link on the jit frame # Do the call mc.push_std_frame() mc.LGR(r.r2, r.SP) @@ -527,6 +527,7 @@ # else we have an exception mc.cmp_op(r.SCRATCH, l.imm(0), imm=True) # + mc._pop_core_regs_from_jitframe([r.r14]) # restore the link on the jit frame # So we return to our caller, conditionally if "EQ" # mc.LG(r.r14, l.addr(14*WORD, r.SP)) mc.BCR(c.EQ, r.r14) @@ -551,16 +552,14 @@ endaddr, lengthaddr, _ = self.cpu.insert_stack_check() diff = lengthaddr - endaddr assert check_imm_value(diff) - xxx mc = self.mc - mc.load_imm(r.SCRATCH, self.stack_check_slowpath) - mc.load_imm(r.SCRATCH2, endaddr) # li r2, endaddr - mc.mtctr(r.SCRATCH.value) - mc.load(r.SCRATCH.value, r.SCRATCH2.value, 0) # ld r0, [end] - mc.load(r.SCRATCH2.value, r.SCRATCH2.value, diff)# ld r2, [length] - mc.subf(r.SCRATCH.value, r.SP.value, r.SCRATCH.value) # sub r0, SP - mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value, signed=False) + mc.load_imm(r.SCRATCH2, endaddr) # li r0, endaddr + mc.branch_absolute(self.stack_check_slowpath) + mc.load(r.SCRATCH, r.SCRATCH2, 0) # lg r1, [end] + mc.load(r.SCRATCH2, r.SCRATCH2, diff)# lg r0, [length] + mc.SGR(r.SCRATCH, r.SP) # sub r1, SP + mc.cmp_op(r.SCRATCH, r.SCRATCH2, signed=False) mc.bgtctrl() def _check_frame_depth(self, mc, gcmap): @@ -1057,21 +1056,71 @@ mc.MOVSD_bx((ofs + i * coeff) * WORD + base_ofs, i) def _push_core_regs_to_jitframe(self, mc, includes=r.registers): + self._multiple_to_or_from_jitframe(mc, includes, store=True) + + @specialize.arg(3) + def _multiple_to_or_from_jitframe(self, mc, includes, store): if len(includes) == 0: 
return base_ofs = self.cpu.get_baseofs_of_frame_field() - base = includes[0].value + if len(includes) == 1: + iv = includes[0] + addr = l.addr(base_ofs + iv.value * WORD, r.SPP) + if store: + mc.STG(iv, addr) + else: + mc.LG(iv, addr) + return + val = includes[0].value - for register in includes: - if register.value != val: - break - val += 1 + # includes[i => j] + # for each continous sequence in the registers are stored + # with STMG instead of STG, in the best case this only leads + # to 1 instruction to store r.ri -> r.rj (if it is continuous) + i = 0 + j = 1 + for register in includes[1:]: + if i >= j: + j += 1 + continue + regval = register.value + if regval != (val+1): + iv = includes[i] + diff = (val - iv.value) + addr = l.addr(base_ofs + iv.value * WORD, r.SPP) + if diff > 0: + if store: + mc.STMG(iv, includes[i+diff], addr) + else: + mc.LMG(iv, includes[i+diff], addr) + i = j + else: + if store: + mc.STG(iv, addr) + else: + mc.LG(iv, addr) + i = j + val = regval + j += 1 + if i >= len(includes): + # all have been stored + return + diff = (val - includes[i].value) + iv = includes[i] + addr = l.addr(base_ofs + iv.value * WORD, r.SPP) + if diff > 0: + if store: + mc.STMG(iv, includes[-1], addr) + else: + mc.LMG(iv, includes[-1], addr) else: - mc.STMG(includes[0], includes[-1], l.addr(base_ofs + base * WORD, r.SPP)) - return - # unordered! 
- for register in includes: - mc.STG(register, l.addr(base_ofs + register.value * WORD, r.SPP)) + if store: + mc.STG(iv, addr) + else: + mc.LG(iv, addr) + + def _pop_core_regs_from_jitframe(self, mc, includes=r.MANAGED_REGS): + self._multiple_to_or_from_jitframe(mc, includes, store=False) def _push_fp_regs_to_jitframe(self, mc, includes=r.fpregisters): if len(includes) == 0: @@ -1081,11 +1130,6 @@ for i,reg in enumerate(includes): mc.STDY(reg, l.addr(base_ofs + (v+i) * WORD, r.SPP)) - def _pop_core_regs_from_jitframe(self, mc, includes=r.MANAGED_REGS): - base_ofs = self.cpu.get_baseofs_of_frame_field() - for reg in includes: - mc.LG(reg, l.addr(base_ofs + reg.value * WORD, r.SPP)) - def _pop_fp_regs_from_jitframe(self, mc, includes=r.MANAGED_FP_REGS): base_ofs = self.cpu.get_baseofs_of_frame_field() v = 16 diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -673,7 +673,6 @@ self._emit_guard(op, arglocs[1:]) def emit_guard_subclass(self, op, arglocs, regalloc): - xxx assert self.cpu.supports_guard_gc_type loc_object = arglocs[0] loc_check_against_class = arglocs[1] @@ -681,10 +680,10 @@ offset2 = self.cpu.subclassrange_min_offset if offset is not None: # read this field to get the vtable pointer - self.mc(r.SCRATCH2, l.addr(offset, loc_object)) + self.mc.LG(r.SCRATCH2, l.addr(offset, loc_object)) # read the vtable's subclassrange_min field assert check_imm(offset2) - self.mc.ld(r.SCRATCH2.value, r.SCRATCH2.value, offset2) + self.mc.LG(r.SCRATCH2.value, r.SCRATCH2.value, offset2) else: # read the typeid self._read_typeid(r.SCRATCH, loc_object) diff --git a/rpython/jit/backend/zarch/pool.py b/rpython/jit/backend/zarch/pool.py --- a/rpython/jit/backend/zarch/pool.py +++ b/rpython/jit/backend/zarch/pool.py @@ -2,6 +2,7 @@ from rpython.jit.backend.zarch import locations as l from rpython.jit.metainterp.history import (INT, REF, 
FLOAT, TargetToken) +from rpython.rlib.objectmodel import we_are_translated from rpython.jit.metainterp.resoperation import rop from rpython.rtyper.lltypesystem import lltype, rffi, llmemory from rpython.jit.backend.zarch.arch import (WORD, @@ -36,7 +37,6 @@ self.reserve_literal(8) elif op.getopnum() == rop.LABEL: descr = op.getdescr() - descr._ll_loop_pool = self.pool_start if descr not in asm.target_tokens_currently_compiling: # this is a 'long' jump instead of a relative jump self.offset_map[descr] = self.size @@ -121,12 +121,10 @@ self.pool_start = asm.mc.get_relative_pos() for op in operations: self.ensure_can_hold_constants(asm, op) - if self.size == 0 and written != 0: + if self.size == 0: # no pool needed! return - assert self.size % 2 == 0 - #if self.size % 2 == 1: - # self.size += 1 + assert self.size % 2 == 0, "not aligned properly" asm.mc.write('\x00' * self.size) written = 0 if self.constant_64_ones != -1: @@ -146,7 +144,8 @@ self.constant_max_64_positive = self.size written += 8 self.size += written - print "pool with %d quad words" % (self.size // 8) + if not we_are_translated(): + print "pool with %d quad words" % (self.size // 8) def overwrite_64(self, mc, index, value): index += self.pool_start @@ -165,7 +164,8 @@ if self.size == 0: return for val, offset in self.offset_map.items(): - print val, offset + if not we_are_translated(): + print('pool: %s at offset: %d' % (val, offset)) if val.is_constant(): if val.type == FLOAT: self.overwrite_64(mc, offset, float2longlong(val.value)) diff --git a/rpython/jit/backend/zarch/test/test_assembler.py b/rpython/jit/backend/zarch/test/test_assembler.py --- a/rpython/jit/backend/zarch/test/test_assembler.py +++ b/rpython/jit/backend/zarch/test/test_assembler.py @@ -370,3 +370,72 @@ self.mc.LGHI(reg.r2, loc.imm(1)) self.a.jmpto(reg.r14) assert run_asm(self.a) == 1 + + def pushpop_jitframe(self, registers): + self.a._push_core_regs_to_jitframe(self.mc, registers) + self.a._pop_core_regs_from_jitframe(self.mc, 
registers) + + def test_pushpop_jitframe_multiple_optimization(self): + stored = [] + loaded = [] + def STMG(start, end, addr): + stored.append((start, end)) + def STG(reg, addr): + stored.append((reg,)) + def LMG(start, end, addr): + loaded.append((start, end)) + def LG(reg, addr): + loaded.append((reg,)) + self.mc.STMG = STMG + self.mc.STG = STG + self.mc.LMG = LMG + self.mc.LG = LG + + r = reg + + # two sequences 10-11, 13-14 + self.pushpop_jitframe([r.r10, r.r11, r.r13, r.r14]) + assert stored == [(r.r10, r.r11), (r.r13, r.r14)] + assert stored == loaded + stored = [] + loaded = [] + + # one sequence and on single + self.pushpop_jitframe([r.r0, r.r1, r.r3]) + assert stored == [(r.r0, r.r1), (r.r3,)] + assert stored == loaded + stored = [] + loaded = [] + + # single items + self.pushpop_jitframe(r.registers[::2]) + assert stored == [(x,) for x in r.registers[::2]] + assert stored == loaded + stored = [] + loaded = [] + + # large sequence 0-5 and one hole between + self.pushpop_jitframe([r.r0, r.r1, r.r2, r.r3, + r.r4, r.r5, r.r12, r.r13]) + assert stored == [(r.r0, r.r5), (r.r12, r.r13)] + assert stored == loaded + stored = [] + loaded = [] + + # ensure there is just on instruction for the 'best case' + self.pushpop_jitframe(r.registers) + assert stored == [(r.r0, r.r15)] + assert stored == loaded + stored = [] + loaded = [] + + # just one single + for r in [r.r14, r.r0, r.r1, r.r15]: + self.pushpop_jitframe([r]) + assert stored == [(r,)] + assert stored == loaded + stored = [] + loaded = [] + + + _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit