Author: Richard Plangger <[email protected]>
Branch: s390x-backend
Changeset: r81670:6c8cea4f1f86
Date: 2016-01-11 13:52 +0100
http://bitbucket.org/pypy/pypy/changeset/6c8cea4f1f86/
Log: push pop from jitframe optimization (using store/load multiple
instruction on s390x) + test
diff --git a/rpython/jit/backend/zarch/assembler.py
b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -512,7 +512,7 @@
# registers).
mc = InstrBuilder()
#
- # mc.STG(r.r14, l.addr(14*WORD, r.SP))
+ mc._push_core_regs_to_jitframe([r.r14]) # store the link on the jit
frame
# Do the call
mc.push_std_frame()
mc.LGR(r.r2, r.SP)
@@ -527,6 +527,7 @@
# else we have an exception
mc.cmp_op(r.SCRATCH, l.imm(0), imm=True)
#
+ mc._pop_core_regs_from_jitframe([r.r14]) # restore the link on the jit
frame
# So we return to our caller, conditionally if "EQ"
# mc.LG(r.r14, l.addr(14*WORD, r.SP))
mc.BCR(c.EQ, r.r14)
@@ -551,16 +552,14 @@
endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
diff = lengthaddr - endaddr
assert check_imm_value(diff)
- xxx
mc = self.mc
- mc.load_imm(r.SCRATCH, self.stack_check_slowpath)
- mc.load_imm(r.SCRATCH2, endaddr) # li r2, endaddr
- mc.mtctr(r.SCRATCH.value)
- mc.load(r.SCRATCH.value, r.SCRATCH2.value, 0) # ld r0, [end]
- mc.load(r.SCRATCH2.value, r.SCRATCH2.value, diff)# ld r2, [length]
- mc.subf(r.SCRATCH.value, r.SP.value, r.SCRATCH.value) # sub r0, SP
- mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value, signed=False)
+ mc.load_imm(r.SCRATCH2, endaddr) # li r0, endaddr
+ mc.branch_absolute(self.stack_check_slowpath)
+ mc.load(r.SCRATCH, r.SCRATCH2, 0) # lg r1, [end]
+ mc.load(r.SCRATCH2, r.SCRATCH2, diff)# lg r0, [length]
+ mc.SGR(r.SCRATCH, r.SP) # sub r1, SP
+ mc.cmp_op(r.SCRATCH, r.SCRATCH2, signed=False)
mc.bgtctrl()
def _check_frame_depth(self, mc, gcmap):
@@ -1057,21 +1056,71 @@
mc.MOVSD_bx((ofs + i * coeff) * WORD + base_ofs, i)
def _push_core_regs_to_jitframe(self, mc, includes=r.registers):
+ self._multiple_to_or_from_jitframe(mc, includes, store=True)
+
+ @specialize.arg(3)
+ def _multiple_to_or_from_jitframe(self, mc, includes, store):
if len(includes) == 0:
return
base_ofs = self.cpu.get_baseofs_of_frame_field()
- base = includes[0].value
+ if len(includes) == 1:
+ iv = includes[0]
+ addr = l.addr(base_ofs + iv.value * WORD, r.SPP)
+ if store:
+ mc.STG(iv, addr)
+ else:
+ mc.LG(iv, addr)
+ return
+
val = includes[0].value
- for register in includes:
- if register.value != val:
- break
- val += 1
+ # includes[i => j]
+ # each continuous sequence of registers is stored
+ # with STMG instead of STG; in the best case this only leads
+ # to 1 instruction to store r.ri -> r.rj (if the whole range is continuous)
+ i = 0
+ j = 1
+ for register in includes[1:]:
+ if i >= j:
+ j += 1
+ continue
+ regval = register.value
+ if regval != (val+1):
+ iv = includes[i]
+ diff = (val - iv.value)
+ addr = l.addr(base_ofs + iv.value * WORD, r.SPP)
+ if diff > 0:
+ if store:
+ mc.STMG(iv, includes[i+diff], addr)
+ else:
+ mc.LMG(iv, includes[i+diff], addr)
+ i = j
+ else:
+ if store:
+ mc.STG(iv, addr)
+ else:
+ mc.LG(iv, addr)
+ i = j
+ val = regval
+ j += 1
+ if i >= len(includes):
+ # all have been stored
+ return
+ diff = (val - includes[i].value)
+ iv = includes[i]
+ addr = l.addr(base_ofs + iv.value * WORD, r.SPP)
+ if diff > 0:
+ if store:
+ mc.STMG(iv, includes[-1], addr)
+ else:
+ mc.LMG(iv, includes[-1], addr)
else:
- mc.STMG(includes[0], includes[-1], l.addr(base_ofs + base * WORD,
r.SPP))
- return
- # unordered!
- for register in includes:
- mc.STG(register, l.addr(base_ofs + register.value * WORD, r.SPP))
+ if store:
+ mc.STG(iv, addr)
+ else:
+ mc.LG(iv, addr)
+
+ def _pop_core_regs_from_jitframe(self, mc, includes=r.MANAGED_REGS):
+ self._multiple_to_or_from_jitframe(mc, includes, store=False)
def _push_fp_regs_to_jitframe(self, mc, includes=r.fpregisters):
if len(includes) == 0:
@@ -1081,11 +1130,6 @@
for i,reg in enumerate(includes):
mc.STDY(reg, l.addr(base_ofs + (v+i) * WORD, r.SPP))
- def _pop_core_regs_from_jitframe(self, mc, includes=r.MANAGED_REGS):
- base_ofs = self.cpu.get_baseofs_of_frame_field()
- for reg in includes:
- mc.LG(reg, l.addr(base_ofs + reg.value * WORD, r.SPP))
-
def _pop_fp_regs_from_jitframe(self, mc, includes=r.MANAGED_FP_REGS):
base_ofs = self.cpu.get_baseofs_of_frame_field()
v = 16
diff --git a/rpython/jit/backend/zarch/opassembler.py
b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -673,7 +673,6 @@
self._emit_guard(op, arglocs[1:])
def emit_guard_subclass(self, op, arglocs, regalloc):
- xxx
assert self.cpu.supports_guard_gc_type
loc_object = arglocs[0]
loc_check_against_class = arglocs[1]
@@ -681,10 +680,10 @@
offset2 = self.cpu.subclassrange_min_offset
if offset is not None:
# read this field to get the vtable pointer
- self.mc(r.SCRATCH2, l.addr(offset, loc_object))
+ self.mc.LG(r.SCRATCH2, l.addr(offset, loc_object))
# read the vtable's subclassrange_min field
assert check_imm(offset2)
- self.mc.ld(r.SCRATCH2.value, r.SCRATCH2.value, offset2)
+ self.mc.LG(r.SCRATCH2.value, r.SCRATCH2.value, offset2)
else:
# read the typeid
self._read_typeid(r.SCRATCH, loc_object)
diff --git a/rpython/jit/backend/zarch/pool.py
b/rpython/jit/backend/zarch/pool.py
--- a/rpython/jit/backend/zarch/pool.py
+++ b/rpython/jit/backend/zarch/pool.py
@@ -2,6 +2,7 @@
from rpython.jit.backend.zarch import locations as l
from rpython.jit.metainterp.history import (INT, REF, FLOAT,
TargetToken)
+from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.metainterp.resoperation import rop
from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
from rpython.jit.backend.zarch.arch import (WORD,
@@ -36,7 +37,6 @@
self.reserve_literal(8)
elif op.getopnum() == rop.LABEL:
descr = op.getdescr()
- descr._ll_loop_pool = self.pool_start
if descr not in asm.target_tokens_currently_compiling:
# this is a 'long' jump instead of a relative jump
self.offset_map[descr] = self.size
@@ -121,12 +121,10 @@
self.pool_start = asm.mc.get_relative_pos()
for op in operations:
self.ensure_can_hold_constants(asm, op)
- if self.size == 0 and written != 0:
+ if self.size == 0:
# no pool needed!
return
- assert self.size % 2 == 0
- #if self.size % 2 == 1:
- # self.size += 1
+ assert self.size % 2 == 0, "not aligned properly"
asm.mc.write('\x00' * self.size)
written = 0
if self.constant_64_ones != -1:
@@ -146,7 +144,8 @@
self.constant_max_64_positive = self.size
written += 8
self.size += written
- print "pool with %d quad words" % (self.size // 8)
+ if not we_are_translated():
+ print "pool with %d quad words" % (self.size // 8)
def overwrite_64(self, mc, index, value):
index += self.pool_start
@@ -165,7 +164,8 @@
if self.size == 0:
return
for val, offset in self.offset_map.items():
- print val, offset
+ if not we_are_translated():
+ print('pool: %s at offset: %d' % (val, offset))
if val.is_constant():
if val.type == FLOAT:
self.overwrite_64(mc, offset, float2longlong(val.value))
diff --git a/rpython/jit/backend/zarch/test/test_assembler.py
b/rpython/jit/backend/zarch/test/test_assembler.py
--- a/rpython/jit/backend/zarch/test/test_assembler.py
+++ b/rpython/jit/backend/zarch/test/test_assembler.py
@@ -370,3 +370,72 @@
self.mc.LGHI(reg.r2, loc.imm(1))
self.a.jmpto(reg.r14)
assert run_asm(self.a) == 1
+
+ def pushpop_jitframe(self, registers):
+ self.a._push_core_regs_to_jitframe(self.mc, registers)
+ self.a._pop_core_regs_from_jitframe(self.mc, registers)
+
+ def test_pushpop_jitframe_multiple_optimization(self):
+ stored = []
+ loaded = []
+ def STMG(start, end, addr):
+ stored.append((start, end))
+ def STG(reg, addr):
+ stored.append((reg,))
+ def LMG(start, end, addr):
+ loaded.append((start, end))
+ def LG(reg, addr):
+ loaded.append((reg,))
+ self.mc.STMG = STMG
+ self.mc.STG = STG
+ self.mc.LMG = LMG
+ self.mc.LG = LG
+
+ r = reg
+
+ # two sequences 10-11, 13-14
+ self.pushpop_jitframe([r.r10, r.r11, r.r13, r.r14])
+ assert stored == [(r.r10, r.r11), (r.r13, r.r14)]
+ assert stored == loaded
+ stored = []
+ loaded = []
+
+ # one sequence and one single
+ self.pushpop_jitframe([r.r0, r.r1, r.r3])
+ assert stored == [(r.r0, r.r1), (r.r3,)]
+ assert stored == loaded
+ stored = []
+ loaded = []
+
+ # single items
+ self.pushpop_jitframe(r.registers[::2])
+ assert stored == [(x,) for x in r.registers[::2]]
+ assert stored == loaded
+ stored = []
+ loaded = []
+
+ # large sequence 0-5 and one hole between
+ self.pushpop_jitframe([r.r0, r.r1, r.r2, r.r3,
+ r.r4, r.r5, r.r12, r.r13])
+ assert stored == [(r.r0, r.r5), (r.r12, r.r13)]
+ assert stored == loaded
+ stored = []
+ loaded = []
+
+ # ensure there is just one instruction for the 'best case'
+ self.pushpop_jitframe(r.registers)
+ assert stored == [(r.r0, r.r15)]
+ assert stored == loaded
+ stored = []
+ loaded = []
+
+ # just one single
+ for r in [r.r14, r.r0, r.r1, r.r15]:
+ self.pushpop_jitframe([r])
+ assert stored == [(r,)]
+ assert stored == loaded
+ stored = []
+ loaded = []
+
+
+
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit