Author: Richard Plangger <[email protected]>
Branch: s390x-backend
Changeset: r81503:7288aa624ed6
Date: 2015-12-30 17:45 +0100
http://bitbucket.org/pypy/pypy/changeset/7288aa624ed6/
Log: fixed an edge case: s390x's native instruction for memset (MVCLE) can
return in the middle of the copy (determined by the CPU); added a loop
to ensure all bytes are copied
diff --git a/rpython/jit/backend/zarch/assembler.py
b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -7,7 +7,8 @@
from rpython.jit.backend.zarch import registers as r
from rpython.jit.backend.zarch import locations as l
from rpython.jit.backend.zarch.pool import LiteralPool
-from rpython.jit.backend.zarch.codebuilder import InstrBuilder
+from rpython.jit.backend.zarch.codebuilder import (InstrBuilder,
+ OverwritingBuilder)
from rpython.jit.backend.zarch.helper.regalloc import check_imm_value
from rpython.jit.backend.zarch.registers import JITFRAME_FIXED_SIZE
from rpython.jit.backend.zarch.regalloc import ZARCHRegisterManager
@@ -493,7 +494,8 @@
ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
#mc.LG(r.r2, l.addr(ofs, r.SPP))
patch_pos = mc.currpos()
- self.mc.trap()
+ # XXX TODO
+ #self.mc.trap()
#mc.TRAP2() # placeholder for cmpdi(0, r2, ...)
#mc.TRAP2() # placeholder for bge
#mc.TRAP2() # placeholder for li(r0, ...)
@@ -844,7 +846,6 @@
for traps_pos, jmp_target in self.frame_depth_to_patch:
pmc = OverwritingBuilder(self.mc, traps_pos, 3)
# three traps, so exactly three instructions to patch here
- xxx
#pmc.cmpdi(0, r.r2.value, frame_depth) # 1
#pmc.bc(7, 0, jmp_target - (traps_pos + 4)) # 2 "bge+"
#pmc.li(r.r0.value, frame_depth) # 3
diff --git a/rpython/jit/backend/zarch/opassembler.py
b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -277,7 +277,7 @@
ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
self.mc.load_imm(r.SCRATCH, rffi.cast(lltype.Signed,
cast_instance_to_gcref(faildescr)))
- self.mc.STD(r.SCRATCH, l.addr(ofs, r.SPP))
+ self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))
def _find_nearby_operation(self, regalloc, delta):
return regalloc.operations[regalloc.rm.position + delta]
@@ -927,19 +927,22 @@
def emit_zero_array(self, op, arglocs, regalloc):
base_loc, startindex_loc, length_loc, \
ofs_loc, itemsize_loc, pad_byte_loc = arglocs
+ print(op, arglocs)
if ofs_loc.is_imm():
+ assert check_imm_value(ofs_loc.value)
self.mc.AGHI(base_loc, ofs_loc)
else:
self.mc.AGR(base_loc, ofs_loc)
if startindex_loc.is_imm():
+ assert check_imm_value(startindex_loc.value)
self.mc.AGHI(base_loc, startindex_loc)
else:
self.mc.AGR(base_loc, startindex_loc)
assert not length_loc.is_imm()
- self.mc.SGR(pad_byte_loc, pad_byte_loc)
- pad_byte_plus_one = r.odd_reg(pad_byte_loc)
- self.mc.SGR(pad_byte_plus_one, pad_byte_plus_one)
+ self.mc.XGR(pad_byte_loc, pad_byte_loc)
+ pad_plus = r.odd_reg(pad_byte_loc)
+ self.mc.XGR(pad_plus, pad_plus)
self.mc.XGR(r.SCRATCH, r.SCRATCH)
# s390x has memset directly as a hardware instruction!!
# it needs 5 registers allocated
@@ -947,9 +950,15 @@
# pad_byte is rY to rY+1
# scratch register holds the value written to dst
assert pad_byte_loc.is_even()
+ assert pad_plus.value == pad_byte_loc.value + 1
assert base_loc.is_even()
assert length_loc.value == base_loc.value + 1
+ assert base_loc.value != pad_byte_loc.value
+ # NOTE: the cpu may end this instruction at any point before all
+ # bytes are processed, so branch back to it until all bytes
+ # are copied!
self.mc.MVCLE(base_loc, pad_byte_loc, l.addr(0, r.SCRATCH))
+ self.mc.BCR(c.OF, l.imm(-self.mc.MVCLE_byte_count))
class ForceOpAssembler(object):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit