Author: Richard Plangger <planri...@gmail.com>
Branch: s390x-backend
Changeset: r81503:7288aa624ed6
Date: 2015-12-30 17:45 +0100
http://bitbucket.org/pypy/pypy/changeset/7288aa624ed6/

Log:    Fixed an edge case: the native s390x instruction used for memset
        can stop partway through the copy (the CPU decides when), so a
        loop was added to ensure all bytes are copied.

diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -7,7 +7,8 @@
 from rpython.jit.backend.zarch import registers as r
 from rpython.jit.backend.zarch import locations as l
 from rpython.jit.backend.zarch.pool import LiteralPool
-from rpython.jit.backend.zarch.codebuilder import InstrBuilder
+from rpython.jit.backend.zarch.codebuilder import (InstrBuilder,
+        OverwritingBuilder)
 from rpython.jit.backend.zarch.helper.regalloc import check_imm_value
 from rpython.jit.backend.zarch.registers import JITFRAME_FIXED_SIZE
 from rpython.jit.backend.zarch.regalloc import ZARCHRegisterManager
@@ -493,7 +494,8 @@
         ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr)
         #mc.LG(r.r2, l.addr(ofs, r.SPP))
         patch_pos = mc.currpos()
-        self.mc.trap()
+        # XXX TODO
+        #self.mc.trap()
         #mc.TRAP2()     # placeholder for cmpdi(0, r2, ...)
         #mc.TRAP2()     # placeholder for bge
         #mc.TRAP2()     # placeholder for li(r0, ...)
@@ -844,7 +846,6 @@
         for traps_pos, jmp_target in self.frame_depth_to_patch:
             pmc = OverwritingBuilder(self.mc, traps_pos, 3)
             # three traps, so exactly three instructions to patch here
-            xxx
             #pmc.cmpdi(0, r.r2.value, frame_depth)         # 1
             #pmc.bc(7, 0, jmp_target - (traps_pos + 4))    # 2   "bge+"
             #pmc.li(r.r0.value, frame_depth)               # 3
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -277,7 +277,7 @@
         ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
         self.mc.load_imm(r.SCRATCH, rffi.cast(lltype.Signed,
                                            cast_instance_to_gcref(faildescr)))
-        self.mc.STD(r.SCRATCH, l.addr(ofs, r.SPP))
+        self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP))
 
     def _find_nearby_operation(self, regalloc, delta):
         return regalloc.operations[regalloc.rm.position + delta]
@@ -927,19 +927,22 @@
     def emit_zero_array(self, op, arglocs, regalloc):
         base_loc, startindex_loc, length_loc, \
             ofs_loc, itemsize_loc, pad_byte_loc = arglocs
+        print(op, arglocs)
 
         if ofs_loc.is_imm():
+            assert check_imm_value(ofs_loc.value)
             self.mc.AGHI(base_loc, ofs_loc)
         else:
             self.mc.AGR(base_loc, ofs_loc)
         if startindex_loc.is_imm():
+            assert check_imm_value(startindex_loc.value)
             self.mc.AGHI(base_loc, startindex_loc)
         else:
             self.mc.AGR(base_loc, startindex_loc)
         assert not length_loc.is_imm()
-        self.mc.SGR(pad_byte_loc, pad_byte_loc)
-        pad_byte_plus_one = r.odd_reg(pad_byte_loc)
-        self.mc.SGR(pad_byte_plus_one, pad_byte_plus_one)
+        self.mc.XGR(pad_byte_loc, pad_byte_loc)
+        pad_plus = r.odd_reg(pad_byte_loc)
+        self.mc.XGR(pad_plus, pad_plus)
         self.mc.XGR(r.SCRATCH, r.SCRATCH)
         # s390x has memset directly as a hardware instruction!!
         # it needs 5 registers allocated
@@ -947,9 +950,15 @@
         # pad_byte is rY to rY+1
         # scratch register holds the value written to dst
         assert pad_byte_loc.is_even()
+        assert pad_plus.value == pad_byte_loc.value + 1
         assert base_loc.is_even()
         assert length_loc.value == base_loc.value + 1
+        assert base_loc.value != pad_byte_loc.value
+        # NOTE this instruction can (determined by the cpu), just
+        # quit the movement any time, thus it is looped until all bytes
+        # are copied!
         self.mc.MVCLE(base_loc, pad_byte_loc, l.addr(0, r.SCRATCH))
+        self.mc.BCR(c.OF, l.imm(-self.mc.MVCLE_byte_count))
 
 
 class ForceOpAssembler(object):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to