Author: Richard Plangger <planri...@gmail.com> Branch: s390x-backend Changeset: r81503:7288aa624ed6 Date: 2015-12-30 17:45 +0100 http://bitbucket.org/pypy/pypy/changeset/7288aa624ed6/
Log: fixed an edge case: s390x's native instruction for memset can return in the middle of the copy (determined by the cpu), added a loop to ensure all bytes are copied diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py --- a/rpython/jit/backend/zarch/assembler.py +++ b/rpython/jit/backend/zarch/assembler.py @@ -7,7 +7,8 @@ from rpython.jit.backend.zarch import registers as r from rpython.jit.backend.zarch import locations as l from rpython.jit.backend.zarch.pool import LiteralPool -from rpython.jit.backend.zarch.codebuilder import InstrBuilder +from rpython.jit.backend.zarch.codebuilder import (InstrBuilder, + OverwritingBuilder) from rpython.jit.backend.zarch.helper.regalloc import check_imm_value from rpython.jit.backend.zarch.registers import JITFRAME_FIXED_SIZE from rpython.jit.backend.zarch.regalloc import ZARCHRegisterManager @@ -493,7 +494,8 @@ ofs = self.cpu.unpack_fielddescr(descrs.arraydescr.lendescr) #mc.LG(r.r2, l.addr(ofs, r.SPP)) patch_pos = mc.currpos() - self.mc.trap() + # XXX TODO + #self.mc.trap() #mc.TRAP2() # placeholder for cmpdi(0, r2, ...) #mc.TRAP2() # placeholder for bge #mc.TRAP2() # placeholder for li(r0, ...) @@ -844,7 +846,6 @@ for traps_pos, jmp_target in self.frame_depth_to_patch: pmc = OverwritingBuilder(self.mc, traps_pos, 3) # three traps, so exactly three instructions to patch here - xxx #pmc.cmpdi(0, r.r2.value, frame_depth) # 1 #pmc.bc(7, 0, jmp_target - (traps_pos + 4)) # 2 "bge+" #pmc.li(r.r0.value, frame_depth) # 3 diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -277,7 +277,7 @@ ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr') self.mc.load_imm(r.SCRATCH, rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr))) - self.mc.STD(r.SCRATCH, l.addr(ofs, r.SPP)) + self.mc.STG(r.SCRATCH, l.addr(ofs, r.SPP)) def _find_nearby_operation(self, regalloc, delta): return regalloc.operations[regalloc.rm.position + delta] @@ -927,19 +927,22 @@ def emit_zero_array(self, op, arglocs, regalloc): base_loc, startindex_loc, length_loc, \ ofs_loc, itemsize_loc, pad_byte_loc = arglocs + print(op, arglocs) if ofs_loc.is_imm(): + assert check_imm_value(ofs_loc.value) self.mc.AGHI(base_loc, ofs_loc) else: self.mc.AGR(base_loc, ofs_loc) if startindex_loc.is_imm(): + assert check_imm_value(startindex_loc.value) self.mc.AGHI(base_loc, startindex_loc) else: self.mc.AGR(base_loc, startindex_loc) assert not length_loc.is_imm() - self.mc.SGR(pad_byte_loc, pad_byte_loc) - pad_byte_plus_one = r.odd_reg(pad_byte_loc) - self.mc.SGR(pad_byte_plus_one, pad_byte_plus_one) + self.mc.XGR(pad_byte_loc, pad_byte_loc) + pad_plus = r.odd_reg(pad_byte_loc) + self.mc.XGR(pad_plus, pad_plus) self.mc.XGR(r.SCRATCH, r.SCRATCH) # s390x has memset directly as a hardware instruction!! # it needs 5 registers allocated @@ -947,9 +950,15 @@ # pad_byte is rY to rY+1 # scratch register holds the value written to dst assert pad_byte_loc.is_even() + assert pad_plus.value == pad_byte_loc.value + 1 assert base_loc.is_even() assert length_loc.value == base_loc.value + 1 + assert base_loc.value != pad_byte_loc.value + # NOTE this instruction can (determined by the cpu), just + # quit the movement any time, thus it is looped until all bytes + # are copied! self.mc.MVCLE(base_loc, pad_byte_loc, l.addr(0, r.SCRATCH)) + self.mc.BCR(c.OF, l.imm(-self.mc.MVCLE_byte_count)) class ForceOpAssembler(object): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit