Author: Richard Plangger <planri...@gmail.com>
Branch: zarch-simd-support
Changeset: r87131:2cd9f79ff7de
Date: 2016-09-16 11:59 +0200
http://bitbucket.org/pypy/pypy/changeset/2cd9f79ff7de/
Log: some more pack/unpack cases implemented diff --git a/rpython/jit/backend/zarch/instruction_builder.py b/rpython/jit/backend/zarch/instruction_builder.py --- a/rpython/jit/backend/zarch/instruction_builder.py +++ b/rpython/jit/backend/zarch/instruction_builder.py @@ -546,6 +546,30 @@ self.writechar(opcode2) return encode_vri_a +def build_vrs_b(mnemonic, (opcode1,opcode2), argtypes='v,r,db,m'): + @builder.arguments(argtypes) + def encode_vrs_b(self, v1, r2, db3, m4): + self.writechar(opcode1) + rbx = (v1 >= 16) << 3 + byte = (v1 & BIT_MASK_4) << 4 | (r2 & BIT_MASK_4) + self.writechar(chr(byte)) + encode_base_displace(self, db3) + self.writechar(chr((m4 & BIT_MASK_4) << 4 | (rbx & BIT_MASK_4))) + self.writechar(opcode2) + return encode_vrs_b + +def build_vrs_c(mnemonic, (opcode1,opcode2), argtypes='r,v,db,m'): + @builder.arguments(argtypes) + def encode_vrs_c(self, r1, v2, db3, m4): + self.writechar(opcode1) + rbx = (v2 >= 16) << 2 + byte = (r1 & BIT_MASK_4) << 4 | (v2 & BIT_MASK_4) + self.writechar(chr(byte)) + encode_base_displace(self, db3) + self.writechar(chr((m4 & BIT_MASK_4) << 4 | (rbx & BIT_MASK_4))) + self.writechar(opcode2) + return encode_vrs_c + def build_unpack_func(mnemonic, func): @always_inline diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py --- a/rpython/jit/backend/zarch/instructions.py +++ b/rpython/jit/backend/zarch/instructions.py @@ -334,6 +334,8 @@ 'VMRL': ('vrr_c', ['\xE7','\x60'], 'v,v,v,m'), 'VMRH': ('vrr_c', ['\xE7','\x61'], 'v,v,v,m'), 'VPDI': ('vrr_c', ['\xE7','\x84'], 'v,v,v,m'), + 'VLVG': ('vrs_b', ['\xE7','\x22']), + 'VLGV': ('vrs_c', ['\xE7','\x21']), # '': ('', ['','']), } diff --git a/rpython/jit/backend/zarch/vector_ext.py b/rpython/jit/backend/zarch/vector_ext.py --- a/rpython/jit/backend/zarch/vector_ext.py +++ b/rpython/jit/backend/zarch/vector_ext.py @@ -263,24 +263,6 @@ # 4 => bit 1 from the MSB: XxC self.mc.VCGD(resloc, loc0, 3, 4, mask.RND_TOZERO.value) - def 
emit_vec_expand_f(self, op, arglocs, regalloc): - assert isinstance(op, VectorOp) - resloc, srcloc = arglocs - size = op.bytesize - res = resloc.value - if isinstance(srcloc, l.ConstFloatLoc): - # they are aligned! - assert size == 8 - tloc = regalloc.rm.get_scratch_reg() - self.mc.load_imm(tloc, srcloc.value) - self.mc.lxvd2x(res, 0, tloc.value) - elif size == 8: - # splat the low of src to both slots in res - src = srcloc.value - self.mc.xxspltdl(res, src, src) - else: - not_implemented("vec expand in this combination not supported") - def emit_vec_expand_i(self, op, arglocs, regalloc): assert isinstance(op, VectorOp) resloc, loc0 = arglocs @@ -292,13 +274,12 @@ def _accum_reduce(self, op, arg, accumloc, targetloc): # Currently the accumulator can ONLY be 64 bit float/int if arg.type == FLOAT: - # r = (r[0]+r[1],r[0]+r[1]) - self.mc.VMRL(targetloc, accumloc, accumloc, l.MASK_VEC_DWORD) + self.mc.VPDI(targetloc, accumloc, accumloc, permi(1,0)) if op == '+': - self.mc.VFA(targetloc, targetloc, accumloc, 3, 0, 0) + self.mc.VFA(targetloc, targetloc, accumloc, 3, 0b1000, 0) return elif op == '*': - self.mc.VFM(targetloc, targetloc, accumloc, 3, 0, 0) + self.mc.VFM(targetloc, targetloc, accumloc, 3, 0b1000, 0) return else: assert arg.type == INT @@ -317,41 +298,14 @@ def emit_vec_pack_i(self, op, arglocs, regalloc): assert isinstance(op, VectorOp) - resultloc, vloc, sourceloc, residxloc, srcidxloc, countloc = arglocs - srcidx = srcidxloc.value + resloc, vloc, sourceloc, residxloc, srcidxloc, countloc = arglocs residx = residxloc.value count = countloc.value - res = resultloc.value - vector = vloc.value - src = sourceloc.value size = op.bytesize assert resultloc.is_vector_reg() # vector <- reg - self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET) - self.mc.stvx(vector, r.SCRATCH2.value, r.SP.value) - idx = residx - if size == 8: - if not IS_BIG_ENDIAN: - idx = (16 // size) - 1 - idx - self.mc.store(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+8*idx) - elif size == 4: - for 
j in range(count): - idx = j + residx - if not IS_BIG_ENDIAN: - idx = (16 // size) - 1 - idx - self.mc.stw(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+4*idx) - elif size == 2: - for j in range(count): - idx = j + residx - if not IS_BIG_ENDIAN: - idx = (16 // size) - 1 - idx - self.mc.sth(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+2*idx) - elif size == 1: - for j in range(count): - idx = j + residx - if not IS_BIG_ENDIAN: - idx = (16 // size) - 1 - idx - self.mc.stb(src, r.SP.value, PARAM_SAVE_AREA_OFFSET+idx) - self.mc.lvx(res, r.SCRATCH2.value, r.SP.value) + for j in range(count): + index = l.addr(j + residx) + self.mc.VLVG(resloc, sourceloc, index, l.itemsize_to_mask(size)) def emit_vec_unpack_i(self, op, arglocs, regalloc): assert isinstance(op, VectorOp) @@ -364,44 +318,26 @@ if count == 1: assert srcloc.is_vector_reg() assert not resloc.is_vector_reg() - off = PARAM_SAVE_AREA_OFFSET - self.mc.load_imm(r.SCRATCH2, off) - self.mc.stvx(src, r.SCRATCH2.value, r.SP.value) - if not IS_BIG_ENDIAN: - idx = (16 // size) - 1 - idx - off += size * idx - if size == 8: - self.mc.load(res, r.SP.value, off) - return - elif size == 4: - self.mc.lwa(res, r.SP.value, off) - return - elif size == 2: - self.mc.lha(res, r.SP.value, off) - return - elif size == 1: - self.mc.lbz(res, r.SP.value, off) - self.mc.extsb(res, res) - return + self.mc.VLGV(resloc, srcloc, index, l.itemsize_to_mask(size)) else: # count is not 1, but only 2 is supported for i32 # 4 for i16 and 8 for i8. 
src = srcloc.value res = resloc.value - self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET) - self.mc.stvx(src, r.SCRATCH2.value, r.SP.value) - self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET+16) - self.mc.stvx(res, r.SCRATCH2.value, r.SP.value) + #self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET) + #self.mc.stvx(src, r.SCRATCH2.value, r.SP.value) + #self.mc.load_imm(r.SCRATCH2, PARAM_SAVE_AREA_OFFSET+16) + #self.mc.stvx(res, r.SCRATCH2.value, r.SP.value) if count * size == 8: if not IS_BIG_ENDIAN: endian_off = 8 - off = PARAM_SAVE_AREA_OFFSET - off = off + endian_off - (idx * size) - assert idx * size + 8 <= 16 - self.mc.load(r.SCRATCH.value, r.SP.value, off) - self.mc.store(r.SCRATCH.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+16+endian_off) - self.mc.lvx(res, r.SCRATCH2.value, r.SP.value) + #off = PARAM_SAVE_AREA_OFFSET + #off = off + endian_off - (idx * size) + #assert idx * size + 8 <= 16 + #self.mc.load(r.SCRATCH.value, r.SP.value, off) + #self.mc.store(r.SCRATCH.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+16+endian_off) + #self.mc.lvx(res, r.SCRATCH2.value, r.SP.value) return not_implemented("%d bit integer, count %d" % \ diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py --- a/rpython/jit/metainterp/test/test_vector.py +++ b/rpython/jit/metainterp/test/test_vector.py @@ -402,10 +402,10 @@ bits = 64 la = data.draw(st.lists(strat, min_size=10, max_size=150)) - #la = [1.0] * 10 + la = [1.0] * 10 l = len(la) - accum = 0 #data.draw(strat) + accum = data.draw(strat) rawstorage = RawStorage() va = rawstorage.new(la, type) res = self.meta_interp(f, [accum, l*size, va]) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit