Author: Richard Plangger <planri...@gmail.com> Branch: ppc-vsx-support Changeset: r85191:daa1de3f481f Date: 2016-06-16 18:01 +0200 http://bitbucket.org/pypy/pypy/changeset/daa1de3f481f/
Log: added vector add for powerpc, test modified to use hypothesis diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py --- a/rpython/jit/backend/ppc/codebuilder.py +++ b/rpython/jit/backend/ppc/codebuilder.py @@ -62,6 +62,7 @@ XFX = Form("CRM", "rS", "XO1") XLL = Form("LL", "XO1") XX1 = Form("vrT", "rA", "rB", "XO1") +VX = Form("lvrT", "lvrA", "lvrB", "XO8") MI = Form("rA", "rS", "SH", "MB", "ME", "Rc") MB = Form("rA", "rS", "rB", "MB", "ME", "Rc") @@ -584,6 +585,9 @@ stxvd2x = XX1(31, XO1=972) stxvw4x = XX1(31, XO1=908) + # integer + vaddudm = VX(4, XO8=192) + class PPCAssembler(BasicPPCAssembler, PPCVSXAssembler): BA = BasicPPCAssembler diff --git a/rpython/jit/backend/ppc/ppc_field.py b/rpython/jit/backend/ppc/ppc_field.py --- a/rpython/jit/backend/ppc/ppc_field.py +++ b/rpython/jit/backend/ppc/ppc_field.py @@ -44,6 +44,13 @@ "TO": ( 6, 10), "UIMM": (16, 31), "vrT": (6, 31, 'unsigned', regname._V, 'overlap'), + # low vector register T (low in a sense: + # can only address 32 vector registers) + "lvrT": (6, 10, 'unsigned', regname._V), + # low vector register A + "lvrA": (11, 15, 'unsigned', regname._V), + # low vector register B + "lvrB": (16, 20, 'unsigned', regname._V), "XO1": (21, 30), "XO2": (22, 30), "XO3": (26, 30), @@ -51,6 +58,7 @@ "XO5": (27, 29), "XO6": (21, 29), "XO7": (27, 30), + "XO8": (21, 31), "LL": ( 9, 10), } @@ -102,16 +110,16 @@ value = super(sh, self).decode(inst) return (value & 32) << 5 | (value >> 10 & 31) -class tx(Field): - def encode(self, value): - value = (value & 31) << 20 | (value & 32) >> 5 - return super(tx, self).encode(value) - def decode(self, inst): - value = super(tx, self).decode(inst) - return (value & 32) << 5 | (value >> 20 & 31) - def r(self): - import pdb; pdb.set_trace() - return super(tx, self).r() +# ??? class tx(Field): +# ??? def encode(self, value): +# ??? value = (value & 31) << 20 | (value & 32) >> 5 +# ??? return super(tx, self).encode(value) +# ??? def decode(self, inst): +# ??? value = super(tx, self).decode(inst) +# ??? return (value & 32) << 5 | (value >> 20 & 31) +# ??? def r(self): +# ??? import pdb; pdb.set_trace() +# ??? return super(tx, self).r() # other special fields? ppc_fields = { @@ -121,7 +129,7 @@ "mbe": mbe("mbe", *fields["mbe"]), "sh": sh("sh", *fields["sh"]), "spr": spr("spr", *fields["spr"]), - "vrT": tx("vrT", *fields["vrT"]), + # ??? "vrT": tx("vrT", *fields["vrT"]), } for f in fields: diff --git a/rpython/jit/backend/ppc/rassemblermaker.py b/rpython/jit/backend/ppc/rassemblermaker.py --- a/rpython/jit/backend/ppc/rassemblermaker.py +++ b/rpython/jit/backend/ppc/rassemblermaker.py @@ -47,7 +47,7 @@ body.append('sh1 = (%s & 31) << 10 | (%s & 32) >> 5' % (value, value)) value = 'sh1' elif field.name == 'vrT': - body.append('vrT1 = (%s & 31) << 20 | (%s & 32) >> 5' % (value, value)) + body.append('vrT1 = (%s & 31) << 21 | (%s & 32) >> 5' % (value, value)) value = 'vrT1' if isinstance(field, IField): body.append('v |= ((%3s >> 2) & r_uint(%#05x)) << 2' % (value, field.mask)) diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py --- a/rpython/jit/backend/ppc/regalloc.py +++ b/rpython/jit/backend/ppc/regalloc.py @@ -256,6 +256,8 @@ if var is not None: if var.type == FLOAT: self.fprm.possibly_free_var(var) + elif var.is_vector() and var.type != VOID: + self.vrm.possibly_free_var(var) else: self.rm.possibly_free_var(var) @@ -309,10 +311,10 @@ # for j in range(op.numargs()): box = op.getarg(j) - if box.type != FLOAT: + if box.is_vector(): + self.vrm.temp_boxes.append(box) + elif box.type != FLOAT: self.rm.temp_boxes.append(box) - elif box.is_vector(): - self.vrm.temp_boxes.append(box) else: self.fprm.temp_boxes.append(box) # @@ -436,6 +438,7 @@ # temporary boxes and all the current operation's arguments self.rm.free_temp_vars() self.fprm.free_temp_vars() + self.vrm.free_temp_vars() # ****************************************************** # * P R E P A R E O P E R A T I O N S * diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py --- a/rpython/jit/backend/ppc/vector_ext.py +++ b/rpython/jit/backend/ppc/vector_ext.py @@ -49,9 +49,9 @@ def _vec_load(self, resloc, baseloc, indexloc, integer, itemsize, aligned): if integer: if itemsize == 4: + self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value) + elif itemsize == 8: self.mc.lxvd2x(resloc.value, indexloc.value, baseloc.value) - elif itemsize == 8: - self.mc.lxvw4x(resloc.value, indexloc.value, baseloc.value) else: raise NotImplementedError else: @@ -62,6 +62,48 @@ else: raise NotImplementedError + def _emit_vec_setitem(self, op, arglocs, regalloc): + # prepares item scale (raw_store does not) + base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs + scale = get_scale(size_loc.value) + dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale) + self._vec_store(dest_loc, value_loc, integer_loc.value, + size_loc.value, aligned_loc.value) + + genop_discard_vec_setarrayitem_raw = _emit_vec_setitem + genop_discard_vec_setarrayitem_gc = _emit_vec_setitem + + def emit_vec_raw_store(self, op, arglocs, regalloc): + baseloc, ofsloc, valueloc, size_loc, baseofs, \ + integer_loc, aligned_loc = arglocs + #dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, 0) + assert baseofs.value == 0 + self._vec_store(baseloc, ofsloc, valueloc, integer_loc.value, + size_loc.value, aligned_loc.value) + + def _vec_store(self, baseloc, indexloc, valueloc, integer, itemsize, aligned): + if integer: + if itemsize == 4: + self.mc.stxvw4x(valueloc.value, indexloc.value, baseloc.value) + elif itemsize == 8: + self.mc.stxvd2x(valueloc.value, indexloc.value, baseloc.value) + else: + raise NotImplementedError + else: + raise NotImplementedError + + + def emit_vec_int_add(self, op, arglocs, regalloc): + resloc, loc0, loc1, size_loc = arglocs + size = size_loc.value + if size == 1: + raise NotImplementedError + elif size == 2: + raise NotImplementedError + elif size == 4: + raise NotImplementedError + elif size == 8: + self.mc.vaddudm(resloc.value, loc0.value, loc1.value) #def genop_guard_vec_guard_true(self, guard_op, guard_token, locs, resloc): # self.implement_guard(guard_token) @@ -167,35 +209,6 @@ # not_implemented("reduce sum for %s not impl." % arg) - #def _genop_discard_vec_setarrayitem(self, op, arglocs): - # # prepares item scale (raw_store does not) - # base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs - # scale = get_scale(size_loc.value) - # dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, scale) - # self._vec_store(dest_loc, value_loc, integer_loc.value, - # size_loc.value, aligned_loc.value) - - #genop_discard_vec_setarrayitem_raw = _genop_discard_vec_setarrayitem - #genop_discard_vec_setarrayitem_gc = _genop_discard_vec_setarrayitem - - #def genop_discard_vec_raw_store(self, op, arglocs): - # base_loc, ofs_loc, value_loc, size_loc, baseofs, integer_loc, aligned_loc = arglocs - # dest_loc = addr_add(base_loc, ofs_loc, baseofs.value, 0) - # self._vec_store(dest_loc, value_loc, integer_loc.value, - # size_loc.value, aligned_loc.value) - - #def _vec_store(self, dest_loc, value_loc, integer, itemsize, aligned): - # if integer: - # if aligned: - # self.mc.MOVDQA(dest_loc, value_loc) - # else: - # self.mc.MOVDQU(dest_loc, value_loc) - # else: - # if itemsize == 4: - # self.mc.MOVUPS(dest_loc, value_loc) - # elif itemsize == 8: - # self.mc.MOVUPD(dest_loc, value_loc) - #def genop_vec_int_is_true(self, op, arglocs, resloc): # loc, sizeloc = arglocs # temp = X86_64_XMM_SCRATCH_REG @@ -219,18 +232,6 @@ # # There is no 64x64 bit packed mul. For 8 bit either. It is questionable if it gives any benefit? # not_implemented("int8/64 mul") - #def genop_vec_int_add(self, op, arglocs, resloc): - # loc0, loc1, size_loc = arglocs - # size = size_loc.value - # if size == 1: - # self.mc.PADDB(loc0, loc1) - # elif size == 2: - # self.mc.PADDW(loc0, loc1) - # elif size == 4: - # self.mc.PADDD(loc0, loc1) - # elif size == 8: - # self.mc.PADDQ(loc0, loc1) - #def genop_vec_int_sub(self, op, arglocs, resloc): # loc0, loc1, size_loc = arglocs # size = size_loc.value @@ -525,6 +526,11 @@ forbidden_vars = self.vrm.temp_boxes return self.vrm.force_allocate_reg(op, forbidden_vars) + def ensure_vector_reg(self, box): + loc = self.vrm.make_sure_var_in_reg(box, + forbidden_vars=self.vrm.temp_boxes) + return loc + def _prepare_load(self, op): descr = op.getdescr() assert isinstance(descr, ArrayDescr) @@ -549,43 +555,49 @@ prepare_vec_raw_load_i = _prepare_load prepare_vec_raw_load_f = _prepare_load - #def _prepare_vec_setarrayitem(self, op): - # descr = op.getdescr() - # assert isinstance(descr, ArrayDescr) - # assert not descr.is_array_of_pointers() and \ - # not descr.is_array_of_structs() - # itemsize, ofs, _ = unpack_arraydescr(descr) - # args = op.getarglist() - # base_loc = self.rm.make_sure_var_in_reg(op.getarg(0), args) - # value_loc = self.make_sure_var_in_reg(op.getarg(2), args) - # ofs_loc = self.rm.make_sure_var_in_reg(op.getarg(1), args) + def prepare_vec_arith(self, op): + a0 = op.getarg(0) + a1 = op.getarg(1) + assert isinstance(op, VectorOp) + size = op.bytesize + args = op.getarglist() + loc0 = self.ensure_vector_reg(a0) + loc1 = self.ensure_vector_reg(a1) + resloc = self.force_allocate_vector_reg(op) + return [resloc, loc0, loc1, imm(size)] - # integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT) - # aligned = False - # self.perform_discard(op, [base_loc, ofs_loc, value_loc, - # imm(itemsize), imm(ofs), imm(integer), imm(aligned)]) - - #prepare_vec_setarrayitem_raw = _prepare_vec_setarrayitem - #prepare_vec_setarrayitem_gc = _prepare_vec_setarrayitem - #prepare_vec_raw_store = _prepare_vec_setarrayitem - - #def prepare_vec_arith(self, op): - # lhs = op.getarg(0) - # assert isinstance(op, VectorOp) - # size = op.bytesize - # args = op.getarglist() - # loc1 = self.make_sure_var_in_reg(op.getarg(1), args) - # loc0 = self.xrm.force_result_in_reg(op, op.getarg(0), args) - # self.perform(op, [loc0, loc1, imm(size)], loc0) - - #prepare_vec_int_add = prepare_vec_arith + prepare_vec_int_add = prepare_vec_arith #prepare_vec_int_sub = prepare_vec_arith #prepare_vec_int_mul = prepare_vec_arith #prepare_vec_float_add = prepare_vec_arith #prepare_vec_float_sub = prepare_vec_arith #prepare_vec_float_mul = prepare_vec_arith #prepare_vec_float_truediv = prepare_vec_arith - #del prepare_vec_arith + del prepare_vec_arith + + def _prepare_vec_store(self, op): + descr = op.getdescr() + assert isinstance(descr, ArrayDescr) + assert not descr.is_array_of_pointers() and \ + not descr.is_array_of_structs() + itemsize, ofs, _ = unpack_arraydescr(descr) + a0 = op.getarg(0) + a1 = op.getarg(1) + a2 = op.getarg(2) + baseloc = self.ensure_reg(a0) + ofsloc = self.ensure_reg(a1) + valueloc = self.ensure_vector_reg(a2) + + integer = not (descr.is_array_of_floats() or descr.getconcrete_type() == FLOAT) + aligned = False + return [baseloc, ofsloc, valueloc, + imm(itemsize), imm(ofs), imm(integer), imm(aligned)] + + prepare_vec_setarrayitem_raw = _prepare_vec_store + prepare_vec_setarrayitem_gc = _prepare_vec_store + prepare_vec_raw_store = _prepare_vec_store + del _prepare_vec_store + #def prepare_vec_arith_unary(self, op): # lhs = op.getarg(0) diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py --- a/rpython/jit/metainterp/test/test_vector.py +++ b/rpython/jit/metainterp/test/test_vector.py @@ -1,5 +1,6 @@ import py +from hypothesis import given, note, strategies as st from rpython.jit.metainterp.warmspot import ll_meta_interp, get_stats from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.policy import StopAtXPolicy @@ -23,6 +24,8 @@ def free(mem): lltype.free(mem, flavor='raw') +integers_64bit = st.integers(min_value=-2**63, max_value=2**63-1) + class VectorizeTests: enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll' @@ -37,21 +40,26 @@ type_system=self.type_system, vec=vec, vec_all=vec_all) - @py.test.mark.parametrize('i',[3,4,5,6,7,8,9,50]) - def test_vectorize_simple_load_arith_store_int_add_index(self,i): + @given(st.lists(integers_64bit, min_size=5, max_size=50), + st.lists(integers_64bit, min_size=5, max_size=50)) + def test_vector_simple(self, la, lb): myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) + i = min(len(la), len(lb)) + la = la[:i] + lb = lb[:i] + bc = i*rffi.sizeof(rffi.SIGNED) + vc = alloc_raw_storage(bc, zero=True) + size = rffi.sizeof(rffi.SIGNED) def f(d): - bc = d*rffi.sizeof(rffi.SIGNED) va = alloc_raw_storage(bc, zero=True) vb = alloc_raw_storage(bc, zero=True) - vc = alloc_raw_storage(bc, zero=True) x = 1 for i in range(d): - j = i*rffi.sizeof(rffi.SIGNED) - raw_storage_setitem(va, j, rffi.cast(rffi.SIGNED,i)) - raw_storage_setitem(vb, j, rffi.cast(rffi.SIGNED,i)) + j = i*size + raw_storage_setitem(va, j, rffi.cast(rffi.SIGNED,la[i])) + raw_storage_setitem(vb, j, rffi.cast(rffi.SIGNED,lb[i])) i = 0 while i < bc: myjitdriver.jit_merge_point() @@ -59,17 +67,15 @@ b = raw_storage_getitem(rffi.SIGNED,vb,i) c = a+b raw_storage_setitem(vc, i, rffi.cast(rffi.SIGNED,c)) - i += 1*rffi.sizeof(rffi.SIGNED) - res = 0 - for i in range(d): - res += raw_storage_getitem(rffi.SIGNED,vc,i*rffi.sizeof(rffi.SIGNED)) + i += 1*size free_raw_storage(va) free_raw_storage(vb) - free_raw_storage(vc) - return res - res = self.meta_interp(f, [i]) - assert res == f(i) + self.meta_interp(f, [i]) + for p in range(i): + c = raw_storage_getitem(rffi.SIGNED,vc,p*size) + assert intmask(la[p] + lb[p]) == c + free_raw_storage(vc) @py.test.mark.parametrize('i',[1,2,3,8,17,128,130,131,142,143]) def test_vectorize_array_get_set(self,i): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit