Author: Richard Plangger <planri...@gmail.com> Branch: ppc-vsx-support Changeset: r85475:c2a7f4349490 Date: 2016-06-30 16:36 +0200 http://bitbucket.org/pypy/pypy/changeset/c2a7f4349490/
Log: provide vec_expand_i implementation diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py --- a/rpython/jit/backend/ppc/codebuilder.py +++ b/rpython/jit/backend/ppc/codebuilder.py @@ -706,6 +706,11 @@ vsel = VA(4, XO10=42) vspltisb = VXI(4, XO8=780) + VX_splat = Form("ivrT", "ivrB", "ivrA", "XO8") + vspltb = VX_splat(4, XO8=524) + vsplth = VX_splat(4, XO8=588) + vspltw = VX_splat(4, XO8=652) + diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py --- a/rpython/jit/backend/ppc/vector_ext.py +++ b/rpython/jit/backend/ppc/vector_ext.py @@ -481,35 +481,35 @@ elif size == 8: # splat the low of src to both slots in res src = srcloc.value - #import pdb; pdb.set_trace() self.mc.xxspltdl(res, src, src) else: notimplemented("[ppc/assembler] vec expand in this combination not supported") def emit_vec_expand_i(self, op, arglocs, regalloc): - notimplemented("[vec expand i]") - srcloc, sizeloc = arglocs - if not isinstance(srcloc, RegLoc): - self.mov(srcloc, X86_64_SCRATCH_REG) - srcloc = X86_64_SCRATCH_REG - assert not srcloc.is_xmm - size = sizeloc.value + res, l0, off = arglocs + size = op.bytesize + + self.mc.load_imm(r.SCRATCH2, off.value) + self.mc.lvx(res.value, r.SCRATCH2.value, r.SP.value) if size == 1: - self.mc.PINSRB_xri(resloc.value, srcloc.value, 0) - self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr)) + if IS_BIG_ENDIAN: + self.mc.vspltb(res.value, res.value, 0b0000) + else: + self.mc.vspltb(res.value, res.value, 0b1111) elif size == 2: - self.mc.PINSRW_xri(resloc.value, srcloc.value, 0) - self.mc.PINSRW_xri(resloc.value, srcloc.value, 4) - self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0) - self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0) + if IS_BIG_ENDIAN: + self.mc.vsplth(res.value, res.value, 0b000) + else: + self.mc.vsplth(res.value, res.value, 0b111) elif size == 4: - self.mc.PINSRD_xri(resloc.value, srcloc.value, 0) - self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0) + if IS_BIG_ENDIAN: + self.mc.vspltw(res.value, res.value, 0b00) + else: + self.mc.vspltw(res.value, res.value, 0b11) elif size == 8: - self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0) - self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1) + pass else: - raise AssertionError("cannot handle size %d (int expand)" % (size,)) + notimplemented("[expand int size not impl]") #def genop_vec_pack_i(self, op, arglocs, regalloc): # resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs @@ -811,7 +811,20 @@ res = self.force_allocate_vector_reg(op) return [res, l0] - prepare_vec_expand_i = prepare_vec_expand_f + def prepare_vec_expand_i(self, op): + arg = op.getarg(0) + mc = self.assembler.mc + if arg.is_constant(): + l0 = self.rm.get_scratch_reg() + mc.load_imm(l0, arg.value) + else: + l0 = self.ensure_reg(arg) + mc.store(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET) + size = op.bytesize + if size == 8: + mc.store(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+8) + res = self.force_allocate_vector_reg(op) + return [res, l0, imm(PARAM_SAVE_AREA_OFFSET)] def prepare_vec_int_is_true(self, op): arg = op.getarg(0) diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -755,7 +755,7 @@ node.pack = self node.pack_position = i - def split(self, packlist, vec_reg_size): + def split(self, packlist, vec_reg_size, vector_ext): """ Combination phase creates the biggest packs that are possible. In this step the pack is reduced in size to fit into an vector register. @@ -764,7 +764,7 @@ pack = self while pack.pack_load(vec_reg_size) > Pack.FULL: pack.clear() - oplist, newoplist = pack.slice_operations(vec_reg_size) + oplist, newoplist = pack.slice_operations(vec_reg_size, vector_ext) pack.operations = oplist pack.update_pack_of_nodes() if not pack.leftmost().is_typecast(): @@ -782,13 +782,13 @@ break pack.update_pack_of_nodes() - def opcount_filling_vector_register(self, vec_reg_size): + def opcount_filling_vector_register(self, vec_reg_size, vector_ext): left = self.leftmost() - oprestrict = trans.get(left) + oprestrict = vector_ext.get_operation_restriction(left) return oprestrict.opcount_filling_vector_register(left, vec_reg_size) - def slice_operations(self, vec_reg_size): - count = self.opcount_filling_vector_register(vec_reg_size) + def slice_operations(self, vec_reg_size, vector_ext): + count = self.opcount_filling_vector_register(vec_reg_size, vector_ext) assert count > 0 newoplist = self.operations[count:] oplist = self.operations[:count] diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py --- a/rpython/jit/metainterp/optimizeopt/vector.py +++ b/rpython/jit/metainterp/optimizeopt/vector.py @@ -451,7 +451,7 @@ if len_before == len(self.packset.packs): break - self.packset.split_overloaded_packs() + self.packset.split_overloaded_packs(self.cpu.vector_ext) if not we_are_translated(): # some test cases check the accumulation variables @@ -814,12 +814,12 @@ state.setvector_of_box(seed, 0, vecop) # prevent it from expansion state.renamer.start_renaming(seed, vecop) - def split_overloaded_packs(self): + def split_overloaded_packs(self, vector_ext): newpacks = [] for i,pack in enumerate(self.packs): load = pack.pack_load(self.vec_reg_size) if load > Pack.FULL: - pack.split(newpacks, self.vec_reg_size) + pack.split(newpacks, self.vec_reg_size, vector_ext) continue if load < Pack.FULL: for op in pack.operations: diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py --- a/rpython/jit/metainterp/test/test_vector.py +++ b/rpython/jit/metainterp/test/test_vector.py @@ -430,21 +430,24 @@ res = self.meta_interp(f, [60], vec_all=True) assert res == f(60) == 34.5 - def test_variable_expand(self): + @py.test.mark.parametrize('type,value', [(rffi.DOUBLE, 58.4547), + (lltype.Signed, 2300000), (rffi.INT, 4321), + (rffi.SHORT, 9922), (rffi.SIGNEDCHAR, -127)]) + def test_variable_expand(self, type, value): myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True) - T = lltype.Array(rffi.DOUBLE, hints={'nolength': True}) + T = lltype.Array(type, hints={'nolength': True}) def f(d,variable): va = lltype.malloc(T, d, flavor='raw', zero=True) i = 0 while i < d: myjitdriver.jit_merge_point() - va[i] = va[i] + variable + va[i] = rffi.cast(type, variable) i += 1 val = va[d//2] lltype.free(va, flavor='raw') return val - res = self.meta_interp(f, [60,58.4547]) - assert res == f(60,58.4547) == 58.4547 + res = self.meta_interp(f, [60,value]) + assert res == f(60,value) == value @py.test.mark.parametrize('vec,vec_all',[(False,True),(True,False),(True,True),(False,False)]) def test_accum(self, vec, vec_all): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit