Author: Richard Plangger <planri...@gmail.com>
Branch: ppc-vsx-support
Changeset: r85475:c2a7f4349490
Date: 2016-06-30 16:36 +0200
http://bitbucket.org/pypy/pypy/changeset/c2a7f4349490/

Log:    provide vec_expand_i implementation

diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -706,6 +706,11 @@
     vsel = VA(4, XO10=42) 
     vspltisb = VXI(4, XO8=780)
 
+    VX_splat = Form("ivrT", "ivrB", "ivrA", "XO8")
+    vspltb = VX_splat(4, XO8=524)
+    vsplth = VX_splat(4, XO8=588)
+    vspltw = VX_splat(4, XO8=652)
+
 
 
 
diff --git a/rpython/jit/backend/ppc/vector_ext.py b/rpython/jit/backend/ppc/vector_ext.py
--- a/rpython/jit/backend/ppc/vector_ext.py
+++ b/rpython/jit/backend/ppc/vector_ext.py
@@ -481,35 +481,35 @@
         elif size == 8:
             # splat the low of src to both slots in res
             src = srcloc.value
-            #import pdb; pdb.set_trace()
             self.mc.xxspltdl(res, src, src)
         else:
             notimplemented("[ppc/assembler] vec expand in this combination not supported")
 
     def emit_vec_expand_i(self, op, arglocs, regalloc):
-        notimplemented("[vec expand i]")
-        srcloc, sizeloc = arglocs
-        if not isinstance(srcloc, RegLoc):
-            self.mov(srcloc, X86_64_SCRATCH_REG)
-            srcloc = X86_64_SCRATCH_REG
-        assert not srcloc.is_xmm
-        size = sizeloc.value
+        res, l0, off = arglocs
+        size = op.bytesize
+
+        self.mc.load_imm(r.SCRATCH2, off.value)
+        self.mc.lvx(res.value, r.SCRATCH2.value, r.SP.value)
         if size == 1:
-            self.mc.PINSRB_xri(resloc.value, srcloc.value, 0)
-            self.mc.PSHUFB(resloc, heap(self.expand_byte_mask_addr))
+            if IS_BIG_ENDIAN:
+                self.mc.vspltb(res.value, res.value, 0b0000)
+            else:
+                self.mc.vspltb(res.value, res.value, 0b1111)
         elif size == 2:
-            self.mc.PINSRW_xri(resloc.value, srcloc.value, 0)
-            self.mc.PINSRW_xri(resloc.value, srcloc.value, 4)
-            self.mc.PSHUFLW_xxi(resloc.value, resloc.value, 0)
-            self.mc.PSHUFHW_xxi(resloc.value, resloc.value, 0)
+            if IS_BIG_ENDIAN:
+                self.mc.vsplth(res.value, res.value, 0b000)
+            else:
+                self.mc.vsplth(res.value, res.value, 0b111)
         elif size == 4:
-            self.mc.PINSRD_xri(resloc.value, srcloc.value, 0)
-            self.mc.PSHUFD_xxi(resloc.value, resloc.value, 0)
+            if IS_BIG_ENDIAN:
+                self.mc.vspltw(res.value, res.value, 0b00)
+            else:
+                self.mc.vspltw(res.value, res.value, 0b11)
         elif size == 8:
-            self.mc.PINSRQ_xri(resloc.value, srcloc.value, 0)
-            self.mc.PINSRQ_xri(resloc.value, srcloc.value, 1)
+            pass
         else:
-            raise AssertionError("cannot handle size %d (int expand)" % (size,))
+            notimplemented("[expand int size not impl]")
 
     #def genop_vec_pack_i(self, op, arglocs, regalloc):
     #    resultloc, sourceloc, residxloc, srcidxloc, countloc, sizeloc = arglocs
@@ -811,7 +811,20 @@
             res = self.force_allocate_vector_reg(op)
         return [res, l0]
 
-    prepare_vec_expand_i = prepare_vec_expand_f
+    def prepare_vec_expand_i(self, op):
+        arg = op.getarg(0)
+        mc = self.assembler.mc
+        if arg.is_constant():
+            l0 = self.rm.get_scratch_reg()
+            mc.load_imm(l0, arg.value)
+        else:
+            l0 = self.ensure_reg(arg)
+        mc.store(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET)
+        size = op.bytesize
+        if size == 8:
+            mc.store(l0.value, r.SP.value, PARAM_SAVE_AREA_OFFSET+8)
+        res = self.force_allocate_vector_reg(op)
+        return [res, l0, imm(PARAM_SAVE_AREA_OFFSET)]
 
     def prepare_vec_int_is_true(self, op):
         arg = op.getarg(0)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -755,7 +755,7 @@
             node.pack = self
             node.pack_position = i
 
-    def split(self, packlist, vec_reg_size):
+    def split(self, packlist, vec_reg_size, vector_ext):
         """ Combination phase creates the biggest packs that are possible.
             In this step the pack is reduced in size to fit into an
             vector register.
@@ -764,7 +764,7 @@
         pack = self
         while pack.pack_load(vec_reg_size) > Pack.FULL:
             pack.clear()
-            oplist, newoplist = pack.slice_operations(vec_reg_size)
+            oplist, newoplist = pack.slice_operations(vec_reg_size, vector_ext)
             pack.operations = oplist
             pack.update_pack_of_nodes()
             if not pack.leftmost().is_typecast():
@@ -782,13 +782,13 @@
                 break
         pack.update_pack_of_nodes()
 
-    def opcount_filling_vector_register(self, vec_reg_size):
+    def opcount_filling_vector_register(self, vec_reg_size, vector_ext):
         left = self.leftmost()
-        oprestrict = trans.get(left)
+        oprestrict = vector_ext.get_operation_restriction(left)
         return oprestrict.opcount_filling_vector_register(left, vec_reg_size)
 
-    def slice_operations(self, vec_reg_size):
-        count = self.opcount_filling_vector_register(vec_reg_size)
+    def slice_operations(self, vec_reg_size, vector_ext):
+        count = self.opcount_filling_vector_register(vec_reg_size, vector_ext)
         assert count > 0
         newoplist = self.operations[count:]
         oplist = self.operations[:count]
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -451,7 +451,7 @@
             if len_before == len(self.packset.packs):
                 break
 
-        self.packset.split_overloaded_packs()
+        self.packset.split_overloaded_packs(self.cpu.vector_ext)
 
         if not we_are_translated():
             # some test cases check the accumulation variables
@@ -814,12 +814,12 @@
             state.setvector_of_box(seed, 0, vecop) # prevent it from expansion
             state.renamer.start_renaming(seed, vecop)
 
-    def split_overloaded_packs(self):
+    def split_overloaded_packs(self, vector_ext):
         newpacks = []
         for i,pack in enumerate(self.packs):
             load = pack.pack_load(self.vec_reg_size)
             if load > Pack.FULL:
-                pack.split(newpacks, self.vec_reg_size)
+                pack.split(newpacks, self.vec_reg_size, vector_ext)
                 continue
             if load < Pack.FULL:
                 for op in pack.operations:
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -430,21 +430,24 @@
         res = self.meta_interp(f, [60], vec_all=True)
         assert res == f(60) == 34.5
 
-    def test_variable_expand(self):
+    @py.test.mark.parametrize('type,value', [(rffi.DOUBLE, 58.4547),
+        (lltype.Signed, 2300000), (rffi.INT, 4321),
+        (rffi.SHORT, 9922), (rffi.SIGNEDCHAR, -127)])
+    def test_variable_expand(self, type, value):
         myjitdriver = JitDriver(greens = [], reds = 'auto', vectorize=True)
-        T = lltype.Array(rffi.DOUBLE, hints={'nolength': True})
+        T = lltype.Array(type, hints={'nolength': True})
         def f(d,variable):
             va = lltype.malloc(T, d, flavor='raw', zero=True)
             i = 0
             while i < d:
                 myjitdriver.jit_merge_point()
-                va[i] = va[i] + variable
+                va[i] = rffi.cast(type, variable)
                 i += 1
             val = va[d//2]
             lltype.free(va, flavor='raw')
             return val
-        res = self.meta_interp(f, [60,58.4547])
-        assert res == f(60,58.4547) == 58.4547
+        res = self.meta_interp(f, [60,value])
+        assert res == f(60,value) == value
 
     @py.test.mark.parametrize('vec,vec_all',[(False,True),(True,False),(True,True),(False,False)])
     def test_accum(self, vec, vec_all):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to