[pypy-commit] pypy vecopt: removed the const arg for each vecop (but not load)

plan_rich Mon, 25 May 2015 08:02:45 -0700

Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77543:bccd719ea178
Date: 2015-05-25 17:01 +0200
http://bitbucket.org/pypy/pypy/changeset/bccd719ea178/


Log:    Removed the const arg for each vecop (but not load). Rewrote the
        unpacking/packing and gave it a new structure (it has moved down to
        the OpToVectorOp class). Now displaying more info about the vector
        box: v<number>[<type><bits>#<count>]. Adjusted tests.

diff --git a/rpython/jit/backend/llgraph/runner.py 
b/rpython/jit/backend/llgraph/runner.py
--- a/rpython/jit/backend/llgraph/runner.py
+++ b/rpython/jit/backend/llgraph/runner.py
@@ -673,9 +673,8 @@
 
     # vector operations
     vector_arith_code = """
-    def bh_vec_{0}_{1}(self, vx, vy, count):
-        assert len(vx) == count
-        assert len(vy) == count
+    def bh_vec_{0}_{1}(self, vx, vy):
+        assert len(vx) == len(vy)
         return [_vx {2} _vy for _vx,_vy in zip(vx,vy)]
     """
     exec py.code.Source(vector_arith_code.format('int','add','+')).compile()
@@ -686,9 +685,8 @@
     exec py.code.Source(vector_arith_code.format('float','mul','*')).compile()
     exec py.code.Source(vector_arith_code.format('float','eq','==')).compile()
 
-    def bh_vec_float_eq(self, vx, vy, count):
-        assert len(vx) == count
-        assert len(vy) == count
+    def bh_vec_float_eq(self, vx, vy):
+        assert len(vx) == len(vy)
         return [_vx == _vy for _vx,_vy in zip(vx,vy)]
 
     def bh_vec_cast_float_to_singlefloat(self, vx):
@@ -706,7 +704,7 @@
     def bh_vec_expand(self, x, count):
         return [x] * count
 
-    def bh_vec_int_signext(self, vx, ext, count):
+    def bh_vec_int_signext(self, vx, ext):
         return [heaptracker.int_signext(_vx, ext) for _vx in vx]
 
     def bh_vec_getarrayitem_raw(self, struct, offset, count, descr):
@@ -715,6 +713,7 @@
             val = self.bh_getarrayitem_raw(struct, offset + i, descr)
             values.append(val)
         return values
+
     def bh_vec_raw_load(self, struct, offset, count, descr):
         values = []
         stride = descr.get_item_size_in_bytes()
@@ -723,13 +722,14 @@
             values.append(val)
         return values
 
-    def bh_vec_raw_store(self, struct, offset, newvalues, count, descr):
+    def bh_vec_raw_store(self, struct, offset, newvalues, descr):
         stride = descr.get_item_size_in_bytes()
-        for i in range(count):
-            self.bh_raw_store(struct, offset + i*stride, newvalues[i], descr)
-    def bh_vec_setarrayitem_raw(self, struct, offset, newvalues, count, descr):
-        for i in range(count):
-            self.bh_setarrayitem_raw(struct, offset + i, newvalues[i], descr)
+        for i,n in enumerate(newvalues):
+            self.bh_raw_store(struct, offset + i*stride, n, descr)
+
+    def bh_vec_setarrayitem_raw(self, struct, offset, newvalues, descr):
+        for i,n in enumerate(newvalues):
+            self.bh_setarrayitem_raw(struct, offset + i, n, descr)
 
 
     def store_fail_descr(self, deadframe, descr):
diff --git a/rpython/jit/backend/x86/assembler.py 
b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2705,7 +2705,6 @@
         self.mc.CVTDQ2PD(resloc, arglocs[0])
 
     def genop_vec_cast_singlefloat_to_float(self, op, arglocs, resloc):
-        loc0, tmploc, indexloc = arglocs
         self.mc.CVTPS2PD(resloc, arglocs[0])
 
     # ________________________________________
diff --git a/rpython/jit/backend/x86/regalloc.py 
b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -1509,7 +1509,7 @@
     consider_vec_raw_store = consider_vec_setarrayitem_raw
 
     def consider_vec_arith(self, op):
-        lhs = op.getarg(1)
+        lhs = op.getarg(0)
         assert isinstance(lhs, BoxVector)
         size = lhs.item_size
         args = op.getarglist()
@@ -1526,7 +1526,7 @@
     del consider_vec_arith
 
     def consider_vec_logic(self, op):
-        lhs = op.getarg(1)
+        lhs = op.getarg(0)
         assert isinstance(lhs, BoxVector)
         size = lhs.item_size
         args = op.getarglist()
@@ -1609,34 +1609,15 @@
     def consider_guard_early_exit(self, op):
         pass
 
-    def consider_vec_cast_float_to_singlefloat(self, op):
-        count = op.getarg(1)
-        assert isinstance(count, ConstInt)
+    def consider_vec_cast_float_to_int(self, op):
         args = op.getarglist()
-        loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
-        result = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
-        self.perform(op, [loc0, imm(count.value)], result)
-
-    def consider_vec_cast_singlefloat_to_float(self, op):
-        index = op.getarg(1)
-        assert isinstance(index, ConstInt)
-        args = op.getarglist()
-        loc0 = self.make_sure_var_in_reg(op.getarg(0), args)
-        result = self.force_allocate_reg(op.result, args)
-        tmpxvar = TempBox()
-        tmploc = self.xrm.force_allocate_reg(tmpxvar)
-        self.xrm.possibly_free_var(tmpxvar)
-        self.perform(op, [loc0, tmploc, imm(index.value)], result)
-
-    def consider_vec_cast_float_to_int(self, op):
-        src = op.getarg(0)
-        res = op.result
-        args = op.getarglist()
-        srcloc = self.make_sure_var_in_reg(src, args)
-        resloc = self.xrm.force_result_in_reg(res, src, args)
+        srcloc = self.make_sure_var_in_reg(op.getarg(0), args)
+        resloc = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
         self.perform(op, [srcloc], resloc)
 
     consider_vec_cast_int_to_float = consider_vec_cast_float_to_int
+    consider_vec_cast_float_to_singlefloat = consider_vec_cast_float_to_int
+    consider_vec_cast_singlefloat_to_float = consider_vec_cast_float_to_int
 
     # ________________________________________
 
diff --git a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py 
b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
--- a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
+++ b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
@@ -19,11 +19,6 @@
     if kwds['jit']:
         apply_jit(t, vectorize=True)
 
-    #cbuilder = genc.CStandaloneBuilder(t, f, t.config)
-    #cbuilder.generate_source(defines=cbuilder.DEBUG_DEFINES)
-    #cbuilder.compile()
-    #return cbuilder
-
 class TestVecOptX86(object):
     def test_translate(self):
         jd = JitDriver(greens = [], reds = 'auto', vectorize=True)
diff --git a/rpython/jit/metainterp/history.py 
b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -390,6 +390,9 @@
             except AttributeError:
                 t = 'b'
             self._str = '%s%d' % (t, Box._counter)
+            if self.type == VECTOR:
+                self._str += '[%s%d#%d]' % (self.item_type, self.item_size * 8,
+                                            self.item_count)
             Box._counter += 1
         return self._str
 
diff --git a/rpython/jit/metainterp/logger.py b/rpython/jit/metainterp/logger.py
--- a/rpython/jit/metainterp/logger.py
+++ b/rpython/jit/metainterp/logger.py
@@ -127,7 +127,7 @@
         elif isinstance(arg, BoxFloat):
             return 'f' + str(mv)
         elif isinstance(arg, BoxVector):
-            return 'v' + str(mv)
+            return 'v%s[%s%d#%d]' % (str(mv), arg.item_type, arg.item_size, 
arg.item_count)
         elif arg is None:
             return 'None'
         else:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py 
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -872,12 +872,12 @@
         i12 = int_add(i1, {stride})
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr={descr}arraydescr)
         v2 = vec_getarrayitem_raw(p1, i0, 2, descr={descr}arraydescr)
-        v3 = {op}(v1,v2,2)
-        vec_setarrayitem_raw(p2, i0, v3, 2, descr={descr}arraydescr)
+        v3 = {op}(v1,v2)
+        vec_setarrayitem_raw(p2, i0, v3, descr={descr}arraydescr)
         jump(p0,p1,p2,i12)
         """.format(op='vec_'+op,descr=descr,stride=1)
         loop = self.parse_loop(ops)
-        vopt = self.schedule(loop,1)
+        vopt = self.schedule(loop, 1)
         self.assert_equal(loop, self.parse_loop(vops))
 
     def test_vschedule_trace_1(self):
@@ -907,8 +907,8 @@
         guard_true(i18) []
         v19 = vec_raw_load(i2, i6, 2, descr=intarraydescr) 
         v20 = vec_raw_load(i3, i6, 2, descr=intarraydescr) 
-        v21 = vec_int_add(v19, v20, 2) 
-        vec_raw_store(i4, i6, v21, 2, descr=intarraydescr) 
+        v21 = vec_int_add(v19, v20) 
+        vec_raw_store(i4, i6, v21, descr=intarraydescr) 
         jump(i13, i1, i2, i3, i4)
         """
         vopt = self.schedule(self.parse_loop(ops),1)
@@ -925,8 +925,8 @@
         jump(p0,i2)
         """
         dead_code =  '\n        '.join([
-          "i{t1} = int_add(i{t},1)\n        i{s} = int_lt(i{t1}, 102)".format(
-              i=i+1, t1=i+201, t=i+200, s=i+20)
+          "i{t1} = int_add(i0,{i})\n        i{s} = int_lt(i{t1}, 102)".format(
+              i=i+2, t1=i+201, t=i+200, s=i+20)
           for i in range(0,14)])
         opt="""
         [p0,i0]
@@ -986,7 +986,7 @@
         i5 = int_lt(i4, 10)
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
         v3 = vec_int_expand(42)
-        v2 = vec_int_mul(v1, v3, 2)
+        v2 = vec_int_mul(v1, v3)
         jump(p0,i4)
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1015,7 +1015,7 @@
         i5 = int_lt(i4, 10)
         v1 = vec_getarrayitem_raw(p0, i0, 2, descr=floatarraydescr)
         v3 = vec_float_expand(f3)
-        v2 = vec_int_mul(v1, v3, 2)
+        v2 = vec_int_mul(v1, v3)
         jump(p0,i4,f3)
         """
         vopt = self.vectorize(self.parse_loop(ops),1)
@@ -1047,20 +1047,21 @@
         i48 = int_add(i41, 8) 
         i51 = int_add(i37, 8) 
         i52 = int_ge(i50, i18) 
-        guard_false(i52) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, 
i43, f34, i28, p36, i41]
-        i55 = int_add(i46, 8) 
-        i54 = int_add(i48, 8) 
-        i56 = int_add(i51, 8) 
-        i53 = int_add(i50, 1)
-        i57 = int_ge(i53, i18) 
-        guard_false(i57) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, 
i43, f34, i28, p36, i41]
+        i637 = int_add(i28, 2)
+        i638 = int_ge(i637, i18)
+        guard_false(i638) [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42, 
i43, f34, i28, p36, i41]
+        i55 = int_add(i44, 16) 
+        i54 = int_add(i41, 16) 
+        i56 = int_add(i37, 16) 
+        i629 = same_as(i637)
+        i57 = int_ge(i629, i18) 
         v61 = vec_raw_load(i21, i44, 2, descr=floatarraydescr) 
         v62 = vec_raw_load(i4, i41, 2, descr=floatarraydescr) 
-        v63 = vec_float_add(v61, v62, 2) 
-        vec_raw_store(i0, i37, v63, 2, descr=floatarraydescr) 
+        v63 = vec_float_add(v61, v62) 
+        vec_raw_store(i0, i37, v63, descr=floatarraydescr) 
         f100 = vec_float_unpack(v61, 1, 1)
         f101 = vec_float_unpack(v62, 1, 1)
-        jump(p36, i53, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, p42, 
i43, i55, i21, i4, i0, i18)
+        jump(p36, i629, p9, i56, p14, f100, p12, p38, f101, p39, i40, i54, 
p42, i43, i55, i21, i4, i0, i18)
         """
         vopt = self.vectorize(self.parse_loop(ops))
         self.assert_equal(vopt.loop, self.parse_loop(opt))
@@ -1072,7 +1073,7 @@
         f1 = getarrayitem_raw(p0, i1, descr=floatarraydescr)
         i2 = cast_float_to_singlefloat(f1)
         setarrayitem_raw(p1, i1, i2, descr=singlefloatarraydescr)
-        i3 = int_sub(i1, 1)
+        i3 = int_add(i1, 1)
         i4 = int_ge(i3, 36)
         guard_false(i4) []
         jump(p0, p1, i3)
@@ -1080,23 +1081,23 @@
         opt = """
         [p0, p1, i1]
         guard_early_exit() []
-        i3 = int_sub(i1, 1)
+        i3 = int_add(i1, 1)
         i4 = int_ge(i3, 36)
-        i50 = int_add(i1, -4)
+        i50 = int_add(i1, 4)
         i51 = int_ge(i50, 36)
         guard_false(i51) []
-        i5 = int_sub(i3, 1)
+        i5 = int_add(i1, 2)
         i8 = int_ge(i5, 36)
-        i6 = int_sub(i5, 1)
+        i6 = int_add(i1, 3)
         i11 = int_ge(i6, 36)
         i7 = same_as(i50)
         i14 = int_ge(i7, 36)
         v17 = vec_getarrayitem_raw(p0, i1, 2, descr=floatarraydescr)
         v18 = vec_getarrayitem_raw(p0, i5, 2, descr=floatarraydescr)
-        v19 = vec_cast_float_to_singlefloat(v17, 2)
-        v20 = vec_cast_float_to_singlefloat(v18, 2)
+        v19 = vec_cast_float_to_singlefloat(v17)
+        v20 = vec_cast_float_to_singlefloat(v18)
         v21 = vec_float_pack(v19, v20, 2, 2)
-        vec_setarrayitem_raw(p1, i1, v21, 4, descr=singlefloatarraydescr)
+        vec_setarrayitem_raw(p1, i1, v21, descr=singlefloatarraydescr)
         jump(p0, p1, i7)
         """
         vopt = self.vectorize(self.parse_loop(ops))
@@ -1116,7 +1117,7 @@
         raw_store(p2, i4, i12, descr=singlefloatarraydescr)
         i5  = int_add(i4, 4) 
         i186 = int_lt(i5, 100) 
-        guard_false(i186) []
+        guard_true(i186) []
         jump(p0,p1,p2,i1,i5)
         """
         opt = """
@@ -1127,30 +1128,30 @@
         i186 = int_lt(i5, 100)
         i500 = int_add(i4, 16)
         i501 = int_lt(i500, 100)
-        guard_false(i501) []
-        i189 = int_add(i1, 4)
-        i187 = int_add(i5, 4)
-        i198 = int_add(i189, 4)
+        guard_true(i501) []
+        i189 = int_add(i0, 8)
+        i187 = int_add(i4, 8)
+        i198 = int_add(i0, 12)
         i188 = int_lt(i187, 100)
-        i207 = int_add(i198, 4)
-        i196 = int_add(i187, 4)
+        i207 = int_add(i0, 16)
+        i196 = int_add(i4, 12)
         i197 = int_lt(i196, 100)
         i205 = same_as(i500)
         i206 = int_lt(i205, 100)
         v228 = vec_raw_load(p0, i0, 4, descr=singlefloatarraydescr)
-        v229 = vec_cast_singlefloat_to_float(v228, 2)
+        v229 = vec_cast_singlefloat_to_float(v228)
         v230 = vec_int_unpack(v228, 2, 2)
-        v231 = vec_cast_singlefloat_to_float(v230, 2)
+        v231 = vec_cast_singlefloat_to_float(v230)
         v232 = vec_raw_load(p1, i1, 4, descr=singlefloatarraydescr)
-        v233 = vec_cast_singlefloat_to_float(v232, 2)
+        v233 = vec_cast_singlefloat_to_float(v232)
         v234 = vec_int_unpack(v232, 2, 2)
-        v235 = vec_cast_singlefloat_to_float(v234, 2)
-        v236 = vec_float_add(v229, v233, 2)
-        v237 = vec_float_add(v231, v235, 2)
-        v238 = vec_cast_float_to_singlefloat(v236, 2)
-        v239 = vec_cast_float_to_singlefloat(v237, 2)
+        v235 = vec_cast_singlefloat_to_float(v234)
+        v236 = vec_float_add(v229, v233)
+        v237 = vec_float_add(v231, v235)
+        v238 = vec_cast_float_to_singlefloat(v236)
+        v239 = vec_cast_float_to_singlefloat(v237)
         v240 = vec_float_pack(v238, v239, 2, 2)
-        vec_raw_store(p2, i4, v240, 4, descr=singlefloatarraydescr)
+        vec_raw_store(p2, i4, v240, descr=singlefloatarraydescr)
         jump(p0, p1, p2, i207, i205)
         """
         vopt = self.vectorize(self.parse_loop(ops))
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py 
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -62,8 +62,7 @@
             from rpython.rtyper.lltypesystem.lloperation import llop
             llop.debug_print_traceback(lltype.Void)
         else:
-            import py
-            py.test.set_trace()
+            raise
     finally:
         debug_stop("vec-opt-loop")
 
@@ -310,7 +309,6 @@
                     if memref_a.is_adjacent_to(memref_b):
                         if self.packset.can_be_packed(node_a, node_b):
                             pair = Pair(node_a,node_b)
-                            pair.ptype = 
PackType.by_descr(node_a.getoperation().getdescr())
                             self.packset.packs.append(pair)
 
     def extend_packset(self):
@@ -498,7 +496,6 @@
         self.stronger = False
 
     def implies(self, guard, opt):
-        #print self.cmp_op, "=>", guard.cmp_op, "?"
         if self.op.getopnum() != guard.op.getopnum():
             return False
 
@@ -509,8 +506,6 @@
             # same operation
             lc = self.compare(self.lhs, guard.lhs)
             rc = self.compare(self.rhs, guard.rhs)
-            #print "compare", self.lhs, guard.lhs, lc
-            #print "compare", self.rhs, guard.rhs, rc
             opnum = self.get_compare_opnum()
             if opnum == -1:
                 return False
@@ -719,11 +714,12 @@
         return self.count
 
     @staticmethod
-    def by_descr(descr):
+    def by_descr(descr, vec_reg_size):
         _t = INT
         if descr.is_array_of_floats() or descr.concrete_type == FLOAT:
             _t = FLOAT
-        pt = PackType(_t, descr.get_item_size_in_bytes(), 
descr.is_item_signed())
+        size = descr.get_item_size_in_bytes()
+        pt = PackType(_t, size, descr.is_item_signed(), vec_reg_size // size)
         return pt
 
     def is_valid(self):
@@ -732,206 +728,117 @@
     def new_vector_box(self, count):
         return BoxVector(self.type, count, self.size, self.signed)
 
-    def record_vbox(self, vbox):
-        if self.type == PackType.UNKNOWN_TYPE:
-            self.type = vbox.item_type
-            assert self.type in (FLOAT, INT)
-            self.signed = vbox.signed
-        if vbox.item_size > self.size:
-            self.size = vbox.item_size
+    def __repr__(self):
+        return 'PackType(%s, %d, %d, #%d)' % (self.type, self.size, 
self.signed, self.count)
 
-    def __repr__(self):
-        return 'PackType(%s, %s, %s)' % (self.type, self.size, self.signed)
+    @staticmethod
+    def of(box, count=-1):
+        assert isinstance(box, BoxVector)
+        if count == -1:
+            count = box.item_count
+        return PackType(box.item_type, box.item_size, box.signed, count)
 
     def clone(self):
-        return PackType(self.type, self.size, self.signed)
+        return PackType(self.type, self.size, self.signed, self.count)
 
 
 class OpToVectorOp(object):
-    def __init__(self, arg_ptypes, result_ptype, has_ptype=False, 
result_vsize_arg=-1):
+    def __init__(self, arg_ptypes, result_ptype, has_descr=False,
+                 arg_clone_ptype=0, 
+                 needs_count_in_params=False):
         self.arg_ptypes = list(arg_ptypes) # do not use a tuple. rpython 
cannot union
         self.result_ptype = result_ptype
-        self.has_ptype = has_ptype
-        self.result_vsize_arg = result_vsize_arg
+        self.has_descr = has_descr
+        self.arg_clone_ptype = arg_clone_ptype
+        self.needs_count_in_params = needs_count_in_params
+        self.preamble_ops = None
+        self.sched_data = None
 
-    def has_result(self):
-        return self.result_ptype is not None
-
-    def get_result_ptype(self):
-        return self.result_ptype
-
-    def get_arg_ptype(self, i):
-        if i < 0 or i >= len(self.arg_ptypes):
-            return None
-        return self.arg_ptypes[i]
-
-    def vector_arg(self, i):
+    def is_vector_arg(self, i):
         if i < 0 or i >= len(self.arg_ptypes):
             return False
         return self.arg_ptypes[i] is not None
 
-PT_FLOAT = PackType(FLOAT, 4, False)
-PT_FLOAT_2 = PackType(FLOAT, 4, False, count=2)
-PT_DOUBLE = PackType(FLOAT, 8, False)
-PT_INT_GENERIC = PackType(INT, -1, True)
-PT_INT64 = PackType(INT, 8, True)
-PT_INT32 = PackType(INT, 4, True)
-PT_INT32_2 = PackType(INT, 4, True, count=2)
-PT_FLOAT_GENERIC = PackType(INT, -1, True)
-PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, True)
+    def pack_ptype(self, op):
+        opnum = op.vector
+        args = op.getarglist()
+        result = op.result
+        if self.has_descr:
+            descr = op.getdescr()
+            return PackType.by_descr(descr, self.sched_data.vec_reg_size)
+        if self.arg_clone_ptype >= 0:
+            arg = args[self.arg_clone_ptype]
+            _, vbox = self.sched_data.box_to_vbox.get(arg, (-1, None))
+            if vbox:
+                return PackType.of(vbox)
 
-ROP_ARG_RES_VECTOR = {
-    rop.VEC_INT_ADD:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), 
PT_INT_GENERIC),
-    rop.VEC_INT_SUB:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), 
PT_INT_GENERIC),
-    rop.VEC_INT_MUL:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), 
PT_INT_GENERIC),
-    rop.VEC_INT_AND:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), 
PT_INT_GENERIC),
-    rop.VEC_INT_OR:      OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), 
PT_INT_GENERIC),
-    rop.VEC_INT_XOR:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), 
PT_INT_GENERIC),
-    rop.VEC_INT_SIGNEXT: OpToVectorOp((PT_INT_GENERIC,), PT_INT_GENERIC, 
result_vsize_arg=1),
+    def as_vector_operation(self, pack, sched_data, oplist):
+        self.sched_data = sched_data
+        self.preamble_ops = oplist
+        op0 = pack.operations[0].getoperation()
+        self.ptype = self.pack_ptype(op0)
 
-    rop.VEC_FLOAT_ADD:   OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), 
PT_FLOAT_GENERIC),
-    rop.VEC_FLOAT_SUB:   OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), 
PT_FLOAT_GENERIC),
-    rop.VEC_FLOAT_MUL:   OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), 
PT_FLOAT_GENERIC),
-    rop.VEC_FLOAT_EQ:    OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC), 
PT_INT_GENERIC),
+        off = 0
+        stride = self.split_pack(pack)
+        while off < len(pack.operations):
+            ops = pack.operations[off:off+stride]
+            self.transform_pack(ops, off, stride)
+            off += stride
 
-    rop.VEC_RAW_LOAD:         OpToVectorOp((), PT_GENERIC, has_ptype=True),
-    rop.VEC_GETARRAYITEM_RAW: OpToVectorOp((), PT_GENERIC, has_ptype=True),
-    rop.VEC_RAW_STORE:        OpToVectorOp((None,None,PT_GENERIC,), None, 
has_ptype=True),
-    rop.VEC_SETARRAYITEM_RAW: OpToVectorOp((None,None,PT_GENERIC,), None, 
has_ptype=True),
+        self.preamble_ops = None
+        self.sched_data = None
+        self.ptype = None
 
-    rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOp((PT_DOUBLE,), PT_FLOAT_2),
-    rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOp((PT_FLOAT_2,), PT_DOUBLE),
-    rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOp((PT_DOUBLE,), PT_INT32_2),
-    rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOp((PT_INT32_2,), PT_DOUBLE),
-}
+    def split_pack(self, pack):
+        pack_count = len(pack.operations)
+        vec_reg_size = self.sched_data.vec_reg_size
+        if pack_count * self.ptype.getsize() > vec_reg_size:
+            return vec_reg_size // self.ptype.getsize()
+        return pack_count
 
-
-class VecScheduleData(SchedulerData):
-    def __init__(self, vec_reg_size):
-        self.box_to_vbox = {}
-        self.unpack_rename_map = {}
-        self.preamble_ops = None
-        self.expansion_byte_count = -1
-        self.vec_reg_size = vec_reg_size
-        self.pack_ops = -1
-        self.pack_off = -1
-
-    def unpack_rename(self, arg):
-        return self.unpack_rename_map.get(arg, arg)
-
-    def rename_unpacked(self, arg, argdest):
-        self.unpack_rename_map[arg] = argdest
-
-    def as_vector_operation(self, pack):
-        op_count = len(pack.operations)
-        assert op_count > 1
-        self.pack = pack
-        # properties that hold for the pack are:
-        # + isomorphism (see func above)
-        # + tight packed (no room between vector elems)
-        if pack.ptype is None:
-            self.propagate_ptype()
-
-        self.preamble_ops = []
-        if pack.is_overloaded(self.vec_reg_size):
-            self.preamble_ops = []
-            stride = pack.size_in_bytes() // self.vec_reg_size
-            for i in range(0, op_count, stride):
-                self.pack_off = i
-                self.pack_ops = stride
-                self._as_vector_op()
-            return self.preamble_ops
-        else:
-            self.pack_off = 0
-            self.pack_ops = op_count
-            self._as_vector_op()
-            return self.preamble_ops
-
-    def _as_vector_op(self):
-        op0 = self.pack.operations[self.pack_off].getoperation()
-        assert op0.vector != -1
-        args = op0.getarglist()[:]
-
-        tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
-        if tovector is None:
-            raise NotImplementedError("vecop map entry missing. trans: pack -> 
vop")
-
-        args.append(ConstInt(self.pack_ops))
-        vop = ResOperation(op0.vector, args, op0.result, op0.getdescr())
-
+    def transform_pack(self, ops, off, stride):
+        op = ops[0].getoperation()
+        args = op.getarglist()
+        if self.needs_count_in_params:
+            args.append(ConstInt(len(ops)))
+        result = op.result
+        descr = op.getdescr()
         for i,arg in enumerate(args):
-            arg_ptype = tovector.get_arg_ptype(i)
-            if arg_ptype and tovector.has_ptype:
-                arg_ptype = self.pack.ptype
-            if arg_ptype is not None:
-                if arg_ptype.size == -1:
-                    arg_ptype = self.pack.ptype
-                self.vector_arg(vop, i, arg_ptype)
-        if tovector.has_result():
-            self.vector_result(vop, tovector)
-
+            if self.is_vector_arg(i):
+                args[i] = self.transform_argument(ops, args[i], i, off, stride)
+        #
+        result = self.transform_result(ops, result, off)
+        #
+        vop = ResOperation(op.vector, args, result, descr)
         self.preamble_ops.append(vop)
 
-    def propagate_ptype(self):
-        op0 = self.pack.operations[0].getoperation()
-        tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
-        if tovector is None:
-            raise NotImplementedError("vecop map entry missing. trans: pack -> 
vop")
-        if tovector.has_ptype:
-            assert False, "load/store must have ptypes attached from the 
descriptor"
-        args = op0.getarglist()[:]
-        res_ptype = tovector.get_result_ptype().clone()
-        for i,arg in enumerate(args):
-            if tovector.vector_arg(i):
-                _, vbox = self.box_to_vbox.get(arg, (-1, None))
-                if vbox is not None:
-                    res_ptype.record_vbox(vbox)
-                else:
-                    # vbox of a variable/constant is not present here
-                    pass
-        self.pack.ptype = res_ptype
+    def transform_result(self, ops, result, off):
+        if result is None:
+            return None
+        vbox = self.new_result_vector_box()
+        #
+        # mark the position and the vbox in the hash
+        for i, node in enumerate(ops):
+            op = node.getoperation()
+            self.sched_data.setvector_of_box(op.result, i, vbox)
+        return vbox
 
-    def vector_result(self, vop, tovector):
-        ops = self.pack.operations
-        ptype = tovector.get_result_ptype().clone()
-        if tovector.has_ptype:
-            ptype = self.pack.ptype
-        count = -1
-        if tovector.result_vsize_arg != -1:
-            # vec_int_signext specifies the size in bytes on the
-            # first argument.
-            arg = vop.getarg(tovector.result_vsize_arg)
-            assert isinstance(arg, ConstInt)
-            count = arg.value
-        else:
-            count = self.pack_ops
-        if ptype is not None:
-            if ptype.size == -1:
-                ptype.size = self.pack.ptype.size
-            vbox = ptype.new_vector_box(count)
-        else:
-            vbox = self.pack.ptype.new_vector_box(count)
-        #
-        vop.result = vbox
-        i = self.pack_off
-        off = 0 # XXX assumption. the result is always placed at index 
[0,...,x]
-        end = i + self.pack_ops
-        while i < end:
-            op = ops[i].getoperation()
-            self.box_in_vector(op.result, off, vbox)
-            i += 1
-            off += 1
+    def new_result_vector_box(self):
+        size = self.ptype.getsize()
+        count = self.ptype.getcount()
+        return BoxVector(self.ptype.gettype(), count, size, self.ptype.signed)
 
-    def box_in_vector(self, box, off, vector):
-        self.box_to_vbox[box] = (off, vector)
+    def transform_argument(self, ops, arg, argidx, off, count):
+        box_pos, vbox = self.sched_data.getvector_of_box(arg)
+        if not vbox:
+            # constant/variable expand this box
+            vbox = self.ptype.new_vector_box(count)
+            vbox = self.expand_box_to_vector_box(vbox, ops, arg, argidx)
+            box_pos = 0
 
-    def vector_arg(self, vop, argidx, arg_ptype):
-        ops = self.pack.operations
-        _, vbox = self.box_to_vbox.get(vop.getarg(argidx), (-1, None))
-        if not vbox:
-            vbox = self.expand_box_to_vector_box(vop, argidx)
-        # vbox is a primitive type mixin
-        packable = self.vec_reg_size // arg_ptype.getsize()
+        # use the input as an indicator for the pack type
+        arg_ptype = PackType.of(vbox)
+        packable = self.sched_data.vec_reg_size // arg_ptype.getsize()
         packed = vbox.item_count
         assert packed >= 0
         assert packable >= 0
@@ -941,26 +848,22 @@
             vbox = self._pack(vbox, packed, args, packable)
         elif packed > packable:
             # the argument has more items than the operation is able to 
process!
-            vbox = self.unpack(vbox, self.pack_off, packable, arg_ptype)
+            vbox = self.unpack(vbox, off, packable, arg_ptype)
             vbox = self.extend(vbox, arg_ptype)
-
+            # continue to handle the rest of the vbox
+        #
         # The instruction takes less items than the vector has.
-        # Unpack if not at pack_off 0
-        count = arg_ptype.getcount()
-        if count != -1 and count < vbox.item_count:
-            if self.pack_off == 0:
-                pass # right place already
-            else:
-                vbox = self.unpack(vbox, self.pack_off, count, arg_ptype)
-
-        vop.setarg(argidx, vbox)
+        # Unpack if not at off 0
+        if off != 0 and box_pos != 0:
+            vbox = self.unpack(vbox, off, count, arg_ptype)
+        #
         return vbox
 
     def extend(self, vbox, arg_ptype):
-        if vbox.item_count * vbox.item_size == self.vec_reg_size:
+        if vbox.item_count * vbox.item_size == self.sched_data.vec_reg_size:
             return vbox
         size = arg_ptype.getsize()
-        assert (vbox.item_count * size) == self.vec_reg_size
+        assert (vbox.item_count * size) == self.sched_data.vec_reg_size
         opnum = rop.VEC_INT_SIGNEXT
         vbox_cloned = arg_ptype.new_vector_box(vbox.item_count)
         op = ResOperation(opnum, [vbox, ConstInt(size), 
ConstInt(vbox.item_count)], vbox_cloned)
@@ -991,7 +894,7 @@
         i = index
         while i < arg_count and tgt_box.item_count < packable:
             arg = args[i]
-            pos, src_box = self.box_to_vbox.get(arg, (-1, None))
+            pos, src_box = self.sched_data.getvector_of_box(arg)
             if pos == -1:
                 i += 1
                 continue
@@ -1007,8 +910,9 @@
             # at a new position
             for j in range(i):
                 arg = args[j]
-                self.box_in_vector(arg, j, new_box)
-        _, vbox = self.box_to_vbox.get(args[0], (-1, None))
+                self.sched_data.setvector_of_box(arg, j, new_box)
+            tgt_box = new_box
+        _, vbox = self.sched_data.getvector_of_box(args[0])
         return vbox
 
     def _check_vec_pack(self, op):
@@ -1026,18 +930,13 @@
             assert arg1.item_size == result.item_size
         else:
             assert count.value == 1
-        assert index.value < result.item_size
-        assert index.value + count.value <= result.item_size
+        assert index.value < result.item_count
+        assert index.value + count.value <= result.item_count
         assert result.item_count > arg0.item_count
 
-    def expand_box_to_vector_box(self, vop, argidx):
-        arg = vop.getarg(argidx)
+    def expand_box_to_vector_box(self, vbox, ops, arg, argidx):
         all_same_box = True
-        ops = self.pack.operations
-        i = self.pack_off
-        end = i + self.pack_ops
-        while i < end:
-            op = ops[i]
+        for i, op in enumerate(ops):
             if arg is not op.getoperation().getarg(argidx):
                 all_same_box = False
                 break
@@ -1050,8 +949,6 @@
         if box_type == INT:
             expand_opnum = rop.VEC_INT_EXPAND
 
-        # TODO
-        vbox = BoxVector(box_type, self.pack_ops)
         if all_same_box:
             expand_op = ResOperation(expand_opnum, [arg], vbox)
             self.preamble_ops.append(expand_op)
@@ -1068,6 +965,142 @@
                 self.preamble_ops.append(resop)
         return vbox
 
+class OpToVectorOpConv(OpToVectorOp):
+    """ Vector transformation for cast operations, where the item size of
+    the single argument (from_size) may differ from the item size of the
+    result (to_size).
+    """
+    def __init__(self, intype, outtype):
+        # intype is a single pack type here, it is wrapped into a one
+        # element tuple for the base class
+        OpToVectorOp.__init__(self, (intype,), outtype)
+        self.from_size = intype.getsize()
+        self.to_size = outtype.getsize()
+
+    def split_pack(self, pack):
+        """ Returns how many operations of the pack are handled at once.
+        Casting down is delegated to the base class. Otherwise the count
+        is limited to the number of to_size items that fit into
+        vec_reg_size, if the argument vector would exceed it.
+        """
+        if self.from_size > self.to_size:
+            # cast down
+            return OpToVectorOp.split_pack(self, pack)
+        op0 = pack.operations[0].getoperation()
+        # NOTE(review): self.sched_data is not assigned in this class,
+        # presumably the base class sets it before split_pack is called
+        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
+        vec_reg_size = self.sched_data.vec_reg_size
+        if vbox.getcount() * self.to_size > vec_reg_size:
+            return vec_reg_size // self.to_size
+        return len(pack.operations)
+
+    def new_result_vector_box(self):
+        """ Creates the result BoxVector with items of to_size. The item
+        count is taken from self.ptype and cut down to what fits into
+        vec_reg_size.
+        """
+        size = self.to_size
+        count = self.ptype.getcount()
+        vec_reg_size = self.sched_data.vec_reg_size
+        if count * size > vec_reg_size:
+            count = vec_reg_size // size
+        return BoxVector(self.result_ptype.gettype(), count, size,
+                         self.ptype.signed)
+
+class SignExtToVectorOp(OpToVectorOp):
+    """ Vector transformation for the sign extension. The target item size
+    is not known up front, it is read from the constant second argument
+    of the first operation of the pack (see split_pack).
+    """
+    def __init__(self, intype, outtype):
+        # The entry in ROP_ARG_RES_VECTOR already passes a tuple of pack
+        # types. Wrapping it a second time would hand a nested tuple to
+        # the base class, thus only a bare pack type is wrapped.
+        if not isinstance(intype, tuple):
+            intype = (intype,)
+        OpToVectorOp.__init__(self, intype, outtype)
+        # -1 until split_pack has read the size of the extension
+        self.size = -1
+
+    def split_pack(self, pack):
+        """ Remembers the target size of the sign extension in self.size
+        and returns how many items are handled at once. Casting down is
+        delegated to the base class. Otherwise the count of the argument
+        vector is used, limited to the number of self.size items that
+        fit into vec_reg_size.
+        """
+        op0 = pack.operations[0].getoperation()
+        sizearg = op0.getarg(1)
+        assert isinstance(sizearg, ConstInt)
+        self.size = sizearg.value
+        if self.ptype.getsize() > self.size:
+            # cast down
+            return OpToVectorOp.split_pack(self, pack)
+        _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
+        vec_reg_size = self.sched_data.vec_reg_size
+        if vbox.getcount() * self.size > vec_reg_size:
+            # this class has no to_size attribute (that one belongs to
+            # OpToVectorOpConv), the target size is self.size
+            return vec_reg_size // self.size
+        return vbox.getcount()
+
+    def new_result_vector_box(self):
+        """ Creates the result BoxVector with items of self.size. The item
+        count is taken from self.ptype and cut down to what fits into
+        vec_reg_size.
+        """
+        count = self.ptype.getcount()
+        vec_reg_size = self.sched_data.vec_reg_size
+        if count * self.size > vec_reg_size:
+            count = vec_reg_size // self.size
+        return BoxVector(self.result_ptype.gettype(), count, self.size,
+                         self.ptype.signed)
+
+
+# Prebuilt pack types used by the ROP_ARG_RES_VECTOR table below.
+# NOTE(review): the PackType constructor is not visible here, the arguments
+# are presumably (type, item size, signed) and a size of -1 presumably
+# means "not fixed" -- confirm against the PackType definition.
+PT_FLOAT = PackType(FLOAT, 4, False)
+PT_DOUBLE = PackType(FLOAT, 8, False)
+# NOTE(review): built with INT and signed=True, which makes it identical
+# to PT_INT_GENERIC -- confirm that this is not meant to be FLOAT
+PT_FLOAT_GENERIC = PackType(INT, -1, True)
+PT_INT64 = PackType(INT, 8, True)
+PT_INT32 = PackType(INT, 4, True)
+PT_INT_GENERIC = PackType(INT, -1, True)
+PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, True)
+
+# aliases used in the result position of the table entries
+INT_RES = PT_INT_GENERIC
+FLOAT_RES = PT_FLOAT_GENERIC
+LOAD_RES = PT_GENERIC
+
+# Maps the vector opcode of a pack to the prebuilt object that transforms
+# the pack into vector operations. It is looked up with the `vector`
+# attribute of the first operation of a pack, a missing entry ends up in a
+# NotImplementedError (see VecScheduleData.as_vector_operation).
+# The first argument lists the pack type of each argument, the second one
+# is the pack type of the result (None for the stores).
+ROP_ARG_RES_VECTOR = {
+    rop.VEC_INT_ADD:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
+                                      INT_RES),
+    rop.VEC_INT_SUB:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
+                                      INT_RES),
+    rop.VEC_INT_MUL:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
+                                      INT_RES),
+    rop.VEC_INT_AND:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
+                                      INT_RES),
+    rop.VEC_INT_OR:      OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
+                                      INT_RES),
+    rop.VEC_INT_XOR:     OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC),
+                                      INT_RES),
+
+    rop.VEC_INT_SIGNEXT: SignExtToVectorOp((PT_INT_GENERIC,), INT_RES),
+
+    rop.VEC_FLOAT_ADD:   OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
+                                      FLOAT_RES),
+    rop.VEC_FLOAT_SUB:   OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
+                                      FLOAT_RES),
+    rop.VEC_FLOAT_MUL:   OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
+                                      FLOAT_RES),
+    # the comparison yields an integer result
+    rop.VEC_FLOAT_EQ:    OpToVectorOp((PT_FLOAT_GENERIC,PT_FLOAT_GENERIC),
+                                      INT_RES),
+
+    rop.VEC_RAW_LOAD:         OpToVectorOp((), LOAD_RES, has_descr=True,
+                                           arg_clone_ptype=-2,
+                                           needs_count_in_params=True
+                                          ),
+    rop.VEC_GETARRAYITEM_RAW: OpToVectorOp((), LOAD_RES,
+                                           has_descr=True,
+                                           arg_clone_ptype=-2,
+                                           needs_count_in_params=True
+                                          ),
+    rop.VEC_RAW_STORE:        OpToVectorOp((None,None,PT_GENERIC,), None,
+                                           has_descr=True,
+                                           arg_clone_ptype=2),
+    rop.VEC_SETARRAYITEM_RAW: OpToVectorOp((None,None,PT_GENERIC,), None,
+                                           has_descr=True,
+                                           arg_clone_ptype=2),
+
+    rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpToVectorOpConv(PT_DOUBLE, PT_FLOAT),
+    rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT, PT_DOUBLE),
+    rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE, PT_INT32),
+    rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32, PT_DOUBLE),
+}
+
+class VecScheduleData(SchedulerData):
+    """ State shared by the transformation of packs into vector
+    operations: the size of a vector register, the mapping of a scalar
+    box to the vector box that holds it and the renaming of unpacked
+    boxes.
+    """
+    def __init__(self, vec_reg_size):
+        # box -> (offset, vector box), see setvector_of_box
+        self.box_to_vbox = {}
+        # box -> box it was renamed to, see rename_unpacked
+        self.unpack_rename_map = {}
+        self.preamble_ops = None
+        self.expansion_byte_count = -1
+        self.vec_reg_size = vec_reg_size
+        self.pack_ops = -1
+        self.pack_off = -1
+
+    def unpack_rename(self, arg):
+        """ Returns the box arg was renamed to, or arg itself if it has
+        not been renamed.
+        """
+        return self.unpack_rename_map.get(arg, arg)
+
+    def rename_unpacked(self, arg, argdest):
+        """ Records that arg is from now on known as argdest. """
+        self.unpack_rename_map[arg] = argdest
+
+    def as_vector_operation(self, pack):
+        """ Transforms the pack and returns the list of operations that
+        was filled by the matching ROP_ARG_RES_VECTOR entry. Raises
+        NotImplementedError if there is no entry for the vector opcode of
+        the first operation.
+        """
+        op_count = len(pack.operations)
+        assert op_count > 1
+        self.pack = pack
+        # properties that hold for the pack are:
+        # + isomorphism (see func above)
+        # + tight packed (no room between vector elems)
+
+        op0 = pack.operations[0].getoperation()
+        tovector = ROP_ARG_RES_VECTOR.get(op0.vector, None)
+        if tovector is None:
+            # adjacent literals, the message is one single string
+            raise NotImplementedError("vecop map entry missing. "
+                                      "trans: pack -> vop")
+        oplist = []
+        tovector.as_vector_operation(pack, self, oplist)
+        return oplist
+
+    def getvector_of_box(self, arg):
+        """ Returns the (offset, vector box) pair recorded for arg, or
+        (-1, None) if arg is not part of any vector box.
+        """
+        return self.box_to_vbox.get(arg, (-1, None))
+
+    def setvector_of_box(self, box, off, vector):
+        """ Records that box is located at offset off in vector. """
+        self.box_to_vbox[box] = (off, vector)
+
+
 def isomorphic(l_op, r_op):
     """ Same instructions have the same operation name.
     TODO what about parameters?
diff --git a/rpython/jit/metainterp/resoperation.py 
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -453,26 +453,26 @@
     # vector operations
     '_VEC_PURE_FIRST',
     '_VEC_ARITHMETIC_FIRST',
-    'VEC_INT_ADD/3',
-    'VEC_INT_SUB/3',
-    'VEC_INT_MUL/3',
-    'VEC_INT_AND/3',
-    'VEC_INT_OR/3',
-    'VEC_INT_XOR/3',
-    'VEC_FLOAT_ADD/3',
-    'VEC_FLOAT_SUB/3',
-    'VEC_FLOAT_MUL/3',
-    'VEC_FLOAT_DIV/3',
+    'VEC_INT_ADD/2',
+    'VEC_INT_SUB/2',
+    'VEC_INT_MUL/2',
+    'VEC_INT_AND/2',
+    'VEC_INT_OR/2',
+    'VEC_INT_XOR/2',
+    'VEC_FLOAT_ADD/2',
+    'VEC_FLOAT_SUB/2',
+    'VEC_FLOAT_MUL/2',
+    'VEC_FLOAT_DIV/2',
     '_VEC_ARITHMETIC_LAST',
-    'VEC_FLOAT_EQ/3',
+    'VEC_FLOAT_EQ/2',
 
-    'VEC_INT_SIGNEXT/3',
+    'VEC_INT_SIGNEXT/2',
     # double -> float: v2 = cast(v1, 2) equal to v2 = (v1[0], v1[1], X, X)
-    'VEC_CAST_FLOAT_TO_SINGLEFLOAT/2',
+    'VEC_CAST_FLOAT_TO_SINGLEFLOAT/1',
     # v4 = cast(v3, 0, 2), v4 = (v3[0], v3[1])
-    'VEC_CAST_SINGLEFLOAT_TO_FLOAT/2',
-    'VEC_CAST_FLOAT_TO_INT/2',
-    'VEC_CAST_INT_TO_FLOAT/2',
+    'VEC_CAST_SINGLEFLOAT_TO_FLOAT/1',
+    'VEC_CAST_FLOAT_TO_INT/1',
+    'VEC_CAST_INT_TO_FLOAT/1',
 
     'VEC_FLOAT_UNPACK/3',        # iX|fX = VEC_FLOAT_UNPACK(vX, index, 
item_count)
     'VEC_FLOAT_PACK/4',          # VEC_FLOAT_PACK(vX, var/const, index, 
item_count)
@@ -553,9 +553,9 @@
     'INCREMENT_DEBUG_COUNTER/1',
     'SETARRAYITEM_GC/3d',
     'SETARRAYITEM_RAW/3d',
-    'VEC_SETARRAYITEM_RAW/4d',
+    'VEC_SETARRAYITEM_RAW/3d',
     'RAW_STORE/3d',
-    'VEC_RAW_STORE/4d',
+    'VEC_RAW_STORE/3d',
     'SETINTERIORFIELD_GC/3d',
     'SETINTERIORFIELD_RAW/3d',    # right now, only used by tests
     'SETFIELD_GC/2d',
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy vecopt: removed the const arg for each vecop (but not load)

Reply via email to