Author: Richard Plangger <[email protected]>
Branch: vecopt2
Changeset: r77104:df7310e361b1
Date: 2015-04-01 14:35 +0200
http://bitbucket.org/pypy/pypy/changeset/df7310e361b1/

Log:    memory reference now correctly tracks if bytes overlap (previously
        the cell stride was ignored)

diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -190,12 +190,13 @@
                 pass
         # handle fail args
         op = self.operations[guard_idx]
-        for arg in op.getfailargs():
-            try:
-                def_idx = tracker.definition_index(arg)
-                self._put_edge(def_idx, guard_idx, arg)
-            except KeyError:
-                assert False
+        if op.getfailargs():
+            for arg in op.getfailargs():
+                try:
+                    def_idx = tracker.definition_index(arg)
+                    self._put_edge(def_idx, guard_idx, arg)
+                except KeyError:
+                    assert False
         #
         # guards check overflow or raise are directly dependent
         # find the first non guard operation
@@ -613,6 +614,7 @@
     will result in the linear combination i0 * (2/1) + 2
     """
     def __init__(self, array, origin, descr):
+        assert descr is not None
         self.array = array
         self.origin = origin
         self.descr = descr
@@ -623,15 +625,21 @@
     def is_adjacent_to(self, other):
         """ this is a symmetric relation """
         match, off = self.calc_difference(other)
-        if match:
-            return off == 1 or off == -1
+        stride = self.stride()
+        if match and stride != 0:
+            return abs(off) - stride == 0
         return False
 
+    def stride(self):
+        """ the stride in bytes """
+        return self.descr.get_item_size_in_bytes()
+
     def is_adjacent_after(self, other):
         """ the asymetric relation to is_adjacent_to """
         match, off = self.calc_difference(other)
-        if match:
-            return off == 1
+        stride = self.stride()
+        if match and stride != 0:
+            return off == stride # must be equal to the positive stride
         return False
 
     def indices_can_alias(self, other):
@@ -641,7 +649,7 @@
         """
         match, off = self.calc_difference(other)
         if match:
-            return off == 0
+            return abs(off) < self.stride()
         return False
 
     def __eq__(self, other):
@@ -658,6 +666,7 @@
         return self.array == other.array
 
     def calc_difference(self, other):
+        """ calculates the difference in bytes as second return value """
         assert isinstance(other, MemoryRef)
         if self.array == other.array \
             and self.origin == other.origin:
@@ -670,4 +679,3 @@
     def __repr__(self):
         return 'MemoryRef(%s*(%s/%s)+%s)' % (self.origin, self.coefficient_mul,
                                             self.coefficient_div, self.constant)
-
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -297,9 +297,9 @@
     def test_setarrayitem_dont_depend_with_memref_info(self):
         ops="""
         [p0, i1] # 0: 1,2,3,4
-        setarrayitem_raw(p0, i1, 1, descr=floatarraydescr) # 1: 4
+        setarrayitem_raw(p0, i1, 1, descr=chararraydescr) # 1: 4
         i2 = int_add(i1,1) # 2: 3
-        setarrayitem_raw(p0, i2, 2, descr=floatarraydescr) # 3: 4
+        setarrayitem_raw(p0, i2, 2, descr=chararraydescr) # 3: 4
         jump(p0, i1) # 4:
         """
         self.assert_dependencies(ops, memref=True, full_check=True)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -154,7 +154,10 @@
 
     arraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
     floatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Float))
+    intarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Signed))
+    uintarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Unsigned))
     chararraydescr = cpu.arraydescrof(lltype.GcArray(lltype.Char))
+    singlefloatarraydescr = cpu.arraydescrof(lltype.GcArray(lltype.SingleFloat))
 
     # a GcStruct not inheriting from OBJECT
     S = lltype.GcStruct('TUPLE', ('a', lltype.Signed), ('b', lltype.Ptr(NODE)))
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -647,7 +647,7 @@
     def test_packset_init_raw_load_not_adjacent_and_adjacent(self):
         ops = """
         [p0,i0]
-        i3 = raw_load(p0, i0, descr=floatarraydescr)
+        i3 = raw_load(p0, i0, descr=chararraydescr)
         jump(p0,i0)
         """
         loop = self.parse_loop(ops)
@@ -657,12 +657,13 @@
         ops = """
         [p0,i0]
         i2 = int_add(i0,1)
-        raw_load(p0, i2, descr=floatarraydescr)
+        raw_load(p0, i2, descr=chararraydescr)
         jump(p0,i2)
         """
         loop = self.parse_loop(ops)
         vopt = self.init_packset(loop,3)
         assert len(vopt.vec_info.memory_refs) == 4
+        print vopt.packset.packs
         assert len(vopt.packset.packs) == 3
         for i in range(3):
             x = (i+1)*2
@@ -765,28 +766,32 @@
         self.assert_packset_empty(vopt.packset, len(loop.operations),
                                   [(5,11), (4,10), (6,12)])
 
-    def test_packset_combine_simple(self):
+    @pytest.mark.parametrize("descr,stride",
+            [('char',1),('float',8),('int',8),('singlefloat',4)])
+    def test_packset_combine_simple(self,descr,stride):
         ops = """
         [p0,i0]
-        i3 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
-        i1 = int_add(i0,1)
+        i3 = getarrayitem_gc(p0, i0, descr={descr}arraydescr)
+        i1 = int_add(i0,{stride})
         jump(p0,i1)
-        """
+        """.format(descr=descr,stride=stride)
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.vec_info.memory_refs) == 4
         assert len(vopt.packset.packs) == 1
         self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
 
-    def test_packset_combine_2_loads_in_trace(self):
+    @pytest.mark.parametrize("descr,stride",
+            [('char',1),('float',8),('int',8),('singlefloat',4)])
+    def test_packset_combine_2_loads_in_trace(self, descr, stride):
         ops = """
         [p0,i0]
-        i3 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
-        i1 = int_add(i0,1)
-        i4 = getarrayitem_gc(p0, i1, descr=floatarraydescr)
-        i2 = int_add(i1,1)
+        i3 = getarrayitem_gc(p0, i0, descr={type}arraydescr)
+        i1 = int_add(i0,{stride})
+        i4 = getarrayitem_gc(p0, i1, descr={type}arraydescr)
+        i2 = int_add(i1,{stride})
         jump(p0,i2)
-        """
+        """.format(type=descr,stride=stride)
         loop = self.parse_loop(ops)
         vopt = self.combine_packset(loop,3)
         assert len(vopt.vec_info.memory_refs) == 8
@@ -831,59 +836,140 @@
         except NotAVectorizeableLoop:
             pass
 
-    def test_packset_vector_operation(self):
-        for op in ['int_add', 'int_sub', 'int_mul']:
-            ops = """
-            [p0,p1,p2,i0]
-            i1 = int_add(i0, 1)
-            i10 = int_le(i1, 128)
-            guard_true(i10) []
-            i2 = getarrayitem_gc(p0, i0, descr=floatarraydescr)
-            i3 = getarrayitem_gc(p1, i0, descr=floatarraydescr)
-            i4 = {op}(i2,i3)
-            setarrayitem_gc(p2, i0, i4, descr=floatarraydescr)
-            jump(p0,p1,p2,i1)
-            """.format(op=op)
-            loop = self.parse_loop(ops)
-            vopt = self.combine_packset(loop,3)
-            assert len(vopt.vec_info.memory_refs) == 12
-            assert len(vopt.packset.packs) == 4
+    @pytest.mark.parametrize("op,descr,stride",
+            [('int_add','char',1),
+             ('int_sub','char',1),
+             ('int_mul','char',1),
+             ('float_add','float',8),
+             ('float_sub','float',8),
+             ('float_mul','float',8),
+             ('float_add','singlefloat',4),
+             ('float_sub','singlefloat',4),
+             ('float_mul','singlefloat',4),
+             ('int_add','int',8),
+             ('int_sub','int',8),
+             ('int_mul','int',8),
+            ])
+    def test_packset_vector_operation(self, op, descr, stride):
+        ops = """
+        [p0,p1,p2,i0]
+        i1 = int_add(i0, {stride})
+        i10 = int_le(i1, 128)
+        guard_true(i10) []
+        i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr)
+        i3 = getarrayitem_gc(p1, i0, descr={descr}arraydescr)
+        i4 = {op}(i2,i3)
+        setarrayitem_gc(p2, i0, i4, descr={descr}arraydescr)
+        jump(p0,p1,p2,i1)
+        """.format(op=op,descr=descr,stride=stride)
+        loop = self.parse_loop(ops)
+        vopt = self.combine_packset(loop,3)
+        assert len(vopt.vec_info.memory_refs) == 12
+        assert len(vopt.packset.packs) == 4
 
-            for opindices in [(4,11,18,25),(5,12,19,26),
-                              (6,13,20,27),(7,14,21,28)]:
-                self.assert_has_pack_with(vopt.packset, opindices)
+        for opindices in [(4,11,18,25),(5,12,19,26),
+                          (6,13,20,27),(7,14,21,28)]:
+            self.assert_has_pack_with(vopt.packset, opindices)
 
-    @pytest.mark.parametrize('op', ['int_mul','int_add','int_sub','float_mul','float_add','float_sub'])
-    def test_schedule_vector_operation(self, op):
+    @pytest.mark.parametrize('op,descr,stride',
+            [('int_add','char',1),
+             ('int_sub','char',1),
+             ('int_mul','char',1),
+             ('float_add','float',8),
+             ('float_sub','float',8),
+             ('float_mul','float',8),
+             ('float_add','singlefloat',4),
+             ('float_sub','singlefloat',4),
+             ('float_mul','singlefloat',4),
+             ('int_add','int',8),
+             ('int_sub','int',8),
+             ('int_mul','int',8),
+            ])
+    def test_schedule_vector_operation(self, op, descr, stride):
         ops = """
         [p0,p1,p2,i0] # 0
         i10 = int_le(i0, 128)  # 1, 8, 15, 22
         guard_true(i10) [p0,p1,p2,i0] # 2, 9, 16, 23
-        i2 = getarrayitem_gc(p0, i0, descr=floatarraydescr) # 3, 10, 17, 24
-        i3 = getarrayitem_gc(p1, i0, descr=floatarraydescr) # 4, 11, 18, 25
+        i2 = getarrayitem_gc(p0, i0, descr={descr}arraydescr) # 3, 10, 17, 24
+        i3 = getarrayitem_gc(p1, i0, descr={descr}arraydescr) # 4, 11, 18, 25
         i4 = {op}(i2,i3) # 5, 12, 19, 26
-        setarrayitem_gc(p2, i0, i4, descr=floatarraydescr) # 6, 13, 20, 27
-        i1 = int_add(i0, 1) # 7, 14, 21, 28
+        setarrayitem_gc(p2, i0, i4, descr={descr}arraydescr) # 6, 13, 20, 27
+        i1 = int_add(i0, {stride}) # 7, 14, 21, 28
         jump(p0,p1,p2,i1) # 29
-        """.format(op=op)
+        """.format(op=op,descr=descr,stride=stride)
         vops = """
         [p0,p1,p2,i0]
         i10 = int_le(i0, 128)
         guard_true(i10) [p0,p1,p2,i0]
-        i1 = int_add(i0, 1)
+        i1 = int_add(i0, {stride})
         i11 = int_le(i1, 128)
         guard_true(i11) [p0,p1,p2,i0]
-        v1 = vec_raw_load(p0, i0, 2, descr=floatarraydescr)
-        v2 = vec_raw_load(p1, i0, 2, descr=floatarraydescr)
-        i12 = int_add(i1, 1)
+        v1 = vec_raw_load(p0, i0, 2, descr={descr}arraydescr)
+        v2 = vec_raw_load(p1, i0, 2, descr={descr}arraydescr)
+        i12 = int_add(i1, {stride})
         v3 = {op}(v1,v2)
-        vec_raw_store(p2, i0, v3, 2, descr=floatarraydescr)
+        vec_raw_store(p2, i0, v3, 2, descr={descr}arraydescr)
         jump(p0,p1,p2,i12)
-        """.format(op='vec_'+op)
+        """.format(op='vec_'+op,descr=descr,stride=stride)
         loop = self.parse_loop(ops)
         vopt = self.schedule(loop,1)
         self.debug_print_operations(vopt.loop)
         self.assert_equal(loop, self.parse_loop(vops))
 
+    @pytest.mark.parametrize('unroll', range(1,16,2))
+    def test_vectorize_index_variable_combination(self, unroll):
+        pytest.skip("implement index variable combination")
+        ops = """
+        [p0,i0]
+        i1 = raw_load(p0, i0, descr=floatarraydescr)
+        i2 = int_add(i0,1)
+        jump(p0,i2)
+        """
+        vops = """
+        [p0,i0]
+        v1 = vec_raw_load(p0, i0, {count}, descr=floatarraydescr)
+        i1 = int_add(i0,{count})
+        jump(p0,i1)
+        """.format(count=unroll+1)
+        loop = self.parse_loop(ops)
+        vopt = self.schedule(loop,unroll)
+        self.assert_equal(loop, self.parse_loop(vops))
+
+
+    def test_vectorize_raw_load_mul_index(self):
+        ops = """
+        [i0, i1, i2, i3, i4, i5, i6, i7]
+        i9 = int_mul(i0, 8)
+        i10 = raw_load(i3, i9, descr=intarraydescr)
+        i11 = int_mul(i0, 8)
+        i12 = raw_load(i3, i11, descr=intarraydescr)
+        i13 = int_add(i10, i12)
+        i14 = int_mul(i0, 8)
+        raw_store(i5, i14, i13, descr=intarraydescr)
+        i16 = int_add(i0, 1)
+        i17 = int_lt(i16, i7)
+        guard_true(i17) [i7, i13, i5, i4, i3, i12, i10, i16]
+        guard_future_condition() []
+        jump(i16, i10, i12, i3, i4, i5, i13, i7)
+        """
+        vopt = self.schedule(self.parse_loop(ops),1)
+
+    def test_vectorize_raw_load_add_index_item_byte_size(self):
+        ops = """
+        [i0, i1, i2, i3, i4, i5, i6, i7]
+        i8 = raw_load(i3, i0, descr=intarraydescr)
+        i9 = raw_load(i3, i0, descr=intarraydescr)
+        i10 = int_add(i8, i9)
+        raw_store(i5, i0, i10, descr=intarraydescr)
+        i12 = int_add(i0, 8)
+        i14 = int_mul(i7, 8)
+        i15 = int_lt(i12, i14)
+        guard_true(i15) [i7, i10, i5, i4, i3, i9, i8, i12]
+        guard_future_condition() []
+        jump(i12, i8, i9, i3, i4, i5, i10, i7)
+        """
+        vopt = self.schedule(self.parse_loop(ops),1)
+
+
 class TestLLtype(BaseTestVectorize, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -221,7 +221,7 @@
                         if self.packset.can_be_packed(a_opidx, b_opidx,
                                                        a_memref, b_memref):
                             self.packset.add_pair(a_opidx, b_opidx,
-                                                   a_memref, b_memref)
+                                                  a_memref, b_memref)
 
     def extend_packset(self):
         pack_count = self.packset.pack_count()
diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py
--- a/rpython/jit/metainterp/test/test_vectorize.py
+++ b/rpython/jit/metainterp/test/test_vectorize.py
@@ -21,27 +21,42 @@
                               CPUClass=self.CPUClass,
                               type_system=self.type_system)
 
-    def test_simple_raw_load(self):
+    def test_vectorize_simple_load_arith_store(self):
         myjitdriver = JitDriver(greens = [],
-                                reds = ['i', 'res', 'va','c'],
+                                reds = ['i','a','b','va','vb','vc','c','d'],
                                 vectorize=True)
-        def f(c):
-            res = 0
-            va = alloc_raw_storage(c*rffi.sizeof(rffi.SIGNED), zero=True)
-            for i in range(c):
+        def f(d):
+            va = alloc_raw_storage(d*rffi.sizeof(rffi.SIGNED), zero=True)
+            vb = alloc_raw_storage(d*rffi.sizeof(rffi.SIGNED), zero=True)
+            vc = alloc_raw_storage(d*rffi.sizeof(rffi.SIGNED), zero=True)
+            for i in range(d):
                 raw_storage_setitem(va, i*rffi.sizeof(rffi.SIGNED),
                                     rffi.cast(rffi.SIGNED,i))
+                raw_storage_setitem(vb, i*rffi.sizeof(rffi.SIGNED),
+                                    rffi.cast(rffi.SIGNED,i))
             i = 0
-            while i < c:
-                myjitdriver.can_enter_jit(i=i, res=res,  va=va, c=c)
-                myjitdriver.jit_merge_point(i=i, res=res, va=va, c=c)
-                res += raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
+            a = 0
+            b = 0
+            c = 0
+            while i < d:
+                myjitdriver.can_enter_jit(i=i, a=a, b=b, va=va, vb=vb, vc=vc, d=d, c=c)
+                myjitdriver.jit_merge_point(i=i, a=a, b=b, va=va, vb=vb, vc=vc, d=d, c=c)
+                a = raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
+                b = raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
+                c = a+b
+                raw_storage_setitem(vc, i*rffi.sizeof(rffi.SIGNED), rffi.cast(rffi.SIGNED,c))
                 i += 1
+            res = 0
+            for i in range(d):
+                res += raw_storage_getitem(rffi.SIGNED,vc,i*rffi.sizeof(rffi.SIGNED))
+
             free_raw_storage(va)
+            free_raw_storage(vb)
+            free_raw_storage(vc)
             return res
         i = 32
         res = self.meta_interp(f, [i])
-        assert res == sum(range(i))
+        assert res == sum(range(i)) + sum(range(i))
         self.check_trace_count(1)
 
 class TestLLtype(VectorizeTest, LLJitMixin):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to