Author: Richard Plangger <[email protected]>
Branch: vecopt2
Changeset: r77082:dd4ba307d155
Date: 2015-03-18 16:10 +0100
http://bitbucket.org/pypy/pypy/changeset/dd4ba307d155/

Log:    enhanced the vectorizing testcase, clarified unroll count. it is now
        a number how often to further unroll it, not the total amount

diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -53,7 +53,7 @@
         opt = self.vec_optimizer(loop)
         opt._gather_trace_information(loop)
         if unroll_factor == -1:
-            unroll_factor = opt.get_estimated_unroll_factor()
+            unroll_factor = opt.get_unroll_count()
         opt.unroll_loop_iterations(loop, unroll_factor)
         opt.loop.operations = opt.get_newoperations()
         return opt
@@ -184,7 +184,7 @@
         guard_true(i10) []
         jump(p0,p1,p2,i9)
         """
-        self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops), 2)
+        self.assert_unroll_loop_equals(self.parse_loop(ops), self.parse_loop(opt_ops), 1)
 
     def test_estimate_unroll_factor_smallest_byte_zero(self):
         ops = """
@@ -194,7 +194,7 @@
         """
         vopt = self.vec_optimizer(self.parse_loop(ops))
         assert 0 == vopt.vec_info.smallest_type_bytes
-        assert 0 == vopt.get_estimated_unroll_factor()
+        assert 0 == vopt.get_unroll_count()
 
     def test_array_operation_indices_not_unrolled(self):
         ops = """
@@ -212,7 +212,7 @@
         raw_load(p0,i0,descr=chararraydescr)
         jump(p0,i0)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
         assert 1 in vopt.vec_info.memory_refs
         assert 2 in vopt.vec_info.memory_refs
         assert len(vopt.vec_info.memory_refs) == 2
@@ -224,15 +224,15 @@
         i4 = raw_load(p0,i1,descr=chararraydescr)
         jump(p0,i3,i4)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         assert 1 in vopt.vec_info.memory_refs
         assert 2 in vopt.vec_info.memory_refs
         assert len(vopt.vec_info.memory_refs) == 2
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
         for i in [1,2,3,4]:
             assert i in vopt.vec_info.memory_refs
         assert len(vopt.vec_info.memory_refs) == 4
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),4)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),3)
         for i in [1,2,3,4,5,6,7,8]:
             assert i in vopt.vec_info.memory_refs
         assert len(vopt.vec_info.memory_refs) == 8
@@ -244,7 +244,7 @@
         i1 = int_add(i0,1)
         jump(p0,i1)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
         vopt.build_dependency_graph()
         self.assert_no_edge(vopt.dependency_graph, [(i,i) for i in range(6)])
         self.assert_def_use(vopt.dependency_graph, [(0,1),(2,3),(4,5)])
@@ -269,7 +269,7 @@
         i3 = raw_load(p0,i0,descr=chararraydescr)
         jump(p0,i0)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[1]
@@ -284,7 +284,7 @@
         i3 = raw_load(p0,i1,descr=chararraydescr)
         jump(p0,i1)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[2]
@@ -299,7 +299,7 @@
         i3 = raw_load(p0,i1,descr=chararraydescr)
         jump(p0,i1)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[2]
@@ -315,7 +315,7 @@
         i3 = raw_load(p0,i2,descr=chararraydescr)
         jump(p0,i1)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[3]
@@ -333,7 +333,7 @@
         i5 = raw_load(p0,i4,descr=chararraydescr)
         jump(p0,i4)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[5]
@@ -352,7 +352,7 @@
         i7 = raw_load(p0,i6,descr=chararraydescr)
         jump(p0,i6)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[7]
@@ -371,7 +371,7 @@
         i5 = raw_load(p0,i4,descr=chararraydescr)
         jump(p0,i4)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref1 = vopt.vec_info.memory_refs[5]
@@ -389,7 +389,7 @@
         i6 = int_add(i4,1)
         jump(p0,i1,i6)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),2)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
         vopt.build_dependency_graph()
         self.assert_no_edge(vopt.dependency_graph, [(i,i) for i in range(6)])
         self.assert_def_use(vopt.dependency_graph, [(0,1),(0,2),(0,3),(0,4),(2,5)])
@@ -424,7 +424,7 @@
         i3 = raw_load(p0,i2,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref = vopt.vec_info.memory_refs[3]
@@ -436,7 +436,7 @@
         i3 = raw_load(p0,i2,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref = vopt.vec_info.memory_refs[3]
@@ -452,7 +452,7 @@
         i6 = raw_load(p0,i5,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref = vopt.vec_info.memory_refs[3]
@@ -473,7 +473,7 @@
         i7 = raw_load(p0,i6,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref = vopt.vec_info.memory_refs[3]
@@ -494,7 +494,7 @@
         i7 = raw_load(p0,i6,descr=chararraydescr)
         jump(p0,i2)
         """
-        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),1)
+        vopt = self.vec_optimizer_unrolled(self.parse_loop(ops),0)
         vopt.build_dependency_graph()
         vopt.find_adjacent_memory_refs()
         mref = vopt.vec_info.memory_refs[3]
@@ -511,7 +511,7 @@
         jump()
         """
         loop = self.parse_loop(ops)
-        vopt = self.vec_optimizer_unrolled(loop,2)
+        vopt = self.vec_optimizer_unrolled(loop,1)
         self.assert_equal(loop, self.parse_loop(ops))
 
 
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -59,6 +59,9 @@
                 rename_map[la] = ja
 
     def unroll_loop_iterations(self, loop, unroll_factor):
+        """ Unroll the loop X times. Unroll_factor of 0 = no unrolling,
+        1 once, ...
+        """
         op_count = len(loop.operations)
 
         label_op = loop.operations[0]
@@ -81,7 +84,7 @@
         jump_op_args = jump_op.getarglist()
 
         rename_map = {}
-        for i in range(2, unroll_factor+1):
+        for i in range(0, unroll_factor):
             # for each unrolling factor the boxes are renamed.
             self._rename_arguments_ssa(rename_map, label_op_args, jump_op_args)
             for op in operations:
@@ -102,6 +105,19 @@
                     except KeyError:
                         pass
 
+
+                #if copied_op.is_guard():
+                #    self.store_final_boxes_in_guard(copied_op, [])
+                #failargs = copied_op.getfailargs()
+                #if failargs:
+                #    for i, arg in enumerate(failargs):
+                #        try:
+                #            value = rename_map[arg]
+                #            print(type(copied_op))
+                #            copied_op.setfailarg(i, value)
+                #        except KeyError:
+                #            pass
+
                 self.emit_unrolled_operation(copied_op)
                 self.vec_info.inspect_operation(copied_op)
 
@@ -129,17 +145,15 @@
         for i,op in enumerate(loop.operations):
             self.vec_info.inspect_operation(op)
 
-    def get_estimated_unroll_factor(self, force_reg_bytes = -1):
-        """ force_reg_bytes used for testing """
+    def get_unroll_count(self):
+        """ This is an estimated number of further unrolls """
         # this optimization is not opaque, and needs info about the CPU
         byte_count = self.vec_info.smallest_type_bytes
         if byte_count == 0:
             return 0
         simd_vec_reg_bytes = 16 # TODO get from cpu
-        if force_reg_bytes > 0:
-            simd_vec_reg_bytes = force_reg_bytes
         unroll_factor = simd_vec_reg_bytes // byte_count
-        return unroll_factor
+        return unroll_factor-1 # it is already unrolled once
 
     def propagate_all_forward(self):
 
@@ -152,7 +166,7 @@
             # stop, there is no chance to vectorize this trace
             raise NotAVectorizeableLoop()
 
-        unroll_factor = self.get_estimated_unroll_factor()
+        unroll_factor = self.get_unroll_count()
 
         self.unroll_loop_iterations(self.loop, unroll_factor)
 
diff --git a/rpython/jit/metainterp/test/test_vectorize.py b/rpython/jit/metainterp/test/test_vectorize.py
--- a/rpython/jit/metainterp/test/test_vectorize.py
+++ b/rpython/jit/metainterp/test/test_vectorize.py
@@ -23,23 +23,25 @@
 
     def test_simple_raw_load(self):
         myjitdriver = JitDriver(greens = [],
-                                reds = ['i', 'res', 'va'],
+                                reds = ['i', 'res', 'va','c'],
                                 vectorize=True)
-        def f():
-            res = r_uint(0)
-            va = alloc_raw_storage(32, zero=True)
-            for i in range(32):
-                raw_storage_setitem(va, i, rffi.cast(rffi.UCHAR,i))
+        def f(c):
+            res = 0
+            va = alloc_raw_storage(c*rffi.sizeof(rffi.SIGNED), zero=True)
+            for i in range(c):
+                raw_storage_setitem(va, i*rffi.sizeof(rffi.SIGNED),
+                                    rffi.cast(rffi.SIGNED,i))
             i = 0
-            while i < 32:
-                myjitdriver.can_enter_jit(i=i, res=res,  va=va)
-                myjitdriver.jit_merge_point(i=i, res=res, va=va)
-                res += raw_storage_getitem(rffi.UCHAR,va,i)
+            while i < c:
+                myjitdriver.can_enter_jit(i=i, res=res,  va=va, c=c)
+                myjitdriver.jit_merge_point(i=i, res=res, va=va, c=c)
+                res += raw_storage_getitem(rffi.SIGNED,va,i*rffi.sizeof(rffi.SIGNED))
                 i += 1
             free_raw_storage(va)
             return res
-        res = self.meta_interp(f, [])
-        assert res == sum(range(32))
+        i = 32
+        res = self.meta_interp(f, [i])
+        assert res == sum(range(i))
         self.check_trace_count(1)
 
 class TestLLtype(VectorizeTest, LLJitMixin):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to