Author: Richard Plangger <r...@pasra.at>
Branch: vecopt
Changeset: r77741:09b0ee52aaf2
Date: 2015-06-01 15:47 +0200
http://bitbucket.org/pypy/pypy/changeset/09b0ee52aaf2/

Log:    activated all but 3 zjit tests (pow, take missing), all others pass
        added a jit param vec_cost to still be able to test if internal
        errors occur

diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -539,15 +539,15 @@
                 w_rhs = IntObject(int(w_rhs.floatval))
             assert isinstance(w_lhs, W_NDimArray)
             w_res = w_lhs.descr_getitem(interp.space, w_rhs)
-            assert isinstance(w_rhs, IntObject)
-            if isinstance(w_res, boxes.W_Float64Box):
-                print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
-            if isinstance(w_res, boxes.W_Float32Box):
-                print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
-            if isinstance(w_res, boxes.W_Int64Box):
-                print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
-            if isinstance(w_res, boxes.W_Int32Box):
-                print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
+            if isinstance(w_rhs, IntObject):
+                if isinstance(w_res, boxes.W_Float64Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
+                if isinstance(w_res, boxes.W_Float32Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
+                if isinstance(w_res, boxes.W_Int64Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
+                if isinstance(w_res, boxes.W_Int32Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
         else:
             raise NotImplementedError
         if (not isinstance(w_res, W_NDimArray) and
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -213,23 +213,6 @@
         assert int(result) == 7+1+8+1+11+2+12+2
         self.check_vectorized(2, 2)
 
-    def define_int_mul_array():
-        return """
-        a = astype(|30|, int)
-        b = astype(|30|, int)
-        c = a * b
-        x1 = c -> 7
-        x2 = c -> 8
-        x3 = c -> 11
-        x4 = c -> 12
-        x1 + x2 + x3 + x4
-        """
-    def test_int_mul_array(self):
-        py.test.skip("how to multiply quad word integers?")
-        result = self.run("int_mul_array")
-        assert int(result) == 7*7+8*8+11*11+12*12
-        self.check_vectorized(2, 2)
-
     def define_float_mul_array():
         return """
         a = astype(|30|, float)
@@ -278,6 +261,390 @@
         assert int(result) == 7*7+8*8+11*11+12*12
         self.check_vectorized(2, 2)
 
+    def define_sum():
+        return """
+        a = |30|
+        sum(a)
+        """
+
+    def test_sum(self):
+        result = self.run("sum")
+        assert result == sum(range(30))
+        # TODO impl reduce
+        self.check_vectorized(1, 0)
+
+    def define_cumsum():
+        return """
+        a = |30|
+        b = cumsum(a)
+        b -> 5
+        """
+
+    def test_cumsum(self):
+        result = self.run("cumsum")
+        assert result == 15
+        self.check_vectorized(1, 0)
+
+    def define_axissum():
+        return """
+        a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
+        b = sum(a,0)
+        b -> 1
+        """
+
+    def test_axissum(self):
+        result = self.run("axissum")
+        assert result == 30
+        # XXX note - the bridge here is fairly crucial and yet it's pretty
+        #            bogus. We need to improve the situation somehow.
+        self.check_vectorized(1, 0)
+
+    def define_reduce():
+        return """
+        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        sum(a)
+        """
+
+    def test_reduce_compile_only_once(self):
+        self.compile_graph()
+        reset_jit()
+        i = self.code_mapping['reduce']
+        # run it twice
+        retval = self.interp.eval_graph(self.graph, [i])
+        retval = self.interp.eval_graph(self.graph, [i])
+        # check that we got only one loop
+        assert len(get_stats().loops) == 1
+        # TODO imple reduce opt
+        self.check_vectorized(2, 0)
+
+    def test_reduce_axis_compile_only_once(self):
+        self.compile_graph()
+        reset_jit()
+        i = self.code_mapping['axissum']
+        # run it twice
+        retval = self.interp.eval_graph(self.graph, [i])
+        retval = self.interp.eval_graph(self.graph, [i])
+        # check that we got only one loop
+        assert len(get_stats().loops) == 1
+        # TODO imple reduce opt
+        self.check_vectorized(3, 0)
+
+    def define_prod():
+        return """
+        a = |30|
+        prod(a)
+        """
+
+    def test_prod(self):
+        result = self.run("prod")
+        expected = 1
+        for i in range(30):
+            expected *= i * 2
+        assert result == expected
+        self.check_trace_count(1)
+
+    def define_max():
+        return """
+        a = |30|
+        a[13] = 128.0
+        max(a)
+        """
+
+    def test_max(self):
+        result = self.run("max")
+        assert result == 128
+        self.check_vectorized(1, 0) # TODO reduce
+
+    def define_min():
+        return """
+        a = |30|
+        a[13] = -128
+        min(a)
+        """
+
+    def test_min(self):
+        result = self.run("min")
+        assert result == -128
+        self.check_vectorized(1, 0) # TODO reduce
+
+    def define_any():
+        return """
+        a = [0,0,0,0,0,0,0,1,0,0,0]
+        any(a)
+        """
+
+    def test_any(self):
+        result = self.run("any")
+        assert result == 1
+        self.check_vectorized(1, 1)
+
+    def define_all():
+        return """
+        a = [1,1,1,1,1,1,1,1]
+        all(a)
+        """
+
+    def test_all(self):
+        result = self.run("all")
+        assert result == 1
+        self.check_vectorized(1, 1)
+
+    def define_logical_xor_reduce():
+        return """
+        a = [1,1,1,1,1,1,1,1]
+        logical_xor_reduce(a)
+        """
+
+    def test_logical_xor_reduce(self):
+        result = self.run("logical_xor_reduce")
+        assert result == 0
+        self.check_vectorized(0, 0) # TODO reduce
+
+    def define_already_forced():
+        return """
+        a = |30|
+        b = a + 4.5
+        b -> 5 # forces
+        c = b * 8
+        c -> 5
+        """
+
+    def test_already_forced(self):
+        result = self.run("already_forced")
+        assert result == (5 + 4.5) * 8
+        self.check_vectorized(2, 2)
+
+    def define_ufunc():
+        return """
+        a = |30|
+        b = unegative(a)
+        b -> 3
+        """
+
+    def test_ufunc(self):
+        result = self.run("ufunc")
+        assert result == -3
+        self.check_vectorized(1, 1)
+
+    def define_specialization():
+        return """
+        a = |30|
+        b = a + a
+        c = unegative(b)
+        c -> 3
+        d = a * a
+        unegative(d)
+        d -> 3
+        d = a * a
+        unegative(d)
+        d -> 3
+        d = a * a
+        unegative(d)
+        d -> 3
+        d = a * a
+        unegative(d)
+        d -> 3
+        """
+
+    def test_specialization(self):
+        result = self.run("specialization")
+        assert result == (3*3)
+        self.check_vectorized(3, 3)
+
+    def define_multidim():
+        return """
+        a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
+        b = a + a
+        b -> 1 -> 1
+        """
+
+    def test_multidim(self):
+        result = self.run('multidim')
+        assert result == 8
+        self.check_vectorized(1, 1)
+
+    def define_broadcast():
+        return """
+        a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
+        b = [1, 2, 3, 4]
+        c = a + b
+        c -> 1 -> 2
+        """
+
+    def test_broadcast(self):
+        result = self.run("broadcast")
+        assert result == 10
+        self.check_vectorized(1, 0) # TODO check on broadcast
+
+    def define_setslice():
+        return """
+        a = |30|
+        b = |10|
+        b[1] = 5.5
+        a[0:30:3] = b
+        a -> 3
+        """
+
+    def test_setslice(self):
+        result = self.run("setslice")
+        assert result == 5.5
+        self.check_vectorized(1, 0) # TODO?
+
+    def define_virtual_slice():
+        return """
+        a = |30|
+        c = a + a
+        d = c -> 1:20
+        d -> 1
+        """
+
+    def test_virtual_slice(self):
+        result = self.run("virtual_slice")
+        assert result == 4
+        self.check_vectorized(1, 1)
+
+    def define_flat_iter():
+        return '''
+        a = |30|
+        b = flat(a)
+        c = b + a
+        c -> 3
+        '''
+
+    def test_flat_iter(self):
+        result = self.run("flat_iter")
+        assert result == 6
+        self.check_vectorized(1, 1)
+
+    def define_flat_getitem():
+        return '''
+        a = |30|
+        b = flat(a)
+        b -> 4: -> 6
+        '''
+
+    def test_flat_getitem(self):
+        result = self.run("flat_getitem")
+        assert result == 10.0
+        self.check_trace_count(1)
+        self.check_vectorized(0,0)
+
+    def define_flat_setitem():
+        return '''
+        a = |30|
+        b = flat(a)
+        b[4:] = a->:26
+        a -> 5
+        '''
+
+    def test_flat_setitem(self):
+        result = self.run("flat_setitem")
+        assert result == 1.0
+        self.check_trace_count(1)
+        self.check_vectorized(1,0) # TODO this can be improved
+
+    def define_dot():
+        return """
+        a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
+        b = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
+        c = dot(a, b)
+        c -> 1 -> 2
+        """
+
+    def test_dot(self):
+        result = self.run("dot")
+        assert result == 184
+        self.check_trace_count(3)
+        self.check_vectorized(3,0)
+
+    def define_argsort():
+        return """
+        a = |30|
+        argsort(a)
+        a->6
+        """
+
+    def test_argsort(self):
+        result = self.run("argsort")
+        assert result == 6
+        self.check_trace_count(1)
+        self.check_vectorized(1,1) # vec. setslice
+
+    def define_where():
+        return """
+        a = [1, 0, 1, 0]
+        x = [1, 2, 3, 4]
+        y = [-10, -20, -30, -40]
+        r = where(a, x, y)
+        r -> 3
+        """
+
+    def test_where(self):
+        result = self.run("where")
+        assert result == -40
+        self.check_trace_count(1)
+        self.check_vectorized(1, 0) # TODO might be possible to vectorize
+
+    def define_searchsorted():
+        return """
+        a = [1, 4, 5, 6, 9]
+        b = |30| -> ::-1
+        c = searchsorted(a, b)
+        c -> -1
+        """
+
+    def test_searchsorted(self):
+        result = self.run("searchsorted")
+        assert result == 0
+        self.check_trace_count(6)
+        # TODO?
+
+    def define_int_mul_array():
+        return """
+        a = astype(|30|, int)
+        b = astype(|30|, int)
+        c = a * b
+        x1 = c -> 7
+        x2 = c -> 8
+        x3 = c -> 11
+        x4 = c -> 12
+        x1 + x2 + x3 + x4
+        """
+    def test_int_mul_array(self):
+        result = self.run("int_mul_array")
+        assert int(result) == 7*7+8*8+11*11+12*12
+        self.check_vectorized(2, 2)
+
+    def define_slice():
+        return """
+        a = |30|
+        b = a -> ::3
+        c = b + b
+        c -> 3
+        """
+
+    def test_slice(self):
+        result = self.run("slice")
+        assert result == 18
+        self.check_trace_count(1)
+        self.check_vectorized(1,1)
+
+    def define_multidim_slice():
+        return """
+        a = [[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8], [7, 8, 9, 10], [9, 10, 11, 12], [11, 12, 13, 14], [13, 14, 15, 16], [16, 17, 18, 19]]
+        b = a -> ::2
+        c = b + b
+        c -> 1 -> 1
+        """
+
+    def test_multidim_slice(self):
+        result = self.run('multidim_slice')
+        assert result == 12
+        self.check_trace_count(2)
+        self.check_vectorized(1,0) # TODO?
+
+    # NOT WORKING
+
     def define_pow():
         return """
         a = |30| ** 2
@@ -304,245 +671,6 @@
         assert result == 15 ** 2
         self.check_trace_count(4)  # extra one for the astype
 
-    def define_sum():
-        return """
-        a = |30|
-        sum(a)
-        """
-
-    def test_sum(self):
-        result = self.run("sum")
-        assert result == sum(range(30))
-        self.check_trace_count(1)
-
-    def define_cumsum():
-        return """
-        a = |30|
-        b = cumsum(a)
-        b -> 5
-        """
-
-    def test_cumsum(self):
-        result = self.run("cumsum")
-        assert result == 15
-        self.check_trace_count(1)
-
-    def define_axissum():
-        return """
-        a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
-        b = sum(a,0)
-        b -> 1
-        """
-
-    def test_axissum(self):
-        result = self.run("axissum")
-        assert result == 30
-        # XXX note - the bridge here is fairly crucial and yet it's pretty
-        #            bogus. We need to improve the situation somehow.
-
-    def define_reduce():
-        return """
-        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-        sum(a)
-        """
-
-    def test_reduce_compile_only_once(self):
-        self.compile_graph()
-        reset_jit()
-        i = self.code_mapping['reduce']
-        # run it twice
-        retval = self.interp.eval_graph(self.graph, [i])
-        retval = self.interp.eval_graph(self.graph, [i])
-        # check that we got only one loop
-        assert len(get_stats().loops) == 1
-
-    def test_reduce_axis_compile_only_once(self):
-        self.compile_graph()
-        reset_jit()
-        i = self.code_mapping['axissum']
-        # run it twice
-        retval = self.interp.eval_graph(self.graph, [i])
-        retval = self.interp.eval_graph(self.graph, [i])
-        # check that we got only one loop
-        assert len(get_stats().loops) == 1
-
-    def define_prod():
-        return """
-        a = |30|
-        prod(a)
-        """
-
-    def test_prod(self):
-        result = self.run("prod")
-        expected = 1
-        for i in range(30):
-            expected *= i * 2
-        assert result == expected
-        self.check_trace_count(1)
-
-    def define_max():
-        return """
-        a = |30|
-        a[13] = 128.0
-        max(a)
-        """
-
-    def test_max(self):
-        result = self.run("max")
-        assert result == 128
-        # TODO self.check_trace_count(3)
-
-    def define_min():
-        return """
-        a = |30|
-        a[13] = -128
-        min(a)
-        """
-
-    def test_min(self):
-        result = self.run("min")
-        assert result == -128
-        #self.check_trace_count(1)
-
-    def define_any():
-        return """
-        a = [0,0,0,0,0,0,0,1,0,0,0]
-        any(a)
-        """
-
-    def test_any(self):
-        result = self.run("any")
-        assert result == 1
-        self.check_trace_count(1)
-
-    def define_all():
-        return """
-        a = [1,1,1,1,1,1,1,1]
-        all(a)
-        """
-
-    def test_all(self):
-        result = self.run("all")
-        assert result == 1
-        self.check_trace_count(1)
-
-    def define_logical_xor_reduce():
-        return """
-        a = [1,1,1,1,1,1,1,1]
-        logical_xor_reduce(a)
-        """
-
-    def test_logical_xor_reduce(self):
-        result = self.run("logical_xor_reduce")
-        assert result == 0
-        self.check_trace_count(2)
-        # XXX fix this
-        #self.check_simple_loop({
-        #    'cast_float_to_int': 1,
-        #    'getfield_gc': 2,
-        #    'getfield_gc_pure': 11,
-        #    'guard_class': 1,
-        #    'guard_false': 1,
-        #    'guard_not_invalidated': 1,
-        #    'guard_true': 5,
-        #    'int_add': 2,
-        #    'int_and': 1,
-        #    'int_ge': 1,
-        #    'int_is_true': 2,
-        #    'jump': 1,
-        #    'new_with_vtable': 1,
-        #    'raw_load': 1,
-        #    'setfield_gc': 4,
-        #})
-
-    def define_already_forced():
-        return """
-        a = |30|
-        b = a + 4.5
-        b -> 5 # forces
-        c = b * 8
-        c -> 5
-        """
-
-    def test_already_forced(self):
-        #py.test.skip('TODO')
-        result = self.run("already_forced")
-        assert result == (5 + 4.5) * 8
-        # This is the sum of the ops for both loops, however if you remove the
-        # optimization then you end up with 2 float_adds, so we can still be
-        # sure it was optimized correctly.
-        #py.test.skip("too fragile")
-        #self.check_resops({'raw_store': 4, 'getfield_gc': 22,
-        #                   'getarrayitem_gc': 4, 'getarrayitem_gc_pure': 2,
-        #                   'getfield_gc_pure': 8,
-        #                   'guard_class': 8, 'int_add': 8, 'float_mul': 2,
-        #                   'jump': 2, 'int_ge': 4,
-        #                   'raw_load': 4, 'float_add': 2,
-        #                   'guard_false': 4, 'arraylen_gc': 2, 'same_as': 2})
-
-    def define_ufunc():
-        return """
-        a = |30|
-        b = unegative(a)
-        b -> 3
-        """
-
-    def test_ufunc(self):
-        result = self.run("ufunc")
-        assert result == -3
-
-    def define_specialization():
-        return """
-        a = |30|
-        b = a + a
-        c = unegative(b)
-        c -> 3
-        d = a * a
-        unegative(d)
-        d -> 3
-        d = a * a
-        unegative(d)
-        d -> 3
-        d = a * a
-        unegative(d)
-        d -> 3
-        d = a * a
-        unegative(d)
-        d -> 3
-        """
-
-    def test_specialization(self):
-        result = self.run("specialization")
-        # TODO
-        assert result == (3*3)
-        #py.test.skip("don't run for now")
-        # This is 3, not 2 because there is a bridge for the exit.
-        #self.check_trace_count(3)
-
-    def define_slice():
-        return """
-        a = |30|
-        b = a -> ::3
-        c = b + b
-        c -> 3
-        """
-
-    def test_slice(self):
-        py.test.skip("slice not impl in compile.py")
-        result = self.run("slice")
-        assert result == 18
-        self.check_trace_count(1)
-        #self.check_simple_loop({
-        #    'arraylen_gc': 2,
-        #    'float_add': 1,
-        #    'guard_false': 1,
-        #    'guard_not_invalidated': 1,
-        #    'int_add': 4,
-        #    'int_ge': 1,
-        #    'jump': 1,
-        #    'raw_load': 2,
-        #    'raw_store': 1,
-        #})
 
     def define_take():
         return """
@@ -552,366 +680,6 @@
         """
 
     def test_take(self):
-        py.test.skip("not impl")
+        py.test.skip("key error get item?")
         result = self.run("take")
         assert result == 3
-
-    def define_multidim():
-        return """
-        a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
-        b = a + a
-        b -> 1 -> 1
-        """
-
-    def test_multidim(self):
-        result = self.run('multidim')
-        assert result == 8
-        # int_add might be 1 here if we try slightly harder with
-        # reusing indexes or some optimization
-        self.check_trace_count(1)
-        #self.check_simple_loop({
-        #    'float_add': 1,
-        #    'guard_false': 1,
-        #    'guard_not_invalidated': 1,
-        #    'int_add': 4,
-        #    'int_ge': 1,
-        #    'jump': 1,
-        #    'raw_load': 2,
-        #    'raw_store': 1,
-        #})
-
-    def define_multidim_slice():
-        return """
-        a = [[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8], [7, 8, 9, 10], [9, 10, 11, 12], [11, 12, 13, 14], [13, 14, 15, 16], [16, 17, 18, 19]]
-        b = a -> ::2
-        c = b + b
-        c -> 1 -> 1
-        """
-
-    def test_multidim_slice(self):
-        py.test.skip("seems to be a problem in compile.py")
-        result = self.run('multidim_slice')
-        assert result == 12
-        # XXX the bridge here is scary. Hopefully jit-targets will fix that,
-        #     otherwise it looks kind of good
-        self.check_trace_count(2)
-        #self.check_simple_loop({
-        #    'float_add': 1,
-        #    'getarrayitem_gc': 2,
-        #    'guard_false': 1,
-        #    'guard_not_invalidated': 1,
-        #    'guard_true': 2,
-        #    'int_add': 6,
-        #    'int_ge': 1,
-        #    'int_lt': 2,
-        #    'jump': 1,
-        #    'raw_load': 2,
-        #    'raw_store': 1,
-        #    'setarrayitem_gc': 2,
-        #})
-        #self.check_resops({
-        #    'float_add': 3,
-        #    'getarrayitem_gc': 7,
-        #    'getarrayitem_gc_pure': 14,
-        #    'getfield_gc': 6,
-        #    'getfield_gc_pure': 63,
-        #    'guard_class': 5,
-        #    'guard_false': 19,
-        #    'guard_nonnull': 6,
-        #    'guard_nonnull_class': 1,
-        #    'guard_not_invalidated': 3,
-        #    'guard_true': 16,
-        #    'guard_value': 3,
-        #    'int_add': 24,
-        #    'int_ge': 4,
-        #    'int_is_true': 5,
-        #    'int_is_zero': 4,
-        #    'int_le': 5,
-        #    'int_lt': 7,
-        #    'int_sub': 2,
-        #    'jump': 2,
-        #    'raw_load': 5,
-        #    'raw_store': 3,
-        #    'setarrayitem_gc': 8,
-        #})
-
-    def define_broadcast():
-        return """
-        a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
-        b = [1, 2, 3, 4]
-        c = a + b
-        c -> 1 -> 2
-        """
-
-    def test_broadcast(self):
-        result = self.run("broadcast")
-        assert result == 10
-        #self.check_trace_count(2)
-        #self.check_simple_loop({
-        #    'float_add': 1,
-        #    'getarrayitem_gc': 1,
-        #    'guard_false': 1,
-        #    'guard_not_invalidated': 1,
-        #    'guard_true': 1,
-        #    'int_add': 5,
-        #    'int_ge': 1,
-        #    'int_lt': 1,
-        #    'jump': 1,
-        #    'raw_load': 2,
-        #    'raw_store': 1,
-        #    'setarrayitem_gc': 1,
-        #})
-        #self.check_resops({
-        #    'float_add': 2,
-        #    'getarrayitem_gc': 2,
-        #    'getarrayitem_gc_pure': 2,
-        #    'getfield_gc': 6,
-        #    'getfield_gc_pure': 30,
-        #    'guard_class': 3,
-        #    'guard_false': 7,
-        #    'guard_nonnull': 2,
-        #    'guard_not_invalidated': 2,
-        #    'guard_true': 8,
-        #    'int_add': 11,
-        #    'int_ge': 2,
-        #    'int_is_true': 3,
-        #    'int_is_zero': 1,
-        #    'int_le': 1,
-        #    'int_lt': 2,
-        #    'jump': 1,
-        #    'raw_load': 4,
-        #    'raw_store': 2,
-        #    'setarrayitem_gc': 2,
-        #})
-
-    def define_setslice():
-        return """
-        a = |30|
-        b = |10|
-        b[1] = 5.5
-        a[0:30:3] = b
-        a -> 3
-        """
-
-    def test_setslice(self):
-        result = self.run("setslice")
-        assert result == 5.5
-        self.check_trace_count(1)
-        #self.check_simple_loop({
-        #    'arraylen_gc': 1,
-        #    'guard_false': 1,
-        #    'guard_not_invalidated': 1,
-        #    'int_add': 3,
-        #    'int_ge': 1,
-        #    'jump': 1,
-        #    'raw_load': 1,
-        #    'raw_store': 1,
-        #})
-
-    def define_virtual_slice():
-        return """
-        a = |30|
-        c = a + a
-        d = c -> 1:20
-        d -> 1
-        """
-
-    def test_virtual_slice(self):
-        py.test.skip('TODO')
-        result = self.run("virtual_slice")
-        assert result == 4
-        py.test.skip("don't run for now")
-        self.check_trace_count(1)
-        self.check_simple_loop({'raw_load': 2, 'float_add': 1,
-                                'raw_store': 1, 'int_add': 1,
-                                'int_ge': 1, 'guard_false': 1, 'jump': 1,
-                                'arraylen_gc': 1})
-
-    def define_flat_iter():
-        return '''
-        a = |30|
-        b = flat(a)
-        c = b + a
-        c -> 3
-        '''
-
-    def test_flat_iter(self):
-        py.test.skip('TODO')
-        result = self.run("flat_iter")
-        assert result == 6
-        self.check_trace_count(1)
-        self.check_simple_loop({
-            'float_add': 1,
-            'guard_false': 1,
-            'guard_not_invalidated': 1,
-            'int_add': 4,
-            'int_ge': 1,
-            'jump': 1,
-            'raw_load': 2,
-            'raw_store': 1,
-        })
-
-    def define_flat_getitem():
-        return '''
-        a = |30|
-        b = flat(a)
-        b -> 4: -> 6
-        '''
-
-    def test_flat_getitem(self):
-        py.test.skip('TODO')
-        result = self.run("flat_getitem")
-        assert result == 10.0
-        self.check_trace_count(1)
-        self.check_simple_loop({
-            'guard_false': 1,
-            'int_add': 4,
-            'int_ge': 1,
-            'int_mul': 1,
-            'jump': 1,
-            'raw_load': 1,
-            'raw_store': 1,
-        })
-
-    def define_flat_setitem():
-        return '''
-        a = |30|
-        b = flat(a)
-        b[4:] = a->:26
-        a -> 5
-        '''
-
-    def test_flat_setitem(self):
-        py.test.skip('TODO')
-        result = self.run("flat_setitem")
-        assert result == 1.0
-        self.check_trace_count(1)
-        self.check_simple_loop({
-            'guard_false': 1,
-            'guard_not_invalidated': 1,
-            'guard_true': 1,
-            'int_add': 4,
-            'int_ge': 1,
-            'int_gt': 1,
-            'int_mul': 1,
-            'int_sub': 1,
-            'jump': 1,
-            'raw_load': 1,
-            'raw_store': 1,
-        })
-
-    def define_dot():
-        return """
-        a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
-        b = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
-        c = dot(a, b)
-        c -> 1 -> 2
-        """
-
-    def test_dot(self):
-        py.test.skip('TODO')
-        result = self.run("dot")
-        assert result == 184
-        self.check_trace_count(3)
-        self.check_simple_loop({
-            'float_add': 1,
-            'float_mul': 1,
-            'guard_not_invalidated': 1,
-            'guard_true': 1,
-            'int_add': 3,
-            'int_lt': 1,
-            'jump': 1,
-            'raw_load': 2,
-        })
-        self.check_resops({
-            'float_add': 2,
-            'float_mul': 2,
-            'getarrayitem_gc': 4,
-            'getarrayitem_gc_pure': 9,
-            'getfield_gc': 7,
-            'getfield_gc_pure': 42,
-            'guard_class': 4,
-            'guard_false': 15,
-            'guard_not_invalidated': 2,
-            'guard_true': 14,
-            'int_add': 17,
-            'int_ge': 4,
-            'int_is_true': 3,
-            'int_is_zero': 2,
-            'int_le': 5,
-            'int_lt': 8,
-            'int_sub': 3,
-            'jump': 3,
-            'new_with_vtable': 7,
-            'raw_load': 6,
-            'raw_store': 1,
-            'same_as': 2,
-            'setarrayitem_gc': 7,
-            'setfield_gc': 22,
-        })
-
-    def define_argsort():
-        return """
-        a = |30|
-        argsort(a)
-        a->6
-        """
-
-    def test_argsort(self):
-        py.test.skip('TODO')
-        result = self.run("argsort")
-        assert result == 6
-
-    def define_where():
-        return """
-        a = [1, 0, 1, 0]
-        x = [1, 2, 3, 4]
-        y = [-10, -20, -30, -40]
-        r = where(a, x, y)
-        r -> 3
-        """
-
-    def test_where(self):
-        py.test.skip('TODO')
-        result = self.run("where")
-        assert result == -40
-        self.check_trace_count(1)
-        self.check_simple_loop({
-            'float_ne': 1,
-            'guard_false': 1,
-            'guard_not_invalidated': 1,
-            'guard_true': 1,
-            'int_add': 5,
-            'int_ge': 1,
-            'jump': 1,
-            'raw_load': 2,
-            'raw_store': 1,
-        })
-
-    def define_searchsorted():
-        return """
-        a = [1, 4, 5, 6, 9]
-        b = |30| -> ::-1
-        c = searchsorted(a, b)
-        c -> -1
-        """
-
-    def test_searchsorted(self):
-        py.test.skip('TODO')
-        result = self.run("searchsorted")
-        assert result == 0
-        self.check_trace_count(6)
-        self.check_simple_loop({
-            'float_lt': 1,
-            'guard_false': 2,
-            'guard_not_invalidated': 1,
-            'guard_true': 2,
-            'int_add': 3,
-            'int_ge': 1,
-            'int_lt': 2,
-            'int_mul': 1,
-            'int_rshift': 1,
-            'int_sub': 1,
-            'jump': 1,
-            'raw_load': 1,
-        })
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -71,8 +71,9 @@
             if not export_state and \
                 ((warmstate.vectorize and jitdriver_sd.vectorize) \
                  or warmstate.vectorize_user):
-                optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
-                                inline_short_preamble, start_state)
+                optimize_vector(metainterp_sd, jitdriver_sd, loop,
+                                optimizations, inline_short_preamble,
+                                start_state, warmstate.vec_cost)
             else:
                 return optimize_unroll(metainterp_sd, jitdriver_sd, loop,
                                        optimizations, inline_short_preamble,
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -126,7 +126,6 @@
 
     def edge_to(self, to, arg=None, failarg=False, label=None):
         if self is to:
-            #debug_print "debug: tried to put edge from: ", self.op, "to:", to.op
             return
         dep = self.depends_on(to)
         if not dep:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -21,7 +21,6 @@
         iv = self.index_var
         ov = other.index_var
         val = (int(str(ov.var)[1:]) - int(str(iv.var)[1:]))
-        print iv, ov, "adja?", val == 1
         # i0 and i1 are adjacent
         # i1 and i2 ...
         # but not i0, i2
@@ -37,13 +36,12 @@
         graph = opt.dependency_graph
         for k,m in graph.memory_refs.items():
             graph.memory_refs[k] = FakeMemoryRef(m.index_var)
-            print "memory ref", k, m
         opt.find_adjacent_memory_refs()
         opt.extend_packset()
         opt.combine_packset()
         for pack in opt.packset.packs:
-            print "apck:"
-            print '\n'.join([str(op.getoperation()) for op in pack.operations])
+            print "pack: \n   ",
+            print '\n    '.join([str(op.getoperation()) for op in pack.operations])
             print
         return opt.costmodel.calculate_savings(opt.packset)
 
@@ -111,21 +109,17 @@
 
     def test_load_arith_store(self):
         loop1 = self.parse("""
-        i10 = raw_load(p0, i0, descr=int)
-        i11 = raw_load(p0, i1, descr=int)
-        i12 = raw_load(p0, i2, descr=int)
-        i13 = raw_load(p0, i3, descr=int)
-        i15 = int_add(i10, 1)
-        i16 = int_add(i11, 1)
-        i17 = int_add(i12, 1)
-        i18 = int_add(i13, 1)
-        raw_store(p1, i4, i15, descr=int)
-        raw_store(p1, i5, i16, descr=int)
-        raw_store(p1, i6, i17, descr=int)
-        raw_store(p1, i7, i18, descr=int)
+        f10 = raw_load(p0, i0, descr=double)
+        f11 = raw_load(p0, i1, descr=double)
+        i20 = cast_float_to_int(f10)
+        i21 = cast_float_to_int(f11)
+        i30 = int_signext(i20, 4)
+        i31 = int_signext(i21, 4)
+        raw_store(p0, i3, i30, descr=int)
+        raw_store(p0, i4, i31, descr=int)
         """)
         savings = self.savings(loop1)
-        assert savings == 6
+        assert savings == 1
 
 class Test(CostModelBaseTest, LLtypeMixin):
     pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py 
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -40,7 +40,7 @@
                 print ""
 
 def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
-                    inline_short_preamble, start_state):
+                    inline_short_preamble, start_state, cost_threshold):
     optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
                     inline_short_preamble, start_state, False)
     orig_ops = loop.operations
@@ -48,13 +48,16 @@
         debug_start("vec-opt-loop")
         metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, 
-2, None, None, "pre vectorize")
         metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
-        opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 
optimizations)
+        opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 
cost_threshold)
         opt.propagate_all_forward()
         metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
         metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, 
-2, None, None, "post vectorize")
     except NotAVectorizeableLoop:
         # vectorization is not possible
         loop.operations = orig_ops
+    except NotAProfitableLoop:
+        # cost model says to skip this loop
+        loop.operations = orig_ops
     except Exception as e:
         loop.operations = orig_ops
         debug_print("failed to vectorize loop. THIS IS A FATAL ERROR!")
@@ -70,8 +73,8 @@
 class VectorizingOptimizer(Optimizer):
     """ Try to unroll the loop and find instructions to group """
 
-    def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations):
-        Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, 
optimizations)
+    def __init__(self, metainterp_sd, jitdriver_sd, loop, cost_threshold=0):
+        Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, [])
         self.dependency_graph = None
         self.packset = None
         self.unroll_count = 0
@@ -79,13 +82,16 @@
         self.early_exit_idx = -1
         self.sched_data = None
         self.tried_to_pack = False
-        self.costmodel = X86_CostModel()
+        self.costmodel = X86_CostModel(cost_threshold)
 
     def propagate_all_forward(self, clear=True):
         self.clear_newoperations()
         label = self.loop.operations[0]
         jump = self.loop.operations[-1]
-        if jump.getopnum() not in (rop.LABEL, rop.JUMP):
+        if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
+           label.getopnum() != rop.LABEL:
+            raise NotAVectorizeableLoop()
+        if jump.numargs() != label.numargs():
             raise NotAVectorizeableLoop()
 
         self.linear_find_smallest_type(self.loop)
@@ -721,6 +727,9 @@
         self._newoperations.append(op)
 
 class CostModel(object):
+    def __init__(self, threshold):
+        self.threshold = threshold  # minimum savings profitable() requires of a packset
+
     def unpack_cost(self, index, op):
         raise NotImplementedError
 
@@ -730,28 +739,23 @@
     def savings_for_unpacking(self, node, index):
         savings = 0
         result = node.getoperation().result
-        print node.op, "[", index, "]===>"
         for use in node.provides():
             if use.to.pack is None and use.because_of(result):
                 savings -= self.unpack_cost(index, node.getoperation())
-                print "   - ", savings, use.to.op
         return savings
 
     def calculate_savings(self, packset):
         savings = 0
         for pack in packset.packs:
             savings += self.savings_for_pack(pack.opnum, pack.opcount())
-            print
-            print "pack", savings
             op0 = pack.operations[0].getoperation()
             if op0.result:
                 for i,node in enumerate(pack.operations):
                     savings += self.savings_for_unpacking(node, i)
-                    print " +=> sss", savings
         return savings
 
     def profitable(self, packset):
-        return self.calculate_savings(packset) >= 0
+        return self.calculate_savings(packset) >= self.threshold
 
 class X86_CostModel(CostModel):
 
diff --git a/rpython/jit/metainterp/warmspot.py 
b/rpython/jit/metainterp/warmspot.py
--- a/rpython/jit/metainterp/warmspot.py
+++ b/rpython/jit/metainterp/warmspot.py
@@ -74,7 +74,7 @@
                     function_threshold=4,
                     enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15, 
                     max_unroll_recursion=7, vectorize=0, vectorize_user=0,
-                    **kwds):
+                    vec_cost=0, **kwds):
     from rpython.config.config import ConfigError
     translator = interp.typer.annotator.translator
     try:
@@ -99,6 +99,7 @@
         jd.warmstate.set_param_max_unroll_recursion(max_unroll_recursion)
         jd.warmstate.set_param_vectorize(vectorize)
         jd.warmstate.set_param_vectorize_user(vectorize_user)
+        jd.warmstate.set_param_vec_cost(vec_cost)
     warmrunnerdesc.finish()
     if graph_and_interp_only:
         return interp, graph
diff --git a/rpython/jit/metainterp/warmstate.py 
b/rpython/jit/metainterp/warmstate.py
--- a/rpython/jit/metainterp/warmstate.py
+++ b/rpython/jit/metainterp/warmstate.py
@@ -303,6 +303,9 @@
     def set_param_vectorize_user(self, value):
         self.vectorize_user = bool(value)
 
+    def set_param_vec_cost(self, value):
+        self.vec_cost = int(value)  # numeric cost threshold, not a flag: bool() would collapse it to 0/1
+
     def disable_noninlinable_function(self, greenkey):
         cell = self.JitCell.ensure_jit_cell_at_key(greenkey)
         cell.flags |= JC_DONT_TRACE_HERE
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -554,7 +554,8 @@
     'max_unroll_recursion': 'how many levels deep to unroll a recursive 
function',
     'vectorize': 'turn on the vectorization optimization (vecopt). requires 
sse4.1',
     'vectorize_user': 'turn on the vecopt for the python user program. 
requires sse4.1',
-    }
+    'vec_cost': 'threshold which traces to vectorize.',
+}
 
 PARAMETERS = {'threshold': 1039, # just above 1024, prime
               'function_threshold': 1619, # slightly more than one above, also 
prime
@@ -570,6 +571,7 @@
               'max_unroll_recursion': 7,
               'vectorize': 0,
               'vectorize_user': 0,
+              'vec_cost': 0,
               }
 unroll_parameters = unrolling_iterable(PARAMETERS.items())
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to