Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77741:09b0ee52aaf2
Date: 2015-06-01 15:47 +0200
http://bitbucket.org/pypy/pypy/changeset/09b0ee52aaf2/
Log: activated all but 3 zjit tests (pow, take missing); all others pass.
added a jit param vec_cost so it is still possible to test whether
internal errors occur
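
For orientation, a condensed sketch of how the new parameter flows through
the patch (assembled only from the hunks below; call sites are abbreviated,
nothing here is new API):

    # warmspot.py: the value passed to the JIT is stored on the warmstate
    jd.warmstate.set_param_vec_cost(vec_cost)
    # optimizeopt/__init__.py: it is forwarded to the vectorizer
    optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
                    inline_short_preamble, start_state, warmstate.vec_cost)
    # vectorize.py: it becomes the cost model's profitability threshold
    opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, cost_threshold)
    self.costmodel = X86_CostModel(cost_threshold)
    # CostModel.profitable(): a packset is only worth vectorizing when the
    # estimated savings reach the threshold; otherwise NotAProfitableLoop
    # is caught in optimize_vector and the original loop is restored
    return self.calculate_savings(packset) >= self.threshold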
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -539,15 +539,15 @@
w_rhs = IntObject(int(w_rhs.floatval))
assert isinstance(w_lhs, W_NDimArray)
w_res = w_lhs.descr_getitem(interp.space, w_rhs)
- assert isinstance(w_rhs, IntObject)
- if isinstance(w_res, boxes.W_Float64Box):
- print "access", w_lhs, "[", w_rhs.intval, "] => ",
float(w_res.value)
- if isinstance(w_res, boxes.W_Float32Box):
- print "access", w_lhs, "[", w_rhs.intval, "] => ",
float(w_res.value)
- if isinstance(w_res, boxes.W_Int64Box):
- print "access", w_lhs, "[", w_rhs.intval, "] => ",
int(w_res.value)
- if isinstance(w_res, boxes.W_Int32Box):
- print "access", w_lhs, "[", w_rhs.intval, "] => ",
int(w_res.value)
+ if isinstance(w_rhs, IntObject):
+ if isinstance(w_res, boxes.W_Float64Box):
+ print "access", w_lhs, "[", w_rhs.intval, "] => ",
float(w_res.value)
+ if isinstance(w_res, boxes.W_Float32Box):
+ print "access", w_lhs, "[", w_rhs.intval, "] => ",
float(w_res.value)
+ if isinstance(w_res, boxes.W_Int64Box):
+ print "access", w_lhs, "[", w_rhs.intval, "] => ",
int(w_res.value)
+ if isinstance(w_res, boxes.W_Int32Box):
+ print "access", w_lhs, "[", w_rhs.intval, "] => ",
int(w_res.value)
else:
raise NotImplementedError
if (not isinstance(w_res, W_NDimArray) and
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -213,23 +213,6 @@
assert int(result) == 7+1+8+1+11+2+12+2
self.check_vectorized(2, 2)
- def define_int_mul_array():
- return """
- a = astype(|30|, int)
- b = astype(|30|, int)
- c = a * b
- x1 = c -> 7
- x2 = c -> 8
- x3 = c -> 11
- x4 = c -> 12
- x1 + x2 + x3 + x4
- """
- def test_int_mul_array(self):
- py.test.skip("how to multiply quad word integers?")
- result = self.run("int_mul_array")
- assert int(result) == 7*7+8*8+11*11+12*12
- self.check_vectorized(2, 2)
-
def define_float_mul_array():
return """
a = astype(|30|, float)
@@ -278,6 +261,390 @@
assert int(result) == 7*7+8*8+11*11+12*12
self.check_vectorized(2, 2)
+ def define_sum():
+ return """
+ a = |30|
+ sum(a)
+ """
+
+ def test_sum(self):
+ result = self.run("sum")
+ assert result == sum(range(30))
+ # TODO impl reduce
+ self.check_vectorized(1, 0)
+
+ def define_cumsum():
+ return """
+ a = |30|
+ b = cumsum(a)
+ b -> 5
+ """
+
+ def test_cumsum(self):
+ result = self.run("cumsum")
+ assert result == 15
+ self.check_vectorized(1, 0)
+
+ def define_axissum():
+ return """
+ a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
+ b = sum(a,0)
+ b -> 1
+ """
+
+ def test_axissum(self):
+ result = self.run("axissum")
+ assert result == 30
+ # XXX note - the bridge here is fairly crucial and yet it's pretty
+ # bogus. We need to improve the situation somehow.
+ self.check_vectorized(1, 0)
+
+ def define_reduce():
+ return """
+ a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ sum(a)
+ """
+
+ def test_reduce_compile_only_once(self):
+ self.compile_graph()
+ reset_jit()
+ i = self.code_mapping['reduce']
+ # run it twice
+ retval = self.interp.eval_graph(self.graph, [i])
+ retval = self.interp.eval_graph(self.graph, [i])
+ # check that we got only one loop
+ assert len(get_stats().loops) == 1
+ # TODO impl reduce opt
+ self.check_vectorized(2, 0)
+
+ def test_reduce_axis_compile_only_once(self):
+ self.compile_graph()
+ reset_jit()
+ i = self.code_mapping['axissum']
+ # run it twice
+ retval = self.interp.eval_graph(self.graph, [i])
+ retval = self.interp.eval_graph(self.graph, [i])
+ # check that we got only one loop
+ assert len(get_stats().loops) == 1
+ # TODO impl reduce opt
+ self.check_vectorized(3, 0)
+
+ def define_prod():
+ return """
+ a = |30|
+ prod(a)
+ """
+
+ def test_prod(self):
+ result = self.run("prod")
+ expected = 1
+ for i in range(30):
+ expected *= i * 2
+ assert result == expected
+ self.check_trace_count(1)
+
+ def define_max():
+ return """
+ a = |30|
+ a[13] = 128.0
+ max(a)
+ """
+
+ def test_max(self):
+ result = self.run("max")
+ assert result == 128
+ self.check_vectorized(1, 0) # TODO reduce
+
+ def define_min():
+ return """
+ a = |30|
+ a[13] = -128
+ min(a)
+ """
+
+ def test_min(self):
+ result = self.run("min")
+ assert result == -128
+ self.check_vectorized(1, 0) # TODO reduce
+
+ def define_any():
+ return """
+ a = [0,0,0,0,0,0,0,1,0,0,0]
+ any(a)
+ """
+
+ def test_any(self):
+ result = self.run("any")
+ assert result == 1
+ self.check_vectorized(1, 1)
+
+ def define_all():
+ return """
+ a = [1,1,1,1,1,1,1,1]
+ all(a)
+ """
+
+ def test_all(self):
+ result = self.run("all")
+ assert result == 1
+ self.check_vectorized(1, 1)
+
+ def define_logical_xor_reduce():
+ return """
+ a = [1,1,1,1,1,1,1,1]
+ logical_xor_reduce(a)
+ """
+
+ def test_logical_xor_reduce(self):
+ result = self.run("logical_xor_reduce")
+ assert result == 0
+ self.check_vectorized(0, 0) # TODO reduce
+
+ def define_already_forced():
+ return """
+ a = |30|
+ b = a + 4.5
+ b -> 5 # forces
+ c = b * 8
+ c -> 5
+ """
+
+ def test_already_forced(self):
+ result = self.run("already_forced")
+ assert result == (5 + 4.5) * 8
+ self.check_vectorized(2, 2)
+
+ def define_ufunc():
+ return """
+ a = |30|
+ b = unegative(a)
+ b -> 3
+ """
+
+ def test_ufunc(self):
+ result = self.run("ufunc")
+ assert result == -3
+ self.check_vectorized(1, 1)
+
+ def define_specialization():
+ return """
+ a = |30|
+ b = a + a
+ c = unegative(b)
+ c -> 3
+ d = a * a
+ unegative(d)
+ d -> 3
+ d = a * a
+ unegative(d)
+ d -> 3
+ d = a * a
+ unegative(d)
+ d -> 3
+ d = a * a
+ unegative(d)
+ d -> 3
+ """
+
+ def test_specialization(self):
+ result = self.run("specialization")
+ assert result == (3*3)
+ self.check_vectorized(3, 3)
+
+ def define_multidim():
+ return """
+ a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
+ b = a + a
+ b -> 1 -> 1
+ """
+
+ def test_multidim(self):
+ result = self.run('multidim')
+ assert result == 8
+ self.check_vectorized(1, 1)
+
+ def define_broadcast():
+ return """
+ a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
+ b = [1, 2, 3, 4]
+ c = a + b
+ c -> 1 -> 2
+ """
+
+ def test_broadcast(self):
+ result = self.run("broadcast")
+ assert result == 10
+ self.check_vectorized(1, 0) # TODO check on broadcast
+
+ def define_setslice():
+ return """
+ a = |30|
+ b = |10|
+ b[1] = 5.5
+ a[0:30:3] = b
+ a -> 3
+ """
+
+ def test_setslice(self):
+ result = self.run("setslice")
+ assert result == 5.5
+ self.check_vectorized(1, 0) # TODO?
+
+ def define_virtual_slice():
+ return """
+ a = |30|
+ c = a + a
+ d = c -> 1:20
+ d -> 1
+ """
+
+ def test_virtual_slice(self):
+ result = self.run("virtual_slice")
+ assert result == 4
+ self.check_vectorized(1, 1)
+
+ def define_flat_iter():
+ return '''
+ a = |30|
+ b = flat(a)
+ c = b + a
+ c -> 3
+ '''
+
+ def test_flat_iter(self):
+ result = self.run("flat_iter")
+ assert result == 6
+ self.check_vectorized(1, 1)
+
+ def define_flat_getitem():
+ return '''
+ a = |30|
+ b = flat(a)
+ b -> 4: -> 6
+ '''
+
+ def test_flat_getitem(self):
+ result = self.run("flat_getitem")
+ assert result == 10.0
+ self.check_trace_count(1)
+ self.check_vectorized(0,0)
+
+ def define_flat_setitem():
+ return '''
+ a = |30|
+ b = flat(a)
+ b[4:] = a->:26
+ a -> 5
+ '''
+
+ def test_flat_setitem(self):
+ result = self.run("flat_setitem")
+ assert result == 1.0
+ self.check_trace_count(1)
+ self.check_vectorized(1,0) # TODO this can be improved
+
+ def define_dot():
+ return """
+ a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
+ b = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
+ c = dot(a, b)
+ c -> 1 -> 2
+ """
+
+ def test_dot(self):
+ result = self.run("dot")
+ assert result == 184
+ self.check_trace_count(3)
+ self.check_vectorized(3,0)
+
+ def define_argsort():
+ return """
+ a = |30|
+ argsort(a)
+ a->6
+ """
+
+ def test_argsort(self):
+ result = self.run("argsort")
+ assert result == 6
+ self.check_trace_count(1)
+ self.check_vectorized(1,1) # vec. setslice
+
+ def define_where():
+ return """
+ a = [1, 0, 1, 0]
+ x = [1, 2, 3, 4]
+ y = [-10, -20, -30, -40]
+ r = where(a, x, y)
+ r -> 3
+ """
+
+ def test_where(self):
+ result = self.run("where")
+ assert result == -40
+ self.check_trace_count(1)
+ self.check_vectorized(1, 0) # TODO might be possible to vectorize
+
+ def define_searchsorted():
+ return """
+ a = [1, 4, 5, 6, 9]
+ b = |30| -> ::-1
+ c = searchsorted(a, b)
+ c -> -1
+ """
+
+ def test_searchsorted(self):
+ result = self.run("searchsorted")
+ assert result == 0
+ self.check_trace_count(6)
+ # TODO?
+
+ def define_int_mul_array():
+ return """
+ a = astype(|30|, int)
+ b = astype(|30|, int)
+ c = a * b
+ x1 = c -> 7
+ x2 = c -> 8
+ x3 = c -> 11
+ x4 = c -> 12
+ x1 + x2 + x3 + x4
+ """
+ def test_int_mul_array(self):
+ result = self.run("int_mul_array")
+ assert int(result) == 7*7+8*8+11*11+12*12
+ self.check_vectorized(2, 2)
+
+ def define_slice():
+ return """
+ a = |30|
+ b = a -> ::3
+ c = b + b
+ c -> 3
+ """
+
+ def test_slice(self):
+ result = self.run("slice")
+ assert result == 18
+ self.check_trace_count(1)
+ self.check_vectorized(1,1)
+
+ def define_multidim_slice():
+ return """
+ a = [[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8], [7, 8, 9, 10], [9, 10, 11, 12], [11, 12, 13, 14], [13, 14, 15, 16], [16, 17, 18, 19]]
+ b = a -> ::2
+ c = b + b
+ c -> 1 -> 1
+ """
+
+ def test_multidim_slice(self):
+ result = self.run('multidim_slice')
+ assert result == 12
+ self.check_trace_count(2)
+ self.check_vectorized(1,0) # TODO?
+
+ # NOT WORKING
+
def define_pow():
return """
a = |30| ** 2
@@ -304,245 +671,6 @@
assert result == 15 ** 2
self.check_trace_count(4) # extra one for the astype
- def define_sum():
- return """
- a = |30|
- sum(a)
- """
-
- def test_sum(self):
- result = self.run("sum")
- assert result == sum(range(30))
- self.check_trace_count(1)
-
- def define_cumsum():
- return """
- a = |30|
- b = cumsum(a)
- b -> 5
- """
-
- def test_cumsum(self):
- result = self.run("cumsum")
- assert result == 15
- self.check_trace_count(1)
-
- def define_axissum():
- return """
- a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
- b = sum(a,0)
- b -> 1
- """
-
- def test_axissum(self):
- result = self.run("axissum")
- assert result == 30
- # XXX note - the bridge here is fairly crucial and yet it's pretty
- # bogus. We need to improve the situation somehow.
-
- def define_reduce():
- return """
- a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- sum(a)
- """
-
- def test_reduce_compile_only_once(self):
- self.compile_graph()
- reset_jit()
- i = self.code_mapping['reduce']
- # run it twice
- retval = self.interp.eval_graph(self.graph, [i])
- retval = self.interp.eval_graph(self.graph, [i])
- # check that we got only one loop
- assert len(get_stats().loops) == 1
-
- def test_reduce_axis_compile_only_once(self):
- self.compile_graph()
- reset_jit()
- i = self.code_mapping['axissum']
- # run it twice
- retval = self.interp.eval_graph(self.graph, [i])
- retval = self.interp.eval_graph(self.graph, [i])
- # check that we got only one loop
- assert len(get_stats().loops) == 1
-
- def define_prod():
- return """
- a = |30|
- prod(a)
- """
-
- def test_prod(self):
- result = self.run("prod")
- expected = 1
- for i in range(30):
- expected *= i * 2
- assert result == expected
- self.check_trace_count(1)
-
- def define_max():
- return """
- a = |30|
- a[13] = 128.0
- max(a)
- """
-
- def test_max(self):
- result = self.run("max")
- assert result == 128
- # TODO self.check_trace_count(3)
-
- def define_min():
- return """
- a = |30|
- a[13] = -128
- min(a)
- """
-
- def test_min(self):
- result = self.run("min")
- assert result == -128
- #self.check_trace_count(1)
-
- def define_any():
- return """
- a = [0,0,0,0,0,0,0,1,0,0,0]
- any(a)
- """
-
- def test_any(self):
- result = self.run("any")
- assert result == 1
- self.check_trace_count(1)
-
- def define_all():
- return """
- a = [1,1,1,1,1,1,1,1]
- all(a)
- """
-
- def test_all(self):
- result = self.run("all")
- assert result == 1
- self.check_trace_count(1)
-
- def define_logical_xor_reduce():
- return """
- a = [1,1,1,1,1,1,1,1]
- logical_xor_reduce(a)
- """
-
- def test_logical_xor_reduce(self):
- result = self.run("logical_xor_reduce")
- assert result == 0
- self.check_trace_count(2)
- # XXX fix this
- #self.check_simple_loop({
- # 'cast_float_to_int': 1,
- # 'getfield_gc': 2,
- # 'getfield_gc_pure': 11,
- # 'guard_class': 1,
- # 'guard_false': 1,
- # 'guard_not_invalidated': 1,
- # 'guard_true': 5,
- # 'int_add': 2,
- # 'int_and': 1,
- # 'int_ge': 1,
- # 'int_is_true': 2,
- # 'jump': 1,
- # 'new_with_vtable': 1,
- # 'raw_load': 1,
- # 'setfield_gc': 4,
- #})
-
- def define_already_forced():
- return """
- a = |30|
- b = a + 4.5
- b -> 5 # forces
- c = b * 8
- c -> 5
- """
-
- def test_already_forced(self):
- #py.test.skip('TODO')
- result = self.run("already_forced")
- assert result == (5 + 4.5) * 8
- # This is the sum of the ops for both loops, however if you remove the
- # optimization then you end up with 2 float_adds, so we can still be
- # sure it was optimized correctly.
- #py.test.skip("too fragile")
- #self.check_resops({'raw_store': 4, 'getfield_gc': 22,
- # 'getarrayitem_gc': 4, 'getarrayitem_gc_pure': 2,
- # 'getfield_gc_pure': 8,
- # 'guard_class': 8, 'int_add': 8, 'float_mul': 2,
- # 'jump': 2, 'int_ge': 4,
- # 'raw_load': 4, 'float_add': 2,
- # 'guard_false': 4, 'arraylen_gc': 2, 'same_as': 2})
-
- def define_ufunc():
- return """
- a = |30|
- b = unegative(a)
- b -> 3
- """
-
- def test_ufunc(self):
- result = self.run("ufunc")
- assert result == -3
-
- def define_specialization():
- return """
- a = |30|
- b = a + a
- c = unegative(b)
- c -> 3
- d = a * a
- unegative(d)
- d -> 3
- d = a * a
- unegative(d)
- d -> 3
- d = a * a
- unegative(d)
- d -> 3
- d = a * a
- unegative(d)
- d -> 3
- """
-
- def test_specialization(self):
- result = self.run("specialization")
- # TODO
- assert result == (3*3)
- #py.test.skip("don't run for now")
- # This is 3, not 2 because there is a bridge for the exit.
- #self.check_trace_count(3)
-
- def define_slice():
- return """
- a = |30|
- b = a -> ::3
- c = b + b
- c -> 3
- """
-
- def test_slice(self):
- py.test.skip("slice not impl in compile.py")
- result = self.run("slice")
- assert result == 18
- self.check_trace_count(1)
- #self.check_simple_loop({
- # 'arraylen_gc': 2,
- # 'float_add': 1,
- # 'guard_false': 1,
- # 'guard_not_invalidated': 1,
- # 'int_add': 4,
- # 'int_ge': 1,
- # 'jump': 1,
- # 'raw_load': 2,
- # 'raw_store': 1,
- #})
def define_take():
return """
@@ -552,366 +680,6 @@
"""
def test_take(self):
- py.test.skip("not impl")
+ py.test.skip("key error get item?")
result = self.run("take")
assert result == 3
-
- def define_multidim():
- return """
- a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
- b = a + a
- b -> 1 -> 1
- """
-
- def test_multidim(self):
- result = self.run('multidim')
- assert result == 8
- # int_add might be 1 here if we try slightly harder with
- # reusing indexes or some optimization
- self.check_trace_count(1)
- #self.check_simple_loop({
- # 'float_add': 1,
- # 'guard_false': 1,
- # 'guard_not_invalidated': 1,
- # 'int_add': 4,
- # 'int_ge': 1,
- # 'jump': 1,
- # 'raw_load': 2,
- # 'raw_store': 1,
- #})
-
- def define_multidim_slice():
- return """
- a = [[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8], [7, 8, 9, 10], [9, 10, 11, 12], [11, 12, 13, 14], [13, 14, 15, 16], [16, 17, 18, 19]]
- b = a -> ::2
- c = b + b
- c -> 1 -> 1
- """
-
- def test_multidim_slice(self):
- py.test.skip("seems to be a problem in compile.py")
- result = self.run('multidim_slice')
- assert result == 12
- # XXX the bridge here is scary. Hopefully jit-targets will fix that,
- # otherwise it looks kind of good
- self.check_trace_count(2)
- #self.check_simple_loop({
- # 'float_add': 1,
- # 'getarrayitem_gc': 2,
- # 'guard_false': 1,
- # 'guard_not_invalidated': 1,
- # 'guard_true': 2,
- # 'int_add': 6,
- # 'int_ge': 1,
- # 'int_lt': 2,
- # 'jump': 1,
- # 'raw_load': 2,
- # 'raw_store': 1,
- # 'setarrayitem_gc': 2,
- #})
- #self.check_resops({
- # 'float_add': 3,
- # 'getarrayitem_gc': 7,
- # 'getarrayitem_gc_pure': 14,
- # 'getfield_gc': 6,
- # 'getfield_gc_pure': 63,
- # 'guard_class': 5,
- # 'guard_false': 19,
- # 'guard_nonnull': 6,
- # 'guard_nonnull_class': 1,
- # 'guard_not_invalidated': 3,
- # 'guard_true': 16,
- # 'guard_value': 3,
- # 'int_add': 24,
- # 'int_ge': 4,
- # 'int_is_true': 5,
- # 'int_is_zero': 4,
- # 'int_le': 5,
- # 'int_lt': 7,
- # 'int_sub': 2,
- # 'jump': 2,
- # 'raw_load': 5,
- # 'raw_store': 3,
- # 'setarrayitem_gc': 8,
- #})
-
- def define_broadcast():
- return """
- a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
- b = [1, 2, 3, 4]
- c = a + b
- c -> 1 -> 2
- """
-
- def test_broadcast(self):
- result = self.run("broadcast")
- assert result == 10
- #self.check_trace_count(2)
- #self.check_simple_loop({
- # 'float_add': 1,
- # 'getarrayitem_gc': 1,
- # 'guard_false': 1,
- # 'guard_not_invalidated': 1,
- # 'guard_true': 1,
- # 'int_add': 5,
- # 'int_ge': 1,
- # 'int_lt': 1,
- # 'jump': 1,
- # 'raw_load': 2,
- # 'raw_store': 1,
- # 'setarrayitem_gc': 1,
- #})
- #self.check_resops({
- # 'float_add': 2,
- # 'getarrayitem_gc': 2,
- # 'getarrayitem_gc_pure': 2,
- # 'getfield_gc': 6,
- # 'getfield_gc_pure': 30,
- # 'guard_class': 3,
- # 'guard_false': 7,
- # 'guard_nonnull': 2,
- # 'guard_not_invalidated': 2,
- # 'guard_true': 8,
- # 'int_add': 11,
- # 'int_ge': 2,
- # 'int_is_true': 3,
- # 'int_is_zero': 1,
- # 'int_le': 1,
- # 'int_lt': 2,
- # 'jump': 1,
- # 'raw_load': 4,
- # 'raw_store': 2,
- # 'setarrayitem_gc': 2,
- #})
-
- def define_setslice():
- return """
- a = |30|
- b = |10|
- b[1] = 5.5
- a[0:30:3] = b
- a -> 3
- """
-
- def test_setslice(self):
- result = self.run("setslice")
- assert result == 5.5
- self.check_trace_count(1)
- #self.check_simple_loop({
- # 'arraylen_gc': 1,
- # 'guard_false': 1,
- # 'guard_not_invalidated': 1,
- # 'int_add': 3,
- # 'int_ge': 1,
- # 'jump': 1,
- # 'raw_load': 1,
- # 'raw_store': 1,
- #})
-
- def define_virtual_slice():
- return """
- a = |30|
- c = a + a
- d = c -> 1:20
- d -> 1
- """
-
- def test_virtual_slice(self):
- py.test.skip('TODO')
- result = self.run("virtual_slice")
- assert result == 4
- py.test.skip("don't run for now")
- self.check_trace_count(1)
- self.check_simple_loop({'raw_load': 2, 'float_add': 1,
- 'raw_store': 1, 'int_add': 1,
- 'int_ge': 1, 'guard_false': 1, 'jump': 1,
- 'arraylen_gc': 1})
-
- def define_flat_iter():
- return '''
- a = |30|
- b = flat(a)
- c = b + a
- c -> 3
- '''
-
- def test_flat_iter(self):
- py.test.skip('TODO')
- result = self.run("flat_iter")
- assert result == 6
- self.check_trace_count(1)
- self.check_simple_loop({
- 'float_add': 1,
- 'guard_false': 1,
- 'guard_not_invalidated': 1,
- 'int_add': 4,
- 'int_ge': 1,
- 'jump': 1,
- 'raw_load': 2,
- 'raw_store': 1,
- })
-
- def define_flat_getitem():
- return '''
- a = |30|
- b = flat(a)
- b -> 4: -> 6
- '''
-
- def test_flat_getitem(self):
- py.test.skip('TODO')
- result = self.run("flat_getitem")
- assert result == 10.0
- self.check_trace_count(1)
- self.check_simple_loop({
- 'guard_false': 1,
- 'int_add': 4,
- 'int_ge': 1,
- 'int_mul': 1,
- 'jump': 1,
- 'raw_load': 1,
- 'raw_store': 1,
- })
-
- def define_flat_setitem():
- return '''
- a = |30|
- b = flat(a)
- b[4:] = a->:26
- a -> 5
- '''
-
- def test_flat_setitem(self):
- py.test.skip('TODO')
- result = self.run("flat_setitem")
- assert result == 1.0
- self.check_trace_count(1)
- self.check_simple_loop({
- 'guard_false': 1,
- 'guard_not_invalidated': 1,
- 'guard_true': 1,
- 'int_add': 4,
- 'int_ge': 1,
- 'int_gt': 1,
- 'int_mul': 1,
- 'int_sub': 1,
- 'jump': 1,
- 'raw_load': 1,
- 'raw_store': 1,
- })
-
- def define_dot():
- return """
- a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]
- b = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
- c = dot(a, b)
- c -> 1 -> 2
- """
-
- def test_dot(self):
- py.test.skip('TODO')
- result = self.run("dot")
- assert result == 184
- self.check_trace_count(3)
- self.check_simple_loop({
- 'float_add': 1,
- 'float_mul': 1,
- 'guard_not_invalidated': 1,
- 'guard_true': 1,
- 'int_add': 3,
- 'int_lt': 1,
- 'jump': 1,
- 'raw_load': 2,
- })
- self.check_resops({
- 'float_add': 2,
- 'float_mul': 2,
- 'getarrayitem_gc': 4,
- 'getarrayitem_gc_pure': 9,
- 'getfield_gc': 7,
- 'getfield_gc_pure': 42,
- 'guard_class': 4,
- 'guard_false': 15,
- 'guard_not_invalidated': 2,
- 'guard_true': 14,
- 'int_add': 17,
- 'int_ge': 4,
- 'int_is_true': 3,
- 'int_is_zero': 2,
- 'int_le': 5,
- 'int_lt': 8,
- 'int_sub': 3,
- 'jump': 3,
- 'new_with_vtable': 7,
- 'raw_load': 6,
- 'raw_store': 1,
- 'same_as': 2,
- 'setarrayitem_gc': 7,
- 'setfield_gc': 22,
- })
-
- def define_argsort():
- return """
- a = |30|
- argsort(a)
- a->6
- """
-
- def test_argsort(self):
- py.test.skip('TODO')
- result = self.run("argsort")
- assert result == 6
-
- def define_where():
- return """
- a = [1, 0, 1, 0]
- x = [1, 2, 3, 4]
- y = [-10, -20, -30, -40]
- r = where(a, x, y)
- r -> 3
- """
-
- def test_where(self):
- py.test.skip('TODO')
- result = self.run("where")
- assert result == -40
- self.check_trace_count(1)
- self.check_simple_loop({
- 'float_ne': 1,
- 'guard_false': 1,
- 'guard_not_invalidated': 1,
- 'guard_true': 1,
- 'int_add': 5,
- 'int_ge': 1,
- 'jump': 1,
- 'raw_load': 2,
- 'raw_store': 1,
- })
-
- def define_searchsorted():
- return """
- a = [1, 4, 5, 6, 9]
- b = |30| -> ::-1
- c = searchsorted(a, b)
- c -> -1
- """
-
- def test_searchsorted(self):
- py.test.skip('TODO')
- result = self.run("searchsorted")
- assert result == 0
- self.check_trace_count(6)
- self.check_simple_loop({
- 'float_lt': 1,
- 'guard_false': 2,
- 'guard_not_invalidated': 1,
- 'guard_true': 2,
- 'int_add': 3,
- 'int_ge': 1,
- 'int_lt': 2,
- 'int_mul': 1,
- 'int_rshift': 1,
- 'int_sub': 1,
- 'jump': 1,
- 'raw_load': 1,
- })
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -71,8 +71,9 @@
if not export_state and \
((warmstate.vectorize and jitdriver_sd.vectorize) \
or warmstate.vectorize_user):
- optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
- inline_short_preamble, start_state)
+ optimize_vector(metainterp_sd, jitdriver_sd, loop,
+ optimizations, inline_short_preamble,
+ start_state, warmstate.vec_cost)
else:
return optimize_unroll(metainterp_sd, jitdriver_sd, loop,
optimizations, inline_short_preamble,
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -126,7 +126,6 @@
def edge_to(self, to, arg=None, failarg=False, label=None):
if self is to:
- #debug_print "debug: tried to put edge from: ", self.op, "to:", to.op
return
dep = self.depends_on(to)
if not dep:
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -21,7 +21,6 @@
iv = self.index_var
ov = other.index_var
val = (int(str(ov.var)[1:]) - int(str(iv.var)[1:]))
- print iv, ov, "adja?", val == 1
# i0 and i1 are adjacent
# i1 and i2 ...
# but not i0, i2
@@ -37,13 +36,12 @@
graph = opt.dependency_graph
for k,m in graph.memory_refs.items():
graph.memory_refs[k] = FakeMemoryRef(m.index_var)
- print "memory ref", k, m
opt.find_adjacent_memory_refs()
opt.extend_packset()
opt.combine_packset()
for pack in opt.packset.packs:
- print "apck:"
- print '\n'.join([str(op.getoperation()) for op in pack.operations])
+ print "pack: \n ",
+ print '\n '.join([str(op.getoperation()) for op in pack.operations])
print
return opt.costmodel.calculate_savings(opt.packset)
@@ -111,21 +109,17 @@
def test_load_arith_store(self):
loop1 = self.parse("""
- i10 = raw_load(p0, i0, descr=int)
- i11 = raw_load(p0, i1, descr=int)
- i12 = raw_load(p0, i2, descr=int)
- i13 = raw_load(p0, i3, descr=int)
- i15 = int_add(i10, 1)
- i16 = int_add(i11, 1)
- i17 = int_add(i12, 1)
- i18 = int_add(i13, 1)
- raw_store(p1, i4, i15, descr=int)
- raw_store(p1, i5, i16, descr=int)
- raw_store(p1, i6, i17, descr=int)
- raw_store(p1, i7, i18, descr=int)
+ f10 = raw_load(p0, i0, descr=double)
+ f11 = raw_load(p0, i1, descr=double)
+ i20 = cast_float_to_int(f10)
+ i21 = cast_float_to_int(f11)
+ i30 = int_signext(i20, 4)
+ i31 = int_signext(i21, 4)
+ raw_store(p0, i3, i30, descr=int)
+ raw_store(p0, i4, i31, descr=int)
""")
savings = self.savings(loop1)
- assert savings == 6
+ assert savings == 1
class Test(CostModelBaseTest, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -40,7 +40,7 @@
print ""
def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations,
- inline_short_preamble, start_state):
+ inline_short_preamble, start_state, cost_threshold):
optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
inline_short_preamble, start_state, False)
orig_ops = loop.operations
@@ -48,13 +48,16 @@
debug_start("vec-opt-loop")
metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "pre vectorize")
metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY)
- opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations)
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, cost_threshold)
opt.propagate_all_forward()
metainterp_sd.profiler.count(Counters.OPT_VECTORIZED)
metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "post vectorize")
except NotAVectorizeableLoop:
# vectorization is not possible
loop.operations = orig_ops
+ except NotAProfitableLoop:
+ # cost model says to skip this loop
+ loop.operations = orig_ops
except Exception as e:
loop.operations = orig_ops
debug_print("failed to vectorize loop. THIS IS A FATAL ERROR!")
@@ -70,8 +73,8 @@
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
- def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations):
- Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, optimizations)
+ def __init__(self, metainterp_sd, jitdriver_sd, loop, cost_threshold=0):
+ Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, [])
self.dependency_graph = None
self.packset = None
self.unroll_count = 0
@@ -79,13 +82,16 @@
self.early_exit_idx = -1
self.sched_data = None
self.tried_to_pack = False
- self.costmodel = X86_CostModel()
+ self.costmodel = X86_CostModel(cost_threshold)
def propagate_all_forward(self, clear=True):
self.clear_newoperations()
label = self.loop.operations[0]
jump = self.loop.operations[-1]
- if jump.getopnum() not in (rop.LABEL, rop.JUMP):
+ if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
+ label.getopnum() != rop.LABEL:
+ raise NotAVectorizeableLoop()
+ if jump.numargs() != label.numargs():
raise NotAVectorizeableLoop()
self.linear_find_smallest_type(self.loop)
@@ -721,6 +727,9 @@
self._newoperations.append(op)
class CostModel(object):
+ def __init__(self, threshold):
+ self.threshold = threshold
+
def unpack_cost(self, index, op):
raise NotImplementedError
@@ -730,28 +739,23 @@
def savings_for_unpacking(self, node, index):
savings = 0
result = node.getoperation().result
- print node.op, "[", index, "]===>"
for use in node.provides():
if use.to.pack is None and use.because_of(result):
savings -= self.unpack_cost(index, node.getoperation())
- print " - ", savings, use.to.op
return savings
def calculate_savings(self, packset):
savings = 0
for pack in packset.packs:
savings += self.savings_for_pack(pack.opnum, pack.opcount())
- print
- print "pack", savings
op0 = pack.operations[0].getoperation()
if op0.result:
for i,node in enumerate(pack.operations):
savings += self.savings_for_unpacking(node, i)
- print " +=> sss", savings
return savings
def profitable(self, packset):
- return self.calculate_savings(packset) >= 0
+ return self.calculate_savings(packset) >= self.threshold
class X86_CostModel(CostModel):
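
To make the threshold's effect concrete, a hypothetical example (not part of
the changeset) assuming calculate_savings() reports 0 for some packset:

    # hypothetical values, using only the names introduced above
    X86_CostModel(0).profitable(packset)  # True  -> vectorized (default vec_cost=0)
    X86_CostModel(1).profitable(packset)  # False -> the vectorizer gives up;
                                          # optimize_vector catches
                                          # NotAProfitableLoop and restores
                                          # the original loop.operations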
diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py
--- a/rpython/jit/metainterp/warmspot.py
+++ b/rpython/jit/metainterp/warmspot.py
@@ -74,7 +74,7 @@
function_threshold=4,
enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15,
max_unroll_recursion=7, vectorize=0, vectorize_user=0,
- **kwds):
+ vec_cost=0, **kwds):
from rpython.config.config import ConfigError
translator = interp.typer.annotator.translator
try:
@@ -99,6 +99,7 @@
jd.warmstate.set_param_max_unroll_recursion(max_unroll_recursion)
jd.warmstate.set_param_vectorize(vectorize)
jd.warmstate.set_param_vectorize_user(vectorize_user)
+ jd.warmstate.set_param_vec_cost(vec_cost)
warmrunnerdesc.finish()
if graph_and_interp_only:
return interp, graph
diff --git a/rpython/jit/metainterp/warmstate.py b/rpython/jit/metainterp/warmstate.py
--- a/rpython/jit/metainterp/warmstate.py
+++ b/rpython/jit/metainterp/warmstate.py
@@ -303,6 +303,9 @@
def set_param_vectorize_user(self, value):
self.vectorize_user = bool(value)
+ def set_param_vec_cost(self, value):
+ self.vec_cost = bool(value)
+
def disable_noninlinable_function(self, greenkey):
cell = self.JitCell.ensure_jit_cell_at_key(greenkey)
cell.flags |= JC_DONT_TRACE_HERE
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -554,7 +554,8 @@
'max_unroll_recursion': 'how many levels deep to unroll a recursive function',
'vectorize': 'turn on the vectorization optimization (vecopt). requires sse4.1',
'vectorize_user': 'turn on the vecopt for the python user program. requires sse4.1',
- }
+ 'vec_cost': 'threshold for deciding which traces to vectorize.',
+}
PARAMETERS = {'threshold': 1039, # just above 1024, prime
'function_threshold': 1619, # slightly more than one above, also prime
@@ -570,6 +571,7 @@
'max_unroll_recursion': 7,
'vectorize': 0,
'vectorize_user': 0,
+ 'vec_cost': 0,
}
unroll_parameters = unrolling_iterable(PARAMETERS.items())
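
A hedged usage sketch: set_param() is the standard RPython JIT API; only the
'vec_cost' name is new in this changeset, and the values below are purely
illustrative:

    from rpython.rlib.jit import JitDriver, set_param

    driver = JitDriver(greens=[], reds='auto')
    # turn on the vectorizer and require a positive estimated saving
    # before a trace is actually vectorized
    set_param(driver, 'vectorize', 1)
    set_param(driver, 'vec_cost', 1)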
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit