Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r77741:09b0ee52aaf2 Date: 2015-06-01 15:47 +0200 http://bitbucket.org/pypy/pypy/changeset/09b0ee52aaf2/
Log: activated all but 3 zjit tests (pow, take missing), all others pass added a jit param vec_cost to still be able to test if internal errors occur diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -539,15 +539,15 @@ w_rhs = IntObject(int(w_rhs.floatval)) assert isinstance(w_lhs, W_NDimArray) w_res = w_lhs.descr_getitem(interp.space, w_rhs) - assert isinstance(w_rhs, IntObject) - if isinstance(w_res, boxes.W_Float64Box): - print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) - if isinstance(w_res, boxes.W_Float32Box): - print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) - if isinstance(w_res, boxes.W_Int64Box): - print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) - if isinstance(w_res, boxes.W_Int32Box): - print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) + if isinstance(w_rhs, IntObject): + if isinstance(w_res, boxes.W_Float64Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) + if isinstance(w_res, boxes.W_Float32Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value) + if isinstance(w_res, boxes.W_Int64Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) + if isinstance(w_res, boxes.W_Int32Box): + print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value) else: raise NotImplementedError if (not isinstance(w_res, W_NDimArray) and diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -213,23 +213,6 @@ assert int(result) == 7+1+8+1+11+2+12+2 self.check_vectorized(2, 2) - def define_int_mul_array(): - return """ - a = astype(|30|, int) - b = astype(|30|, int) - c = a * b - x1 = c -> 7 - x2 = c -> 8 - x3 = c -> 11 - x4 = c -> 12 - x1 + x2 + x3 + x4 - """ - def test_int_mul_array(self): - py.test.skip("how to multiply quad word integers?") - result = self.run("int_mul_array") - assert int(result) == 7*7+8*8+11*11+12*12 - self.check_vectorized(2, 2) - def define_float_mul_array(): return """ a = astype(|30|, float) @@ -278,6 +261,390 @@ assert int(result) == 7*7+8*8+11*11+12*12 self.check_vectorized(2, 2) + def define_sum(): + return """ + a = |30| + sum(a) + """ + + def test_sum(self): + result = self.run("sum") + assert result == sum(range(30)) + # TODO impl reduce + self.check_vectorized(1, 0) + + def define_cumsum(): + return """ + a = |30| + b = cumsum(a) + b -> 5 + """ + + def test_cumsum(self): + result = self.run("cumsum") + assert result == 15 + self.check_vectorized(1, 0) + + def define_axissum(): + return """ + a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] + b = sum(a,0) + b -> 1 + """ + + def test_axissum(self): + result = self.run("axissum") + assert result == 30 + # XXX note - the bridge here is fairly crucial and yet it's pretty + # bogus. We need to improve the situation somehow. + self.check_vectorized(1, 0) + + def define_reduce(): + return """ + a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + sum(a) + """ + + def test_reduce_compile_only_once(self): + self.compile_graph() + reset_jit() + i = self.code_mapping['reduce'] + # run it twice + retval = self.interp.eval_graph(self.graph, [i]) + retval = self.interp.eval_graph(self.graph, [i]) + # check that we got only one loop + assert len(get_stats().loops) == 1 + # TODO imple reduce opt + self.check_vectorized(2, 0) + + def test_reduce_axis_compile_only_once(self): + self.compile_graph() + reset_jit() + i = self.code_mapping['axissum'] + # run it twice + retval = self.interp.eval_graph(self.graph, [i]) + retval = self.interp.eval_graph(self.graph, [i]) + # check that we got only one loop + assert len(get_stats().loops) == 1 + # TODO imple reduce opt + self.check_vectorized(3, 0) + + def define_prod(): + return """ + a = |30| + prod(a) + """ + + def test_prod(self): + result = self.run("prod") + expected = 1 + for i in range(30): + expected *= i * 2 + assert result == expected + self.check_trace_count(1) + + def define_max(): + return """ + a = |30| + a[13] = 128.0 + max(a) + """ + + def test_max(self): + result = self.run("max") + assert result == 128 + self.check_vectorized(1, 0) # TODO reduce + + def define_min(): + return """ + a = |30| + a[13] = -128 + min(a) + """ + + def test_min(self): + result = self.run("min") + assert result == -128 + self.check_vectorized(1, 0) # TODO reduce + + def define_any(): + return """ + a = [0,0,0,0,0,0,0,1,0,0,0] + any(a) + """ + + def test_any(self): + result = self.run("any") + assert result == 1 + self.check_vectorized(1, 1) + + def define_all(): + return """ + a = [1,1,1,1,1,1,1,1] + all(a) + """ + + def test_all(self): + result = self.run("all") + assert result == 1 + self.check_vectorized(1, 1) + + def define_logical_xor_reduce(): + return """ + a = [1,1,1,1,1,1,1,1] + logical_xor_reduce(a) + """ + + def test_logical_xor_reduce(self): + result = self.run("logical_xor_reduce") + assert result == 0 + self.check_vectorized(0, 0) # TODO reduce + + def define_already_forced(): + return """ + a = |30| + b = a + 4.5 + b -> 5 # forces + c = b * 8 + c -> 5 + """ + + def test_already_forced(self): + result = self.run("already_forced") + assert result == (5 + 4.5) * 8 + self.check_vectorized(2, 2) + + def define_ufunc(): + return """ + a = |30| + b = unegative(a) + b -> 3 + """ + + def test_ufunc(self): + result = self.run("ufunc") + assert result == -3 + self.check_vectorized(1, 1) + + def define_specialization(): + return """ + a = |30| + b = a + a + c = unegative(b) + c -> 3 + d = a * a + unegative(d) + d -> 3 + d = a * a + unegative(d) + d -> 3 + d = a * a + unegative(d) + d -> 3 + d = a * a + unegative(d) + d -> 3 + """ + + def test_specialization(self): + result = self.run("specialization") + assert result == (3*3) + self.check_vectorized(3, 3) + + def define_multidim(): + return """ + a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] + b = a + a + b -> 1 -> 1 + """ + + def test_multidim(self): + result = self.run('multidim') + assert result == 8 + self.check_vectorized(1, 1) + + def define_broadcast(): + return """ + a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] + b = [1, 2, 3, 4] + c = a + b + c -> 1 -> 2 + """ + + def test_broadcast(self): + result = self.run("broadcast") + assert result == 10 + self.check_vectorized(1, 0) # TODO check on broadcast + + def define_setslice(): + return """ + a = |30| + b = |10| + b[1] = 5.5 + a[0:30:3] = b + a -> 3 + """ + + def test_setslice(self): + result = self.run("setslice") + assert result == 5.5 + self.check_vectorized(1, 0) # TODO? + + def define_virtual_slice(): + return """ + a = |30| + c = a + a + d = c -> 1:20 + d -> 1 + """ + + def test_virtual_slice(self): + result = self.run("virtual_slice") + assert result == 4 + self.check_vectorized(1, 1) + + def define_flat_iter(): + return ''' + a = |30| + b = flat(a) + c = b + a + c -> 3 + ''' + + def test_flat_iter(self): + result = self.run("flat_iter") + assert result == 6 + self.check_vectorized(1, 1) + + def define_flat_getitem(): + return ''' + a = |30| + b = flat(a) + b -> 4: -> 6 + ''' + + def test_flat_getitem(self): + result = self.run("flat_getitem") + assert result == 10.0 + self.check_trace_count(1) + self.check_vectorized(0,0) + + def define_flat_setitem(): + return ''' + a = |30| + b = flat(a) + b[4:] = a->:26 + a -> 5 + ''' + + def test_flat_setitem(self): + result = self.run("flat_setitem") + assert result == 1.0 + self.check_trace_count(1) + self.check_vectorized(1,0) # TODO this can be improved + + def define_dot(): + return """ + a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] + b = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]] + c = dot(a, b) + c -> 1 -> 2 + """ + + def test_dot(self): + result = self.run("dot") + assert result == 184 + self.check_trace_count(3) + self.check_vectorized(3,0) + + def define_argsort(): + return """ + a = |30| + argsort(a) + a->6 + """ + + def test_argsort(self): + result = self.run("argsort") + assert result == 6 + self.check_trace_count(1) + self.check_vectorized(1,1) # vec. setslice + + def define_where(): + return """ + a = [1, 0, 1, 0] + x = [1, 2, 3, 4] + y = [-10, -20, -30, -40] + r = where(a, x, y) + r -> 3 + """ + + def test_where(self): + result = self.run("where") + assert result == -40 + self.check_trace_count(1) + self.check_vectorized(1, 0) # TODO might be possible to vectorize + + def define_searchsorted(): + return """ + a = [1, 4, 5, 6, 9] + b = |30| -> ::-1 + c = searchsorted(a, b) + c -> -1 + """ + + def test_searchsorted(self): + result = self.run("searchsorted") + assert result == 0 + self.check_trace_count(6) + # TODO? + + def define_int_mul_array(): + return """ + a = astype(|30|, int) + b = astype(|30|, int) + c = a * b + x1 = c -> 7 + x2 = c -> 8 + x3 = c -> 11 + x4 = c -> 12 + x1 + x2 + x3 + x4 + """ + def test_int_mul_array(self): + result = self.run("int_mul_array") + assert int(result) == 7*7+8*8+11*11+12*12 + self.check_vectorized(2, 2) + + def define_slice(): + return """ + a = |30| + b = a -> ::3 + c = b + b + c -> 3 + """ + + def test_slice(self): + result = self.run("slice") + assert result == 18 + self.check_trace_count(1) + self.check_vectorized(1,1) + + def define_multidim_slice(): + return """ + a = [[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8], [7, 8, 9, 10], [9, 10, 11, 12], [11, 12, 13, 14], [13, 14, 15, 16], [16, 17, 18, 19]] + b = a -> ::2 + c = b + b + c -> 1 -> 1 + """ + + def test_multidim_slice(self): + result = self.run('multidim_slice') + assert result == 12 + self.check_trace_count(2) + self.check_vectorized(1,0) # TODO? + + # NOT WORKING + def define_pow(): return """ a = |30| ** 2 @@ -304,245 +671,6 @@ assert result == 15 ** 2 self.check_trace_count(4) # extra one for the astype - def define_sum(): - return """ - a = |30| - sum(a) - """ - - def test_sum(self): - result = self.run("sum") - assert result == sum(range(30)) - self.check_trace_count(1) - - def define_cumsum(): - return """ - a = |30| - b = cumsum(a) - b -> 5 - """ - - def test_cumsum(self): - result = self.run("cumsum") - assert result == 15 - self.check_trace_count(1) - - def define_axissum(): - return """ - a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] - b = sum(a,0) - b -> 1 - """ - - def test_axissum(self): - result = self.run("axissum") - assert result == 30 - # XXX note - the bridge here is fairly crucial and yet it's pretty - # bogus. We need to improve the situation somehow. - - def define_reduce(): - return """ - a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - sum(a) - """ - - def test_reduce_compile_only_once(self): - self.compile_graph() - reset_jit() - i = self.code_mapping['reduce'] - # run it twice - retval = self.interp.eval_graph(self.graph, [i]) - retval = self.interp.eval_graph(self.graph, [i]) - # check that we got only one loop - assert len(get_stats().loops) == 1 - - def test_reduce_axis_compile_only_once(self): - self.compile_graph() - reset_jit() - i = self.code_mapping['axissum'] - # run it twice - retval = self.interp.eval_graph(self.graph, [i]) - retval = self.interp.eval_graph(self.graph, [i]) - # check that we got only one loop - assert len(get_stats().loops) == 1 - - def define_prod(): - return """ - a = |30| - prod(a) - """ - - def test_prod(self): - result = self.run("prod") - expected = 1 - for i in range(30): - expected *= i * 2 - assert result == expected - self.check_trace_count(1) - - def define_max(): - return """ - a = |30| - a[13] = 128.0 - max(a) - """ - - def test_max(self): - result = self.run("max") - assert result == 128 - # TODO self.check_trace_count(3) - - def define_min(): - return """ - a = |30| - a[13] = -128 - min(a) - """ - - def test_min(self): - result = self.run("min") - assert result == -128 - #self.check_trace_count(1) - - def define_any(): - return """ - a = [0,0,0,0,0,0,0,1,0,0,0] - any(a) - """ - - def test_any(self): - result = self.run("any") - assert result == 1 - self.check_trace_count(1) - - def define_all(): - return """ - a = [1,1,1,1,1,1,1,1] - all(a) - """ - - def test_all(self): - result = self.run("all") - assert result == 1 - self.check_trace_count(1) - - def define_logical_xor_reduce(): - return """ - a = [1,1,1,1,1,1,1,1] - logical_xor_reduce(a) - """ - - def test_logical_xor_reduce(self): - result = self.run("logical_xor_reduce") - assert result == 0 - self.check_trace_count(2) - # XXX fix this - #self.check_simple_loop({ - # 'cast_float_to_int': 1, - # 'getfield_gc': 2, - # 'getfield_gc_pure': 11, - # 'guard_class': 1, - # 'guard_false': 1, - # 'guard_not_invalidated': 1, - # 'guard_true': 5, - # 'int_add': 2, - # 'int_and': 1, - # 'int_ge': 1, - # 'int_is_true': 2, - # 'jump': 1, - # 'new_with_vtable': 1, - # 'raw_load': 1, - # 'setfield_gc': 4, - #}) - - def define_already_forced(): - return """ - a = |30| - b = a + 4.5 - b -> 5 # forces - c = b * 8 - c -> 5 - """ - - def test_already_forced(self): - #py.test.skip('TODO') - result = self.run("already_forced") - assert result == (5 + 4.5) * 8 - # This is the sum of the ops for both loops, however if you remove the - # optimization then you end up with 2 float_adds, so we can still be - # sure it was optimized correctly. - #py.test.skip("too fragile") - #self.check_resops({'raw_store': 4, 'getfield_gc': 22, - # 'getarrayitem_gc': 4, 'getarrayitem_gc_pure': 2, - # 'getfield_gc_pure': 8, - # 'guard_class': 8, 'int_add': 8, 'float_mul': 2, - # 'jump': 2, 'int_ge': 4, - # 'raw_load': 4, 'float_add': 2, - # 'guard_false': 4, 'arraylen_gc': 2, 'same_as': 2}) - - def define_ufunc(): - return """ - a = |30| - b = unegative(a) - b -> 3 - """ - - def test_ufunc(self): - result = self.run("ufunc") - assert result == -3 - - def define_specialization(): - return """ - a = |30| - b = a + a - c = unegative(b) - c -> 3 - d = a * a - unegative(d) - d -> 3 - d = a * a - unegative(d) - d -> 3 - d = a * a - unegative(d) - d -> 3 - d = a * a - unegative(d) - d -> 3 - """ - - def test_specialization(self): - result = self.run("specialization") - # TODO - assert result == (3*3) - #py.test.skip("don't run for now") - # This is 3, not 2 because there is a bridge for the exit. - #self.check_trace_count(3) - - def define_slice(): - return """ - a = |30| - b = a -> ::3 - c = b + b - c -> 3 - """ - - def test_slice(self): - py.test.skip("slice not impl in compile.py") - result = self.run("slice") - assert result == 18 - self.check_trace_count(1) - #self.check_simple_loop({ - # 'arraylen_gc': 2, - # 'float_add': 1, - # 'guard_false': 1, - # 'guard_not_invalidated': 1, - # 'int_add': 4, - # 'int_ge': 1, - # 'jump': 1, - # 'raw_load': 2, - # 'raw_store': 1, - #}) def define_take(): return """ @@ -552,366 +680,6 @@ """ def test_take(self): - py.test.skip("not impl") + py.test.skip("key error get item?") result = self.run("take") assert result == 3 - - def define_multidim(): - return """ - a = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] - b = a + a - b -> 1 -> 1 - """ - - def test_multidim(self): - result = self.run('multidim') - assert result == 8 - # int_add might be 1 here if we try slightly harder with - # reusing indexes or some optimization - self.check_trace_count(1) - #self.check_simple_loop({ - # 'float_add': 1, - # 'guard_false': 1, - # 'guard_not_invalidated': 1, - # 'int_add': 4, - # 'int_ge': 1, - # 'jump': 1, - # 'raw_load': 2, - # 'raw_store': 1, - #}) - - def define_multidim_slice(): - return """ - a = [[1, 2, 3, 4], [3, 4, 5, 6], [5, 6, 7, 8], [7, 8, 9, 10], [9, 10, 11, 12], [11, 12, 13, 14], [13, 14, 15, 16], [16, 17, 18, 19]] - b = a -> ::2 - c = b + b - c -> 1 -> 1 - """ - - def test_multidim_slice(self): - py.test.skip("seems to be a problem in compile.py") - result = self.run('multidim_slice') - assert result == 12 - # XXX the bridge here is scary. Hopefully jit-targets will fix that, - # otherwise it looks kind of good - self.check_trace_count(2) - #self.check_simple_loop({ - # 'float_add': 1, - # 'getarrayitem_gc': 2, - # 'guard_false': 1, - # 'guard_not_invalidated': 1, - # 'guard_true': 2, - # 'int_add': 6, - # 'int_ge': 1, - # 'int_lt': 2, - # 'jump': 1, - # 'raw_load': 2, - # 'raw_store': 1, - # 'setarrayitem_gc': 2, - #}) - #self.check_resops({ - # 'float_add': 3, - # 'getarrayitem_gc': 7, - # 'getarrayitem_gc_pure': 14, - # 'getfield_gc': 6, - # 'getfield_gc_pure': 63, - # 'guard_class': 5, - # 'guard_false': 19, - # 'guard_nonnull': 6, - # 'guard_nonnull_class': 1, - # 'guard_not_invalidated': 3, - # 'guard_true': 16, - # 'guard_value': 3, - # 'int_add': 24, - # 'int_ge': 4, - # 'int_is_true': 5, - # 'int_is_zero': 4, - # 'int_le': 5, - # 'int_lt': 7, - # 'int_sub': 2, - # 'jump': 2, - # 'raw_load': 5, - # 'raw_store': 3, - # 'setarrayitem_gc': 8, - #}) - - def define_broadcast(): - return """ - a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] - b = [1, 2, 3, 4] - c = a + b - c -> 1 -> 2 - """ - - def test_broadcast(self): - result = self.run("broadcast") - assert result == 10 - #self.check_trace_count(2) - #self.check_simple_loop({ - # 'float_add': 1, - # 'getarrayitem_gc': 1, - # 'guard_false': 1, - # 'guard_not_invalidated': 1, - # 'guard_true': 1, - # 'int_add': 5, - # 'int_ge': 1, - # 'int_lt': 1, - # 'jump': 1, - # 'raw_load': 2, - # 'raw_store': 1, - # 'setarrayitem_gc': 1, - #}) - #self.check_resops({ - # 'float_add': 2, - # 'getarrayitem_gc': 2, - # 'getarrayitem_gc_pure': 2, - # 'getfield_gc': 6, - # 'getfield_gc_pure': 30, - # 'guard_class': 3, - # 'guard_false': 7, - # 'guard_nonnull': 2, - # 'guard_not_invalidated': 2, - # 'guard_true': 8, - # 'int_add': 11, - # 'int_ge': 2, - # 'int_is_true': 3, - # 'int_is_zero': 1, - # 'int_le': 1, - # 'int_lt': 2, - # 'jump': 1, - # 'raw_load': 4, - # 'raw_store': 2, - # 'setarrayitem_gc': 2, - #}) - - def define_setslice(): - return """ - a = |30| - b = |10| - b[1] = 5.5 - a[0:30:3] = b - a -> 3 - """ - - def test_setslice(self): - result = self.run("setslice") - assert result == 5.5 - self.check_trace_count(1) - #self.check_simple_loop({ - # 'arraylen_gc': 1, - # 'guard_false': 1, - # 'guard_not_invalidated': 1, - # 'int_add': 3, - # 'int_ge': 1, - # 'jump': 1, - # 'raw_load': 1, - # 'raw_store': 1, - #}) - - def define_virtual_slice(): - return """ - a = |30| - c = a + a - d = c -> 1:20 - d -> 1 - """ - - def test_virtual_slice(self): - py.test.skip('TODO') - result = self.run("virtual_slice") - assert result == 4 - py.test.skip("don't run for now") - self.check_trace_count(1) - self.check_simple_loop({'raw_load': 2, 'float_add': 1, - 'raw_store': 1, 'int_add': 1, - 'int_ge': 1, 'guard_false': 1, 'jump': 1, - 'arraylen_gc': 1}) - - def define_flat_iter(): - return ''' - a = |30| - b = flat(a) - c = b + a - c -> 3 - ''' - - def test_flat_iter(self): - py.test.skip('TODO') - result = self.run("flat_iter") - assert result == 6 - self.check_trace_count(1) - self.check_simple_loop({ - 'float_add': 1, - 'guard_false': 1, - 'guard_not_invalidated': 1, - 'int_add': 4, - 'int_ge': 1, - 'jump': 1, - 'raw_load': 2, - 'raw_store': 1, - }) - - def define_flat_getitem(): - return ''' - a = |30| - b = flat(a) - b -> 4: -> 6 - ''' - - def test_flat_getitem(self): - py.test.skip('TODO') - result = self.run("flat_getitem") - assert result == 10.0 - self.check_trace_count(1) - self.check_simple_loop({ - 'guard_false': 1, - 'int_add': 4, - 'int_ge': 1, - 'int_mul': 1, - 'jump': 1, - 'raw_load': 1, - 'raw_store': 1, - }) - - def define_flat_setitem(): - return ''' - a = |30| - b = flat(a) - b[4:] = a->:26 - a -> 5 - ''' - - def test_flat_setitem(self): - py.test.skip('TODO') - result = self.run("flat_setitem") - assert result == 1.0 - self.check_trace_count(1) - self.check_simple_loop({ - 'guard_false': 1, - 'guard_not_invalidated': 1, - 'guard_true': 1, - 'int_add': 4, - 'int_ge': 1, - 'int_gt': 1, - 'int_mul': 1, - 'int_sub': 1, - 'jump': 1, - 'raw_load': 1, - 'raw_store': 1, - }) - - def define_dot(): - return """ - a = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] - b = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]] - c = dot(a, b) - c -> 1 -> 2 - """ - - def test_dot(self): - py.test.skip('TODO') - result = self.run("dot") - assert result == 184 - self.check_trace_count(3) - self.check_simple_loop({ - 'float_add': 1, - 'float_mul': 1, - 'guard_not_invalidated': 1, - 'guard_true': 1, - 'int_add': 3, - 'int_lt': 1, - 'jump': 1, - 'raw_load': 2, - }) - self.check_resops({ - 'float_add': 2, - 'float_mul': 2, - 'getarrayitem_gc': 4, - 'getarrayitem_gc_pure': 9, - 'getfield_gc': 7, - 'getfield_gc_pure': 42, - 'guard_class': 4, - 'guard_false': 15, - 'guard_not_invalidated': 2, - 'guard_true': 14, - 'int_add': 17, - 'int_ge': 4, - 'int_is_true': 3, - 'int_is_zero': 2, - 'int_le': 5, - 'int_lt': 8, - 'int_sub': 3, - 'jump': 3, - 'new_with_vtable': 7, - 'raw_load': 6, - 'raw_store': 1, - 'same_as': 2, - 'setarrayitem_gc': 7, - 'setfield_gc': 22, - }) - - def define_argsort(): - return """ - a = |30| - argsort(a) - a->6 - """ - - def test_argsort(self): - py.test.skip('TODO') - result = self.run("argsort") - assert result == 6 - - def define_where(): - return """ - a = [1, 0, 1, 0] - x = [1, 2, 3, 4] - y = [-10, -20, -30, -40] - r = where(a, x, y) - r -> 3 - """ - - def test_where(self): - py.test.skip('TODO') - result = self.run("where") - assert result == -40 - self.check_trace_count(1) - self.check_simple_loop({ - 'float_ne': 1, - 'guard_false': 1, - 'guard_not_invalidated': 1, - 'guard_true': 1, - 'int_add': 5, - 'int_ge': 1, - 'jump': 1, - 'raw_load': 2, - 'raw_store': 1, - }) - - def define_searchsorted(): - return """ - a = [1, 4, 5, 6, 9] - b = |30| -> ::-1 - c = searchsorted(a, b) - c -> -1 - """ - - def test_searchsorted(self): - py.test.skip('TODO') - result = self.run("searchsorted") - assert result == 0 - self.check_trace_count(6) - self.check_simple_loop({ - 'float_lt': 1, - 'guard_false': 2, - 'guard_not_invalidated': 1, - 'guard_true': 2, - 'int_add': 3, - 'int_ge': 1, - 'int_lt': 2, - 'int_mul': 1, - 'int_rshift': 1, - 'int_sub': 1, - 'jump': 1, - 'raw_load': 1, - }) diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py --- a/rpython/jit/metainterp/optimizeopt/__init__.py +++ b/rpython/jit/metainterp/optimizeopt/__init__.py @@ -71,8 +71,9 @@ if not export_state and \ ((warmstate.vectorize and jitdriver_sd.vectorize) \ or warmstate.vectorize_user): - optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations, - inline_short_preamble, start_state) + optimize_vector(metainterp_sd, jitdriver_sd, loop, + optimizations, inline_short_preamble, + start_state, warmstate.vec_cost) else: return optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations, inline_short_preamble, diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py --- a/rpython/jit/metainterp/optimizeopt/dependency.py +++ b/rpython/jit/metainterp/optimizeopt/dependency.py @@ -126,7 +126,6 @@ def edge_to(self, to, arg=None, failarg=False, label=None): if self is to: - #debug_print "debug: tried to put edge from: ", self.op, "to:", to.op return dep = self.depends_on(to) if not dep: diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py --- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py @@ -21,7 +21,6 @@ iv = self.index_var ov = other.index_var val = (int(str(ov.var)[1:]) - int(str(iv.var)[1:])) - print iv, ov, "adja?", val == 1 # i0 and i1 are adjacent # i1 and i2 ... # but not i0, i2 @@ -37,13 +36,12 @@ graph = opt.dependency_graph for k,m in graph.memory_refs.items(): graph.memory_refs[k] = FakeMemoryRef(m.index_var) - print "memory ref", k, m opt.find_adjacent_memory_refs() opt.extend_packset() opt.combine_packset() for pack in opt.packset.packs: - print "apck:" - print '\n'.join([str(op.getoperation()) for op in pack.operations]) + print "pack: \n ", + print '\n '.join([str(op.getoperation()) for op in pack.operations]) print return opt.costmodel.calculate_savings(opt.packset) @@ -111,21 +109,17 @@ def test_load_arith_store(self): loop1 = self.parse(""" - i10 = raw_load(p0, i0, descr=int) - i11 = raw_load(p0, i1, descr=int) - i12 = raw_load(p0, i2, descr=int) - i13 = raw_load(p0, i3, descr=int) - i15 = int_add(i10, 1) - i16 = int_add(i11, 1) - i17 = int_add(i12, 1) - i18 = int_add(i13, 1) - raw_store(p1, i4, i15, descr=int) - raw_store(p1, i5, i16, descr=int) - raw_store(p1, i6, i17, descr=int) - raw_store(p1, i7, i18, descr=int) + f10 = raw_load(p0, i0, descr=double) + f11 = raw_load(p0, i1, descr=double) + i20 = cast_float_to_int(f10) + i21 = cast_float_to_int(f11) + i30 = int_signext(i20, 4) + i31 = int_signext(i21, 4) + raw_store(p0, i3, i30, descr=int) + raw_store(p0, i4, i31, descr=int) """) savings = self.savings(loop1) - assert savings == 6 + assert savings == 1 class Test(CostModelBaseTest, LLtypeMixin): pass diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py b/rpython/jit/metainterp/optimizeopt/vectorize.py --- a/rpython/jit/metainterp/optimizeopt/vectorize.py +++ b/rpython/jit/metainterp/optimizeopt/vectorize.py @@ -40,7 +40,7 @@ print "" def optimize_vector(metainterp_sd, jitdriver_sd, loop, optimizations, - inline_short_preamble, start_state): + inline_short_preamble, start_state, cost_threshold): optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations, inline_short_preamble, start_state, False) orig_ops = loop.operations @@ -48,13 +48,16 @@ debug_start("vec-opt-loop") metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "pre vectorize") metainterp_sd.profiler.count(Counters.OPT_VECTORIZE_TRY) - opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, optimizations) + opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, cost_threshold) opt.propagate_all_forward() metainterp_sd.profiler.count(Counters.OPT_VECTORIZED) metainterp_sd.logger_noopt.log_loop(loop.inputargs, loop.operations, -2, None, None, "post vectorize") except NotAVectorizeableLoop: # vectorization is not possible loop.operations = orig_ops + except NotAProfitableLoop: + # cost model says to skip this loop + loop.operations = orig_ops except Exception as e: loop.operations = orig_ops debug_print("failed to vectorize loop. THIS IS A FATAL ERROR!") @@ -70,8 +73,8 @@ class VectorizingOptimizer(Optimizer): """ Try to unroll the loop and find instructions to group """ - def __init__(self, metainterp_sd, jitdriver_sd, loop, optimizations): - Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, optimizations) + def __init__(self, metainterp_sd, jitdriver_sd, loop, cost_threshold=0): + Optimizer.__init__(self, metainterp_sd, jitdriver_sd, loop, []) self.dependency_graph = None self.packset = None self.unroll_count = 0 @@ -79,13 +82,16 @@ self.early_exit_idx = -1 self.sched_data = None self.tried_to_pack = False - self.costmodel = X86_CostModel() + self.costmodel = X86_CostModel(cost_threshold) def propagate_all_forward(self, clear=True): self.clear_newoperations() label = self.loop.operations[0] jump = self.loop.operations[-1] - if jump.getopnum() not in (rop.LABEL, rop.JUMP): + if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \ + label.getopnum() != rop.LABEL: + raise NotAVectorizeableLoop() + if jump.numargs() != label.numargs(): raise NotAVectorizeableLoop() self.linear_find_smallest_type(self.loop) @@ -721,6 +727,9 @@ self._newoperations.append(op) class CostModel(object): + def __init__(self, threshold): + self.threshold = threshold + def unpack_cost(self, index, op): raise NotImplementedError @@ -730,28 +739,23 @@ def savings_for_unpacking(self, node, index): savings = 0 result = node.getoperation().result - print node.op, "[", index, "]===>" for use in node.provides(): if use.to.pack is None and use.because_of(result): savings -= self.unpack_cost(index, node.getoperation()) - print " - ", savings, use.to.op return savings def calculate_savings(self, packset): savings = 0 for pack in packset.packs: savings += self.savings_for_pack(pack.opnum, pack.opcount()) - print - print "pack", savings op0 = pack.operations[0].getoperation() if op0.result: for i,node in enumerate(pack.operations): savings += self.savings_for_unpacking(node, i) - print " +=> sss", savings return savings def profitable(self, packset): - return self.calculate_savings(packset) >= 0 + return self.calculate_savings(packset) >= self.threshold class X86_CostModel(CostModel): diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py --- a/rpython/jit/metainterp/warmspot.py +++ b/rpython/jit/metainterp/warmspot.py @@ -74,7 +74,7 @@ function_threshold=4, enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15, max_unroll_recursion=7, vectorize=0, vectorize_user=0, - **kwds): + vec_cost=0, **kwds): from rpython.config.config import ConfigError translator = interp.typer.annotator.translator try: @@ -99,6 +99,7 @@ jd.warmstate.set_param_max_unroll_recursion(max_unroll_recursion) jd.warmstate.set_param_vectorize(vectorize) jd.warmstate.set_param_vectorize_user(vectorize_user) + jd.warmstate.set_param_vec_cost(vec_cost) warmrunnerdesc.finish() if graph_and_interp_only: return interp, graph diff --git a/rpython/jit/metainterp/warmstate.py b/rpython/jit/metainterp/warmstate.py --- a/rpython/jit/metainterp/warmstate.py +++ b/rpython/jit/metainterp/warmstate.py @@ -303,6 +303,9 @@ def set_param_vectorize_user(self, value): self.vectorize_user = bool(value) + def set_param_vec_cost(self, value): + self.vec_cost = bool(value) + def disable_noninlinable_function(self, greenkey): cell = self.JitCell.ensure_jit_cell_at_key(greenkey) cell.flags |= JC_DONT_TRACE_HERE diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py --- a/rpython/rlib/jit.py +++ b/rpython/rlib/jit.py @@ -554,7 +554,8 @@ 'max_unroll_recursion': 'how many levels deep to unroll a recursive function', 'vectorize': 'turn on the vectorization optimization (vecopt). requires sse4.1', 'vectorize_user': 'turn on the vecopt for the python user program. requires sse4.1', - } + 'vec_cost': 'threshold which traces to vectorize.', +} PARAMETERS = {'threshold': 1039, # just above 1024, prime 'function_threshold': 1619, # slightly more than one above, also prime @@ -570,6 +571,7 @@ 'max_unroll_recursion': 7, 'vectorize': 0, 'vectorize_user': 0, + 'vec_cost': 0, } unroll_parameters = unrolling_iterable(PARAMETERS.items()) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit