Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79885:bb3eebb00aa2
Date: 2015-09-28 21:54 +0200
http://bitbucket.org/pypy/pypy/changeset/bb3eebb00aa2/
Log: wunderbar! all but one test_zjit test passing
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -18,7 +18,7 @@
from rpython.jit.metainterp import pyjitpl
return pyjitpl._warmrunnerdesc.metainterp_sd.profiler
-class TestNumpyJit(Jit386Mixin):
+class TestNumpyJit(LLJitMixin):
enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
graph = None
interp = None
@@ -99,11 +99,6 @@
backendopt=True,
graph_and_interp_only=True,
ProfilerClass=Profiler,
- translate_support_code=True,
- translationoptions={'gc':'minimark',
- 'gcrootfinder': 'asmgcc',
- 'gcremovetypeptr': False
- },
vec=True)
self.__class__.interp = interp
self.__class__.graph = graph
@@ -120,8 +115,6 @@
self.compile_graph()
profiler = get_profiler()
profiler.start()
- from rpython.jit.metainterp import pyjitpl
- pyjitpl._warmrunnerdesc.jitcounter = counter.DeterministicJitCounter()
reset_jit()
i = self.code_mapping[name]
retval = self.interp.eval_graph(self.graph, [i])
@@ -165,7 +158,7 @@
def test_float32_add(self):
result = self.run("float32_add")
self.assert_float_equal(result, 15.0 + 15.0)
- self.check_vectorized(1, 1)
+ self.check_vectorized(2, 2)
def define_float_add():
return """
@@ -198,7 +191,7 @@
def test_float32_add_const(self):
result = self.run("float32_add_const")
self.assert_float_equal(result, 29.0 + 77.345)
- self.check_vectorized(1, 1)
+ self.check_vectorized(2, 2)
def define_float_add_const():
return """
@@ -240,7 +233,7 @@
def test_int_expand(self):
result = self.run("int_expand")
assert int(result) == 7+16+8+16
- self.check_vectorized(1, 1)
+ self.check_vectorized(2, 2)
def define_int32_expand():
return """
@@ -255,7 +248,7 @@
def test_int32_expand(self):
result = self.run("int32_expand")
assert int(result) == 7+16+8+16
- self.check_vectorized(2, 2)
+ self.check_vectorized(2, 1)
def define_int16_expand():
return """
@@ -271,7 +264,7 @@
i = 8
assert int(result) == i*16 + sum(range(7,7+i))
# currently is is not possible to accum for types with < 8 bytes
- self.check_vectorized(3, 1)
+ self.check_vectorized(3, 0)
def define_int8_expand():
return """
@@ -289,7 +282,7 @@
# neither does sum
# a + c should work, but it is given as a parameter
# thus the accum must handle this!
- self.check_vectorized(3, 1)
+ self.check_vectorized(3, 0)
def define_int32_add_const():
return """
@@ -306,7 +299,7 @@
def test_int32_add_const(self):
result = self.run("int32_add_const")
assert int(result) == 7+1+8+1+11+2+12+2
- self.check_vectorized(1, 1)
+ self.check_vectorized(2, 2)
def define_float_mul_array():
return """
@@ -338,7 +331,7 @@
def test_int32_mul_array(self):
result = self.run("int32_mul_array")
assert int(result) == 7*7+8*8+11*11+12*12
- self.check_vectorized(1, 1)
+ self.check_vectorized(2, 2)
def define_float32_mul_array():
return """
@@ -366,7 +359,7 @@
def test_conversion(self):
result = self.run("conversion")
assert result == sum(range(30)) + sum(range(30))
- self.check_vectorized(2, 2) # only sum and astype(int) succeed
+ self.check_vectorized(4, 2) # only sum and astype(int) succeed
def define_sum():
return """
@@ -396,7 +389,7 @@
def test_sum_int(self):
result = self.run("sum_int")
assert result == sum(range(65))
- self.check_vectorized(1, 1)
+ self.check_vectorized(2, 2)
def define_sum_multi():
return """
@@ -420,7 +413,9 @@
def test_sum_float_to_int16(self):
result = self.run("sum_float_to_int16")
assert result == sum(range(30))
- self.check_vectorized(1, 0)
+ # one can argue that this is not desired,
+ # but unpacking exactly hits savings = 0
+ self.check_vectorized(1, 1)
def define_sum_float_to_int32():
return """
a = |30|
@@ -504,7 +499,7 @@
retval = self.interp.eval_graph(self.graph, [i])
# check that we got only one loop
assert len(get_stats().loops) == 1
- self.check_vectorized(2, 1)
+ self.check_vectorized(3, 1)
def define_prod():
return """
@@ -823,7 +818,7 @@
result = self.run("dot")
assert result == 184
self.check_trace_count(4)
- self.check_vectorized(3,1)
+ self.check_vectorized(1,1)
def define_argsort():
return """
@@ -923,7 +918,7 @@
def test_dot_matrix(self):
result = self.run("dot_matrix")
assert int(result) == 86
- self.check_vectorized(2, 1)
+ self.check_vectorized(1, 1)
# NOT WORKING
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -9,6 +9,7 @@
from rpython.rlib.objectmodel import we_are_translated
from rpython.jit.metainterp.jitexc import NotAProfitableLoop
from rpython.rlib.objectmodel import specialize, always_inline
+from rpython.jit.metainterp.jitexc import NotAVectorizeableLoop, NotAProfitableLoop
class SchedulerState(object):
@@ -206,6 +207,25 @@
return self.count
return count
+class OpRestrict(object):
+ def __init__(self, argument_restris):
+ self.argument_restrictions = argument_restris
+
+ def check_operation(self, state, pack, op):
+ pass
+
+class OpMatchSizeTypeFirst(OpRestrict):
+ def check_operation(self, state, pack, op):
+ arg0 = op.getarg(0)
+ bytesize = arg0.bytesize
+ datatype = arg0.datatype
+
+ for arg in op.getarglist():
+ if arg.bytesize != bytesize:
+ raise NotAVectorizeableLoop()
+ if arg.datatype != datatype:
+ raise NotAVectorizeableLoop()
+
class trans(object):
TR_ANY = TypeRestrict()
@@ -215,43 +235,46 @@
TR_DOUBLE_2 = TypeRestrict(FLOAT, 8, 2)
TR_INT32_2 = TypeRestrict(INT, 4, 2)
+ OR_MSTF_I = OpMatchSizeTypeFirst([TR_ANY_INTEGER, TR_ANY_INTEGER])
+ OR_MSTF_F = OpMatchSizeTypeFirst([TR_ANY_FLOAT, TR_ANY_FLOAT])
+
# note that the following definition is x86 arch specific
MAPPING = {
- rop.VEC_INT_ADD: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_SUB: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_MUL: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_AND: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_OR: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_XOR: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_EQ: [TR_ANY_INTEGER, TR_ANY_INTEGER],
- rop.VEC_INT_NE: [TR_ANY_INTEGER, TR_ANY_INTEGER],
+ rop.VEC_INT_ADD: OR_MSTF_I,
+ rop.VEC_INT_SUB: OR_MSTF_I,
+ rop.VEC_INT_MUL: OR_MSTF_I,
+ rop.VEC_INT_AND: OR_MSTF_I,
+ rop.VEC_INT_OR: OR_MSTF_I,
+ rop.VEC_INT_XOR: OR_MSTF_I,
+ rop.VEC_INT_EQ: OR_MSTF_I,
+ rop.VEC_INT_NE: OR_MSTF_I,
- rop.VEC_FLOAT_ADD: [TR_ANY_FLOAT, TR_ANY_FLOAT],
- rop.VEC_FLOAT_SUB: [TR_ANY_FLOAT, TR_ANY_FLOAT],
- rop.VEC_FLOAT_MUL: [TR_ANY_FLOAT, TR_ANY_FLOAT],
- rop.VEC_FLOAT_TRUEDIV: [TR_ANY_FLOAT, TR_ANY_FLOAT],
- rop.VEC_FLOAT_ABS: [TR_ANY_FLOAT],
- rop.VEC_FLOAT_NEG: [TR_ANY_FLOAT],
+ rop.VEC_FLOAT_ADD: OR_MSTF_F,
+ rop.VEC_FLOAT_SUB: OR_MSTF_F,
+ rop.VEC_FLOAT_MUL: OR_MSTF_F,
+ rop.VEC_FLOAT_TRUEDIV: OR_MSTF_F,
+ rop.VEC_FLOAT_ABS: OpRestrict([TR_ANY_FLOAT]),
+ rop.VEC_FLOAT_NEG: OpRestrict([TR_ANY_FLOAT]),
- rop.VEC_RAW_STORE: [None, None, TR_ANY],
- rop.VEC_SETARRAYITEM_RAW: [None, None, TR_ANY],
- rop.VEC_SETARRAYITEM_GC: [None, None, TR_ANY],
+ rop.VEC_RAW_STORE: OpRestrict([None, None, TR_ANY]),
+ rop.VEC_SETARRAYITEM_RAW: OpRestrict([None, None, TR_ANY]),
+ rop.VEC_SETARRAYITEM_GC: OpRestrict([None, None, TR_ANY]),
- rop.GUARD_TRUE: [TR_ANY_INTEGER],
- rop.GUARD_FALSE: [TR_ANY_INTEGER],
+ rop.GUARD_TRUE: OpRestrict([TR_ANY_INTEGER]),
+ rop.GUARD_FALSE: OpRestrict([TR_ANY_INTEGER]),
## irregular
- rop.VEC_INT_SIGNEXT: [TR_ANY_INTEGER],
+ rop.VEC_INT_SIGNEXT: OpRestrict([TR_ANY_INTEGER]),
- rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: [TR_DOUBLE_2],
+ rop.VEC_CAST_FLOAT_TO_SINGLEFLOAT: OpRestrict([TR_DOUBLE_2]),
# weird but the trace will store single floats in int boxes
- rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: [TR_INT32_2],
- rop.VEC_CAST_FLOAT_TO_INT: [TR_DOUBLE_2],
- rop.VEC_CAST_INT_TO_FLOAT: [TR_INT32_2],
+ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpRestrict([TR_INT32_2]),
+ rop.VEC_CAST_FLOAT_TO_INT: OpRestrict([TR_DOUBLE_2]),
+ rop.VEC_CAST_INT_TO_FLOAT: OpRestrict([TR_INT32_2]),
- rop.VEC_FLOAT_EQ: [TR_ANY_FLOAT,TR_ANY_FLOAT],
- rop.VEC_FLOAT_NE: [TR_ANY_FLOAT,TR_ANY_FLOAT],
- rop.VEC_INT_IS_TRUE: [TR_ANY_INTEGER,TR_ANY_INTEGER],
+ rop.VEC_FLOAT_EQ: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+ rop.VEC_FLOAT_NE: OpRestrict([TR_ANY_FLOAT,TR_ANY_FLOAT]),
+ rop.VEC_INT_IS_TRUE: OpRestrict([TR_ANY_INTEGER,TR_ANY_INTEGER]),
}
def turn_into_vector(state, pack):
@@ -259,6 +282,9 @@
check_if_pack_supported(state, pack)
state.costmodel.record_pack_savings(pack, pack.numops())
left = pack.leftmost()
+ oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
+ if oprestrict is not None:
+ oprestrict.check_operation(state, pack, left)
args = left.getarglist_copy()
prepare_arguments(state, pack, args)
vecop = VecOperation(left.vector, args, left,
@@ -287,9 +313,10 @@
# a) expand vars/consts before the label and add as argument
# b) expand vars created in the loop body
#
- restrictions = trans.MAPPING.get(pack.leftmost().vector, [])
- if not restrictions:
+ oprestrict = trans.MAPPING.get(pack.leftmost().vector, None)
+ if not oprestrict:
return
+ restrictions = oprestrict.argument_restrictions
for i,arg in enumerate(args):
if i >= len(restrictions) or restrictions[i] is None:
# ignore this argument
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1234,6 +1234,23 @@
vopt = self.vectorize(trace)
self.assert_equal(trace, trace_opt)
+ def test_sum_int16_prevent(self):
+ trace = self.parse_loop("""
+ [i0, p1, i2, p3, i4, i5, i6]
+ i7 = raw_load_i(i5, i4, descr=int16arraydescr)
+ i8 = int_add(i0, i7)
+ i10 = int_add(i2, 1)
+ i12 = int_add(i4, 2)
+ i13 = int_ge(i10, i6)
+ guard_false(i13, descr=<rpython.jit.metainterp.compile.ResumeGuardFalseDescr object at 0x7fe5a1848150>) [p3, i10, i8, i12, None, p1, None, None]
+ jump(i8, p1, i10, p3, i12, i5, i6)
+ """)
+ try:
+ vopt = self.vectorize(trace)
+ py.test.fail()
+ except NotAVectorizeableLoop:
+ pass
+
def test_axis_sum(self):
# TODO
trace = """
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit