Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78299:269e30fb6042
Date: 2015-06-24 17:31 +0200
http://bitbucket.org/pypy/pypy/changeset/269e30fb6042/
Log: adding guards as vector instructions. I'm not yet sure how this will
work out, but it could help to generate better loops for reductions
diff --git a/pypy/module/micronumpy/test/test_zjit.py
b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -553,7 +553,7 @@
def test_all(self):
result = self.run("all")
assert result == 1
- self.check_vectorized(1, 0) # success?
+ self.check_vectorized(1, 1)
def define_logical_xor_reduce():
return """
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -603,6 +603,18 @@
def determine_output_type(self, op):
return None
+class PassThroughOp(OpToVectorOp):
+ """ This pass through is only applicable if the target
+ operation is capable of handling vector operations.
+ Guard true/false is such an example.
+ """
+ def __init__(self, args):
+ OpToVectorOp.__init__(self, args, None)
+
+ def determine_output_type(self, op):
+ return None
+
+GUARD_TF = PassThroughOp((PT_INT_GENERIC,))
INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES)
FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES)
FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES)
@@ -637,6 +649,9 @@
rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2,
PT_DOUBLE_2),
rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2),
rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2),
+
+ rop.GUARD_TRUE: GUARD_TF,
+ rop.GUARD_FALSE: GUARD_TF,
}
def determine_output_type(node, input_type):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -16,7 +16,12 @@
class SchedulerBaseTest(DependencyBaseTest):
- def parse(self, source, inc_label_jump=True):
+ def parse(self, source, inc_label_jump=True,
+ pargs=2,
+ iargs=10,
+ fargs=6,
+ additional_args=None,
+ replace_args=None):
ns = {
'double': self.floatarraydescr,
'float': self.singlefloatarraydescr,
@@ -25,10 +30,24 @@
'short': self.int16arraydescr,
'char': self.chararraydescr,
}
- loop = opparse("
[p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4]]\n"
+ source + \
- "\n
jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4])",
- cpu=self.cpu,
- namespace=ns)
+ args = []
+ for prefix, rang in [('p',range(pargs)), ('i',range(iargs)),
('f',range(fargs))]:
+ for i in rang:
+ args.append(prefix + str(i))
+
+ assert additional_args is None or isinstance(additional_args,list)
+ for arg in additional_args or []:
+ args.append(arg)
+ for k,v in (replace_args or {}).items():
+ for i,_ in enumerate(args):
+ if k == args[i]:
+ args[i] = v
+ break
+ indent = " "
+ joinedargs = ','.join(args)
+ fmt = (indent, joinedargs, source, indent, joinedargs)
+ src = "%s[%s]\n%s\n%sjump(%s)" % fmt
+ loop = opparse(src, cpu=self.cpu, namespace=ns)
if inc_label_jump:
token = JitCellToken()
loop.operations = \
@@ -163,21 +182,19 @@
return arg
raise Exception("could not find %s in args %s" % (name,
loop.inputargs))
- def test_signext_int16(self):
+ def test_signext_int32(self):
loop1 = self.parse("""
- i10 = int_signext(i1, 2)
- i11 = int_signext(i1, 2)
- i12 = int_signext(i1, 2)
- i13 = int_signext(i1, 2)
- """)
- pack1 = self.pack(loop1, 0, 4)
- v103204 = self.find_input_arg('v103204', loop1)
- def i1inv103204(var):
- return 0, v103204
+ i10 = int_signext(i1, 4)
+ i11 = int_signext(i1, 4)
+ """, additional_args=['v10[i64|2]'])
+ pack1 = self.pack(loop1, 0, 2)
+ var = self.find_input_arg('v10', loop1)
+ def i1inv103204(v):
+ return 0, var
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True,
getvboxfunc=i1inv103204)
loop3 = self.parse("""
- v11[i16|4] = vec_int_signext(v103204[i32|4], 2)
- """, False)
+ v11[i32|2] = vec_int_signext(v10[i64|2], 4)
+ """, False, additional_args=['v10[i64|2]'])
self.assert_equal(loop2, loop3)
def test_cast_float_to_int(self):
@@ -275,13 +292,12 @@
self.assert_equal(loop2, loop3)
def test_all(self):
- py.test.skip("this could be an improvement")
loop1 = self.parse("""
i10 = raw_load(p0, i1, descr=long)
i11 = raw_load(p0, i2, descr=long)
#
- i12 = int_and(i10, i6)
- i13 = int_and(i11, i12)
+ i12 = int_and(i10, 255)
+ i13 = int_and(i11, 255)
#
guard_true(i12) []
guard_true(i13) []
@@ -289,9 +305,10 @@
pack1 = self.pack(loop1, 0, 2)
pack2 = self.pack(loop1, 2, 4)
pack3 = self.pack(loop1, 4, 6)
- loop2 = self.schedule(loop1, [pack1,pack2,pack3])
+ loop2 = self.schedule(loop1, [pack1,pack2,pack3],
prepend_invariant=True)
loop3 = self.parse("""
- v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long)
+ v9[i64|2] = vec_int_expand(255)
+ v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long)
v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2])
guard_true(v11[i64|2]) []
""", False)
diff --git a/rpython/jit/metainterp/resoperation.py
b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -812,6 +812,10 @@
rop.CAST_SINGLEFLOAT_TO_FLOAT: rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT,
rop.CAST_INT_TO_FLOAT: rop.VEC_CAST_INT_TO_FLOAT,
rop.CAST_FLOAT_TO_INT: rop.VEC_CAST_FLOAT_TO_INT,
+
+ # guard
+ rop.GUARD_TRUE: rop.GUARD_TRUE,
+ rop.GUARD_FALSE: rop.GUARD_FALSE,
}
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit