Author: Richard Plangger <r...@pasra.at> Branch: vecopt Changeset: r78299:269e30fb6042 Date: 2015-06-24 17:31 +0200 http://bitbucket.org/pypy/pypy/changeset/269e30fb6042/
Log: Adding guards as vector instructions. I'm not yet sure how this will work out, but it could help to generate better loops for reductions. diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py --- a/pypy/module/micronumpy/test/test_zjit.py +++ b/pypy/module/micronumpy/test/test_zjit.py @@ -553,7 +553,7 @@ def test_all(self): result = self.run("all") assert result == 1 - self.check_vectorized(1, 0) # success? + self.check_vectorized(1, 1) def define_logical_xor_reduce(): return """ diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py --- a/rpython/jit/metainterp/optimizeopt/schedule.py +++ b/rpython/jit/metainterp/optimizeopt/schedule.py @@ -603,6 +603,18 @@ def determine_output_type(self, op): return None +class PassThroughOp(OpToVectorOp): + """ This pass through is only applicable if the target + operation is capable of handling vector operations. + Guard true/false is such an example. + """ + def __init__(self, args): + OpToVectorOp.__init__(self, args, None) + + def determine_output_type(self, op): + return None + +GUARD_TF = PassThroughOp((PT_INT_GENERIC,)) INT_OP_TO_VOP = OpToVectorOp((PT_INT_GENERIC, PT_INT_GENERIC), INT_RES) FLOAT_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC, PT_FLOAT_GENERIC), FLOAT_RES) FLOAT_SINGLE_ARG_OP_TO_VOP = OpToVectorOp((PT_FLOAT_GENERIC,), FLOAT_RES) @@ -637,6 +649,9 @@ rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT: OpToVectorOpConv(PT_FLOAT_2, PT_DOUBLE_2), rop.VEC_CAST_FLOAT_TO_INT: OpToVectorOpConv(PT_DOUBLE_2, PT_INT32_2), rop.VEC_CAST_INT_TO_FLOAT: OpToVectorOpConv(PT_INT32_2, PT_DOUBLE_2), + + rop.GUARD_TRUE: GUARD_TF, + rop.GUARD_FALSE: GUARD_TF, } def determine_output_type(node, input_type): diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py --- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py +++ 
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py @@ -16,7 +16,12 @@ class SchedulerBaseTest(DependencyBaseTest): - def parse(self, source, inc_label_jump=True): + def parse(self, source, inc_label_jump=True, + pargs=2, + iargs=10, + fargs=6, + additional_args=None, + replace_args=None): ns = { 'double': self.floatarraydescr, 'float': self.singlefloatarraydescr, @@ -25,10 +30,24 @@ 'short': self.int16arraydescr, 'char': self.chararraydescr, } - loop = opparse(" [p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4]]\n" + source + \ - "\n jump(p0,p1,p2,p3,p4,p5,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9,f0,f1,f2,f3,f4,f5,v103204[i32|4])", - cpu=self.cpu, - namespace=ns) + args = [] + for prefix, rang in [('p',range(pargs)), ('i',range(iargs)), ('f',range(fargs))]: + for i in rang: + args.append(prefix + str(i)) + + assert additional_args is None or isinstance(additional_args,list) + for arg in additional_args or []: + args.append(arg) + for k,v in (replace_args or {}).items(): + for i,_ in enumerate(args): + if k == args[i]: + args[i] = v + break + indent = " " + joinedargs = ','.join(args) + fmt = (indent, joinedargs, source, indent, joinedargs) + src = "%s[%s]\n%s\n%sjump(%s)" % fmt + loop = opparse(src, cpu=self.cpu, namespace=ns) if inc_label_jump: token = JitCellToken() loop.operations = \ @@ -163,21 +182,19 @@ return arg raise Exception("could not find %s in args %s" % (name, loop.inputargs)) - def test_signext_int16(self): + def test_signext_int32(self): loop1 = self.parse(""" - i10 = int_signext(i1, 2) - i11 = int_signext(i1, 2) - i12 = int_signext(i1, 2) - i13 = int_signext(i1, 2) - """) - pack1 = self.pack(loop1, 0, 4) - v103204 = self.find_input_arg('v103204', loop1) - def i1inv103204(var): - return 0, v103204 + i10 = int_signext(i1, 4) + i11 = int_signext(i1, 4) + """, additional_args=['v10[i64|2]']) + pack1 = self.pack(loop1, 0, 2) + var = self.find_input_arg('v10', loop1) + def i1inv103204(v): + return 0, var loop2 = 
self.schedule(loop1, [pack1], prepend_invariant=True, getvboxfunc=i1inv103204) loop3 = self.parse(""" - v11[i16|4] = vec_int_signext(v103204[i32|4], 2) - """, False) + v11[i32|2] = vec_int_signext(v10[i64|2], 4) + """, False, additional_args=['v10[i64|2]']) self.assert_equal(loop2, loop3) def test_cast_float_to_int(self): @@ -275,13 +292,12 @@ self.assert_equal(loop2, loop3) def test_all(self): - py.test.skip("this could be an improvement") loop1 = self.parse(""" i10 = raw_load(p0, i1, descr=long) i11 = raw_load(p0, i2, descr=long) # - i12 = int_and(i10, i6) - i13 = int_and(i11, i12) + i12 = int_and(i10, 255) + i13 = int_and(i11, 255) # guard_true(i12) [] guard_true(i13) [] @@ -289,9 +305,10 @@ pack1 = self.pack(loop1, 0, 2) pack2 = self.pack(loop1, 2, 4) pack3 = self.pack(loop1, 4, 6) - loop2 = self.schedule(loop1, [pack1,pack2,pack3]) + loop2 = self.schedule(loop1, [pack1,pack2,pack3], prepend_invariant=True) loop3 = self.parse(""" - v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long) + v9[i64|2] = vec_int_expand(255) + v10[i64|2] = vec_raw_load(p0, i1, 2, descr=long) v11[i64|2] = vec_int_and(v10[i64|2], v9[i64|2]) guard_true(v11[i64|2]) [] """, False) diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -812,6 +812,10 @@ rop.CAST_SINGLEFLOAT_TO_FLOAT: rop.VEC_CAST_SINGLEFLOAT_TO_FLOAT, rop.CAST_INT_TO_FLOAT: rop.VEC_CAST_INT_TO_FLOAT, rop.CAST_FLOAT_TO_INT: rop.VEC_CAST_FLOAT_TO_INT, + + # guard + rop.GUARD_TRUE: rop.GUARD_TRUE, + rop.GUARD_FALSE: rop.GUARD_FALSE, } _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit