Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78326:d1a942296dd8
Date: 2015-06-26 17:46 +0200
http://bitbucket.org/pypy/pypy/changeset/d1a942296dd8/
Log: rewrote the scheduling tests to pass the pack types explicitly (they
could not be inferred easily); the cost model tests are passing again
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -229,6 +229,7 @@
self.output_type = None
self.costmodel = None
+
def determine_input_type(self, op):
arg = op.getarg(0)
_, vbox = self.sched_data.getvector_of_box(arg)
@@ -267,9 +268,13 @@
#
self.check_if_pack_supported(pack)
#
- self.pack = pack
- self.transform_pack()
-
+ if self.must_be_full_but_is_not(pack):
+ for op in pack.operations:
+ self.preamble_ops.append(op.getoperation())
+ else:
+ self.pack = pack
+ self.transform_pack()
+ #
self.pack = None
self.costmodel = None
self.preamble_ops = None
@@ -277,6 +282,9 @@
self.input_type = None
self.output_type = None
+ def must_be_full_but_is_not(self, pack):
+ return False
+
def split_pack(self, pack, vec_reg_size):
""" Returns how many items of the pack should be
emitted as vector operation. """
@@ -294,11 +302,9 @@
pass
def transform_pack(self):
- op = self.pack.operations[0].getoperation()
+ op = self.pack.leftmost()
args = op.getarglist()
- #
self.before_argument_transform(args)
- #
self.transform_arguments(args)
#
result = op.result
@@ -614,6 +620,7 @@
assert isinstance(sizearg, ConstInt)
self.size = sizearg.value
+
def new_result_vector_box(self):
type = self.output_type.gettype()
count = self.input_type.getcount()
@@ -656,6 +663,11 @@
def determine_input_type(self, op):
return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
+ def must_be_full_but_is_not(self, pack):
+ vrs = self.sched_data.vec_reg_size
+ it = pack.input_type
+ return it.getsize() * it.getcount() < vrs
+
def determine_output_type(self, op):
return None
@@ -833,7 +845,6 @@
ptype = self.input_type
if self.input_type is None:
# load does not have an input type, but only an output type
- assert self.leftmost().is_raw_load()
ptype = self.output_type
op = self.leftmost()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -136,7 +136,7 @@
savings = self.savings(loop1)
assert savings == 2
- @py.test.mark.parametrize("bytes,s", [(1,-1),(2,-1),(4,0),(8,-1)])
+ @py.test.mark.parametrize("bytes,s", [(1,None),(2,None),(4,0),(8,-1)])
def test_sum_float_to_int(self, bytes, s):
loop1 = self.parse("""
f10 = raw_load(p0, i0, descr=double)
@@ -150,13 +150,19 @@
i15 = int_add(i16, i13)
i17 = int_signext(i15, {c})
""".format(c=bytes))
- savings = self.savings(loop1)
- # it does not benefit because signext has
- # a very inefficient implementation (x86
- # does not provide nice instr to convert
- # integer sizes)
- # signext -> no benefit, + 2x unpack
- assert savings <= s
+ try:
+ savings = self.savings(loop1)
+ if s is None:
+ py.test.fail("must fail")
+ # it does not benefit because signext has
+ # a very inefficient implementation (x86
+ # does not provide nice instr to convert
+ # integer sizes)
+ # signext -> no benefit, + 2x unpack
+ assert savings <= s
+ except NotAProfitableLoop:
+ if s is not None:
+ py.test.fail("must not fail")
def test_cast(self):
loop1 = self.parse("""
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -3,7 +3,8 @@
from rpython.jit.metainterp.history import TargetToken, JitCellToken, TreeLoop
from rpython.jit.metainterp.optimizeopt.util import equaloplists, Renamer
from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
- Pack, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel)
+ Pack, Pair, NotAProfitableLoop, VectorizingOptimizer, X86_CostModel,
+ PackSet)
from rpython.jit.metainterp.optimizeopt.dependency import Node
from rpython.jit.metainterp.optimizeopt.schedule import PackType
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
@@ -14,6 +15,14 @@
from rpython.jit.tool.oparser import parse as opparse
from rpython.jit.tool.oparser_model import get_model
+F64 = PackType('f',8,True,2)
+F32 = PackType('f',4,True,4)
+F32_2 = PackType('f',4,True,2)
+I64 = PackType('i',8,True,2)
+I32 = PackType('i',4,True,4)
+I32_2 = PackType('i',4,True,2)
+I16 = PackType('i',2,True,8)
+
class SchedulerBaseTest(DependencyBaseTest):
def parse(self, source, inc_label_jump=True,
@@ -58,8 +67,8 @@
del loop.operations[-1]
return loop
- def pack(self, loop, l, r):
- return Pack([Node(op,1+l+i) for i,op in
enumerate(loop.operations[1+l:1+r])], None, None)
+ def pack(self, loop, l, r, input_type, output_type):
+ return Pack([Node(op,1+l+i) for i,op in
enumerate(loop.operations[1+l:1+r])], input_type, output_type)
def schedule(self, loop_orig, packs, vec_reg_size=16,
prepend_invariant=False, overwrite_funcs=None):
loop = get_model(False).ExtendedTreeLoop("loop")
@@ -72,16 +81,32 @@
for name, overwrite in (overwrite_funcs or {}).items():
setattr(vsd, name, overwrite)
renamer = Renamer()
+ metainterp_sd = FakeMetaInterpStaticData(self.cpu)
+ jitdriver_sd = FakeJitDriverStaticData()
+ opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
+ pairs = []
for pack in packs:
+ for i in range(len(pack.operations)-1):
+ o1 = pack.operations[i]
+ o2 = pack.operations[i+1]
+ pairs.append(Pair(o1,o2,pack.input_type,pack.output_type))
+
+ class FakePackSet(PackSet):
+ def __init__(self):
+ self.packs = None
+
+ opt.packset = FakePackSet()
+ opt.packset.packs = pairs
+
+ opt.combine_packset()
+
+ for pack in opt.packset.packs:
if pack.opcount() == 1:
ops.append(pack.operations[0].getoperation())
else:
for op in vsd.as_vector_operation(pack, renamer):
ops.append(op)
loop.operations = ops
- metainterp_sd = FakeMetaInterpStaticData(self.cpu)
- jitdriver_sd = FakeJitDriverStaticData()
- opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
opt.clear_newoperations()
for op in ops:
opt.unpack_from_vector(op, vsd, renamer)
@@ -106,7 +131,7 @@
i14 = raw_load(p0, i4, descr=float)
i15 = raw_load(p0, i5, descr=float)
""")
- pack1 = self.pack(loop1, 0, 6)
+ pack1 = self.pack(loop1, 0, 6, None, F32)
loop2 = self.schedule(loop1, [pack1])
loop3 = self.parse("""
v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float)
@@ -123,9 +148,9 @@
f10 = cast_int_to_float(i12)
f11 = cast_int_to_float(i13)
""")
- pack1 = self.pack(loop1, 0, 2)
- pack2 = self.pack(loop1, 2, 4)
- pack3 = self.pack(loop1, 4, 6)
+ pack1 = self.pack(loop1, 0, 2, None, I64)
+ pack2 = self.pack(loop1, 2, 4, I64, I32_2)
+ pack3 = self.pack(loop1, 4, 6, I32_2, F32_2)
loop2 = self.schedule(loop1, [pack1, pack2, pack3])
loop3 = self.parse("""
v10[i64|2] = vec_raw_load(p0, i0, 2, descr=long)
@@ -139,7 +164,7 @@
i10 = int_add(i0, 73)
i11 = int_add(i1, 73)
""")
- pack1 = self.pack(loop1, 0, 2)
+ pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse("""
v10[i64|2] = vec_box(2)
@@ -155,7 +180,7 @@
f10 = float_add(f0, 73.0)
f11 = float_add(f1, 73.0)
""")
- pack1 = self.pack(loop1, 0, 2)
+ pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse("""
v10[f64|2] = vec_box(2)
@@ -174,8 +199,8 @@
f12 = float_add(f10, f5)
f13 = float_add(f11, f5)
""")
- pack1 = self.pack(loop1, 0, 2)
- pack2 = self.pack(loop1, 2, 4)
+ pack1 = self.pack(loop1, 0, 2, F64, F64)
+ pack2 = self.pack(loop1, 2, 4, F64, F64)
loop2 = self.schedule(loop1, [pack1, pack2], prepend_invariant=True)
loop3 = self.parse("""
v10[f64|2] = vec_box(2)
@@ -199,7 +224,7 @@
i10 = int_signext(i1, 4)
i11 = int_signext(i1, 4)
""", additional_args=['v10[i64|2]'])
- pack1 = self.pack(loop1, 0, 2)
+ pack1 = self.pack(loop1, 0, 2, I64, I32_2)
var = self.find_input_arg('v10', loop1)
def i1inv103204(v):
return 0, var
@@ -250,10 +275,11 @@
raw_store(p1, i7, i24, descr=short)
raw_store(p1, i8, i25, descr=short)
""")
- pack1 = self.pack(loop1, 0, 8)
- pack2 = self.pack(loop1, 8, 16)
- pack3 = self.pack(loop1, 16, 24)
- pack4 = self.pack(loop1, 24, 32)
+ pack1 = self.pack(loop1, 0, 8, None, I64)
+ pack2 = self.pack(loop1, 8, 16, I64, I32_2)
+ I16_2 = PackType('i',2,True,2)
+ pack3 = self.pack(loop1, 16, 24, I32, I16_2)
+ pack4 = self.pack(loop1, 24, 32, I16, None)
def void(b,c):
pass
loop2 = self.schedule(loop1, [pack1,pack2,pack3,pack4],
@@ -297,9 +323,9 @@
raw_store(p1, i3, i12, descr=float)
raw_store(p1, i4, i13, descr=float)
""")
- pack1 = self.pack(loop1, 0, 4)
- pack2 = self.pack(loop1, 4, 8)
- pack3 = self.pack(loop1, 8, 12)
+ pack1 = self.pack(loop1, 0, 4, None, I64)
+ pack2 = self.pack(loop1, 4, 8, I64, I32_2)
+ pack3 = self.pack(loop1, 8, 12, I32, None)
loop2 = self.schedule(loop1, [pack1,pack2,pack3])
loop3 = self.parse("""
v44[f64|2] = vec_raw_load(p0, i1, 2, descr=double)
@@ -322,9 +348,9 @@
guard_true(i12) []
guard_true(i13) []
""")
- pack1 = self.pack(loop1, 0, 2)
- pack2 = self.pack(loop1, 2, 4)
- pack3 = self.pack(loop1, 4, 6)
+ pack1 = self.pack(loop1, 0, 2, None, I64)
+ pack2 = self.pack(loop1, 2, 4, I64, I64)
+ pack3 = self.pack(loop1, 4, 6, None, I64)
loop2 = self.schedule(loop1, [pack1,pack2,pack3],
prepend_invariant=True)
loop3 = self.parse("""
v9[i64|2] = vec_int_expand(255)
@@ -342,8 +368,8 @@
raw_store(p0, i3, i10, descr=float)
raw_store(p0, i4, i11, descr=float)
""")
- pack1 = self.pack(loop1, 0, 2)
- pack2 = self.pack(loop1, 2, 4)
+ pack1 = self.pack(loop1, 0, 2, None, I32_2)
+ pack2 = self.pack(loop1, 2, 4, I32_2, None)
loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
loop3 = self.parse("""
v1[ui32|2] = vec_raw_load(p0, i1, 2, descr=float)
@@ -361,7 +387,7 @@
i10 = int_and(255, i1)
i11 = int_and(255, i1)
""")
- pack1 = self.pack(loop1, 0, 2)
+ pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse("""
v1[i64|2] = vec_int_expand(255)
@@ -375,7 +401,7 @@
i10 = int_and(255, i1)
i11 = int_and(255, i1)
""")
- pack1 = self.pack(loop1, 0, 2)
+ pack1 = self.pack(loop1, 0, 2, I64, I64)
loop2 = self.schedule(loop1, [pack1], prepend_invariant=True)
loop3 = self.parse("""
v1[i64|2] = vec_int_expand(255)
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -408,6 +408,7 @@
left = pack1.operations[0]
if left in remove_left:
remove_left[left] = pack1
+ pack1.clear()
del self.packset.packs[i]
end_ij -= 1
continue
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit