Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78696:08d59f3ff88c
Date: 2015-07-28 18:45 +0200
http://bitbucket.org/pypy/pypy/changeset/08d59f3ff88c/
Log: all but 2 vectorization tests passing again. the scheduling that
prefers pure operations messes up these test cases
diff --git a/rpython/jit/backend/x86/vector_ext.py
b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -11,6 +11,7 @@
xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
X86_64_SCRATCH_REG, X86_64_XMM_SCRATCH_REG, AddressLoc)
from rpython.jit.backend.llsupport.regalloc import (get_scale,
valid_addressing_size)
+from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.rlib.objectmodel import we_are_translated
from rpython.rtyper.lltypesystem.lloperation import llop
from rpython.rtyper.lltypesystem import lltype
@@ -170,19 +171,23 @@
self.mc.MOVUPD(dest_loc, value_loc)
def genop_vec_int_is_true(self, op, arglocs, resloc):
- loc, size = arglocs
+ loc, sizeloc = arglocs
temp = X86_64_XMM_SCRATCH_REG
self.mc.PXOR(temp, temp)
# every entry that is non zero -> becomes zero
# zero entries become ones
- self.mc.PCMPEQ(loc, temp, size)
+ self.mc.PCMPEQ(loc, temp, sizeloc.value)
# a second time -> every zero entry (corresponding to non zero
# entries before) become ones
- self.mc.PCMPEQ(loc, temp, size)
+ self.mc.PCMPEQ(loc, temp, sizeloc.value)
def genop_guard_vec_int_is_true(self, op, guard_op, guard_token, arglocs,
resloc):
self._guard_vector_true(op, arglocs[0])
- self.implement_guard(guard_token, 'NZ')
+ guard_opnum = guard_op.getopnum()
+ if guard_opnum == rop.GUARD_TRUE:
+ self.implement_guard(guard_token, 'NZ')
+ else:
+ self.implement_guard(guard_token, 'Z')
def genop_vec_int_mul(self, op, arglocs, resloc):
loc0, loc1, itemsize_loc = arglocs
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_costmodel.py
@@ -4,7 +4,7 @@
from rpython.jit.metainterp.optimizeopt.util import equaloplists
from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
Pack, NotAProfitableLoop, VectorizingOptimizer)
-from rpython.jit.metainterp.optimizeopt.dependency import Node
+from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
from rpython.jit.metainterp.optimizeopt.test.test_schedule import
SchedulerBaseTest
from rpython.jit.metainterp.optimizeopt.test.test_vectorize import
(FakeMetaInterpStaticData,
@@ -35,8 +35,7 @@
metainterp_sd = FakeMetaInterpStaticData(self.cpu)
jitdriver_sd = FakeJitDriverStaticData()
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, [])
- opt.build_dependency_graph()
- graph = opt.dependency_graph
+ graph = opt.dependency_graph = DependencyGraph(loop)
for k,m in graph.memory_refs.items():
graph.memory_refs[k] = FakeMemoryRef(m.array, m.index_var)
opt.find_adjacent_memory_refs()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -82,7 +82,7 @@
opt.loop.operations = opt.get_newoperations()
self.debug_print_operations(opt.loop)
opt.clear_newoperations()
- opt.build_dependency_graph()
+ opt.dependency_graph = DependencyGraph(loop)
self.last_graph = opt.dependency_graph
self.show_dot_graph(self.last_graph, self.test_name)
return opt
@@ -278,20 +278,20 @@
"""
opt_ops = """
[p0,p1,p2,i0]
+ i4 = int_add(i0, 1)
+ i5 = int_le(i4, 10)
+ guard_true(i5) []
i1 = raw_load(p1, i0, descr=floatarraydescr)
i2 = raw_load(p2, i0, descr=floatarraydescr)
i3 = int_add(i1,i2)
raw_store(p0, i0, i3, descr=floatarraydescr)
- i4 = int_add(i0, 1)
- i5 = int_le(i4, 10)
- guard_true(i5) []
+ i9 = int_add(i4, 1)
+ i10 = int_le(i9, 10)
+ guard_true(i10) []
i6 = raw_load(p1, i4, descr=floatarraydescr)
i7 = raw_load(p2, i4, descr=floatarraydescr)
i8 = int_add(i6,i7)
raw_store(p0, i4, i8, descr=floatarraydescr)
- i9 = int_add(i4, 1)
- i10 = int_le(i9, 10)
- guard_true(i10) []
jump(p0,p1,p2,i9)
"""
self.assert_unroll_loop_equals(self.parse_loop(ops),
self.parse_loop(opt_ops), 1)
@@ -334,8 +334,8 @@
i4 = raw_load(p0,i1,descr=chararraydescr)
jump(p0,i3,i4)
"""
- vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
- vopt.build_dependency_graph()
+ loop = self.parse_loop(ops)
+ vopt = self.vectoroptimizer_unrolled(loop,0)
assert len(vopt.dependency_graph.memory_refs) == 2
self.assert_has_memory_ref_at(1)
self.assert_has_memory_ref_at(2)
@@ -571,7 +571,7 @@
"""
vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
vopt.find_adjacent_memory_refs()
- mref = self.getmemref(3)
+ mref = self.getmemref(5)
mref2 = self.getmemref(6)
self.assert_memory_ref_not_adjacent(mref, mref2)
@@ -591,7 +591,7 @@
"""
vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
vopt.find_adjacent_memory_refs()
- mref = self.getmemref(3)
+ mref = self.getmemref(6)
mref2 = self.getmemref(7)
self.assert_memory_ref_not_adjacent(mref, mref2)
@@ -611,7 +611,7 @@
"""
vopt = self.vectoroptimizer_unrolled(self.parse_loop(ops),0)
vopt.find_adjacent_memory_refs()
- mref = self.getmemref(3)
+ mref = self.getmemref(6)
mref2 = self.getmemref(7)
self.assert_memory_ref_not_adjacent(mref, mref2)
@@ -628,7 +628,7 @@
"""
loop = self.parse_loop(ops)
vopt = self.init_packset(loop,1)
- self.assert_independent(1,5)
+ self.assert_independent(4,8)
assert vopt.packset is not None
assert len(vopt.dependency_graph.memory_refs) == 2
assert len(vopt.packset.packs) == 1
@@ -748,18 +748,18 @@
loop = self.parse_loop(ops)
vopt = self.extend_packset(loop,1)
assert len(vopt.dependency_graph.memory_refs) == 4
+ self.assert_independent(4,10)
self.assert_independent(5,11)
self.assert_independent(6,12)
- self.assert_independent(7,13)
assert len(vopt.packset.packs) == 3
self.assert_packset_empty(vopt.packset, len(loop.operations),
- [(6,12), (5,11), (7,13)])
+ [(6,12), (5,11), (4,10)])
@pytest.mark.parametrize("descr,packs,packidx",
- [('char',1, [(0,(1,3,5,7))]),
- ('float',2, [(0,(1,3)),(1,(5,7))]),
- ('int',2, [(0,(1,3)),(1,(5,7))]),
- ('singlefloat',1,[(0,(1,3,5,7))])])
+ [('char',1, [(0,(2,4,6,8))]),
+ ('float',2, [(0,(2,4)),(1,(6,8))]),
+ ('int',2, [(0,(2,4)),(1,(6,8))]),
+ ('singlefloat',1,[(0,(2,4,6,8))])])
def test_packset_combine_simple(self,descr,packs,packidx):
ops = """
[p0,i0]
@@ -849,7 +849,7 @@
assert len(vopt.packset.packs) == 4
for opindices in [(5,12,19,26),(6,13,20,27),
- (7,14,21,28),(8,15,22,29)]:
+ (7,14,21,28),(4,11,18,25)]:
self.assert_has_pack_with(vopt.packset, opindices)
@pytest.mark.parametrize('op,descr,stride',
@@ -874,7 +874,6 @@
""".format(op=op,descr=descr,stride=1) # stride getarray is always 1
vops = """
[p0,p1,p2,i0]
- guard_early_exit() []
i10 = int_le(i0, 128)
guard_true(i10) []
i1 = int_add(i0, {stride})
@@ -907,7 +906,6 @@
"""
opt="""
[i0, i1, i2, i3, i4]
- guard_early_exit() []
i11 = int_add(i0, 1)
i6 = int_mul(i0, 8)
i12 = int_lt(i11, i1)
@@ -941,7 +939,6 @@
for i in range(0,14)])
opt="""
[p0,i0]
- guard_early_exit() [p0,i0]
i200 = int_add(i0, 1)
i400 = int_lt(i200, 102)
i2 = int_add(i0, 16)
@@ -989,7 +986,6 @@
[p0,i0]
v3 = vec_int_expand(42)
label(p0,i0,v3)
- guard_early_exit() [p0,i0]
i20 = int_add(i0, 1)
i30 = int_lt(i20, 10)
i2 = int_add(i0, 2)
@@ -1019,7 +1015,6 @@
[p0,i0,f3]
v3 = vec_float_expand(f3)
label(p0,i0,f3,v3)
- guard_early_exit() [p0,i0]
i20 = int_add(i0, 1)
i30 = int_lt(i20, 10)
i2 = int_add(i0, 2)
@@ -1047,7 +1042,6 @@
"""
trace_opt = """
[p0, i0, v2[f64|2]]
- guard_early_exit() [p0, i0, v2[f64|2]]
i1 = int_add(i0, 16)
i2 = int_lt(i1, 100)
guard_false(i2) [p0, i0, v[f64|2]]
@@ -1103,7 +1097,6 @@
opt = """
[p36, i28, p9, i37, p14, f34, p12, p38, f35, p39, i40, i41, p42, i43,
i44, i21, i4, i0, i18]
guard_not_invalidated() [p38, p12, p9, p14, p39, i37, i44, f35, i40,
p42, i43, f34, i28, p36, i41]
- guard_early_exit() [p38, p12, p9, p14, p39, i37, i44, f35, i40, p42,
i43, f34, i28, p36, i41]
i50 = int_add(i28, 1)
i46 = int_add(i44, 8)
i48 = int_add(i41, 8)
@@ -1142,7 +1135,6 @@
"""
opt = """
[p0, p1, i1]
- guard_early_exit() []
i3 = int_add(i1, 1)
i4 = int_ge(i3, 36)
i50 = int_add(i1, 4)
@@ -1184,7 +1176,6 @@
"""
opt = """
[p0, p1, p2, i0, i4]
- guard_early_exit() []
i5 = int_add(i4, 4)
i1 = int_add(i0, 4)
i186 = int_lt(i5, 100)
@@ -1219,39 +1210,6 @@
vopt = self.vectorize(self.parse_loop(ops))
self.assert_equal(vopt.loop, self.parse_loop(opt))
- def test_call_prohibits_vectorization(self):
- # think about this
- py.test.skip("")
- ops = """
- [p31, i32, p3, i33, f10, p24, p34, p35, i19, p5, i36, p37, i28, f13,
i29, i15]
- guard_early_exit() [p5,p37,p34,p3,p24,i32,p35,i36,i33,f10,p31,i19]
- f38 = raw_load(i28, i33, descr=floatarraydescr)
-
guard_not_invalidated()[p5,p37,p34,p3,p24,f38,i32,p35,i36,i33,None,p31,i19]
- i39 = int_add(i33, 8)
- f40 = float_mul(f38, 0.0)
- i41 = float_eq(f40, f40)
- guard_true(i41)
[p5,p37,p34,p3,p24,f13,f38,i39,i32,p35,i36,None,None,p31,i19]
- f42 = call(111, f38, f13, descr=writeadescr)
- i43 = call(222, 333, descr=writeadescr)
- f44 = float_mul(f42, 0.0)
- i45 = float_eq(f44, f44)
- guard_true(i45)
[p5,p37,p34,p3,p24,f13,f38,i43,f42,i39,i32,p35,i36,None,None,p31,i19]
- i46 = int_is_true(i43)
- guard_false(i46)
[p5,p37,p34,p3,p24,f13,f38,i43,f42,i39,i32,p35,i36,None,None,p31,i19]
- raw_store(i29, i36, f42, descr=floatarraydescr)
- i47 = int_add(i19, 1)
- i48 = int_add(i36, 8)
- i49 = int_ge(i47, i15)
- guard_false(i49)
[p5,p37,p34,p3,p24,i47,f38,i48,i39,i32,p35,None,None,None,p31,None]
- jump(p31, i32, p3, i39, f38, p24, p34, p35, i47, p5, i48, p37, i28,
f13, i29, i15)
- """
- try:
- vopt = self.vectorize(self.parse_loop(ops))
- self.debug_print_operations(vopt.loop)
- py.test.fail("this loop should not be vectorized")
- except NotAVectorizeableLoop:
- pass
-
def test_truediv_abs_neg_float(self):
ops = """
[f9,p10,i11,p4,i12,p2,p5,p13,i14,p7,i15,p8,i16,f17,i18,i19]
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -229,18 +229,17 @@
# that are needed to resume.
if copied_op.is_guard():
assert isinstance(copied_op, GuardResOp)
- target_guard = copied_op
descr = copied_op.getdescr()
- assert isinstance(descr, ResumeGuardDescr)
- copied_op.setdescr(descr.clone())
- descr = target_guard.getdescr()
- # copy failargs/snapshot
- copied_op.rd_snapshot = \
- renamer.rename_rd_snapshot(copied_op.rd_snapshot,
- clone=True)
- renamed_failargs = \
- renamer.rename_failargs(copied_op, clone=True)
- copied_op.setfailargs(renamed_failargs)
+ if descr:
+ assert isinstance(descr, ResumeGuardDescr)
+ copied_op.setdescr(descr.clone())
+ # copy failargs/snapshot
+ copied_op.rd_snapshot = \
+ renamer.rename_rd_snapshot(copied_op.rd_snapshot,
+ clone=True)
+ renamed_failargs = \
+ renamer.rename_failargs(copied_op, clone=True)
+ copied_op.setfailargs(renamed_failargs)
#
self.emit_unrolled_operation(copied_op)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit