Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79320:5a31e0903f8d
Date: 2015-08-31 15:06 +0200
http://bitbucket.org/pypy/pypy/changeset/5a31e0903f8d/
Log:	fixed the scheduling tests; two of them were wrong and executed
	vector instructions on half-filled vector registers
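For context on the log message: a pack is "half filled" when the combined byte
width of its operations does not cover the whole vector register, so emitting a
vector instruction for it would leave lanes unused. A minimal sketch of that
size check with illustrative constants (the 16-byte register size matches the
vec_reg_size hard-coded in the test changes below; the names are stand-ins, not
the real Pack API):

    # Illustrative stand-ins, not the rpython Pack/PackSet classes.
    VEC_REG_SIZE = 16      # bytes per vector register (as in FakePackSet below)
    ELEMENT_SIZE = 4       # bytes per int32 element
    OPS_IN_PACK = 2        # a Pair of int32 operations

    pack_bytes = OPS_IN_PACK * ELEMENT_SIZE   # 8 bytes
    if pack_bytes < VEC_REG_SIZE:
        # under-full pack: this changeset drops such packs instead of
        # emitting a vector instruction on a half-filled register
        print("under-full pack: keep the scalar operations")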
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -272,21 +272,12 @@
self.sched_data = sched_data
self.vecops = oplist
self.costmodel = sched_data.costmodel
- #
self.input_type = pack.input_type
self.output_type = pack.output_type
#
self.check_if_pack_supported(pack)
-
- #
- if self.must_be_full_but_is_not(pack):
- for op in pack.operations:
- operation = op.getoperation()
- self.sched_data.unpack_from_vector(operation, scheduler)
- self.vecops.append(operation)
- else:
- self.pack = pack
- self.transform_pack()
+ self.pack = pack
+ self.transform_pack()
#
self.pack = None
self.costmodel = None
@@ -295,9 +286,6 @@
self.input_type = None
self.output_type = None
- def must_be_full_but_is_not(self, pack):
- return False
-
def before_argument_transform(self, args):
pass
@@ -1008,6 +996,8 @@
packlist.append(newpack)
else:
newpack.clear()
+ newpack.operations = []
+ break
def slice_operations(self, vec_reg_size):
count = opcount_filling_vector_register(self, vec_reg_size)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -24,8 +24,9 @@
I16 = PackType('i',2,True,8)
class FakePackSet(PackSet):
- def __init__(self):
- self.packs = None
+ def __init__(self, packs):
+ self.packs = packs
+ self.vec_reg_size = 16
class FakeDependencyGraph(DependencyGraph):
""" A dependency graph that is able to emit every instruction
@@ -115,8 +116,7 @@
pair = Pair(o1,o2,pack.input_type,pack.output_type)
pairs.append(pair)
- opt.packset = FakePackSet()
- opt.packset.packs = pairs
+ opt.packset = FakePackSet(pairs)
if not prepend_invariant:
def pio(oplist, labels):
@@ -149,7 +149,8 @@
loop2 = self.schedule(loop1, [pack1])
loop3 = self.parse("""
v10[i32|4] = vec_raw_load(p0, i0, 4, descr=float)
- v11[i32|2] = vec_raw_load(p0, i4, 2, descr=float)
+ f10 = raw_load(p0, i4, descr=float)
+ f11 = raw_load(p0, i5, descr=float)
""", False)
self.assert_equal(loop2, loop3)
@@ -379,17 +380,19 @@
loop1 = self.parse("""
i10 = raw_load(p0, i1, descr=float)
i11 = raw_load(p0, i2, descr=float)
+ i12 = raw_load(p0, i3, descr=float)
+ i13 = raw_load(p0, i4, descr=float)
raw_store(p0, i3, i10, descr=float)
raw_store(p0, i4, i11, descr=float)
""")
- pack1 = self.pack(loop1, 0, 2, None, I32_2)
- pack2 = self.pack(loop1, 2, 4, I32_2, None)
+ pack1 = self.pack(loop1, 0, 4, None, I32)
+ pack2 = self.pack(loop1, 4, 6, I32_2, None)
loop2 = self.schedule(loop1, [pack1,pack2], prepend_invariant=True)
loop3 = self.parse("""
- v1[i32|2] = vec_raw_load(p0, i1, 2, descr=float)
- i10 = vec_int_unpack(v1[i32|2], 0, 1)
+ v1[i32|4] = vec_raw_load(p0, i1, 4, descr=float)
+ i10 = vec_int_unpack(v1[i32|4], 0, 1)
raw_store(p0, i3, i10, descr=float)
- i11 = vec_int_unpack(v1[i32|2], 1, 1)
+ i11 = vec_int_unpack(v1[i32|4], 1, 1)
raw_store(p0, i4, i11, descr=float)
""", False)
# unfortunate ui32 is the type for float32... the unsigned u is for
@@ -466,5 +469,13 @@
packs.append(pack)
assert len(packs) == 2
+ def test_combine_packset_nearly_empty_pack(self):
+ trace = self.parse("""
+ i10 = int_add(i1, i3)
+ i11 = int_add(i2, i3)
+ """)
+ pack = self.pack(trace, 0, 2, I16, I16)
+ packset = FakePackSet([pack])
+ packset.split_overloaded_packs()
+ assert len(packset.packs) == 0
-
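The new test_combine_packset_nearly_empty_pack above exercises the under-full
case: assuming I16 from PackType('i',2,True,8) describes 2-byte integer
elements, the pack of two int_add operations covers only 4 of the 16 register
bytes, so split_overloaded_packs discards it. The arithmetic spelled out in
plain Python (only the numbers come from the test; the rest is illustrative):

    # Numbers taken from the test: 2-byte I16 elements, two operations in the
    # pack, and the 16-byte vector register size from FakePackSet.
    element_bytes = 2
    operations = 2
    vec_reg_size = 16

    pack_bytes = element_bytes * operations    # 4 bytes
    assert pack_bytes < vec_reg_size           # under-full -> pack is dropped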
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -317,9 +317,7 @@
loop = self.loop
operations = loop.operations
- self.packset = PackSet(self.dependency_graph, operations,
- self.unroll_count, self.smallest_type_bytes,
- self.cpu)
+ self.packset = PackSet(self.cpu.vector_register_size)
graph = self.dependency_graph
memory_refs = graph.memory_refs.items()
# initialize the pack set
@@ -422,14 +420,8 @@
j = 0
if len_before == len(self.packset.packs):
break
- newpacks = []
- vec_reg_size = self.cpu.vector_register_size
- for pack in self.packset.packs:
- if pack.pack_load(vec_reg_size) > Pack.FULL:
- pack.split(newpacks, vec_reg_size)
- continue
- pack.update_pack_of_nodes()
- self.packset.packs.extend(newpacks)
+
+ self.packset.split_overloaded_packs()
if not we_are_translated():
# some test cases check the accumulation variables
@@ -700,15 +692,10 @@
return False
class PackSet(object):
- def __init__(self, dependency_graph, operations, unroll_count,
- smallest_type_bytes, cpu):
+ _attrs_ = ('packs', 'vec_reg_size')
+ def __init__(self, vec_reg_size):
self.packs = []
- self.dependency_graph = dependency_graph
- self.operations = operations
- self.unroll_count = unroll_count
- self.smallest_type_bytes = smallest_type_bytes
- self.cpu = cpu
- self.vec_reg_size = self.cpu.vector_register_size
+ self.vec_reg_size = vec_reg_size
def pack_count(self):
return len(self.packs)
@@ -898,3 +885,17 @@
sched_data.setvector_of_box(accum.getoriginalbox(), 0, result) # prevent it from expansion
renamer.start_renaming(accum.getoriginalbox(), result)
+ def split_overloaded_packs(self):
+ newpacks = []
+ for i,pack in enumerate(self.packs):
+ load = pack.pack_load(self.vec_reg_size)
+ if load > Pack.FULL:
+ pack.split(newpacks, self.vec_reg_size)
+ continue
+ if load < Pack.FULL:
+ pack.clear()
+ self.packs[i] = None
+ continue
+ pack.update_pack_of_nodes()
+ self.packs = [pack for pack in self.packs + newpacks if pack]
+
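To summarize the new split_overloaded_packs method: each pack falls into one of
three cases. A minimal stand-in sketch of that decision (the ratio-based load
and FULL value are illustrative; the real pack_load/Pack.FULL semantics live in
the rpython sources above):

    # Hypothetical stand-in for the three-way decision in split_overloaded_packs.
    FULL = 1.0   # illustrative: a load of 1.0 exactly fills the register

    def classify(load):
        if load > FULL:
            return "split"   # overloaded: split into smaller packs
        if load < FULL:
            return "drop"    # under-full: cleared and removed from the packset
        return "keep"        # exactly full: kept and its nodes updated

    assert classify(2.0) == "split"
    assert classify(0.5) == "drop"
    assert classify(1.0) == "keep"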