Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78322:7419dfb817a7
Date: 2015-06-26 14:48 +0200
http://bitbucket.org/pypy/pypy/changeset/7419dfb817a7/
Log: trying to make things easier; complexity gets hard to manage when
extending the accumulation. trying to prevent the splitting from entering
the scheduling (work in progress)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -188,6 +188,9 @@
def new_vector_box(self, count = -1):
if count == -1:
count = self.count
+ assert count > 1
+ assert self.type in ('i','f')
+ assert self.size > 0
return BoxVector(self.type, count, self.size, self.signed)
def __repr__(self):
@@ -291,29 +294,29 @@
pass
def transform_pack(self):
- self.off = 0
- while self.off < self.pack.opcount():
- op = self.pack.operations[self.off].getoperation()
- args = op.getarglist()
- #
- self.before_argument_transform(args)
- #
- argument_infos = []
- self.transform_arguments(args, argument_infos)
- #
- result = op.result
- result = self.transform_result(result)
- #
- vop = ResOperation(op.vector, args, result, op.getdescr())
- if op.is_guard():
- assert isinstance(op, GuardResOp)
- vop.setfailargs(op.getfailargs())
- vop.rd_snapshot = op.rd_snapshot
- self.preamble_ops.append(vop)
- stride = self.consumed_operations(argument_infos, result)
- self.costmodel.record_pack_savings(self.pack, stride)
- assert stride != 0
- self.off += stride
+ #self.off = 0
+ #while self.off < self.pack.opcount():
+ op = self.pack.operations[0].getoperation()
+ args = op.getarglist()
+ #
+ self.before_argument_transform(args)
+ #
+ argument_infos = []
+ self.transform_arguments(args, argument_infos)
+ #
+ result = op.result
+ result = self.transform_result(result)
+ #
+ vop = ResOperation(op.vector, args, result, op.getdescr())
+ if op.is_guard():
+ assert isinstance(op, GuardResOp)
+ vop.setfailargs(op.getfailargs())
+ vop.rd_snapshot = op.rd_snapshot
+ self.preamble_ops.append(vop)
+ #stride = self.consumed_operations(argument_infos, result)
+ self.costmodel.record_pack_savings(self.pack, self.pack.opcount())
+ #assert stride != 0
+ #self.off += stride
def consumed_operations(self, argument_infos, result):
ops = self.getoperations()
@@ -348,7 +351,7 @@
return BoxVector(type, count, size, signed)
def getoperations(self):
- return self.pack.operations[self.off:]
+ return self.pack.operations
def transform_arguments(self, args, argument_info):
for i,arg in enumerate(args):
@@ -406,16 +409,14 @@
argument_info.append(args[i].item_count)
def gather(self, vboxes, target_count): # packed < packable and packed <
stride:
- i = 0
(_, box) = vboxes[0]
+ i = 1
while i < len(vboxes):
- if i+1 >= len(vboxes):
- break
- (box2_pos, box2) = vboxes[i+1]
+ (box2_pos, box2) = vboxes[i]
if box.getcount() + box2.getcount() <= target_count:
box = self.package(box, box.getcount(),
box2, box2_pos, box2.getcount())
- i += 2
+ i += 1
return box
pass
# OLD
@@ -453,8 +454,10 @@
def extend_int(self, vbox, newtype):
vbox_cloned = newtype.new_vector_box(vbox.item_count)
self.sched_data._prevent_signext(newtype.getsize(), vbox.getsize())
+ newsize = newtype.getsize()
+ assert newsize > 0
op = ResOperation(rop.VEC_INT_SIGNEXT,
- [vbox, ConstInt(newtype.getsize())],
+ [vbox, ConstInt(newsize)],
vbox_cloned)
self.costmodel.record_cast_int(vbox.getsize(), newtype.getsize(),
vbox.getcount())
self.preamble_ops.append(op)
@@ -618,6 +621,9 @@
if count * size > vec_reg_size:
count = vec_reg_size // size
signed = self.output_type.signed
+ assert type in ('i','f')
+ assert size > 0
+ assert count > 1
return BoxVector(type, count, size, signed)
class SignExtToVectorOp(OpToVectorOp):
@@ -625,15 +631,10 @@
OpToVectorOp.__init__(self, intype, outtype)
self.size = -1
- def split_pack(self, pack, vec_reg_size):
- op0 = pack.operations[0].getoperation()
- sizearg = op0.getarg(1)
+ def before_argument_transform(self, args):
+ sizearg = args[1]
assert isinstance(sizearg, ConstInt)
self.size = sizearg.value
- _, vbox = self.sched_data.getvector_of_box(op0.getarg(0))
- if vbox.getcount() * self.size > vec_reg_size:
- return vec_reg_size // self.size
- return vbox.getcount()
def new_result_vector_box(self):
type = self.output_type.gettype()
@@ -642,6 +643,9 @@
if count * self.size > vec_reg_size:
count = vec_reg_size // self.size
signed = self.input_type.signed
+ assert type in ('i','f')
+ assert self.size > 0
+ assert count > 1
return BoxVector(type, count, self.size, signed)
class LoadToVectorLoad(OpToVectorOp):
@@ -655,18 +659,12 @@
return PackType.by_descr(op.getdescr(), self.sched_data.vec_reg_size)
def before_argument_transform(self, args):
- args.append(ConstInt(len(self.pack.operations)))
+ count = min(self.output_type.getcount(), len(self.getoperations()))
+ args.append(ConstInt(count))
def getscalarsize(self):
return self.output_type.getsize()
- def new_result_vector_box(self):
- type = self.output_type.gettype()
- size = self.output_type.getsize()
- count = len(self.pack.operations)
- signed = self.output_type.signed
- return BoxVector(type, count, size, signed)
-
class StoreToVectorStore(OpToVectorOp):
"""
Storing operations are special because they are not allowed
@@ -846,6 +844,28 @@
def opcount(self):
return len(self.operations)
+ def process_count(self):
+ return len(self.operations)
+
+ def is_full(self, vec_reg_size):
+ """ if one input element times the opcount is equal
+ to the vector register size, we are full!
+ """
+ ptype = self.input_type
+ if self.input_type is None:
+ # load does not have an input type, but only an output type
+ assert self.operations[0].getoperation().is_raw_load()
+ ptype = self.output_type
+ bytes = ptype.getsize() * self.process_count()
+ assert bytes <= vec_reg_size
+ if bytes == vec_reg_size:
+ return True
+ if ptype.getcount() != -1:
+ size = ptype.getcount() * ptype.getsize()
+ assert bytes <= size
+ return bytes == size
+ return False
+
def opnum(self):
assert len(self.operations) > 0
return self.operations[0].getoperation().getopnum()
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -747,8 +747,8 @@
self.assert_packset_empty(vopt.packset, len(loop.operations),
[(6,12), (5,11), (7,13)])
- @pytest.mark.parametrize("descr", ['char','float','int','singlefloat'])
- def test_packset_combine_simple(self,descr):
+ @pytest.mark.parametrize("descr,size",
[('char',16),('float',2),('int',2),('singlefloat',4)])
+ def test_packset_combine_simple(self,descr,size):
ops = """
[p0,i0]
i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
@@ -758,18 +758,7 @@
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.dependency_graph.memory_refs) == 4
- assert len(vopt.packset.packs) == 1
- self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
- ops = """
- [p0,i0]
- i3 = getarrayitem_raw(p0, i0, descr={descr}arraydescr)
- i1 = int_add(i0,1)
- jump(p0,i1)
- """.format(descr=descr)
- loop = self.parse_loop(ops)
- vopt = self.combine_packset(loop,3)
- assert len(vopt.dependency_graph.memory_refs) == 4
- assert len(vopt.packset.packs) == 1
+ assert len(vopt.packset.packs) == 16 // size
self.assert_pack(vopt.packset.packs[0], (1,3,5,7))
@pytest.mark.parametrize("descr,stride",
@@ -786,7 +775,7 @@
loop = self.parse_loop(ops)
vopt = self.combine_packset(loop,3)
assert len(vopt.dependency_graph.memory_refs) == 8
- assert len(vopt.packset.packs) == 1
+ assert len(vopt.packset.packs) == (16//stride) * 2
self.assert_pack(vopt.packset.packs[0], (1,3,5,7,9,11,13,15))
def test_packset_combine_2_loads_one_redundant(self):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -24,6 +24,7 @@
getunpackopnum, PackType, determine_output_type, determine_trans)
from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
+from rpython.rlib import listsort
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.debug import debug_print, debug_start, debug_stop
from rpython.rlib.jit import Counters
@@ -94,6 +95,9 @@
else:
raise
+def cmp_pack_lt(a,b):
+ return a.left.getindex() < b.left.getindex()
+packsort = listsort.make_timsort_class(lt=cmp_pack_lt)
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
@@ -327,10 +331,13 @@
pack_count = self.packset.pack_count()
while True:
for pack in self.packset.packs:
- self.follow_use_defs(pack)
self.follow_def_uses(pack)
if pack_count == self.packset.pack_count():
- break
+ pack_count = self.packset.pack_count()
+ for pack in self.packset.packs:
+ self.follow_use_defs(pack)
+ if pack_count == self.packset.pack_count():
+ break
pack_count = self.packset.pack_count()
def follow_use_defs(self, pack):
@@ -371,6 +378,7 @@
raise NotAVectorizeableLoop()
i = 0
j = 0
+ packsort(self.packset.packs)
end_ij = len(self.packset.packs)
while True:
len_before = len(self.packset.packs)
@@ -381,6 +389,8 @@
j += 1
continue
pack1 = self.packset.packs[i]
+ if pack1.is_full(self.cpu.vector_register_size):
+ break
pack2 = self.packset.packs[j]
if pack1.rightmost_match_leftmost(pack2):
end_ij = self.packset.combine(i,j)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit