Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79319:75f6354522df
Date: 2015-08-31 14:33 +0200
http://bitbucket.org/pypy/pypy/changeset/75f6354522df/
Log: resolving some issues introduced by the simpler combination and
separate splitting phase, needs to remove packs that are not full
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -240,6 +240,11 @@
def getcount(self):
return self.count
+ def pack_byte_size(self, pack):
+ if len(pack.operations) == 0:
+ return 0
+ return self.getsize() * pack.opcount()
+
PT_GENERIC = PackType(PackType.UNKNOWN_TYPE, -1, False)
PT_FLOAT_2 = PackType(FLOAT, 4, False, 2)
@@ -873,11 +878,43 @@
assert not isinstance(box, BoxVector)
self.box_to_vbox[box] = (off, vector)
+def opcount_filling_vector_register(pack, vec_reg_size):
+ """ how many operations of that kind can one execute
+ with a machine instruction of register size X?
+ """
+ pack_type = pack.input_type
+ if pack_type is None:
+ pack_type = pack.output_type # load operations
+
+ op = pack.leftmost()
+ if op.casts_box():
+ count = pack_type.getcount()
+ return count
+
+ count = vec_reg_size // pack_type.getsize()
+ return count
+
+def maximum_byte_size(pack, vec_reg_size):
+ """ The maximum size in bytes the operation is able to
+ process with the hardware register and the operation
+ semantics.
+ """
+ op = pack.leftmost()
+ if op.casts_box():
+ # casting is special, often only takes a half full vector
+ pack_type = pack.input_type
+ if pack_type is None:
+ pack_type = pack.output_type # load operations
+ return pack_type.byte_size()
+ return vec_reg_size
+
class Pack(object):
""" A pack is a set of n statements that are:
* isomorphic
* independent
"""
+ FULL = 0
+
def __init__(self, ops, input_type, output_type):
self.operations = ops
self.accum = None
@@ -899,30 +936,43 @@
ptype = self.output_type
return ptype
- def pack_byte_size(self):
- return self.pack_type().getsize() * self.opcount()
+ def input_byte_size(self):
+ """ The amount of bytes the operations need with the current
+ entries in self.operations. E.g. cast_singlefloat_to_float
+ takes only #2 operations.
+ """
+ return self._byte_size(self.input_type)
+
+ def output_byte_size(self):
+ """ The amount of bytes the operations need with the current
+ entries in self.operations. E.g. vec_load(..., descr=short)
+ with 10 operations returns 20
+ """
+ return self._byte_size(self.output_type)
+
+ def pack_load(self, vec_reg_size):
+ """ Returns the load of the pack. A value
+ smaller than 0 indicates that it is empty
+ or nearly empty, zero indicates that all slots
+ are used and > 0 indicates that too many operations
+ are in this pack instance.
+ """
+ if len(self.operations) == 0:
+ return -1
+ size = maximum_byte_size(self, vec_reg_size)
+ if self.input_type is None:
+ # e.g. load operations
+ return self.output_type.pack_byte_size(self) - size
+ # default only consider the input type
+ # e.g. store operations, int_add, ...
+ return self.input_type.pack_byte_size(self) - size
+
def is_full(self, vec_reg_size):
""" If one input element times the opcount is equal
to the vector register size, we are full!
"""
- ptype = self.pack_type()
- op = self.leftmost()
- if op.casts_box():
- cur_bytes = ptype.getsize() * self.opcount()
- max_bytes = self.input_type.byte_size()
- assert cur_bytes <= max_bytes
- return cur_bytes == max_bytes
-
- bytes = self.pack_byte_size()
- assert bytes <= vec_reg_size
- if bytes == vec_reg_size:
- return True
- if ptype.getcount() != -1:
- size = ptype.getcount() * ptype.getsize()
- assert bytes <= size
- return bytes == size
- return False
+ return self.pack_load(vec_reg_size) == Pack.FULL
def opnum(self):
assert len(self.operations) > 0
@@ -930,9 +980,8 @@
def clear(self):
for node in self.operations:
- if node.pack is not self:
- node.pack = None
- node.pack_position = -1
+ node.pack = None
+ node.pack_position = -1
def update_pack_of_nodes(self):
for i,node in enumerate(self.operations):
@@ -945,19 +994,28 @@
vector register.
"""
pack = self
- pack_type = self.pack_type()
- max_count = vec_reg_size // pack_type.getsize()
- assert max_count * pack_type.getsize() == vec_reg_size
- while pack.pack_byte_size() > vec_reg_size:
- assert max_count > 0
- newpack = pack.clone()
- oplist = pack.operations[:max_count]
- newpack.operations = pack.operations[max_count:]
+ while pack.pack_load(vec_reg_size) > Pack.FULL:
+ pack.clear()
+ oplist, newoplist = pack.slice_operations(vec_reg_size)
pack.operations = oplist
pack.update_pack_of_nodes()
- newpack.update_pack_of_nodes()
- pack = newpack
- packlist.append(newpack)
+ assert pack.is_full(vec_reg_size)
+ #
+ newpack = pack.clone(newoplist)
+ load = newpack.pack_load(vec_reg_size)
+ if load >= Pack.FULL:
+ pack = newpack
+ packlist.append(newpack)
+ else:
+ newpack.clear()
+
+ def slice_operations(self, vec_reg_size):
+ count = opcount_filling_vector_register(self, vec_reg_size)
+ newoplist = self.operations[count:]
+ oplist = self.operations[:count]
+ assert len(newoplist) + len(oplist) == len(self.operations)
+ assert len(newoplist) != 0
+ return oplist, newoplist
def rightmost_match_leftmost(self, other):
""" Check if pack A can be combined with pack B """
@@ -974,14 +1032,16 @@
return rightmost is leftmost and accum
def __repr__(self):
+ if len(self.operations) == 0:
+ return "Pack(-, [])"
opname = self.operations[0].getoperation().getopname()
return "Pack(%s,%r)" % (opname, self.operations)
def is_accumulating(self):
return self.accum is not None
- def clone(self):
- cloned = Pack(self.operations, self.input_type, self.output_type)
+ def clone(self, oplist):
+ cloned = Pack(oplist, self.input_type, self.output_type)
cloned.accum = self.accum
return cloned
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -452,3 +452,19 @@
v9[i64|2] = vec_int_and(v4[i64|2], v8[i64|2])
""", False)
self.assert_equal(loop2, loop3)
+
+ def test_split_cast(self):
+ trace = self.parse("""
+ f10 = cast_int_to_float(i1)
+ f11 = cast_int_to_float(i2)
+ f12 = cast_int_to_float(i3)
+ f13 = cast_int_to_float(i4)
+ """)
+ pack = self.pack(trace, 0, 4, I64, F32)
+ packs = []
+ pack.split(packs, 16)
+ packs.append(pack)
+ assert len(packs) == 2
+
+
+
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -1427,5 +1427,51 @@
opt = self.schedule(self.parse_loop(trace))
self.debug_print_operations(opt.loop)
+ def test_arraylen(self):
+ trace = """
+ [i45, i33, p40]
+ # while i < len(l):
+ # LOAD_FAST i
+ # LOAD_GLOBAL len
+ guard_not_invalidated(descr=<Guard0x7f82c00c3518>) [i33,p40]
+ # LOAD_FAST l
+ # CALL_FUNCTION 1
+ # COMPARE_OP <
+ i50 = int_lt(i45, i33)
+ guard_true(i50) [i50,i33,p40]
+ # POP_JUMP_IF_FALSE 70
+ # l[i] = l[i] + 1
+ # LOAD_FAST l
+ # LOAD_FAST i
+ # BINARY_SUBSCR
+ i51 = uint_ge(i45, i33)
+ guard_false(i51) [i50, i45]
+ i52 = getarrayitem_gc(p40, i45, descr=intarraydescr)
+ # LOAD_CONST 1
+ # BINARY_ADD
+ i53 = int_add(i52, 1)
+ #guard_no_overflow(descr=<Guard0x7f82c00c33b8>) []
+ # LOAD_FAST l
+ # LOAD_FAST i
+ # STORE_SUBSCR
+ setarrayitem_gc(p40, i45, i53, descr=intarraydescr)
+ # i += 1
+ # LOAD_FAST i
+ # LOAD_CONST 1
+ # INPLACE_ADD
+ i54 = int_add(i45,1)
+ # STORE_FAST i
+ # JUMP_ABSOLUTE 21
+ #getfield_raw_i(140199654614400, descr=<FieldS
pypysig_long_struct.c_value 0>)
+ #None = i55 < 0
+ #guard(i56 is false)
+ # LOAD_FAST i
+ #i34 = arraylen_gc(p40, descr=<ArrayS 8>)
+ jump(i54, i33, p40)
+ """
+ opt = self.vectorize(self.parse_loop(trace))
+ self.debug_print_operations(opt.loop)
+
+
class TestLLtype(BaseTestVectorize, LLtypeMixin):
pass
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -133,10 +133,6 @@
return False
-def cmp_pack_lt(a,b):
- return a.left.getindex() < b.left.getindex()
-packsort = listsort.make_timsort_class(lt=cmp_pack_lt)
-
class VectorizingOptimizer(Optimizer):
""" Try to unroll the loop and find instructions to group """
@@ -401,13 +397,6 @@
"""
if len(self.packset.packs) == 0:
raise NotAVectorizeableLoop()
- #packsort(self.packset.packs).sort()
- #if not we_are_translated():
- # # ensure we are really sorted!
- # x = 0
- # for i,pack in enumerate(self.packset.packs):
- # assert x <= pack.left.getindex()
- # x = pack.left.getindex()
i = 0
j = 0
end_ij = len(self.packset.packs)
@@ -423,33 +412,6 @@
continue
pack1 = self.packset.packs[i]
pack2 = self.packset.packs[j]
- # remove intermediate
- left = pack1.operations[0]
- #if left in orphan:
- # # a pack was filled, thus the rhs was put
- # # into the orphan map.
- # if orphan[left] is False:
- # # this pack might be redundant if pack1.right
- # # is the at the left position in another pack
- # assert pack1.opcount() == 2
- # right = pack1.operations[1]
- # orphan[right] = True
- # pack1.clear()
- # del self.packset.packs[i]
- # end_ij -= 1
- # continue
- # else:
- # # left is not an orphan, this pack proves that
- # # there might be more packs
- # del orphan[left]
- # check if the pack is already full
- #if pack1.is_full(self.cpu.vector_register_size):
- # right = pack1.operations[-1]
- # # False indicates that the next pair might not
- # # be needed, because left is already computed
- # # in another set
- # orphan[right] = False
- # break
if pack1.rightmost_match_leftmost(pack2):
end_ij = self.packset.combine(i,j)
else:
@@ -460,11 +422,14 @@
j = 0
if len_before == len(self.packset.packs):
break
+ newpacks = []
+ vec_reg_size = self.cpu.vector_register_size
for pack in self.packset.packs:
- if pack.pack_byte_size() > self.cpu.vector_register_size:
- pack.split(self.packset.packs, self.cpu.vector_register_size)
- else:
- pack.update_pack_of_nodes()
+ if pack.pack_load(vec_reg_size) > Pack.FULL:
+ pack.split(newpacks, vec_reg_size)
+ continue
+ pack.update_pack_of_nodes()
+ self.packset.packs.extend(newpacks)
if not we_are_translated():
# some test cases check the accumulation variables
@@ -483,9 +448,10 @@
if accum:
self.packset.accum_vars[accum.var] = accum.pos
- print " %dx %s (accum? %d) " % (len(pack.operations),
-
pack.operations[0].op.getopname(),
- accum is not None)
+ print " %dx %s " % (len(pack.operations),
+ pack.operations[0].op.getopname())
+ if accum:
+ print " accumulates!"
if fail:
assert False
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit