Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r77974:66758cffe3af
Date: 2015-06-09 09:34 +0200
http://bitbucket.org/pypy/pypy/changeset/66758cffe3af/
Log: generating vector box for accumulation before the label and renaming
occurrences
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -88,6 +88,49 @@
node.clear_dependencies()
node.emitted = True
+def vectorbox_outof_box(box, count=-1, size=-1, type='-', clone_signed=True,
signed=False):
+ if box.type not in (FLOAT, INT):
+ raise AssertionError("cannot create vector box of type %s" %
(box.type))
+ signed = True
+ if box.type == FLOAT:
+ signed = False
+ return BoxVector(box.type, 2, 8, signed)
+
+def vectorbox_clone_set(box, count=-1, size=-1, type='-', clone_signed=True,
signed=False):
+ if count == -1:
+ count = box.item_count
+ if size == -1:
+ size = box.item_size
+ if type == '-':
+ type = box.item_type
+ if clone_signed:
+ signed = box.item_signed
+ return BoxVector(type, count, size, signed)
+
+def getpackopnum(type):
+ if type == INT:
+ return rop.VEC_INT_PACK
+ elif type == FLOAT:
+ return rop.VEC_FLOAT_PACK
+ #
+ raise AssertionError("getpackopnum type %s not supported" % (type,))
+
+def getunpackopnum(type):
+ if type == INT:
+ return rop.VEC_INT_UNPACK
+ elif type == FLOAT:
+ return rop.VEC_FLOAT_UNPACK
+ #
+ raise AssertionError("getunpackopnum type %s not supported" % (type,))
+
+def getexpandopnum(type):
+ if type == INT:
+ return rop.VEC_INT_EXPAND
+ elif type == FLOAT:
+ return rop.VEC_FLOAT_EXPAND
+ #
+ raise AssertionError("getexpandopnum type %s not supported" % (type,))
+
class PackType(object):
UNKNOWN_TYPE = '-'
@@ -163,9 +206,6 @@
self.input_type = None
self.output_type = None
- def clone_vbox_set_count(self, box, count):
- return BoxVector(box.item_type, count, box.item_size, box.item_signed)
-
def is_vector_arg(self, i):
if i < 0 or i >= len(self.arg_ptypes):
return False
@@ -321,10 +361,8 @@
return vbox_cloned
def unpack(self, vbox, index, count, arg_ptype):
- vbox_cloned = self.clone_vbox_set_count(vbox, count)
- opnum = rop.VEC_FLOAT_UNPACK
- if vbox.item_type == INT:
- opnum = rop.VEC_INT_UNPACK
+ vbox_cloned = vectorbox_clone_set(vbox, count=count)
+ opnum = getunpackopnum(vbox.item_type)
op = ResOperation(opnum, [vbox, ConstInt(index), ConstInt(count)],
vbox_cloned)
self.preamble_ops.append(op)
return vbox_cloned
@@ -336,9 +374,7 @@
this function creates a box pack instruction to merge them to:
v1/2 = [A,B,X,Y]
"""
- opnum = rop.VEC_FLOAT_PACK
- if tgt_box.item_type == INT:
- opnum = rop.VEC_INT_PACK
+ opnum = getpackopnum(tgt_box.item_type)
arg_count = len(args)
i = index
while i < arg_count and tgt_box.item_count < packable:
@@ -348,7 +384,7 @@
i += 1
continue
count = tgt_box.item_count + src_box.item_count
- new_box = self.clone_vbox_set_count(tgt_box, count)
+ new_box = vectorbox_clone_set(tgt_box, count=count)
op = ResOperation(opnum, [tgt_box, src_box, ConstInt(i),
ConstInt(src_box.item_count)], new_box)
self.preamble_ops.append(op)
@@ -404,9 +440,7 @@
break
i += 1
else:
- expand_opnum = rop.VEC_FLOAT_EXPAND
- if box_type == INT:
- expand_opnum = rop.VEC_INT_EXPAND
+ expand_opnum = getexpandopnum(box_type)
op = ResOperation(expand_opnum, [arg], vbox)
invariant_ops.append(op)
invariant_vars.append(vbox)
@@ -415,9 +449,7 @@
op = ResOperation(rop.VEC_BOX, [ConstInt(len(nodes))], vbox)
invariant_ops.append(op)
- opnum = rop.VEC_FLOAT_PACK
- if arg.type == INT:
- opnum = rop.VEC_INT_PACK
+ opnum = getpackopnum(arg.type)
for i,node in enumerate(nodes):
op = node.getoperation()
arg = op.getarg(argidx)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -68,7 +68,7 @@
opt.analyse_index_calculations()
if opt.dependency_graph is not None:
self._write_dot_and_convert_to_svg(opt.dependency_graph, "ee" +
self.test_name)
- opt.schedule()
+ opt.schedule(False)
opt.unroll_loop_iterations(loop, unroll_factor)
opt.loop.operations = opt.get_newoperations()
self.debug_print_operations(opt.loop)
@@ -101,7 +101,7 @@
opt.find_adjacent_memory_refs()
opt.extend_packset()
opt.combine_packset()
- opt.schedule()
+ opt.schedule(True)
return opt
def vectorize(self, loop, unroll_factor = -1):
@@ -109,7 +109,7 @@
opt.find_adjacent_memory_refs()
opt.extend_packset()
opt.combine_packset()
- opt.schedule()
+ opt.schedule(True)
gso = GuardStrengthenOpt(opt.dependency_graph.index_vars)
gso.propagate_all_forward(opt.loop)
return opt
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -10,7 +10,8 @@
from rpython.jit.metainterp.optimizeopt.util import make_dispatcher_method,
Renamer
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Node, IndexVar)
-from rpython.jit.metainterp.optimizeopt.schedule import VecScheduleData,
Scheduler, Pack, Pair, AccumPair
+from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleData,
+ Scheduler, Pack, Pair, AccumPair, vectorbox_outof_box, getpackopnum)
from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
from rpython.rlib.objectmodel import we_are_translated
@@ -83,7 +84,6 @@
self.smallest_type_bytes = 0
self.early_exit_idx = -1
self.sched_data = None
- self.tried_to_pack = False
self.costmodel = X86_CostModel(cost_threshold)
def propagate_all_forward(self, clear=True):
@@ -107,7 +107,7 @@
# find index guards and move to the earliest position
self.analyse_index_calculations()
if self.dependency_graph is not None:
- self.schedule() # reorder the trace
+ self.schedule(False) # reorder the trace
# unroll
self.unroll_count = self.get_unroll_count(vsize)
@@ -122,7 +122,7 @@
self.combine_packset()
if not self.costmodel.profitable(self.packset):
raise NotAProfitableLoop()
- self.schedule()
+ self.schedule(True)
gso = GuardStrengthenOpt(self.dependency_graph.index_vars)
gso.propagate_all_forward(self.loop)
@@ -275,8 +275,6 @@
loop = self.loop
operations = loop.operations
- self.tried_to_pack = True
-
self.packset = PackSet(self.dependency_graph, operations,
self.unroll_count,
self.smallest_type_bytes)
@@ -356,17 +354,21 @@
if len_before == len(self.packset.packs):
break
- def schedule(self):
+ def schedule(self, vector=False):
self.guard_early_exit = -1
self.clear_newoperations()
sched_data =
VecScheduleData(self.metainterp_sd.cpu.vector_register_size)
scheduler = Scheduler(self.dependency_graph, sched_data)
renamer = Renamer()
+ #
+ if vector:
+ self.packset.accumulate_prepare(sched_data, renamer)
+ #
while scheduler.has_more():
position = len(self._newoperations)
ops = scheduler.next(position)
for op in ops:
- if self.tried_to_pack:
+ if vector:
self.unpack_from_vector(op, sched_data, renamer)
self.emit_operation(op)
@@ -534,51 +536,6 @@
self.accum_vars[pack.accum_variable] = pack.accum_variable
self.packs.append(pack)
- def accumulates_pair(self, lnode, rnode, origin_pack):
- # lnode and rnode are isomorphic and dependent
- assert isinstance(origin_pack, Pair)
- lop = lnode.getoperation()
- opnum = lop.getopnum()
-
- if opnum in (rop.FLOAT_ADD, rop.INT_ADD):
- roper = rnode.getoperation()
- assert lop.numargs() == 2 and lop.result is not None
- accum, accum_pos = self.getaccumulator_variable(lop, roper,
origin_pack)
- if not accum:
- return None
- # the dependency exists only because of the result of lnode
- for dep in lnode.provides():
- if dep.to is rnode:
- if not dep.because_of(accum):
- # not quite ... this is not handlable
- return None
- # get the original variable
- accum = lop.getarg(accum_pos)
-
- # in either of the two cases the arguments are mixed,
- # which is not handled currently
- var_pos = (accum_pos + 1) % 2
- plop = origin_pack.left.getoperation()
- if lop.getarg(var_pos) is not plop.result:
- return None
- prop = origin_pack.right.getoperation()
- if roper.getarg(var_pos) is not prop.result:
- return None
-
- # this can be handled by accumulation
- return AccumPair(lnode, rnode, accum, accum_pos)
-
- return None
-
- def getaccumulator_variable(self, lop, rop, origin_pack):
- args = rop.getarglist()
- for i, arg in enumerate(args):
- print arg, "is", lop.result
- if arg is lop.result:
- return arg, i
- #
- return None, -1
-
def can_be_packed(self, lnode, rnode, origin_pack):
if isomorphic(lnode.getoperation(), rnode.getoperation()):
if lnode.independent(rnode):
@@ -645,3 +602,67 @@
del self.packs[last_pos]
return last_pos
+ def accumulates_pair(self, lnode, rnode, origin_pack):
+ # lnode and rnode are isomorphic and dependent
+ assert isinstance(origin_pack, Pair)
+ lop = lnode.getoperation()
+ opnum = lop.getopnum()
+
+ if opnum in (rop.FLOAT_ADD, rop.INT_ADD):
+ roper = rnode.getoperation()
+ assert lop.numargs() == 2 and lop.result is not None
+ accum, accum_pos = self.getaccumulator_variable(lop, roper,
origin_pack)
+ if not accum:
+ return None
+ # the dependency exists only because of the result of lnode
+ for dep in lnode.provides():
+ if dep.to is rnode:
+ if not dep.because_of(accum):
+ # not quite ... this is not handlable
+ return None
+ # get the original variable
+ accum = lop.getarg(accum_pos)
+
+ # in either of the two cases the arguments are mixed,
+ # which is not handled currently
+ var_pos = (accum_pos + 1) % 2
+ plop = origin_pack.left.getoperation()
+ if lop.getarg(var_pos) is not plop.result:
+ return None
+ prop = origin_pack.right.getoperation()
+ if roper.getarg(var_pos) is not prop.result:
+ return None
+
+ # this can be handled by accumulation
+ return AccumPair(lnode, rnode, accum, accum_pos)
+
+ return None
+
+ def getaccumulator_variable(self, lop, rop, origin_pack):
+ args = rop.getarglist()
+ for i, arg in enumerate(args):
+ if arg is lop.result:
+ return arg, i
+ #
+ return None, -1
+
+ def accumulate_prepare(self, sched_data, renamer):
+ for var, pos in self.accum_vars.items():
+ # create a new vector box for the parameters
+ box = vectorbox_outof_box(var)
+ op = ResOperation(rop.VEC_BOX, [ConstInt(0)], box)
+ sched_data.invariant_oplist.append(op)
+ result = box.clonebox()
+ # clear the box to zero
+ op = ResOperation(rop.VEC_INT_XOR, [box, box], result)
+ sched_data.invariant_oplist.append(op)
+ box = result
+ result = box.clonebox()
+ # pack the scalar value
+ op = ResOperation(getpackopnum(box.item_type),
+ [box, var, ConstInt(0), ConstInt(1)], result)
+ sched_data.invariant_oplist.append(op)
+ # rename the variable with the box
+ renamer.start_renaming(var, result)
+
+
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit