Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r86956:545565fe89bc
Date: 2016-09-08 14:36 +0200
http://bitbucket.org/pypy/pypy/changeset/545565fe89bc/
Log: operations can now be delayed (if they are pure); operation
	arguments can pull them in just before the op itself is emitted
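
A minimal sketch of the delayed-emission idea described in the log (hypothetical
names such as Op and DelayingScheduler, not the actual RPython scheduler or
ResOperation API): pure operations are parked instead of emitted, a consumer
pulls its delayed arguments in right before it is emitted itself, and anything
still pending at the jump is either resolved there or dropped if never used.

class Op:
    def __init__(self, name, args=(), pure=False):
        self.name = name
        self.args = list(args)
        self.pure = pure

class DelayingScheduler:
    def __init__(self):
        self.oplist = []    # operations emitted so far, in order
        self.delayed = []   # pure operations whose emission is postponed

    def schedule(self, op):
        if op.pure:
            # pure ops have no side effects: emission can wait until some
            # consumer actually needs the result (possibly never)
            self.delayed.append(op)
        else:
            self.emit(op)

    def emit(self, op):
        # pull delayed arguments in just before the op itself is emitted
        for arg in op.args:
            if isinstance(arg, Op) and arg in self.delayed:
                self.delayed.remove(arg)
                self.emit(arg)
        self.oplist.append(op)

    def finish(self, jump_op):
        # resolve whatever the jump still needs; unused delayed ops vanish
        self.emit(jump_op)
        return self.oplist

# i1 is pure and only needed by the jump, so it is emitted late;
# i2 is pure and never used, so it is dropped entirely
i0 = Op("int_add", pure=True)
i1 = Op("int_mul", args=[i0], pure=True)
i2 = Op("int_sub", args=[i0], pure=True)
store = Op("setarrayitem_gc", args=[i0])
jump = Op("jump", args=[i1])

s = DelayingScheduler()
for op in (i0, i1, i2, store):
    s.schedule(op)
print([op.name for op in s.finish(jump)])
# -> ['int_add', 'setarrayitem_gc', 'int_mul', 'jump']
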
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -1042,29 +1042,35 @@
assert isinstance(other, IndexVar)
return self.constant - other.constant
+ def get_operations(self):
+ var = self.var
+ tolist = []
+ if self.coefficient_mul != 1:
+ args = [var, ConstInt(self.coefficient_mul)]
+ var = ResOperation(rop.INT_MUL, args)
+ tolist.append(var)
+ if self.coefficient_div != 1:
+ assert 0 # should never be the case with handling
+ # of INT_PY_DIV commented out in this file...
+ if self.constant > 0:
+ args = [var, ConstInt(self.constant)]
+ var = ResOperation(rop.INT_ADD, args)
+ tolist.append(var)
+ if self.constant < 0:
+ args = [var, ConstInt(self.constant)]
+ var = ResOperation(rop.INT_SUB, args)
+ tolist.append(var)
+ return tolist
+
def emit_operations(self, opt, result_box=None):
var = self.var
if self.is_identity():
return var
- if self.coefficient_mul != 1:
- args = [var, ConstInt(self.coefficient_mul)]
- var = ResOperation(rop.INT_MUL, args)
- opt.emit_operation(var)
- if self.coefficient_div != 1:
- assert 0 # XXX for now; should never be the case with handling
- # of INT_PY_DIV commented out in this file...
- #args = [var, ConstInt(self.coefficient_div)]
- #var = ResOperation(rop.INT_FLOORDIV, args)
- #opt.emit_operation(var)
- if self.constant > 0:
- args = [var, ConstInt(self.constant)]
- var = ResOperation(rop.INT_ADD, args)
- opt.emit_operation(var)
- if self.constant < 0:
- args = [var, ConstInt(self.constant)]
- var = ResOperation(rop.INT_SUB, args)
- opt.emit_operation(var)
- return var
+ last = None
+ for op in self.get_operations():
+ opt.emit_operation(op)
+ last = op
+ return last
def compare(self, other):
""" Returns if the two are compareable as a first result
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -36,9 +36,48 @@
self.invariant_oplist = []
self.invariant_vector_vars = []
self.seen = {}
+ self.delayed = []
+
+ def resolve_delayed(self, needs_resolving, delayed, op):
+ # recursive solving of all delayed objects
+ if not delayed:
+ return
+ args = op.getarglist()
+ for arg in args:
+ if arg.is_constant() or arg.is_inputarg():
+ continue
+ if arg not in self.seen:
+ needs_resolving[arg] = None
+ indexvars = self.graph.index_vars
+ i = len(delayed)-1
+ while i >= 0:
+ node = delayed[i]
+ op = node.getoperation()
+ if op in needs_resolving:
+ # either it is a normal operation, or we know that there is a linear combination
+ if op in indexvars:
+ indexvar = indexvars[op]
+ for operation in indexvar.get_operations():
+ self.oplist.append(operation)
+ last = operation
+ self.renamer.start_renaming(op, last)
+ del needs_resolving[op]
+ else:
+ del needs_resolving[op]
+ self.resolve_delayed(needs_resolving, delayed, op)
+ self.oplist.append(op)
+ i -= 1
+
def post_schedule(self):
loop = self.graph.loop
+ #
+ if self.delayed:
+ # some operations can be delayed until the jump instruction,
+ # handle them here
+ self.resolve_delayed({}, self.delayed, loop.jump)
+
+ #
self.renamer.rename(loop.jump)
self.ensure_args_unpacked(loop.jump)
loop.operations = self.oplist
@@ -65,7 +104,7 @@
if node.depends_count() == 0:
self.worklist.insert(0, node)
- def try_emit_or_delay(self, node, scheduler):
+ def try_emit_or_delay(self, node):
# implement me in subclass. e.g. as in VecScheduleState
raise NotImplementedError
@@ -137,42 +176,6 @@
return True
return node.depends_count() != 0
- def mark_emitted(self, node, state, unpack=True):
- """ An operation has been emitted, adds new operations to the worklist
- whenever their dependency count drops to zero.
- Keeps worklist sorted (see priority) """
- worklist = state.worklist
- provides = node.provides()[:]
- for dep in provides: # COPY
- target = dep.to
- node.remove_edge_to(target)
- if not target.emitted and target.depends_count() == 0:
- # sorts them by priority
- i = len(worklist)-1
- while i >= 0:
- cur = worklist[i]
- c = (cur.priority - target.priority)
- if c < 0: # meaning itnode.priority < target.priority:
- worklist.insert(i+1, target)
- break
- elif c == 0:
- # if they have the same priority, sort them
- # using the original position in the trace
- if target.getindex() < cur.getindex():
- worklist.insert(i+1, target)
- break
- i -= 1
- else:
- worklist.insert(0, target)
- node.clear_dependencies()
- node.emitted = True
- if not node.is_imaginary():
- op = node.getoperation()
- state.renamer.rename(op)
- if unpack:
- state.ensure_args_unpacked(op)
- state.post_emit(node)
-
def walk_and_emit(self, state):
""" Emit all the operations into the oplist parameter.
Initiates the scheduling. """
@@ -180,7 +183,7 @@
while state.has_more():
node = self.next(state)
if node:
- state.try_emit_or_delay(node, scheduler)
+ state.try_emit_or_delay(node)
continue
# it happens that packs can emit many nodes that have been
@@ -522,7 +525,7 @@
def post_emit(self, node):
pass
- def pre_emit(self, node):
+ def pre_emit(self, node, pack_first=True):
op = node.getoperation()
if op.is_guard():
# add accumulation info to the descriptor
@@ -546,7 +549,21 @@
delayed = node.delayed
if delayed:
- import pdb; pdb.set_trace()
+ # there are some nodes that have been delayed just for this operation
+ if pack_first:
+ self.resolve_delayed({}, delayed, op)
+ for node in delayed:
+ if node is not None:
+ provides = node.provides()
+ if len(provides) == 0:
+ # add this node to the final delay list
+ # might be emitted before jumping!
+ self.delayed.append(node)
+ else:
+ for to in node.provides():
+ tnode = to.target_node()
+ self.delegate_delay(tnode, [node])
+ node.delayed = None
def profitable(self):
return self.costmodel.profitable()
@@ -557,36 +574,41 @@
for arg in self.graph.loop.label.getarglist():
self.seen[arg] = None
- def try_emit_or_delay(self, scheduler, state):
+ def try_emit_or_delay(self, node):
# emission might be blocked by other nodes if this node has a pack!
- if self.pack:
+ if node.pack:
assert node.pack.numops() > 1
- for node in node.pack.operations:
- self.pre_emit(node)
- scheduler.mark_emitted(node, self, unpack=False)
+ for i,node in enumerate(node.pack.operations):
+ self.pre_emit(node, i==0)
+ self.mark_emitted(node, unpack=False)
turn_into_vector(self, node.pack)
return
elif not node.emitted:
if not node.is_imaginary() and node.is_pure():
# this operation might never be emitted. only if it is really needed
- self.delay_emit(scheduler, node)
+ self.delay_emit(node)
return
# emit a now!
- state.pre_emit(node)
- self.mark_emitted(node, state)
+ self.pre_emit(node)
+ self.mark_emitted(node)
if not node.is_imaginary():
op = node.getoperation()
- state.seen[op] = None
- state.oplist.append(op)
+ self.seen[op] = None
+ self.oplist.append(op)
- def delay_emit(self, scheduler, node):
+ def delay_emit(self, node):
""" it has been decided that the operation might be scheduled later """
delayed = node.delayed or []
- delayed.append(self)
+ delayed.append(node)
node.delayed = None
- for to in self.provides():
- self.delegate_delay(to, delayed)
- self.mark_emitted(node, state)
+ provides = node.provides()
+ if len(provides) == 0:
+ self.delayed.append(node)
+ else:
+ for to in node.provides():
+ tnode = to.target_node()
+ self.delegate_delay(tnode, delayed[:])
+ self.mark_emitted(node)
def delegate_delay(self, node, delayed):
""" Chain up delays, this can reduce many more of the operations """
@@ -597,6 +619,42 @@
for d in delayed:
delayedlist.append(d)
+ def mark_emitted(state, node, unpack=True):
+ """ An operation has been emitted, adds new operations to the worklist
+ whenever their dependency count drops to zero.
+ Keeps worklist sorted (see priority) """
+ worklist = state.worklist
+ provides = node.provides()[:]
+ for dep in provides: # COPY
+ target = dep.to
+ node.remove_edge_to(target)
+ if not target.emitted and target.depends_count() == 0:
+ # sorts them by priority
+ i = len(worklist)-1
+ while i >= 0:
+ cur = worklist[i]
+ c = (cur.priority - target.priority)
+ if c < 0: # meaning itnode.priority < target.priority:
+ worklist.insert(i+1, target)
+ break
+ elif c == 0:
+ # if they have the same priority, sort them
+ # using the original position in the trace
+ if target.getindex() < cur.getindex():
+ worklist.insert(i+1, target)
+ break
+ i -= 1
+ else:
+ worklist.insert(0, target)
+ node.clear_dependencies()
+ node.emitted = True
+ if not node.is_imaginary():
+ op = node.getoperation()
+ state.renamer.rename(op)
+ if unpack:
+ state.ensure_args_unpacked(op)
+ state.post_emit(node)
+
def delay(self, node):
if node.pack:
pack = node.pack
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -1395,11 +1395,10 @@
i2 = int_add(i1,8)
jump(p0,i2)
""")
- self.schedule(trace)
+ self.schedule(trace, unroll_factor=0)
self.ensure_operations([
'v0[2xf64] = vec_load_f(p0, i0, 8, 0, descr=floatarraydescr)',
'i2 = int_add(i0, 16)',
- 'jump(p0,i2)',
], trace)
class TestLLtype(BaseTestVectorize, LLtypeMixin):
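
For reference, a simplified sketch of the IndexVar split introduced in
dependency.py above (index_operations is a hypothetical stand-in, not the
ResOperation API): the new get_operations() only builds the ops for the linear
combination coefficient*var + constant, and callers such as resolve_delayed
decide when to append them to their oplist.

def index_operations(var, coefficient, constant):
    """Build, but do not emit, the ops computing coefficient*var + constant."""
    ops = []
    if coefficient != 1:
        var = ("int_mul", var, coefficient)
        ops.append(var)
    if constant > 0:
        ops.append(("int_add", var, constant))
    elif constant < 0:
        # the negative case is simplified here to an int_sub of the magnitude
        ops.append(("int_sub", var, -constant))
    return ops

# 2*i0 + 16 turns into two operations; the last one carries the final index
print(index_operations("i0", 2, 16))
# [('int_mul', 'i0', 2), ('int_add', ('int_mul', 'i0', 2), 16)]
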