Author: Richard Plangger <[email protected]>
Branch: vecopt
Changeset: r78728:948a227eab7f
Date: 2015-07-31 16:01 +0200
http://bitbucket.org/pypy/pypy/changeset/948a227eab7f/
Log: That was a nasty problem. Entering the vecopt trace through the
preamble only worked for non accum/expanded traces; otherwise the
arguments would not match. The loop now has an original label, which is
followed by the invariant operations, leading to a new label that can
carry expanded values.
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -85,7 +85,7 @@
call_many_to_one_driver = jit.JitDriver(
name='numpy_call_many_to_one',
greens=['shapelen', 'nin', 'func', 'res_dtype'],
- reds='auto', vectorize=True)
+ reds='auto')
def call_many_to_one(space, shape, func, res_dtype, in_args, out):
# out must hav been built. func needs no calc_type, is usually an
@@ -119,7 +119,7 @@
call_many_to_many_driver = jit.JitDriver(
name='numpy_call_many_to_many',
greens=['shapelen', 'nin', 'nout', 'func', 'res_dtype'],
- reds='auto', vectorize=True)
+ reds='auto')
def call_many_to_many(space, shape, func, res_dtype, in_args, out_args):
# out must hav been built. func needs no calc_type, is usually an
@@ -228,7 +228,7 @@
reduce_cum_driver = jit.JitDriver(
name='numpy_reduce_cum_driver',
greens=['shapelen', 'func', 'dtype', 'out_dtype'],
- reds='auto', vectorize=True)
+ reds='auto')
def compute_reduce_cumulative(space, obj, out, calc_dtype, func, identity):
obj_iter, obj_state = obj.create_iter()
@@ -356,7 +356,7 @@
def _new_argmin_argmax(op_name):
arg_driver = jit.JitDriver(name='numpy_' + op_name,
greens = ['shapelen', 'dtype'],
- reds = 'auto', vectorize=True)
+ reds = 'auto')
def argmin_argmax(arr):
result = 0
@@ -536,7 +536,7 @@
flatiter_getitem_driver = jit.JitDriver(name = 'numpy_flatiter_getitem',
greens = ['dtype'],
- reds = 'auto')
+ reds = 'auto', vectorize=True)
def flatiter_getitem(res, base_iter, base_state, step):
ri, rs = res.create_iter()
@@ -570,7 +570,7 @@
fromstring_driver = jit.JitDriver(name = 'numpy_fromstring',
greens = ['itemsize', 'dtype'],
- reds = 'auto', vectorize=True)
+ reds = 'auto')
def fromstring_loop(space, a, dtype, itemsize, s):
i = 0
@@ -604,7 +604,7 @@
getitem_int_driver = jit.JitDriver(name = 'numpy_getitem_int',
greens = ['shapelen', 'indexlen',
'prefixlen', 'dtype'],
- reds = 'auto', vectorize=True)
+ reds = 'auto')
def getitem_array_int(space, arr, res, iter_shape, indexes_w, prefix_w):
shapelen = len(iter_shape)
@@ -632,7 +632,7 @@
setitem_int_driver = jit.JitDriver(name = 'numpy_setitem_int',
greens = ['shapelen', 'indexlen',
'prefixlen', 'dtype'],
- reds = 'auto', vectorize=True)
+ reds = 'auto')
def setitem_array_int(space, arr, iter_shape, indexes_w, val_arr,
prefix_w):
@@ -762,7 +762,7 @@
diagonal_simple_driver = jit.JitDriver(name='numpy_diagonal_simple_driver',
greens = ['axis1', 'axis2'],
- reds = 'auto', vectorize=True)
+ reds = 'auto')
def diagonal_simple(space, arr, out, offset, axis1, axis2, size):
out_iter, out_state = out.create_iter()
@@ -806,7 +806,7 @@
def _new_binsearch(side, op_name):
binsearch_driver = jit.JitDriver(name='numpy_binsearch_' + side,
greens=['dtype'],
- reds='auto', vectorize=True)
+ reds='auto')
def binsearch(space, arr, key, ret):
assert len(arr.get_shape()) == 1
diff --git a/pypy/module/micronumpy/test/test_zjit.py
b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -466,8 +466,6 @@
def test_cumsum(self):
result = self.run("cumsum")
assert result == 15
- # not vectorizable, has one back edge
- self.check_vectorized(1, 0)
def define_axissum():
return """
@@ -803,7 +801,7 @@
def test_flat_getitem(self):
result = self.run("flat_getitem")
assert result == 10.0
- self.check_vectorized(0,0)
+ self.check_vectorized(1,1)
def define_flat_setitem():
return '''
diff --git a/rpython/jit/metainterp/compile.py
b/rpython/jit/metainterp/compile.py
--- a/rpython/jit/metainterp/compile.py
+++ b/rpython/jit/metainterp/compile.py
@@ -149,23 +149,14 @@
[inliner.inline_op(h_ops[i]) for i in range(start,
len(h_ops))] + \
[ResOperation(rop.JUMP, [inliner.inline_arg(a) for a
in jumpargs],
None, descr=jitcell_token)]
- target_token = part.operations[0].getdescr()
- assert isinstance(target_token, TargetToken)
- all_target_tokens.append(target_token)
- inputargs = jumpargs
- jumpargs = part.operations[-1].getarglist()
-
try:
optimize_trace(metainterp_sd, jitdriver_sd, part, warmstate,
start_state=start_state, export_state=False,
try_disabling_unroll=try_disabling_unroll)
except InvalidLoop:
return None
-
- loop.operations = loop.operations[:-1] + part.operations
- loop.versions = part.versions
- if part.quasi_immutable_deps:
- loop.quasi_immutable_deps.update(part.quasi_immutable_deps)
+ #
+ loop.append_loop(part, all_target_tokens)
assert part.operations[-1].getopnum() != rop.LABEL
if loop.versions is not None:
@@ -197,7 +188,6 @@
metainterp_sd = metainterp.staticdata
cpu = metainterp_sd.cpu
if loop.versions is not None:
- token = jitcell_token
for version in loop.versions:
if len(version.faildescrs) == 0:
continue
diff --git a/rpython/jit/metainterp/history.py
b/rpython/jit/metainterp/history.py
--- a/rpython/jit/metainterp/history.py
+++ b/rpython/jit/metainterp/history.py
@@ -761,7 +761,10 @@
def register_all_guards(self, opt_ops, invariant_arg_count=0):
from rpython.jit.metainterp.compile import CompileLoopVersionDescr
+ pass_by = 0
idx = index_of_first(rop.LABEL, opt_ops)
+ if opt_ops[idx].getdescr() is not opt_ops[-1].getdescr():
+ idx = index_of_first(rop.LABEL, opt_ops, pass_by=1)
assert idx >= 0
version_failargs = opt_ops[idx].getarglist()
if invariant_arg_count > 0:
@@ -799,6 +802,7 @@
op.rd_snapshot = None
def update_token(self, jitcell_token):
+ # this is only invoked for versioned loops!
label = self.operations[self.label_pos]
jump = self.operations[-1]
#
@@ -849,6 +853,29 @@
insns[opname] = insns.get(opname, 0) + 1
return insns
+ def append_loop(self, loop, all_target_tokens):
+ # append e.g. the peeled loop to this loop!
+ label, jump = loop.operations[0], loop.operations[-1]
+ assert label.getopnum() == rop.LABEL
+ assert jump.getopnum() == rop.JUMP
+ target_token = None
+ i = 0
+ # adds all target token until the one is found that jumps from the
+ # last instruction to the label
+ while target_token is not jump.getdescr():
+ # there is another label
+ op = loop.operations[i]
+ if op.getopnum() == rop.LABEL:
+ target_token = op.getdescr()
+ assert isinstance(target_token, TargetToken)
+ all_target_tokens.append(target_token)
+ i += 1
+ #
+ self.operations = self.operations[:-1] + loop.operations
+ self.versions = loop.versions
+ if loop.quasi_immutable_deps:
+ self.quasi_immutable_deps.update(loop.quasi_immutable_deps)
+
def get_operations(self):
return self.operations
diff --git a/rpython/jit/metainterp/optimizeopt/guard.py
b/rpython/jit/metainterp/optimizeopt/guard.py
--- a/rpython/jit/metainterp/optimizeopt/guard.py
+++ b/rpython/jit/metainterp/optimizeopt/guard.py
@@ -119,8 +119,8 @@
descr = myop.getdescr()
descr.copy_all_attributes_from(other.op.getdescr())
myop.rd_frame_info_list = otherop.rd_frame_info_list
+ myop.setfailargs(otherop.getfailargs())
myop.rd_snapshot = otherop.rd_snapshot
- myop.setfailargs(otherop.getfailargs())
def emit_varops(self, opt, var, old_arg):
assert isinstance(var, IndexVar)
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py
b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -1,6 +1,6 @@
from rpython.jit.metainterp.history import
(VECTOR,FLOAT,INT,ConstInt,BoxVector,
- BoxFloat,BoxInt,ConstFloat)
+ BoxFloat,BoxInt,ConstFloat,TargetToken)
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp)
from rpython.jit.metainterp.optimizeopt.dependency import (DependencyGraph,
MemoryRef, Node, IndexVar)
@@ -839,24 +839,32 @@
assert off < vector.getcount()
self.box_to_vbox[box] = (off, vector)
- def prepend_invariant_operations(self, oplist):
+ def prepend_invariant_operations(self, oplist, orig_label_args):
if len(self.invariant_oplist) > 0:
label = oplist[0]
assert label.getopnum() == rop.LABEL
+ #
jump = oplist[-1]
assert jump.getopnum() == rop.JUMP
-
- label_args = label.getarglist()
+ #
+ label_args = label.getarglist()[:]
jump_args = jump.getarglist()
for var in self.invariant_vector_vars:
label_args.append(var)
jump_args.append(var)
-
- oplist[0] = label.copy_and_change(label.getopnum(), label_args,
None, label.getdescr())
- oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args,
None, jump.getdescr())
-
- return self.invariant_oplist + oplist
-
+ #
+ # in case of any invariant_vector_vars, the label is restored
+ # and the invariant operations are added between the original label
+ # and the new label
+ descr = label.getdescr()
+ assert isinstance(descr, TargetToken)
+ token = TargetToken(descr.targeting_jitcell_token)
+ oplist[0] = label.copy_and_change(label.getopnum(), label_args,
None, token)
+ oplist[-1] = jump.copy_and_change(jump.getopnum(), jump_args,
None, token)
+ #
+ return [ResOperation(rop.LABEL, orig_label_args, None, descr)] + \
+ self.invariant_oplist + oplist
+ #
return oplist
class Pack(object):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -107,10 +107,12 @@
self.cpu = metainterp_sd.cpu
self.costmodel = X86_CostModel(cost_threshold,
self.cpu.vector_register_size)
self.appended_arg_count = 0
+ self.orig_label_args = None
def propagate_all_forward(self, clear=True):
self.clear_newoperations()
label = self.loop.operations[0]
+ self.orig_label_args = label.getarglist()[:]
jump = self.loop.operations[-1]
if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
label.getopnum() != rop.LABEL:
@@ -463,7 +465,8 @@
if accum:
accum.save_to_descr(op.getdescr(),i)
self.loop.operations = \
- sched_data.prepend_invariant_operations(self._newoperations)
+ sched_data.prepend_invariant_operations(self._newoperations,
+ self.orig_label_args)
self.clear_newoperations()
def unpack_from_vector(self, op, sched_data, renamer):
@@ -577,7 +580,7 @@
#
tgt_op.setdescr(descr)
tgt_op.rd_snapshot = op.rd_snapshot
- tgt_op.setfailargs(op.getfailargs())
+ tgt_op.setfailargs(op.getfailargs()[:])
class CostModel(object):
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit