Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79673:32891e533aab
Date: 2015-09-17 18:50 +0200
http://bitbucket.org/pypy/pypy/changeset/32891e533aab/
Log: reanimated vecopt integration tests that use the assembler backend.
not quite sure yet how to deal with supports_guard_gc_type
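For context, a minimal sketch of how supports_guard_gc_type flows from the GC
description to the unrolling gate that this changeset comments out in
optimizeopt/__init__.py. The classes below are illustrative stand-ins, not the
real ones; only the attribute name and the gate come from the diff:

    # illustrative stand-ins, not the real PyPy classes
    class FakeGcLLDescription(object):
        supports_guard_gc_type = False        # e.g. Boehm, or x86 untranslated

    class FakeCPU(object):
        def __init__(self, gc_ll_descr):
            # mirrors llmodel.py below: the CPU simply forwards the flag
            self.supports_guard_gc_type = gc_ll_descr.supports_guard_gc_type

    def unroll_enabled(cpu, enable_opts):
        unroll = 'unroll' in enable_opts      # 'enable_opts' is normally a dict
        if cpu is not None and not cpu.supports_guard_gc_type:
            unroll = False                    # the gate disabled by this changeset
        return unroll

    assert unroll_enabled(FakeCPU(FakeGcLLDescription()), {'unroll': None}) is False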
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -52,6 +52,10 @@
else:
translator = None
self.gc_ll_descr = get_ll_description(gcdescr, translator, rtyper)
+ # supports_guard_gc_type indicates whether the gc type of an object can be read.
+ # In some setups (Boehm, or x86 untranslated) the type is not known just yet,
+ # because there are cases where it is not guarded. The precise place where it is
+ # not guarded is while inlining the short preamble.
self.supports_guard_gc_type = self.gc_ll_descr.supports_guard_gc_type
if translator and translator.config.translation.gcremovetypeptr:
self.vtable_offset = None
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -648,7 +648,7 @@
startpos = self.mc.get_relative_pos()
self.store_info_on_descr(startpos, tok)
else:
- regalloc.position = tok.position
+ # TODO regalloc.position = tok.position
tok.pos_recovery_stub = self.generate_quick_failure(tok, regalloc)
if WORD == 8 and len(self.pending_memoryerror_trampoline_from) > 0:
self.error_trampoline_64 = self.generate_propagate_error_64()
@@ -1654,27 +1654,27 @@
self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value)
def genop_guard_guard_true(self, guard_op, guard_token, locs, resloc):
- loc = locs[0]
- if isinstance(loc, RegLoc):
- if loc.is_xmm:
- self._guard_vector_true(guard_op, loc)
- # XXX
- self.implement_guard(guard_token, 'NZ')
- return
- self.mc.TEST(loc, loc)
+ #loc = locs[0]
+ #if isinstance(loc, RegLoc):
+ # if loc.is_xmm:
+ # self._guard_vector_true(guard_op, loc)
+ # # XXX
+ # self.implement_guard(guard_token, 'NZ')
+ # return
+ #self.mc.TEST(loc, loc)
self.implement_guard(guard_token)
genop_guard_guard_nonnull = genop_guard_guard_true
def genop_guard_guard_false(self, guard_op, guard_token, locs, resloc):
self.guard_success_cc = rx86.invert_condition(self.guard_success_cc)
- loc = locs[0]
- if isinstance(loc, RegLoc):
- if loc.is_xmm:
- self._guard_vector_false(guard_op, loc)
- # XXX
- self.implement_guard(guard_token, 'NZ')
- return
- self.mc.TEST(loc, loc)
+ # TODO loc = locs[0]
+ #if isinstance(loc, RegLoc):
+ # if loc.is_xmm:
+ # self._guard_vector_false(guard_op, loc)
+ # # XXX
+ # self.implement_guard(guard_token, 'NZ')
+ # return
+ #self.mc.TEST(loc, loc)
self.implement_guard(guard_token)
genop_guard_guard_isnull = genop_guard_guard_false
@@ -1884,7 +1884,7 @@
self.mc.JMP(imm(self.propagate_exception_path))
return startpos
- def generate_quick_failure(self, guardtok):
+ def generate_quick_failure(self, guardtok, regalloc):
""" Gather information about failure
"""
self.mc.force_frame_size(DEFAULT_FRAME_BYTES)
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -323,8 +323,7 @@
if arg is None:
faillocs.append(None)
continue
- accum = arg.getaccum()
- if accum:
+ if arg.is_vector() and arg.getaccum():
# for an accumulator store the position of the original
# box and in llsupport/assembler save restore information
# on the descriptor
diff --git a/rpython/jit/backend/x86/test/test_x86vector.py b/rpython/jit/backend/x86/test/test_x86vector.py
--- a/rpython/jit/backend/x86/test/test_x86vector.py
+++ b/rpython/jit/backend/x86/test/test_x86vector.py
@@ -11,7 +11,7 @@
from rpython.rtyper.lltypesystem import lltype
-class TestBasic(test_vector.VectorizeLLtypeTests, test_basic.Jit386Mixin):
+class TestBasic(test_basic.Jit386Mixin, test_vector.VectorizeTests):
# for the individual tests see
# ====> ../../../metainterp/test/test_basic.py
enable_opts = 'intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll'
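The base-class reordering in TestBasic above matters because Python resolves
attributes left to right along the MRO, so the mixin listed first wins. A small
standalone illustration with generic names (not the JIT classes):

    class BackendMixin(object):          # plays the role of Jit386Mixin
        backend_name = 'x86'

    class VectorTests(object):           # plays the role of VectorizeTests
        backend_name = 'llgraph'
        def run(self):
            return self.backend_name

    class TestBoth(BackendMixin, VectorTests):   # mixin listed first
        pass

    assert TestBoth().run() == 'x86'     # the mixin's attribute is picked up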
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -32,9 +32,9 @@
def build_opt_chain(metainterp_sd, enable_opts):
optimizations = []
unroll = 'unroll' in enable_opts # 'enable_opts' is normally a dict
- if (metainterp_sd.cpu is not None and
- not metainterp_sd.cpu.supports_guard_gc_type):
- unroll = False
+ #if (metainterp_sd.cpu is not None and
+ # not metainterp_sd.cpu.supports_guard_gc_type):
+ # unroll = False
for name, opt in unroll_all_opts:
if name in enable_opts:
if opt is not None:
diff --git a/rpython/jit/metainterp/optimizeopt/dependency.py b/rpython/jit/metainterp/optimizeopt/dependency.py
--- a/rpython/jit/metainterp/optimizeopt/dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/dependency.py
@@ -132,6 +132,9 @@
self.guard_bool_bool_node = None
self._stack = False
+ def is_imaginary(self):
+ return False
+
def getoperation(self):
return self.op
def getindex(self):
@@ -186,8 +189,8 @@
isinstance(descr, compile.CompileLoopVersionDescr)
return False
- def is_guard_early_exit(self):
- return self.op.getopnum() == rop.GUARD_EARLY_EXIT
+ # TODO def is_guard_early_exit(self):
+ # return self.op.getopnum() == rop.GUARD_EARLY_EXIT
def loads_from_complex_object(self):
return rop._ALWAYS_PURE_LAST <= self.op.getopnum() < rop._MALLOC_FIRST
@@ -286,11 +289,14 @@
return True
def iterate_paths(self, to, backwards=False, path_max_len=-1,
blacklist=False):
- """ yield all nodes from self leading to 'to'. backwards determines
- the iteration direction and blacklist marks nodes that have already
been visited.
- blacklist comes in handy if a property must hold for every path. not
*every* possible
- instance must be iterated, but trees that have already been visited
can be ignored
- after the have been visited
+ """ Yield all nodes from self leading to 'to'.
+
+ backwards: Determines the iteration direction.
+ blacklist: Marks nodes that have already been visited.
+ It comes in handy if a property must hold for every
path.
+ Not *every* possible instance must be iterated, but
trees
+ that have already been visited can be ignored after the
+ first visit.
"""
if self is to:
return
@@ -304,6 +310,8 @@
else:
iterdir = node.provides()
if index >= len(iterdir):
+ if to is None and index == 0:
+ yield Path(path.path[:])
if blacklist:
blacklist_visit[node] = None
continue
@@ -322,7 +330,8 @@
continue
pathlen += 1
- if next_node is to or (path_max_len > 0 and pathlen >= path_max_len):
+ if next_node is to or \
+ (path_max_len > 0 and pathlen >= path_max_len):
yield Path(path.path[:])
# note that the destination node ``to'' is never blacklisted
#if blacklist:
@@ -334,14 +343,14 @@
i = 0
while i < len(self.adjacent_list):
dep = self.adjacent_list[i]
- if dep.to == node:
+ if dep.to is node:
del self.adjacent_list[i]
break
i += 1
i = 0
while i < len(node.adjacent_list_back):
dep = node.adjacent_list_back[i]
- if dep.to == self:
+ if dep.to is self:
del node.adjacent_list_back[i]
break
i += 1
@@ -358,15 +367,30 @@
pack = "p: %d" % self.pack.numops()
return "Node(%s,%s i: %d)" % (self.op, pack, self.opidx)
- def __ne__(self, other):
- return not self.__eq__(other)
+ def getdotlabel(self):
+ """ NOT_RPTYHON """
+ op_str = str(self.op)
+ if self.op.is_guard():
+ args_str = [str(arg) for arg in self.op.getfailargs()]
+ op_str += " " + ','.join(args_str)
+ return "[%d] %s" % (self.opidx, op_str)
- def __eq__(self, other):
- if other is None:
- return False
- assert isinstance(other, Node)
- return self.opidx == other.opidx
+class ImaginaryNode(Node):
+ _index = 987654321 # big enough? :)
+ def __init__(self, label):
+ index = -1
+ if not we_are_translated():
+ self.dotlabel = label
+ index = ImaginaryNode._index
+ ImaginaryNode._index += 1
+ Node.__init__(self, None, index)
+ def is_imaginary(self):
+ return True
+
+ def getdotlabel(self):
+ """ NOT_RPTYHON """
+ return self.dotlabel
class Dependency(object):
def __init__(self, at, to, arg, failarg=False):
@@ -385,6 +409,12 @@
return True
return False
+ def target_node(self):
+ return self.to
+
+ def origin_node(self):
+ return self.at
+
def to_index(self):
return self.to.getindex()
def at_index(self):
@@ -509,7 +539,8 @@
def __init__(self, loop):
self.loop = loop
self.label = Node(loop.label, 0)
- self.nodes = [ Node(op,i+1) for i,op in enumerate(loop.operations) ]
+ self.nodes = [ Node(op,i+1) for i,op in enumerate(loop.operations) if not op.is_debug() ]
+ self.inodes = [] # imaginary nodes
self.jump = Node(loop.jump, len(self.nodes)+1)
self.invariant_vars = {}
self.update_invariant_vars()
@@ -523,6 +554,11 @@
def getnode(self, i):
return self.nodes[i]
+ def imaginary_node(self, label):
+ node = ImaginaryNode(label)
+ self.inodes.append(node)
+ return node
+
def update_invariant_vars(self):
label_op = self.label.getoperation()
jump_op = self.jump.getoperation()
@@ -559,18 +595,20 @@
node.setpriority(2)
# the label operation defines all operations at the
# beginning of the loop
- if op.getopnum() == rop.LABEL and i != jump_pos:
- node.setpriority(100)
- label_pos = i
- for arg in op.getarglist():
- tracker.define(arg, node)
- continue # prevent adding edge to the label itself
- elif node.is_guard_early_exit():
- label_node = self.nodes[label_pos]
- label_node.edge_to(node,None,label='L->EE')
- for arg in label_node.getoperation().getarglist():
- tracker.define(arg, node)
- continue
+
+ # TODO if op.getopnum() == rop.LABEL and i != jump_pos:
+ # node.setpriority(100)
+ # label_pos = i
+ # for arg in op.getarglist():
+ # tracker.define(arg, node)
+ # continue # prevent adding edge to the label itself
+ #elif node.is_guard_early_exit():
+ # label_node = self.nodes[label_pos]
+ # label_node.edge_to(node,None,label='L->EE')
+ # for arg in label_node.getoperation().getarglist():
+ # tracker.define(arg, node)
+ # continue
+
intformod.inspect_operation(op,node)
# definition of a new variable
if op.type != 'v':
@@ -774,20 +812,22 @@
graph += "\n"
return graph + " ])"
+ def view(self):
+ """ NOT_RPYTHON """
+ from rpython.translator.tool.graphpage import GraphPage
+ page = GraphPage()
+ page.source = self.as_dot()
+ page.links = []
+ page.display()
+
def as_dot(self):
""" NOT_RPTYHON """
if not we_are_translated():
dot = "digraph dep_graph {\n"
- for node in self.nodes:
- op = node.getoperation()
- if op.getopnum() == rop.DEBUG_MERGE_POINT:
- continue
- op_str = str(op)
- if op.is_guard():
- op_str += " " + ','.join([str(arg) for arg in
op.getfailargs()])
- dot += " n%d [label=\"[%d]: %s\"];\n" %
(node.getindex(),node.getindex(),op_str)
+ for node in self.nodes + self.inodes:
+ dot += " n%d [label=\"%s\"];\n" %
(node.getindex(),node.getdotlabel())
dot += "\n"
- for node in self.nodes:
+ for node in self.nodes + self.inodes:
for dep in node.provides():
label = ''
if getattr(dep, 'label', None):
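A standalone sketch of the dot-emission pattern that as_dot()/getdotlabel()
above follow, using plain tuples instead of Node/ImaginaryNode objects; the
edge syntax is generic Graphviz and not taken from the hunk:

    nodes = [(1, "[1] int_add(i0, 1)"),
             (2, "[2] guard_true(i1)"),
             (987654321, "early exit")]        # an 'imaginary' node only has a label
    edges = [(1, 2, ""), (2, 987654321, "pullup")]

    dot = "digraph dep_graph {\n"
    for index, label in nodes:
        dot += "  n%d [label=\"%s\"];\n" % (index, label)
    for at, to, label in edges:
        dot += "  n%d -> n%d [label=\"%s\"];\n" % (at, to, label)
    dot += "}\n"
    print(dot)   # feed to `dot -Tpng` or any Graphviz viewer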
diff --git a/rpython/jit/metainterp/optimizeopt/schedule.py b/rpython/jit/metainterp/optimizeopt/schedule.py
--- a/rpython/jit/metainterp/optimizeopt/schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/schedule.py
@@ -18,6 +18,7 @@
self.worklist = []
self.invariant_oplist = []
self.invariant_vector_vars = []
+ self.seen = {}
def post_schedule(self):
loop = self.graph.loop
@@ -32,17 +33,30 @@
loop.prefix_label = loop.label.copy_and_change(opnum, args)
def profitable(self):
- return self.costmodel.profitable()
+ return True
def prepare(self):
- pass
+ for node in self.graph.nodes:
+ if node.depends_count() == 0:
+ self.worklist.insert(0, node)
- def delay(self):
+ def emit(self, node, scheduler):
+ # implement me in subclass. e.g. as in VecScheduleState
+ return False
+
+ def delay(self, node):
return False
def has_more(self):
return len(self.worklist) > 0
+ def ensure_args_unpacked(self, op):
+ pass
+
+ def post_emit(self, op):
+ pass
+
+
class Scheduler(object):
""" Create an instance of this class to (re)schedule a vector trace. """
def __init__(self):
@@ -75,11 +89,6 @@
""" An operation has been emitted, adds new operations to the worklist
whenever their dependency count drops to zero.
Keeps worklist sorted (see priority) """
- op = node.getoperation()
- state.renamer.rename(op)
- if unpack:
- state.ensure_args_unpacked(op)
- node.vector=True
- position = len(state.oplist)
worklist = state.worklist
for dep in node.provides()[:]: # COPY
to = dep.to
@@ -104,20 +113,28 @@
worklist.insert(0, to)
node.clear_dependencies()
node.emitted = True
+ if not node.is_imaginary():
+ op = node.getoperation()
+ state.renamer.rename(op)
+ if unpack:
+ state.ensure_args_unpacked(op)
+ state.post_emit(node.getoperation())
def walk_and_emit(self, state):
""" Emit all the operations into the oplist parameter.
Initiates the scheduling. """
assert isinstance(state, SchedulerState)
+ import pdb; pdb.set_trace()
while state.has_more():
node = self.next(state)
if node:
if not state.emit(node, self):
if not node.emitted:
- op = node.getoperation()
self.mark_emitted(node, state)
- state.seen[op] = None
- state.oplist.append(op)
+ if not node.is_imaginary():
+ op = node.getoperation()
+ state.seen[op] = None
+ state.oplist.append(op)
continue
# it happens that packs can emit many nodes that have been
@@ -246,6 +263,10 @@
assert isinstance(vecop, GuardResOp)
vecop.setfailargs(op.getfailargs())
vecop.rd_snapshot = op.rd_snapshot
+ if pack.is_accumulating():
+ for i,node in enumerate(pack.operations):
+ op = node.getoperation()
+ state.accumulation[op] = pack
def prepare_arguments(state, pack, args):
@@ -456,7 +477,7 @@
self.packset = packset
for arg in graph.loop.inputargs:
self.inputargs[arg] = None
- self.seen = {}
+ self.accumulation = {}
def expand(self, args, vecop):
index = 0
@@ -496,39 +517,33 @@
return vecop
return None
+ def post_emit(self, op):
+ if op.is_guard():
+ # add accumulation info to the descriptor
+ # TODO for version in self.loop.versions:
+ # # this needs to be done for renamed (accum arguments)
+ # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ]
+ #self.appendedvar_pos_arg_count = len(sched_data.invariant_vector_vars)
+ failargs = op.getfailargs()
+ descr = op.getdescr()
+ for i,arg in enumerate(failargs):
+ if arg is None:
+ continue
+ accum = self.accumulation.get(arg, None)
+ if accum:
+ assert isinstance(accum, AccumPack)
+ accum.attach_accum_info(descr, i, arg)
+
def post_schedule(self):
loop = self.graph.loop
self.ensure_args_unpacked(loop.jump)
SchedulerState.post_schedule(self)
- # add accumulation info to the descriptor
- # TODO for version in self.loop.versions:
- # # this needs to be done for renamed (accum arguments)
- # version.renamed_inputargs = [ renamer.rename_map.get(arg,arg) for arg in version.inputargs ]
- #self.appended_arg_count = len(sched_data.invariant_vector_vars)
- ##for guard_node in graph.guards:
- ## op = guard_node.getoperation()
- ## failargs = op.getfailargs()
- ## for i,arg in enumerate(failargs):
- ## if arg is None:
- ## continue
- ## accum = arg.getaccum()
- ## if accum:
- ## pass
- ## #accum.save_to_descr(op.getdescr(),i)
- #self.has_two_labels = len(sched_data.invariant_oplist) > 0
- #self.loop.operations = self.prepend_invariant_operations(sched_data)
-
-
def profitable(self):
return self.costmodel.profitable()
def prepare(self):
SchedulerState.prepare(self)
- for node in self.graph.nodes:
- if node.depends_count() == 0:
- self.worklist.insert(0, node)
-
self.packset.accumulate_prepare(self)
for arg in self.graph.loop.label.getarglist():
self.seen[arg] = None
@@ -640,10 +655,14 @@
* independent
"""
FULL = 0
+ _attrs_ = ('operations', 'accumulator', 'operator', 'position')
+
+ operator = '\x00'
+ position = -1
+ accumulator = None
def __init__(self, ops):
self.operations = ops
- self.accum = None
self.update_pack_of_nodes()
def numops(self):
@@ -776,13 +795,12 @@
rightmost = self.operations[-1]
leftmost = other.operations[0]
# if it is not accumulating it is valid
- accum = True
if self.is_accumulating():
if not other.is_accumulating():
- accum = False
- elif self.accum.pos != other.accum.pos:
- accum = False
- return rightmost is leftmost and accum
+ return False
+ elif self.position != other.position:
+ return False
+ return rightmost is leftmost
def argument_vectors(self, state, pack, index, pack_args_index):
vectors = []
@@ -800,12 +818,10 @@
return "Pack(%dx %s)" % (self.numops(), self.operations)
def is_accumulating(self):
- return self.accum is not None
+ return False
def clone(self, oplist):
- cloned = Pack(oplist)
- cloned.accum = self.accum
- return cloned
+ return Pack(oplist)
class Pair(Pack):
""" A special Pack object with only two statements. """
@@ -819,10 +835,37 @@
return self.left is other.left and \
self.right is other.right
-class AccumPair(Pair):
- """ A pair that keeps track of an accumulation value """
- def __init__(self, left, right, accum):
- assert isinstance(left, Node)
- assert isinstance(right, Node)
- Pair.__init__(self, left, right)
- self.accum = accum
+class AccumPack(Pack):
+ SUPPORTED = { rop.FLOAT_ADD: '+',
+ rop.INT_ADD: '+',
+ rop.FLOAT_MUL: '*',
+ }
+
+ def __init__(self, nodes, operator, accum, position):
+ Pack.__init__(self, nodes)
+ self.accumulator = accum
+ self.operator = operator
+ self.position = position
+
+ def getdatatype(self):
+ return self.accumulator.datatype
+
+ def getbytesize(self):
+ return self.accumulator.bytesize
+
+ def getseed(self):
+ """ The accumulatoriable holding the seed value """
+ return self.accumulator
+
+ def attach_accum_info(self, descr, position, scalar):
+ descr.rd_accum_list = AccumInfo(descr.rd_accum_list,
+ position, self.operator,
+ scalar, None)
+
+ def is_accumulating(self):
+ return True
+
+ def clone(self, oplist):
+ return AccumPack(oplist, self.operator,
+ self.accumulator, self.position)
+
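For intuition, the '+' and '*' operators recorded by AccumPack.SUPPORTED stand
for the horizontal reduction that must happen once a vectorized accumulation
loop exits; a plain-Python illustration, no PyPy API involved:

    lanes = [1.0, 2.0, 3.0, 4.0]       # pretend these are the vector register lanes
    seed = 0.0                         # the scalar accumulator ("seed") variable

    total = seed + sum(lanes)          # '+' pack: horizontal add at the loop exit
    assert total == 10.0

    product = 1.0
    for lane in lanes:                 # '*' pack: horizontal multiply at the loop exit
        product *= lane
    assert product == 24.0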
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_dependency.py
@@ -58,6 +58,32 @@
op.setdescr(ResumeAtLoopHeaderDescr())
return loop
+ def parse_trace(self, source, inc_label_jump=True, pargs=2, iargs=10,
+ fargs=6, additional_args=None, replace_args=None):
+ args = []
+ for prefix, rang in [('p',range(pargs)),
+ ('i',range(iargs)),
+ ('f',range(fargs))]:
+ for i in rang:
+ args.append(prefix + str(i))
+
+ assert additional_args is None or isinstance(additional_args,list)
+ for arg in additional_args or []:
+ args.append(arg)
+ for k,v in (replace_args or {}).items():
+ for i,_ in enumerate(args):
+ if k == args[i]:
+ args[i] = v
+ break
+ indent = " "
+ joinedargs = ','.join(args)
+ fmt = (indent, joinedargs, source, indent, joinedargs)
+ src = "%s[%s]\n%s\n%sjump(%s)" % fmt
+ loop = self.parse_loop(src)
+ loop.graph = FakeDependencyGraph(loop)
+ return loop
+
+
def assert_edges(self, graph, edge_list, exceptions):
""" Check if all dependencies are met. for complex cases
adding None instead of a list of integers skips the test.
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_guard.py b/rpython/jit/metainterp/optimizeopt/test/test_guard.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_guard.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_guard.py
@@ -2,17 +2,17 @@
from rpython.jit.metainterp import compile
from rpython.jit.metainterp.history import (TargetToken, JitCellToken,
- TreeLoop, Box, Const)
+ TreeLoop, Const)
from rpython.jit.metainterp.optimizeopt.util import equaloplists
-from rpython.jit.metainterp.optimizeopt.vectorize import (VecScheduleData,
- Pack, NotAProfitableLoop, VectorizingOptimizer)
+from rpython.jit.metainterp.optimizeopt.vector import (Pack,
+ NotAProfitableLoop, VectorizingOptimizer)
from rpython.jit.metainterp.optimizeopt.dependency import (Node,
DependencyGraph, IndexVar)
from rpython.jit.metainterp.optimizeopt.guard import (GuardStrengthenOpt,
Guard)
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
from rpython.jit.metainterp.optimizeopt.test.test_schedule import SchedulerBaseTest
-from rpython.jit.metainterp.optimizeopt.test.test_vectorize import (FakeMetaInterpStaticData,
+from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData,
FakeJitDriverStaticData)
from rpython.jit.metainterp.resoperation import rop, ResOperation
from rpython.jit.tool.oparser_model import get_model
@@ -57,7 +57,7 @@
return self.opnum
def box(value):
- return Box._new(value)
+ return InputArgInt(value)
def const(value):
return Const._new(value)
@@ -80,12 +80,13 @@
class GuardBaseTest(SchedulerBaseTest):
def optguards(self, loop, user_code=False):
- loop.snapshot()
+ #loop.snapshot()
for op in loop.operations:
if op.is_guard():
op.setdescr(compile.CompileLoopVersionDescr())
dep = DependencyGraph(loop)
opt = GuardStrengthenOpt(dep.index_vars, False)
+ xxx
opt.propagate_all_forward(loop, user_code)
return opt
@@ -159,7 +160,7 @@
assert j == len(operations), self.debug_print_operations(loop)
def test_basic(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_lt(i1, 42)
guard_true(i10) []
i11 = int_add(i1, 1)
@@ -177,7 +178,7 @@
""")
def test_basic_sub(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_gt(i1, 42)
guard_true(i10) []
i11 = int_sub(i1, 1)
@@ -195,7 +196,7 @@
""")
def test_basic_mul(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_mul(i1, 4)
i20 = int_lt(i10, 42)
guard_true(i20) []
@@ -310,7 +311,7 @@
assert not g2.implies(g1)
def test_collapse(self):
- loop1 = self.parse("""
+ loop1 = self.parse_trace("""
i10 = int_gt(i1, 42)
guard_true(i10) []
i11 = int_add(i1, 1)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_schedule.py
@@ -9,8 +9,7 @@
from rpython.jit.metainterp.optimizeopt.dependency import Node, DependencyGraph
from rpython.jit.metainterp.optimizeopt.schedule import Scheduler
from rpython.jit.metainterp.optimizeopt.test.test_util import LLtypeMixin
-from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest,
- FakeDependencyGraph)
+from rpython.jit.metainterp.optimizeopt.test.test_dependency import (DependencyBaseTest)
from rpython.jit.metainterp.optimizeopt.test.test_vecopt import (FakeMetaInterpStaticData,
FakeJitDriverStaticData)
from rpython.jit.metainterp.resoperation import rop, ResOperation
@@ -39,31 +38,6 @@
'char': self.chararraydescr,
}
- def parse_trace(self, source, inc_label_jump=True, pargs=2, iargs=10,
- fargs=6, additional_args=None, replace_args=None):
- args = []
- for prefix, rang in [('p',range(pargs)),
- ('i',range(iargs)),
- ('f',range(fargs))]:
- for i in rang:
- args.append(prefix + str(i))
-
- assert additional_args is None or isinstance(additional_args,list)
- for arg in additional_args or []:
- args.append(arg)
- for k,v in (replace_args or {}).items():
- for i,_ in enumerate(args):
- if k == args[i]:
- args[i] = v
- break
- indent = " "
- joinedargs = ','.join(args)
- fmt = (indent, joinedargs, source, indent, joinedargs)
- src = "%s[%s]\n%s\n%sjump(%s)" % fmt
- loop = self.parse_loop(src)
- loop.graph = FakeDependencyGraph(loop)
- return loop
-
def pack(self, loop, l, r, input_type=None, output_type=None):
return Pack(loop.graph.nodes[l:r])
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vecopt.py
@@ -14,6 +14,7 @@
from rpython.jit.metainterp.optimizeopt.vector import (VectorizingOptimizer,
MemoryRef,
isomorphic, Pair, NotAVectorizeableLoop, NotAProfitableLoop,
GuardStrengthenOpt,
CostModel, VectorLoop)
+from rpython.jit.metainterp.optimizeopt.schedule import (Scheduler,
SchedulerState)
from rpython.jit.metainterp.optimize import InvalidLoop
from rpython.jit.metainterp import compile
from rpython.jit.metainterp.resoperation import rop, ResOperation
@@ -42,17 +43,24 @@
jitdriver_sd = FakeJitDriverStaticData()
def assert_vectorize(self, loop, expected_loop, call_pure_results=None):
- self._do_optimize_loop(loop, call_pure_results, export_state=True)
+ self._do_optimize_loop(loop)
self.assert_equal(loop, expected_loop)
def vectoroptimizer(self, loop):
metainterp_sd = FakeMetaInterpStaticData(self.cpu)
jitdriver_sd = FakeJitDriverStaticData()
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, 0)
- label_index = loop.find_first_index(rop.LABEL)
- opt.orig_label_args = loop.operations[label_index].getarglist()[:]
+ opt.orig_label_args = loop.label.getarglist()[:]
return opt
+ def earlyexit(self, loop):
+ opt = self.vectoroptimizer(loop)
+ graph = opt.analyse_index_calculations(loop)
+ graph.view()
+ state = SchedulerState(graph)
+ opt.schedule(state)
+ return graph.loop
+
def vectoroptimizer_unrolled(self, loop, unroll_factor = -1):
loop.snapshot()
opt = self.vectoroptimizer(loop)
@@ -185,6 +193,19 @@
class BaseTestVectorize(VecTestHelper):
+ def test_move_guard_first(self):
+ trace = self.parse_trace("""
+ i10 = int_add(i0, i1)
+ #
+ i11 = int_add(i0, i1)
+ guard_true(i11) []
+ """)
+ add = trace.operations[1]
+ guard = trace.operations[2]
+ trace = self.earlyexit(trace)
+ assert trace.operations[0] is add
+ assert trace.operations[1] is guard
+
def test_vectorize_skip(self):
ops = """
[p0,i0]
@@ -757,7 +778,7 @@
@pytest.mark.parametrize("descr,stride,packs,suffix",
[('char',1,1,'_i'),('float',8,4,'_f'),('int',8,4,'_i'),('float32',4,2,'_i')])
- def test_packset_combine_2_loads_in_trace(self, descr, stride,packs):
+ def test_packset_combine_2_loads_in_trace(self, descr, stride, packs,
suffix):
ops = """
[p0,i0]
i3 = raw_load{suffix}(p0, i0, descr={type}arraydescr)
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -21,10 +21,10 @@
MemoryRef, Node, IndexVar)
from rpython.jit.metainterp.optimizeopt.version import LoopVersionInfo
from rpython.jit.metainterp.optimizeopt.schedule import (VecScheduleState,
- Scheduler, Pack, Pair, AccumPair)
+ SchedulerState, Scheduler, Pack, Pair, AccumPack)
from rpython.jit.metainterp.optimizeopt.guard import GuardStrengthenOpt
from rpython.jit.metainterp.resoperation import (rop, ResOperation, GuardResOp,
- Accum, OpHelpers, VecOperation)
+ OpHelpers, VecOperation)
from rpython.rlib import listsort
from rpython.rlib.objectmodel import we_are_translated
from rpython.rlib.debug import debug_print, debug_start, debug_stop
@@ -60,7 +60,7 @@
# the original loop (output of optimize_unroll)
info = LoopVersionInfo(loop_info)
version = info.snapshot(loop_ops, info.label_op)
- loop = VectorLoop(loop_info.label_op, loop_ops[:-1], loop_ops[-1])
+ loop = VectorLoop(loop_info.label_op, loop_ops[1:-1], loop_ops[-1])
try:
debug_start("vec-opt-loop")
metainterp_sd.logger_noopt.log_loop([], loop.operation_list(), -2,
None, None, "pre vectorize")
@@ -160,21 +160,23 @@
self.has_two_labels = False
def propagate_all_forward(self, info, loop):
- label = loop.label
- jump = loop.jump
- self.orig_label_args = label.getarglist_copy()
- if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
- label.getopnum() != rop.LABEL:
- raise NotAVectorizeableLoop()
- if jump.numargs() != label.numargs():
- raise NotAVectorizeableLoop()
-
+ #label = loop.label
+ #jump = loop.jump
+ #if jump.getopnum() not in (rop.LABEL, rop.JUMP) or \
+ # label.getopnum() != rop.LABEL:
+ # import pdb; pdb. set_trace()
+ # raise NotAVectorizeableLoop()
+ #if jump.numargs() != label.numargs():
+ # import pdb; pdb. set_trace()
+ # raise NotAVectorizeableLoop()
+ self.orig_label_args = loop.label.getarglist_copy()
self.linear_find_smallest_type(loop)
byte_count = self.smallest_type_bytes
vsize = self.cpu.vector_register_size
- if vsize == 0 or byte_count == 0 or label.getopnum() != rop.LABEL:
+ if vsize == 0 or byte_count == 0 or loop.label.getopnum() != rop.LABEL:
# stop, there is no chance to vectorize this trace
# we cannot optimize normal traces (if there is no label)
+ import pdb; pdb. set_trace()
raise NotAVectorizeableLoop()
# find index guards and move to the earliest position
@@ -186,7 +188,7 @@
# unroll
self.unroll_count = self.get_unroll_count(vsize)
self.unroll_loop_iterations(loop, self.unroll_count)
- self.loop.operations = self.get_newoperations();
+ loop.operations = self.get_newoperations()
self.clear_newoperations();
# vectorize
@@ -207,29 +209,26 @@
def unroll_loop_iterations(self, loop, unroll_count):
""" Unroll the loop X times. unroll_count + 1 = unroll_factor """
- op_count = len(loop.operations)
-
- label_op = loop.operations[0].clone()
- assert label_op.getopnum() == rop.LABEL
- jump_op = loop.operations[op_count-1]
- assert jump_op.getopnum() in (rop.LABEL, rop.JUMP)
+ numops = len(loop.operations)
+ label_op = loop.label
+ jump_op = loop.jump
# use the target token of the label
- target_token = label_op.getdescr()
- if not we_are_translated():
- target_token.assumed_classes = {}
- if jump_op.getopnum() == rop.LABEL:
- jump_op = ResOperation(rop.JUMP, jump_op.getarglist(),
target_token)
- else:
- jump_op = jump_op.clone()
- jump_op.setdescr(target_token)
- assert jump_op.is_final()
+ #target_token = label_op.getdescr()
+ #if not we_are_translated():
+ # target_token.assumed_classes = {}
+ #if jump_op.getopnum() == rop.LABEL:
+ # jump_op = ResOperation(rop.JUMP, jump_op.getarglist(),
target_token)
+ #else:
+ # jump_op = jump_op.clone()
+ # jump_op.setdescr(target_token)
+ #assert jump_op.is_final()
self.emit_unrolled_operation(label_op)
renamer = Renamer()
operations = []
- for i in range(1,op_count-1):
- op = loop.operations[i].clone()
+ for i in range(1,numops-1):
+ op = loop.operations[i].copy()
if op.is_guard():
assert isinstance(op, GuardResOp)
failargs = renamer.rename_failargs(op, clone=True)
@@ -258,13 +257,11 @@
for i, op in enumerate(operations):
if op.getopnum() in prohibit_opnums:
continue # do not unroll this operation twice
- copied_op = op.clone()
+ copied_op = op.copy()
if not copied_op.returns_void():
# every result assigns a new box, thus creates an entry
# to the rename map.
- new_assigned_box = copied_op.result.clonebox()
- renamer.start_renaming(copied_op.result, new_assigned_box)
- copied_op.result = new_assigned_box
+ renamer.start_renaming(op, copied_op)
#
args = copied_op.getarglist()
for a, arg in enumerate(args):
@@ -518,14 +515,14 @@
step vectorization would not be possible!
"""
graph = DependencyGraph(loop)
- ee_guard_node = graph.getnode(0)
- if ee_guard_node.getopnum() != rop.GUARD_EARLY_EXIT:
- raise NotAVectorizeableLoop()
- label_node = graph.getnode(0)
+ zero_deps = {}
+ for node in graph.nodes:
+ if node.depends_count() == 0:
+ zero_deps[node] = 0
+ earlyexit = graph.imaginary_node("early exit")
guards = graph.guards
+ one_valid = False
for guard_node in guards:
- if guard_node is ee_guard_node:
- continue
modify_later = []
last_prev_node = None
valid = True
@@ -537,34 +534,35 @@
# 2) non pure operation points to this guard.
# but if this guard only depends on pure operations, it
can be checked
# at an earlier position, the non pure op can execute
later!
- modify_later.append((prev_node, guard_node))
+ modify_later.append(prev_node)
else:
- for path in prev_node.iterate_paths(ee_guard_node, backwards=True, blacklist=True):
- if path.is_always_pure(exclude_first=True, exclude_last=True):
- path.set_schedule_priority(10)
- if path.last() is ee_guard_node:
- modify_later.append((path.last_but_one(), None))
- else:
- # transformation is invalid.
- # exit and do not enter else branch!
+ for path in prev_node.iterate_paths(None, backwards=True, blacklist=True):
+ if not path.is_always_pure(exclude_first=True):
+ path.set_schedule_priority(90)
valid = False
+ if path.last() in zero_deps:
+ del zero_deps[path.last()]
if not valid:
break
if valid:
# transformation is valid, modify the graph and execute
# this guard earlier
- for a,b in modify_later:
- if b is not None:
- a.remove_edge_to(b)
- else:
- last_but_one = a
- if last_but_one is ee_guard_node:
- continue
- ee_guard_node.remove_edge_to(last_but_one)
- #label_node.edge_to(last_but_one, label='pullup')
- # only the last guard needs a connection
- guard_node.edge_to(ee_guard_node, label='pullup-last-guard')
- self.relax_guard_to(guard_node, ee_guard_node)
+ one_valid = True
+ for node in modify_later:
+ node.remove_edge_to(guard_node)
+ # every edge that starts at the guard is inherited by the early exit,
+ # and the guard then provides to the early exit
+ for dep in guard_node.provides()[:]:
+ earlyexit.edge_to(dep.target_node())
+ guard_node.remove_edge_to(dep.target_node())
+ guard_node.edge_to(earlyexit)
+
+ for node in zero_deps.keys():
+ earlyexit.edge_to(node)
+ # TODO self.relax_guard_to(guard_node, ee_guard_node)
+ if one_valid:
+ return graph
+ return None
def relax_guard_to(self, guard_node, other_node):
""" Relaxes a guard operation to an earlier guard. """
@@ -686,9 +684,10 @@
"""
if isomorphic(lnode.getoperation(), rnode.getoperation()):
if lnode.independent(rnode):
- if forward and isinstance(origin_pack, AccumPair):
+ if forward and origin_pack.is_accumulating():
# in this case the split accumulator must
# be combined. This case is not supported
+ import pdb; pdb. set_trace()
raise NotAVectorizeableLoop()
#
if self.contains_pair(lnode, rnode):
@@ -739,20 +738,15 @@
return False
def combine(self, i, j):
- """ Combine two packs. it is assumed that the attribute self.packs
+ """ Combine two packs. It is assumed that the attribute self.packs
is not iterated when calling this method.
"""
- pack_i = self.packs[i]
- pack_j = self.packs[j]
- operations = pack_i.operations
- for op in pack_j.operations[1:]:
+ pkg_a = self.packs[i]
+ pkg_b = self.packs[j]
+ operations = pkg_a.operations
+ for op in pkg_b.operations[1:]:
operations.append(op)
- pack = Pack(operations)
- self.packs[i] = pack
- # preserve the accum variable (if present)
- pack.accum = pack_i.accum
- pack_i.accum = pack_j.accum = None
-
+ self.packs[i] = pkg_a.clone(operations)
del self.packs[j]
return len(self.packs)
@@ -762,27 +756,27 @@
left = lnode.getoperation()
opnum = left.getopnum()
- if opnum in (rop.FLOAT_ADD, rop.INT_ADD, rop.FLOAT_MUL):
+ if opnum in AccumPack.SUPPORTED:
right = rnode.getoperation()
assert left.numargs() == 2 and not left.returns_void()
- accum_var, accum_pos = self.getaccumulator_variable(left, right,
origin_pack)
- if not accum_var:
+ scalar, index = self.getaccumulator_variable(left, right,
origin_pack)
+ if not scalar:
return None
# the dependency exists only because of the left?
for dep in lnode.provides():
if dep.to is rnode:
- if not dep.because_of(accum_var):
+ if not dep.because_of(scalar):
# not quite ... this cannot be handled
return None
# get the original variable
- accum_var = left.getarg(accum_pos)
+ scalar = left.getarg(index)
# in either of the two cases the arguments are mixed,
# which is not handled currently
- var_pos = (accum_pos + 1) % 2
- if left.getarg(var_pos) is not origin_pack.leftmost():
+ other_index = (index + 1) % 2
+ if left.getarg(other_index) is not origin_pack.leftmost():
return None
- if right.getarg(var_pos) is not origin_pack.rightmost():
+ if right.getarg(other_index) is not origin_pack.rightmost():
return None
# this can be handled by accumulation
@@ -797,8 +791,8 @@
# of leading/preceding signext/floatcast instructions needs to be
# considered. => tree pattern matching problem.
return None
- accum = Accum(opnum, accum_var, accum_pos)
- return AccumPair(lnode, rnode, accum)
+ operator = AccumPack.SUPPORTED[opnum]
+ return AccumPack([lnode, rnode], operator, scalar, index)
return None
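The rewiring in analyse_index_calculations above can be pictured on a plain
adjacency dict: the early exit inherits every edge that used to start at the
guard, and the guard then points only at the early exit. Illustrative names,
not the Node API:

    provides = {'guard': ['store', 'jump'],    # guard currently provides to both
                'early_exit': []}

    provides['early_exit'].extend(provides['guard'])   # early exit inherits the edges
    provides['guard'] = ['early_exit']                 # guard now provides to early exit

    assert provides == {'guard': ['early_exit'],
                        'early_exit': ['store', 'jump']}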
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -256,6 +256,9 @@
# common methods
# --------------
+ def copy(self):
+ return self.copy_and_change(self.opnum)
+
def copy_and_change(self, opnum, args=None, descr=None):
"shallow copy: the returned operation is meant to be used in place of
self"
# XXX specialize
@@ -419,6 +422,9 @@
def is_raw_array_access(self):
return self.is_raw_load() or self.is_raw_store()
+ def is_debug(self):
+ return rop._DEBUG_FIRST <= self.getopnum() <= rop._DEBUG_LAST
+
def is_primitive_array_access(self):
""" Indicates that this operations loads/stores a
primitive type (int,float) """
@@ -626,27 +632,6 @@
from rpython.jit.metainterp import history
return history.ConstPtr(self.getref_base())
-class Accum(object):
- PLUS = '+'
- MULTIPLY = '*'
-
- def __init__(self, opnum, var, pos):
- self.var = var
- self.pos = pos
- self.operator = Accum.PLUS
- if opnum == rop.FLOAT_MUL:
- self.operator = Accum.MULTIPLY
-
- def getdatatype(self):
- return self.var.datatype
-
- def getbytesize(self):
- return self.var.bytesize
-
- def getseed(self):
- """ The variable holding the seed value """
- return self.var
-
class CastOp(object):
_mixin_ = True
@@ -726,9 +711,6 @@
return False
return True
- def getaccum(self):
- return self.accum
-
class AbstractInputArg(AbstractResOpOrInputArg):
def set_forwarded(self, forwarded_to):
self._forwarded = forwarded_to
@@ -1114,6 +1096,13 @@
# must be forced, however we need to execute it anyway
'_NOSIDEEFFECT_LAST', # ----- end of no_side_effect operations -----
+ '_DEBUG_FIRST',
+ 'DEBUG_MERGE_POINT/*/n', # debugging only
+ 'ENTER_PORTAL_FRAME/2/n', # debugging only
+ 'LEAVE_PORTAL_FRAME/1/n', # debugging only
+ 'JIT_DEBUG/*/n', # debugging only
+ '_DEBUG_LAST',
+
'INCREMENT_DEBUG_COUNTER/1/n',
'_RAW_STORE_FIRST',
'SETARRAYITEM_GC/3d/n',
@@ -1135,10 +1124,6 @@
'UNICODESETITEM/3/n',
'COND_CALL_GC_WB/1d/n', # [objptr] (for the write barrier)
'COND_CALL_GC_WB_ARRAY/2d/n', # [objptr, arrayindex] (write barr. for array)
- 'DEBUG_MERGE_POINT/*/n', # debugging only
- 'ENTER_PORTAL_FRAME/2/n', # debugging only
- 'LEAVE_PORTAL_FRAME/1/n', # debugging only
- 'JIT_DEBUG/*/n', # debugging only
'VIRTUAL_REF_FINISH/2/n', # removed before it's passed to the backend
'COPYSTRCONTENT/5/n', # src, dst, srcstart, dststart, length
'COPYUNICODECONTENT/5/n',
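The _DEBUG_FIRST/_DEBUG_LAST sentinels added above enable the usual opcode
range-check idiom behind is_debug(); a standalone illustration with made-up
opcode numbers, where only the ordering matters:

    _DEBUG_FIRST      = 100
    DEBUG_MERGE_POINT = 101
    JIT_DEBUG         = 102
    _DEBUG_LAST       = 103
    SETARRAYITEM_GC   = 104            # lies outside the debug range

    def is_debug(opnum):
        # mirrors ResOperation.is_debug() in the hunk above
        return _DEBUG_FIRST <= opnum <= _DEBUG_LAST

    assert is_debug(DEBUG_MERGE_POINT) and is_debug(JIT_DEBUG)
    assert not is_debug(SETARRAYITEM_GC)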
diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py
--- a/rpython/jit/metainterp/resume.py
+++ b/rpython/jit/metainterp/resume.py
@@ -48,7 +48,8 @@
self.pc = pc
class AccumInfo(object):
- __slots__ = ('prev', 'accum_operation', 'scalar_position', 'scalar_box', 'vector_loc')
+ _attrs_ = ('prev', 'accum_operation', 'scalar_position', 'scalar_box', 'vector_loc')
+
def __init__(self, prev, position, operation, box, loc):
self.prev = prev
self.accum_operation = operation
diff --git a/rpython/jit/metainterp/test/test_vector.py b/rpython/jit/metainterp/test/test_vector.py
--- a/rpython/jit/metainterp/test/test_vector.py
+++ b/rpython/jit/metainterp/test/test_vector.py
@@ -342,8 +342,5 @@
res = self.meta_interp(f, [size], vec_all=True)
assert res == f(size)
-class VectorizeLLtypeTests(VectorizeTests):
+class TestLLtype(LLJitMixin, VectorizeTests):
pass
-
-class TestLLtype(VectorizeLLtypeTests, LLJitMixin):
- pass
diff --git a/rpython/jit/metainterp/warmspot.py b/rpython/jit/metainterp/warmspot.py
--- a/rpython/jit/metainterp/warmspot.py
+++ b/rpython/jit/metainterp/warmspot.py
@@ -71,7 +71,7 @@
backendopt=False, trace_limit=sys.maxint, inline=False,
loop_longevity=0, retrace_limit=5, function_threshold=4,
enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15,
- max_unroll_recursion=7, vec=0, vec_all=0, vec_cost=0,
+ max_unroll_recursion=7, vec=1, vec_all=0, vec_cost=0,
vec_length=60, vec_ratio=2, vec_guard_ratio=3, **kwds):
from rpython.config.config import ConfigError
translator = interp.typer.annotator.translator