Author: Richard Plangger <[email protected]>
Branch: vecopt-merge
Changeset: r79029:fc65e2cfbacc
Date: 2015-08-18 10:41 +0200
http://bitbucket.org/pypy/pypy/changeset/fc65e2cfbacc/
Log: added a new jit parameter vec_params which encodes the previously
known vectorize_user and vec_cost, and adds a maximum trace length and a
ratio used in the fast path to skip vector traces. renamed vectorize to
vec, vectorize_user to vec_all
diff --git a/pypy/module/micronumpy/test/test_zjit.py
b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -97,7 +97,7 @@
backendopt=True,
graph_and_interp_only=True,
ProfilerClass=Profiler,
- vectorize=True)
+ vec=True)
self.__class__.interp = interp
self.__class__.graph = graph
diff --git a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
--- a/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
+++ b/rpython/jit/backend/x86/test/test_zrpy_vecopt.py
@@ -17,7 +17,7 @@
t.buildrtyper().specialize()
if kwds['jit']:
- apply_jit(t, vectorize=True)
+ apply_jit(t, vec=True)
class TestVecOptX86(object):
def test_translate(self):
diff --git a/rpython/jit/metainterp/optimizeopt/__init__.py
b/rpython/jit/metainterp/optimizeopt/__init__.py
--- a/rpython/jit/metainterp/optimizeopt/__init__.py
+++ b/rpython/jit/metainterp/optimizeopt/__init__.py
@@ -69,8 +69,8 @@
optimizations, unroll = build_opt_chain(metainterp_sd, enable_opts)
if unroll:
if not export_state and \
- ((warmstate.vectorize and jitdriver_sd.vectorize) \
- or warmstate.vectorize_user):
+ ((warmstate.vec and jitdriver_sd.vec) \
+ or warmstate.vec_all):
optimize_vector(metainterp_sd, jitdriver_sd, loop,
optimizations, inline_short_preamble,
start_state, warmstate)
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py
b/rpython/jit/metainterp/optimizeopt/test/test_util.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_util.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py
@@ -333,14 +333,14 @@
storedebug = None
class FakeWarmState(object):
- vectorize = True # default is on
- vectorize_user = False
+ vec = True # default is on
+ vec_all = False
vec_cost = 0
def __init__(self, enable_opts):
self.enable_opts = enable_opts
class FakeJitDriverStaticData(object):
- vectorize = False
+ vec = False
class FakeMetaInterpStaticData(object):
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_vectorize.py
@@ -22,7 +22,7 @@
from rpython.rlib.rarithmetic import LONG_BIT
class FakeJitDriverStaticData(object):
- vectorize=True
+ vec=True
class FakeCostModel(CostModel):
def __init__(self):
diff --git a/rpython/jit/metainterp/optimizeopt/vectorize.py
b/rpython/jit/metainterp/optimizeopt/vectorize.py
--- a/rpython/jit/metainterp/optimizeopt/vectorize.py
+++ b/rpython/jit/metainterp/optimizeopt/vectorize.py
@@ -35,6 +35,9 @@
inline_short_preamble, start_state, warmstate):
optimize_unroll(metainterp_sd, jitdriver_sd, loop, optimizations,
inline_short_preamble, start_state, False)
+ user_code = not jitdriver_sd.vec and warmstate.vec_all
+ if user_code and user_loop_bail_fast_path(loop, warmstate):
+ return
version = loop.snapshot()
try:
debug_start("vec-opt-loop")
@@ -47,7 +50,6 @@
opt = VectorizingOptimizer(metainterp_sd, jitdriver_sd, loop, 0)
opt.propagate_all_forward()
gso = GuardStrengthenOpt(opt.dependency_graph.index_vars)
- user_code = not jitdriver_sd.vectorize and warmstate.vectorize_user
gso.propagate_all_forward(opt.loop, user_code)
# connect all compile loop version fail descriptors to this version
version.register_all_guards(loop.operations, opt.appended_arg_count)
@@ -85,6 +87,36 @@
else:
raise
+def user_loop_bail_fast_path(loop, warmstate):
+    """ Fast path over the trace loop: keep vecopt from spending time on a
+        user loop that will most probably fail to vectorize.
+        Returns True if the loop should be skipped: it has no primitive
+        array access, it has more than warmstate.vec_length operations, or
+        vector_instr / resop_count is <= warmstate.vec_ratio. """
+    resop_count = 0 # the count of operations minus debug_merge_points
+    vector_instr = 0 # non-guard operations with op.vector >= 0
+    # must start out False, otherwise the array access check below is dead
+    at_least_one_array_access = False
+    for op in loop.operations:
+        if op.getopnum() == rop.DEBUG_MERGE_POINT:
+            continue
+
+        if op.vector >= 0 and not op.is_guard():
+            vector_instr += 1
+
+        resop_count += 1
+        if op.is_primitive_array_access():
+            at_least_one_array_access = True
+
+    # also true when resop_count == 0, so the division below is safe
+    if not at_least_one_array_access:
+        return True
+
+    if resop_count > warmstate.vec_length:
+        return True
+
+    return float(vector_instr)/float(resop_count) <= warmstate.vec_ratio
+
def cmp_pack_lt(a,b):
return a.left.getindex() < b.left.getindex()
packsort = listsort.make_timsort_class(lt=cmp_pack_lt)
diff --git a/rpython/jit/metainterp/pyjitpl.py
b/rpython/jit/metainterp/pyjitpl.py
--- a/rpython/jit/metainterp/pyjitpl.py
+++ b/rpython/jit/metainterp/pyjitpl.py
@@ -1123,7 +1123,7 @@
if self.metainterp.seen_loop_header_for_jdindex < 0:
if not any_operation:
- if jitdriver_sd.vectorize or
jitdriver_sd.warmstate.vectorize_user:
+ if jitdriver_sd.vec or jitdriver_sd.warmstate.vec_all:
self.metainterp.generate_guard(rop.GUARD_EARLY_EXIT)
return
if self.metainterp.portal_call_depth or not
self.metainterp.get_procedure_token(greenboxes, True):
diff --git a/rpython/jit/metainterp/test/support.py
b/rpython/jit/metainterp/test/support.py
--- a/rpython/jit/metainterp/test/support.py
+++ b/rpython/jit/metainterp/test/support.py
@@ -51,7 +51,7 @@
trace_limit = sys.maxint
enable_opts = ALL_OPTS_DICT
- vectorize = True
+ vec = True
if kwds.pop('disable_optimizations', False):
FakeWarmRunnerState.enable_opts = {}
@@ -70,7 +70,7 @@
greenfield_info = None
result_type = result_kind
portal_runner_ptr = "???"
- vectorize = False
+ vec = False
stats = history.Stats()
cpu = CPUClass(rtyper, stats, None, False)
diff --git a/rpython/jit/metainterp/test/test_compile.py
b/rpython/jit/metainterp/test/test_compile.py
--- a/rpython/jit/metainterp/test/test_compile.py
+++ b/rpython/jit/metainterp/test/test_compile.py
@@ -66,7 +66,7 @@
index = 0
warmstate = FakeState()
virtualizable_info = None
- vectorize = False
+ vec = False
def test_compile_loop():
cpu = FakeCPU()
diff --git a/rpython/jit/metainterp/test/test_vectorize.py
b/rpython/jit/metainterp/test/test_vectorize.py
--- a/rpython/jit/metainterp/test/test_vectorize.py
+++ b/rpython/jit/metainterp/test/test_vectorize.py
@@ -24,7 +24,7 @@
policy=policy,
CPUClass=self.CPUClass,
type_system=self.type_system,
- vectorize=1)
+ vec=True)
@py.test.mark.parametrize('i',[3,4,5,6,7,8,9,50])
def test_vectorize_simple_load_arith_store_int_add_index(self,i):
diff --git a/rpython/jit/metainterp/warmspot.py
b/rpython/jit/metainterp/warmspot.py
--- a/rpython/jit/metainterp/warmspot.py
+++ b/rpython/jit/metainterp/warmspot.py
@@ -32,7 +32,7 @@
# Bootstrapping
def apply_jit(translator, backend_name="auto", inline=False,
- vectorize=False, enable_opts=ALL_OPTS_NAMES, **kwds):
+ vec=False, enable_opts=ALL_OPTS_NAMES, **kwds):
if 'CPUClass' not in kwds:
from rpython.jit.backend.detect_cpu import getcpuclass
kwds['CPUClass'] = getcpuclass(backend_name)
@@ -47,7 +47,7 @@
**kwds)
for jd in warmrunnerdesc.jitdrivers_sd:
jd.warmstate.set_param_inlining(inline)
- jd.warmstate.set_param_vectorize(vectorize)
+ jd.warmstate.set_param_vec(vec)
jd.warmstate.set_param_enable_opts(enable_opts)
warmrunnerdesc.finish()
translator.warmrunnerdesc = warmrunnerdesc # for later debugging
@@ -68,12 +68,11 @@
return jittify_and_run(interp, graph, args, backendopt=backendopt, **kwds)
def jittify_and_run(interp, graph, args, repeat=1, graph_and_interp_only=False,
- backendopt=False, trace_limit=sys.maxint,
- inline=False, loop_longevity=0, retrace_limit=5,
- function_threshold=4,
+ backendopt=False, trace_limit=sys.maxint, inline=False,
+ loop_longevity=0, retrace_limit=5, function_threshold=4,
enable_opts=ALL_OPTS_NAMES, max_retrace_guards=15,
- max_unroll_recursion=7, vectorize=0, vectorize_user=0,
- vec_cost=0, **kwds):
+ max_unroll_recursion=7, vec=0, vec_params='0:0:50:0.6',
+ **kwds):
from rpython.config.config import ConfigError
translator = interp.typer.annotator.translator
try:
@@ -96,9 +95,8 @@
jd.warmstate.set_param_max_retrace_guards(max_retrace_guards)
jd.warmstate.set_param_enable_opts(enable_opts)
jd.warmstate.set_param_max_unroll_recursion(max_unroll_recursion)
- jd.warmstate.set_param_vectorize(vectorize)
- jd.warmstate.set_param_vectorize_user(vectorize_user)
- jd.warmstate.set_param_vec_cost(vec_cost)
+ jd.warmstate.set_param_vec(vec)
+ jd.warmstate.set_param_vec_params(vec_params)
warmrunnerdesc.finish()
if graph_and_interp_only:
return interp, graph
@@ -398,7 +396,7 @@
graph.func._dont_inline_ = True
graph.func._jit_unroll_safe_ = True
jd.jitdriver = block.operations[pos].args[1].value
- jd.vectorize = jd.jitdriver.vectorize
+ jd.vec = jd.jitdriver.vec
jd.portal_runner_ptr = "<not set so far>"
jd.result_type = history.getkind(jd.portal_graph.getreturnvar()
.concretetype)[0]
diff --git a/rpython/jit/metainterp/warmstate.py
b/rpython/jit/metainterp/warmstate.py
--- a/rpython/jit/metainterp/warmstate.py
+++ b/rpython/jit/metainterp/warmstate.py
@@ -300,14 +300,21 @@
if self.warmrunnerdesc.memory_manager:
self.warmrunnerdesc.memory_manager.max_unroll_recursion = value
- def set_param_vectorize(self, value):
- self.vectorize = bool(value)
+ def set_param_vec(self, value):
+ self.vec = bool(value)
- def set_param_vectorize_user(self, value):
- self.vectorize_user = bool(value)
-
- def set_param_vec_cost(self, value):
- self.vec_cost = bool(value)
+    def set_param_vec_params(self, value):
+        """ value: '<all>[:<cost>[:<length>[:<ratio>]]]'; trailing fields
+            that are left out keep cost=0, length=50, ratio=0.60 """
+        values = value.split(":")
+        count = len(values)
+        # bool() of any non-empty string (even '0') is True, so the flag
+        # is parsed as an integer; an empty field still means "off"
+        flag = values[0]
+        self.vec_all = flag != '' and int(flag) != 0
+        self.vec_cost = int(values[1]) if count > 1 else 0
+        self.vec_length = int(values[2]) if count > 2 else 50
+        self.vec_ratio = float(values[3]) if count > 3 else 0.60
def disable_noninlinable_function(self, greenkey):
cell = self.JitCell.ensure_jit_cell_at_key(greenkey)
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -553,9 +553,14 @@
'enable_opts': 'INTERNAL USE ONLY (MAY NOT WORK OR LEAD TO CRASHES): '
'optimizations to enable, or all = %s' % ENABLE_ALL_OPTS,
'max_unroll_recursion': 'how many levels deep to unroll a recursive
function',
- 'vectorize': 'turn on the vectorization optimization (vecopt). requires
sse4.1',
- 'vectorize_user': 'turn on the vecopt for the python user program.
requires sse4.1',
- 'vec_cost': 'threshold which traces to vectorize.',
+ 'vec': 'turn on the vectorization optimization (vecopt). requires sse4.1',
+ 'vec_params': 'parameters to the optimization separated by colons.
<all>[:<cost>[:<length>[:<ratio>]]]. '
+ 'all = 1: try to vectorize trace loops that occur outside of
the numpy library. '
+ 'cost = 0: threshold for which traces to bail. 0 means the
costs '
+ 'balance the unpacking, if below the vectorizer bails out. '
+ 'length = 50: the amount of instructions allowed in "all"
traces. '
+ 'ratio = 0.60: the number statements that have vector
equivalents divided '
+ 'by the total number of trace instructions.',
}
PARAMETERS = {'threshold': 1039, # just above 1024, prime
@@ -571,9 +576,8 @@
'disable_unrolling': 200,
'enable_opts': 'all',
'max_unroll_recursion': 7,
- 'vectorize': 0,
- 'vectorize_user': 0,
- 'vec_cost': 0,
+ 'vec': 0,
+ 'vec_params': '0:0:50:0.60',
}
unroll_parameters = unrolling_iterable(PARAMETERS.items())
@@ -636,7 +640,7 @@
self.can_never_inline = can_never_inline
self.should_unroll_one_iteration = should_unroll_one_iteration
self.check_untranslated = check_untranslated
- self.vectorize = vectorize
+ self.vec = vectorize
def _freeze_(self):
return True
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit