Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r87340:d04b9daae851
Date: 2016-09-23 09:34 +0200
http://bitbucket.org/pypy/pypy/changeset/d04b9daae851/
Log: Do not force-allocate the variable of the trace op that the bridge is attached to;
use a scratch register on x86 instead.
diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -349,6 +349,8 @@
assert len(self.temp_boxes) == 0
if self.longevity:
for v in self.reg_bindings:
+ if v not in self.longevity:
+ llop.debug_print(lltype.Void, "variable %s not in longevity\n" % v.repr({}))
assert self.longevity[v][1] > self.position
def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False):
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -121,9 +121,15 @@
# the upper elements will be lost if saved to the stack!
scalar_arg = accum_info.getoriginal()
assert isinstance(vector_loc, RegLoc)
+ assert scalar_arg is not None
+ orig_scalar_loc = scalar_loc
if not isinstance(scalar_loc, RegLoc):
- scalar_loc = regalloc.force_allocate_reg(scalar_arg)
- assert scalar_arg is not None
+ # scalar loc might live in memory, use scratch register and save it back later
+ if scalar_arg.type == FLOAT:
+ scalar_loc = X86_64_XMM_SCRATCH_REG
+ else:
+ scalar_loc = X86_64_SCRATCH_REG
+ self.mov(orig_scalar_loc, scalar_loc)
if accum_info.accum_operation == '+':
self._accum_reduce_sum(scalar_arg, vector_loc, scalar_loc)
elif accum_info.accum_operation == '*':
@@ -131,16 +137,16 @@
else:
not_implemented("accum operator %s not implemented" %
(accum_info.accum_operation))
+ if scalar_loc is not orig_scalar_loc:
+ self.mov(scalar_loc, orig_scalar_loc)
accum_info = accum_info.next()
def _accum_reduce_mul(self, arg, accumloc, targetloc):
scratchloc = X86_64_XMM_SCRATCH_REG
- self.mov(accumloc, scratchloc)
+ self.mov(accumloc, targetloc)
# swap the two elements
- self.mc.SHUFPD_xxi(scratchloc.value, scratchloc.value, 0x01)
- self.mc.MULSD(accumloc, scratchloc)
- if accumloc is not targetloc:
- self.mov(accumloc, targetloc)
+ self.mc.SHUFPD_xxi(targetloc.value, targetloc.value, 0x01)
+ self.mc.MULSD(targetloc, accumloc)
def _accum_reduce_sum(self, arg, accumloc, targetloc):
# Currently the accumulator can ONLY be the biggest
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -237,7 +237,7 @@
self.unroll_count = self.get_unroll_count(vsize)
align_unroll = self.unroll_count==1 and \
self.vector_ext.should_align_unroll
- self.unroll_loop_iterations(info, loop, self.unroll_count,
+ self.unroll_loop_iterations(loop, self.unroll_count,
align_unroll_once=align_unroll)
# vectorize
@@ -262,7 +262,7 @@
return loop.finaloplist(jitcell_token=jitcell_token,
reset_label_token=False)
- def unroll_loop_iterations(self, info, loop, unroll_count, align_unroll_once=False):
+ def unroll_loop_iterations(self, loop, unroll_count, align_unroll_once=False):
""" Unroll the loop `unroll_count` times. There can be an additional
unroll step
if alignment might benefit """
numops = len(loop.operations)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit