[pypy-commit] pypy ppc-vsx-support: do not force alloc variable of trace op the bridge is attached to. use a scratch reg for x86

plan_rich Fri, 23 Sep 2016 01:06:26 -0700

Author: Richard Plangger <[email protected]>
Branch: ppc-vsx-support
Changeset: r87340:d04b9daae851
Date: 2016-09-23 09:34 +0200
http://bitbucket.org/pypy/pypy/changeset/d04b9daae851/


Log:    do not force alloc variable of trace op the bridge is attached to.
        use a scratch reg for x86

diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -349,6 +349,8 @@
         assert len(self.temp_boxes) == 0
         if self.longevity:
             for v in self.reg_bindings:
+                if v not in self.longevity:
+                    llop.debug_print(lltype.Void, "variable %s not in longevity\n" % v.repr({}))
                 assert self.longevity[v][1] > self.position
 
     def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False):
diff --git a/rpython/jit/backend/x86/vector_ext.py b/rpython/jit/backend/x86/vector_ext.py
--- a/rpython/jit/backend/x86/vector_ext.py
+++ b/rpython/jit/backend/x86/vector_ext.py
@@ -121,9 +121,15 @@
             # the upper elements will be lost if saved to the stack!
             scalar_arg = accum_info.getoriginal()
             assert isinstance(vector_loc, RegLoc)
+            assert scalar_arg is not None
+            orig_scalar_loc = scalar_loc
             if not isinstance(scalar_loc, RegLoc):
-                scalar_loc = regalloc.force_allocate_reg(scalar_arg)
-            assert scalar_arg is not None
+                # scalar loc might live in memory, use scratch register and save it back later
+                if scalar_arg.type == FLOAT:
+                    scalar_loc = X86_64_XMM_SCRATCH_REG
+                else:
+                    scalar_loc = X86_64_SCRATCH_REG
+                self.mov(orig_scalar_loc, scalar_loc)
             if accum_info.accum_operation == '+':
                 self._accum_reduce_sum(scalar_arg, vector_loc, scalar_loc)
             elif accum_info.accum_operation == '*':
@@ -131,16 +137,16 @@
             else:
                 not_implemented("accum operator %s not implemented" %
                                             (accum_info.accum_operation)) 
+            if scalar_loc is not orig_scalar_loc:
+                self.mov(scalar_loc, orig_scalar_loc)
             accum_info = accum_info.next()
 
     def _accum_reduce_mul(self, arg, accumloc, targetloc):
         scratchloc = X86_64_XMM_SCRATCH_REG
-        self.mov(accumloc, scratchloc)
+        self.mov(accumloc, targetloc)
         # swap the two elements
-        self.mc.SHUFPD_xxi(scratchloc.value, scratchloc.value, 0x01)
-        self.mc.MULSD(accumloc, scratchloc)
-        if accumloc is not targetloc:
-            self.mov(accumloc, targetloc)
+        self.mc.SHUFPD_xxi(targetloc.value, targetloc.value, 0x01)
+        self.mc.MULSD(targetloc, accumloc)
 
     def _accum_reduce_sum(self, arg, accumloc, targetloc):
         # Currently the accumulator can ONLY be the biggest
diff --git a/rpython/jit/metainterp/optimizeopt/vector.py b/rpython/jit/metainterp/optimizeopt/vector.py
--- a/rpython/jit/metainterp/optimizeopt/vector.py
+++ b/rpython/jit/metainterp/optimizeopt/vector.py
@@ -237,7 +237,7 @@
         self.unroll_count = self.get_unroll_count(vsize)
         align_unroll = self.unroll_count==1 and \
                        self.vector_ext.should_align_unroll
-        self.unroll_loop_iterations(info, loop, self.unroll_count,
+        self.unroll_loop_iterations(loop, self.unroll_count,
                                     align_unroll_once=align_unroll)
 
         # vectorize
@@ -262,7 +262,7 @@
 
         return loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False)
 
-    def unroll_loop_iterations(self, info, loop, unroll_count, align_unroll_once=False):
+    def unroll_loop_iterations(self, loop, unroll_count, align_unroll_once=False):
         """ Unroll the loop `unroll_count` times. There can be an additional unroll step
             if alignment might benefit """
         numops = len(loop.operations)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy ppc-vsx-support: do not force alloc variable of trace op the bridge is attached to. use a scratch reg for x86

Reply via email to