Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r75851:8bc24ee62997
Date: 2015-02-13 13:48 +0100
http://bitbucket.org/pypy/pypy/changeset/8bc24ee62997/

Log:    Redo parts of the very old swap-of-arguments optimization that was
        disabled in the x86 backend.

diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -934,7 +934,7 @@
             getattr(self.mc, asmop)(arglocs[0])
         return genop_unary
 
-    def _binaryop(asmop, can_swap=False):
+    def _binaryop(asmop):
         def genop_binary(self, op, arglocs, result_loc):
             getattr(self.mc, asmop)(arglocs[0], arglocs[1])
         return genop_binary
@@ -1078,18 +1078,18 @@
 
     genop_int_neg = _unaryop("NEG")
     genop_int_invert = _unaryop("NOT")
-    genop_int_add = _binaryop_or_lea("ADD", True)
-    genop_int_sub = _binaryop_or_lea("SUB", False)
-    genop_int_mul = _binaryop("IMUL", True)
-    genop_int_and = _binaryop("AND", True)
-    genop_int_or  = _binaryop("OR", True)
-    genop_int_xor = _binaryop("XOR", True)
+    genop_int_add = _binaryop_or_lea("ADD", is_add=True)
+    genop_int_sub = _binaryop_or_lea("SUB", is_add=False)
+    genop_int_mul = _binaryop("IMUL")
+    genop_int_and = _binaryop("AND")
+    genop_int_or  = _binaryop("OR")
+    genop_int_xor = _binaryop("XOR")
     genop_int_lshift = _binaryop("SHL")
     genop_int_rshift = _binaryop("SAR")
     genop_uint_rshift = _binaryop("SHR")
-    genop_float_add = _binaryop("ADDSD", True)
+    genop_float_add = _binaryop("ADDSD")
     genop_float_sub = _binaryop('SUBSD')
-    genop_float_mul = _binaryop('MULSD', True)
+    genop_float_mul = _binaryop('MULSD')
     genop_float_truediv = _binaryop('DIVSD')
 
     genop_int_lt = _cmpop("L", "G")
@@ -1273,11 +1273,11 @@
         self.mc.XOR_rr(edx.value, edx.value)
         self.mc.DIV_r(ecx.value)
 
-    genop_llong_add = _binaryop("PADDQ", True)
+    genop_llong_add = _binaryop("PADDQ")
     genop_llong_sub = _binaryop("PSUBQ")
-    genop_llong_and = _binaryop("PAND",  True)
-    genop_llong_or  = _binaryop("POR",   True)
-    genop_llong_xor = _binaryop("PXOR",  True)
+    genop_llong_and = _binaryop("PAND")
+    genop_llong_or  = _binaryop("POR")
+    genop_llong_xor = _binaryop("PXOR")
 
     def genop_llong_to_int(self, op, arglocs, resloc):
         loc = arglocs[0]
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -421,9 +421,19 @@
 
     consider_guard_nonnull_class = consider_guard_class
 
-    def _consider_binop_part(self, op):
+    def _consider_binop_part(self, op, symm=False):
         x = op.getarg(0)
-        argloc = self.loc(op.getarg(1))
+        y = op.getarg(1)
+        argloc = self.loc(y)
+        #
+        # For symmetrical operations, if 'y' is already in a register
+        # and won't be used after the current operation finishes,
+        # then swap the role of 'x' and 'y'
+        if (symm and isinstance(argloc, RegLoc) and
+                self.rm.longevity[y][1] == self.rm.position):
+            x, y = y, x
+            argloc = self.loc(y)
+        #
         args = op.getarglist()
         loc = self.rm.force_result_in_reg(op.result, x, args)
         return loc, argloc
@@ -432,6 +442,10 @@
         loc, argloc = self._consider_binop_part(op)
         self.perform(op, [loc, argloc], loc)
 
+    def _consider_binop_symm(self, op):
+        loc, argloc = self._consider_binop_part(op, symm=True)
+        self.perform(op, [loc, argloc], loc)
+
     def _consider_lea(self, op, loc):
         argloc = self.loc(op.getarg(1))
         resloc = self.force_allocate_reg(op.result)
@@ -444,7 +458,7 @@
             isinstance(y, ConstInt) and rx86.fits_in_32bits(y.value)):
             self._consider_lea(op, loc)
         else:
-            self._consider_binop(op)
+            self._consider_binop_symm(op)
 
     def consider_int_sub(self, op):
         loc = self.loc(op.getarg(0))
@@ -455,18 +469,22 @@
         else:
             self._consider_binop(op)
 
-    consider_int_mul = _consider_binop
-    consider_int_and = _consider_binop
-    consider_int_or  = _consider_binop
-    consider_int_xor = _consider_binop
+    consider_int_mul = _consider_binop_symm
+    consider_int_and = _consider_binop_symm
+    consider_int_or  = _consider_binop_symm
+    consider_int_xor = _consider_binop_symm
 
     def _consider_binop_with_guard(self, op, guard_op):
         loc, argloc = self._consider_binop_part(op)
         self.perform_with_guard(op, guard_op, [loc, argloc], loc)
 
-    consider_int_mul_ovf = _consider_binop_with_guard
+    def _consider_binop_with_guard_symm(self, op, guard_op):
+        loc, argloc = self._consider_binop_part(op, symm=True)
+        self.perform_with_guard(op, guard_op, [loc, argloc], loc)
+
+    consider_int_mul_ovf = _consider_binop_with_guard_symm
     consider_int_sub_ovf = _consider_binop_with_guard
-    consider_int_add_ovf = _consider_binop_with_guard
+    consider_int_add_ovf = _consider_binop_with_guard_symm
 
     def consider_int_neg(self, op):
         res = self.rm.force_result_in_reg(op.result, op.getarg(0))
@@ -551,9 +569,9 @@
         loc0 = self.xrm.force_result_in_reg(op.result, op.getarg(0), args)
         self.perform(op, [loc0, loc1], loc0)
 
-    consider_float_add = _consider_float_op
+    consider_float_add = _consider_float_op      # xxx could be _symm
     consider_float_sub = _consider_float_op
-    consider_float_mul = _consider_float_op
+    consider_float_mul = _consider_float_op      # xxx could be _symm
     consider_float_truediv = _consider_float_op
 
     def _consider_float_cmp(self, op, guard_op):
@@ -632,6 +650,7 @@
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
         # argument is a constant, we can ask it to be aligned to 16 bytes.
+        # xxx some of these operations could be '_symm'.
         args = [op.getarg(1), op.getarg(2)]
         loc1 = self.load_xmm_aligned_16_bytes(args[1])
         loc0 = self.xrm.force_result_in_reg(op.result, args[0], args)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to