https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122518
Uroš Bizjak <ubizjak at gmail dot com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Resolution|DUPLICATE |FIXED
--- Comment #4 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Andrew Pinski from comment #3)
> > Not really, I'm still able to trigger missing optimizations on i386 *and*
> > x86_64.
>
> Right it is a generic issue that happens on more than just x86; happens on
> aarch64, arm and a few others too.
>
> *** This bug has been marked as a duplicate of bug 3507 ***
OK, let me analyse PR3507. The testcase in this PR is solved by the following
patch:
--cut here--
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 587b2bd0c1d..6b6febc8870 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -598,6 +598,20 @@ ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
}
}
+ /* SUB (a, b) underflows precisely when a < b (unsigned). Convert
+ (compare (minus a b) a) to (compare a b)
+ to match *sub<mode>_3 pattern. */
+ if (!op0_preserve_value
+ && (*code == GTU || *code == LEU)
+ && GET_CODE (*op0) == MINUS
+ && rtx_equal_p (XEXP (*op0, 0), *op1))
+ {
+ *op1 = XEXP (*op0, 1);
+ *op0 = XEXP (*op0, 0);
+ *code = (int) swap_condition ((enum rtx_code) *code);
+ return;
+ }
+
/* Swap operands of GTU comparison to canonicalize
addcarry/subborrow comparison. */
if (!op0_preserve_value
--cut here--
that allows combine to create:
Trying 10 -> 11:
10: {r102:SI=r100:SI-r109:SI;clobber flags:CC;}
REG_UNUSED flags:CC
REG_DEAD r109:SI
11: flags:CC=cmp(r100:SI,r102:SI)
Successfully matched this instruction:
(parallel [
(set (reg:CC 17 flags)
(compare:CC (reg/v:SI 100 [ a ])
(reg:SI 109 [ b ])))
(set (reg:SI 102 [ _5 ])
(minus:SI (reg/v:SI 100 [ a ])
(reg:SI 109 [ b ])))
])
and results in:
uminsub:
movl %eax, %ecx # 50 [c=4 l=2] *movsi_internal/0
subl %edx, %ecx # 11 [c=4 l=2] *subsi_3/0
jnb .L2 # 12 [c=12 l=2] *jcc
movl %eax, %ecx # 13 [c=4 l=2] *movsi_internal/0
.L2:
movl %ecx, %eax # 19 [c=4 l=2] *movsi_internal/0
ret # 54 [c=0 l=1] simple_return_internal