https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109845
--- Comment #4 from Uroš Bizjak <ubizjak at gmail dot com> ---
The original testcase:
--cut here--
/* Original testcase: add y and z, and store the sum through sink only when
   x is zero and the unsigned addition did not wrap around.  */
void foo (unsigned x, unsigned y, unsigned z, unsigned *sink)
{
unsigned s = y + z;
/* Unsigned addition wraps on overflow, so a sum smaller than one of its
   operands means the addition carried out.  */
_Bool ov = s < y;
/* Return without writing *sink when x is nonzero or the sum overflowed.  */
if (x || ov)
return;
*sink = s;
}
--cut here--
can be rewritten to:
--cut here--
/* Rewritten testcase: same contract as the hand-written overflow check, but
   the overflow test is expressed through __builtin_uadd_overflow.  */
void bar (unsigned x, unsigned y, unsigned z, unsigned *sink)
{
unsigned s;
/* The builtin stores y + z in s and returns nonzero when the addition
   overflowed; return without writing *sink in that case or when x is
   nonzero.  */
if (__builtin_uadd_overflow (y, z, &s) || x)
return;
*sink = s;
}
--cut here--
The tree optimizers correctly detect .ADD_OVERFLOW in the first function:
;; Function foo (foo, funcdef_no=0, decl_uid=2962, cgraph_uid=1,
symbol_order=0)
void foo (unsigned int x, unsigned int y, unsigned int z, unsigned int * sink)
{
_Bool ov;
unsigned int s;
_Bool _1;
_Bool _2;
__complex__ unsigned int _12;
unsigned int _13;
;; basic block 2, loop depth 0
;; pred: ENTRY
_12 = .ADD_OVERFLOW (y_4(D), z_5(D));
s_6 = REALPART_EXPR <_12>;
_13 = IMAGPART_EXPR <_12>;
ov_7 = _13 != 0;
_1 = x_8(D) != 0;
_2 = _1 | ov_7;
if (_2 != 0)
goto <bb 4>; [34.00%]
else
goto <bb 3>; [66.00%]
but they failed to optimize:
(ov != 0) | (x != 0)
to the equivalent:
(IMAGPART_EXPR | x) != 0
in the above code.
Function bar results in much better code:
;; Function bar (bar, funcdef_no=1, decl_uid=2843, cgraph_uid=2,
symbol_order=1)
Removing basic block 5
void bar (unsigned int x, unsigned int y, unsigned int z, unsigned int * sink)
{
unsigned int _1;
unsigned int _2;
__complex__ unsigned int _6;
unsigned int _11;
<bb 2> [local count: 1073741822]:
_6 = .ADD_OVERFLOW (y_4(D), z_5(D));
_2 = IMAGPART_EXPR <_6>;
_11 = _2 | x_7(D);
if (_11 != 0)
goto <bb 4>; [56.44%]
else
goto <bb 3>; [43.56%]
And even just by using:
unsigned ov = s < y;
foo compiles to:
;; Function foo (foo, funcdef_no=0, decl_uid=2962, cgraph_uid=1,
symbol_order=0)
void foo (unsigned int x, unsigned int y, unsigned int z, unsigned int * sink)
{
unsigned int ov;
unsigned int s;
_Bool _1;
unsigned int _2;
__complex__ unsigned int _12;
unsigned int _13;
;; basic block 2, loop depth 0
;; pred: ENTRY
_12 = .ADD_OVERFLOW (y_4(D), z_5(D));
s_6 = REALPART_EXPR <_12>;
_13 = IMAGPART_EXPR <_12>;
_1 = _13 != 0;
ov_7 = (unsigned int) _1;
_2 = ov_7 | x_8(D);
if (_2 != 0)
goto <bb 4>; [34.00%]
else
goto <bb 3>; [66.00%]
which results in optimal assembly:
# Assembly generated for foo.  NOTE(review): the register roles below
# (x = %edi, y = %esi, z = %edx, sink = %rcx) are inferred from the
# x86-64 SysV argument order -- confirm against the target ABI.
foo:
# Zero %eax before the add: setc below only writes the low byte %al.
xorl %eax, %eax
# %esi = %esi + %edx; the carry flag records unsigned overflow.
addl %edx, %esi
# %al = carry flag, so %eax becomes 0 or 1.
setc %al
# Combine the overflow bit with %edi; the result is nonzero if either is set.
orl %edi, %eax
# Nonzero result: skip the store.
jne .L1
# Store the sum through the pointer in %rcx.
movl %esi, (%rcx)
.L1:
ret