https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109845

--- Comment #4 from Uroš Bizjak <ubizjak at gmail dot com> ---
The original testcase:

--cut here--
/* Store y + z through SINK, unless X is nonzero or the unsigned addition
   wrapped around.  The overflow check is deliberately open-coded as
   "s < y" and kept in a _Bool; this exact form is the testcase.  */
void foo (unsigned x, unsigned y, unsigned z, unsigned *sink)
{
  unsigned s = y + z;
  /* An unsigned sum smaller than one of its operands means the add wrapped.  */
  _Bool ov = s < y;

  /* Bail out without touching *sink if x is set or the add overflowed.  */
  if (x || ov)
    return;

  *sink = s;
}
--cut here--

can be rewritten to:

--cut here--
/* Store y + z through SINK, unless the unsigned addition overflows or X is
   nonzero.  The overflow check uses __builtin_uadd_overflow instead of an
   open-coded comparison.  */
void bar (unsigned x, unsigned y, unsigned z, unsigned *sink)
{
  unsigned s;

  /* The builtin writes the (possibly wrapped) sum to s and returns nonzero
     on overflow; s is only read below when no overflow occurred.  */
  if (__builtin_uadd_overflow (y, z, &s) || x)
    return;

  *sink = s;
}
--cut here--

The tree optimizers correctly detect .ADD_OVERFLOW in the first function:

;; Function foo (foo, funcdef_no=0, decl_uid=2962, cgraph_uid=1,
symbol_order=0)

void foo (unsigned int x, unsigned int y, unsigned int z, unsigned int * sink)
{
  _Bool ov;
  unsigned int s;
  _Bool _1;
  _Bool _2;
  __complex__ unsigned int _12;
  unsigned int _13;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _12 = .ADD_OVERFLOW (y_4(D), z_5(D));
  s_6 = REALPART_EXPR <_12>;
  _13 = IMAGPART_EXPR <_12>;
  ov_7 = _13 != 0;
  _1 = x_8(D) != 0;
  _2 = _1 | ov_7;
  if (_2 != 0)
    goto <bb 4>; [34.00%]
  else
    goto <bb 3>; [66.00%]

but they failed to optimize:

(ov != 0) | (x != 0)

to the equivalent:

(IMAGPART_EXPR <_12> | x) != 0

in the above code.

Function bar results in much better code:

;; Function bar (bar, funcdef_no=1, decl_uid=2843, cgraph_uid=2,
symbol_order=1)

Removing basic block 5
void bar (unsigned int x, unsigned int y, unsigned int z, unsigned int * sink)
{
  unsigned int _1;
  unsigned int _2;
  __complex__ unsigned int _6;
  unsigned int _11;

  <bb 2> [local count: 1073741822]:
  _6 = .ADD_OVERFLOW (y_4(D), z_5(D));
  _2 = IMAGPART_EXPR <_6>;
  _11 = _2 | x_7(D);
  if (_11 != 0)
    goto <bb 4>; [56.44%]
  else
    goto <bb 3>; [43.56%]

Moreover, simply declaring ov as unsigned instead of _Bool:

unsigned ov = s < y;

makes foo compile to:

;; Function foo (foo, funcdef_no=0, decl_uid=2962, cgraph_uid=1,
symbol_order=0)

void foo (unsigned int x, unsigned int y, unsigned int z, unsigned int * sink)
{
  unsigned int ov;
  unsigned int s;
  _Bool _1;
  unsigned int _2;
  __complex__ unsigned int _12;
  unsigned int _13;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _12 = .ADD_OVERFLOW (y_4(D), z_5(D));
  s_6 = REALPART_EXPR <_12>;
  _13 = IMAGPART_EXPR <_12>;
  _1 = _13 != 0;
  ov_7 = (unsigned int) _1;
  _2 = ov_7 | x_8(D);
  if (_2 != 0)
    goto <bb 4>; [34.00%]
  else
    goto <bb 3>; [66.00%]

which results in optimal assembly:

foo:
        xorl    %eax, %eax
        addl    %edx, %esi
        setc    %al
        orl     %edi, %eax
        jne     .L1
        movl    %esi, (%rcx)
.L1:
        ret

Reply via email to