https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94274

            Bug ID: 94274
           Summary: fold phi whose incoming args are defined from binary
                    operations
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: z.zhanghaijian at huawei dot com
  Target Milestone: ---

For if/else structure, 
Example 1:

int test(int cond, int a, int b, int c)
{
  int result = 0;

  if (cond)
    result = a + b;
  else
    result = a + c;
  return result;
}

The expressions is binary operation and have a common subexpression "a", and
the opcode is the same.

E.g. on aarch64, gcc will do the binary operation first, and then do csel:

cmp     w0, 0
add     w0, w1, w2
add     w1, w1, w3
csel    w0, w1, w0, eq

In fact, it can be optimized to do csel first and then do binary operations:

cmp     w0, 0
csel    w2, w2, w3, ne
add     w0, w2, w1

This can eliminate one instruction. This scenario is very common, and the
switch/case structure is the same.

Example 2:

int test(int cond, int a, int b, int c, int d)
{
  int result = 0;

  switch (cond) {
    case 1:
      result = a + b;
      break;
    case 8:
      result = a + c;
      break;
    default:
      result = a + d;
      break;
  }
  return result;
}

gcc will do the binary operation first, and then do csel :

        mov     w5, w0
        add     w0, w1, w2
        cmp     w5, 1
        beq     .L1
        add     w4, w1, w4
        cmp     w5, 8
        add     w1, w1, w3
        csel    w0, w1, w4, eq
.L1:
        ret

Which can further optimized into :

        cmp     w0, 1
        beq     .L3
        cmp     w0, 8
        csel    w4, w4, w3, ne
        add     w0, w1, w4
        ret
.L3:
        mov     w4, w2
        add     w0, w1, w4
        ret

My proposal: fold the merging phi node in tree_ssa_phiopt_worker (ssa-phiopt) :

For example 1:

replaces

bb0:
  if (cond) goto bb1; else goto bb2;
bb1:
  x1 = a + b;
  goto <bb3>
bb2:
  x2 = a + c;
bb3:
  x = PHI <x1 (bb1), x2 (bb2), ...>;

with

bb0:
  if (cond) goto bb1; else goto bb2;
bb1:
bb2:
bb3:
  x3 = PHI <b (bb1), c (bb2), ...>;
  x = a + x3;


For example 2:

replaces

bb0:
  if (cond == 1) goto bb2; else goto bb1;
bb1:
  if (cond == 8) goto bb3; else goto bb4;
bb2:
  x2 = a + b;
  goto <bb5>
bb3:
  x3 = a + c;
  goto <bb5>
bb4:
  x4 = a + d;
bb5:
  x5 = PHI <x2 (bb2), x3 (bb3), x4 (bb4), ...>;

with

bb0:
  if (cond == 1) goto bb2; else goto bb1;
bb1:
  if (cond == 8) goto bb3; else goto bb4;
bb2:
bb3:
bb4:
bb5:
  x5 = PHI <b (bb2), c (bb3), c (bb4), ...>;
  x = a + x5;

I have an initial implementation that is under testing. In part, it based on
the LLVM InstCombinePass(InstCombinePHI.cpp).

Any suggestions?

Reply via email to