https://bugs.llvm.org/show_bug.cgi?id=50171

            Bug ID: 50171
           Summary: Missed optimization to remove unnecessary branch from
                    loop entry
           Product: clang
           Version: unspecified
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: C++
          Assignee: [email protected]
          Reporter: [email protected]
                CC: [email protected], [email protected],
                    [email protected], [email protected],
                    [email protected]

The following toy example:

  void Loop(int len) {
      int i = 0;
      const int kUnrollFactor = 8;
      for (int num_calls = 0; i <= len - kUnrollFactor; ) {
          if (num_calls + kUnrollFactor > 100) {
              extern void Foo(); Foo();
              num_calls = 0;
          }

          for (int j = 0; j < kUnrollFactor; j++, i++, num_calls++) {
              extern void Bar(int); Bar(i);
          }
      }
  }

Compiles to the following x86 assembly code with clang-9:

  Loop(int):
          ... prologue ...
        mov     r14d, edi
        add     r14d, -8
        js      .LBB0_5
        xor     r15d, r15d         <=== num_calls = 0
        xor     ebx, ebx
        cmp     r15d, 93           <=== num_calls still zero here
        jge     .LBB0_3            <=== branch can NEVER be taken
  .LBB0_4:
          ... unrolled loop body with 8 calls to Bar() ...
        add     r15d, 8
        add     ebx, 8
        cmp     ebp, r14d
        jge     .LBB0_5
        cmp     r15d, 93
        jl      .LBB0_4
  .LBB0_3:
        call    Foo()
        xor     r15d, r15d
        jmp     .LBB0_4
  .LBB0_5:
          ... epilogue ...
        ret

Ideally, the compiler should elide the provably redundant cmp+jge pair from the
following sequence, leaving only the xor:

        xor     r15d, r15d
        cmp     r15d, 93
        jge     .LBB0_3


With clang-12, the result is arguably worse, because the extra branching masks
the missed opportunity altogether:

  Loop(int):
          ... prologue ...
        cmp     edi, 8
        jge     .LBB0_1
  .LBB0_5:
          ... epilogue ...
        ret
  .LBB0_1:
          ... loop initialization ...
        xor     r15d, r15d          <=== num_calls = 0
        xor     ebx, ebx
        jmp     .LBB0_2
  .LBB0_4:
          ... unrolled loop body with 8 calls to Bar() ...
        add     r15d, 8
        add     ebx, 8
        cmp     ebp, r14d
        jge     .LBB0_5
  .LBB0_2:
        cmp     r15d, 93            <=== num_calls = 0 the first time
        jl      .LBB0_4             <=== branch always taken the first time
        call    Foo()
        xor     r15d, r15d
        jmp     .LBB0_4

-- 
You are receiving this mail because:
You are on the CC list for the bug.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to