https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113080

            Bug ID: 113080
           Summary: Missed optimization of loop invariant elimination
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: 652023330028 at smail dot nju.edu.cn
  Target Milestone: ---

Hello, we noticed that maybe there is a missed optimization for loop invariant
elimination.

Loop invariant: a+b.

https://godbolt.org/z/zYb7Ej47a

int a,b,n;
int w;
void fun1(int t){
    for(int i=0;i<100;i++){
        a+=w;
        b-=w;
        t+=a+b;
    }
    n=t;
}

GCC -O3 -fwrapv (Loop part of the code):
<bb 3> [local count: 1063004408]:
  # t_20 = PHI <t_15(3), t_10(D)(2)>
  # ivtmp_1 = PHI <ivtmp_7(3), 100(2)>
  # ivtmp.32_34 = PHI <ivtmp.32_35(3), ivtmp.32_26(2)>
  # ivtmp.33_25 = PHI <_31(3), ivtmp.33_22(2)>
  # DEBUG i => NULL
  # DEBUG t => NULL
  b_lsm.18_24 = (int) ivtmp.33_25;
  a_lsm.17_21 = (int) ivtmp.32_34;
  # DEBUG i => NULL
  # DEBUG t => NULL
  # DEBUG BEGIN_STMT
  # DEBUG BEGIN_STMT
  # DEBUG BEGIN_STMT
  _6 = a_lsm.17_21 + b_lsm.18_24;
  t_15 = _6 + t_20;
  # DEBUG t => t_15
  # DEBUG BEGIN_STMT
  # DEBUG i => NULL
  # DEBUG t => t_15
  # DEBUG BEGIN_STMT
  ivtmp_7 = ivtmp_1 + 4294967295;
  ivtmp.32_35 = _27 + ivtmp.32_34;
  _14 = (unsigned int) w.1_2;
  _31 = ivtmp.33_25 - _14;
  if (ivtmp_7 != 0)
    goto <bb 3>; [98.99%]
  else
    goto <bb 4>; [1.01%]

fun1(int):
        mov     r9d, DWORD PTR a[rip]
        mov     esi, DWORD PTR w[rip]
        mov     eax, 100
        mov     r10d, DWORD PTR b[rip]
        mov     ecx, r9d
        mov     edx, r10d
.L2:
        lea     r8d, [rcx+rdx]
        add     ecx, esi
        sub     edx, esi
        add     edi, r8d
        sub     eax, 1
        jne     .L2
        imul    eax, esi, -99
        add     r9d, esi
        mov     DWORD PTR n[rip], edi
        sub     r9d, eax
        sub     eax, esi
        add     eax, r10d
        mov     DWORD PTR a[rip], r9d
        mov     DWORD PTR b[rip], eax
        ret


Expected code (Clang):
fun1(int):                               # @fun1(int)
        mov     eax, dword ptr [rip + a]
        mov     ecx, dword ptr [rip + b]
        imul    edx, dword ptr [rip + w], 100
        lea     esi, [rcx + rax]
        add     eax, edx
        imul    esi, esi, 100
        sub     ecx, edx
        add     esi, edi
        mov     dword ptr [rip + a], eax
        mov     dword ptr [rip + b], ecx
        mov     dword ptr [rip + n], esi
        ret

The following code is optimized by gcc as expected in terms of loop invariants,
which may help with this issue:
int a,b,n;
int w;
void fun2(int t){
    n=t;
    for(int i=0;i<100;i++){
        a+=w;
        b-=w;
        n+=a+b;
    }
}

Thank you very much for your time and effort! We look forward to hearing from
you.

Reply via email to