[Bug tree-optimization/94945] Missed optimization: Carry chain not recognized in manually unrolled loop

pinskia at gcc dot gnu.org via Gcc-bugs Thu, 19 Aug 2021 18:22:34 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94945


Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |FIXED
             Status|UNCONFIRMED                 |RESOLVED
           See Also|                            |https://gcc.gnu.org/bugzill
                   |                            |a/show_bug.cgi?id=94913,
                   |                            |https://gcc.gnu.org/bugzill
                   |                            |a/show_bug.cgi?id=97387
   Target Milestone|---                         |11.0

--- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
GCC 11+ can produce:
.L3:
        movl    %eax, %r10d
        addb    $-1, %r9b
        leal    1(%rax), %r9d
        movq    (%rdi,%r10,8), %rdx
        adcq    (%rsi,%r10,8), %rdx
        movq    %rdx, (%r8,%r10,8)
        movq    (%rdi,%r9,8), %rdx
        leal    3(%rax), %r10d
        adcq    (%rsi,%r9,8), %rdx
        movq    %rdx, (%r8,%r9,8)
        leal    2(%rax), %r9d
        movq    (%rdi,%r9,8), %rdx
        adcq    (%rsi,%r9,8), %rdx
        movq    %rdx, (%r8,%r9,8)
        movq    (%rdi,%r10,8), %rdx
        adcq    (%rsi,%r10,8), %rdx
        setc    %r9b
        addl    $4, %eax
        movq    %rdx, (%r8,%r10,8)
        cmpl    %eax, %ecx
        ja      .L3


With a single iteration and -funroll-loops, GCC 11 gives this for the inner
loop:

.L12:
        movq    (%r8,%r10), %r11
        addb    $-1, %dl
        adcq    (%rdi,%r10), %r11
        movq    %r11, (%rsi,%r10)
        movq    8(%r8,%r10), %r9
        adcq    8(%rdi,%r10), %r9
        movq    %r9, 8(%rsi,%r10)
        movq    16(%r8,%r10), %rax
        adcq    16(%rdi,%r10), %rax
        movq    %rax, 16(%rsi,%r10)
        movq    24(%r8,%r10), %rdx
        adcq    24(%rdi,%r10), %rdx
        movq    %rdx, 24(%rsi,%r10)
        movq    32(%r8,%r10), %r11
        adcq    32(%rdi,%r10), %r11
        movq    %r11, 32(%rsi,%r10)
        movq    40(%r8,%r10), %r9
        adcq    40(%rdi,%r10), %r9
        movq    %r9, 40(%rsi,%r10)
        movq    48(%r8,%r10), %rax
        adcq    48(%rdi,%r10), %rax
        movq    %rax, 48(%rsi,%r10)
        movq    56(%r8,%r10), %r11
        adcq    56(%rdi,%r10), %r11
        movq    %r11, 56(%rsi,%r10)
        setc    %dl
        addq    $64, %r10
        cmpq    %rcx, %r10
        jne     .L12

So this is fixed in GCC 11.

Note: this was most likely fixed by r11-145 (which was pushed around the time
you filed the bug) and r11-3882 (later last year).

[Bug tree-optimization/94945] Missed optimization: Carry chain not recognized in manually unrolled loop

Reply via email to