https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46391

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
      Known to work|                            |12.0, 7.5.0
         Resolution|---                         |FIXED
      Known to fail|                            |4.3.5

--- Comment #5 from Richard Biener <rguenth at gcc dot gnu.org> ---
It is.  Slightly altered testcase (to avoid unrolling on GIMPLE), with
-fno-schedule-insns2 on x86_64:

.L2:
        movdqu  (%rdi,%rax), %xmm1
        paddb   %xmm0, %xmm1
        movups  %xmm1, (%rsi,%rax)
        movdqu  16(%rdi,%rax), %xmm2
        paddb   %xmm0, %xmm2
        movups  %xmm2, 16(%rsi,%rax)
        movdqu  32(%rdi,%rax), %xmm3
        paddb   %xmm0, %xmm3
        movups  %xmm3, 32(%rsi,%rax)
        movdqu  48(%rdi,%rax), %xmm4
        paddb   %xmm0, %xmm4
        movups  %xmm4, 48(%rsi,%rax)
        movdqu  64(%rdi,%rax), %xmm5
        paddb   %xmm0, %xmm5
        movups  %xmm5, 64(%rsi,%rax)
        movdqu  80(%rdi,%rax), %xmm6
        paddb   %xmm0, %xmm6
        movups  %xmm6, 80(%rsi,%rax)
        movdqu  96(%rdi,%rax), %xmm7
        paddb   %xmm0, %xmm7
        movups  %xmm7, 96(%rsi,%rax)
        movdqu  112(%rdi,%rax), %xmm8
        paddb   %xmm0, %xmm8
        movups  %xmm8, 112(%rsi,%rax)
        subq    $-128, %rax
        cmpq    $4096, %rax
        jne     .L2

and without:

.L2:
        movdqu  (%rdi,%rax), %xmm1
        movdqu  16(%rdi,%rax), %xmm2
        movdqu  32(%rdi,%rax), %xmm3
        movdqu  48(%rdi,%rax), %xmm4
        movdqu  64(%rdi,%rax), %xmm5
        paddb   %xmm0, %xmm1
        paddb   %xmm0, %xmm2
        movdqu  80(%rdi,%rax), %xmm6
        movdqu  96(%rdi,%rax), %xmm7
        paddb   %xmm0, %xmm3
        paddb   %xmm0, %xmm4
        movups  %xmm1, (%rsi,%rax)
        movdqu  112(%rdi,%rax), %xmm8
        paddb   %xmm0, %xmm5
        paddb   %xmm0, %xmm6
        movups  %xmm2, 16(%rsi,%rax)
        paddb   %xmm0, %xmm7
        movups  %xmm3, 32(%rsi,%rax)
        paddb   %xmm0, %xmm8
        movups  %xmm4, 48(%rsi,%rax)
        movups  %xmm5, 64(%rsi,%rax)
        movups  %xmm6, 80(%rsi,%rax)
        movups  %xmm7, 96(%rsi,%rax)
        movups  %xmm8, 112(%rsi,%rax)
        subq    $-128, %rax
        cmpq    $4096, %rax
        jne     .L2

and hoisting the later loads above the earlier stores like that is only
possible if this dependence is not visible.  GCC 4.3 still shows the problem;
GCC 7 doesn't.

Reply via email to