https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101097

            Bug ID: 101097
           Summary: Vectorizer is too eager to use vec_unpack
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ubizjak at gmail dot com
  Target Milestone: ---

The following two testcases:

/* Testcase with a trip count of 8: each iteration adds one unsigned short
   from p1 to one from p2 and stores the sum as an int in p3.  Only p3 is
   __restrict-qualified.  */
void
foo (unsigned short* p1, unsigned short* p2, int* __restrict p3)
{
    for (int i = 0 ; i != 8; i++)
     p3[i] = p1[i] + p2[i];
     return;
}

/* Testcase with a trip count of 4: same loop body as the 8-iteration case,
   adding unsigned short elements of p1 and p2 and storing each sum as an int
   in p3.  Only p3 is __restrict-qualified.  */
void
bar (unsigned short* p1, unsigned short* p2, int* __restrict p3)
{
    for (int i = 0 ; i != 4; i++)
     p3[i] = p1[i] + p2[i];
     return;
}

compile with -O3 -mavx2 to:

foo:
        vmovdqu (%rdi), %xmm1
        vmovdqu (%rsi), %xmm0
        vpmovzxwd       %xmm1, %xmm3
        vpsrldq $8, %xmm1, %xmm1
        vpmovzxwd       %xmm0, %xmm2
        vpsrldq $8, %xmm0, %xmm0
        vpmovzxwd       %xmm1, %xmm1
        vpaddd  %xmm3, %xmm2, %xmm2
        vpmovzxwd       %xmm0, %xmm0
        vmovdqu %xmm2, (%rdx)
        vpaddd  %xmm1, %xmm0, %xmm0
        vmovdqu %xmm0, 16(%rdx)
        ret

bar:
        vpmovzxwd       (%rsi), %xmm1
        vpmovzxwd       (%rdi), %xmm0
        vpaddd  %xmm1, %xmm0, %xmm0
        vmovdqu %xmm0, (%rdx)
        ret

However, with "foo" the vec_unpack* named patterns somehow interfere with the
compilation, preventing the compiler from generating code similar to "bar", but
with %ymm registers.

Disabling the vec_unpacku_hi_<mode> and vec_unpacku_lo_<mode> patterns in
sse.md results in the optimal code for foo:

foo:
        vpmovzxwd       (%rsi), %ymm0
        vpmovzxwd       (%rdi), %ymm1
        vpaddd  %ymm1, %ymm0, %ymm0
        vmovdqu %ymm0, (%rdx)
        vzeroupper
        ret

Reply via email to