https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125880

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
Blueprint for testcases not requiring vectorization:

/* Vector of 16-bit signed integers, 16 bytes wide (eight lanes). */
typedef short v8hi __attribute__((vector_size(16)));

/*
 * Assemble a v8hi from eight independent scalar loads.
 *
 * Lanes 0-3 receive the element each of p, q, r and w points at; lanes
 * 4-7 receive the element s positions further along from each of the
 * same four pointers, in the same order.
 */
v8hi foo (short *p, short *q, short *r, short *w, int s)
{
  v8hi lanes = { p[0], q[0], r[0], w[0], p[s], q[s], r[s], w[s] };

  return lanes;
}


Currently, with -march=znver5, this gets you

foo:
.LFB0:
        .cfi_startproc
        movzwl  (%rdi), %eax
        movslq  %r8d, %r8
        vmovd   %eax, %xmm0
        movzwl  (%rdx), %eax
        vpinsrw $1, (%rsi), %xmm0, %xmm0
        vmovd   %eax, %xmm3
        movzwl  (%rdi,%r8,2), %eax
        vpinsrw $1, (%rcx), %xmm3, %xmm3
        vmovd   %eax, %xmm1
        movzwl  (%rdx,%r8,2), %eax
        vpinsrw $1, (%rsi,%r8,2), %xmm1, %xmm1
        vpunpckldq      %xmm3, %xmm0, %xmm0
        vmovd   %eax, %xmm2
        vpinsrw $1, (%rcx,%r8,2), %xmm2, %xmm2
        vpunpckldq      %xmm2, %xmm1, %xmm1
        vpunpcklqdq     %xmm1, %xmm0, %xmm0

Reply via email to