On Tue, May 19, 2026 at 4:52 PM Jakub Jelinek <[email protected]> wrote:
>
> On Tue, May 19, 2026 at 10:30:16AM +0200, Jakub Jelinek wrote:
> > On Tue, May 19, 2026 at 10:51:37AM +0300, Alexander Monakov wrote:
> > > Thanks for looking at the issue, I really appreciate it. The same problem
> > > exists with 64-bit lanes (V2DF/V2DI modes, we fail to utilize vpermilpd).
> >
> > The control in that case is in bits 1 and 65 rather than 0 and 64.
> > So, in order to use vpermilpd for
> > __builtin_shuffle (v2di_or_v2df, v2di);
> > one would need to first shift the mask (or vpaddq with itself).
> > Though, that is still shorter than what we emit right now.
>
> The following seems to work for me.
>
> -       movl    $1, %eax
> -       vmovq   %rax, %xmm2
> -       vpunpcklqdq     %xmm2, %xmm2, %xmm2
> -       vpand   %xmm2, %xmm1, %xmm1
> -       vpsllq  $3, %xmm1, %xmm1
> -       vpshufb .LC1(%rip), %xmm1, %xmm1
> -       vpaddb  .LC2(%rip), %xmm1, %xmm1
> -       vpshufb %xmm1, %xmm0, %xmm0
> +       vpaddq  %xmm1, %xmm1, %xmm1
> +       vpermilpd       %xmm1, %xmm0, %xmm0
>
> for both V2DI and V2DF.
>
> Ok for trunk if it passes full bootstrap/regtest on x86_64-linux and
> i686-linux?

Ok.
>
> 2026-05-19  Jakub Jelinek  <[email protected]>
>
>         PR target/125357
>         * config/i386/i386-expand.cc (ix86_expand_vec_perm): For TARGET_AVX
>         one_operand_shuffle handle also V2DImode and V2DFmode using
>         vpaddq and vpermilpd.
>
>         * gcc.target/i386/avx-pr125357-2.c: New test.
>         * gcc.target/i386/avx2-pr125357-2.c: New test.
>
> --- gcc/config/i386/i386-expand.cc.jj   2026-05-19 10:11:01.102445891 +0200
> +++ gcc/config/i386/i386-expand.cc      2026-05-19 10:37:46.988388686 +0200
> @@ -5824,21 +5824,34 @@ ix86_expand_vec_perm (rtx operands[])
>         }
>      }
>
> -  if (TARGET_AVX
> -      && one_operand_shuffle
> -      && (mode == V4SImode || mode == V4SFmode))
> -    {
> -      if (mode == V4SImode)
> -       {
> -         op0 = gen_lowpart (V4SFmode, op0);
> -         t1 = gen_reg_rtx (V4SFmode);
> -         emit_insn (gen_avx_vpermilvarv4sf3 (t1, op0, mask));
> -         emit_move_insn (target, gen_lowpart (mode, t1));
> -       }
> -      else
> +  if (TARGET_AVX && one_operand_shuffle)
> +    switch (mode)
> +      {
> +      case V4SImode:
> +       op0 = gen_lowpart (V4SFmode, op0);
> +       t1 = gen_reg_rtx (V4SFmode);
> +       emit_insn (gen_avx_vpermilvarv4sf3 (t1, op0, mask));
> +       emit_move_insn (target, gen_lowpart (mode, t1));
> +       return;
> +      case V4SFmode:
>         emit_insn (gen_avx_vpermilvarv4sf3 (target, op0, mask));
> -      return;
> -    }
> +       return;
> +      case V2DImode:
> +       op0 = gen_lowpart (V2DFmode, op0);
> +       t1 = gen_reg_rtx (V2DImode);
> +       t2 = gen_reg_rtx (V2DFmode);
> +       emit_insn (gen_addv2di3 (t1, mask, mask));
> +       emit_insn (gen_avx_vpermilvarv2df3 (t2, op0, t1));
> +       emit_move_insn (target, gen_lowpart (mode, t2));
> +       return;
> +      case V2DFmode:
> +       t1 = gen_reg_rtx (V2DImode);
> +       emit_insn (gen_addv2di3 (t1, mask, mask));
> +       emit_insn (gen_avx_vpermilvarv2df3 (target, op0, t1));
> +       return;
> +      default:
> +       break;
> +      }
>
>    if (TARGET_XOP)
>      {
> --- gcc/testsuite/gcc.target/i386/avx-pr125357-2.c.jj   2026-05-19 10:42:40.697456727 +0200
> +++ gcc/testsuite/gcc.target/i386/avx-pr125357-2.c      2026-05-19 10:44:05.909025855 +0200
> @@ -0,0 +1,20 @@
> +/* PR target/125357 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx -mno-avx2 -mno-xop" } */
> +/* { dg-final { scan-assembler-times "\tvpaddq\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tvpermilpd\t" 2 } } */
> +
> +typedef long long v2di __attribute__((vector_size (16)));
> +typedef double v2df __attribute__((vector_size (16)));
> +
> +v2di
> +foo (v2di x, v2di y)
> +{
> +  return __builtin_shuffle (x, y);
> +}
> +
> +v2df
> +bar (v2df x, v2di y)
> +{
> +  return __builtin_shuffle (x, y);
> +}
> --- gcc/testsuite/gcc.target/i386/avx2-pr125357-2.c.jj  2026-05-19 10:43:22.313757908 +0200
> +++ gcc/testsuite/gcc.target/i386/avx2-pr125357-2.c     2026-05-19 10:43:50.024292593 +0200
> @@ -0,0 +1,20 @@
> +/* PR target/125357 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx2 -mno-avx512f -mno-xop" } */
> +/* { dg-final { scan-assembler-times "\tvpaddq\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tvpermilpd\t" 2 } } */
> +
> +typedef long long v2di __attribute__((vector_size (16)));
> +typedef double v2df __attribute__((vector_size (16)));
> +
> +v2di
> +foo (v2di x, v2di y)
> +{
> +  return __builtin_shuffle (x, y);
> +}
> +
> +v2df
> +bar (v2df x, v2di y)
> +{
> +  return __builtin_shuffle (x, y);
> +}
>
>
>         Jakub
>


-- 
BR,
Hongtao

Reply via email to