https://gcc.gnu.org/g:79649a5dcd81bc05c0ba591068c9075de43bd417

commit r15-222-g79649a5dcd81bc05c0ba591068c9075de43bd417
Author: Roger Sayle <ro...@nextmovesoftware.com>
Date:   Tue May 7 07:14:40 2024 +0100

    PR target/106060: Improved SSE vector constant materialization on x86.
    
    This patch resolves PR target/106060 by providing efficient methods for
    materializing/synthesizing special "vector" constants on x86.  Currently
    there are three methods of materializing a vector constant; the most
    general is to load a vector from the constant pool, secondly "duplicated"
    constants can be synthesized by moving an integer between units and
    broadcasting (or shuffling it), and finally the special cases of the
    all-zeros vector and all-ones vectors can be loaded via a single SSE
    instruction.  This patch handles additional cases that can be synthesized
    in two instructions, loading an all-ones vector followed by another SSE
    instruction.  Following my recent patch for PR target/112992, there's
    conveniently a single place in i386-expand.cc where these special cases
    can be handled.
    
    Two examples are given in the original bugzilla PR for 106060.
    
    __m256i should_be_cmpeq_abs ()
    {
      return _mm256_set1_epi8 (1);
    }
    
    is now generated (with -O3 -march=x86-64-v3) as:
    
            vpcmpeqd        %ymm0, %ymm0, %ymm0
            vpabsb  %ymm0, %ymm0
            ret
    
    and
    
    __m256i should_be_cmpeq_add ()
    {
      return _mm256_set1_epi8 (-2);
    }
    
    is now generated as:
    
            vpcmpeqd        %ymm0, %ymm0, %ymm0
            vpaddb  %ymm0, %ymm0, %ymm0
            ret
    
    2024-05-07  Roger Sayle  <ro...@nextmovesoftware.com>
                Hongtao Liu  <hongtao....@intel.com>
    
    gcc/ChangeLog
            PR target/106060
            * config/i386/i386-expand.cc (enum ix86_vec_bcast_alg): New.
            (struct ix86_vec_bcast_map_simode_t): New type for table below.
            (ix86_vec_bcast_map_simode): Table of SImode constants that may
            be efficiently synthesized by a ix86_vec_bcast_alg method.
            (ix86_vec_bcast_map_simode_cmp): New comparator for bsearch.
            (ix86_vector_duplicate_simode_const): Efficiently synthesize
            V4SImode and V8SImode constants that duplicate special constants.
            (ix86_vector_duplicate_value): Attempt to synthesize "special"
            vector constants using ix86_vector_duplicate_simode_const.
            * config/i386/i386.cc (ix86_rtx_costs) <case ABS>: ABS of a
            vector integer mode costs with a single SSE instruction.
    
    gcc/testsuite/ChangeLog
            PR target/106060
            * gcc.target/i386/auto-init-8.c: Update test case.
            * gcc.target/i386/avx512fp16-13.c: Likewise.
            * gcc.target/i386/pr100865-9a.c: Likewise.
            * gcc.target/i386/pr101796-1.c: Likewise.
            * gcc.target/i386/pr106060-1.c: New test case.
            * gcc.target/i386/pr106060-2.c: Likewise.
            * gcc.target/i386/pr106060-3.c: Likewise.
            * gcc.target/i386/pr70314.c: Update test case.
            * gcc.target/i386/vect-shiftv4qi.c: Likewise.
            * gcc.target/i386/vect-shiftv8qi.c: Likewise.

Diff:
---
 gcc/config/i386/i386-expand.cc                 | 364 ++++++++++++++++++++++++-
 gcc/config/i386/i386.cc                        |   2 +
 gcc/testsuite/gcc.target/i386/auto-init-8.c    |   2 +-
 gcc/testsuite/gcc.target/i386/avx512fp16-13.c  |   3 -
 gcc/testsuite/gcc.target/i386/pr100865-9a.c    |   2 +-
 gcc/testsuite/gcc.target/i386/pr101796-1.c     |   6 +-
 gcc/testsuite/gcc.target/i386/pr106060-1.c     |  12 +
 gcc/testsuite/gcc.target/i386/pr106060-2.c     |  13 +
 gcc/testsuite/gcc.target/i386/pr106060-3.c     |  14 +
 gcc/testsuite/gcc.target/i386/pr70314.c        |   2 +-
 gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c |   2 +-
 gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c |   2 +-
 12 files changed, 411 insertions(+), 13 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 8bb8f21e686..a6132911e6a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -15696,6 +15696,332 @@ s4fma_expand:
   gcc_unreachable ();
 }
 
+/* Methods used by ix86_vector_duplicate_simode_const to synthesize a
+   vector that duplicates an SImode constant.  VEC_BCAST_PXOR and
+   VEC_BCAST_PCMPEQ load the all-zeros and all-ones vector directly;
+   every other method first loads an all-ones vector and then applies
+   one further operation to it.  */
+enum ix86_vec_bcast_alg
+{
+  VEC_BCAST_PXOR,	/* Move of CONST0_RTX.  */
+  VEC_BCAST_PCMPEQ,	/* Move of CONSTM1_RTX.  */
+  VEC_BCAST_PABSB,	/* ABS of all-ones, on QImode elements.  */
+  VEC_BCAST_PADDB,	/* All-ones plus all-ones, on QImode elements.  */
+  VEC_BCAST_PSRLW,	/* Logical right shift of all-ones HImode elements.  */
+  VEC_BCAST_PSRLD,	/* Logical right shift of all-ones SImode elements.  */
+  VEC_BCAST_PSLLW,	/* Left shift of all-ones HImode elements.  */
+  VEC_BCAST_PSLLD	/* Left shift of all-ones SImode elements.  */
+};
+
+struct ix86_vec_bcast_map_simode_t  /* Entry of ix86_vec_bcast_map_simode.  */
+{
+  unsigned int key;		/* SImode constant being duplicated.  */
+  enum ix86_vec_bcast_alg alg;	/* Method that synthesizes KEY.  */
+  unsigned int arg;		/* Shift count for the shift methods.  */
+};
+
+/* Table mapping SImode constants to the method that synthesizes them.
+   Entries are in ascending order of the unsigned KEY field; this table
+   must be kept sorted as values are looked-up using bsearch.  ARG is the
+   shift count for the shift methods and is zero for the others.  */
+static const ix86_vec_bcast_map_simode_t ix86_vec_bcast_map_simode[] = {
+  { 0x00000000, VEC_BCAST_PXOR,    0 },
+  { 0x00000001, VEC_BCAST_PSRLD,  31 },
+  { 0x00000003, VEC_BCAST_PSRLD,  30 },
+  { 0x00000007, VEC_BCAST_PSRLD,  29 },
+  { 0x0000000f, VEC_BCAST_PSRLD,  28 },
+  { 0x0000001f, VEC_BCAST_PSRLD,  27 },
+  { 0x0000003f, VEC_BCAST_PSRLD,  26 },
+  { 0x0000007f, VEC_BCAST_PSRLD,  25 },
+  { 0x000000ff, VEC_BCAST_PSRLD,  24 },
+  { 0x000001ff, VEC_BCAST_PSRLD,  23 },
+  { 0x000003ff, VEC_BCAST_PSRLD,  22 },
+  { 0x000007ff, VEC_BCAST_PSRLD,  21 },
+  { 0x00000fff, VEC_BCAST_PSRLD,  20 },
+  { 0x00001fff, VEC_BCAST_PSRLD,  19 },
+  { 0x00003fff, VEC_BCAST_PSRLD,  18 },
+  { 0x00007fff, VEC_BCAST_PSRLD,  17 },
+  { 0x0000ffff, VEC_BCAST_PSRLD,  16 },
+  { 0x00010001, VEC_BCAST_PSRLW,  15 },
+  { 0x0001ffff, VEC_BCAST_PSRLD,  15 },
+  { 0x00030003, VEC_BCAST_PSRLW,  14 },
+  { 0x0003ffff, VEC_BCAST_PSRLD,  14 },
+  { 0x00070007, VEC_BCAST_PSRLW,  13 },
+  { 0x0007ffff, VEC_BCAST_PSRLD,  13 },
+  { 0x000f000f, VEC_BCAST_PSRLW,  12 },
+  { 0x000fffff, VEC_BCAST_PSRLD,  12 },
+  { 0x001f001f, VEC_BCAST_PSRLW,  11 },
+  { 0x001fffff, VEC_BCAST_PSRLD,  11 },
+  { 0x003f003f, VEC_BCAST_PSRLW,  10 },
+  { 0x003fffff, VEC_BCAST_PSRLD,  10 },
+  { 0x007f007f, VEC_BCAST_PSRLW,   9 },
+  { 0x007fffff, VEC_BCAST_PSRLD,   9 },
+  { 0x00ff00ff, VEC_BCAST_PSRLW,   8 },
+  { 0x00ffffff, VEC_BCAST_PSRLD,   8 },
+  { 0x01010101, VEC_BCAST_PABSB,   0 },
+  { 0x01ff01ff, VEC_BCAST_PSRLW,   7 },
+  { 0x01ffffff, VEC_BCAST_PSRLD,   7 },
+  { 0x03ff03ff, VEC_BCAST_PSRLW,   6 },
+  { 0x03ffffff, VEC_BCAST_PSRLD,   6 },
+  { 0x07ff07ff, VEC_BCAST_PSRLW,   5 },
+  { 0x07ffffff, VEC_BCAST_PSRLD,   5 },
+  { 0x0fff0fff, VEC_BCAST_PSRLW,   4 },
+  { 0x0fffffff, VEC_BCAST_PSRLD,   4 },
+  { 0x1fff1fff, VEC_BCAST_PSRLW,   3 },
+  { 0x1fffffff, VEC_BCAST_PSRLD,   3 },
+  { 0x3fff3fff, VEC_BCAST_PSRLW,   2 },
+  { 0x3fffffff, VEC_BCAST_PSRLD,   2 },
+  { 0x7fff7fff, VEC_BCAST_PSRLW,   1 },
+  { 0x7fffffff, VEC_BCAST_PSRLD,   1 },
+  { 0x80000000, VEC_BCAST_PSLLD,  31 },
+  { 0x80008000, VEC_BCAST_PSLLW,  15 },
+  { 0xc0000000, VEC_BCAST_PSLLD,  30 },
+  { 0xc000c000, VEC_BCAST_PSLLW,  14 },
+  { 0xe0000000, VEC_BCAST_PSLLD,  29 },
+  { 0xe000e000, VEC_BCAST_PSLLW,  13 },
+  { 0xf0000000, VEC_BCAST_PSLLD,  28 },
+  { 0xf000f000, VEC_BCAST_PSLLW,  12 },
+  { 0xf8000000, VEC_BCAST_PSLLD,  27 },
+  { 0xf800f800, VEC_BCAST_PSLLW,  11 },
+  { 0xfc000000, VEC_BCAST_PSLLD,  26 },
+  { 0xfc00fc00, VEC_BCAST_PSLLW,  10 },
+  { 0xfe000000, VEC_BCAST_PSLLD,  25 },
+  { 0xfe00fe00, VEC_BCAST_PSLLW,   9 },
+  { 0xfefefefe, VEC_BCAST_PADDB,   0 },
+  { 0xff000000, VEC_BCAST_PSLLD,  24 },
+  { 0xff00ff00, VEC_BCAST_PSLLW,   8 },
+  { 0xff800000, VEC_BCAST_PSLLD,  23 },
+  { 0xff80ff80, VEC_BCAST_PSLLW,   7 },
+  { 0xffc00000, VEC_BCAST_PSLLD,  22 },
+  { 0xffc0ffc0, VEC_BCAST_PSLLW,   6 },
+  { 0xffe00000, VEC_BCAST_PSLLD,  21 },
+  { 0xffe0ffe0, VEC_BCAST_PSLLW,   5 },
+  { 0xfff00000, VEC_BCAST_PSLLD,  20 },
+  { 0xfff0fff0, VEC_BCAST_PSLLW,   4 },
+  { 0xfff80000, VEC_BCAST_PSLLD,  19 },
+  { 0xfff8fff8, VEC_BCAST_PSLLW,   3 },
+  { 0xfffc0000, VEC_BCAST_PSLLD,  18 },
+  { 0xfffcfffc, VEC_BCAST_PSLLW,   2 },
+  { 0xfffe0000, VEC_BCAST_PSLLD,  17 },
+  { 0xfffefffe, VEC_BCAST_PSLLW,   1 },
+  { 0xffff0000, VEC_BCAST_PSLLD,  16 },
+  { 0xffff8000, VEC_BCAST_PSLLD,  15 },
+  { 0xffffc000, VEC_BCAST_PSLLD,  14 },
+  { 0xffffe000, VEC_BCAST_PSLLD,  13 },
+  { 0xfffff000, VEC_BCAST_PSLLD,  12 },
+  { 0xfffff800, VEC_BCAST_PSLLD,  11 },
+  { 0xfffffc00, VEC_BCAST_PSLLD,  10 },
+  { 0xfffffe00, VEC_BCAST_PSLLD,   9 },
+  { 0xffffff00, VEC_BCAST_PSLLD,   8 },
+  { 0xffffff80, VEC_BCAST_PSLLD,   7 },
+  { 0xffffffc0, VEC_BCAST_PSLLD,   6 },
+  { 0xffffffe0, VEC_BCAST_PSLLD,   5 },
+  { 0xfffffff0, VEC_BCAST_PSLLD,   4 },
+  { 0xfffffff8, VEC_BCAST_PSLLD,   3 },
+  { 0xfffffffc, VEC_BCAST_PSLLD,   2 },
+  { 0xfffffffe, VEC_BCAST_PSLLD,   1 },
+  { 0xffffffff, VEC_BCAST_PCMPEQ,  0 }
+};
+
+/* Comparator for bsearch on ix86_vec_bcast_map_simode.  KEY points to the
+   unsigned int being looked up and ENTRY to a table element.  Returns a
+   negative, zero or positive value as *KEY is less than, equal to or
+   greater than the key of ENTRY.  The keys span the whole unsigned 32-bit
+   range, so they are compared explicitly: the difference of two such keys
+   does not fit in an int, and its sign is wrong whenever the keys differ
+   by 2^31 or more.  */
+static int
+ix86_vec_bcast_map_simode_cmp (const void *key, const void *entry)
+{
+  unsigned int lhs = *(const unsigned int *) key;
+  unsigned int rhs = ((const ix86_vec_bcast_map_simode_t *) entry)->key;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* A subroutine of ix86_vector_duplicate_value.  Tries to efficiently
+   materialize V4SImode, V8SImode and V16SImode vectors from SImode
+   integer constants.  MODE is the vector mode of TARGET and VAL is the
+   SImode constant to be duplicated.  Returns true if instructions
+   setting TARGET have been emitted.  Returns false, without emitting
+   anything, if VAL has no entry in ix86_vec_bcast_map_simode or if the
+   method it maps to is not enabled for MODE by the target checks
+   below.  */
+static bool
+ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
+                                   unsigned int val)
+{
+  const ix86_vec_bcast_map_simode_t *entry;
+  rtx tmp1, tmp2;
+
+  entry = (const ix86_vec_bcast_map_simode_t*)
+         bsearch(&val, ix86_vec_bcast_map_simode,
+                 ARRAY_SIZE (ix86_vec_bcast_map_simode),
+                 sizeof (ix86_vec_bcast_map_simode_t),
+                 ix86_vec_bcast_map_simode_cmp);
+  if (!entry)  /* VAL has no entry in the table.  */
+    return false;
+
+  switch (entry->alg)
+    {
+    case VEC_BCAST_PXOR:  /* All-zeros vector.  */
+      if ((mode == V8SImode && !TARGET_AVX2)
+         || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+       return false;
+      emit_move_insn (target, CONST0_RTX (mode));
+      return true;
+
+    case VEC_BCAST_PCMPEQ:  /* All-ones vector.  */
+      if ((mode == V4SImode && !TARGET_SSE2)
+         || (mode == V8SImode && !TARGET_AVX2)
+         || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+       return false;
+      emit_move_insn (target, CONSTM1_RTX (mode));
+      return true;
+
+    case VEC_BCAST_PABSB:  /* ABS of all-ones QImode elements.  */
+      if (mode == V4SImode && TARGET_SSE2)
+       {
+         tmp1 = gen_reg_rtx (V16QImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V16QImode));
+         tmp2 = gen_reg_rtx (V16QImode);
+         emit_insn (gen_absv16qi2 (tmp2, tmp1));
+       }
+      else if (mode == V8SImode && TARGET_AVX2)
+       {
+         tmp1 = gen_reg_rtx (V32QImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V32QImode));
+         tmp2 = gen_reg_rtx (V32QImode);
+         emit_insn (gen_absv32qi2 (tmp2, tmp1));
+       }
+      else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+       {
+         tmp1 = gen_reg_rtx (V64QImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
+         tmp2 = gen_reg_rtx (V64QImode);
+         emit_insn (gen_absv64qi2 (tmp2, tmp1));
+       }
+      else
+       return false;
+      break;
+
+    case VEC_BCAST_PADDB:  /* All-ones QImode elements added to themselves.  */
+      if (mode == V4SImode && TARGET_SSE2)
+       {
+         tmp1 = gen_reg_rtx (V16QImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V16QImode));
+         tmp2 = gen_reg_rtx (V16QImode);
+         emit_insn (gen_addv16qi3 (tmp2, tmp1, tmp1));
+       }
+      else if (mode == V8SImode && TARGET_AVX2)
+       {
+         tmp1 = gen_reg_rtx (V32QImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V32QImode));
+         tmp2 = gen_reg_rtx (V32QImode);
+         emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1));
+       }
+      else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+       {
+         tmp1 = gen_reg_rtx (V64QImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
+         tmp2 = gen_reg_rtx (V64QImode);
+         emit_insn (gen_addv64qi3 (tmp2, tmp1, tmp1));
+       }
+      else
+       return false;
+      break;
+
+    case VEC_BCAST_PSRLW:  /* All-ones HImode elements shifted right by ARG.  */
+      if (mode == V4SImode && TARGET_SSE2)
+       {
+         tmp1 = gen_reg_rtx (V8HImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V8HImode));
+         tmp2 = gen_reg_rtx (V8HImode);
+         emit_insn (gen_lshrv8hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
+       }
+      else if (mode == V8SImode && TARGET_AVX2)
+       {
+         tmp1 = gen_reg_rtx (V16HImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V16HImode));
+         tmp2 = gen_reg_rtx (V16HImode);
+         emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
+       }
+      else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+       {
+         tmp1 = gen_reg_rtx (V32HImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
+         tmp2 = gen_reg_rtx (V32HImode);
+         emit_insn (gen_lshrv32hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
+       }
+      else
+       return false;
+      break;
+
+    case VEC_BCAST_PSRLD:  /* Right shift done in MODE, directly to TARGET.  */
+      if (mode == V4SImode && TARGET_SSE2)
+       {
+         tmp1 = gen_reg_rtx (V4SImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V4SImode));
+         emit_insn (gen_lshrv4si3 (target, tmp1, GEN_INT (entry->arg)));
+         return true;
+       }
+      else if (mode == V8SImode && TARGET_AVX2)
+       {
+         tmp1 = gen_reg_rtx (V8SImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V8SImode));
+         emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg)));
+         return true;
+       }
+      else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+       {
+         tmp1 = gen_reg_rtx (V16SImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
+         emit_insn (gen_lshrv16si3 (target, tmp1, GEN_INT (entry->arg)));
+         return true;
+       }
+      else
+       return false;
+      break;  /* Not reached: every branch above returns.  */
+
+    case VEC_BCAST_PSLLW:  /* All-ones HImode elements shifted left by ARG.  */
+      if (mode == V4SImode && TARGET_SSE2)
+       {
+         tmp1 = gen_reg_rtx (V8HImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V8HImode));
+         tmp2 = gen_reg_rtx (V8HImode);
+         emit_insn (gen_ashlv8hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
+       }
+      else if (mode == V8SImode && TARGET_AVX2)
+       {
+         tmp1 = gen_reg_rtx (V16HImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V16HImode));
+         tmp2 = gen_reg_rtx (V16HImode);
+         emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
+       }
+      else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+       {
+         tmp1 = gen_reg_rtx (V32HImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
+         tmp2 = gen_reg_rtx (V32HImode);
+         emit_insn (gen_ashlv32hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
+       }
+      else
+       return false;
+      break;
+
+    case VEC_BCAST_PSLLD:  /* Left shift done in MODE, directly to TARGET.  */
+      if (mode == V4SImode && TARGET_SSE2)
+       {
+         tmp1 = gen_reg_rtx (V4SImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V4SImode));
+         emit_insn (gen_ashlv4si3 (target, tmp1, GEN_INT (entry->arg)));
+         return true;
+       }
+      else if (mode == V8SImode && TARGET_AVX2)
+       {
+         tmp1 = gen_reg_rtx (V8SImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V8SImode));
+         emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg)));
+         return true;
+       }
+      else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+       {
+         tmp1 = gen_reg_rtx (V16SImode);
+         emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
+         emit_insn (gen_ashlv16si3 (target, tmp1, GEN_INT (entry->arg)));
+         return true;
+       }
+      else
+       return false;
+
+    default:
+      return false;
+    }
+  /* The cases that break out of the switch leave their result in TMP2,
+     which has QImode or HImode elements; copy it into TARGET as MODE.  */
+  emit_move_insn (target, gen_lowpart (mode, tmp2));
+  return true;
+}
+
 /* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
    fill target with val via vec_duplicate.  */
 
@@ -15705,6 +16031,12 @@ ix86_vector_duplicate_value (machine_mode mode, rtx 
target, rtx val)
   bool ok;
   rtx_insn *insn;
   rtx dup;
+
+  if ((mode == V4SImode || mode == V8SImode || mode == V16SImode)
+      && CONST_INT_P (val)
+      && ix86_vector_duplicate_simode_const (mode, target, INTVAL (val)))
+    return true;
+
   /* Save/restore recog_data in case this is called from splitters
      or other routines where recog_data needs to stay valid across
      force_reg.  See PR106577.  */
@@ -15801,6 +16133,24 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
        }
       return ix86_vector_duplicate_value (mode, target, val);
 
+    case E_V8DImode:
+      if (CONST_INT_P (val))
+       {
+         int tmp = (int)INTVAL (val);
+         if (tmp == (int)(INTVAL (val) >> 32))
+           {
+             rtx reg = gen_reg_rtx (V16SImode);
+             ok = ix86_vector_duplicate_value (V16SImode, reg,
+                                               GEN_INT (tmp));
+             if (ok)
+               {
+                 emit_move_insn (target, gen_lowpart (V8DImode, reg));
+                 return true;
+               }
+           }
+       }
+      return ix86_vector_duplicate_value (mode, target, val);
+
     case E_V2SImode:
     case E_V2SFmode:
       if (!mmx_ok)
@@ -15814,7 +16164,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
     case E_V4SFmode:
     case E_V4SImode:
     case E_V16SImode:
-    case E_V8DImode:
     case E_V16SFmode:
     case E_V8DFmode:
       return ix86_vector_duplicate_value (mode, target, val);
@@ -16019,9 +16368,13 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
       return true;
 
     case E_V32HImode:
+    case E_V64QImode:
+      if (CONST_INT_P (val))
+       goto widen;
+      /* FALLTHRU */
+
     case E_V32HFmode:
     case E_V32BFmode:
-    case E_V64QImode:
       gcc_assert (TARGET_EVEX512);
       if (TARGET_AVX512BW)
        return ix86_vector_duplicate_value (mode, target, val);
@@ -17021,6 +17374,13 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx 
vals)
        all_same = false;
     }
 
+  /* Handle the zero vector as special case.  */
+  if (n_var == 0 && all_const_zero)
+    {
+      emit_move_insn (target, CONST0_RTX (mode));
+      return;
+    }
+
   /* If all values are identical, broadcast the value.  */
   if (all_same
       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4d6b2b98761..e67e5f62533 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22134,6 +22134,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
        *total = cost->fabs;
       else if (FLOAT_MODE_P (mode))
        *total = ix86_vec_cost (mode, cost->sse_op);
+      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+       *total = cost->sse_op;
       return false;
 
     case SQRT:
diff --git a/gcc/testsuite/gcc.target/i386/auto-init-8.c 
b/gcc/testsuite/gcc.target/i386/auto-init-8.c
index 7023d72a4e8..666ee14d2bc 100644
--- a/gcc/testsuite/gcc.target/i386/auto-init-8.c
+++ b/gcc/testsuite/gcc.target/i386/auto-init-8.c
@@ -29,7 +29,7 @@ double foo()
   return result;
 }
 
-/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 3 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 1 "expand" } } */
 /* { dg-final { scan-rtl-dump-times "\\\[0xfefefefefefefefe\\\]" 2 "expand" } 
} */
 /* { dg-final { scan-rtl-dump-times "0xfffffffffffffffe\\\]\\\) repeated x16" 
2 "expand" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
index f431b8a88b0..1cd9a07b835 100644
--- a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
@@ -116,7 +116,6 @@ abs512_ph (__m512h a)
   return _mm512_abs_ph (a);
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 { 
target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpandd\[^\n\]*%zmm\[0-9\]+" 1 } } */
 
 __m256h
@@ -126,7 +125,6 @@ abs256_ph (__m256h a)
   return _mm256_abs_ph (a);
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%ymm\[0-9\]+" 1 { 
target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm\[0-9\]+" 1 } } */
 
 __m128h
@@ -136,5 +134,4 @@ abs_ph (__m128h a)
   return _mm_abs_ph (a);
 }
 
-/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%xmm\[0-9\]+" 1 { 
target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpand\[^\n\]*%xmm\[0-9\]+" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9a.c 
b/gcc/testsuite/gcc.target/i386/pr100865-9a.c
index f2ac1bd94ea..91cfeda64bc 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-9a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-9a.c
@@ -18,7 +18,7 @@ foo (void)
 {
   int i;
   for (i = 0; i < sizeof (array) / sizeof (array[0]); i++)
-    array[i] = MK_CONST128_BROADCAST (0x1fff);
+    array[i] = MK_CONST128_BROADCAST (0x1234);
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, 
%xmm\[0-9\]+" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101796-1.c 
b/gcc/testsuite/gcc.target/i386/pr101796-1.c
index b25464ddb87..09532f9a43a 100644
--- a/gcc/testsuite/gcc.target/i386/pr101796-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr101796-1.c
@@ -15,7 +15,7 @@ volatile __m512i a, b;
 void
 foo()
 {
-  b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3));
-  b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (4));
-  b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (5));
+  b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (4));
+  b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (5));
+  b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (6));
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr106060-1.c 
b/gcc/testsuite/gcc.target/i386/pr106060-1.c
new file mode 100644
index 00000000000..a734d5627b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106060-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64-v3" } */
+#include <immintrin.h>
+
+__m256i
+foo ()
+{
+  /* shouldnt_have_movabs: a byte broadcast of 123 should be
+     materialized without a movabs instruction.  */
+  return _mm256_set1_epi8 (123);
+}
+
+/* { dg-final { scan-assembler-not "movabs" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr106060-2.c 
b/gcc/testsuite/gcc.target/i386/pr106060-2.c
new file mode 100644
index 00000000000..23933aba978
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106060-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64-v3" } */
+#include <immintrin.h>
+
+__m256i
+foo ()
+{
+  /* should_be_cmpeq_abs: a byte broadcast of 1 should be synthesized
+     as an all-ones compare (pcmpeq) followed by pabsb.  */
+  return _mm256_set1_epi8 (1);
+}
+
+/* { dg-final { scan-assembler "pcmpeq" } } */
+/* { dg-final { scan-assembler "pabsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr106060-3.c 
b/gcc/testsuite/gcc.target/i386/pr106060-3.c
new file mode 100644
index 00000000000..59c128cf923
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106060-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=x86-64-v3" } */
+#include <immintrin.h>
+
+__m256i
+foo ()
+{
+  /* should_be_cmpeq_add: a byte broadcast of -2 should be synthesized
+     as an all-ones compare (pcmpeq) followed by paddb.  */
+  return _mm256_set1_epi8 (-2);
+}
+
+/* { dg-final { scan-assembler "pcmpeq" } } */
+/* { dg-final { scan-assembler "paddb" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr70314.c 
b/gcc/testsuite/gcc.target/i386/pr70314.c
index aad8dd9b57e..181d2b4b9e2 100644
--- a/gcc/testsuite/gcc.target/i386/pr70314.c
+++ b/gcc/testsuite/gcc.target/i386/pr70314.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=skylake-avx512 -O2" } */
-/* { dg-final { scan-assembler-times "cmp" 2 } } */
+/* { dg-final { scan-assembler-times "cmp\[dq\]" 2 } } */
 /* { dg-final { scan-assembler-not "and" } } */
 
 typedef long vec __attribute__((vector_size(16)));
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c 
b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
index c6a63903604..b7e45c2e879 100644
--- a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
@@ -28,7 +28,7 @@ __vu srl_c (__vu a)
   return a >> 5;
 }
 
-/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+/* { dg-final { scan-assembler-times "psrlw" 5 } } */
 
 __vi sra (__vi a, int n)
 {
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c 
b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
index 244b0dbd28a..2471e6ed17d 100644
--- a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
@@ -28,7 +28,7 @@ __vu srl_c (__vu a)
   return a >> 5;
 }
 
-/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+/* { dg-final { scan-assembler-times "psrlw" 5 } } */
 
 __vi sra (__vi a, int n)
 {

Reply via email to