Here is the v3 patch to reject CONST_VECTOR in REG_EQUAL note
when the CONST_VECTOR mode isn't the same as the dest mode:

(gdb) call debug (insn)
(insn 7 5 12 2 (set (subreg:V8SI (reg:V4DI 100) 0)
        (vec_duplicate:V8SI (reg:SI 102)))
"/export/gnu/import/git/gitlab/x86-gcc-test/gcc/testsuite/gcc.target/i386/pr40957.c":16:10
discrim 2 9361 {vec_dupv8si}
     (expr_list:REG_DEAD (reg:SI 102)
        (expr_list:REG_EQUAL (const_vector:V4DI [
                    (const_int -1 [0xffffffffffffffff]) repeated x4
                ])
            (nil))))
(gdb)

This happens because a V4DI CONST_VECTOR is supported by AVX2, but not by AVX.

Tested on Linux/x86-64.

-- 
H.J.
---
Add X86_CSE_CONST_VECTOR for native CONST_VECTOR:

(insn 25 23 234 4 (set (reg:V16QI 135)
        (const_vector:V16QI [
                (const_int -1 [0xffffffffffffffff]) repeated x16
            ])) "bar-2.c":10:16 discrim 67584 2453 {movv16qi_internal}
     (nil))

and constant integer load:

(insn 280 8 279 2 (set (subreg:HI (reg:V2QI 172) 0)
        (const_int -1 [0xffffffffffffffff])) -1
     (nil))
...
(insn 110 39 194 9 (set (reg:V2QI 147)
        (reg:V2QI 172)) 2089 {*movv2qi_internal}
     (expr_list:REG_EQUAL (const_vector:V2QI [
                (const_int -1 [0xffffffffffffffff]) repeated x2
            ])
        (nil)))

converted from

(insn 111 87 121 18 (set (reg:V2QI 147)
        (mem/u/c:V2QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S2 A16])) 2089
 {*movv2qi_internal}
     (expr_list:REG_EQUAL (const_vector:V2QI [
                (const_int -1 [0xffffffffffffffff]) repeated x2])
        (nil)))

1. Use CONST_VECTOR in REG_EQUAL note to avoid DF chain.
2. Keep constant integer load when crossing a function call since it is
faster than saving and restoring an integer register.
3. Convert CONST_VECTOR load no larger than integer register to constant
integer load even if there is no redundant CONST_VECTOR load.

gcc/

PR target/125100
* config/i386/i386-features.cc (x86_cse_kind): Add
X86_CSE_CONST_VECTOR.
(redundant_pattern): Add dest_mode.
(ix86_place_single_vector_set): Handle X86_CSE_CONST_VECTOR.
Generate SUBREG for constant integer source.
(ix86_broadcast_inner): Add an INSN argument.  Use CONST_VECTOR
in REG_EQUAL note.  Set load kind to X86_CSE_CONST_VECTOR for
native and converted CONST_VECTORs.  Return CONST_VECTOR if it
can be converted to constant integer load.
(pass_x86_cse::candidate_vector_p): Add an INSN argument and
pass the insn to ix86_broadcast_inner.
(pass_x86_cse::x86_cse): Add a basic block bitmap for calls.
Pass the insn to candidate_vector_p.  Handle X86_CSE_CONST_VECTOR.
Set dest_mode.  Keep constant integer load when crossing a
function call.  Convert CONST_VECTOR load no larger than integer
register to constant integer load even if there are no redundant
CONST_VECTOR loads.

gcc/testsuite/

PR target/125100
* gcc.target/i386/pr125100-1.c: New test.
* gcc.target/i386/pr125100-2.c: Likewise.
* gcc.target/i386/pr125100-3.c: Likewise.
* gcc.target/i386/pr125100-4.c: Likewise.
From f5f8c1a8dc43ee8d29d118a2a46c6ab9e560998f Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <[email protected]>
Date: Thu, 30 Apr 2026 09:21:27 +0800
Subject: [PATCH v3] x86_cse: Add X86_CSE_CONST_VECTOR

Add X86_CSE_CONST_VECTOR for native CONST_VECTOR:

(insn 25 23 234 4 (set (reg:V16QI 135)
        (const_vector:V16QI [
                (const_int -1 [0xffffffffffffffff]) repeated x16
            ])) "bar-2.c":10:16 discrim 67584 2453 {movv16qi_internal}
     (nil))

and constant integer load:

(insn 280 8 279 2 (set (subreg:HI (reg:V2QI 172) 0)
        (const_int -1 [0xffffffffffffffff])) -1
     (nil))
...
(insn 110 39 194 9 (set (reg:V2QI 147)
        (reg:V2QI 172)) 2089 {*movv2qi_internal}
     (expr_list:REG_EQUAL (const_vector:V2QI [
                (const_int -1 [0xffffffffffffffff]) repeated x2
            ])
        (nil)))

converted from

(insn 111 87 121 18 (set (reg:V2QI 147)
        (mem/u/c:V2QI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0  S2 A16])) 2089 {*movv2qi_internal}
     (expr_list:REG_EQUAL (const_vector:V2QI [
                (const_int -1 [0xffffffffffffffff]) repeated x2])
        (nil)))

1. Use CONST_VECTOR in REG_EQUAL note to avoid DF chain.
2. Keep constant integer load when crossing a function call since it is
faster than saving and restoring an integer register.
3. Convert CONST_VECTOR load no larger than integer register to constant
integer load even if there is no redundant CONST_VECTOR load.

Tested on Linux/x86-64.

gcc/

	PR target/125100
	* config/i386/i386-features.cc (x86_cse_kind): Add
	X86_CSE_CONST_VECTOR.
	(redundant_pattern): Add dest_mode.
	(ix86_place_single_vector_set): Handle X86_CSE_CONST_VECTOR.
	Generate SUBREG for constant integer source.
	(ix86_broadcast_inner): Add an INSN argument.  Use CONST_VECTOR
	in REG_EQUAL note.  Set load kind to X86_CSE_CONST_VECTOR for
	native and converted CONST_VECTORs.  Return CONST_VECTOR if it
	can be converted to constant integer load.
	(pass_x86_cse::candidate_vector_p): Add an INSN argument and
	pass the insn to ix86_broadcast_inner.
	(pass_x86_cse::x86_cse): Add a basic block bitmap for calls.
	Pass the insn to candidate_vector_p.  Handle X86_CSE_CONST_VECTOR.
	Set dest_mode.  Keep constant integer load when crossing a
	function call.  Convert CONST_VECTOR load no larger than integer
	register to constant integer load even if there are no redundant
	CONST_VECTOR loads.

gcc/testsuite/

	PR target/125100
	* gcc.target/i386/pr125100-1.c: New test.
	* gcc.target/i386/pr125100-2.c: Likewise.
	* gcc.target/i386/pr125100-3.c: Likewise.
	* gcc.target/i386/pr125100-4.c: Likewise.

Signed-off-by: H.J. Lu <[email protected]>
---
 gcc/config/i386/i386-features.cc           | 283 +++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr125100-1.c |  20 ++
 gcc/testsuite/gcc.target/i386/pr125100-2.c |  18 ++
 gcc/testsuite/gcc.target/i386/pr125100-3.c |  18 ++
 gcc/testsuite/gcc.target/i386/pr125100-4.c |  24 ++
 5 files changed, 310 insertions(+), 53 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr125100-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr125100-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr125100-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr125100-4.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index ce5f0e9c178..a1411d2b1b1 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3222,6 +3222,7 @@ enum x86_cse_kind
 {
   X86_CSE_CONST0_VECTOR,
   X86_CSE_CONSTM1_VECTOR,
+  X86_CSE_CONST_VECTOR,
   X86_CSE_VEC_DUP,
   X86_CSE_TLS_GD,
   X86_CSE_TLS_LD_BASE,
@@ -3240,6 +3241,9 @@ struct redundant_pattern
   rtx tlsdesc_val;
   /* The inner scalar mode.  */
   machine_mode mode;
+  /* The destination mode which can be changed to the integer mode of
+     the same size.  */
+  machine_mode dest_mode;
   /* The instruction which sets the inner scalar.  Nullptr if the inner
      scalar is applied to the whole function, instead of within the same
      block.  */
@@ -3271,9 +3275,11 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
 			      redundant_pattern *load = nullptr)
 {
   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
-  /* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop
-     to avoid extra spills.  */
-  if (!load || load->kind != X86_CSE_VEC_DUP)
+  /* For X86_CSE_VEC_DUP and X86_CSE_CONST_VECTOR, don't place the vector
+     set outside of the loop to avoid extra spills.  */
+  if (!load
+      || (load->kind != X86_CSE_VEC_DUP
+	  && load->kind != X86_CSE_CONST_VECTOR))
     {
       while (bb->loop_father->latch
 	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
@@ -3281,6 +3287,8 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
 				      bb->loop_father->header);
     }
 
+  if (CONST_INT_P (src))
+    dest = gen_rtx_SUBREG (load->dest_mode, dest, 0);
   rtx set = gen_rtx_SET (dest, src);
 
   rtx_insn *insn = BB_HEAD (bb);
@@ -3321,10 +3329,7 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
 	}
     }
 
-  /* NB: CONST_VECTOR load is generated and handled in x86_cse.  */
-  if (load
-      && !CONST_VECTOR_P (src)
-      && load->kind == X86_CSE_VEC_DUP)
+  if (load && load->kind == X86_CSE_VEC_DUP)
     {
       /* Get the source from LOAD as (reg:SI 99) in
 
@@ -3729,55 +3734,175 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
   if (nunits < 2)
     return nullptr;
 
-  *kind_p = X86_CSE_VEC_DUP;
-
-  rtx reg;
-  if (GET_CODE (op) == VEC_DUPLICATE)
+  bool const_vector_p = CONST_VECTOR_P (op);
+  bool duplicated = GET_CODE (op) == VEC_DUPLICATE;
+  rtx orig_op = op;
+  if (!const_vector_p)
     {
-      /* Only
-	  (vec_duplicate:V4SI (reg:SI 99))
-	  (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0  S8 A64]))
-	 are supported.  Set OP to the broadcast source by default.  */
-      op = XEXP (op, 0);
-      reg = op;
-      if (SUBREG_P (op)
-	  && SUBREG_BYTE (op) == 0
-	  && !paradoxical_subreg_p (op))
-	reg = SUBREG_REG (op);
-      if (!REG_P (reg))
+      /* Check CONST_VECTOR in REG_EQUAL note.  */
+      rtx equal = find_reg_equal_equiv_note (*insn_p);
+      if (equal)
 	{
-	  if (MEM_P (op)
-	      && SYMBOL_REF_P (XEXP (op, 0))
-	      && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
+	  equal = XEXP (equal, 0);
+	  const_vector_p = CONST_VECTOR_P (equal);
+	  /* Use CONST_VECTOR in REG_EQUAL note.  */
+	  if (const_vector_p)
 	    {
-	      /* Handle constant broadcast from memory.  */
-	      *scalar_mode_p = GET_MODE_INNER (mode);
-	      *insn_p = nullptr;
-	      return op;
+	      /* Handle REG_EQUAL note in:
+
+		 (insn 7 5 12 2 (set (subreg:V8SI (reg:V4DI 100) 0)
+			(vec_duplicate:V8SI (reg:SI 102)))
+		    (expr_list:REG_DEAD (reg:SI 102)
+		       (expr_list:REG_EQUAL (const_vector:V4DI [
+			  (const_int -1 [0xffffffffffffffff]) repeated x4]) (nil))))
+
+		 NB: Don't treat it as CONST_VECTOR since EQUAL isn't
+		 supported by ISAs as in gcc.target/i386/pr40957.c.  */
+	      if (GET_MODE (equal) != mode)
+		const_vector_p = false;
+	      else
+		op = equal;
 	    }
-	  return nullptr;
 	}
     }
-  else if (CONST_VECTOR_P (op))
+
+  machine_mode inner_mode = GET_MODE_INNER (mode);
+
+  if (const_vector_p)
     {
+      bool int_load_p = GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
+      *kind_p = X86_CSE_CONST_VECTOR;
+      if (int_load_p)
+	{
+	  /* This CONST_VECTOR load can be converted to constant
+	     integer load.  */
+	  *scalar_mode_p = mode;
+	  *insn_p = nullptr;
+	  return op;
+	}
+
+      /* This CONST_VECTOR is wider than the integer register.  */
       rtx first = XVECEXP (op, 0, 0);
-      for (int i = 1; i < nunits; ++i)
+
+      if (duplicated)
+	{
+	  /* Check if CONST_VECTOR in REG_EQUAL note is duplicated in
+
+	     (insn 10 7 12 2 (set (reg:V8SI 128)
+		(vec_duplicate:V8SI (vec_select:V2SI (reg:V4SI 180)
+			(parallel [(const_int 0 [0])
+				   (const_int 1 [0x1])]))))
+		  (expr_list:REG_EQUAL (const_vector:V8SI [
+		    (const_int 0 [0])
+		    (const_int 34 [0x22])
+		    (const_int 0 [0])
+		    (const_int 34 [0x22])
+		    (const_int 0 [0])
+		    (const_int 34 [0x22])
+		    (const_int 0 [0])
+		    (const_int 34 [0x22])])(nil)))
+
+	   */
+
+	  bool duplicated_const_vector = true;
+	  for (int i = 1; i < nunits; ++i)
+	    {
+	      rtx tmp = XVECEXP (op, 0, i);
+	      if (!rtx_equal_p (tmp, first))
+		{
+		  duplicated_const_vector = false;
+		  break;
+		}
+	    }
+
+	  if (duplicated_const_vector)
+	    {
+	      bool const_double_p = CONST_DOUBLE_P (first);
+	      /* Force the floating point constant to memory.  */
+	      if (const_double_p)
+		first = validize_mem (force_const_mem (inner_mode, first));
+
+	      if (const_double_p || CONST_INT_P (first))
+		{
+		  /* Handle
+
+		     (insn 7 6 8 2 (set (reg:V4SF 99)
+			  (vec_duplicate:V4SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC2") [flags 0x2]) [0  S4 A32])))
+			(expr_list:REG_EQUAL (const_vector:V4SF [
+			   (const_double:SF 3.4e+1 [0x0.88p+6]) repeated x4]) (nil)))
+
+		     and
+
+		     (insn 14 15 16 3 (set (reg:V4SI 116)
+			  (vec_duplicate:V4SI (reg:SI 117)))
+		       (expr_list:REG_EQUAL (const_vector:V4SI [
+			  (const_int 34 [0x22]) repeated x4]) (nil)))
+
+		   */
+		  *kind_p = X86_CSE_VEC_DUP;
+		  *insn_p = nullptr;
+		  *scalar_mode_p = inner_mode;
+		  return first;
+		}
+	    }
+
+	  op = orig_op;
+	}
+      else
 	{
-	  rtx tmp = XVECEXP (op, 0, i);
-	  /* Vector duplicate value.  */
-	  if (!rtx_equal_p (tmp, first))
+	  /* Only native CONST_VECTOR is allowed.  */
+	  if (orig_op != op)
 	    return nullptr;
+
+	  /* Check if VEC_DUPLICATE can be used.  */
+	  for (int i = 1; i < nunits; ++i)
+	    {
+	      rtx tmp = XVECEXP (op, 0, i);
+	      /* Vector duplicate value.  */
+	      if (!rtx_equal_p (tmp, first))
+		return nullptr;
+	    }
+
+	  /* Use the inner mode to handle
+	     (const_vector:V2QI [(const_int 0 [0]) repeated x2])
+	   */
+	  *scalar_mode_p = inner_mode;
+	  *insn_p = nullptr;
+	  return first;
 	}
-      /* Use the inner mode to handle
-	   (const_vector:V2QI [(const_int 0 [0]) repeated x2])
-       */
-      *scalar_mode_p = GET_MODE_INNER (mode);
-      *insn_p = nullptr;
-      return first;
     }
-  else
+
+  if (!duplicated)
     return nullptr;
 
+  *kind_p = X86_CSE_VEC_DUP;
+
+  /* Only
+
+     (vec_duplicate:V4SI (reg:SI 99))
+     (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64]))
+
+     are supported.  Set OP to the broadcast source by default.  */
+  op = XEXP (op, 0);
+  rtx reg = op;
+  if (SUBREG_P (op)
+      && SUBREG_BYTE (op) == 0
+      && !paradoxical_subreg_p (op))
+    reg = SUBREG_REG (op);
+  if (!REG_P (reg))
+    {
+      if (MEM_P (op)
+	  && SYMBOL_REF_P (XEXP (op, 0))
+	  && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
+	{
+	  /* Handle constant broadcast from memory.  */
+	  *scalar_mode_p = inner_mode;
+	  *insn_p = nullptr;
+	  return op;
+	}
+      return nullptr;
+    }
+
   mode = GET_MODE (op);
 
   /* Only single def chain is supported.  */
@@ -4356,7 +4481,7 @@ private:
   unsigned int x86_cse (void);
   bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
   bool candidate_gnu2_tls_p (rtx, attr_tls64);
-  bool candidate_vector_p (rtx);
+  bool candidate_vector_p (rtx, rtx_insn *);
   rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
 }; // class pass_x86_cse
 
@@ -4538,7 +4663,7 @@ pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
   INSN is a vector broadcast instruction.  */
 
 bool
-pass_x86_cse::candidate_vector_p (rtx set)
+pass_x86_cse::candidate_vector_p (rtx set, rtx_insn *insn)
 {
   rtx src = SET_SRC (set);
   rtx dest = SET_DEST (set);
@@ -4551,6 +4676,7 @@ pass_x86_cse::candidate_vector_p (rtx set)
   if (!REG_P (dest) && !SUBREG_P (dest))
     return false;
 
+  def_insn = insn;
   val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
 			      &def_insn);
   return val ? true : false;
@@ -4584,6 +4710,7 @@ pass_x86_cse::x86_cse (void)
   unsigned int i;
   auto_bitmap updated_gnu_tls_insns;
   auto_bitmap updated_gnu2_tls_insns;
+  auto_bitmap call_bbs;
 
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
@@ -4601,13 +4728,19 @@ pass_x86_cse::x86_cse (void)
 	     them.  */
 	  unsigned int threshold = 2;
 
+	  bool call_p = CALL_P (insn);
 	  rtx set = single_set (insn);
-	  if (!set && !CALL_P (insn))
+	  if (!set && !call_p)
 	    continue;
 
 	  tlsdesc_val = nullptr;
 
 	  attr_tls64 tls64 = get_attr_tls64 (insn);
+
+	  /* NB: TLS calls preserve all registers.  */
+	  if (call_p && tls64 == TLS64_NONE)
+	    bitmap_set_bit (call_bbs, BLOCK_FOR_INSN (insn)->index);
+
 	  switch (tls64)
 	    {
 	    case TLS64_GD:
@@ -4633,7 +4766,7 @@ pass_x86_cse::x86_cse (void)
 		continue;
 
 	      /* Check for vector broadcast.  */
-	      if (candidate_vector_p (set))
+	      if (candidate_vector_p (set, insn))
 		break;
 	      continue;
 	    }
@@ -4644,7 +4777,8 @@ pass_x86_cse::x86_cse (void)
 		&& load->kind == kind
 		&& load->mode == scalar_mode
 		&& (load->bb == bb
-		    || kind != X86_CSE_VEC_DUP
+		    || (kind != X86_CSE_VEC_DUP
+			&& kind != X86_CSE_CONST_VECTOR)
 		    /* Non all 0s/1s vector load must be in the same
 		       basic block if it is in a recursive call.  */
 		    || !recursive_call_p)
@@ -4677,12 +4811,19 @@ pass_x86_cse::x86_cse (void)
 	     instruction basic block and the instruction kind.  */
 	  load = new redundant_pattern;
 
+	  /* Convert CONST_VECTOR load no larger than integer register
+	     to constant integer load even if there is no redundant
+	     CONST_VECTOR load.  */
+	  if (CONST_VECTOR_P (val))
+	    threshold = 1;
+
 	  load->val = copy_rtx (val);
 	  if (tlsdesc_val)
 	    load->tlsdesc_val = copy_rtx (tlsdesc_val);
 	  else
 	    load->tlsdesc_val = nullptr;
 	  load->mode = scalar_mode;
+	  load->dest_mode = mode;
 	  load->size = GET_MODE_SIZE (mode);
 	  load->def_insn = def_insn;
 	  load->count = 1;
@@ -4724,10 +4865,13 @@ pass_x86_cse::x86_cse (void)
 		    || load->size <= UNITS_PER_WORD))
 	      {
 		/* Generate CONST_VECTOR load.  */
+	      case X86_CSE_CONST_VECTOR:
 		mode = ix86_get_vector_cse_mode (load->size,
 						 load->mode);
 
-		if (load->val == CONST0_RTX (load->mode))
+		if (CONST_VECTOR_P (load->val))
+		  broadcast_source = load->val;
+		else if (load->val == CONST0_RTX (load->mode))
 		  broadcast_source = CONST0_RTX (mode);
 		else if (load->val == CONSTM1_RTX (load->mode))
 		  broadcast_source = CONSTM1_RTX (mode);
@@ -4757,15 +4901,46 @@ pass_x86_cse::x86_cse (void)
 		       */
 		    machine_mode int_mode
 		      = int_mode_for_mode (mode).require ();
+		    load->dest_mode = int_mode;
 		    broadcast_source = simplify_subreg (int_mode,
 							broadcast_source,
 							mode, 0);
 		    gcc_assert (broadcast_source != nullptr);
-		    replace_vector_const (mode, broadcast_source,
-					  load->insns, int_mode);
-		    /* Keep redundant constant integer load.  */
-		    load->broadcast_source = nullptr;
-		    load->broadcast_reg = nullptr;
+
+		    bool keep_const_int_load = false;
+		    if (!bitmap_empty_p (call_bbs))
+		      {
+			bitmap_iterator bi;
+			unsigned int id;
+			EXECUTE_IF_SET_IN_BITMAP (load->bbs, 0, id, bi)
+			  if (bitmap_bit_p (call_bbs, id))
+			    {
+			      /* NB: Constant integer load is faster
+				 than save and restore an integer
+				 register when crossing a function call.
+			       */
+			      keep_const_int_load = true;
+			      break;
+			    }
+		      }
+
+		    if (keep_const_int_load)
+		      {
+			/* Keep constant integer load.  */
+			replace_vector_const (mode, broadcast_source,
+					      load->insns, int_mode);
+			load->broadcast_source = nullptr;
+			load->broadcast_reg = nullptr;
+		      }
+		    else
+		      {
+			broadcast_reg = gen_reg_rtx (mode);
+			reg = gen_reg_rtx (load->mode);
+			replace_vector_const (mode, broadcast_reg,
+					      load->insns, load->mode);
+			load->broadcast_source = broadcast_source;
+			load->broadcast_reg = broadcast_reg;
+		      }
 		    break;
 		  }
 	      }
@@ -4796,6 +4971,7 @@ pass_x86_cse::x86_cse (void)
 		case X86_CSE_CONSTM1_VECTOR:
 		  broadcast_source = CONSTM1_RTX (mode);
 		  break;
+		case X86_CSE_CONST_VECTOR:
 		case X86_CSE_VEC_DUP:
 		  if (!broadcast_source)
 		    {
@@ -4887,6 +5063,7 @@ pass_x86_cse::x86_cse (void)
 					      updated_gnu_tls_insns,
 					      updated_gnu2_tls_insns);
 		  break;
+		case X86_CSE_CONST_VECTOR:
 		case X86_CSE_VEC_DUP:
 		  /* Keep redundant constant integer load.  */
 		  if (!load->broadcast_reg)
diff --git a/gcc/testsuite/gcc.target/i386/pr125100-1.c b/gcc/testsuite/gcc.target/i386/pr125100-1.c
new file mode 100644
index 00000000000..21765a5843e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125100-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-mtune=generic -O2 -fPIC" } */
+/* { dg-additional-options "-march=pentiumpro" { target ia32 } } */
+
+struct desc {
+  char c1;
+  char c2;
+};
+void
+foo (struct desc *list, int n, int l)
+{
+  int j;
+  for (j = 0; j < l; j++)
+    list[j].c1 = list[j].c2 = -1;
+  for (;j < n; j++)
+    list[j].c1 = list[j].c2 = -1;
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1, %\[a-z0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "__x86.get_pc_thunk" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr125100-2.c b/gcc/testsuite/gcc.target/i386/pr125100-2.c
new file mode 100644
index 00000000000..d179c3f4050
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125100-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-mtune=generic -O2 -fPIC" } */
+/* { dg-additional-options "-march=pentiumpro" { target ia32 } } */
+
+struct desc {
+  char c1;
+  char c2;
+};
+void
+foo (struct desc *list, int n, int l)
+{
+  int j;
+  for (j = 0; j < l; j++)
+    list[j].c1 = list[j].c2 = -1;
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$-1, %\[a-z0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "__x86.get_pc_thunk" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr125100-3.c b/gcc/testsuite/gcc.target/i386/pr125100-3.c
new file mode 100644
index 00000000000..2015ee819b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125100-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-mtune=generic -O2 -fPIC" } */
+/* { dg-additional-options "-march=pentiumpro" { target ia32 } } */
+
+struct desc {
+  char c1;
+  char c2;
+};
+void
+foo (struct desc *list, int n, int l)
+{
+  int j;
+  for (j = 0; j < l; j++)
+    list[j].c1 = list[j].c2 = 1;
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$257, %\[a-z0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-not "__x86.get_pc_thunk" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr125100-4.c b/gcc/testsuite/gcc.target/i386/pr125100-4.c
new file mode 100644
index 00000000000..a36f24b8323
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125100-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target { { ! ia32 } && fpic } } } */
+/* { dg-options "-O2" } */
+/* { dg-add-options check_function_bodies } */
+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */
+
+/*
+**foo:
+**.LFB[0-9]+:
+**	.cfi_startproc
+**	movabsq	\$4758053007424749568, %rax
+**	movq	%rax, \(%rdi\)
+**	ret
+**	.cfi_endproc
+**...
+*/
+
+typedef float v2sf __attribute__((vector_size(8)));
+
+void
+foo (v2sf *c)
+{
+  *c =  __extension__(v2sf){34, 34};
+}
-- 
2.54.0

Reply via email to