On Sat, May 9, 2026 at 9:10 AM Liu, Hongtao <[email protected]> wrote:
>
>
>
> > -----Original Message-----
> > From: H.J. Lu <[email protected]>
> > Sent: Saturday, May 9, 2026 7:57 AM
> > To: GCC Patches <[email protected]>; Uros Bizjak
> > <[email protected]>; Liu, Hongtao <[email protected]>
> > Subject: [PATCH] x86_cse: Check CONST0_RTX and CONSTM1_RTX for
> > X86_CSE_VEC_DUP
> >
> > Check CONST0_RTX and CONSTM1_RTX when placing
> >
> > (insn 32 2 7 2 (set (reg:V2DI 114)
> >         (const_vector:V2DI [
> >                 (const_int 0 [0]) repeated x2
> >             ])) -1
> >      (nil))
> >
> > after
> >
> > (note 2 3 32 2 NOTE_INSN_FUNCTION_BEG)
> >
> > for X86_CSE_VEC_DUP, not X86_CSE_CONST0_VECTOR or
> > X86_CSE_CONSTM1_VECTOR, after replacing redundant vector loads:
> >
> > (insn 31 15 16 2 (set (reg/v/f:DI 99 [ d ])
> >         (const_int 0 [0])) "x.c":5:16 -1
> >      (nil))
> > ...
> > (insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ])
> >         (vec_duplicate:V2DI (reg/v/f:DI 99 [ d ]))) "x.c":5:16 9345 
> > {*vec_dupv2di}
> >      (nil))
> >
> > ...
> > (insn 29 12 15 2 (set (reg/v/f:DI 98 [ c ])
> >         (const_int 0 [0])) "x.c":5:16 -1
> >      (nil))
> > ...
> > (insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ])
> >         (vec_duplicate:V2DI (reg/v/f:DI 98 [ c ]))) "x.c":5:16 9345 
> > {*vec_dupv2di}
> >      (nil))
> >
> > with
> >
> > (insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ])
> >         (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal}
> >      (nil))
> >
> > and
> >
> > (insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ])
> >         (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal}
> >      (nil))
> >
> > gcc/
> >
> > PR target/125239
> > * config/i386/i386-features.cc (ix86_place_single_vector_set):
> > Check CONST0_RTX and CONSTM1_RTX for X86_CSE_VEC_DUP.
>
> Can we detect it in ix86_broadcast_inner and set *kind_p to
> X86_CSE_CONST0_VECTOR, instead of handling it in ix86_place_single_vector_set?

Done.  I am testing this patch.

> Also, I wonder why pass_combine (or fwprop) doesn't catch this missed
> optimization.  A set from CONST0_VECTOR should be cheaper than one from
> vec_duplicate.

Because of -fno-tree-dse -fno-tree-dce?

> >
> > gcc/testsuite/
> >
> > PR target/125239
> > * gcc.target/i386/pr125239.c: New test.
> >
> >
> > --
> > H.J.



-- 
H.J.
From f09aad4fdf061d9a5de8d6bceb7781686f5a2c28 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <[email protected]>
Date: Sat, 9 May 2026 05:58:09 +0800
Subject: [PATCH v2] x86_cse: Check CONST0_RTX and CONSTM1_RTX

Check CONST0_RTX against X86_CSE_CONST0_VECTOR and CONSTM1_RTX against
X86_CSE_CONSTM1_VECTOR when placing

(insn 32 2 7 2 (set (reg:V2DI 114)
        (const_vector:V2DI [
                (const_int 0 [0]) repeated x2
            ])) -1
     (nil))

after

(note 2 3 32 2 NOTE_INSN_FUNCTION_BEG)

when the load kind is X86_CSE_VEC_DUP, rather than X86_CSE_CONST0_VECTOR
or X86_CSE_CONSTM1_VECTOR, after replacing redundant vector loads:

(insn 31 15 16 2 (set (reg/v/f:DI 99 [ d ])
        (const_int 0 [0])) "x.c":5:16 -1
     (nil))
...
(insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ])
        (vec_duplicate:V2DI (reg/v/f:DI 99 [ d ]))) "x.c":5:16 9345 {*vec_dupv2di}
     (nil))

...
(insn 29 12 15 2 (set (reg/v/f:DI 98 [ c ])
        (const_int 0 [0])) "x.c":5:16 -1
     (nil))
...
(insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ])
        (vec_duplicate:V2DI (reg/v/f:DI 98 [ c ]))) "x.c":5:16 9345 {*vec_dupv2di}
     (nil))

with

(insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ])
        (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal}
     (nil))

and

(insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ])
        (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal}
     (nil))

gcc/

	PR target/125239
	* config/i386/i386-features.cc (ix86_place_single_vector_set):
	Check CONST0_RTX against X86_CSE_CONST0_VECTOR and CONSTM1_RTX
	against X86_CSE_CONSTM1_VECTOR.
	(ix86_broadcast_inner): Set x86_cse kind to X86_CSE_CONST0_VECTOR
	for CONST0_RTX and X86_CSE_CONSTM1_VECTOR for CONSTM1_RTX.

gcc/testsuite/

	PR target/125239
	* gcc.target/i386/pr125239.c: New test.

Signed-off-by: H.J. Lu <[email protected]>
---
 gcc/config/i386/i386-features.cc         | 42 +++++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr125239.c | 10 ++++++
 2 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr125239.c

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index bf6cbe094e1..8874590b952 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3290,6 +3290,21 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
 
   if (CONST_INT_P (src))
     dest = gen_rtx_SUBREG (load->dest_mode, dest, 0);
+  else if (CONST_VECTOR_P (src))
+    {
+      /* The only possible CONST_VECTORs of SRC are CONST0_RTX and
+	 CONSTM1_RTX.  Otherwise,
+
+	 rtx set = gen_rtx_SET (dest, src);
+
+	 won't be a valid instruction.  */
+      machine_mode mode = GET_MODE (dest);
+      if ((src == CONST0_RTX (mode)
+	   && load->kind != X86_CSE_CONST0_VECTOR)
+	  || (src == CONSTM1_RTX (mode)
+	      && load->kind != X86_CSE_CONSTM1_VECTOR))
+	gcc_unreachable ();
+    }
   rtx set = gen_rtx_SET (dest, src);
 
   rtx_insn *insn = BB_HEAD (bb);
@@ -3904,6 +3919,7 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
       return nullptr;
     }
 
+  machine_mode orig_mode = mode;
   mode = GET_MODE (op);
 
   /* Only single def chain is supported.  */
@@ -3939,13 +3955,29 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
 	 Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
 	 integer constant.  */
       op = src;
-      if (SCALAR_INT_MODE_P (mode))
+      if (SCALAR_INT_MODE_P (mode) && mode != GET_MODE (reg))
+	op = gen_int_mode (INTVAL (src), mode);
+      if (op == const0_rtx)
+	{
+	   if (standard_sse_constant_p (CONST0_RTX (orig_mode),
+					orig_mode) == 1)
+	     {
+	       *scalar_mode_p = QImode;
+	       *kind_p = X86_CSE_CONST0_VECTOR;
+	       *insn_p = nullptr;
+	       return const0_rtx;
+	     }
+	   op = CONST0_RTX (mode);
+	}
+      else if (op == constm1_rtx
+	       && standard_sse_constant_p (CONSTM1_RTX (orig_mode),
+					   orig_mode) == 2)
 	{
-	  if (mode != GET_MODE (reg))
-	    op = gen_int_mode (INTVAL (src), mode);
+	  *scalar_mode_p = QImode;
+	  *kind_p = X86_CSE_CONSTM1_VECTOR;
+	  *insn_p = nullptr;
+	  return constm1_rtx;
 	}
-      else if (op == const0_rtx)
-	op = CONST0_RTX (mode);
       *insn_p = nullptr;
     }
   else
diff --git a/gcc/testsuite/gcc.target/i386/pr125239.c b/gcc/testsuite/gcc.target/i386/pr125239.c
new file mode 100644
index 00000000000..7ebf45ed661
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125239.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -fno-tree-dse -fno-tree-dce" } */
+
+extern void a (void);
+void
+b (void)
+{
+  int *c, *d, *e[2][20] = {{c}, {c, d, d, d, c, c, d, c}};
+  a ();
+}
-- 
2.54.0

Reply via email to