On Sat, May 9, 2026 at 9:10 AM Liu, Hongtao <[email protected]> wrote: > > > > > -----Original Message----- > > From: H.J. Lu <[email protected]> > > Sent: Saturday, May 9, 2026 7:57 AM > > To: GCC Patches <[email protected]>; Uros Bizjak > > <[email protected]>; Liu, Hongtao <[email protected]> > > Subject: [PATCH] x86_cse: Check CONST0_RTX and CONSTM1_RTX for > > X86_CSE_VEC_DUP > > > > Check CONST0_RTX and CONSTM1_RTX when placing > > > > (insn 32 2 7 2 (set (reg:V2DI 114) > > (const_vector:V2DI [ > > (const_int 0 [0]) repeated x2 > > ])) -1 > > (nil)) > > > > after > > > > (note 2 3 32 2 NOTE_INSN_FUNCTION_BEG) > > > > for X86_CSE_VEC_DUP, not X86_CSE_CONST0_VECTOR or > > X86_CSE_CONSTM1_VECTOR, after replacing redundant vector loads: > > > > (insn 31 15 16 2 (set (reg/v/f:DI 99 [ d ]) > > (const_int 0 [0])) "x.c":5:16 -1 > > (nil)) > > ... > > (insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ]) > > (vec_duplicate:V2DI (reg/v/f:DI 99 [ d ]))) "x.c":5:16 9345 > > {*vec_dupv2di} > > (nil)) > > > > ... > > (insn 29 12 15 2 (set (reg/v/f:DI 98 [ c ]) > > (const_int 0 [0])) "x.c":5:16 -1 > > (nil)) > > ... > > (insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ]) > > (vec_duplicate:V2DI (reg/v/f:DI 98 [ c ]))) "x.c":5:16 9345 > > {*vec_dupv2di} > > (nil)) > > > > with > > > > (insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ]) > > (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal} > > (nil)) > > > > and > > > > (insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ]) > > (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal} > > (nil)) > > > > gcc/ > > > > PR target/125239 > > * config/i386/i386-features.cc (ix86_place_single_vector_set): > > Check CONST0_RTX and CONSTM1_RTX for X86_CSE_VEC_DUP. > > Can we detect it in ix86_broadcast_inner and set *kind_p to > X86_CSE_CONST0_VECTOR, instead of handling it in ix86_place_single_vector_set?
Done. I am testing this patch. > Also, I wonder why pass_combine (or fwprop) doesn't catch this missed > optimization. A set with CONST0_VECTOR should be cheaper than one with > vec_duplicate. Is it because of -fno-tree-dse -fno-tree-dce? > > > > gcc/testsuite/ > > > > PR target/125239 > > * gcc.target/i386/pr125239.c: New test. > > > > > > -- > > H.J. -- H.J.
From f09aad4fdf061d9a5de8d6bceb7781686f5a2c28 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <[email protected]> Date: Sat, 9 May 2026 05:58:09 +0800 Subject: [PATCH v2] x86_cse: Check CONST0_RTX and CONSTM1_RTX Check CONST0_RTX against X86_CSE_CONST0_VECTOR and CONSTM1_RTX against X86_CSE_CONSTM1_VECTOR when placing (insn 32 2 7 2 (set (reg:V2DI 114) (const_vector:V2DI [ (const_int 0 [0]) repeated x2 ])) -1 (nil)) after (note 2 3 32 2 NOTE_INSN_FUNCTION_BEG) for X86_CSE_VEC_DUP, not X86_CSE_CONST0_VECTOR or X86_CSE_CONSTM1_VECTOR, after replacing redundant vector loads: (insn 31 15 16 2 (set (reg/v/f:DI 99 [ d ]) (const_int 0 [0])) "x.c":5:16 -1 (nil)) ... (insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ]) (vec_duplicate:V2DI (reg/v/f:DI 99 [ d ]))) "x.c":5:16 9345 {*vec_dupv2di} (nil)) ... (insn 29 12 15 2 (set (reg/v/f:DI 98 [ c ]) (const_int 0 [0])) "x.c":5:16 -1 (nil)) ... (insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ]) (vec_duplicate:V2DI (reg/v/f:DI 98 [ c ]))) "x.c":5:16 9345 {*vec_dupv2di} (nil)) with (insn 18 17 19 2 (set (reg:V2DI 111 [ _22 ]) (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal} (nil)) and (insn 20 19 21 2 (set (reg:V2DI 112 [ _20 ]) (reg:V2DI 114)) "x.c":5:16 2454 {movv2di_internal} (nil)) gcc/ PR target/125239 * config/i386/i386-features.cc (ix86_place_single_vector_set): Check CONST0_RTX against X86_CSE_CONST0_VECTOR and CONSTM1_RTX against X86_CSE_CONSTM1_VECTOR. (ix86_broadcast_inner): Set x86_cse kind to X86_CSE_CONST0_VECTOR for CONST0_RTX and X86_CSE_CONSTM1_VECTOR for CONSTM1_RTX. gcc/testsuite/ PR target/125239 * gcc.target/i386/pr125239.c: New test. Signed-off-by: H.J. 
Lu <[email protected]> --- gcc/config/i386/i386-features.cc | 42 +++++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pr125239.c | 10 ++++++ 2 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr125239.c diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index bf6cbe094e1..8874590b952 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3290,6 +3290,21 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, if (CONST_INT_P (src)) dest = gen_rtx_SUBREG (load->dest_mode, dest, 0); + else if (CONST_VECTOR_P (src)) + { + /* The only possible CONST_VECTORs of SRC are CONST0_RTX and + CONSTM1_RTX. Otherwise, + + rtx set = gen_rtx_SET (dest, src); + + won't be a valid instruction. */ + machine_mode mode = GET_MODE (dest); + if ((src == CONST0_RTX (mode) + && load->kind != X86_CSE_CONST0_VECTOR) + || (src == CONSTM1_RTX (mode) + && load->kind != X86_CSE_CONSTM1_VECTOR)) + gcc_unreachable (); + } rtx set = gen_rtx_SET (dest, src); rtx_insn *insn = BB_HEAD (bb); @@ -3904,6 +3919,7 @@ ix86_broadcast_inner (rtx op, machine_mode mode, return nullptr; } + machine_mode orig_mode = mode; mode = GET_MODE (op); /* Only single def chain is supported. */ @@ -3939,13 +3955,29 @@ ix86_broadcast_inner (rtx op, machine_mode mode, Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an integer constant. 
*/ op = src; - if (SCALAR_INT_MODE_P (mode)) + if (SCALAR_INT_MODE_P (mode) && mode != GET_MODE (reg)) + op = gen_int_mode (INTVAL (src), mode); + if (op == const0_rtx) + { + if (standard_sse_constant_p (CONST0_RTX (orig_mode), + orig_mode) == 1) + { + *scalar_mode_p = QImode; + *kind_p = X86_CSE_CONST0_VECTOR; + *insn_p = nullptr; + return const0_rtx; + } + op = CONST0_RTX (mode); + } + else if (op == constm1_rtx + && standard_sse_constant_p (CONSTM1_RTX (orig_mode), + orig_mode) == 2) { - if (mode != GET_MODE (reg)) - op = gen_int_mode (INTVAL (src), mode); + *scalar_mode_p = QImode; + *kind_p = X86_CSE_CONSTM1_VECTOR; + *insn_p = nullptr; + return constm1_rtx; } - else if (op == const0_rtx) - op = CONST0_RTX (mode); *insn_p = nullptr; } else diff --git a/gcc/testsuite/gcc.target/i386/pr125239.c b/gcc/testsuite/gcc.target/i386/pr125239.c new file mode 100644 index 00000000000..7ebf45ed661 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr125239.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64 -fno-tree-dse -fno-tree-dce" } */ + +extern void a (void); +void +b (void) +{ + int *c, *d, *e[2][20] = {{c}, {c, d, d, d, c, c, d, c}}; + a (); +} -- 2.54.0
