https://gcc.gnu.org/g:1670d0ad2dd1fba510eef6078f3f7fd615fd23a1

commit r16-6964-g1670d0ad2dd1fba510eef6078f3f7fd615fd23a1
Author: liuhongt <[email protected]>
Date:   Mon Jan 19 00:02:21 2026 -0800

    Add u-arch tune prefer_bcst_from_integer.
    
    /* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for
       128/256/512-bit vectors.  If disabled, the move will be done by
       broadcast/load from constant pool.
    
       broadcast from integer:
          mov    $0xa,%eax
          vmovd  %eax,%xmm0
          vpbroadcastd %xmm0,%xmm0
    
       broadcast/load from constant pool:
          vpbroadcastd CST.0(%rip), %xmm0  */
    
    The tune is on by default.
    
    gcc/ChangeLog:
    
            PR target/123631
            * config/i386/i386-expand.cc (ix86_vector_duplicate_value):
            Don't force CONST_INT to reg when
            !TARGET_PREFER_BCST_FROM_INTEGER, force it to mem instead.
            * config/i386/i386.h (TARGET_PREFER_BCST_FROM_INTEGER): New macro.
            * config/i386/x86-tune.def
            (X86_TUNE_PREFER_BCST_FROM_INTEGER): New tune.

Diff:
---
 gcc/config/i386/i386-expand.cc | 17 +++++++++++++----
 gcc/config/i386/i386.h         |  3 +++
 gcc/config/i386/x86-tune.def   | 15 +++++++++++++++
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index d6525ddcdd00..a82bb4399c9b 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -17361,12 +17361,21 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
       machine_mode innermode = GET_MODE_INNER (mode);
       rtx reg;
 
-      /* If that fails, force VAL into a register.  */
+      /* If that fails, force VAL into a register or mem.  */
 
       start_sequence ();
-      reg = force_reg (innermode, val);
-      if (GET_MODE (reg) != innermode)
-       reg = gen_lowpart (innermode, reg);
+
+      if (!TARGET_PREFER_BCST_FROM_INTEGER && CONST_INT_P (val)
+         && GET_MODE_BITSIZE (innermode) <= HOST_BITS_PER_WIDE_INT
+         && GET_MODE_BITSIZE(mode) >= 128)
+       reg = validize_mem (force_const_mem (innermode, val));
+      else
+       {
+         reg = force_reg (innermode, val);
+         if (GET_MODE (reg) != innermode)
+           reg = gen_lowpart (innermode, reg);
+       }
+
       SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
       seq = end_sequence ();
       if (seq)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 71bacc220524..888edfed88f0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -409,6 +409,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC]
 #define TARGET_INTER_UNIT_CONVERSIONS \
        ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
+#define TARGET_PREFER_BCST_FROM_INTEGER \
+  ix86_tune_features[X86_TUNE_PREFER_BCST_FROM_INTEGER]
+
 #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
 #define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
 #define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index a1944620daff..53cf1a194330 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -488,6 +488,21 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec",
 DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
           ~(m_AMDFAM10 | m_BDVER))
 
+/* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for
+   128/256/512-bit vector, if disabled, the move will be done by
+   broadcast/load from constant pool
+
+   broadcast from integer:
+      mov    $0xa,%eax
+      vmovd  %eax,%xmm0
+      vpbroadcastd %xmm0,%xmm0
+
+   broadcast/load from constant pool:
+      vpbroadcastd CST.0(%rip), %xmm0  */
+
+DEF_TUNE (X86_TUNE_PREFER_BCST_FROM_INTEGER, "prefer_bcst_from_integer",
+          m_ALL)
+
 /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
    fp converts to destination register.  */
 DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",

Reply via email to