https://gcc.gnu.org/g:1670d0ad2dd1fba510eef6078f3f7fd615fd23a1
commit r16-6964-g1670d0ad2dd1fba510eef6078f3f7fd615fd23a1 Author: liuhongt <[email protected]> Date: Mon Jan 19 00:02:21 2026 -0800 Add u-arch tune prefer_bcst_from_integer. /* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for 128/256/512-bit vector, if disabled, the move will be done by broadcast/load from constant pool broadcast from integer: mov $0xa,%eax vmovd %eax,%xmm0 vpbroadcastd %xmm0,%xmm0 broadcast/load from constant pool: vpbroadcastd CST.0(%rip), %xmm0 */ The tune is on by default. gcc/ChangeLog: PR target/123631 * config/i386/i386-expand.cc (ix86_vector_duplicate_value): Don't force CONST_INT to reg when !TARGET_PREFER_BCST_FROM_INTEGER; force it to mem instead. * config/i386/i386.h (TARGET_PREFER_BCST_FROM_INTEGER): New macro. * config/i386/x86-tune.def (X86_TUNE_PREFER_BCST_FROM_INTEGER): New tune. Diff: --- gcc/config/i386/i386-expand.cc | 17 +++++++++++++---- gcc/config/i386/i386.h | 3 +++ gcc/config/i386/x86-tune.def | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index d6525ddcdd00..a82bb4399c9b 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -17361,12 +17361,21 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) machine_mode innermode = GET_MODE_INNER (mode); rtx reg; - /* If that fails, force VAL into a register. */ + /* If that fails, force VAL into a register or mem. 
*/ start_sequence (); - reg = force_reg (innermode, val); - if (GET_MODE (reg) != innermode) - reg = gen_lowpart (innermode, reg); + + if (!TARGET_PREFER_BCST_FROM_INTEGER && CONST_INT_P (val) + && GET_MODE_BITSIZE (innermode) <= HOST_BITS_PER_WIDE_INT + && GET_MODE_BITSIZE(mode) >= 128) + reg = validize_mem (force_const_mem (innermode, val)); + else + { + reg = force_reg (innermode, val); + if (GET_MODE (reg) != innermode) + reg = gen_lowpart (innermode, reg); + } + SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); seq = end_sequence (); if (seq) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 71bacc220524..888edfed88f0 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -409,6 +409,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC] #define TARGET_INTER_UNIT_CONVERSIONS \ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] +#define TARGET_PREFER_BCST_FROM_INTEGER \ + ix86_tune_features[X86_TUNE_PREFER_BCST_FROM_INTEGER] + #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index a1944620daff..53cf1a194330 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -488,6 +488,21 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec", DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", ~(m_AMDFAM10 | m_BDVER)) +/* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from integer for + 128/256/512-bit vector, if disabled, the move will be done by + broadcast/load from constant pool + + broadcast from integer: + mov $0xa,%eax + vmovd %eax,%xmm0 + vpbroadcastd %xmm0,%xmm0 + + broadcast/load from constant pool: + vpbroadcastd CST.0(%rip), %xmm0 */ + +DEF_TUNE 
(X86_TUNE_PREFER_BCST_FROM_INTEGER, "prefer_bcst_from_integer", + m_ALL) + /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for fp converts to destination register. */ DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
