And now with the patch attached.
On Fri, Aug 23, 2019 at 1:39 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > This is currently a heads-up patch that removes the minimum limitation > on the cost of moves to/from XMM registers. The immediate benefit is the removal > of mismatched spills caused by subreg usage. > > *If* the patch proves to be beneficial (as in "doesn't regress > important benchmarks"), then we should be able to un-hide the > inter-regset moves from RA and allow it to collapse some moves. As an > example, the patched compiler removes a movd in gcc.target/i386/minmax-6.c > and still avoids the mismatched spill. > > 2019-08-23 Uroš Bizjak <ubiz...@gmail.com> > > * config/i386/i386.c (ix86_register_move_cost): Do not > limit the cost of moves to/from XMM register to minimum 8. > * config/i386/i386-features.c > (general_scalar_chain::make_vector_copies): Do not generate > zeroing move from GPR to XMM register, use gen_move_insn > instead of gen_gpr_to_xmm_move_src. > (general_scalar_chain::convert_op): Ditto. > (gen_gpr_to_xmm_move_src): Remove. > > The patch was bootstrapped and regression tested on x86_64-linux-gnu > {,-m32}, configured w/ and w/o --with-arch=ivybridge. > > The patch regresses PR80481 scan-asm-not (where the compiler generates > an unrelated XMM spill on register-starved x86_32). However, during the > analysis, I found that the original issue is not fixed, and is still > visible without -funroll-loops [1]. > > [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80481#c10 > > So, I'd wait for HJ's benchmark results for the cost to/from XMM > change before proceeding with the patch. > > Uros.
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index fb7ac1b7d102..bfec13af5096 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -662,25 +662,6 @@ scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) emit_insn_after (insns, BB_HEAD (new_bb)); } -/* Generate the canonical SET_SRC to move GPR to a VMODE vector register, - zeroing the upper parts. */ - -static rtx -gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr) -{ - switch (GET_MODE_NUNITS (vmode)) - { - case 1: - return gen_rtx_SUBREG (vmode, gpr, 0); - case 2: - return gen_rtx_VEC_CONCAT (vmode, gpr, - CONST0_RTX (GET_MODE_INNER (vmode))); - default: - return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr), - CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U)); - } -} - /* Make vector copies for all register REGNO definitions and replace its uses in a chain. */ @@ -707,8 +688,8 @@ general_scalar_chain::make_vector_copies (unsigned regno) } else emit_move_insn (copy_rtx (tmp), reg); - emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0), - gen_gpr_to_xmm_move_src (vmode, tmp))); + + emit_insn (gen_move_insn (vreg, tmp)); } else if (!TARGET_64BIT && smode == DImode) { @@ -738,8 +719,8 @@ general_scalar_chain::make_vector_copies (unsigned regno) } } else - emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0), - gen_gpr_to_xmm_move_src (vmode, reg))); + emit_insn (gen_move_insn (vreg, reg)); + rtx_insn *seq = get_insns (); end_sequence (); rtx_insn *insn = DF_REF_INSN (ref); @@ -910,9 +891,8 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn) { rtx tmp = gen_reg_rtx (GET_MODE (*op)); - emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0), - gen_gpr_to_xmm_move_src (vmode, *op)), - insn); + emit_insn_before (gen_move_insn (tmp, *op), insn); + *op = gen_rtx_SUBREG (vmode, tmp, 0); if (dump_file) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 49ab50ea41bf..11c75be113e0 100644 
--- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18601,9 +18601,9 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, where integer modes in SSE registers are not tieable because of missing QImode and HImode moves to, from or between MMX/SSE registers. */ - return MAX (8, SSE_CLASS_P (class1) - ? ix86_cost->hard_register.sse_to_integer - : ix86_cost->hard_register.integer_to_sse); + return (SSE_CLASS_P (class1) + ? ix86_cost->hard_register.sse_to_integer + : ix86_cost->hard_register.integer_to_sse); if (MAYBE_FLOAT_CLASS_P (class1)) return ix86_cost->hard_register.fp_move;