In some benchmarks, I noticed that STV failed because the conversion was
considered unprofitable: the igain is inside the loop while the
sse<->integer conversion is outside the loop, but the current cost model
doesn't consider the frequency of those gains/costs.
The patch weights those costs by frequency, just like LRA does.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for GCC16?
gcc/ChangeLog:
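* config/i386/i386-features.cc (scalar_chain::mark_dual_mode_def):
Weight n_integer_to_sse and n_sse_to_integer by the frequency of the
basic block containing the def's insn.
(general_scalar_chain::compute_convert_gain): Weight igain by the
frequency of the basic block containing the insn.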
---
gcc/config/i386/i386-features.cc | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c35ac24fd8a..ae0844a70c2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -337,18 +337,20 @@ scalar_chain::mark_dual_mode_def (df_ref def)
/* Record the def/insn pair so we can later efficiently iterate over
the defs to convert on insns not in the chain. */
bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
+ unsigned frequency
+ = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (DF_REF_INSN (def)));
if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
{
if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
&& !reg_new)
return;
- n_integer_to_sse++;
+ n_integer_to_sse += frequency;
}
else
{
if (!reg_new)
return;
- n_sse_to_integer++;
+ n_sse_to_integer += frequency;
}
if (dump_file)
@@ -556,6 +558,8 @@ general_scalar_chain::compute_convert_gain ()
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
int igain = 0;
+ unsigned frequency
+ = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
if (REG_P (src) && REG_P (dst))
igain += 2 * m - ix86_cost->xmm_move;
@@ -755,6 +759,7 @@ general_scalar_chain::compute_convert_gain ()
}
}
+ igain *= frequency;
if (igain != 0 && dump_file)
{
fprintf (dump_file, " Instruction gain %d for ", igain);
--
2.34.1