In some benchmarks, I noticed that STV failed because the conversion was
considered unprofitable, even though the igain is inside the loop while
the sse<->integer conversion is outside the loop.  The current cost model
doesn't consider the execution frequency of those gains/costs.
This patch weights those gains/costs by frequency, just like LRA does.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for GCC16?

gcc/ChangeLog:

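        * config/i386/i386-features.cc (scalar_chain::mark_dual_mode_def):
        Weight n_integer_to_sse/n_sse_to_integer by the frequency of the
        basic block containing the def.
        (general_scalar_chain::compute_convert_gain): Weight igain by the
        frequency of the basic block containing the insn.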
---
 gcc/config/i386/i386-features.cc | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c35ac24fd8a..ae0844a70c2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -337,18 +337,20 @@ scalar_chain::mark_dual_mode_def (df_ref def)
   /* Record the def/insn pair so we can later efficiently iterate over
      the defs to convert on insns not in the chain.  */
   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
+  unsigned frequency
+    = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (DF_REF_INSN (def)));
   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
     {
       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
          && !reg_new)
        return;
-      n_integer_to_sse++;
+      n_integer_to_sse += frequency;
     }
   else
     {
       if (!reg_new)
        return;
-      n_sse_to_integer++;
+      n_sse_to_integer += frequency;
     }
 
   if (dump_file)
@@ -556,6 +558,8 @@ general_scalar_chain::compute_convert_gain ()
       rtx src = SET_SRC (def_set);
       rtx dst = SET_DEST (def_set);
       int igain = 0;
+      unsigned frequency
+       = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
 
       if (REG_P (src) && REG_P (dst))
        igain += 2 * m - ix86_cost->xmm_move;
@@ -755,6 +759,7 @@ general_scalar_chain::compute_convert_gain ()
            }
        }
 
+      igain *= frequency;
       if (igain != 0 && dump_file)
        {
          fprintf (dump_file, "  Instruction gain %d for ", igain);
-- 
2.34.1

Reply via email to