[gcc r16-4322] xtensa: Optimize assignment of certain constants to hardware FP registers

Max Filippov via Gcc-cvs Thu, 09 Oct 2025 02:30:11 -0700

https://gcc.gnu.org/g:4864f24c86e1bfd8c34c74aed5ec73dcd98151b1


commit r16-4322-g4864f24c86e1bfd8c34c74aed5ec73dcd98151b1
Author: Takayuki 'January June' Suwa <[email protected]>
Date:   Fri Sep 19 21:25:10 2025 +0900

    xtensa: Optimize assignment of certain constants to hardware FP registers
    
    This patch introduces an optimization that replaces assignments of signed
    12-bit integer values divided by the 0th through 15th power of two to
    hardware FP registers with assignments of those integer values to address
    (GP) registers followed by negatively-scaled floating-point conversion
    instructions.
    
    For example, 0.12005615234375f is exactly equal to (1967.f / (1 << 14)), so
    we can emit a sequence such as:
    
            movi    a9, 1967
            float.s f0, a9, 14
    
    if such a conversion reduces costs.
    
    gcc/ChangeLog:
    
            * config/xtensa/xtensa.cc (xt_full_rtx_costs):
            New struct, derived from full_rtx_costs.
            (FPreg_neg_scaled_simm12b_1, FPreg_neg_scaled_simm12b):
            New worker functions.
            (do_largeconst): Add a call to FPreg_neg_scaled_simm12b() to the
            insn enumeration loop.

Diff:
---
 gcc/config/xtensa/xtensa.cc | 170 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 4b44d35054b7..c878f1183c6b 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -5730,6 +5730,168 @@ xtensa_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
 namespace
 {
 
+/* Lightweight extension of full_rtx_costs that makes it concise to
+   accumulate the costs of insn sequences and to compare the totals.  */
+
+struct xt_full_rtx_costs : public full_rtx_costs
+{
+  /* Default instance, initialized by init_costs_to_zero ().  */
+  xt_full_rtx_costs ()
+  {
+    init_costs_to_zero (this);
+  }
+
+  /* Instance holding the summed costs of the insns chained from SEQ.  */
+  explicit xt_full_rtx_costs (rtx_insn *seq)
+    : xt_full_rtx_costs ()
+  {
+    for (rtx_insn *cur = seq; cur; cur = NEXT_INSN (cur))
+      *this += cur;
+  }
+
+  /* Add the speed cost and the size cost of INSN to the totals.  */
+  xt_full_rtx_costs &operator+= (rtx_insn *insn)
+  {
+    speed += xtensa_insn_cost (insn, true);
+    size += xtensa_insn_cost (insn, false);
+    return *this;
+  }
+
+  /* "Less-than" comparison, delegated to costs_lt_p (); its last
+     argument is true unless optimizing for size.  */
+  bool operator< (xt_full_rtx_costs &rhs)
+  {
+    return costs_lt_p (this, &rhs, !optimize_size);
+  }
+
+  /* Primary cost component: size when optimize_size is set, otherwise
+     speed.  */
+  int major () { return optimize_size ? size : speed; }
+
+  /* Secondary cost component: speed when optimize_size is set, otherwise
+     size.  */
+  int minor () { return optimize_size ? speed : size; }
+};
+
+/* Optimize assignment of negatively-scaled (up to the minus 15th power
+   of two) signed 12-bit integer immediate values to hardware FP registers
+   when that reduces costs, e.g. 0.12005615234375f == 1967.f / (1 << 14):
+       movi    a9, 1967
+       float.s f0, a9, 14
+   This worker returns true if *RVAL is such a value, storing the integer
+   in V and the power-of-two exponent (-15 to 0) to apply to it in SCALE.  */
+
+static bool
+FPreg_neg_scaled_simm12b_1 (const REAL_VALUE_TYPE *rval,
+                           HOST_WIDE_INT &v, int &scale)
+{
+  /* Only finite, non-zero values qualify.  */
+  if (rval->cl == rvc_zero || ! real_isfinite (rval))
+    return false;
+
+  /* Multiplied by 32768 (2**15), the value must be an exact integer...  */
+  REAL_VALUE_TYPE scaled;
+  real_ldexp (&scaled, rval, 15);
+  if (! real_isinteger (&scaled, &v))
+    return false;
+
+  /* ...that fits into a signed 12-bit once trailing '0' bits are dropped.  */
+  const int drop_bits = MIN (ctz_hwi (v), 15);
+  v >>= drop_bits;
+  if (! xtensa_simm12b (v))
+    return false;
+  scale = drop_bits - 15;
+  return true;
+}
+
+/* If INSN and the insn that follows it have the form
+     (set (reg:SF gpr) (const_double:SF cst))
+     (set (reg:SF fpr) (reg:SF gpr))      ; with REG_DEAD (reg:SF gpr)
+   where cst is a negatively-scaled signed 12-bit integer, rewrite them as
+   an integer load and a scaled conversion if cheaper.  Return true if so.  */
+
+static bool
+FPreg_neg_scaled_simm12b (rtx_insn *insn)
+{
+  rtx pat, dest, src, pat_1, dest_1, note, dest_2, pat_2;
+  HOST_WIDE_INT v;
+  int scale;
+  rtx_insn *next, *last, *seq;
+  REAL_VALUE_TYPE r;
+
+  if (TARGET_HARD_FLOAT && !TARGET_CONST16
+      && GET_CODE (pat = PATTERN (insn)) == SET
+      && REG_P (dest = SET_DEST (pat)) && GP_REG_P (REGNO (dest))
+      && GET_MODE (dest) == SFmode
+      && CONST_DOUBLE_P (src = avoid_constant_pool_reference (SET_SRC (pat)))
+      && GET_MODE (src) == SFmode
+      && FPreg_neg_scaled_simm12b_1 (CONST_DOUBLE_REAL_VALUE (src),
+                                    v, scale)
+      && (next = next_nonnote_nondebug_insn (insn))
+      && NONJUMP_INSN_P (next)
+      && GET_CODE (pat_1 = PATTERN (next)) == SET
+      && REG_P (dest_1 = SET_DEST (pat_1)) && FP_REG_P (REGNO (dest_1))
+      && GET_MODE (dest_1) == SFmode
+      && rtx_equal_p (SET_SRC (pat_1), dest)
+      && (note = find_reg_note (next, REG_DEAD, dest)))
+    {
+      /* Estimate the costs of two matching insns.  */
+      xt_full_rtx_costs costs;
+      costs += insn, costs += next;
+
+      /* Prepare alternative insns and estimate their costs.  */
+      start_sequence ();
+      emit_insn (gen_rtx_SET (dest_2 = gen_rtx_REG (SImode, REGNO (dest)),
+                             GEN_INT (v)));
+      pat_2 = gen_rtx_FLOAT (SFmode, dest_2);
+      if (scale < 0)
+       {
+         real_ldexp (&r, &dconst1, scale);
+         pat_2 = gen_rtx_MULT (SFmode, pat_2,
+                               const_double_from_real_value (r, SFmode));
+       }
+      last = emit_insn (gen_rtx_SET (dest_1, pat_2));
+      xt_full_rtx_costs costs_1 (seq = end_sequence ());
+
+      /* Replace the original insns only if the alternative is cheaper.  */
+      if (costs_1 < costs)
+       {
+         if (dump_file)
+           {
+             fputs ("FPreg_neg_scaled_simm12b: ", dump_file);
+             dump_value_slim (dump_file, src, 0);
+             fprintf (dump_file,
+                      "f = (" HOST_WIDE_INT_PRINT_DEC ".f/(1<<%d))\n",
+                      v, -scale);
+             dump_insn_slim (dump_file, insn);
+             dump_insn_slim (dump_file, next);
+           }
+         /* Change both insns atomically, or neither of them.  */
+         validate_change (insn, &PATTERN (insn), PATTERN (seq), 1);
+         validate_change (next, &PATTERN (next), PATTERN (last), 1);
+         if (! apply_change_group ())
+           return false;
+         remove_reg_equal_equiv_notes (insn);
+         remove_reg_equal_equiv_notes (next);
+         remove_note (next, note);
+         add_reg_note (next, REG_EQUIV, src);
+         add_reg_note (next, REG_DEAD, dest_2);
+         if (dump_file)
+           {
+             fprintf (dump_file,
+                      "FPreg_neg_scaled_simm12b: costs (%d,%d) -> (%d,%d)\n",
+                      costs.major (), costs.minor (),
+                      costs_1.major (), costs_1.minor ());
+             dump_insn_slim (dump_file, insn);
+             dump_insn_slim (dump_file, next);
+           }
+         return true;
+       }
+    }
+
+  return false;
+}
+
 /* Replace the source of [SH]Imode allocation whose value does not fit
    into signed 12 bits with a reference to litpool entry.  */
 
@@ -5791,11 +5953,19 @@ static void
 do_largeconst (void)
 {
   bool replacing_required = !TARGET_CONST16 && !TARGET_AUTO_LITPOOLS;
+  bool optimize_enabled = optimize && !optimize_debug;
   rtx_insn *insn;
 
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     if (NONJUMP_INSN_P (insn))
       {
+       /* Optimize assignment of negatively scaled (up to the minus
+          15th power of two) signed 12-bit immediate values to hardware
+          floating-point registers.  */
+       if (optimize_enabled
+           && FPreg_neg_scaled_simm12b (insn))
+         continue;
+
        /* Replace the source of [SH]Imode allocation whose value does not
           fit into signed 12 bits with a reference to litpool entry.  */
        if (replacing_required)

[gcc r16-4322] xtensa: Optimize assignment of certain constants to hardware FP registers

Reply via email to