https://gcc.gnu.org/g:fc03165fb8829a78224882d053e7850132bfb9c9

commit r15-10180-gfc03165fb8829a78224882d053e7850132bfb9c9
Author: Georg-Johann Lay <a...@gjlay.de>
Date:   Wed Jul 30 12:34:56 2025 +0200

    AVR: rtl-optimization/121340 - New mini-pass to undo superfluous moves from 
insn combine.
    
    Insn combine may come up with superfluous reg-reg moves, where the combine
    people say that these are no problem since reg-alloc is supposed to optimize
    them.  The issue is that the lower-subreg pass sitting between combine and
    reg-alloc may split such moves, coming up with a zoo of subregs which are
    only handled poorly by the register allocator.
    
    This patch adds a new avr mini-pass that handles such cases.
    
    As an example, take
    
    int f_ffssi (long x)
    {
        return __builtin_ffsl (x);
    }
    
    where the two functions have the same interface, i.e. there are no extra
    moves required for the argument or for the return value. However,
    
    $ avr-gcc -S -Os -dp -mno-fuse-move ...
    
    f_ffssi:
            mov r20,r22      ;  29  [c=4 l=1]  movqi_insn/0
            mov r21,r23      ;  30  [c=4 l=1]  movqi_insn/0
            mov r22,r24      ;  31  [c=4 l=1]  movqi_insn/0
            mov r23,r25      ;  32  [c=4 l=1]  movqi_insn/0
            mov r25,r23      ;  33  [c=4 l=4]  *movsi/0
            mov r24,r22
            mov r23,r21
            mov r22,r20
            rcall __ffssi2   ;  34  [c=16 l=1]  *ffssihi2.libgcc
            ret              ;  37  [c=0 l=1]  return
    
    where all the moves add up to a no-op.  The -mno-fuse-move option
    stops any attempts by the avr backend to clean up that mess.
    
            PR rtl-optimization/121340
    gcc/
            * config/avr/avr.opt (-mfuse-move2): New option.
            * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine.
            * config/avr/avr-passes.cc (make_avr_pass_2moves): New function.
            (pass_data avr_pass_data_2moves): New static variable.
            (avr_pass_2moves): New rtl_opt_pass.
            * config/avr/avr-protos.h (make_avr_pass_2moves): New proto.
            * common/config/avr/avr-common.cc
            (default_options avr_option_optimization_table) <-mfuse-move2>:
            Set for -O1 and higher.
            * doc/invoke.texi (AVR Options) <-mfuse-move2>: Document.
            * config/avr/avr.opt.urls (-mfuse-move2): Add url.
    
    (cherry picked from commit 7d75e87f3051fa29d48e923016f04a8042a8ec53)

Diff:
---
 gcc/common/config/avr/avr-common.cc |   1 +
 gcc/config/avr/avr-passes.cc        | 139 ++++++++++++++++++++++++++++++++++++
 gcc/config/avr/avr-passes.def       |   8 +++
 gcc/config/avr/avr-protos.h         |   1 +
 gcc/config/avr/avr.opt              |   4 ++
 gcc/config/avr/avr.opt.urls         |   3 +
 gcc/doc/invoke.texi                 |   6 +-
 7 files changed, 161 insertions(+), 1 deletion(-)

diff --git a/gcc/common/config/avr/avr-common.cc 
b/gcc/common/config/avr/avr-common.cc
index 203a96528186..d8b982c4fa6f 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -38,6 +38,7 @@ static const struct default_options 
avr_option_optimization_table[] =
     { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 },
     { OPT_LEVELS_1_PLUS, OPT_mfuse_add_, NULL, 1 },
     { OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
+    { OPT_LEVELS_1_PLUS, OPT_mfuse_move2, NULL, 1 },
     { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
     { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
     { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 2c21e7be7aba..6bb9e131045c 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4847,6 +4847,137 @@ avr_pass_fuse_add::execute1 (function *func)
 }
 
 
+
+//////////////////////////////////////////////////////////////////////////////
+// Fuse 2 move insns after combine.
+
+static const pass_data avr_pass_data_2moves =
+{
+  RTL_PASS,        // type
+  "",              // name (will be patched)
+  OPTGROUP_NONE,    // optinfo_flags
+  TV_DF_SCAN,      // tv_id
+  0,               // properties_required
+  0,               // properties_provided
+  0,               // properties_destroyed
+  0,               // todo_flags_start
+  0                // todo_flags_finish
+};
+
+class avr_pass_2moves : public rtl_opt_pass
+{
+public:
+  avr_pass_2moves (gcc::context *ctxt, const char *name)
+    : rtl_opt_pass (avr_pass_data_2moves, ctxt)
+  {
+    this->name = name;
+  }
+
+  unsigned int execute (function *func) final override
+  {
+    if (optimize && avropt_fuse_move2)
+      {
+       bool changed = false;
+       basic_block bb;
+
+       FOR_EACH_BB_FN (bb, func)
+         {
+           changed |= optimize_2moves_bb (bb);
+         }
+
+       if (changed)
+         {
+           df_note_add_problem ();
+           df_analyze ();
+         }
+      }
+
+    return 0;
+  }
+
+  bool optimize_2moves (rtx_insn *, rtx_insn *);
+  bool optimize_2moves_bb (basic_block);
+}; // avr_pass_2moves
+
+bool
+avr_pass_2moves::optimize_2moves_bb (basic_block bb)
+{
+  bool changed = false;
+  rtx_insn *insn1 = nullptr;
+  rtx_insn *insn2 = nullptr;
+  rtx_insn *curr;
+
+  FOR_BB_INSNS (bb, curr)
+    {
+      if (insn1 && INSN_P (insn1)
+         && insn2 && INSN_P (insn2))
+       changed |= optimize_2moves (insn1, insn2);
+
+      insn1 = insn2;
+      insn2 = curr;
+    }
+
+  return changed;
+}
+
+bool
+avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2)
+{
+  bool good = false;
+  bool bad = false;
+  rtx set1, dest1, src1;
+  rtx set2, dest2, src2;
+
+  if ((set1 = single_set (insn1))
+      && (set2 = single_set (insn2))
+      && (src1 = SET_SRC (set1))
+      && REG_P (src2 = SET_SRC (set2))
+      && REG_P (dest1 = SET_DEST (set1))
+      && REG_P (dest2 = SET_DEST (set2))
+      && rtx_equal_p (dest1, src2)
+      // Now we have:
+      // insn1: dest1 = src1
+      // insn2: dest2 = dest1
+      && REGNO (dest1) >= FIRST_PSEUDO_REGISTER
+      // Paranoia.
+      && GET_CODE (PATTERN (insn1)) != PARALLEL
+      && GET_CODE (PATTERN (insn2)) != PARALLEL
+      && (rtx_equal_p (dest2, src1)
+         || !reg_overlap_mentioned_p (dest2, src1)))
+    {
+      avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2);
+      avr_dump (";; reg %d: insn uses uids:", REGNO (dest1));
+
+      // Go check that dest1 is used exactly once, namely by insn2.
+
+      df_ref use = DF_REG_USE_CHAIN (REGNO (dest1));
+      for (; use; use = DF_REF_NEXT_REG (use))
+       {
+         rtx_insn *user = DF_REF_INSN (use);
+         avr_dump (" %d", INSN_UID (user));
+         good |= INSN_UID (user) == INSN_UID (insn2);
+         bad |= INSN_UID (user) != INSN_UID (insn2);
+       }
+      avr_dump (".\n");
+
+      if (good && !bad
+         // Propagate src1 to insn2:
+         // insn1: # Deleted
+         // insn2: dest2 = src1
+         && validate_change (insn2, &SET_SRC (set2), src1, false))
+       {
+         SET_INSN_DELETED (insn1);
+         return true;
+       }
+    }
+
+  if (good && !bad)
+    avr_dump (";; Failed\n");
+
+  return false;
+}
+
+
 
 //////////////////////////////////////////////////////////////////////////////
 // Split insns with nonzero_bits() after combine.
@@ -5710,6 +5841,14 @@ make_avr_pass_casesi (gcc::context *ctxt)
   return new avr_pass_casesi (ctxt, "avr-casesi");
 }
 
+// Optimize 2 consecutive moves after combine.
+
+rtl_opt_pass *
+make_avr_pass_2moves (gcc::context *ctxt)
+{
+  return new avr_pass_2moves (ctxt, "avr-2moves");
+}
+
 rtl_opt_pass *
 make_avr_pass_split_nzb (gcc::context *ctxt)
 {
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index eb60a93eeeb0..d668c7fadfc2 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, 
avr_pass_recompute_notes);
 
 INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
 
+/* Insn combine may come up with superfluous reg-reg moves, where the combine
+   people say that these are no problem since reg-alloc is supposed to optimize
+   them.  The issue is that the lower-subreg pass sitting between combine and
+   reg-alloc may split such moves, coming up with a zoo of subregs which are
+   only handled poorly by the register allocator.  */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves);
+
 /* Some combine insns have nonzero_bits() in their condition, though insns
    should not use such stuff in their condition.  Therefore, we split such
    insn into something without nonzero_bits() in their condition right after
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ca30136797dd..37911e7d275d 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -208,6 +208,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
 extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *);
 #ifdef RTX_CODE
 extern bool avr_casei_sequence_check_operands (rtx *xop);
 extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index fcd2bf68f2a8..6b03a26d2dba 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -164,6 +164,10 @@ mfuse-move=
 Target Joined RejectNegative UInteger Var(avropt_fuse_move) Init(0) 
Optimization IntegerRange(0, 23)
 -mfuse-move=<0,23>     Optimization. Run a post-reload pass that tweaks move 
instructions.
 
+mfuse-move2
+Target Var(avropt_fuse_move2) Init(0) Optimization
+Optimization. Fuse some move insns after insn combine.
+
 mabsdata
 Target Mask(ABSDATA)
 Assume that all data in static storage can be accessed by LDS / STS 
instructions.  This option is only useful for reduced Tiny devices like 
ATtiny40.
diff --git a/gcc/config/avr/avr.opt.urls b/gcc/config/avr/avr.opt.urls
index 662fdee38fd4..87c26b2f95e5 100644
--- a/gcc/config/avr/avr.opt.urls
+++ b/gcc/config/avr/avr.opt.urls
@@ -92,6 +92,9 @@ UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
 mfuse-move=
 UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
 
+mfuse-move2
+UrlSuffix(gcc/AVR-Options.html#index-mfuse-move2)
+
 mabsdata
 UrlSuffix(gcc/AVR-Options.html#index-mabsdata)
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 72009c33cd0f..64a3131c1906 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -913,7 +913,7 @@ Objective-C and Objective-C++ Dialects}.
 @emph{AVR Options} (@ref{AVR Options})
 @gccoptlist{-mmcu=@var{mcu}  -mabsdata  -maccumulate-args  -mcvt
 -mbranch-cost=@var{cost}  -mfuse-add=@var{level}  -mfuse-move=@var{level}
--mcall-prologues  -mgas-isr-prologues  -mint8  -mflmap
+-mfuse-move2  -mcall-prologues  -mgas-isr-prologues  -mint8  -mflmap
 -mdouble=@var{bits}  -mlong-double=@var{bits}  -mno-call-main
 -mn_flash=@var{size}  -mfract-convert-truncate  -mno-interrupts
 -mmain-is-OS_task  -mrelax  -mrmw  -mstrict-X  -mtiny-stack
@@ -24860,6 +24860,10 @@ Valid values for @var{level} are in the range @code{0} 
@dots{} @code{23}
 which is a 3:2:2:2 mixed radix value.  Each digit controls some
 aspect of the optimization.
 
+@opindex mfuse-move2
+@item -mfuse-move2
+Run a post combine optimization pass that tries to fuse move instructions.
+
 @opindex mstrict-X
 @item -mstrict-X
 Use address register @code{X} in a way proposed by the hardware.  This means

Reply via email to