The following patch solves

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115042

The patch was successfully tested and bootstrapped on x86_64, i686, aarch64, and ppc64le.

commit 2cddf4c54f2a777ce215cdec621738cef4f2bdbc
Author: Vladimir N. Makarov <[email protected]>
Date:   Tue Mar 3 15:24:39 2026 -0500

    [PR115042, LRA]: Postpone processing of new reload insns, 2nd variant
    
    This is the second attempt to solve the PR.  The first attempt (see
    commit 9a7da540b63e7d77e747b5cdd6fdbbd3954e28c8) resulted in numerous
    test suite failures on some secondary targets.
    
    LRA in this PR cannot find regs for an asm insn which requires 11
    general regs when 13 regs are available.  An Arm subtarget (thumb) has
    two store instructions, one using low and one using high general regs.
    LRA systematically chooses stores involving low regs as having a lower
    cost, and there are only 8 low regs.  That is because LRA (and reload)
    chooses (mov) insn alternatives independently of register pressure.
    
    The proposed patch postpones processing new reload insns until the
    reload pseudos are assigned, and only then processes their
    constraints.  We postpone reloads only for asm insns, as they can
    have a lot of operands.  Depending on the assignment, LRA chooses
    insns involving low or high regs.  Generally speaking this can change
    code generation in a better or a worse way, but it should be a very
    rare case.
    
    The patch does not contain a test, as the original test is too big
    (300KB of C code).  Unfortunately, after 2 days of work cvise managed
    to reduce the test only to a 100KB file.
    
    gcc/ChangeLog:
    
            PR target/115042
            * lra-int.h (lra_postponed_insns): New.
            * lra.cc (lra_set_insn_deleted, lra_asm_insn_error): Clear
            postponed insn flag.
            (lra_process_new_insns): Propagate postponed insn flag for asm
            gotos.
            (lra_postponed_insns): New.
            (lra): Initialize lra_postponed_insns.  Push postponed insns on
            the stack.
            * lra-constraints.cc (postpone_insns): New function.
            (curr_insn_transform): Use it to postpone processing reload insn
            constraints.  Skip processing postponed insns.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 1b5a875f724..84f78b40d6a 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4245,7 +4245,7 @@ simple_move_p (void)
 	  /* The backend guarantees that register moves of cost 2
 	     never need reloads.  */
 	  && targetm.register_move_cost (GET_MODE (src), sclass, dclass) == 2);
- }
+}
 
 /* Swap operands NOP and NOP + 1. */
 static inline void
@@ -4293,6 +4293,22 @@ multiple_insn_refs_p (int regno)
   return false;
 }
 
+/* Mark insns starting with FIRST as postponed for processing their
+   constraints.  See comments for lra_postponed_insns.  */
+static void
+postpone_insns (rtx_insn *first)
+{
+  for (auto insn = first; insn != NULL_RTX; insn = NEXT_INSN (insn))
+    {
+      bitmap_set_bit (&lra_postponed_insns, INSN_UID (insn));
+      if (lra_dump_file != NULL)
+	{
+	  fprintf (lra_dump_file, "    Postponing constraint processing: ");
+	  dump_insn_slim (lra_dump_file, insn);
+	}
+    }
+}
+
 /* Main entry point of the constraint code: search the body of the
    current insn to choose the best alternative.  It is mimicking insn
    alternative cost calculation model of former reload pass.  That is
@@ -4442,9 +4458,17 @@ curr_insn_transform (bool check_only_p)
        we chose previously may no longer be valid.  */
     lra_set_used_insn_alternative (curr_insn, LRA_UNKNOWN_ALT);
 
-  if (! check_only_p && curr_insn_set != NULL_RTX
-      && check_and_process_move (&change_p, &sec_mem_p))
-    return change_p;
+  if (! check_only_p)
+    {
+      if (bitmap_bit_p (&lra_postponed_insns, INSN_UID (curr_insn)))
+	/* Processing of the insn constraints was postponed.  Do nothing, the insn
+	   will be processed on the next constraint sub-pass after assignment
+	   of reload pseudos in the insn.  */
+	return true;
+      if (curr_insn_set != NULL_RTX
+	  && check_and_process_move (&change_p, &sec_mem_p))
+	return change_p;
+    }
 
  try_swapped:
 
@@ -5092,6 +5116,14 @@ curr_insn_transform (bool check_only_p)
 	  const_insn = prev;
 	}
     }
+  if (asm_noperands (PATTERN (curr_insn)) >= 0)
+    {
+      /* Asm can have a lot of operands.  To guarantee their assignment,
+	 postpone processing the reload insns until the reload pseudos are
+	 assigned.  */
+      postpone_insns (before);
+      postpone_insns (after);
+    }
   lra_process_new_insns (curr_insn, before, after,
 			 "Inserting insn reload", true);
   if (const_regno >= 0) {
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 1c0561f496c..7da359ec2e2 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -356,6 +356,7 @@ extern bitmap_head lra_inheritance_pseudos;
 extern bitmap_head lra_split_regs;
 extern bitmap_head lra_subreg_reload_pseudos;
 extern bitmap_head lra_optional_reload_pseudos;
+extern bitmap_head lra_postponed_insns;
 
 /* lra-constraints.cc: */
 
diff --git a/gcc/lra.cc b/gcc/lra.cc
index 20a3db45747..f0871d3faf7 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -260,6 +260,7 @@ lra_invalidate_insn_data (rtx_insn *insn)
 void
 lra_set_insn_deleted (rtx_insn *insn)
 {
+  bitmap_clear_bit (&lra_postponed_insns, INSN_UID (insn));
   lra_invalidate_insn_data (insn);
   SET_INSN_DELETED (insn);
 }
@@ -559,6 +560,7 @@ lra_asm_insn_error (rtx_insn *insn)
     {
       ira_nullify_asm_goto (insn);
       lra_invalidate_insn_data (insn);
+      bitmap_clear_bit (&lra_postponed_insns, INSN_UID (insn));
     }
   else
     {
@@ -2014,7 +2016,7 @@ lra_process_new_insns (rtx_insn *insn, rtx_insn *before, rtx_insn *after,
 	      {
 		/* We already made the edge no-critical in ira.cc::ira */
 		lra_assert (!EDGE_CRITICAL_P (e));
-		rtx_insn *curr, *tmp = BB_HEAD (e->dest);
+		rtx_insn *tmp = BB_HEAD (e->dest);
 		if (LABEL_P (tmp))
 		  tmp = NEXT_INSN (tmp);
 		if (NOTE_INSN_BASIC_BLOCK_P (tmp))
@@ -2024,8 +2026,14 @@ lra_process_new_insns (rtx_insn *insn, rtx_insn *before, rtx_insn *after,
 		if (tmp == NULL)
 		  continue;
 		start_sequence ();
-		for (curr = after; curr != NULL_RTX; curr = NEXT_INSN (curr))
-		  emit_insn (copy_insn (PATTERN (curr)));
+		for (rtx_insn *curr = after; curr != NULL_RTX; curr = NEXT_INSN (curr))
+		  {
+		    rtx pat = copy_insn (PATTERN (curr));
+		    rtx_insn *copy = emit_insn (pat);
+		    if (bitmap_bit_p (&lra_postponed_insns, INSN_UID (curr)))
+		      /* Propagate flags of postponed insns.  */
+		      bitmap_set_bit (&lra_postponed_insns, INSN_UID (copy));
+		  }
 		rtx_insn *copy = get_insns (), *last = get_last_insn ();
 		end_sequence ();
 		if (lra_dump_file != NULL)
@@ -2045,6 +2053,10 @@ lra_process_new_insns (rtx_insn *insn, rtx_insn *before, rtx_insn *after,
 		   will be updated before the next assignment
 		   sub-pass. */
 	      }
+	  for (rtx_insn *curr = after; curr != NULL_RTX; curr = NEXT_INSN (curr))
+	    /* Clear flags of postponed insns which will be absent in the
+	       result code.  */
+	    bitmap_clear_bit (&lra_postponed_insns, INSN_UID (curr));
 	}
     }
   if (lra_dump_file != NULL)
@@ -2354,6 +2366,14 @@ bitmap_head lra_optional_reload_pseudos;
    pass.  */
 bitmap_head lra_subreg_reload_pseudos;
 
+/* UIDs of reload insns which should be processed after assigning the reload
+   pseudos.  We need to do this when reload pseudo should be a general reg but
+   we have different mov insns for different subsets of general regs, e.g. hi
+   and lo regs of arm thumb.  Such way we can guarantee finding regs for the
+   reload pseudos of asm insn which can have a lot of operands (general regs in
+   our example).  */
+bitmap_head lra_postponed_insns;
+
 /* File used for output of LRA debug information.  */
 FILE *lra_dump_file;
 
@@ -2469,6 +2489,7 @@ lra (FILE *f, int verbose)
   bitmap_initialize (&lra_split_regs, &reg_obstack);
   bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
   bitmap_initialize (&lra_subreg_reload_pseudos, &reg_obstack);
+  bitmap_initialize (&lra_postponed_insns, &reg_obstack);
   live_p = false;
   if (maybe_ne (get_frame_size (), 0) && crtl->stack_alignment_needed)
     /* If we have a stack frame, we must align it now.  The stack size
@@ -2575,6 +2596,11 @@ lra (FILE *f, int verbose)
 	    lra_create_live_ranges (true, true);
 	    live_p = true;
 	  }
+	  bitmap_iterator bi;
+	  unsigned int uid;
+	  EXECUTE_IF_SET_IN_BITMAP (&lra_postponed_insns, 0, uid, bi)
+	    lra_push_insn_by_uid (uid);
+	  bitmap_clear (&lra_postponed_insns);
 	}
       /* Don't clear optional reloads bitmap until all constraints are
 	 satisfied as we need to differ them from regular reloads.  */

Reply via email to