The following patch fixes PR target/115042 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115042).
The patch was successfully bootstrapped and tested on x86_64, i686, aarch64, and ppc64le.
commit e275c510f6545410b231e76fcbc00bbb882b91d7 Author: Vladimir N. Makarov <[email protected]> Date: Thu Feb 26 15:01:53 2026 -0500 [PR115042, LRA]: Postpone processing of new reload insns LRA in this PR can not find regs for asm insn which requires 11 general regs when 13 regs are available. Arm subtarget (thumb) has two stores with low and high general regs. LRA systematically chooses stores involving low regs as having less costs and there are only 8 low regs. That is because LRA (and reload) chooses (mov) insn alternatives independently from register pressure. The proposed patch postpones processing new reload insns until reload pseudos are assigned and after that considers new reload insns. Depending on the assignment LRA chooses insns involving low or high regs. Generally speaking it can change code generation in better or worse way but it should be a rare case. The patch does not contain the test as original test is too big (300KB of C code). Unfortunately cvise after 2 days of work managed to decrease the test only to 100KB file. gcc/ChangeLog: PR target/115042 * lra-int.h (lra_constraint_insn_stack_clear): New prototype. * lra.cc (lra_constraint_insn_stack2): New vector. (lra_constraint_insn_stack_clear): New function. (lra): Initialize/finalize lra_constraint_insn_stack2. * lra-constraints.cc (lra_constraints): Use lra_constraint_insn_stack_clear to postpone processing new reload insns. diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index ccd68efc956..04f868c7dad 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -5504,10 +5504,9 @@ bool lra_constraints (bool first_p) { bool changed_p; - int i, hard_regno, new_insns_num; - unsigned int min_len, new_min_len, uid; + int i, hard_regno; + unsigned int uid; rtx set, x, reg, nosubreg_dest; - rtx_insn *original_insn; basic_block last_bb; bitmap_iterator bi; @@ -5617,36 +5616,18 @@ lra_constraints (bool first_p) substituted by their equivalences. 
*/ EXECUTE_IF_SET_IN_BITMAP (equiv_insn_bitmap, 0, uid, bi) lra_push_insn_by_uid (uid); - min_len = lra_insn_stack_length (); - new_insns_num = 0; last_bb = NULL; changed_p = false; - original_insn = NULL; - while ((new_min_len = lra_insn_stack_length ()) != 0) + auto constraint_insns = lra_constraint_insn_stack_clear (); + while (constraint_insns.length () != 0) { - curr_insn = lra_pop_insn (); - --new_min_len; + curr_insn = constraint_insns.pop (); curr_bb = BLOCK_FOR_INSN (curr_insn); if (curr_bb != last_bb) { last_bb = curr_bb; bb_reload_num = lra_curr_reload_num; } - if (min_len > new_min_len) - { - min_len = new_min_len; - new_insns_num = 0; - original_insn = curr_insn; - } - else if (combine_reload_insn (curr_insn, original_insn)) - { - continue; - } - if (new_insns_num > MAX_RELOAD_INSNS_NUMBER) - internal_error - ("maximum number of generated reload insns per insn achieved (%d)", - MAX_RELOAD_INSNS_NUMBER); - new_insns_num++; if (DEBUG_INSN_P (curr_insn)) { /* We need to check equivalence in debug insn and change @@ -5737,7 +5718,15 @@ lra_constraints (bool first_p) init_curr_insn_input_reloads (); init_curr_operand_mode (); if (curr_insn_transform (false)) - changed_p = true; + { + if (lra_insn_stack_length () != 0) + { + auto reload_insn = lra_pop_insn (); + if (!combine_reload_insn (reload_insn, curr_insn)) + lra_push_insn (reload_insn); + } + changed_p = true; + } /* Check non-transformed insns too for equiv change as USE or CLOBBER don't need reloads but can contain pseudos being changed on their equivalences. 
*/ diff --git a/gcc/lra-int.h b/gcc/lra-int.h index 1c0561f496c..18c4cf6eaeb 100644 --- a/gcc/lra-int.h +++ b/gcc/lra-int.h @@ -310,6 +310,7 @@ extern void lra_push_insn_by_uid (unsigned int); extern void lra_push_insn_and_update_insn_regno_info (rtx_insn *); extern rtx_insn *lra_pop_insn (void); extern unsigned int lra_insn_stack_length (void); +extern vec<rtx_insn *> lra_constraint_insn_stack_clear (void); extern rtx lra_create_new_reg (machine_mode, rtx, enum reg_class, HARD_REG_SET *, const char *); diff --git a/gcc/lra.cc b/gcc/lra.cc index 20a3db45747..b4471a3e1c8 100644 --- a/gcc/lra.cc +++ b/gcc/lra.cc @@ -1808,8 +1808,8 @@ lra_rtx_hash (rtx x) /* Bitmap used to put an insn on the stack only in one exemplar. */ static sbitmap lra_constraint_insn_stack_bitmap; -/* The stack itself. */ -vec<rtx_insn *> lra_constraint_insn_stack; +/* Vectors used for the stack. */ +static vec<rtx_insn *> lra_constraint_insn_stack, lra_constraint_insn_stack2; /* Put INSN on the stack. If ALWAYS_UPDATE is true, always update the reg info for INSN, otherwise only update it if INSN is not already on the @@ -1868,6 +1868,16 @@ lra_insn_stack_length (void) return lra_constraint_insn_stack.length (); } +/* Purge the stack and return stack vector before purging. */ +vec<rtx_insn *> +lra_constraint_insn_stack_clear (void) +{ + std::swap (lra_constraint_insn_stack, lra_constraint_insn_stack2); + lra_constraint_insn_stack.truncate (0); + bitmap_clear (lra_constraint_insn_stack_bitmap); + return lra_constraint_insn_stack2; +} + /* Push insns FROM to TO (excluding it) going in reverse order. */ static void push_insns (rtx_insn *from, rtx_insn *to) @@ -2458,6 +2468,7 @@ lra (FILE *f, int verbose) expensive when a lot of RTL changes are made. 
*/ df_set_flags (DF_NO_INSN_RESCAN); lra_constraint_insn_stack.create (get_max_uid ()); + lra_constraint_insn_stack2.create (get_max_uid ()); lra_constraint_insn_stack_bitmap = sbitmap_alloc (get_max_uid ()); bitmap_clear (lra_constraint_insn_stack_bitmap); lra_live_ranges_init (); @@ -2635,6 +2646,7 @@ lra (FILE *f, int verbose) finish_reg_info (); sbitmap_free (lra_constraint_insn_stack_bitmap); lra_constraint_insn_stack.release (); + lra_constraint_insn_stack2.release (); finish_insn_recog_data (); lra_finish_equiv (); regstat_free_n_sets_and_refs ();
