On 12/3/25 10:14 AM, Stefan Schulze Frielinghaus wrote:
Hi Vladimir,

Could you have a look at this?  Any feedback is welcome.

Sorry, it seems I missed your original message.

The patch is OK for me.  You can commit it into the trunk.

Thank you for fixing this.


Cheers,
Stefan

On Fri, Nov 07, 2025 at 09:26:21AM +0100, Stefan Schulze Frielinghaus wrote:
Ping

On Mon, Oct 13, 2025 at 10:52:48AM +0200, Stefan Schulze Frielinghaus wrote:
From: Stefan Schulze Frielinghaus <[email protected]>

This fixes

asm-hard-reg-3.c:10:1: error: unrecognizable insn:
    10 | }
       | ^
(insn 9 18 14 2 (parallel [
             (set (reg:DI 0 ax [orig:99 x ] [99])
                 (asm_operands:DI ("") ("=r") 0 [
                         (reg:SI 0 ax [100])
                         (reg:DI 1 dx [105]) repeated x2
                     ]
                      [
                         (asm_input:SI ("0") asm-hard-reg-3.c:8)
                         (asm_input:DI ("r") asm-hard-reg-3.c:8)
                         (asm_input:DI ("{r8}") asm-hard-reg-3.c:8)
                     ]
                      [] asm-hard-reg-3.c:8))
             (clobber (reg:CC 17 flags))
         ]) "asm-hard-reg-3.c":8:3 -1
      (nil))
during RTL pass: reload

During get_reload_reg() a reload register may be reused, and so far the
exclude start hard registers were not taken into account.  For the test
case this means operands 2 and 3 use the same reload register, which gets
dx assigned although the constraint of operand 3 refers to register
r8.  That in turn renders the insn unsatisfiable.

A conservative approach would be to simply not reuse any reload register
whenever the set of exclude start hard regs is non-empty.  However, this
would lead to some missed optimizations like in this example where
operands 2 and 3 would land in different registers.  Therefore, if both
share a start hard register, still reuse the reload and refine the
exclude start hard regs set.

I only have a test case for inputs.  However, I expect an analogous
problem for outputs which is why I adapted that case, too.

gcc/ChangeLog:

        * lra-constraints.cc (get_reload_reg): Honor exclude start regs
        while reusing reloads.

gcc/testsuite/ChangeLog:

        * gcc.dg/asm-hard-reg-9.c: New test.
---
  Bootstrapped and regtested on s390 and x86_64.  Ok for mainline?


  gcc/lra-constraints.cc                | 21 ++++++++++++++++++++-
  gcc/testsuite/gcc.dg/asm-hard-reg-9.c | 15 +++++++++++++++
  2 files changed, 35 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-9.c

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 48ce75781d4..f4223dd8e51 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -686,7 +686,11 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
          && (int) REGNO (original) >= new_regno_start
          && (INSN_UID (curr_insn) >= new_insn_uid_start
              || ira_former_scratch_p (REGNO (original)))
-         && in_class_p (original, rclass, &new_class, true))
+         && in_class_p (original, rclass, &new_class, true)
+         && (exclude_start_hard_regs == nullptr
+             || hard_reg_set_intersect_p (
+                 ~lra_reg_info[REGNO (original)].exclude_start_hard_regs,
+                 ~*exclude_start_hard_regs)))
        {
          unsigned int regno = REGNO (original);
          if (lra_dump_file != NULL)
@@ -698,6 +702,9 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
            lra_change_class (regno, new_class, ", change to", false);
          if (lra_dump_file != NULL)
            fprintf (lra_dump_file, "\n");
+         if (exclude_start_hard_regs)
+           lra_reg_info[regno].exclude_start_hard_regs
+             |= *exclude_start_hard_regs;
          *result_reg = original;
          return false;
        }
@@ -734,6 +741,18 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original,
                if (reg == NULL_RTX || GET_CODE (reg) != SUBREG)
                  continue;
              }
+           /* If the existing reload and this have no start hard register in
+              common, then skip.  Otherwise update exclude_start_hard_regs.  */
+           if (exclude_start_hard_regs
+               && ! hard_reg_set_empty_p (*exclude_start_hard_regs))
+             {
+               HARD_REG_SET r = lra_reg_info[regno].exclude_start_hard_regs
+                                | *exclude_start_hard_regs;
+               if (hard_reg_set_empty_p (~r))
+                 continue;
+               else
+                 lra_reg_info[regno].exclude_start_hard_regs = r;
+             }
            *result_reg = reg;
            if (lra_dump_file != NULL)
              {
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-9.c b/gcc/testsuite/gcc.dg/asm-hard-reg-9.c
new file mode 100644
index 00000000000..0866cb4554a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-9.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2" } */
+
+/* Ensure that if the reload register for operand 2 is reused for operand 3,
+   that exclude start hard regs coming from operand 3 are taken into account.
+   Otherwise a different register than r8 may be chosen rendering the insn
+   after LRA unsatisfiable.  */
+
+long
+test ()
+{
+  long x;
+  __asm__ ("" : "=r" (x) : "0" (1000), "r" (0l), "{r8}" (0l));
+  return x;
+}
--
2.49.0


Reply via email to