On Tue, Feb 24, 2026 at 12:06 PM Andrew Pinski
<[email protected]> wrote:
>
> On Tue, Feb 24, 2026 at 7:33 AM Richard Sandiford
> <[email protected]> wrote:
> >
> > Andrew Pinski <[email protected]> writes:
> > > So the problem here is while forming chains, we don't process hard 
> > > register
> > > conflicts (and ABI based ones) for allocnos which are already part of a 
> > > chain.
> > > This means sometimes we allocate a register to a color which might be 
> > > clobbered
> > > over is live range.
> > > Processing clobbers for all allocnos don't work while forming a chain does
> > > not work as the chain's front allocnos' candidates does not get updated.
> > > So we need to the processing of clobbers (and ABI clobbers) before 
> > > starting
> > > to form the chains.
> > >
> > > Changes since v1:
> > >  * v2: remove accidental hack which was there just for testing.
> > >
> > > Bootstrappd and tested on aarch64-linux-gnu.
> > >
> > >       PR target/123285
> > >
> > > gcc/ChangeLog:
> > >
> > >       * config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): 
> > > Process clobbers
> > >       and ABI clobbers before starting to form the chain.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >       * gcc.target/aarch64/pr123285-1.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski <[email protected]>
> >
> > Thanks for looking at this.  Couple of comments below.
> >
> > > ---
> > >  gcc/config/aarch64/aarch64-early-ra.cc        | 40 ++++++++++++-------
> > >  gcc/testsuite/gcc.target/aarch64/pr123285-1.c | 36 +++++++++++++++++
> > >  2 files changed, 61 insertions(+), 15 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> > >
> > > diff --git a/gcc/config/aarch64/aarch64-early-ra.cc 
> > > b/gcc/config/aarch64/aarch64-early-ra.cc
> > > index e2adde4ed94..85826ad81b5 100644
> > > --- a/gcc/config/aarch64/aarch64-early-ra.cc
> > > +++ b/gcc/config/aarch64/aarch64-early-ra.cc
> > > @@ -2739,6 +2739,31 @@ early_ra::form_chains ()
> > >    if (dump_file && (dump_flags & TDF_DETAILS))
> > >      fprintf (dump_file, "\nChaining allocnos:\n");
> > >
> > > +  // Record conflicts of hard register and ABI conflicts before the
> > > +  // forming of chains so chains have the updated candidates
> > > +  for(auto *allocno1 : m_allocnos)
> >
> > Missing space after "for"
>
> Ok.
>
> >
> > > +    {
> > > +      // Record conflicts with direct uses for FPR hard registers.
> > > +      auto *group1 = allocno1->group ();
> > > +      for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
> > > +      {
> > > +     if (fpr == 30 && group1->regno == 112)
> > > +       group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> >
> > The hack still seems to be there :)
>
> So what happened was the following:
> I removed it from the tree where I was testing but then missed it from
> the version which I had sent.
> I just double checked the machine which I had tested with had the
> corrected version too.
>
> >
> > > +     if (fpr_conflicts_with_allocno_p (fpr, allocno1))
> > > +       group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> > > +      }
> > > +
> > > +      // Record conflicts due to partially call-clobbered registers.
> > > +      // (Full clobbers are handled by the previous loop.)
> > > +      for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
> > > +     if (call_in_range_p (abi_id, allocno1->start_point,
> > > +                          allocno1->end_point))
> > > +       {
> > > +         auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
> > > +         group1->fpr_candidates &= ~fprs >> allocno1->offset;
> > > +       }
> > > +    }
> > > +
> >
> > I think we should also move:
> >
> >       if (allocno1->is_shared ())
> >         {
> >           auto *allocno2 = m_allocnos[allocno1->related_allocno];
> >           merge_fpr_info (allocno2->group (), group1, allocno2->offset);
> >         }
> >
> > although:
> >
> >       if (allocno1->is_shared ())
> >         {
> >           if (dump_file && (dump_flags & TDF_DETAILS))
> >             fprintf (dump_file, "  Allocno %d shares the same hard register"
> >                      " as allocno %d\n", allocno1->id,
> >                      allocno1->related_allocno);
> >           m_shared_allocnos.safe_push (allocno1);
> >           continue;
> >         }
> >
> > should probably stay where it is.
>
> Let me test with the above changes too.  This time I am going to make
> the changes to the tree where I will send it out first and then copy
> the exact patch over to the other machine.

Attached is what I tested and pushed.

Thanks,
Andrew

>
> Thanks,
> Andrew
>
> >
> > OK with those changes, thanks.
> >
> > Richard
> >
> > >    // Perform (modified) interval graph coloring.  First sort by
> > >    // increasing start point.
> > >    m_sorted_allocnos.reserve (m_allocnos.length ());
> > > @@ -2756,22 +2781,7 @@ early_ra::form_chains ()
> > >        if (allocno1->chain_next != INVALID_ALLOCNO)
> > >       continue;
> > >
> > > -      // Record conflicts with direct uses for FPR hard registers.
> > >        auto *group1 = allocno1->group ();
> > > -      for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
> > > -     if (fpr_conflicts_with_allocno_p (fpr, allocno1))
> > > -       group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> > > -
> > > -      // Record conflicts due to partially call-clobbered registers.
> > > -      // (Full clobbers are handled by the previous loop.)
> > > -      for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
> > > -     if (call_in_range_p (abi_id, allocno1->start_point,
> > > -                          allocno1->end_point))
> > > -       {
> > > -         auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
> > > -         group1->fpr_candidates &= ~fprs >> allocno1->offset;
> > > -       }
> > > -
> > >        if (allocno1->is_shared ())
> > >       {
> > >         if (dump_file && (dump_flags & TDF_DETAILS))
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/pr123285-1.c 
> > > b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> > > new file mode 100644
> > > index 00000000000..9ef5a28c9af
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> > > @@ -0,0 +1,36 @@
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O3" } */
> > > +/* PR target/123285 */
> > > +
> > > +#define BS_VEC(type, num) type __attribute__((vector_size(num * 
> > > sizeof(type))))
> > > +
> > > +/* f used to allocate v30 to either a or b and the inline-asm
> > > +   would clobber the v30. */
> > > +[[gnu::noipa]]
> > > +BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
> > > +{
> > > +  a+=b;
> > > +  asm("movi v30.16b, 0":::"v30");
> > > +  a+=b;
> > > +  return a;
> > > +}
> > > +[[gnu::noipa]]
> > > +BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
> > > +{
> > > +  a+=b;
> > > +  a+=b;
> > > +  return a;
> > > +}
> > > +
> > > +int main()
> > > +{
> > > +  BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7};
> > > +  BS_VEC(int, 8) b = {8,9,10,11,12,13,14};
> > > +  BS_VEC(int, 8) c0 = f(a,b);
> > > +  BS_VEC(int, 8) c1 = f1(a,b);
> > > +  for(int i=0;i<8;i++)
> > > +  if ( c0[i] != c1[i] )
> > > +    __builtin_abort ();
> > > +}
> > > +
> > > +

Attachment: v2-0001-aarch64-early-ra-Fix-handling-of-multi-register-a.patch
Description: Binary data

Reply via email to