On Tue, Jan 27, 2026 at 4:28 PM Andrew Pinski
<[email protected]> wrote:
>
> So the problem here is while forming chains, we don't process hard register
> conflicts (and ABI based ones) for allocnos which are already part of a chain.
> This means sometimes we allocate a register to a color which might be 
> clobbered
> over is live range.
> Processing clobbers for all allocnos don't work while forming a chain does
> not work as the chain's front allocnos' candidates does not get updated.
> So we need to the processing of clobbers (and ABI clobbers) before starting
> to form the chains.
>
> Bootstrappd and tested on aarch64-linux-gnu.
>
>         PR target/123285
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): Process 
> clobbers
>         and ABI clobbers before starting to form the chain.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/pr123285-1.c: New test.
>
> Signed-off-by: Andrew Pinski <[email protected]>
> ---
>  gcc/config/aarch64/aarch64-early-ra.cc        | 40 ++++++++++++-------
>  gcc/testsuite/gcc.target/aarch64/pr123285-1.c | 36 +++++++++++++++++
>  2 files changed, 61 insertions(+), 15 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr123285-1.c
>
> diff --git a/gcc/config/aarch64/aarch64-early-ra.cc 
> b/gcc/config/aarch64/aarch64-early-ra.cc
> index e2adde4ed94..85826ad81b5 100644
> --- a/gcc/config/aarch64/aarch64-early-ra.cc
> +++ b/gcc/config/aarch64/aarch64-early-ra.cc
> @@ -2739,6 +2739,31 @@ early_ra::form_chains ()
>    if (dump_file && (dump_flags & TDF_DETAILS))
>      fprintf (dump_file, "\nChaining allocnos:\n");
>
> +  // Record conflicts of hard register and ABI conflicts before the
> +  // forming of chains so chains have the updated candidates
> +  for(auto *allocno1 : m_allocnos)
> +    {
> +      // Record conflicts with direct uses for FPR hard registers.
> +      auto *group1 = allocno1->group ();
> +      for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
> +      {
> +       if (fpr == 30 && group1->regno == 112)
> +         group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> +       if (fpr_conflicts_with_allocno_p (fpr, allocno1))
> +         group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> +      }

I just noticed I had a hack in this version of the patch, and will
send out a new one after it finishes testing.

> +
> +      // Record conflicts due to partially call-clobbered registers.
> +      // (Full clobbers are handled by the previous loop.)
> +      for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
> +       if (call_in_range_p (abi_id, allocno1->start_point,
> +                            allocno1->end_point))
> +         {
> +           auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
> +           group1->fpr_candidates &= ~fprs >> allocno1->offset;
> +         }
> +    }
> +
>    // Perform (modified) interval graph coloring.  First sort by
>    // increasing start point.
>    m_sorted_allocnos.reserve (m_allocnos.length ());
> @@ -2756,22 +2781,7 @@ early_ra::form_chains ()
>        if (allocno1->chain_next != INVALID_ALLOCNO)
>         continue;
>
> -      // Record conflicts with direct uses for FPR hard registers.
>        auto *group1 = allocno1->group ();
> -      for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
> -       if (fpr_conflicts_with_allocno_p (fpr, allocno1))
> -         group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> -
> -      // Record conflicts due to partially call-clobbered registers.
> -      // (Full clobbers are handled by the previous loop.)
> -      for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
> -       if (call_in_range_p (abi_id, allocno1->start_point,
> -                            allocno1->end_point))
> -         {
> -           auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
> -           group1->fpr_candidates &= ~fprs >> allocno1->offset;
> -         }
> -
>        if (allocno1->is_shared ())
>         {
>           if (dump_file && (dump_flags & TDF_DETAILS))
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr123285-1.c 
> b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> new file mode 100644
> index 00000000000..9ef5a28c9af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3" } */
> +/* PR target/123285 */
> +
> +#define BS_VEC(type, num) type __attribute__((vector_size(num * 
> sizeof(type))))
> +
> +/* f used to allocate v30 to either a or b and the inline-asm
> +   would clobber the v30. */
> +[[gnu::noipa]]
> +BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
> +{
> +  a+=b;
> +  asm("movi v30.16b, 0":::"v30");
> +  a+=b;
> +  return a;
> +}
> +[[gnu::noipa]]
> +BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
> +{
> +  a+=b;
> +  a+=b;
> +  return a;
> +}
> +
> +int main()
> +{
> +  BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7};
> +  BS_VEC(int, 8) b = {8,9,10,11,12,13,14};
> +  BS_VEC(int, 8) c0 = f(a,b);
> +  BS_VEC(int, 8) c1 = f1(a,b);
> +  for(int i=0;i<8;i++)
> +  if ( c0[i] != c1[i] )
> +    __builtin_abort ();
> +}
> +
> +
> --
> 2.43.0
>

Reply via email to