On Tue, Jan 27, 2026 at 4:28 PM Andrew Pinski
<[email protected]> wrote:
>
> So the problem here is while forming chains, we don't process hard register
> conflicts (and ABI based ones) for allocnos which are already part of a chain.
> This means sometimes we allocate a register to a color which might be
> clobbered
> over is live range.
> Processing clobbers for all allocnos don't work while forming a chain does
> not work as the chain's front allocnos' candidates does not get updated.
> So we need to the processing of clobbers (and ABI clobbers) before starting
> to form the chains.
>
> Bootstrappd and tested on aarch64-linux-gnu.
>
> PR target/123285
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): Process
> clobbers
> and ABI clobbers before starting to form the chain.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/pr123285-1.c: New test.
>
> Signed-off-by: Andrew Pinski <[email protected]>
> ---
> gcc/config/aarch64/aarch64-early-ra.cc | 40 ++++++++++++-------
> gcc/testsuite/gcc.target/aarch64/pr123285-1.c | 36 +++++++++++++++++
> 2 files changed, 61 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr123285-1.c
>
> diff --git a/gcc/config/aarch64/aarch64-early-ra.cc
> b/gcc/config/aarch64/aarch64-early-ra.cc
> index e2adde4ed94..85826ad81b5 100644
> --- a/gcc/config/aarch64/aarch64-early-ra.cc
> +++ b/gcc/config/aarch64/aarch64-early-ra.cc
> @@ -2739,6 +2739,31 @@ early_ra::form_chains ()
> if (dump_file && (dump_flags & TDF_DETAILS))
> fprintf (dump_file, "\nChaining allocnos:\n");
>
> + // Record conflicts of hard register and ABI conflicts before the
> + // forming of chains so chains have the updated candidates
> + for(auto *allocno1 : m_allocnos)
> + {
> + // Record conflicts with direct uses for FPR hard registers.
> + auto *group1 = allocno1->group ();
> + for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
> + {
> + if (fpr == 30 && group1->regno == 112)
> + group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> + if (fpr_conflicts_with_allocno_p (fpr, allocno1))
> + group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> + }
I just noticed I had a hack in this version of the patch, and will
send out a new one after it finishes testing.
> +
> + // Record conflicts due to partially call-clobbered registers.
> + // (Full clobbers are handled by the previous loop.)
> + for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
> + if (call_in_range_p (abi_id, allocno1->start_point,
> + allocno1->end_point))
> + {
> + auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
> + group1->fpr_candidates &= ~fprs >> allocno1->offset;
> + }
> + }
> +
> // Perform (modified) interval graph coloring. First sort by
> // increasing start point.
> m_sorted_allocnos.reserve (m_allocnos.length ());
> @@ -2756,22 +2781,7 @@ early_ra::form_chains ()
> if (allocno1->chain_next != INVALID_ALLOCNO)
> continue;
>
> - // Record conflicts with direct uses for FPR hard registers.
> auto *group1 = allocno1->group ();
> - for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
> - if (fpr_conflicts_with_allocno_p (fpr, allocno1))
> - group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
> -
> - // Record conflicts due to partially call-clobbered registers.
> - // (Full clobbers are handled by the previous loop.)
> - for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
> - if (call_in_range_p (abi_id, allocno1->start_point,
> - allocno1->end_point))
> - {
> - auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
> - group1->fpr_candidates &= ~fprs >> allocno1->offset;
> - }
> -
> if (allocno1->is_shared ())
> {
> if (dump_file && (dump_flags & TDF_DETAILS))
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> new file mode 100644
> index 00000000000..9ef5a28c9af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3" } */
> +/* PR target/123285 */
> +
> +#define BS_VEC(type, num) type __attribute__((vector_size(num *
> sizeof(type))))
> +
> +/* f used to allocate v30 to either a or b and the inline-asm
> + would clobber the v30. */
> +[[gnu::noipa]]
> +BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
> +{
> + a+=b;
> + asm("movi v30.16b, 0":::"v30");
> + a+=b;
> + return a;
> +}
> +[[gnu::noipa]]
> +BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
> +{
> + a+=b;
> + a+=b;
> + return a;
> +}
> +
> +int main()
> +{
> + BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7};
> + BS_VEC(int, 8) b = {8,9,10,11,12,13,14};
> + BS_VEC(int, 8) c0 = f(a,b);
> + BS_VEC(int, 8) c1 = f1(a,b);
> + for(int i=0;i<8;i++)
> + if ( c0[i] != c1[i] )
> + __builtin_abort ();
> +}
> +
> +
> --
> 2.43.0
>