So the problem here is while forming chains, we don't process hard register
conflicts (and ABI based ones) for allocnos which are already part of a chain.
This means sometimes we allocate a register to a color which might be clobbered
over is live range.
Processing clobbers for all allocnos don't work while forming a chain does
not work as the chain's front allocnos' candidates does not get updated.
So we need to the processing of clobbers (and ABI clobbers) before starting
to form the chains.
Bootstrappd and tested on aarch64-linux-gnu.
PR target/123285
gcc/ChangeLog:
* config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): Process
clobbers
and ABI clobbers before starting to form the chain.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/pr123285-1.c: New test.
Signed-off-by: Andrew Pinski <[email protected]>
---
gcc/config/aarch64/aarch64-early-ra.cc | 40 ++++++++++++-------
gcc/testsuite/gcc.target/aarch64/pr123285-1.c | 36 +++++++++++++++++
2 files changed, 61 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr123285-1.c
diff --git a/gcc/config/aarch64/aarch64-early-ra.cc
b/gcc/config/aarch64/aarch64-early-ra.cc
index e2adde4ed94..85826ad81b5 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -2739,6 +2739,31 @@ early_ra::form_chains ()
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\nChaining allocnos:\n");
+ // Record conflicts of hard register and ABI conflicts before the
+ // forming of chains so chains have the updated candidates
+ for(auto *allocno1 : m_allocnos)
+ {
+ // Record conflicts with direct uses for FPR hard registers.
+ auto *group1 = allocno1->group ();
+ for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
+ {
+ if (fpr == 30 && group1->regno == 112)
+ group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
+ if (fpr_conflicts_with_allocno_p (fpr, allocno1))
+ group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
+ }
+
+ // Record conflicts due to partially call-clobbered registers.
+ // (Full clobbers are handled by the previous loop.)
+ for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
+ if (call_in_range_p (abi_id, allocno1->start_point,
+ allocno1->end_point))
+ {
+ auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
+ group1->fpr_candidates &= ~fprs >> allocno1->offset;
+ }
+ }
+
// Perform (modified) interval graph coloring. First sort by
// increasing start point.
m_sorted_allocnos.reserve (m_allocnos.length ());
@@ -2756,22 +2781,7 @@ early_ra::form_chains ()
if (allocno1->chain_next != INVALID_ALLOCNO)
continue;
- // Record conflicts with direct uses for FPR hard registers.
auto *group1 = allocno1->group ();
- for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
- if (fpr_conflicts_with_allocno_p (fpr, allocno1))
- group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
-
- // Record conflicts due to partially call-clobbered registers.
- // (Full clobbers are handled by the previous loop.)
- for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
- if (call_in_range_p (abi_id, allocno1->start_point,
- allocno1->end_point))
- {
- auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
- group1->fpr_candidates &= ~fprs >> allocno1->offset;
- }
-
if (allocno1->is_shared ())
{
if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
new file mode 100644
index 00000000000..9ef5a28c9af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* PR target/123285 */
+
+#define BS_VEC(type, num) type __attribute__((vector_size(num * sizeof(type))))
+
+/* f used to allocate v30 to either a or b and the inline-asm
+ would clobber the v30. */
+[[gnu::noipa]]
+BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
+{
+ a+=b;
+ asm("movi v30.16b, 0":::"v30");
+ a+=b;
+ return a;
+}
+[[gnu::noipa]]
+BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
+{
+ a+=b;
+ a+=b;
+ return a;
+}
+
+int main()
+{
+ BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7};
+ BS_VEC(int, 8) b = {8,9,10,11,12,13,14};
+ BS_VEC(int, 8) c0 = f(a,b);
+ BS_VEC(int, 8) c1 = f1(a,b);
+ for(int i=0;i<8;i++)
+ if ( c0[i] != c1[i] )
+ __builtin_abort ();
+}
+
+
--
2.43.0