https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125795
--- Comment #7 from Tamar Christina <tnfchris at gcc dot gnu.org> ---
Before unrolling, the loop is:
.L15:
add x4, x2, 512
.L3:
ld2 {v27.4s - v28.4s}, [x2], 32
add x0, x0, 16
sub x3, x3, #16
tbl v0.16b, {v28.16b}, v25.16b
str q27, [x0, -16]
ld2 {v26.4s - v27.4s}, [x4]
str q0, [x3, 16]
tbl v27.16b, {v27.16b}, v25.16b
str q26, [x0, 240]
str q27, [x3, -240]
cmp x2, x5
bne .L15
where v25 holds the loop-invariant tbl index vector, loaded from the anchor:
ldr q25, [x6, #:lo12:.LANCHOR0+16]
However, after unrolling + early_ra we get:
tbl v23.16b, {v28.16b}, v27.16b
mov w8, 511
ld2 {v28.4s - v29.4s}, [x9]
add x9, sp, 1424
tbl v22.16b, {v31.16b}, v27.16b
tbl v21.16b, {v30.16b}, v27.16b
str q23, [sp, 1088]
tbl v20.16b, {v29.16b}, v27.16b
str q28, [sp, 352]
ld2 {v27.4s - v28.4s}, [x10] <--- whoops
str q22, [sp, 832]
add x10, sp, 1936
str q21, [sp, 1072]
tbl v19.16b, {v28.16b}, v27.16b
str q27, [sp, 112]
ld2 {v26.4s - v27.4s}, [x11] <--- doh...
add x11, sp, 1456
str q19, [sp, 1056]
tbl v18.16b, {v27.16b}, v27.16b <-- ouch..
str q26, [sp, 368]
stp q18, q20, [sp, 800]
So early_ra assigns the hard register v27, which holds the tbl
index vector, to the destination of one of the loads, and thus
clobbers the index vector.
Yet early_ra does mark it as live across the entire function:
Allocno chains:
Id Regno Range Src Dest Equiv Shared FPR
=> 0 r440[0] [16,15] Yes - - - -
=> 1 r224[0] [328,17] - 0 - - -