https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125795

--- Comment #7 from Tamar Christina <tnfchris at gcc dot gnu.org> ---
before unrolling:

.L15:
        add     x4, x2, 512
.L3:
        ld2     {v27.4s - v28.4s}, [x2], 32
        add     x0, x0, 16
        sub     x3, x3, #16
        tbl     v0.16b, {v28.16b}, v25.16b
        str     q27, [x0, -16]
        ld2     {v26.4s - v27.4s}, [x4]
        str     q0, [x3, 16]
        tbl     v27.16b, {v27.16b}, v25.16b
        str     q26, [x0, 240]
        str     q27, [x3, -240]
        cmp     x2, x5
        bne     .L15

where v25 holds the loop-invariant index vector used by the tbl
instructions, which is loaded from the anchor:

        ldr     q25, [x6, #:lo12:.LANCHOR0+16]

However, after unrolling + early_ra, the index vector lives in v27:

        tbl     v23.16b, {v28.16b}, v27.16b
        mov     w8, 511
        ld2     {v28.4s - v29.4s}, [x9]
        add     x9, sp, 1424
        tbl     v22.16b, {v31.16b}, v27.16b
        tbl     v21.16b, {v30.16b}, v27.16b
        str     q23, [sp, 1088]
        tbl     v20.16b, {v29.16b}, v27.16b
        str     q28, [sp, 352]
        ld2     {v27.4s - v28.4s}, [x10] <--- whoops
        str     q22, [sp, 832]
        add     x10, sp, 1936
        str     q21, [sp, 1072]
        tbl     v19.16b, {v28.16b}, v27.16b
        str     q27, [sp, 112]
        ld2     {v26.4s - v27.4s}, [x11] <--- doh...
        add     x11, sp, 1456
        str     q19, [sp, 1056]
        tbl     v18.16b, {v27.16b}, v27.16b <-- ouch..
        str     q26, [sp, 368]
        stp     q18, q20, [sp, 800]

So early_ra assigns the hard register v27 to the
destination of one of the loads, thereby clobbering
the index vector held in v27.

early_ra does mark it as live across the entire function:

Allocno chains:
         Id        Regno       Range     Src  Dest Equiv Shared   FPR
  =>      0      r440[0]     [16,15]     Yes     -     -      -     -
  =>      1      r224[0]    [328,17]       -     0     -      -     -

Reply via email to