https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97457

            Bug ID: 97457
           Summary: [10/11 Regression] SVE: wrong code since
                    r10-4752-g2d56600c
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: acoplan at gcc dot gnu.org
  Target Milestone: ---

AArch64 GCC miscompiles the following testcase:

int a;
long c;
signed char d(char e, char f) { return e + f; }
int main(void) {
  for (; a <= 1; a++) {
    c = -8;
    for (; c != 3; c = d(c, 1))
      ;
  }
  char b = c;
  if (b != 3)
    __builtin_abort();
}

with -O3 -march=armv8.2-a+sve since
r10-4752-g2d56600c8de397d09a16dedd33d310a763a832ae:

commit 2d56600c8de397d09a16dedd33d310a763a832ae
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Sat Nov 16 11:14:51 2019

    [AArch64] Add truncation for partial SVE modes

On a machine with 512-bit SVE vectors, we end up with c = 11 and hit the call
to abort.

The generated code at r10-4752 is as follows. I've annotated it with the
runtime behaviour of a machine with 512-bit SVE vectors.

main:
        addvl   sp, sp, #-1
        adrp    x7, a
        sub     sp, sp, #16
        stp     x29, x30, [sp]
        mov     x29, sp
        ldr     w4, [x7, #:lo12:a]
        cmp     w4, 1
        bgt     .L20
        cntd    x6                      // x6 := 8
        neg     w3, w6                  // w3 := -8
        add     w3, w3, 11              // w3 := 3
        sub     w6, w6, #1              // x6 := 7
        ptrue   p0.b, all
        pfalse  p1.b
        .p2align 3,,7
.L4:
        cmp     w6, 10
        bhi     .L10                    // not taken
        mov     w0, 0
        index   z1.d, #-8, #1
        .p2align 3,,7
.L7:
        incd    x0                      // x0 := 8
        mov     z0.d, z1.d
        cmp     w0, w3
        add     z0.b, z0.b, #1
        incd    z1.d
        sxtb    z2.d, p0/m, z0.d
        bls     .L7                     // not taken
        add     x1, sp, 16
        addvl   x2, sp, #1
        st1b    z0.d, p0, [x1, #7, mul vl]
        uxtw    x1, w0                  // x1 := 8
        cmp     w0, 11
        ldrb    w5, [x2, 15]
        sub     x2, x1, #8              // x2 := 0
        lastb   x1, p1, z2.d
        beq     .L8                     // not taken
        add     w1, w2, 1               // w1 := 1
        cmp     w0, 10
        and     w5, w1, 255             // w5 := 1
        sxtb    x1, w1                  // x1 := 1
        beq     .L8
.L6:
        add     w1, w5, 10              // w1 := 11
        and     w5, w1, 255             // w5 := 11
        sxtb    x1, w1                  // x1 := 11
.L8:
        add     w4, w4, 1               // outer loop induction variable (a)
        cmp     w4, 2
        bne     .L4                     // taken 1st time round, x0 reset to 0 at top of loop
        adrp    x0, c
        str     w4, [x7, #:lo12:a]      // write a back (a = 2)
        str     x1, [x0, #:lo12:c]      // write c back (c = 11!)
.L5:
        cmp     w5, 3
        bne     .L22                    // boom! (w5 = 11)
        ldp     x29, x30, [sp]
        addvl   sp, sp, #1
        mov     w0, 0
        add     sp, sp, 16
        ret
        .p2align 2,,3
.L10:
        mov     w5, 249
        b       .L6
.L20:
        adrp    x0, c
        ldrb    w5, [x0, #:lo12:c]
        b       .L5
.L22:
        bl      abort
        .size   main, .-main
        .comm   c,8,8
        .comm   a,4,4
        .ident  "GCC: (unknown) 10.0.0 20191116 (experimental)"