https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102977
Bug ID: 102977
Summary: [GCC12 regression] vectorizer failed to generate complex fma.
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: crazylht at gmail dot com
Target Milestone: ---
Target: aarch64-linux-gnu

#include<complex.h>
#include<complex.h>
void
foo (_Complex _Float16* __restrict a, _Complex _Float16* b, _Complex _Float16 *c)
{
  for (int i =0 ; i != 8; i++)
    a[i] += b[i] * c[i];
}

gcc11.2 generates

foo:
        mov     x3, 16
        ptrue   p1.b, all
        whilelo p0.h, xzr, x3
        ld1h    z2.h, p0/z, [x1]
        ld1h    z1.h, p0/z, [x2]
        ld1h    z0.h, p0/z, [x0]
        fcmla   z0.h, p1/m, z1.h, z2.h, #0
        fcmla   z0.h, p1/m, z1.h, z2.h, #90
        st1h    z0.h, p0, [x0]
        cntb    x4
        cnth    x5
        add     x0, x0, x4
        add     x1, x1, x4
        add     x2, x2, x4
        whilelo p0.h, x5, x3
        b.none  .L1
        ld1h    z2.h, p0/z, [x1]
        ld1h    z1.h, p0/z, [x2]
        ld1h    z0.h, p0/z, [x0]
        fcmla   z0.h, p1/m, z1.h, z2.h, #0
        fcmla   z0.h, p1/m, z1.h, z2.h, #90
        st1h    z0.h, p0, [x0]
.L1:
        ret

current trunk generates

foo:
        ptrue   p1.h, vl8
        ptrue   p0.b, all
        ld2h    {z2.h - z3.h}, p1/z, [x1]
        ld2h    {z0.h - z1.h}, p1/z, [x2]
        ld2h    {z16.h - z17.h}, p1/z, [x0]
        fmul    z6.h, z0.h, z3.h
        movprfx z7, z16
        fmla    z7.h, p0/m, z0.h, z2.h
        fmla    z6.h, p0/m, z1.h, z2.h
        movprfx z4, z7
        fmls    z4.h, p0/m, z1.h, z3.h
        fadd    z5.h, z6.h, z17.h
        st2h    {z4.h - z5.h}, p1, [x0]
        ret

Options: -Ofast -march=armv8.3-a+sve+fp16

Refer to https://godbolt.org/z/4PPKnWvc1