https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80281
--- Comment #16 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
This can be reproduced on x86_64-linux with -O2 -mfma -ffast-math.
Comparing the dumps before and after r246674, the first difference I see is in forwprop3:

@@ -3,6 +3,7 @@
 foo (unsigned int x, unsigned int y, unsigned int z)
 {
+  unsigned int _1;
   unsigned int _3;
   unsigned int _7;
   unsigned int _9;
@@ -12,8 +13,9 @@ foo (unsigned int x, unsigned int y, uns
   _9 = z_5(D) * z_5(D);
   _10 = y_4(D) * _9;
   _3 = -_10;
-  _7 = x_6(D) - _10;
-  return _7;
+  _1 = x_6(D) - _10;
+  _7 = _1;
+  return _1;
 }
@@ -23,6 +25,7 @@ foo (unsigned int x, unsigned int y, uns
 bar (float x, float y, float z)
 {
+  float _1;
   float _3;
   float _7;
   float reassocpow_9;
@@ -32,8 +35,9 @@ bar (float x, float y, float z)
   reassocpow_9 = __builtin_powif (z_5(D), 2);
   _10 = y_4(D) * reassocpow_9;
   _3 = -_10;
-  _7 = x_6(D) - _10;
-  return _7;
+  _1 = x_6(D) - _10;
+  _7 = _1;
+  return _1;
 }

I wonder why anything changed for the float types or unsigned types.
And in bar, where the failure appears, the difference between -mfma and -mno-fma appears in the widening_mul pass:

   powmult_7 = z_5(D) * z_5(D);
-  _10 = y_4(D) * powmult_7;
-  _1 = x_6(D) - _10;
+  _3 = -y_4(D);
+  _1 = _3 * powmult_7 + x_6(D);

On the other hand, with -mfma the actual difference for -O2 -ffast-math -mfma before/after this commit is:

-  powmult_1 = z_5(D) * z_5(D);
-  _3 = -powmult_1;
-  _7 = _3 * y_4(D) + x_6(D);
-  return _7;
+  powmult_7 = z_5(D) * z_5(D);
+  _3 = -y_4(D);
+  _1 = _3 * powmult_7 + x_6(D);
+  return _1;

So this is not an actual functional change; I have no idea why negating the pow2 vs. the other operand would have any advantages/disadvantages. The difference in the generated code is:

-       vfnmadd231ss    %xmm2, %xmm1, %xmm0
+       vfnmadd231ss    %xmm1, %xmm2, %xmm0