[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 Richard Biener changed: What|Removed |Added Priority|P3 |P2
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #13 from Richard Biener --- Testcase with just the essential stuff. static int g_1731[7] = { 42, 0, 0, 0, 0, 0, 42 }; void __attribute__((noipa)) foo () { int l_1930[5] = { 0, }; for (int i = 0; i < 15; ++i) for (int j = 4; (j >= 1); j -= 1) #pragma GCC unroll 0 for (int k = 0; (k <= 4); k += 1) g_1731[(j + 1)] = --l_1930[k]; } int main() { foo (); if (g_1731[0] != 42 || g_1731[1] != 0 || g_1731[2] != -60 || g_1731[3] != -59 || g_1731[4] != -58 || g_1731[5] != -57 || g_1731[6] != 42) __builtin_abort (); return 0; } The innermost loop body then is [local count: 894749066]: # k_26 = PHI # ivtmp_23 = PHI _1 = l_1930[k_26]; _2 = _1 + -1; l_1930[k_26] = _2; g_1731[_6] = _2; k_17 = k_26 + 1; ivtmp_21 = ivtmp_23 - 1; if (ivtmp_21 != 0) one should note that for data dependence analysis we'd usually need to treat scalars (in this case SSA names) as arrays of the size of the whole nest iteration domain and the dependences would be between statements, not reads/writes. So the above is _1 = l_1930[k_26]; _2[i] = _1 + -1; l_1930[k_26] = _2[i]; g_1731[_6] = _2[i]; then and when we interchange the loop we suddenly need two different _2[] elements and when eliminating _2[] there's a dependence between the l_1930 store and the implied load from a different iteration. Note that if l_1930[k] weren't stored to g_1731[j+1], the interchange would of course be valid, and we do not want to break that case.
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 Alex Coplan changed: What|Removed |Added CC||acoplan at gcc dot gnu.org --- Comment #12 from Alex Coplan --- The original testcase with -fsanitize=undefined shows some "load of misaligned address" errors at runtime, which suggests it is invalid.
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #11 from Richard Biener --- Fails even with -O2 -floop-interchange -fno-move-loop-stores (otherwise we complicate the IL by applying store-motion to g_1731). (compute_affine_dependence ref_a: l_1930[k_33], stmt_a: _1 = l_1930[k_33]; ref_b: g_1731[_51], stmt_b: g_1731[_51] = _5; ) -> no dependence (compute_affine_dependence ref_a: l_1930[k_33], stmt_a: l_1930[k_33] = _2; ref_b: g_1731[_51], stmt_b: _4 = g_1731[_51]; ) -> no dependence (compute_affine_dependence ref_a: l_1930[k_33], stmt_a: l_1930[k_33] = _2; ref_b: g_1731[_51], stmt_b: g_1731[_51] = _5; ) -> no dependence maybe I'm missing something but we seem to fail to honor dependences from SSA edges? [local count: 894749066]: # k_33 = PHI # ivtmp_41 = PHI _1 = l_1930[k_33]; _2 = _1 + 18446744073709551615; l_1930[k_33] = _2; _4 = g_1731[_51]; _38 = _2 & _4; _5 = _38 & 38; g_1731[_51] = _5; k_24 = k_33 + 1; ivtmp_40 = ivtmp_41 - 1; if (ivtmp_40 != 0) Of course DDR_ARE_DEPENDENT (ddr) == chrec_known just tells us there's no memory dependence. As said, maybe I'm missing something ...
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #10 from Richard Biener --- Somewhat more reduced: typedef __UINT64_TYPE__ uint64_t; static uint64_t g_1731[7] = {0xF75EE82FC4736923LL, 0, 0xF75EE82FC4736923LL, 0, 0xF75EE82FC4736923LL, 0, 0xF75EE82FC4736923LL}; void __attribute__((noipa)) foo () { uint64_t l_1930[5] = { 0x5e44d2fed3bca5f2, 0x5e44d2fed3bca5f2, 0x5e44d2fed3bca5f2, 0x5e44d2fed3bca5f2, 0x5e44d2fed3bca5f2 }; for (int i = 0; i < 15; ++i) for (int j = 4; (j >= 1); j -= 1) #pragma GCC unroll 0 for (int k = 0; (k <= 4); k += 1) g_1731[(j + 1)] &= (0x26L & (--l_1930[k])); } int main() { foo (); /* f75ee82fc4736923 0 2 0 0 0 f75ee82fc4736923 */ if (g_1731[0] != 0xF75EE82FC4736923LL || g_1731[2] != 2 || g_1731[4] != 0 || g_1731[6] != 0xF75EE82FC4736923LL) __builtin_abort (); return 0; }
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #9 from Jakub Jelinek --- I've tried to deobfuscate the innermost loop's body: int32_t l_1942 = (-3L); int32_t *l_1947 = &l_1946[0][6]; int i, j; uint64_t t1 = --l_1930[g_1179]; t1 += 0xFC07342370A5FE25ULL; t1 &= 38; uint64_t t2 = (g_1731[l_1719 + 1][l_1721] &= t1); l_1942 ^= (t2 + 4ULL <= p_5.f0); l_1943[0][1][1]++; int32_t t3 = l_1946[0][6]; *l_1947 = t3; l_1949 = t3 == g_1948;
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #8 from Richard Biener --- Reduced testcase: typedef __UINT64_TYPE__ uint64_t; static uint64_t g_1731[7][1] = {{0xF75EE82FC4736923LL},{0UL}, {0xF75EE82FC4736923LL},{0UL}, {0xF75EE82FC4736923LL},{0UL}, {0xF75EE82FC4736923LL}}; static int g_149; static unsigned short g_1179; void __attribute__((noipa)) foo () { uint64_t l_1930[5]; int l_1719, l_1721; int i; for (i = 0; i < 5; i++) l_1930[i] = 0x623D9EDB6316A7CDLL; for (g_149 = 0; (g_149 > (-15)); g_149--) for (l_1719 = 4; (l_1719 >= 1); l_1719 -= 1) for (l_1721 = 0; (l_1721 >= 0); l_1721 -= 1) for (g_1179 = 0; (g_1179 <= 4); g_1179 += 1) g_1731[(l_1719 + 1)][l_1721] &= ((0x26L & (((--l_1930[g_1179]) + 0xFC07342370A5FE25LL)))); } int main() { foo (); /* f75ee82fc4736923 0 2 0 0 0 f75ee82fc4736923 */ if (g_1731[0][0] != 0xF75EE82FC4736923LL || g_1731[2][0] != 2 || g_1731[4][0] != 0 || g_1731[6][0] != 0xF75EE82FC4736923LL) __builtin_abort (); return 0; }
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #7 from Richard Biener --- (In reply to Richard Biener from comment #6) > Confirmed with -O2 -floop-interchange. There's just a single interchange > done: > > runData/keep/in.713.c:648:32: optimized: loops interchanged in loop nest > > that's in func_2 for the nest > > for (g_149 = 0; (g_149 > (-15)); g_149--) > { > for (l_1719 = 4; (l_1719 >= 1); l_1719 -= 1) > { > for (l_1721 = 0; (l_1721 >= 0); l_1721 -= 1) > { > struct S1 l_1935 = {0x13186D76L,0xC9L,36,24,1L,0x87L}; > for (g_1179 = 0; (g_1179 <= 4); g_1179 += 1) > { > int32_t l_1942 = (-3L); > int32_t *l_1947 = &l_1946[0][6]; > int i, j; > l_1942 ^= ((safe_add_func_uint64_t_u_u((l_1935 , > (((l_1936[1] != (void*)0) < (*g_511)) & (g_1731[(l_1719 + 1)][l_1721] &= > (((0x943C8AB0L | 0xE398A931L) != g_20) , (0x26L & > (safe_add_func_uint64_t_u_u((--l_1930[g_1179]), 0xFC07342370A5FE25LL))), > 4L)) <= p_5.f0); > l_1943[0][1][1]++; > l_1949 = (((*l_1947) = l_1946[0][6]) == g_1948); > } > } > } > } And we are interchanging the outer two loops. Since the outer loop IV isn't used in the body it doesn't change anything data dep wise? Interchanging the loops in the source reproduces the issue, so somehow for the only use of l_1719 (g_1731[(l_1719 + 1)][l_1721] &= (((0x943C8AB0L | 0xE398A931L) != g_20) , (0x26L & (safe_add_func_uint64_t_u_u((--l_1930[g_1179]), 0xFC07342370A5FE25LL) it makes a difference. It's g_1731[(l_1719 + 1)][l_1721] &= val; the order we & values into it shouldn't matter. But it's so much obfuscated code ...
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 --- Comment #6 from Richard Biener --- Confirmed with -O2 -floop-interchange. There's just a single interchange done: runData/keep/in.713.c:648:32: optimized: loops interchanged in loop nest that's in func_2 for the nest for (g_149 = 0; (g_149 > (-15)); g_149--) { for (l_1719 = 4; (l_1719 >= 1); l_1719 -= 1) { for (l_1721 = 0; (l_1721 >= 0); l_1721 -= 1) { struct S1 l_1935 = {0x13186D76L,0xC9L,36,24,1L,0x87L}; for (g_1179 = 0; (g_1179 <= 4); g_1179 += 1) { int32_t l_1942 = (-3L); int32_t *l_1947 = &l_1946[0][6]; int i, j; l_1942 ^= ((safe_add_func_uint64_t_u_u((l_1935 , (((l_1936[1] != (void*)0) < (*g_511)) & (g_1731[(l_1719 + 1)][l_1721] &= (((0x943C8AB0L | 0xE398A931L) != g_20) , (0x26L & (safe_add_func_uint64_t_u_u((--l_1930[g_1179]), 0xFC07342370A5FE25LL))), 4L)) <= p_5.f0); l_1943[0][1][1]++; l_1949 = (((*l_1947) = l_1946[0][6]) == g_1948); } } } }
[Bug tree-optimization/109184] [10/11/12/13 Regression] csmith: 2017 bug with -floop-interchange
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109184 Richard Biener changed: What|Removed |Added Status|NEW |ASSIGNED Assignee|unassigned at gcc dot gnu.org |rguenth at gcc dot gnu.org --- Comment #5 from Richard Biener --- I will try to have a look.