https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105216
Bug ID: 105216
Summary: [12 regression] 8% regression for m-queens compared to gcc11 O2
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: crazylht at gmail dot com
Target Milestone: ---
Created attachment 52778
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=52778&action=edit
g++ -O2 main.c;./a.out
The regression happens in pass_pre, in the following loop:
while (posib != UINT_FAST32_MAX) {
// The standard trick for getting the rightmost bit in the mask
uint_fast32_t bit = ~posib & (posib + 1);
posib ^= bit; // Eliminate the tried possibility.
uint_fast32_t new_diagl = (bit << 1) | diagl_shifted;
uint_fast32_t new_diagr = (bit >> 1) | diagr_shifted;
bit |= cols[d];
uint_fast32_t new_posib = (bit | new_diagl | new_diagr);
if (new_posib != UINT_FAST32_MAX) {
uint_fast32_t lookahead1 = (bit | (new_diagl << (LOOKAHEAD - 2)) |
(new_diagr >> (LOOKAHEAD - 2)));
uint_fast32_t lookahead2 = (bit | (new_diagl << (LOOKAHEAD - 1)) |
(new_diagr >> (LOOKAHEAD - 1)));
uint_fast32_t allowed2 = l_rest > (int8_t)0;
if(allowed2 && ((lookahead2 == UINT_FAST32_MAX) || (lookahead1 ==
UINT_FAST32_MAX))) {
continue;
}
if(l_rest == (STORE_LEVEL + 1)) {
start_level4[level4_cnt][0] = bit; // cols
start_level4[level4_cnt][1] = new_diagl; // diagl
start_level4[level4_cnt][2] = new_diagr; // diagr
level4_cnt++;
continue;
}
l_rest--;
// The next two lines save stack depth + backtrack operations
// when we passed the last possibility in a row.
// Go lower in the stack, avoid branching by writing above the
// current position
posibs[d] = posib;
d += posib != UINT_FAST32_MAX; // avoid branching with this trick
posib = new_posib;
// make values current
cols[d] = bit;
diagl[d] = new_diagl;
diagr[d] = new_diagr;
rest[d] = l_rest;
diagl_shifted = new_diagl << 1;
diagr_shifted = new_diagr >> 1;
}
}
d--;
posib = posibs[d]; // backtrack ...
}
In the statement `bit |= cols[d];`, cols[d] is loop invariant when the loop
latch is reached via *continue*. GCC 11 detects this, but GCC 12 does not.