https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122528
Bug ID: 122528
Summary: poor code generation of floating-point comparisons in
x86-64
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: manu at gcc dot gnu.org
Target Milestone: ---
Not sure if this is middle-end or back-end.
The following code: https://godbolt.org/z/nxr7hrs3T
/* Node of a linked structure of points.  The function below traverses it
   through next[0] and records its results in closest[] and next[0]. */
typedef struct dlnode {
const double * x; // Point coordinates (objective vector).
struct dlnode * next[2]; /* Links that keep the points sorted according to
coordinates 2, 3 and 4
(in the case of 2 and 3, only the points swept
by 4 are kept). */
struct dlnode * closest[2]; // closest[0] == cx, closest[1] == cy
} dlnode_t;
/*
 * Walks the nodes starting at list[1].next[0], following next[0], and
 * compares the first three coordinates of each visited node p with those
 * of `new`.
 *
 * Returns false as soon as some p satisfies p->x[i] <= new->x[i] for
 * i = 0, 1 and 2; `new` is not written in that case.
 * Returns true at the first p that is not lexicographically <= `new` on
 * the key (x[2], x[1], x[0]); before returning, new->closest[0],
 * new->closest[1] and new->next[0] are filled in.
 *
 * list: used as an array; list[0] and list[1] are both dereferenced and
 *       seed closest1 and closest0 respectively.
 * new:  new->x[0..3] are read; its closest[] and next[0] fields are
 *       written only on the `true` path.
 *
 * The loop has no exit other than the two returns, so the chain
 * presumably always contains a node that triggers one of them
 * -- TODO confirm against the caller.
 */
bool restart_base_setup_z_and_closest(dlnode_t * restrict list, dlnode_t *
restrict new)
{
// Local copy of the coordinates of `new`; newx[3] is loaded but not used below.
const double newx[] = { new->x[0], new->x[1], new->x[2], new->x[3] };
// Running candidates for new->closest[0] and new->closest[1], together
// with cached pointers to their coordinate vectors.
dlnode_t * closest0 = list+1;
dlnode_t * closest1 = list;
const double * closest0x = closest0->x;
const double * closest1x = closest1->x;
dlnode_t * p = (list+1)->next[0];
while (true) {
const double * restrict px = p->x;
// Per-coordinate "<" and "==" results; these comparisons are the ones
// whose generated code the report is about.
bool p_lt_new_0 = px[0] < newx[0];
bool p_lt_new_1 = px[1] < newx[1];
bool p_lt_new_2 = px[2] < newx[2];
bool p_eq_new_0 = px[0] == newx[0];
bool p_eq_new_1 = px[1] == newx[1];
bool p_eq_new_2 = px[2] == newx[2];
// "<=" is assembled from "<" and "==" with the non-short-circuit
// operator |, so both operands are always evaluated.
bool p_leq_new_0 = p_lt_new_0 | p_eq_new_0;
bool p_leq_new_1 = p_lt_new_1 | p_eq_new_1;
bool p_leq_new_2 = p_lt_new_2 | p_eq_new_2;
// p is <= new in each of the first three coordinates: report failure.
if (p_leq_new_0 & p_leq_new_1 & p_leq_new_2) {
return false;
}
// The parenthesised condition is "p <= new" in lexicographic order on
// (x[2], x[1], x[0]); when it does not hold, the walk stops at p and
// the results are stored into `new`.
if (!(p_lt_new_2 || (p_eq_new_2 && (p_lt_new_1 || (p_eq_new_1 &&
p_leq_new_0))))) {
new->closest[0] = closest0;
new->closest[1] = closest1;
new->next[0] = p;
return true;
}
// setup_z_and_closest()
// p replaces closest0 when px[1] < newx[1] and (px[0], px[1]) is
// lexicographically smaller than the current closest0 coordinates.
// Otherwise p replaces closest1 when px[0] < newx[0] and (px[1], px[0])
// is lexicographically smaller than the current closest1 coordinates.
if (p_lt_new_1 && (px[0] < closest0x[0] || (px[0] == closest0x[0] &&
px[1] < closest0x[1]))) {
closest0 = p;
closest0x = px;
} else if (p_lt_new_0 && (px[1] < closest1x[1] || (px[1] ==
closest1x[1] && px[0] < closest1x[0]))) {
closest1 = p;
closest1x = px;
}
p = p->next[0];
}
// Not reachable: the loop above can only be left through a return.
__builtin_unreachable();
}
compiled with:
-O2 -march=x86-64-v3 -fopt-info-vec-missed-optimized -fno-signed-zeros
-ffinite-math-only
generates redundant floating-point comparisons. For example:
vcomisd xmm2, xmm4
jb .L32
vcomisd xmm4, xmm2
jne .L9
vcomisd xmm0, xmm3
jb .L7
vcomisd xmm3, xmm0
jne .L9
test sil, sil
je .L9
My assembly may be a bit rusty, but
vcomisd xmm2, xmm4
jb .L32
vcomisd xmm4, xmm2
jne .L9
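should be the same as the following, because vcomisd sets ZF identically for either operand order and jb does not modify the flags: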
vcomisd xmm2, xmm4
jb .L32
jne .L9