https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122528
--- Comment #3 from Manuel López-Ibáñez <manu at gcc dot gnu.org> ---
(In reply to Manuel López-Ibáñez from comment #2)
> A smaller testcase showing the problem:
Clang generates much better code:
# cmp -- Clang's output for the testcase (Intel syntax, AVX scalar instructions).
#
# Reads three 8-byte doubles starting at [rdi] (called a0, a1, a2 below) and
# three starting at [rsi] (b0, b1, b2), then returns an integer in eax:
#   * -1 when none of a0 > b0, a1 > b1, a2 > b2 holds (fast path);
#   * otherwise (a0 < b0) | ((a0 == b0) & (a1 < b1)) | ((a1 == b1) & (a2 <= b2)),
#     which is 0 or 1.
#
# NOTE(review): rdi/rsi look like the two pointer arguments under the SysV
# AMD64 convention -- the C testcase is not shown here, so confirm.
#
# NaN handling: vucomisd reports an unordered compare as ZF=PF=CF=1, so `ja`
# is not taken on NaN while sete/setb/setbe all produce 1.
#
# Writes: eax, cl, dl, sil, dil, xmm0-xmm5, flags.
cmp:
# Load all six operands up front; rdi/rsi are not dereferenced after this
# point, which is what allows sil/dil to be reused as scratch bytes below.
vmovsd xmm0, qword ptr [rdi]
vmovsd xmm2, qword ptr [rdi + 8]
vmovsd xmm1, qword ptr [rsi]
vmovsd xmm3, qword ptr [rsi + 8]
vmovsd xmm4, qword ptr [rdi + 16]
vmovsd xmm5, qword ptr [rsi + 16]
# Fast path: leave for .LBB0_3 as soon as any a_i > b_i (ordered greater-than).
vucomisd xmm0, xmm1
ja .LBB0_3 # a0 > b0
vucomisd xmm2, xmm3
ja .LBB0_3 # a1 > b1
mov eax, -1 # return value of the fast path; set before the last compare
vucomisd xmm4, xmm5
ja .LBB0_3 # a2 > b2
ret # no a_i > b_i: eax = -1
.LBB0_3:
# Slow path: redo each compare and capture its flags as 0/1 bytes. Each setcc
# group must stay directly after its own vucomisd because it reads those flags.
vucomisd xmm4, xmm5
setbe al # al = (a2 <= b2)
vucomisd xmm2, xmm3
sete cl # cl = (a1 == b1)
setb dl # dl = (a1 < b1)
vucomisd xmm0, xmm1
sete sil # sil = (a0 == b0); overwrites the low byte of rsi
setb dil # dil = (a0 < b0); overwrites the low byte of rdi
# Combine the predicates branch-free.
and sil, dl # sil = (a0 == b0) & (a1 < b1)
or sil, dil # sil |= (a0 < b0)
and cl, al # cl = (a1 == b1) & (a2 <= b2)
or cl, sil # cl = final 0/1 result
movzx eax, cl # zero-extend the byte result into eax
and eax, 1 # keep bit 0 only; cl is already 0 or 1 after the setcc/and/or chain
ret