https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121591

Manuel López-Ibáñez <manu at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |manu at gcc dot gnu.org

--- Comment #4 from Manuel López-Ibáñez <manu at gcc dot gnu.org> ---
And an even smaller example:

/*
 * Returns 1 when a[0] <= b[0] and a[1] <= b[1] and the two element pairs are
 * not both equal; returns 0 otherwise. Only elements 0 and 1 of each array
 * are read.
 *
 * NOTE(review): the name suggests a two-objective dominance test - confirm
 * against the original bug report.
 */
int dominates(const double * restrict a, const double * restrict b)
{
    /* Each comparison result is stored as a 0/1 int. */
    int a_le_b_0=(a[0] <= b[0]);
    int a_eq_b_0=(a[0] == b[0]);
    int a_le_b_1=(a[1] <= b[1]);
    int a_eq_b_1=(a[1] == b[1]);
    /* Bitwise & rather than &&: there is no short-circuit evaluation, so all
       four comparison results feed the final expression. */
    return a_le_b_0 & a_le_b_1 & !(a_eq_b_0 & a_eq_b_1);
}

x86-64 gcc (trunk) -O3 -march=x86-64-v4  -ffast-math

"dominates":
        # Intel syntax. The loads below read a[0]/a[1] through rdi and
        # b[0]/b[1] through rsi; the int result is returned in eax.
        # xmm2 = a[0], xmm3 = b[0], xmm0 = a[1], xmm1 = b[1]
        vmovsd  xmm2, QWORD PTR [rdi]
        vmovsd  xmm3, QWORD PTR [rsi]
        vmovsd  xmm0, QWORD PTR [rdi+8]
        vmovsd  xmm1, QWORD PTR [rsi+8]
        # al = (a[0] == b[0]): vcomisd sets ZF when the operands compare equal.
        vcomisd xmm2, xmm3
        sete    al
        # dl = (a[1] == b[1])
        vcomisd xmm0, xmm1
        sete    dl
        # Bit 0 of eax = both pairs equal. sete writes only the low byte, so
        # the remaining bits of eax/edx are not meaningful here.
        and     eax, edx
        # Same register pairs compared again with the operands swapped:
        # dl = (b[0] >= a[0]), i.e. a[0] <= b[0] (setnb tests CF == 0).
        vcomisd xmm3, xmm2
        setnb   dl
        # cl = (b[1] >= a[1]), i.e. a[1] <= b[1]
        vcomisd xmm1, xmm0
        setnb   cl
        # Bit 0 of edx = both "<=" tests hold.
        and     edx, ecx
        # eax = ~eax & edx: both "<=" hold AND NOT both pairs equal.
        andn    eax, eax, edx
        # Keep only bit 0, discarding the stale upper bits noted above.
        and     eax, 1
        ret

x86-64 clang (trunk) -O3 -march=x86-64-v4  -ffast-math

# Intel syntax. The loads below read a[0]/a[1] through rdi and b[0]/b[1]
# through rsi; the int result is returned in eax.
dominates:
        # xmm0 = a[0], xmm1 = a[1], xmm2 = b[0], xmm3 = b[1]
        vmovsd  xmm0, qword ptr [rdi]
        vmovsd  xmm1, qword ptr [rdi + 8]
        vmovsd  xmm2, qword ptr [rsi]
        vmovsd  xmm3, qword ptr [rsi + 8]
        # Scalar compares write their result to bit 0 of a mask register:
        # k0 = (a[1] <= b[1]), k1 = (a[0] <= b[0])
        vcmplesd        k0, xmm1, xmm3
        vcmplesd        k1, xmm0, xmm2
        # k0 = both "<=" tests hold; copy the mask to eax.
        kandw   k0, k1, k0
        kmovd   eax, k0
        # k0 = (a[1] != b[1]), k1 = (a[0] != b[0])
        vcmpneqsd       k0, xmm1, xmm3
        vcmpneqsd       k1, xmm0, xmm2
        # k0 = at least one pair differs, i.e. NOT (both pairs equal);
        # copy the mask to ecx.
        korw    k0, k1, k0
        kmovd   ecx, k0
        # cl = (both "<=" hold) AND (at least one pair differs)
        and     cl, al
        # Zero-extend the byte result into eax and keep only bit 0.
        movzx   eax, cl
        and     eax, 1
        ret

x86-64 icc 19.0.1

# Intel syntax, legacy SSE2 encodings. The loads below read a[0]/a[1] through
# rdi and b[0]/b[1] through rsi; the int result is returned in eax.
dominates:
..B1.1:                         # Preds ..B1.0
        # r9d is zeroed up front so the later sete r9b yields a clean 0/1 in r9d.
        xor       r9d, r9d                                      #7.47
        # xmm3 = a[0], xmm5 = a[1], xmm2 = b[0], xmm4 = b[1].
        # xmm0/xmm1 take copies of a[0]/a[1] because cmpeqsd below overwrites
        # its destination (xmm3/xmm5) with the comparison mask.
        movsd     xmm3, QWORD PTR [rdi]                         #3.19
        movsd     xmm5, QWORD PTR [8+rdi]                       #5.19
        movaps    xmm0, xmm3                                    #7.47
        movsd     xmm2, QWORD PTR [rsi]                         #3.27
        movaps    xmm1, xmm5                                    #7.47
        movsd     xmm4, QWORD PTR [8+rsi]                       #5.27
        # Each compare leaves an all-ones (true) or all-zero (false) mask in the
        # low 64 bits of its destination:
        # xmm3 = (a[0] == b[0]), xmm5 = (a[1] == b[1]),
        # xmm0 = (a[0] <= b[0]), xmm1 = (a[1] <= b[1])
        cmpeqsd   xmm3, xmm2                                    #7.47
        cmpeqsd   xmm5, xmm4                                    #7.47
        cmplesd   xmm0, xmm2                                    #7.47
        cmplesd   xmm1, xmm4                                    #7.47
        # Move the low 32 bits of each mask to an integer register (-1 or 0).
        movd      ecx, xmm3                                     #7.47
        movd      r8d, xmm5                                     #7.47
        movd      eax, xmm0                                     #7.47
        movd      edx, xmm1                                     #7.47
        # neg turns -1 into 1 and leaves 0 unchanged: ecx/r8d become 0/1 flags.
        neg       ecx                                           #7.47
        neg       r8d                                           #7.47
        # ZF is set when (ecx & r8d) == 0, so r9b = NOT (both pairs equal).
        test      ecx, r8d                                      #7.47
        sete      r9b                                           #7.47
        # eax/edx become 0/1 flags for the two "<=" tests.
        neg       eax                                           #7.47
        neg       edx                                           #7.47
        # eax = (a[0] <= b[0]) & (a[1] <= b[1]) & !(both pairs equal)
        and       eax, edx                                      #7.23
        and       eax, r9d                                      #7.47
        ret                                                     #7.47

Reply via email to