On Friday, 22 January 2016 at 17:12:25 UTC, userABCabc123 wrote:

Note that there may be a DMD codegen bug: the asm generated for the non-naked version copies the result to the stack and then from the stack back to the result register, even though after pmovmskb it is already set up in EAX.

; Disassembly of the non-naked version: frame setup, spill/reload of the
; xmm0 input, pmovmskb, then a round-trip of the result through [rbp-18h].
000000000044C580h  push rbp
000000000044C581h  mov rbp, rsp
000000000044C584h  sub rsp, 20h ; reserve 32 bytes of stack for the slots used below
000000000044C588h  movdqa dqword ptr [rbp-10h], xmm0 ; spill the xmm0 input to [rbp-10h]
000000000044C58Dh  mov dword ptr [rbp-18h], 00000000h ; zero the dword slot at [rbp-18h] (presumably the local "r")
000000000044C594h  movdqa xmm0, dqword ptr [rbp-10h] ; reload the 16 bytes that were just spilled
000000000044C599h  pmovmskb eax, xmm0 ; already in result
000000000044C59Dh  mov dword ptr [rbp-18h], eax ; what?
000000000044C5A0h  mov eax, dword ptr [rbp-18h] ; what?
000000000044C5A3h  mov rsp, rbp
000000000044C5A6h  pop rbp
000000000044C5A7h  ret

Oops, there is no DMD codegen bug: the non-naked version explicitly uses a local variable for the return value, so without the local "r" this gives:

/**
 * Builds an integer mask from the most significant bit of every byte of `v`.
 *
 * Bit `i` (0..15) of the result is the sign bit of byte `i` of `v`; the
 * remaining upper bits of the result are zero.
 *
 * The body is `naked`, so the compiler emits no prologue or epilogue and the
 * code depends directly on the calling convention: it reads the argument
 * from XMM0 and leaves the `int` result in EAX, exactly as the previous
 * hand-written body did.
 * NOTE(review): this matches how vectors are passed under the System V
 * AMD64 ABI; confirm the register assignment before using it on a target
 * with a different convention.
 *
 * Params:
 *   v = 16 packed bytes whose sign bits are gathered
 * Returns: the 16-bit sign mask, zero-extended to `int`
 */
int pmovmskb(byte16 v)
{
    asm
    {
        naked;
        // Leaf function with the operand already in XMM0: no stack frame and
        // no spill/reload through memory are needed before extracting the mask.
        pmovmskb EAX, XMM0;
        ret;
    }
}

Reply via email to