On Friday, 22 January 2016 at 17:12:25 UTC, userABCabc123 wrote:
Note that there may be a DMD codegen bug: the asm generated
for the non-naked version copies the result to the stack and
then copies it from the stack back into the result register,
even though after pmovmskb it is already set up in EAX.
000000000044C580h push rbp
000000000044C581h mov rbp, rsp
000000000044C584h sub rsp, 20h
000000000044C588h movdqa dqword ptr [rbp-10h], xmm0
000000000044C58Dh mov dword ptr [rbp-18h], 00000000h
000000000044C594h movdqa xmm0, dqword ptr [rbp-10h]
000000000044C599h pmovmskb eax, xmm0 ; already in result
000000000044C59Dh mov dword ptr [rbp-18h], eax ; what?
000000000044C5A0h mov eax, dword ptr [rbp-18h] ; what?
000000000044C5A3h mov rsp, rbp
000000000044C5A6h pop rbp
000000000044C5A7h ret
Oops, there is no DMD codegen bug: the non-naked version explicitly
uses a local variable for the return value, so without the local "r"
this gives:
/**
 * Returns the byte sign mask of `v`: PMOVMSKB gathers the most significant
 * bit of each of the 16 bytes of its source into the low 16 bits of the
 * destination register.
 *
 * The body is `naked`, so the stack frame is set up and torn down by hand.
 * Assumes `v` arrives in XMM0 and the int result is returned in EAX, as in
 * the compiler-generated listing for the non-naked version -- confirm for
 * other targets/ABIs.
 */
int pmovmskb(byte16 v)
{
    asm
    {
        naked;
        push RBP;
        mov RBP, RSP;
        sub RSP, 0x10;              // 16-byte scratch slot for v
        // The slot receives all 128 bits of XMM0, so the memory operand must not
        // carry a `dword ptr` (32-bit) size override; the size comes from movdqa.
        // movdqa also requires the address to be 16-byte aligned.
        movdqa [RBP-0x10], XMM0;    // spill v
        movdqa XMM0, [RBP-0x10];    // reload v (same round trip as in the listing)
        pmovmskb EAX, XMM0;         // the mask lands directly in the return register
        mov RSP, RBP;
        pop RBP;
        ret;
    }
}