------- Additional Comments From uros at kss-loka dot si 2005-08-17 09:54
-------
When cross-compiling from i686 to x86_64, I got functionally equivalent asm
for 32-bit and 64-bit mode:
gcc -O2 -m64:
.LFB128:
subq $24, %rsp
.LCFI0:
leaq 20(%rsp), %rax
leaq 24(%rsp), %rdx
cmpq %rax, %rdx
jbe .L7
movq .LC5(%rip), %mm0
pxor %mm1, %mm1
packuswb %mm1, %mm0
movq %mm0, (%rsp)
movq (%rsp), %rax
incl %eax
jne .L4
emms
...
gcc -m32 -mmmx:
pushl %ebp
movl %esp, %ebp
subl $40, %esp
leal -4(%ebp), %eax
andl $-16, %esp
subl $16, %esp
cmpl %eax, %ebp
jbe .L7
movq .LC6, %mm0
pxor %mm1, %mm1
packuswb %mm1, %mm0
movd %mm0, -20(%ebp)
movl -20(%ebp), %eax
incl %eax
jne .L4
emms
...
Tested with gcc version 4.1.0 20050716 (experimental)
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=22432