On Sun, Apr 24, 2011 at 09:06:16AM +0300, Uoti Urpala wrote:
> The current generic C implementation, which is always used when the
> public header is included from other programs (either directly or
> through intreadwrite.h) compiles to a mess like this (gcc-4.6 on AMD64):
>         movq    %rdi, %rdx
>         shrq    $32, %rdx
>         movl    %edx, %eax
>         sall    $8, %edx
>         shrl    $8, %eax
>         andl    $-16711936, %edx
>         andl    $16711935, %eax
>         orl     %edx, %eax
>         movl    %edi, %edx
>         sall    $8, %edi
>         shrl    $8, %edx
>         andl    $-16711936, %edi
>         roll    $16, %eax
>         andl    $16711935, %edx
>         orl     %edi, %edx
>         roll    $16, %edx
>         salq    $32, %rdx
>         orq     %rdx, %rax
>         ret

Is this av_bswap64? With gcc 4.6 on amd64 at -O2, av_bswap32 compiles to
something simpler here. Also, a naive implementation of the swap seems to be
recognized as a bswap:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Reverse the byte order of a 32-bit value the "naive" way: isolate each
     * of the four bytes with a mask and shift it to its mirrored position
     * (byte 0 <-> byte 3, byte 1 <-> byte 2), then OR the pieces together.
     * In the disassembly further down this form is compiled to a single bswap.
     */
    uint32_t swap_naive(uint32_t x)
    {
        return (x&0x000000ff)<<24 | (x&0x0000ff00)<<8 | (x&0x00ff0000)>>8 | 
(x&0xff000000)>>24;
    }

    /*
     * Reverse the byte order of a 32-bit value in two steps.
     * In the disassembly further down this form is NOT reduced to a bswap;
     * it stays as shift/and/or followed by a rol.
     */
    uint32_t swap_ff(uint32_t x)
    {
         /* Step 1: swap the two bytes inside each 16-bit half. */
         x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
         /* Step 2: exchange the 16-bit halves (a rotate by 16). */
         x= (x>>16) | (x<<16);
         return x;
    }

    /*
     * Reverse the byte order of a 32-bit value by delegating to the
     * compiler-provided __builtin_bswap32 (a compiler extension, not ISO C).
     * Serves as the reference the other two variants are compared against.
     */
    uint32_t swap_builtin(uint32_t x)
    {
        return __builtin_bswap32(x);
    }

    /*
     * Run the same test value through all three swap implementations and
     * print each result as 8 hex digits; for 0x12345678 every line should
     * read 78563412.
     */
    int main(void)
    {
        uint32_t v = 0x12345678;

        /* PRIx32 is the conversion that matches uint32_t; a bare %x is only
         * correct where uint32_t happens to be unsigned int. */
        printf("%08" PRIx32 "\n", swap_naive(v));
        printf("%08" PRIx32 "\n", swap_ff(v));
        printf("%08" PRIx32 "\n", swap_builtin(v));
        return 0;
    }

This is the generated ASM here:

    0000000000400530 <swap_naive>:
      400530:       89 f8                   mov    %edi,%eax
      400532:       0f c8                   bswap  %eax
      400534:       c3                      retq   
      400535:       66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
      40053c:       00 00 00 00 

    0000000000400540 <swap_ff>:
      400540:       89 fa                   mov    %edi,%edx
      400542:       89 f8                   mov    %edi,%eax
      400544:       c1 e2 08                shl    $0x8,%edx
      400547:       c1 e8 08                shr    $0x8,%eax
      40054a:       81 e2 00 ff 00 ff       and    $0xff00ff00,%edx
      400550:       25 ff 00 ff 00          and    $0xff00ff,%eax
      400555:       09 d0                   or     %edx,%eax
      400557:       c1 c0 10                rol    $0x10,%eax
      40055a:       c3                      retq   
      40055b:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)

    0000000000400560 <swap_builtin>:
      400560:       89 f8                   mov    %edi,%eax
      400562:       0f c8                   bswap  %eax
      400564:       c3                      retq  

Also note that clang uses bswap in all of these cases.

I didn't test on any other architecture or with older compiler versions, and I
didn't test the 64-bit (u64) case, so a naive implementation might not be a
better alternative. But well, this mail is just an FYI.

-- 
Clément B.

Attachment: pgpr7JQxGmfwR.pgp
Description: PGP signature

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to