http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55258



             Bug #: 55258

           Summary: SSE register isn't used for 16byte copy

    Classification: Unclassified

           Product: gcc

           Version: 4.8.0

            Status: UNCONFIRMED

          Severity: normal

          Priority: P3

         Component: target

        AssignedTo: unassig...@gcc.gnu.org

        ReportedBy: hjl.to...@gmail.com

                CC: ubiz...@gmail.com





[hjl@gnu-tools-1 pr55247]$ cat x.i 

/* x.i includes no header, so uint32_t is declared locally; the two
   Elf32_* names are plain aliases of it.  */
typedef unsigned int uint32_t;

typedef uint32_t Elf32_Word;

typedef uint32_t Elf32_Addr;

/* Symbol record that _dl_profile_fixup copies by value.  It has three
   uint32_t members followed by one unsigned char; both assembly listings
   in this report scale the symbol index by 16 (sall $4) and move 16 bytes,
   so the padded size on this target is 16 bytes.  */
typedef struct {

  Elf32_Word st_name;

  Elf32_Addr st_value;

  Elf32_Word st_size;

  unsigned char st_other;

} Elf32_Sym;

/* Relocation record; r_info is the only member declared in this test
   case.  _dl_profile_fixup uses r_info >> 8 as a symbol index.  */
typedef struct {

  Elf32_Word r_info;

}

Elf32_Rela;

/* Dynamic-section entry, cut down to a union with a single member.
   d_ptr is an integer (Elf32_Addr); _dl_profile_fixup casts it to a
   pointer before use.  */
typedef struct {

  union {

    Elf32_Addr d_ptr;

  }

  d_un;

} Elf32_Dyn;

/* Holds 34 pointers to Elf32_Dyn entries.  Only slots 6 and 23 are read
   by _dl_profile_fixup.  */
struct link_map   {

  Elf32_Dyn *l_info[34];

};

/* Defined outside this file.  It receives a pointer to a non-const
   Elf32_Sym; what it does with the object is not visible here.  */
extern void symbind32 (Elf32_Sym *);

/* Reproducer for the 16-byte copy: select one Elf32_Sym, copy it into a
   local, and pass the address of the copy to symbind32.

   l          link map; l_info[6] and l_info[23] are dereferenced with no
              null check.
   reloc_arg  index of the Elf32_Rela record to use; it is multiplied by
              sizeof (Elf32_Rela) to form a byte offset.

   NOTE(review): slots 6 and 23 presumably correspond to DT_SYMTAB and
   DT_JMPREL -- nothing in this file says so.  */
void

_dl_profile_fixup (struct link_map *l, Elf32_Word reloc_arg)

{

  /* d_ptr is an integer; the cast through const void * turns it into the
     base address of the symbol array.  */
  const Elf32_Sym *const symtab  = (const void *) l->l_info[6]->d_un.d_ptr;

  /* Address of the selected relocation record: the integer base from
     l_info[23] plus the byte offset, then cast to a pointer.  */
  const Elf32_Rela *const reloc  = (const void *) (l->l_info[23]->d_un.d_ptr +

reloc_arg * sizeof (Elf32_Rela));

  /* Struct assignment -- this is the copy the report is about.  The symbol
     index is r_info with its low 8 bits shifted out.  */
  Elf32_Sym sym = symtab[(reloc->r_info) >> 8];

  /* The address of sym is passed on, so the copy has to exist in memory.  */
  symbind32 (&sym);

}

[hjl@gnu-tools-1 pr55247]$ /export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc

-B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -O -Wall -mx32

-maddress-mode=short  -S x.i -o short.asm

[hjl@gnu-tools-1 pr55247]$ cat short.asm 

    .file    "x.i"

    .text

    .globl    _dl_profile_fixup

    .type    _dl_profile_fixup, @function

# Output for -maddress-mode=short.  The 16-byte struct copy is done with two
# 64-bit general-purpose register loads and two stores; no SSE register is
# used.  Incoming arguments: %edi = l, %esi = reloc_arg.
_dl_profile_fixup:

.LFB0:

    .cfi_startproc

    subl    $24, %esp    # local frame; sym occupies 0..15(%esp)

    .cfi_def_cfa_offset 32

    movl    24(%edi), %edx    # l->l_info[6]  (pointers are 4 bytes here)

    movl    92(%edi), %eax    # l->l_info[23]

    movl    (%eax), %eax    # l->l_info[23]->d_un.d_ptr

    movl    (%eax,%esi,4), %eax    # reloc->r_info, index scaled by 4

    shrl    $8, %eax    # r_info >> 8

    sall    $4, %eax    # times 16: byte offset of the Elf32_Sym

    addl    (%edx), %eax    # plus l->l_info[6]->d_un.d_ptr = &symtab[index]

    movq    8(%eax), %rdx    # upper 8 bytes of the struct

    movq    (%eax), %rax    # lower 8 bytes of the struct

    movq    %rax, (%esp)    # store lower half of sym

    movq    %rdx, 8(%esp)    # store upper half of sym

    movl    %esp, %edi    # argument: &sym

    call    symbind32

    addl    $24, %esp

    .cfi_def_cfa_offset 8

    ret

    .cfi_endproc

.LFE0:

    .size    _dl_profile_fixup, .-_dl_profile_fixup

    .ident    "GCC: (GNU) 4.8.0 20121110 (experimental)"

    .section    .note.GNU-stack,"",@progbits

[hjl@gnu-tools-1 pr55247]$ /export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc

-B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -O -Wall -mx32

-maddress-mode=long  -S x.i -o long.asm

[hjl@gnu-tools-1 pr55247]$ cat long.asm 

    .file    "x.i"

    .text

    .globl    _dl_profile_fixup

    .type    _dl_profile_fixup, @function

# Output for -maddress-mode=long.  The struct is loaded with one SSE
# instruction and stored to a stack temporary, but the temporary is then
# copied a second time into sym through general-purpose registers.
# Incoming arguments: %rdi = l, %esi = reloc_arg.
_dl_profile_fixup:

.LFB0:

    .cfi_startproc

    subq    $40, %rsp    # temporary at 0..15(%rsp), sym at 16..31(%rsp)

    .cfi_def_cfa_offset 48

    movl    24(%rdi), %edx    # l->l_info[6]  (pointers are 4 bytes here)

    movl    92(%rdi), %eax    # l->l_info[23]

    movl    (%rax), %eax    # l->l_info[23]->d_un.d_ptr

    movl    (%eax,%esi,4), %eax    # reloc->r_info, index scaled by 4

    shrl    $8, %eax    # r_info >> 8

    sall    $4, %eax    # times 16: byte offset of the Elf32_Sym

    addl    (%rdx), %eax    # plus l->l_info[6]->d_un.d_ptr = &symtab[index]

    movdqu    (%eax), %xmm0    # unaligned 16-byte load of the whole struct

    movdqa    %xmm0, (%rsp)    # aligned 16-byte store to the temporary

    movq    (%rsp), %rax    # reload lower half of the temporary

    movq    8(%rsp), %rdx    # reload upper half of the temporary

    movq    %rax, 16(%rsp)    # second copy: lower half of sym

    movq    %rdx, 24(%rsp)    # second copy: upper half of sym

    leaq    16(%rsp), %rdi    # argument: &sym

    call    symbind32

    addq    $40, %rsp

    .cfi_def_cfa_offset 8

    ret

    .cfi_endproc

.LFE0:

    .size    _dl_profile_fixup, .-_dl_profile_fixup

    .ident    "GCC: (GNU) 4.8.0 20121110 (experimental)"

    .section    .note.GNU-stack,"",@progbits

[hjl@gnu-tools-1 pr55247]$ 



For TARGET_SSE_UNALIGNED_LOAD_OPTIMAL/TARGET_SSE_UNALIGNED_STORE_OPTIMAL,

we should always generate the following for the 16-byte copy:



    movdqu    (%eax), %xmm0

    movdqa    %xmm0, (%rsp)

Reply via email to