https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90599

            Bug ID: 90599
           Summary: Inefficient code for __builtin_memset
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---

[hjl@gnu-cfl-1 pieces-6]$ cat x.i
extern char *dst;

void
foo (int x)
{
  __builtin_memset (dst, x, 12);
}
[hjl@gnu-cfl-1 pieces-6]$ make x.s
/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O3
-march=skylake  -S x.i
[hjl@gnu-cfl-1 pieces-6]$ cat x.s
        .file   "x.i"
        .text
        .p2align 4
        .globl  foo
        .type   foo, @function
foo:
.LFB0:
        .cfi_startproc
        movzbl  %dil, %eax
        movabsq $72340172838076673, %rcx
        movzbl  %dil, %edi
        imulq   %rcx, %rax  <<< The low byte of x has been broadcast to all 8 bytes of RAX.
        imull   $16843009, %edi, %edi
        movq    dst(%rip), %rdx
        movq    %rax, (%rdx)
        movl    %edi, 8(%rdx)  <<< EDI holds the same value as EAX (the low 32 bits of RAX); we should just store EAX and drop the second multiply.
        ret
        .cfi_endproc
.LFE0:
        .size   foo, .-foo
        .ident  "GCC: (GNU) 10.0.0 20190522 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 pieces-6]$

Reply via email to