https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86017

            Bug ID: 86017
           Summary: multiple consecutive calls to bzero/memset not merged
           Product: gcc
           Version: 8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: msebor at gcc dot gnu.org
  Target Milestone: ---

Even though the two functions defined in the test case below are equivalent,
GCC emits considerably less efficient code for the one with multiple calls to
memset than for the one with just a single call.  Clang emits the same
optimally efficient code for both.

$ cat b.c && gcc -O2 -S -Wall -Wextra -fdump-tree-optimized=/dev/stdout
-o/dev/stdout b.c
void f (void*);

/* Baseline case: zero all 8 bytes of the local array A with a single
   memset call, then pass A to the external function F.  */
void g (void)
{
  char a[8];
  __builtin_memset (a, 0, 8);

  f (a);
}

/* Same effect as g, but the 8 bytes of the local array A are zeroed by
   eight consecutive one-byte memset calls at offsets 0 through 7 before
   A is passed to the external function F.  The calls are deliberately
   kept separate: this is the form the report is about.  */
void h (void)
{
  char a[8];
  __builtin_memset (a, 0, 1);
  __builtin_memset (a + 1, 0, 1);
  __builtin_memset (a + 2, 0, 1);
  __builtin_memset (a + 3, 0, 1);
  __builtin_memset (a + 4, 0, 1);
  __builtin_memset (a + 5, 0, 1);
  __builtin_memset (a + 6, 0, 1);
  __builtin_memset (a + 7, 0, 1);

  f (a);
}

        .file   "b.c"
        .text

;; Function g (g, funcdef_no=0, decl_uid=1958, cgraph_uid=0, symbol_order=0)

g ()
{
  char a[8];

  <bb 2> [local count: 1073741825]:
  __builtin_memset (&a, 0, 8);
  f (&a);
  a ={v} {CLOBBER};
  return;

}


        .p2align 4,,15
        .globl  g
        .type   g, @function
g:
.LFB0:
        .cfi_startproc
        subq    $24, %rsp
        .cfi_def_cfa_offset 32
        movq    $0, 8(%rsp)
        leaq    8(%rsp), %rdi
        call    f
        addq    $24, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE0:
        .size   g, .-g

;; Function h (h, funcdef_no=1, decl_uid=1962, cgraph_uid=1, symbol_order=1)

h ()
{
  char a[8];

  <bb 2> [local count: 1073741825]:
  MEM[(void *)&a] = 0;
  __builtin_memset (&MEM[(void *)&a + 1B], 0, 1);
  __builtin_memset (&MEM[(void *)&a + 2B], 0, 1);
  __builtin_memset (&MEM[(void *)&a + 3B], 0, 1);
  __builtin_memset (&MEM[(void *)&a + 4B], 0, 1);
  __builtin_memset (&MEM[(void *)&a + 5B], 0, 1);
  __builtin_memset (&MEM[(void *)&a + 6B], 0, 1);
  __builtin_memset (&MEM[(void *)&a + 7B], 0, 1);
  f (&a);
  a ={v} {CLOBBER};
  return;

}


        .p2align 4,,15
        .globl  h
        .type   h, @function
h:
.LFB1:
        .cfi_startproc
        subq    $24, %rsp
        .cfi_def_cfa_offset 32
        leaq    8(%rsp), %rdi
        movb    $0, 8(%rsp)
        movb    $0, 9(%rsp)
        movb    $0, 10(%rsp)
        movb    $0, 11(%rsp)
        movb    $0, 12(%rsp)
        movb    $0, 13(%rsp)
        movb    $0, 14(%rsp)
        movb    $0, 15(%rsp)
        call    f
        addq    $24, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE1:
        .size   h, .-h
        .ident  "GCC: (GNU) 8.1.1 20180522"
        .section        .note.GNU-stack,"",@progbits

Reply via email to