http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56511
Bug #: 56511
Summary: memcpy misses chance to use AVX instructions
Classification: Unclassified
Product: gcc
Version: 4.7.2
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: rtl-optimization
AssignedTo: unassig...@gcc.gnu.org
ReportedBy: jyass...@gcc.gnu.org

When operating on sufficiently aligned storage, memcpy should be able to use vector instructions.

$ cat test.c
#include <string.h>

typedef float vec __attribute__((vector_size(32)));

typedef struct S {
  vec v;
  char __attribute__((aligned(__alignof__(vec)))) c[sizeof(vec)];
} S;

void assign_vec(S* s, const vec* v) {
  s->v = *v;
}

void memcpy_vec(S* s, const vec* v) {
  memcpy(&s->v, v, sizeof(vec));
}

void memcpy_char(S* s, const vec* v) {
  memcpy(s->c, v, sizeof(vec));
}

$ gcc -mavx -S test.c -O2 -Wall -o -
        .file "test.c"
        .text
        .p2align 4,,15
        .globl assign_vec
        .type assign_vec, @function
assign_vec:
.LFB12:
        .cfi_startproc
        vmovaps (%rsi), %ymm0
        vmovaps %ymm0, (%rdi)
        vzeroupper
        ret
        .cfi_endproc
.LFE12:
        .size assign_vec, .-assign_vec
        .p2align 4,,15
        .globl memcpy_vec
        .type memcpy_vec, @function
memcpy_vec:
.LFB13:
        .cfi_startproc
        movq (%rsi), %rax
        movq %rax, (%rdi)
        movq 8(%rsi), %rax
        movq %rax, 8(%rdi)
        movq 16(%rsi), %rax
        movq %rax, 16(%rdi)
        movq 24(%rsi), %rax
        movq %rax, 24(%rdi)
        ret
        .cfi_endproc
.LFE13:
        .size memcpy_vec, .-memcpy_vec
        .p2align 4,,15
        .globl memcpy_char
        .type memcpy_char, @function
memcpy_char:
.LFB14:
        .cfi_startproc
        movq (%rsi), %rdx
        movq %rdx, 32(%rdi)
        movq 8(%rsi), %rdx
        movq %rdx, 40(%rdi)
        movq 16(%rsi), %rdx
        movq %rdx, 48(%rdi)
        movq 24(%rsi), %rdx
        movq %rdx, 56(%rdi)
        ret
        .cfi_endproc
.LFE14:
        .size memcpy_char, .-memcpy_char

I don't have a gcc-4.8 around to test with, but I believe it's also missing this optimization.