https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87505
Bug ID: 87505
Summary: Vectorizer generates a lot of code for a small loop
Product: gcc
Version: unknown
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: hiraditya at msn dot com
Target Milestone: ---

test.cpp

#include <cstdint>
int bar(int* v, std::size_t base) {
    int sum = 0;
    for (int i = base; i < base + 4; ++i) {
        sum += v[i];
    }
    return sum;
}

$ gcc-8.2 -std=c++17 -O3 -DNDEBUG test.cpp

bar(int*, unsigned long):
        movslq %esi, %rcx
        leaq 4(%rsi), %r8
        movl %esi, %edx
        cmpq %r8, %rcx
        jnb .L7
        leaq 3(%rsi), %rax
        movq %r8, %r9
        subq %rcx, %rax
        subq %rcx, %r9
        cmpq $3, %rax
        jbe .L8
        movq %r9, %rdx
        leaq (%rdi,%rcx,4), %rax
        pxor %xmm0, %xmm0
        shrq $2, %rdx
        salq $4, %rdx
        addq %rax, %rdx
.L5:
        movdqu (%rax), %xmm2
        addq $16, %rax
        paddd %xmm2, %xmm0
        cmpq %rdx, %rax
        jne .L5
        movdqa %xmm0, %xmm1
        movq %r9, %r10
        psrldq $8, %xmm1
        andq $-4, %r10
        paddd %xmm1, %xmm0
        addq %r10, %rcx
        leal (%rsi,%r10), %edx
        movdqa %xmm0, %xmm1
        psrldq $4, %xmm1
        paddd %xmm1, %xmm0
        movd %xmm0, %eax
        cmpq %r10, %r9
        je .L10
.L3:
        addl (%rdi,%rcx,4), %eax
        leal 1(%rdx), %ecx
        movslq %ecx, %rcx
        cmpq %r8, %rcx
        jnb .L1
        addl (%rdi,%rcx,4), %eax
        leal 2(%rdx), %ecx
        movslq %ecx, %rcx
        cmpq %rcx, %r8
        jbe .L1
        addl $3, %edx
        addl (%rdi,%rcx,4), %eax
        movslq %edx, %rdx
        cmpq %rdx, %r8
        jbe .L1
        addl (%rdi,%rdx,4), %eax
        ret
.L7:
        xorl %eax, %eax
.L1:
        ret
.L10:
        ret
.L8:
        xorl %eax, %eax
        jmp .L3