https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66278
Bug ID: 66278 Summary: Missed auto-vectorization of an array subtraction Product: gcc Version: 5.1.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: marxin at gcc dot gnu.org Target Milestone: --- Target: x86_64-linux-gnu Hello. In the following test case, GCC does not auto-vectorize the loop: the generated assembly performs one scalar subtraction per element instead of using vector instructions, whereas ICC vectorizes the same loop. $ cat vector.c #include <stdint.h> #define N 101 int main(int argc, char **argv) { uint32_t array[N][N][N]; const unsigned int next = argc == 3 ? 0 : 1; for (unsigned i = next; i < N; i++) array[3][3][i] = array[3][3][i] - 10; return array[3][3][argc]; } gcc 5.1.0 (same for GCC 4.8) with -O3 (http://goo.gl/zA7LMy): main: xorl %eax, %eax subq $4121104, %rsp cmpl $3, %edi setne %al .L2: movl %eax, %edx addl $1, %eax subl $10, 123504(%rsp,%rdx,4) cmpl $101, %eax jne .L2 movslq %edi, %rdi movl 123504(%rsp,%rdi,4), %eax addq $4121104, %rsp ret icc 13.0.1 with -O3 (http://goo.gl/xzlz2C): L__routine_start_main_0: main: pushq %rbp #6.1 movq %rsp, %rbp #6.1 andq $-128, %rsp #6.1 pushq %r12 #6.1 subq $4121208, %rsp #6.1 movl %edi, %r12d #6.1 movl $3, %edi #6.1 call __intel_new_proc_init #6.1 stmxcsr (%rsp) #6.1 movslq %r12d, %r12 #6.1 xorl %edi, %edi #9.37 movl $1, %esi #9.37 cmpq $3, %r12 #9.37 cmove %edi, %esi #9.37 orl $32832, (%rsp) #6.1 ldmxcsr (%rsp) #6.1 movl %esi, %ecx #11.3 negl %ecx #11.3 addl $101, %ecx #11.3 lea 123624(%rsp,%rsi,4), %rax #11.3 andq $15, %rax #11.3 movl %eax, %edx #11.3 negl %edx #11.3 addl $16, %edx #11.3 shrl $2, %edx #11.3 testl %eax, %eax #11.3 cmovne %edx, %eax #11.3 lea 4(%rax), %r8d #11.3 cmpl %r8d, %ecx #11.3 jl ..B1.16 # Prob 10% #11.3 movl %ecx, %edx #11.3 subl %eax, %edx #11.3 andl $3, %edx #11.3 negl %edx #11.3 addl %ecx, %edx #11.3 testl %eax, %eax #11.3 jbe ..B1.8 # Prob 10% #11.3 ..B1.6: # Preds ..B1.4 ..B1.6 lea (%rsi,%rdi), %r8d #12.22 incl %edi #11.3 addl $-10, 123624(%rsp,%r8,4) #12.39 cmpl %eax, %edi #11.3 jb ..B1.6 # Prob 99% #11.3 
..B1.8: # Preds ..B1.6 ..B1.4 movdqa .L_2il0floatpacket.2(%rip), %xmm0 #12.39 ..B1.9: # Preds ..B1.9 ..B1.8 lea (%rsi,%rax), %edi #12.22 addl $4, %eax #11.3 cmpl %edx, %eax #11.3 movdqa 123624(%rsp,%rdi,4), %xmm1 #12.39 paddd %xmm0, %xmm1 #12.39 movdqa %xmm1, 123624(%rsp,%rdi,4) #12.5 jb ..B1.9 # Prob 99% #11.3 ..B1.11: # Preds ..B1.9 ..B1.16 cmpl %ecx, %edx #11.3 jae ..B1.15 # Prob 10% #11.3 ..B1.13: # Preds ..B1.11 ..B1.13 lea (%rsi,%rdx), %eax #12.22 incl %edx #11.3 addl $-10, 123624(%rsp,%rax,4) #12.39 cmpl %ecx, %edx #11.3 jb ..B1.13 # Prob 99% #11.3 ..B1.15: # Preds ..B1.13 ..B1.11 movl 123624(%rsp,%r12,4), %eax #14.10 addq $4121208, %rsp #14.10 popq %r12 #14.10 movq %rbp, %rsp #14.10 popq %rbp #14.10 ret #14.10 ..B1.16: # Preds ..B1.3 # Infreq movl %edi, %edx #11.3 jmp ..B1.11 # Prob 100% #11.3 .L_2il0floatpacket.2: .long 0xfffffff6,0xfffffff6,0xfffffff6,0xfffffff6 Thanks, Martin