https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69489
--- Comment #14 from Julian Taylor <jtaylor.debian at googlemail dot com> ---
I am on x86_64. It actually does vectorize with -mavx but not with -msse2.
The other variant of the loop I posted does vectorize with sse2.

$ gcc --version
gcc (GCC) 7.0.0 20160421 (experimental)
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

$ cat test.c
double yule_bool_distance_char2(const char *u, const char *v, long n)
{
    long i;
    long ntt = 0l, nff = 0l, nft = 0l, ntf = 0l;

    for (i = 0l; i < n; i++) {
        ntf += (u[i] && !v[i]);
        nft += (!u[i] && v[i]);
    }
    return (2.0 * ntf * nft);
}

$ gcc -O2 -ftree-vectorize test.c -c #same with O3
$ objdump -d test.o

test.o: file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <yule_bool_distance_char2>:
   0: 48 85 d2              test %rdx,%rdx
   3: 7e 69                 jle 6e <yule_bool_distance_char2+0x6e>
   5: 55                    push %rbp
   6: 53                    push %rbx
   7: 45 31 d2              xor %r10d,%r10d
   a: 45 31 db              xor %r11d,%r11d
   d: 31 c0                 xor %eax,%eax
   f: 31 ed                 xor %ebp,%ebp
  11: 0f 1f 80 00 00 00 00  nopl 0x0(%rax)
  18: 44 0f b6 0c 06        movzbl (%rsi,%rax,1),%r9d
  1d: 44 0f b6 04 07        movzbl (%rdi,%rax,1),%r8d
  22: 45 84 c9              test %r9b,%r9b
  25: 0f 94 c3              sete %bl
  28: 31 c9                 xor %ecx,%ecx
  2a: 45 84 c0              test %r8b,%r8b
  2d: 0f 95 c1              setne %cl
  30: 48 21 d9              and %rbx,%rcx
  33: 49 01 ca              add %rcx,%r10
  36: 31 c9                 xor %ecx,%ecx
  38: 45 84 c9              test %r9b,%r9b
  3b: 0f 95 c1              setne %cl
  3e: 45 84 c0              test %r8b,%r8b
  41: 48 0f 45 cd           cmovne %rbp,%rcx
  45: 48 83 c0 01           add $0x1,%rax
  49: 49 01 cb              add %rcx,%r11
  4c: 48 39 c2              cmp %rax,%rdx
  4f: 75 c7                 jne 18 <yule_bool_distance_char2+0x18>
  51: 66 0f ef c0           pxor %xmm0,%xmm0
  55: 66 0f ef c9           pxor %xmm1,%xmm1
  59: 5b                    pop %rbx
  5a: f2 49 0f 2a c2        cvtsi2sd %r10,%xmm0
  5f: f2 49 0f 2a cb        cvtsi2sd %r11,%xmm1
  64: 5d                    pop %rbp
  65: f2 0f 58 c0           addsd %xmm0,%xmm0
  69: f2 0f 59 c1           mulsd %xmm1,%xmm0
  6d: c3                    retq
  6e: 66 0f ef c0           pxor %xmm0,%xmm0
  72: c3                    retq