https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79938
--- Comment #3 from postmaster at raasu dot org ---
With -mssse3 instead of -msse4.1, the issue gets even worse:
---
...
        pxor    %xmm1, %xmm1
        movl    $.LC0, %esi
        movl    $1, %edi
        movd    %eax, %xmm0
        movdqa  %xmm0, %xmm4
        pshufb  %xmm1, %xmm4
        movaps  %xmm4, (%rsp)
        movzbl  (%rsp), %eax
        movaps  %xmm4, 224(%rsp)
        movzbl  225(%rsp), %edx
        movaps  %xmm4, 208(%rsp)
        movaps  %xmm4, 192(%rsp)
        movaps  %xmm4, 176(%rsp)
        addl    %edx, %eax
        movzbl  210(%rsp), %edx
        movaps  %xmm4, 160(%rsp)
        movaps  %xmm4, 144(%rsp)
        movaps  %xmm4, 128(%rsp)
        movaps  %xmm4, 112(%rsp)
        addl    %edx, %eax
        movzbl  195(%rsp), %edx
        movaps  %xmm4, 96(%rsp)
        movzbl  105(%rsp), %ecx
        movaps  %xmm4, 80(%rsp)
        movaps  %xmm4, 64(%rsp)
        movaps  %xmm4, 48(%rsp)
        addl    %edx, %eax
        movzbl  165(%rsp), %edx
        movaps  %xmm4, 32(%rsp)
        movd    %eax, %xmm0
        movzbl  180(%rsp), %eax
        movaps  %xmm4, 16(%rsp)
        movaps  %xmm4, 240(%rsp)
        addl    %edx, %eax
        movzbl  150(%rsp), %edx
        addl    %edx, %eax
        movzbl  135(%rsp), %edx
        addl    %eax, %edx
        movzbl  120(%rsp), %eax
        movd    %edx, %xmm6
        punpckldq       %xmm6, %xmm0
        addl    %ecx, %eax
        movzbl  90(%rsp), %ecx
        addl    %ecx, %eax
        movzbl  75(%rsp), %ecx
        addl    %ecx, %eax
        movzbl  45(%rsp), %ecx
        movd    %eax, %xmm1
        movzbl  60(%rsp), %eax
        addl    %ecx, %eax
        movzbl  30(%rsp), %ecx
        addl    %ecx, %eax
        movzbl  15(%rsp), %ecx
        addl    %ecx, %eax
        movd    %eax, %xmm5
        xorl    %eax, %eax
        punpckldq       %xmm5, %xmm1
        punpcklqdq      %xmm1, %xmm0
        movdqa  %xmm0, %xmm2
        movd    %xmm0, %edx
        pshufd  $255, %xmm0, %xmm3
        punpckhdq       %xmm0, %xmm2
        pshufd  $85, %xmm0, %xmm1
...
---
Notice all the lines starting with "movaps %xmm4,": the same register contents are stored to the stack over and over, at many different offsets.