------- Comment #2 from kretz at kde dot org 2010-03-26 13:13 ------- BTW, I think you should consider this a bug, not an enhancement: if the shift argument is >= (1 << 32), the result of _mm_sll_epi64 differs from the expected result. Thus the code GCC generates could potentially hide a bug, making the code behave differently (but correctly) when compiled by a different compiler.
/* Testcase: */
#include <tmmintrin.h>

/*
 * Shift an all-ones vector left by a run-time count of (1 << 32), using
 * intrinsics only.
 *
 * The count is built without constants the compiler could fold trivially:
 * every byte of the all-ones vector has absolute value 1, and the sum of
 * absolute differences against zero adds eight of them per 64-bit lane,
 * giving 8; shifting that left by 29 yields 8 << 29 == 1 << 32.
 *
 * Returns the value of _mm_sll_epi64(all_ones, count). The PSLLQ instruction
 * yields zero for any count above 63, so a result that is not all zero means
 * the compiler looked at only part of the 64-bit count.
 */
__m128i intrin()
{
    __m128i a = _mm_setzero_si128();
    a = _mm_cmpeq_epi8(a, a);                       /* every bit set */
    __m128i count = _mm_sad_epu8(_mm_abs_epi8(a),
                                 _mm_setzero_si128()); /* 8 0 ... 8 0 ... */
    count = _mm_slli_epi64(count, 29);              /* 8 << 29 == 1 << 32 */
    return _mm_sll_epi64(a, count);
}

/*
 * Reference implementation: the same instruction sequence as intrin(),
 * written by hand so the compiler cannot alter the shift.
 *
 * The result is produced in xmm0. `r` is pinned to xmm0 and passed as an
 * output operand, which is the documented way to tell GCC which register an
 * asm statement writes; merely declaring the register variable and returning
 * it would leave its value unspecified. Because xmm0 is an operand it must
 * not also appear in the clobber list.
 */
__m128i assem()
{
    register __m128i r __asm__("xmm0");
    __asm__(
        "pxor %%xmm1,%%xmm1\n\t"     /* xmm1 = 0 */
        "pcmpeqb %%xmm0,%%xmm0\n\t"  /* xmm0 = all ones */
        "pabsb %%xmm0,%%xmm2\n\t"    /* xmm2 = 0x01 in every byte */
        "psadbw %%xmm1,%%xmm2\n\t"   /* xmm2 = 8 in each 64-bit lane */
        "psllq $29,%%xmm2\n\t"       /* xmm2 = 1 << 32 in each 64-bit lane */
        "psllq %%xmm2,%%xmm0\n\t"    /* xmm0 <<= low 64 bits of xmm2 */
        : "=x"(r)
        :
        : "xmm1", "xmm2");
    return r;
}

/*
 * Compare the intrinsic result with the hand-written reference.
 *
 * Returns 0 when all four 32-bit lanes are equal, -1 otherwise.
 */
int main()
{
    const __m128i a = intrin();
    const __m128i b = assem();
    return (_mm_movemask_epi8(_mm_cmpeq_epi32(a, b)) == 0xffff) ? 0 : -1;
}

/* -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43514 */