https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98167
--- Comment #16 from Hongtao.liu <crazylht at gmail dot com> ---
/* 16-byte GNU vector of int (four lanes when int is 32 bits). */
typedef int v4si __attribute__ ((vector_size(16)));
/*
 * Reproducer: permute both inputs with the same lane order, then multiply
 * them lane by lane.  Because the permutation is identical on both sides,
 * the result equals that same permutation applied to a * b, which needs
 * only one shuffle instead of two.
 */
v4si f(v4si a, v4si b) {
/* a1 = { a[2], a[3], a[1], a[0] } */
v4si a1 = __builtin_shufflevector (a, a, 2, 3 ,1 ,0);
/* Indices 0..3 select only from the first operand, so the second operand
   (a) is never read here: b1 = { b[2], b[3], b[1], b[0] }. */
v4si b1 = __builtin_shufflevector (b, a, 2, 3 ,1 ,0);
/* Lane-wise product of the two permuted vectors. */
return a1 * b1;
}
GCC generates:
# Two shuffles (one per input) followed by the multiply.
f:
vpshufd xmm1, xmm1, 30 # xmm1 = xmm1[2,3,1,0]
vpshufd xmm0, xmm0, 30 # xmm0 = xmm0[2,3,1,0]
vpmulld xmm0, xmm0, xmm1 # per-lane 32-bit multiply of the shuffled inputs
ret
LLVM generates:
# Multiply first, then a single shuffle of the product.
f: # @f
vpmulld xmm0, xmm1, xmm0 # per-lane 32-bit multiply of the unshuffled inputs
vpshufd xmm0, xmm0, 30 # xmm0 = xmm0[2,3,1,0]
ret