https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97081
Bug ID: 97081
Summary: wrong code for rotate vectorization (x86 target)
Product: gcc
Version: 10.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: igor at tachyum dot com
Target Milestone: ---
Created attachment 49231
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=49231&action=edit
testcase
GCC incorrectly calculates the left-shift amount when breaking a vectorized
64-bit rotate right (a r>> sh) into (a << sh_l) | (a >> sh_r).
sh_l is calculated as (-sh & 31), while it should be (-sh & 63).
#-----------------------------------------------------------------------
# exec_VRORudi_i -- listing produced by GCC 10.2.0 for the attached
# testcase, reproduced exactly as emitted.  This is the miscompiled
# code the report is about: the instruction sequence is intentionally
# left untouched, only the layout is restored and comments are added.
#
# Syntax: AT&T (GAS), x86-64.
# In:     rdi = destination, 32 bytes stored (unaligned stores)
#         rsi = source, 32 bytes loaded (unaligned loads)
#         edx = rotate count sh
# Out:    nothing in registers; result is written through rdi
# Clobb:  eax, rcx, edx, xmm0-xmm3, flags
#
# Each of the four 64-bit lanes is computed as
#         (a >> (sh & 63)) | (a << (-sh & 31))
# A rotate right of a 64-bit lane needs (-sh & 63) as the left count,
# so the result is wrong whenever (-sh & 31) != (-sh & 63), i.e. for
# every sh in 1..32.
#
# NOTE(review): the register usage suggests a prototype along the lines
# of  void exec_VRORudi_i(uint64_t *dst, const uint64_t *src, unsigned sh)
# -- confirm against the attached testcase.
#-----------------------------------------------------------------------
        .file   "t.c"
        .text
        .p2align 4
        .globl  exec_VRORudi_i
        .type   exec_VRORudi_i, @function
exec_VRORudi_i:
.LFB0:
        .cfi_startproc
        endbr64
        andl    $63, %edx               # edx = sh & 63 (right-shift count)
        movdqu  (%rsi), %xmm1           # xmm1 = lanes 0-1 of the source
        movl    %edx, %eax
        movd    %edx, %xmm0             # xmm0 = right-shift count for psrlq
        negl    %eax                    # eax = -(sh & 63)
        movdqa  %xmm1, %xmm3
        movl    %eax, %ecx
        psrlq   %xmm0, %xmm3            # xmm3 = lanes 0-1 >> right count
        andl    $31, %ecx               # BUG as emitted: 32-bit mask; a 64-bit
                                        # lane rotate needs $63 here
        movq    %rcx, %xmm2             # xmm2 = left-shift count for psllq
        psllq   %xmm2, %xmm1            # xmm1 = lanes 0-1 << left count
        por     %xmm3, %xmm1            # combine the two halves of the rotate
        movups  %xmm1, (%rdi)           # store lanes 0-1

        movdqu  16(%rsi), %xmm1         # xmm1 = lanes 2-3 of the source
        movdqa  %xmm1, %xmm3
        psrlq   %xmm0, %xmm1            # same counts reused for lanes 2-3
        psllq   %xmm2, %xmm3
        por     %xmm3, %xmm1
        movups  %xmm1, 16(%rdi)         # store lanes 2-3
        ret
        .cfi_endproc