https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100267
--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> ---
After adding support for v{,p}expand* patterns without mask operands, the codegen seems to be optimal:
# dummyf1_avx512x8 -- compiler-generated listing (GAS, AT&T operand order).
# Reads a 32-bit index from (%rdi) and a 64-bit base pointer from 8(%rdi),
# loads two consecutive 32-byte vectors starting at base + index*8, and
# leaves their lane-wise 64-bit sum in %ymm0.
# NOTE(review): %rdi is presumably the first (pointer) argument and %ymm0 the
# return value under the SysV AMD64 ABI -- the listing itself does not say.
dummyf1_avx512x8:
.LFB5668:
.cfi_startproc
movl (%rdi), %edx               # edx = 32-bit index; a 32-bit write zero-extends into rdx
movq 8(%rdi), %rax              # rax = 64-bit base pointer stored at offset 8
vmovdqu (%rax,%rdx,8), %ymm0    # unaligned 32-byte load from rax + rdx*8
vmovdqu 32(%rax,%rdx,8), %ymm1  # the 32 bytes immediately after the first load
# NOTE(review): neither vpexpandq carries a {%k} write mask; with every
# element selected the expand presumably reduces to a plain register copy --
# confirm against the ISA reference.
vpexpandq %ymm0, %ymm0
vpexpandq %ymm1, %ymm1
vpaddq %ymm1, %ymm0, %ymm0      # ymm0 = ymm0 + ymm1, per 64-bit lane
ret
.cfi_endproc
