| Issue |
114001
|
| Summary |
[AVX-512] Decompose `vpermb` with constant shuffle vector in some cases
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
Validark
|
```llvm
define dso_local <64 x i8> @foo(<8 x i8> %0) local_unnamed_addr {
Entry:
%1 = shufflevector <8 x i8> %0, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <64 x i8> %1
}
```
Compiled for Zen 4, we get:
```asm
.LCPI0_0:
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 1
.byte 1
.byte 1
.byte 1
.byte 1
.byte 1
.byte 1
.byte 1
.byte 2
.byte 2
.byte 2
.byte 2
.byte 2
.byte 2
.byte 2
.byte 2
.byte 3
.byte 3
.byte 3
.byte 3
.byte 3
.byte 3
.byte 3
.byte 3
.byte 4
.byte 4
.byte 4
.byte 4
.byte 4
.byte 4
.byte 4
.byte 4
.byte 5
.byte 5
.byte 5
.byte 5
.byte 5
.byte 5
.byte 5
.byte 5
.byte 6
.byte 6
.byte 6
.byte 6
.byte 6
.byte 6
.byte 6
.byte 6
.byte 7
.byte 7
.byte 7
.byte 7
.byte 7
.byte 7
.byte 7
.byte 7
foo:
.Lfoo$local:
vmovdqa64 zmm1, zmmword ptr [rip + .LCPI0_0]
vpermb zmm0, zmm1, zmm0
ret
```
I think `vpermb` should be decomposed into `vpbroadcastq`+`vpshufb`
```asm
foo:
vpbroadcastq zmm0, xmm0
vpshufb zmm0, zmm0, zmmword ptr [rip + .LCPI0_0]
ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs