| Issue |
183398
|
| Summary |
[AArch64] Opportunity to use shifts instead of `ext` for some shuffles
|
| Labels |
backend:AArch64,
missed-optimization
|
| Assignees |
|
| Reporter |
Kmeakin
|
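When generating code for shuffles that "slide" a whole 64-bit vector left or right, filling the vacated lanes with zeros, we can emit a single shift on the bottom 64-bit half of the register (`ushr`/`shl` on the `d` register) instead of materializing a zero vector with `movi` and combining it with `ext`.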
# C++
https://godbolt.org/z/9sTf6x1ca
```c++
#include <arm_neon.h>
// slide left: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {v[1], v[2], v[3], v[4], v[5], v[6], v[7], 0}
uint8x8_t src1(uint8x8_t v) { return vext_u8(v, vdup_n_u8(0), 1); }
// slide left: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {v[1], v[2], v[3], v[4], v[5], v[6], v[7], 0}
uint8x8_t tgt1(uint8x8_t v) { return (uint8x8_t)((uint64x1_t)v >> 8); }
// slide right: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {0, v[0], v[1], v[2], v[3], v[4], v[5], v[6]}
uint8x8_t src2(uint8x8_t v) { return vext_u8(vdup_n_u8(0), v, 7); }
// slide right: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {0, v[0], v[1], v[2], v[3], v[4], v[5], v[6]}
uint8x8_t tgt2(uint8x8_t v) { return (uint8x8_t)((uint64x1_t)v << 8); }
```
# AArch64 assembly
```asm
src1(__Uint8x8_t):
movi v1.2d, #0000000000000000
ext v0.8b, v0.8b, v1.8b, #1
ret
tgt1(__Uint8x8_t):
ushr d0, d0, #8
ret
src2(__Uint8x8_t):
movi v1.2d, #0000000000000000
ext v0.8b, v1.8b, v0.8b, #7
ret
tgt2(__Uint8x8_t):
shl d0, d0, #8
ret
```
# Alive proof
https://alive2.llvm.org/ce/z/QaW5CQ
```llvm-ir
----------------------------------------
define <8 x i8> @src1(<8 x i8> noundef %v) noundef {
entry:
%vext = shufflevector <8 x i8> noundef %v, <8 x i8> { 0, poison, poison, poison, poison, poison, poison, poison }, 1, 2, 3, 4, 5, 6, 7, 8
ret <8 x i8> %vext
}
=>
define <8 x i8> @tgt1(<8 x i8> noundef %v) noundef {
entry:
%#0 = bitcast <8 x i8> noundef %v to <1 x i64>
%shr = lshr <1 x i64> %#0, { 8 }
%#1 = bitcast <1 x i64> %shr to <8 x i8>
ret <8 x i8> %#1
}
Transformation seems to be correct!
----------------------------------------
define <8 x i8> @src2(<8 x i8> noundef %v) noundef {
entry:
%vext = shufflevector <8 x i8> { poison, poison, poison, poison, poison, poison, poison, 0 }, <8 x i8> noundef %v, 7, 8, 9, 10, 11, 12, 13, 14
ret <8 x i8> %vext
}
=>
define <8 x i8> @tgt2(<8 x i8> noundef %v) noundef {
entry:
%#0 = bitcast <8 x i8> noundef %v to <1 x i64>
%shl = shl <1 x i64> %#0, { 8 }
%#1 = bitcast <1 x i64> %shl to <8 x i8>
ret <8 x i8> %#1
}
Transformation seems to be correct!
Summary:
2 correct transformations
0 incorrect transformations
0 failed-to-prove transformations
0 Alive2 errors
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs