Issue 183398
Summary [AArch64] Opportunity to use shifts instead of `ext` for some shuffles
Labels backend:AArch64, missed-optimization
Assignees
Reporter Kmeakin
    When generating code for shuffles that "slide" the whole vector left/right, filling in with zeros, we can emit a single shift on the bottom 64-bit half of the register instead of materializing a zero vector with `movi` and combining it with `ext`.

# C++
https://godbolt.org/z/9sTf6x1ca
```c++
#include <arm_neon.h>

// slide left: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {v[1], v[2], v[3], v[4], v[5], v[6], v[7], 0}
uint8x8_t src1(uint8x8_t v) { return vext_u8(v, vdup_n_u8(0), 1); }

// slide left: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {v[1], v[2], v[3], v[4], v[5], v[6], v[7], 0}
uint8x8_t tgt1(uint8x8_t v) { return (uint8x8_t)((uint64x1_t)v >> 8); }

// slide right: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {0, v[0], v[1], v[2], v[3], v[4], v[5], v[6]}
uint8x8_t src2(uint8x8_t v) { return vext_u8(vdup_n_u8(0), v, 7); }

// slide right: {v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]} => {0, v[0], v[1], v[2], v[3], v[4], v[5], v[6]}
uint8x8_t tgt2(uint8x8_t v) { return (uint8x8_t)((uint64x1_t)v << 8); }
```

# AArch64 assembly
```asm
src1(__Uint8x8_t):
        movi    v1.2d, #0000000000000000
        ext     v0.8b, v0.8b, v1.8b, #1
 ret

tgt1(__Uint8x8_t):
        ushr    d0, d0, #8
 ret

src2(__Uint8x8_t):
        movi    v1.2d, #0000000000000000
 ext     v0.8b, v1.8b, v0.8b, #7
        ret

tgt2(__Uint8x8_t):
 shl     d0, d0, #8
        ret
```

# Alive proof
https://alive2.llvm.org/ce/z/QaW5CQ
```llvm-ir

----------------------------------------
define <8 x i8> @src1(<8 x i8> noundef %v) noundef {
entry:
  %vext = shufflevector <8 x i8> noundef %v, <8 x i8> { 0, poison, poison, poison, poison, poison, poison, poison }, 1, 2, 3, 4, 5, 6, 7, 8
  ret <8 x i8> %vext
}
=>
define <8 x i8> @tgt1(<8 x i8> noundef %v) noundef {
entry:
 %#0 = bitcast <8 x i8> noundef %v to <1 x i64>
  %shr = lshr <1 x i64> %#0, { 8 }
  %#1 = bitcast <1 x i64> %shr to <8 x i8>
  ret <8 x i8> %#1
}
Transformation seems to be correct!


----------------------------------------
define <8 x i8> @src2(<8 x i8> noundef %v) noundef {
entry:
  %vext = shufflevector <8 x i8> { poison, poison, poison, poison, poison, poison, poison, 0 }, <8 x i8> noundef %v, 7, 8, 9, 10, 11, 12, 13, 14
  ret <8 x i8> %vext
}
=>
define <8 x i8> @tgt2(<8 x i8> noundef %v) noundef {
entry:
  %#0 = bitcast <8 x i8> noundef %v to <1 x i64>
  %shl = shl <1 x i64> %#0, { 8 }
  %#1 = bitcast <1 x i64> %shl to <8 x i8>
  ret <8 x i8> %#1
}
Transformation seems to be correct!

Summary:
  2 correct transformations
  0 incorrect transformations
  0 failed-to-prove transformations
  0 Alive2 errors
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to