| | |
|---|---|
| Issue | 174500 |
| Summary | [AArch64] Use `@llvm.vector.reduce.umax.v16i8` instead of `@llvm.vector.reduce.or.v16i8` where possible |
| Labels | backend:AArch64, missed-optimization |
| Assignees | |
| Reporter | Kmeakin |
https://godbolt.org/z/rcEGWdc1z
Consider this code for checking if 16 bytes are ASCII:
```c++
#include <cstdint>
using u8 = uint8_t;
using u8x16 = u8 __attribute__((vector_size(16)));
using u64 = uint64_t;
using usize = unsigned long;
auto src16(u8* p) {
    auto ret = true;
    for (usize i = 0; i < 16; i++) {
        ret &= p[i] < 0x80;
    }
    return ret;
}
```
This is auto-vectorised to
```llvm
define dso_local noundef i1 @src16(unsigned char*)(ptr noundef readonly captures(none) %0) local_unnamed_addr #1 {
  %2 = load <16 x i8>, ptr %0, align 1
  %3 = tail call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %2)
  %4 = icmp sgt i8 %3, -1
  ret i1 %4
}
```
but since AArch64 has no instruction for a horizontal OR reduction, the backend lowers it to a halving fold of ORs and shifts:
```asm
src16(unsigned char*):
        ldr     q0, [x0]
        ext     v1.16b, v0.16b, v0.16b, #8
        orr     v0.8b, v0.8b, v1.8b
        fmov    x8, d0
        orr     x8, x8, x8, lsr #32
        lsr     x9, x8, #16
        orr     w8, w8, w9
        orr     w8, w8, w8, lsr #8
        ubfx    w8, w8, #7, #1
        eor     w0, w8, #0x1
        ret
```
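For reference, the scalar tail of that listing is the generic log2 halving fold. A rough C++ sketch of what it computes (hand-written to mirror the instructions above; the helper name is made up for illustration):

```c++
#include <cstdint>

// OR-reduce 16 bytes, taking the two 64-bit halves of the vector,
// mirroring the lowering above: one vector ORR, then scalar halving.
static uint8_t or_reduce_halves(uint64_t lo, uint64_t hi) {
    uint64_t x = lo | hi; // orr v0.8b, v0.8b, v1.8b (after the ext)
    x |= x >> 32;         // orr x8, x8, x8, lsr #32
    x |= x >> 16;         // lsr x9, x8, #16 ; orr w8, w8, w9
    x |= x >> 8;          // orr w8, w8, w8, lsr #8
    return static_cast<uint8_t>(x);
}
```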
A horizontal unsigned max gives the same answer for this predicate: the OR of all lanes has its top bit set exactly when some lane does, which is exactly when the unsigned max is at least 0x80. AArch64 has a native reduction instruction for this (`umaxv`), so the resulting assembly is much better:
```c++
auto tgt16(u8x16* p) {
    u8x16 v0 = p[0];
    return __builtin_reduce_max(v0) < 0x80;
}
```
```llvm
define dso_local noundef i1 @tgt16(unsigned char vector[16]*)(ptr noundef readonly captures(none) %0) local_unnamed_addr #0 {
  %2 = load <16 x i8>, ptr %0, align 16
  %3 = tail call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %2)
  %4 = icmp sgt i8 %3, -1
  ret i1 %4
}
```
```asm
tgt16(unsigned char vector[16]*):
        ldr     q0, [x0]
        umaxv   b0, v0.16b
        fmov    w8, s0
        ubfx    w8, w8, #7, #1
        eor     w0, w8, #0x1
        ret
```
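For completeness, the two reductions can be cross-checked for this predicate with Clang's reduce builtins (`__builtin_reduce_or` / `__builtin_reduce_max`). A minimal standalone sketch, random testing only rather than a proof:

```c++
#include <cstdint>
#include <cstdio>
#include <random>

using u8 = uint8_t;
using u8x16 = u8 __attribute__((vector_size(16)));

// "All 16 bytes are ASCII", once via an OR reduction, once via a max reduction.
static bool ascii_via_or(u8x16 v) { return (__builtin_reduce_or(v) & 0x80) == 0; }
static bool ascii_via_max(u8x16 v) { return __builtin_reduce_max(v) < 0x80; }

int main() {
    std::mt19937 rng(0);
    for (int iter = 0; iter < 1000000; iter++) {
        u8x16 v;
        for (int i = 0; i < 16; i++)
            v[i] = static_cast<u8>(rng());
        if (ascii_via_or(v) != ascii_via_max(v)) {
            std::printf("mismatch\n");
            return 1;
        }
    }
    std::printf("or-reduction and umax-reduction agree on the sign-bit test\n");
    return 0;
}
```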