Issue |
79665
|
Summary |
RISCV __riscv_vsetvl() and __riscv_vsetvlmax() intrinsics fail to inform optimiser about return constraints
|
Labels |
new issue
|
Assignees |
|
Reporter |
sh1boot
|
By wrapping the intrinsics `__riscv_vsetvlmax_e8m1()` and `__riscv_vsetvl_e8m1()` in functions which expose some additional details about the behaviour of the operations, like so:
```cpp
// Returns the value of __riscv_vsetvlmax_e8m1(), annotated with facts about
// its range. Each `if (cond) __builtin_unreachable();` tells the compiler that
// `cond` never holds, so later code can be optimised under that assumption.
__attribute__((const)) inline size_t get_vlmax_u8m1() {
size_t vlmax = __riscv_vsetvlmax_e8m1();
#if defined __riscv_v_fixed_vlen
// Fixed-VLEN build: the result is declared to be exactly
// __riscv_v_fixed_vlen / 8 (presumably bits converted to 8-bit elements).
constexpr size_t fixed_vl = __riscv_v_fixed_vlen / 8;
if (vlmax != fixed_vl) __builtin_unreachable();
#else
// Otherwise only a lower bound is declared: at least __riscv_v_min_vlen / 8.
constexpr size_t min_vl = __riscv_v_min_vlen / 8;
if (vlmax < min_vl) __builtin_unreachable();
#endif
return vlmax;
}
// Returns the value of __riscv_vsetvl_e8m1(avl), annotated with facts relating
// the granted length `vl` to the requested length `avl`. Each
// `if (cond) __builtin_unreachable();` tells the compiler that `cond` never
// holds.
__attribute__((const)) inline size_t set_vl_u8m1(size_t avl) {
size_t vl = __riscv_vsetvl_e8m1(avl);
#if defined __riscv_v_fixed_vlen
constexpr size_t fixed_vl = __riscv_v_fixed_vlen / 8;
// A request of at most fixed_vl elements is granted in full.
if (avl <= fixed_vl && vl != avl) __builtin_unreachable();
// A request of at least twice fixed_vl yields exactly fixed_vl.
// Requests strictly between fixed_vl and 2 * fixed_vl are left unconstrained.
if (avl >= fixed_vl * 2 && vl != fixed_vl) __builtin_unreachable();
#else
constexpr size_t min_vl = __riscv_v_min_vlen / 8;
// Only small requests are constrained: avl <= min_vl implies vl == avl.
if (avl <= min_vl && vl != avl) __builtin_unreachable();
#endif
return vl;
}
```
The optimiser can then use this knowledge to elide more dead code. For example:
```cpp
// Stores the `zero` vector (built by __riscv_vmv_v_x_u8m1(0, vl)) over `len`
// bytes starting at `data`. The step size `vl` is taken once from
// get_vlmax_u8m1(); a final store of the remaining `len` elements follows
// the loop.
inline void generic(void* data, size_t len) {
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
size_t vl = get_vlmax_u8m1();
vuint8m1_t zero = __riscv_vmv_v_x_u8m1(0, vl);
// Full-length stores while more than vl bytes remain.
while (len > vl) {
__riscv_vse8(ptr, zero, vl);
ptr += vl;
len -= vl;
}
// Tail store: len <= vl at this point.
__riscv_vse8(ptr, zero, len);
}
// Variant of the zero-store routine that asks set_vl_u8m1() for a length
// before every step instead of using one fixed step size. Each iteration
// stores `vl` elements, advances by `vl`, and requests a new `vl` for the
// bytes that remain.
inline void generic2(void* data, size_t len) {
uint8_t* ptr = reinterpret_cast<uint8_t*>(data);
size_t vl = set_vl_u8m1(len);
while (len > 0) {
vuint8m1_t zero = __riscv_vmv_v_x_u8m1(0, vl);
__riscv_vse8(ptr, zero, vl);
ptr += vl;
len -= vl;
// Request the length for the next iteration from the remaining count.
vl = set_vl_u8m1(len);
}
}
// Calls generic() on the storage of one uint64_t, so `len` is the
// compile-time constant sizeof(data).
void u64_wrapper(uint64_t& data) {
generic(&data, sizeof(data));
}
// Calls generic2() on the storage of one uint64_t, so `len` is the
// compile-time constant sizeof(data).
void u64_wrapper2(uint64_t& data) {
generic2(&data, sizeof(data));
}
// Non-inline entry point that forwards to generic() with a length known only
// at run time.
void generic_wrapper(void* data, size_t len) {
generic(data, len);
}
// Non-inline entry point that forwards to generic2() with a length known only
// at run time.
void generic_wrapper2(void* data, size_t len) {
generic2(data, len);
}
```
With the hints, the compiler reduces `u64_wrapper()` and `u64_wrapper2()` to trivial, straight-line code with no loops. Without the hints, these functions perform unnecessary tests and loops.
I'm not 100% sure I defined the hints correctly, but something along these lines should work. Additional knowledge of the target might allow further hints.
Perhaps these hints could be embedded in the intrinsics themselves?
https://godbolt.org/z/P4z9a5jjT
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs