Issue 87938
Summary [AMDGPU] generates v_cndmask/lshlrev for uniform select between 0 and a power of 2
Labels new issue
Assignees
Reporter Engininja2
    Here is my test kernel:
```cpp
static __global__ void test(const int32_t input, int32_t * result) {
    const int data = "" ? 32 : 0;
    int output;

    asm volatile("s_mov_b32 %0, %1" : "=s"(output) : "s"(data));

 result[threadIdx.x] = output;
}
```
and here's the asm it compiles to for gfx900 on rocm 6.0.0, where it fails to assemble because of an invalid operand.
```asm
	s_load_dword s2, s[4:5], 0x0
	s_load_dwordx2 s[0:1], s[4:5], 0x8
	v_lshlrev_b32_e32 v0, 2, v0
	s_waitcnt lgkmcnt(0)
	s_cmp_lg_u32 s2, 0
	s_cselect_b64 s[2:3], -1, 0
	v_cndmask_b32_e64 v1, 0, 1, s[2:3]
	v_lshlrev_b32_e32 v1, 5, v1
	;;#ASMSTART
	s_mov_b32 s2, v1
	;;#ASMEND
	v_mov_b32_e32 v1, s2
	global_store_dword v0, v1, s[0:1]
```
If either of the values being selected is changed so that either one isn't 0, or the other isn't a power of 2, instead it compiles to this, which works.
```asm
	s_load_dword s2, s[4:5], 0x0
	s_load_dwordx2 s[0:1], s[4:5], 0x8
	v_lshlrev_b32_e32 v0, 2, v0
	s_waitcnt lgkmcnt(0)
	s_cmp_eq_u32 s2, 0
	s_cselect_b32 s2, 0, 33
	;;#ASMSTART
	s_mov_b32 s2, s2
	;;#ASMEND
	v_mov_b32_e32 v1, s2
	global_store_dword v0, v1, s[0:1]
```

_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to