| Issue |
175745
|
| Summary |
[AMDGPU] Wrong code at -Os
|
| Labels |
backend:AMDGPU,
miscompilation
|
| Assignees |
|
| Reporter |
XChy
|
### Testcase
A reduced OpenCL testcase:
```opencl
// test.cl
struct S1
{
char f0;
int f1;
int f2;
int f4;
uint f5;
ulong f6;
int f8
};
struct S4
{
char g_11[5][9];
int g_17;
long f2;
uint g_24;
ushort g_53;
struct S1 g_57[8][7];
short g_66;
volatile struct S1 **g_118;
struct S1 g_127
};
void func_29(int *, struct S4 *);
struct S1 func_58() {}
void func_14(struct S4 *p_238)
{
BS_LABEL_4:
int volatile BS_COND_1 = 76;
switch (BS_COND_1)
{
case 5: goto BS_LABEL_4;
case -7: goto BS_LABEL_2;
case 6: goto BS_LABEL_3;
}
int l_22[9];
uint *l_23 = &p_238->g_24;
BS_LABEL_3:
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) l_22[j] = 0;
*l_23 ^= 0 <= l_22[2];
func_29(&p_238->g_17, p_238);
BS_LABEL_2:
}
void func_29(int *p_32, struct S4 *p_238)
{
BS_LABEL_3:
for (; p_238->g_53; p_238 += 1)
{
int f1;
for (; p_238->f2;)
{
struct S1 l_142;
**p_238->g_118 = func_58();
if (p_238->g_17) continue;
p_238->g_127.f4 -= 1;
for (; p_238->g_127.f4;)
for (; p_238->g_127.f8; p_238 -= 1)
{
int *l_149 = &p_238->g_57[7][0].f2;
for (; p_238->g_66; p_238 += 1)
if (p_238->g_11[p_238->g_53][p_238->g_53])
*l_149 |= 0 == p_32;
int volatile BS_COND_5;
if (BS_COND_5) goto BS_LABEL_3;
}
}
for (; 8;) return;
}
}
__kernel void entry(__global ulong *result, __global ulong *bs_result)
{
struct S4 c_239 = {};
func_14(&c_239);
bs_result[0] = result[0] = c_239.g_24;
}
```
### Reproduce
GPU: AMD Radeon RX 9070
Reproduce method: Run the kernel `entry` on a single thread, and print `result[0]` and `bs_result[0]` at the host.
Host launcher `cl_launcher.c`: [cl_launcher.c](https://github.com/user-attachments/files/24588166/cl_launcher.c)
Result at Os:
```
0,
0,
```
Result at other optimization levels:
```
0x1,
0x1,
```
Full reproducible command:
```bash
# Compile the device code
clang -O1 -x cl -target amdgcn-amd-amdhsa -mcpu=gfx1201 test.cl -c -w -o ./unlinked.o
$ROCM_HOME/llvm/bin/ld.lld --no-undefined -shared -plugin-opt=mcpu=gfx1201 --enable-new-dtags ./unlinked.o -o correct.out
clang -Os -x cl -target amdgcn-amd-amdhsa -mcpu=gfx1201 test.cl -c -w -o ./unlinked.o
$ROCM_HOME/llvm/bin/ld.lld --no-undefined -shared -plugin-opt=mcpu=gfx1201 --enable-new-dtags ./unlinked.o -o wrong.out
# Compile the host launcher
gcc -lOpenCL cl_launcher.c -o cl_launcher
# Run the kernel entry on a single thread
./cl_launcher -f correct.out -p <platform id> -d <device id> -l 1 -g 1 ---backsmith ---binary
./cl_launcher -f wrong.out -p <platform id> -d <device id> -l 1 -g 1 ---backsmith ---binary
```
### UB-free check
We treat the device code as C code and run it on X86-64 with sanitizer: https://godbolt.org/z/d9341nxME
It pass the UBSan, MemorySan and ASan.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs