Issue 178259
Summary [AMDGPU] Wrong code at -O2
Labels backend:AMDGPU, miscompilation, generated by fuzzer
Assignees
Reporter XChy
    ### Reduced OpenCL Testcase
```opencl
// test.cl
// Expands to the result of two nested AMDGPU builtins (every argument is 0)
// plus x.
// NOTE(review): presumably the builtin chain evaluates to 0 and is there only to
// keep the compiler from treating the switch operand as a plain constant — confirm.
// The expansion is not parenthesized; its only use in this testcase is as a
// complete switch operand.
#define BARRIER_u32(x) \
    __builtin_amdgcn_perm(__builtin_amdgcn_msad_u8(0, 0, 0), 0, 0) + x
// Aggregate that entry builds and passes by pointer through func_1 and func_2.
struct S0
{
    int a;            // loop counter in func_2; also compared against the pointer s there
    long b;           // func_1 stores 3 here through ptr; entry reports this field
    uchar zero;       // initialized to 0 in entry; guards the loop in func_2
    int *c;           // store target of func_1's nested loops; entry points it at s_6
 int d;               // OR-ed with 3 inside func_2's loop
    char e[8][6][5];  // addressed in func_2 with an innermost index of 7
};
// Runs a loop guarded by s->zero. entry initializes zero to 0 and no statement in
// this testcase assigns to that field, which is why the loop is marked dead below.
void func_2(struct S0 *s)
{
    for (; s->zero;) // dead loop
    {
        // Both addresses use an innermost index of 7, although the last
        // dimension of e is declared with extent 5.
        char *l_1608 = &s->e[3][3][7];
 char *l_1611 = &s->e[3][0][7];
        // (s != s->a) compares the pointer s with the int field a; the result is
        // AND-ed into *l_1608, and that updated value is AND-ed into *l_1611.
        *l_1611 &= *l_1608 &= s != s->a;
 __asm("");
        s->d |= 3;
        s->a = 0;
        // 4 x 4 iterations; each one declares a volatile int whose initializer
        // reads and increments the variable being declared.
        for (; s->a <= 3; s->a += 1)
            for (ushort i = 0; i <= 3; i += 1)
 {
                int volatile BS_COND_4 = BS_COND_4++;
            }
 }
}
// Dispatches on BARRIER_u32(6058), then (on the path with no matching case)
// runs a nested store loop, calls func_2 and stores 3 into s->b through ptr.
void func_1(struct S0 *s)
{
BS_LABEL_1:
    // There is no default label: any value other than the four listed cases
    // continues with the statements after the switch.
    switch (BARRIER_u32(6058))
    {
        case 90035385:
        case 21: goto BS_LABEL_1; // re-evaluate the switch
        case 57: goto BS_LABEL_0; // jumps past the initialization of ptr
        case 6: goto BS_LABEL_3;  // jumps straight to the end of the function
    }
    long *ptr = &s->b;
    // 3 outer x 27 inner iterations (j runs from 0 down to -26); each one stores
    // s->zero through s->c after an empty asm statement.
    for (int i = 0; i < 3; i++)
 for (int j = 0; j > -27; j--)
        {
            __asm("");
 *s->c = s->zero;
        }
BS_LABEL_0:
    func_2(s);
    // This store is what makes s->b equal 3 on the path that falls out of the switch.
    *ptr = 3;
BS_LABEL_3:
}
// Kernel entry point: builds a struct S0 on the stack, runs func_1 on it and
// writes the resulting s.b to element 0 of both output buffers.
// Per the results quoted in this report, the -O0 build yields 3 and the -O2
// build yields 0.
__kernel void entry(__global ulong *result, __global ulong *bs_result)
{
    int s_6; // never initialized here; only its address is taken, for s.c
    // Initializers map to a = 0, b = 0, zero = 0, c = &s_6; d and e have no
    // explicit initializer.
    struct S0 s = { 0, 0, 0, &s_6 };
 func_1(&s);
    bs_result[0] = result[0] = s.b;
}
```

### Reproduce
GPU: AMD Radeon RX 9070
Reproduction method: run the kernel `entry` on a single thread and print `result[0]` and `bs_result[0]` on the host.
Host launcher `cl_launcher.c`: [cl_launcher.c](https://github.com/user-attachments/files/24588166/cl_launcher.c)

Result at -O2:
```
0,
0,
```

Result at -O0:
```
0x3,
0x3,
```

Full reproducible command:
```bash
# Compile the device code
clang -O0 -x cl -target amdgcn-amd-amdhsa -mcpu=gfx1201 test.cl -c -w -o ./unlinked.o
$ROCM_HOME/llvm/bin/ld.lld --no-undefined -shared -plugin-opt=mcpu=gfx1201 --enable-new-dtags ./unlinked.o -o correct.out
clang -O2 -x cl -target amdgcn-amd-amdhsa -mcpu=gfx1201 test.cl -c -w -o ./unlinked.o
$ROCM_HOME/llvm/bin/ld.lld --no-undefined -shared -plugin-opt=mcpu=gfx1201 --enable-new-dtags ./unlinked.o -o wrong.out

# Compile the host launcher
gcc -lOpenCL cl_launcher.c -o cl_launcher

# Run the kernel entry on a single thread
./cl_launcher -f correct.out -p <platform id> -d <device id> -l 1 -g 1  ---backsmith ---binary
./cl_launcher -f wrong.out -p <platform id> -d <device id> -l 1 -g 1  ---backsmith ---binary
```

### UB-free check
We treated the device code as C code and ran it on x86-64 with sanitizers: https://godbolt.org/z/6W9Mz18bf
It passed UBSan, MemorySanitizer (MSan), and ASan.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to