================
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -passes=atomic-expand %s | FileCheck -check-prefixes=GFX1250 %s
+
+; Atomic store/load/cmpxchg/atomicrmw on addrspace(5) pointers, run through
+; the atomic-expand pass for two subtargets. The gfx1200 checks expect plain
+; non-atomic load/store sequences; the gfx1250 checks expect the pointer to be
+; addrspacecast to an addrspace(0) `ptr` with the atomic operation kept on it.
+
+define void @system_atomic_store_unordered_float(ptr addrspace(5) %addr, float %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1200-NEXT:    store float [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic float [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic float %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_unordered_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr unordered, align 4
+  ret void
+}
+
+define void @system_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @system_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @system_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr release, align 4
+  ret void
+}
+
+define void @workgroup_atomic_store_release_i32(ptr addrspace(5) %addr, i32 %val) {
+; GFX1200-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret void
+;
+; GFX1250-LABEL: define void @workgroup_atomic_store_release_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    store atomic i32 [[VAL]], ptr [[SCRATCH_ASCAST]] syncscope("workgroup") release, align 4
+; GFX1250-NEXT:    ret void
+;
+  store atomic i32 %val, ptr addrspace(5) %addr syncscope("workgroup") release, align 4
+  ret void
+}
+
+define float @system_atomic_load_unordered_float(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load float, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret float [[VAL]]
+;
+; GFX1250-LABEL: define float @system_atomic_load_unordered_float(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic float, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret float [[VAL]]
+;
+  %val = load atomic float, ptr addrspace(5) %addr unordered, align 4
+  ret float %val
+}
+
+define i32 @system_atomic_load_unordered_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_unordered_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] unordered, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr unordered, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr acquire, align 4
+  ret i32 %val
+}
+
+define i32 @workgroup_atomic_load_acquire_i32(ptr addrspace(5) %addr) {
+; GFX1200-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[VAL]]
+;
+; GFX1250-LABEL: define i32 @workgroup_atomic_load_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = load atomic i32, ptr [[SCRATCH_ASCAST]] syncscope("workgroup") acquire, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = load atomic i32, ptr addrspace(5) %addr syncscope("workgroup") acquire, align 4
+  ret i32 %val
+}
+
+define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(ptr addrspace(5) %addr, i32 %old, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[OLD]]
+; GFX1200-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[IN]], i32 [[TMP1]]
+; GFX1200-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
+; GFX1200-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
+; GFX1200-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; GFX1200-NEXT:    ret i32 [[RES]]
+;
+; GFX1250-LABEL: define i32 @system_atomic_cmpxchg_acq_rel_acquire_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[OLD:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = cmpxchg volatile ptr [[SCRATCH_ASCAST]], i32 [[OLD]], i32 [[IN]] acq_rel acquire, align 4
+; GFX1250-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; GFX1250-NEXT:    ret i32 [[RES]]
+;
+  %val = cmpxchg volatile ptr addrspace(5) %addr, i32 %old, i32 %in acq_rel acquire
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @system_atomicrmw_xchg_acq_rel_i32(ptr addrspace(5) %addr, i32 %in) {
+; GFX1200-LABEL: define i32 @system_atomicrmw_xchg_acq_rel_i32(
+; GFX1200-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1200-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    store i32 [[IN]], ptr addrspace(5) [[ADDR]], align 4
+; GFX1200-NEXT:    ret i32 [[TMP1]]
+;
+; GFX1250-LABEL: define i32 @system_atomicrmw_xchg_acq_rel_i32(
+; GFX1250-SAME: ptr addrspace(5) [[ADDR:%.*]], i32 [[IN:%.*]]) #[[ATTR0]] {
+; GFX1250-NEXT:    [[SCRATCH_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR]] to ptr
+; GFX1250-NEXT:    [[VAL:%.*]] = atomicrmw volatile xchg ptr [[SCRATCH_ASCAST]], i32 [[IN]] acq_rel, align 4
+; GFX1250-NEXT:    ret i32 [[VAL]]
+;
+  %val = atomicrmw volatile xchg ptr addrspace(5) %addr, i32 %in acq_rel
+  ret i32 %val
+}
----------------
arsenm wrote:
Can you test a few more atomicrmws, especially some FP atomics? Also test 16-bit and 64-bit cases. https://github.com/llvm/llvm-project/pull/154710 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits