https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/109408

>From caecd58b94c52b5568fc0014dad1c51796e4d36e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Thu, 12 Sep 2024 12:44:04 +0400
Subject: [PATCH] AMDGPU: Add baseline tests for cmpxchg custom expansion

We need a non-atomic path if flat may access private.
---
 .../AMDGPU/flat_atomics_i64_noprivate.ll      |  34 +--
 .../AtomicExpand/AMDGPU/expand-atomic-mmra.ll |  12 +-
 ...and-atomic-rmw-fadd-flat-specialization.ll |   4 +-
 ...expand-atomicrmw-flat-noalias-addrspace.ll | 149 ++++++++++++-
 .../expand-cmpxchg-flat-maybe-private.ll      | 208 ++++++++++++++++++
 5 files changed, 382 insertions(+), 25 deletions(-)
 create mode 100644 
llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll

diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll 
b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
index c0b3adce81342d..f4fe003a34d3fb 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
@@ -5088,7 +5088,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr 
%out, i64 %in, i64 %old
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5145,7 +5145,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr 
%out, i64 %in, i64 %ol
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 9000
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5206,7 +5206,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5270,7 +5270,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
   %gep = getelementptr i64, ptr %ptr, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5344,7 +5344,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
   %gep = getelementptr i64, ptr %ptr, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5398,7 +5398,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, 
i64 %in, i64 %old) {
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:    s_endpgm
 entry:
-  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5454,7 +5454,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr 
%out, ptr %out2, i64 %in,
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:    s_endpgm
 entry:
-  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5513,7 +5513,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr 
%out, i64 %in, i64 %ind
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
-  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5582,7 +5582,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
-  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5634,7 +5634,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr 
%in, ptr %out) {
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr double, ptr %in, i64 4
-  %val = load atomic double, ptr %gep  seq_cst, align 8
+  %val = load atomic double, ptr %gep  seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }
@@ -5680,7 +5680,7 @@ define amdgpu_kernel void @atomic_load_f64(ptr %in, ptr 
%out) {
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:    s_endpgm
 entry:
-  %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8
+  %val = load atomic double, ptr %in syncscope("agent") seq_cst, align 8, 
!noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }
@@ -5745,7 +5745,7 @@ define amdgpu_kernel void 
@atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64
 entry:
   %ptr = getelementptr double, ptr %in, i64 %index
   %gep = getelementptr double, ptr %ptr, i64 4
-  %val = load atomic double, ptr %gep seq_cst, align 8
+  %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }
@@ -5805,7 +5805,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr 
%in, ptr %out, i64 %index)
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr double, ptr %in, i64 %index
-  %val = load atomic double, ptr %ptr seq_cst, align 8
+  %val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
   store double %val, ptr %out
   ret void
 }
@@ -5848,7 +5848,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double 
%in, ptr %out) {
 ; GFX12-NEXT:    s_endpgm
 entry:
   %gep = getelementptr double, ptr %out, i64 4
-  store atomic double %in, ptr %gep  seq_cst, align 8
+  store atomic double %in, ptr %gep  seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
 
@@ -5885,7 +5885,7 @@ define amdgpu_kernel void @atomic_store_f64(double %in, 
ptr %out) {
 ; GFX12-NEXT:    flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS
 ; GFX12-NEXT:    s_endpgm
 entry:
-  store atomic double %in, ptr %out seq_cst, align 8
+  store atomic double %in, ptr %out seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
 
@@ -5941,7 +5941,7 @@ define amdgpu_kernel void 
@atomic_store_f64_addr64_offset(double %in, ptr %out,
 entry:
   %ptr = getelementptr double, ptr %out, i64 %index
   %gep = getelementptr double, ptr %ptr, i64 4
-  store atomic double %in, ptr %gep seq_cst, align 8
+  store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
 
@@ -5992,7 +5992,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double 
%in, ptr %out, i64 %in
 ; GFX12-NEXT:    s_endpgm
 entry:
   %ptr = getelementptr double, ptr %out, i64 %index
-  store atomic double %in, ptr %ptr seq_cst, align 8
+  store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0
   ret void
 }
 
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
index 3c5f3a09082a72..e79bb465563e84 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
@@ -126,12 +126,12 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr 
addrspace(1) %out, i16 %in,
 
 define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
 ; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
-; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
+; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
 ; GFX90A-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr 
[[ADDR]])
 ; GFX90A-NEXT:    br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label 
[[ATOMICRMW_CHECK_PRIVATE:%.*]]
 ; GFX90A:       atomicrmw.shared:
 ; GFX90A-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
-; GFX90A-NEXT:    [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
+; GFX90A-NEXT:    [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], 
!amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode 
[[META3]]
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI:%.*]]
 ; GFX90A:       atomicrmw.check.private:
 ; GFX90A-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr 
[[ADDR]])
@@ -144,7 +144,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float 
%val) {
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.global:
 ; GFX90A-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX90A-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
+; GFX90A-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], 
!amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.phi:
 ; GFX90A-NEXT:    br label [[ATOMICRMW_END:%.*]]
@@ -152,8 +152,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float 
%val) {
 ; GFX90A-NEXT:    ret void
 ;
 ; GFX1100-LABEL: define void @syncscope_workgroup_nortn(
-; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX1100-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] 
syncscope("workgroup") seq_cst, align 4, !mmra [[META0]]
+; GFX1100-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
+; GFX1100-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr [[ADDR]], float [[VAL]] 
syncscope("workgroup") seq_cst, align 4, !mmra [[META0]], 
!amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode 
[[META3]]
 ; GFX1100-NEXT:    ret void
 ;
   %res = atomicrmw fadd ptr %addr, float %val syncscope("workgroup") seq_cst, 
!mmra !2, !amdgpu.no.fine.grained.memory !3, !amdgpu.ignore.denormal.mode !3
@@ -193,8 +193,10 @@ define i32 @atomic_load_global_align1(ptr addrspace(1) 
%ptr) {
 ; GFX90A: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; GFX90A: [[META1]] = !{!"foo", !"bar"}
 ; GFX90A: [[META2]] = !{!"bux", !"baz"}
+; GFX90A: [[META3]] = !{}
 ;.
 ; GFX1100: [[META0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; GFX1100: [[META1]] = !{!"foo", !"bar"}
 ; GFX1100: [[META2]] = !{!"bux", !"baz"}
+; GFX1100: [[META3]] = !{}
 ;.
diff --git 
a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
index e8b4e752d3a28c..056eee5b987d65 100644
--- 
a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
+++ 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
@@ -163,7 +163,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float 
%val) {
 ; GFX908-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX908:       atomicrmw.global:
 ; GFX908-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX908-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX908-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
 ; GFX908-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX908:       atomicrmw.phi:
 ; GFX908-NEXT:    br label [[ATOMICRMW_END:%.*]]
@@ -188,7 +188,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float 
%val) {
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.global:
 ; GFX90A-NEXT:    [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
-; GFX90A-NEXT:    [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
+; GFX90A-NEXT:    [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], 
float [[VAL]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
 ; GFX90A-NEXT:    br label [[ATOMICRMW_PHI]]
 ; GFX90A:       atomicrmw.phi:
 ; GFX90A-NEXT:    br label [[ATOMICRMW_END:%.*]]
diff --git 
a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll
 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll
index cb51bcf9356141..cb2ba0f7eb0b5d 100644
--- 
a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll
+++ 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-flat-noalias-addrspace.ll
@@ -341,7 +341,6 @@ define i64 
@test_flat_atomicrmw_and_i64_agent__noalias_addrspace_5__maybe_fine_g
   ret i64 %res
 }
 
-
 define i32 @test_flat_atomicrmw_and_i32_agent__noalias_addrspace_5(ptr %ptr, 
i32 %value) {
 ; ALL-LABEL: define i32 
@test_flat_atomicrmw_and_i32_agent__noalias_addrspace_5(
 ; ALL-SAME: ptr [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
@@ -352,6 +351,132 @@ define i32 
@test_flat_atomicrmw_and_i32_agent__noalias_addrspace_5(ptr %ptr, i32
   ret i32 %res
 }
 
+define i64 @test_flat_atomicrmw_and_i64_agent__mmra(ptr %ptr, i64 %value) {
+; ALL-LABEL: define i64 @test_flat_atomicrmw_and_i64_agent__mmra(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr 
[[PTR]])
+; ALL-NEXT:    br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label 
%[[ATOMICRMW_GLOBAL:.*]]
+; ALL:       [[ATOMICRMW_PRIVATE]]:
+; ALL-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
+; ALL-NEXT:    [[LOADED_PRIVATE:%.*]] = load i64, ptr addrspace(5) [[TMP1]], 
align 8
+; ALL-NEXT:    [[NEW:%.*]] = and i64 [[LOADED_PRIVATE]], [[VALUE]]
+; ALL-NEXT:    store i64 [[NEW]], ptr addrspace(5) [[TMP1]], align 8
+; ALL-NEXT:    br label %[[ATOMICRMW_PHI:.*]]
+; ALL:       [[ATOMICRMW_GLOBAL]]:
+; ALL-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !mmra [[META2:![0-9]+]], 
!noalias.addrspace [[META0]], !amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    br label %[[ATOMICRMW_PHI]]
+; ALL:       [[ATOMICRMW_PHI]]:
+; ALL-NEXT:    [[RES:%.*]] = phi i64 [ [[LOADED_PRIVATE]], 
%[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ]
+; ALL-NEXT:    br label %[[ATOMICRMW_END:.*]]
+; ALL:       [[ATOMICRMW_END]]:
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw and ptr %ptr, i64 %value syncscope("agent") seq_cst, !mmra 
!4, !amdgpu.no.fine.grained.memory !0
+  ret i64 %res
+}
+
+define i64 @test_flat_atomicrmw_and_i64_agent__noalias_addrspace_5__mmra(ptr 
%ptr, i64 %value) {
+; ALL-LABEL: define i64 
@test_flat_atomicrmw_and_i64_agent__noalias_addrspace_5__mmra(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[RES:%.*]] = atomicrmw and ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !mmra [[META2]], !noalias.addrspace 
[[META0]], !amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw and ptr %ptr, i64 %value syncscope("agent") seq_cst, 
!noalias.addrspace !1, !mmra !4, !amdgpu.no.fine.grained.memory !0
+  ret i64 %res
+}
+
+; --------------------------------------------------------------------
+; General expansion for sub
+; --------------------------------------------------------------------
+
+define i64 @test_flat_atomicrmw_sub_i64_agent(ptr %ptr, i64 %value) {
+; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr 
[[PTR]])
+; ALL-NEXT:    br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label 
%[[ATOMICRMW_GLOBAL:.*]]
+; ALL:       [[ATOMICRMW_PRIVATE]]:
+; ALL-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
+; ALL-NEXT:    [[LOADED_PRIVATE:%.*]] = load i64, ptr addrspace(5) [[TMP1]], 
align 8
+; ALL-NEXT:    [[NEW:%.*]] = sub i64 [[LOADED_PRIVATE]], [[VALUE]]
+; ALL-NEXT:    store i64 [[NEW]], ptr addrspace(5) [[TMP1]], align 8
+; ALL-NEXT:    br label %[[ATOMICRMW_PHI:.*]]
+; ALL:       [[ATOMICRMW_GLOBAL]]:
+; ALL-NEXT:    [[TMP2:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], 
!amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    br label %[[ATOMICRMW_PHI]]
+; ALL:       [[ATOMICRMW_PHI]]:
+; ALL-NEXT:    [[RES:%.*]] = phi i64 [ [[LOADED_PRIVATE]], 
%[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ]
+; ALL-NEXT:    br label %[[ATOMICRMW_END:.*]]
+; ALL:       [[ATOMICRMW_END]]:
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, 
!amdgpu.no.fine.grained.memory !0
+  ret i64 %res
+}
+
+define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5(ptr %ptr, 
i64 %value) {
+; ALL-LABEL: define i64 
@test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]], 
!amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, 
!noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
+  ret i64 %res
+}
+
+define i64 
@test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__maybe_fine_grained(ptr 
%ptr, i64 %value) {
+; ALL-LABEL: define i64 
@test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__maybe_fine_grained(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !noalias.addrspace [[META0]]
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, 
!noalias.addrspace !1
+  ret i64 %res
+}
+
+define i32 @test_flat_atomicrmw_sub_i32_agent__noalias_addrspace_5(ptr %ptr, 
i32 %value) {
+; ALL-LABEL: define i32 
@test_flat_atomicrmw_sub_i32_agent__noalias_addrspace_5(
+; ALL-SAME: ptr [[PTR:%.*]], i32 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i32 [[VALUE]] 
syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0]], 
!amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    ret i32 [[RES]]
+;
+  %res = atomicrmw sub ptr %ptr, i32 %value syncscope("agent") seq_cst, 
!noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
+  ret i32 %res
+}
+
+define i64 @test_flat_atomicrmw_sub_i64_agent__mmra(ptr %ptr, i64 %value) {
+; ALL-LABEL: define i64 @test_flat_atomicrmw_sub_i64_agent__mmra(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr 
[[PTR]])
+; ALL-NEXT:    br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label 
%[[ATOMICRMW_GLOBAL:.*]]
+; ALL:       [[ATOMICRMW_PRIVATE]]:
+; ALL-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
+; ALL-NEXT:    [[LOADED_PRIVATE:%.*]] = load i64, ptr addrspace(5) [[TMP1]], 
align 8
+; ALL-NEXT:    [[NEW:%.*]] = sub i64 [[LOADED_PRIVATE]], [[VALUE]]
+; ALL-NEXT:    store i64 [[NEW]], ptr addrspace(5) [[TMP1]], align 8
+; ALL-NEXT:    br label %[[ATOMICRMW_PHI:.*]]
+; ALL:       [[ATOMICRMW_GLOBAL]]:
+; ALL-NEXT:    [[TMP2:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !mmra [[META2]], !noalias.addrspace 
[[META0]], !amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    br label %[[ATOMICRMW_PHI]]
+; ALL:       [[ATOMICRMW_PHI]]:
+; ALL-NEXT:    [[RES:%.*]] = phi i64 [ [[LOADED_PRIVATE]], 
%[[ATOMICRMW_PRIVATE]] ], [ [[TMP2]], %[[ATOMICRMW_GLOBAL]] ]
+; ALL-NEXT:    br label %[[ATOMICRMW_END:.*]]
+; ALL:       [[ATOMICRMW_END]]:
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, !mmra 
!4, !amdgpu.no.fine.grained.memory !0
+  ret i64 %res
+}
+
+define i64 @test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__mmra(ptr 
%ptr, i64 %value) {
+; ALL-LABEL: define i64 
@test_flat_atomicrmw_sub_i64_agent__noalias_addrspace_5__mmra(
+; ALL-SAME: ptr [[PTR:%.*]], i64 [[VALUE:%.*]]) #[[ATTR0]] {
+; ALL-NEXT:    [[RES:%.*]] = atomicrmw sub ptr [[PTR]], i64 [[VALUE]] 
syncscope("agent") seq_cst, align 8, !mmra [[META2]], !noalias.addrspace 
[[META0]], !amdgpu.no.fine.grained.memory [[META1]]
+; ALL-NEXT:    ret i64 [[RES]]
+;
+  %res = atomicrmw sub ptr %ptr, i64 %value syncscope("agent") seq_cst, 
!noalias.addrspace !1, !mmra !4, !amdgpu.no.fine.grained.memory !0
+  ret i64 %res
+}
+
 ; --------------------------------------------------------------------
 ; General expansion for fadd
 ; --------------------------------------------------------------------
@@ -1878,23 +2003,45 @@ define i32 
@test_flat_atomicrmw_nand_i32_agent__noalias_addrspace_5(ptr %ptr, i3
 
 !0 = !{}
 !1 = !{i32 5, i32 6}
+!2 = !{!"foo", !"bar"}
+!3 = !{!"bux", !"baz"}
+!4 = !{!2, !3}
+!5 = !{}
 
 ;.
 ; GFX7: [[META0]] = !{i32 5, i32 6}
 ; GFX7: [[META1]] = !{}
+; GFX7: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; GFX7: [[META3]] = !{!"foo", !"bar"}
+; GFX7: [[META4]] = !{!"bux", !"baz"}
 ;.
 ; GFX900: [[META0]] = !{i32 5, i32 6}
 ; GFX900: [[META1]] = !{}
+; GFX900: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; GFX900: [[META3]] = !{!"foo", !"bar"}
+; GFX900: [[META4]] = !{!"bux", !"baz"}
 ;.
 ; GFX908: [[META0]] = !{i32 5, i32 6}
 ; GFX908: [[META1]] = !{}
+; GFX908: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; GFX908: [[META3]] = !{!"foo", !"bar"}
+; GFX908: [[META4]] = !{!"bux", !"baz"}
 ;.
 ; GFX90A: [[META0]] = !{i32 5, i32 6}
 ; GFX90A: [[META1]] = !{}
+; GFX90A: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; GFX90A: [[META3]] = !{!"foo", !"bar"}
+; GFX90A: [[META4]] = !{!"bux", !"baz"}
 ;.
 ; GFX940: [[META0]] = !{i32 5, i32 6}
 ; GFX940: [[META1]] = !{}
+; GFX940: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; GFX940: [[META3]] = !{!"foo", !"bar"}
+; GFX940: [[META4]] = !{!"bux", !"baz"}
 ;.
 ; GFX12: [[META0]] = !{i32 5, i32 6}
 ; GFX12: [[META1]] = !{}
+; GFX12: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; GFX12: [[META3]] = !{!"foo", !"bar"}
+; GFX12: [[META4]] = !{!"bux", !"baz"}
 ;.
diff --git 
a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll
new file mode 100644
index 00000000000000..6b3c27be8688c2
--- /dev/null
+++ 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=atomic-expand %s | FileCheck 
%s
+
+define { i16, i1 } @cmpxchg_flat_agent_i16(ptr %ptr, i16 %val, i16 %swap) {
+; CHECK-LABEL: define { i16, i1 } @cmpxchg_flat_agent_i16(
+; CHECK-SAME: ptr [[PTR:%.*]], i16 [[VAL:%.*]], i16 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr 
[[PTR]], i64 -4)
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
+; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
+; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[SWAP]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[VAL]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
+; CHECK-NEXT:    br label %[[PARTWORD_CMPXCHG_LOOP:.*]]
+; CHECK:       [[PARTWORD_CMPXCHG_LOOP]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ 
[[TMP15:%.*]], %[[PARTWORD_CMPXCHG_FAILURE:.*]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
+; CHECK-NEXT:    [[TMP12:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[TMP11]], 
i32 [[TMP10]] syncscope("agent") monotonic seq_cst, align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[TMP14]], label %[[PARTWORD_CMPXCHG_END:.*]], label 
%[[PARTWORD_CMPXCHG_FAILURE]]
+; CHECK:       [[PARTWORD_CMPXCHG_FAILURE]]:
+; CHECK-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PARTWORD_CMPXCHG_LOOP]], label 
%[[PARTWORD_CMPXCHG_END]]
+; CHECK:       [[PARTWORD_CMPXCHG_END]]:
+; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
+; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 
[[EXTRACTED]], 0
+; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 
[[TMP14]], 1
+; CHECK-NEXT:    ret { i16, i1 } [[TMP18]]
+;
+  %result = cmpxchg ptr %ptr, i16 %val, i16 %swap syncscope("agent") monotonic 
seq_cst
+  ret { i16, i1 } %result
+}
+
+define { i16, i1 } @cmpxchg_flat_agent_i16_align4(ptr %ptr, i16 %val, i16 
%swap) {
+; CHECK-LABEL: define { i16, i1 } @cmpxchg_flat_agent_i16_align4(
+; CHECK-SAME: ptr [[PTR:%.*]], i16 [[VAL:%.*]], i16 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[SWAP]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[VAL]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], -65536
+; CHECK-NEXT:    br label %[[PARTWORD_CMPXCHG_LOOP:.*]]
+; CHECK:       [[PARTWORD_CMPXCHG_LOOP]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ 
[[TMP11:%.*]], %[[PARTWORD_CMPXCHG_FAILURE:.*]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP7]], i32 
[[TMP6]] syncscope("agent") monotonic seq_cst, align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PARTWORD_CMPXCHG_END:.*]], label 
%[[PARTWORD_CMPXCHG_FAILURE]]
+; CHECK:       [[PARTWORD_CMPXCHG_FAILURE]]:
+; CHECK-NEXT:    [[TMP11]] = and i32 [[TMP9]], -65536
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[PARTWORD_CMPXCHG_LOOP]], label 
%[[PARTWORD_CMPXCHG_END]]
+; CHECK:       [[PARTWORD_CMPXCHG_END]]:
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16
+; CHECK-NEXT:    [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 
[[EXTRACTED]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 
[[TMP10]], 1
+; CHECK-NEXT:    ret { i16, i1 } [[TMP14]]
+;
+  %result = cmpxchg ptr %ptr, i16 %val, i16 %swap syncscope("agent") monotonic 
seq_cst, align 4
+  ret { i16, i1 } %result
+}
+
+define { i32, i1 } @cmpxchg_flat_agent_i32(ptr %ptr, i32 %val, i32 %swap) {
+; CHECK-LABEL: define { i32, i1 } @cmpxchg_flat_agent_i32(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[VAL:%.*]], i32 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i32 [[VAL]], i32 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 4
+; CHECK-NEXT:    ret { i32, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i32 %val, i32 %swap syncscope("agent") monotonic 
seq_cst
+  ret { i32, i1 } %result
+}
+
+define { i64, i1 } @cmpxchg_flat_agent_i64(ptr %ptr, i64 %val, i64 %swap) {
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 8
+; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic 
seq_cst
+  ret { i64, i1 } %result
+}
+
+define { i64, i1 } @cmpxchg_flat_agent_i64_volatile(ptr %ptr, i64 %val, i64 
%swap) {
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64_volatile(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg volatile ptr [[PTR]], i64 [[VAL]], 
i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8
+; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
+;
+  %result = cmpxchg volatile ptr %ptr, i64 %val, i64 %swap syncscope("agent") 
monotonic seq_cst
+  ret { i64, i1 } %result
+}
+
+; Sub-word (i16) cmpxchg on a flat pointer annotated !noalias.addrspace !0:
+; the checks below expect AtomicExpand's partword expansion -- ptrmask down to
+; the containing aligned i32 word, build shifted mask/compare/new-value
+; operands, then loop on an i32 cmpxchg, retrying when only the neighboring
+; bytes of the word changed. The i16 result is shifted/truncated back out.
+define { i16, i1 } @cmpxchg_flat_agent_i16__noprivate(ptr %ptr, i16 %val, i16 
%swap) {
+; CHECK-LABEL: define { i16, i1 } @cmpxchg_flat_agent_i16__noprivate(
+; CHECK-SAME: ptr [[PTR:%.*]], i16 [[VAL:%.*]], i16 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr 
[[PTR]], i64 -4)
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
+; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
+; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[SWAP]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[VAL]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
+; CHECK-NEXT:    br label %[[PARTWORD_CMPXCHG_LOOP:.*]]
+; CHECK:       [[PARTWORD_CMPXCHG_LOOP]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ 
[[TMP15:%.*]], %[[PARTWORD_CMPXCHG_FAILURE:.*]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
+; CHECK-NEXT:    [[TMP12:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[TMP11]], 
i32 [[TMP10]] syncscope("agent") monotonic seq_cst, align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
+; CHECK-NEXT:    br i1 [[TMP14]], label %[[PARTWORD_CMPXCHG_END:.*]], label 
%[[PARTWORD_CMPXCHG_FAILURE]]
+; CHECK:       [[PARTWORD_CMPXCHG_FAILURE]]:
+; CHECK-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
+; CHECK-NEXT:    br i1 [[TMP16]], label %[[PARTWORD_CMPXCHG_LOOP]], label 
%[[PARTWORD_CMPXCHG_END]]
+; CHECK:       [[PARTWORD_CMPXCHG_END]]:
+; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
+; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
+; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 
[[EXTRACTED]], 0
+; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 
[[TMP14]], 1
+; CHECK-NEXT:    ret { i16, i1 } [[TMP18]]
+;
+  %result = cmpxchg ptr %ptr, i16 %val, i16 %swap syncscope("agent") monotonic 
seq_cst, !noalias.addrspace !0
+  ret { i16, i1 } %result
+}
+
+; i32 cmpxchg with the !noalias.addrspace !0 annotation (the "__noprivate"
+; case): the checks expect the instruction to survive expansion unchanged,
+; with the metadata preserved on the output ([[META0]]).
+define { i32, i1 } @cmpxchg_flat_agent_i32__noprivate(ptr %ptr, i32 %val, i32 
%swap) {
+; CHECK-LABEL: define { i32, i1 } @cmpxchg_flat_agent_i32__noprivate(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[VAL:%.*]], i32 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i32 [[VAL]], i32 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 4, !noalias.addrspace 
[[META0:![0-9]+]]
+; CHECK-NEXT:    ret { i32, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i32 %val, i32 %swap syncscope("agent") monotonic 
seq_cst, !noalias.addrspace !0
+  ret { i32, i1 } %result
+}
+
+; Same as the i32 "__noprivate" test but at i64 width: the cmpxchg is kept
+; as-is and !noalias.addrspace !0 is preserved ([[META0]]).
+define { i64, i1 } @cmpxchg_flat_agent_i64__noprivate(ptr %ptr, i64 %val, i64 
%swap) {
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__noprivate(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace 
[[META0]]
+; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic 
seq_cst, !noalias.addrspace !0
+  ret { i64, i1 } %result
+}
+
+; i64 cmpxchg annotated with !noalias.addrspace !1 instead of !0 (the
+; "__nolocal" case): the instruction is expected to pass through with that
+; metadata intact ([[META1]]).
+define { i64, i1 } @cmpxchg_flat_agent_i64__nolocal(ptr %ptr, i64 %val, i64 
%swap) {
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__nolocal(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace 
[[META1:![0-9]+]]
+; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic 
seq_cst, !noalias.addrspace !1
+  ret { i64, i1 } %result
+}
+
+; i64 cmpxchg carrying only !mmra metadata (no !noalias.addrspace): checks
+; that the !mmra operand is preserved through expansion ([[META2]]).
+define { i64, i1 } @cmpxchg_flat_agent_i64_mmra(ptr %ptr, i64 %val, i64 %swap) 
{
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64_mmra(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !mmra [[META2:![0-9]+]]
+; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic 
seq_cst, !mmra !4
+  ret { i64, i1 } %result
+}
+
+; i64 cmpxchg carrying both !mmra and !noalias.addrspace !1: both metadata
+; operands are expected to survive on the output ([[META2]] and [[META1]]).
+define { i64, i1 } @cmpxchg_flat_agent_i64_mmra_noprivate(ptr %ptr, i64 %val, 
i64 %swap) {
+; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64_mmra_noprivate(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 
[[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !mmra [[META2]], 
!noalias.addrspace [[META1]]
+; CHECK-NEXT:    ret { i64, i1 } [[RESULT]]
+;
+  %result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic 
seq_cst, !noalias.addrspace !1, !mmra !4
+  ret { i64, i1 } %result
+}
+
+; Metadata operands used by the tests above. !0 and !1 are !noalias.addrspace
+; ranges [5, 6) and [3, 4); from the "__noprivate" / "__nolocal" test names,
+; 5 is presumably the private and 3 the local address space on AMDGPU --
+; confirm against the target's address-space numbering. !4 (built from !2/!3)
+; is the !mmra operand. !5 is not referenced by any test in this file.
+!0 = !{i32 5, i32 6}
+!1 = !{i32 3, i32 4}
+!2 = !{!"foo", !"bar"}
+!3 = !{!"bux", !"baz"}
+!4 = !{!2, !3}
+!5 = !{}
+
+
+;.
+; CHECK: [[META0]] = !{i32 5, i32 6}
+; CHECK: [[META1]] = !{i32 3, i32 4}
+; CHECK: [[META2]] = !{[[META3:![0-9]+]], [[META4:![0-9]+]]}
+; CHECK: [[META3]] = !{!"foo", !"bar"}
+; CHECK: [[META4]] = !{!"bux", !"baz"}
+;.

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to