https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/197862
From bcca9aff5296595e285d2ccc6a43ac6b7d8b1cef Mon Sep 17 00:00:00 2001 From: jofrn <[email protected]> Date: Thu, 14 May 2026 20:42:42 -0700 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding store atomic vector AtomicExpand fails for aligned `store atomic <n x T>` because it does not find a compatible library call. This change adds appropriate ptrtoint + bitcast so that the call can be lowered, mirroring the load-side handling from #148900. --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 6 +- llvm/test/CodeGen/ARM/atomic-load-store.ll | 49 ++++++++ llvm/test/CodeGen/X86/atomic-load-store.ll | 105 +++++++++++++++++- .../X86/expand-atomic-non-integer.ll | 98 ++++++++++++++++ 4 files changed, 250 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 960d2492c2856..db048e0c5ab5c 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -711,7 +711,9 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) { auto *M = SI->getModule(); Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), M->getDataLayout()); - Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); + Value *NewVal = SI->getValueOperand()->getType()->isPtrOrPtrVectorTy() + ? 
Builder.CreatePtrToInt(SI->getValueOperand(), NewTy) + : Builder.CreateBitCast(SI->getValueOperand(), NewTy); Value *Addr = SI->getPointerOperand(); @@ -2191,7 +2193,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( if (ValueOperand) { if (UseSizedLibcall) { Value *IntValue = - Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); + Builder.CreateBitPreservingCastChain(DL, ValueOperand, SizedIntTy); Args.push_back(IntValue); } else { AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 1af2832702296..0c787a4ca05c3 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -1038,3 +1038,52 @@ define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 ret <1 x ptr> %ret } + +define void @store_atomic_vec1_ptr(ptr %x, <1 x ptr> %v) #0 { +; ARM-LABEL: store_atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT: dmb ish +; ARM-NEXT: str r1, [r0] +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: store_atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: str r1, [r0] +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: store_atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: str r1, [r0] +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: store_atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: bl __sync_lock_test_and_set_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: store_atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: mov r2, #3 +; ARMV4-NEXT: bl __atomic_store_4 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: store_atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: mov r2, #0 +; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5 +; ARMV6-NEXT: str r1, [r0] +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: store_atomic_vec1_ptr: +; 
THUMBM: @ %bb.0: +; THUMBM-NEXT: dmb sy +; THUMBM-NEXT: str r1, [r0] +; THUMBM-NEXT: bx lr + store atomic <1 x ptr> %v, ptr %x release, align 4 + ret void +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index c09f7e5ba3db4..40bf914bdf854 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -657,6 +657,53 @@ define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 ret <2 x ptr> %ret } + +define void @store_atomic_vec2_ptr_align(ptr %x, <2 x ptr> %v) nounwind { +; CHECK-SSE2-O3-LABEL: store_atomic_vec2_ptr_align: +; CHECK-SSE2-O3: # %bb.0: +; CHECK-SSE2-O3-NEXT: pushq %rax +; CHECK-SSE2-O3-NEXT: movq %xmm0, %rsi +; CHECK-SSE2-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; CHECK-SSE2-O3-NEXT: movq %xmm0, %rdx +; CHECK-SSE2-O3-NEXT: movl $3, %ecx +; CHECK-SSE2-O3-NEXT: callq __atomic_store_16@PLT +; CHECK-SSE2-O3-NEXT: popq %rax +; CHECK-SSE2-O3-NEXT: retq +; +; CHECK-SSE4-O3-LABEL: store_atomic_vec2_ptr_align: +; CHECK-SSE4-O3: # %bb.0: +; CHECK-SSE4-O3-NEXT: movaps %xmm0, (%rdi) +; CHECK-SSE4-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec2_ptr_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps %xmm0, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE2-O0-LABEL: store_atomic_vec2_ptr_align: +; CHECK-SSE2-O0: # %bb.0: +; CHECK-SSE2-O0-NEXT: pushq %rax +; CHECK-SSE2-O0-NEXT: movq %xmm0, %rsi +; CHECK-SSE2-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; CHECK-SSE2-O0-NEXT: movq %xmm0, %rdx +; CHECK-SSE2-O0-NEXT: movl $3, %ecx +; CHECK-SSE2-O0-NEXT: callq __atomic_store_16@PLT +; CHECK-SSE2-O0-NEXT: popq %rax +; CHECK-SSE2-O0-NEXT: retq +; +; CHECK-SSE4-O0-LABEL: store_atomic_vec2_ptr_align: +; CHECK-SSE4-O0: # %bb.0: +; CHECK-SSE4-O0-NEXT: movaps %xmm0, (%rdi) +; CHECK-SSE4-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec2_ptr_align: +; CHECK-AVX-O0: # %bb.0: +; 
CHECK-AVX-O0-NEXT: vmovdqa %xmm0, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <2 x ptr> %v, ptr %x release, align 16 + ret void +} + define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { ; CHECK-SSE2-O3-LABEL: atomic_vec4_ptr270: ; CHECK-SSE2-O3: # %bb.0: @@ -703,6 +750,52 @@ define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { ret <4 x ptr addrspace(270)> %ret } +define void @store_atomic_vec4_ptr270_align(ptr %x, <4 x ptr addrspace(270)> %v) nounwind { +; CHECK-SSE2-O3-LABEL: store_atomic_vec4_ptr270_align: +; CHECK-SSE2-O3: # %bb.0: +; CHECK-SSE2-O3-NEXT: pushq %rax +; CHECK-SSE2-O3-NEXT: movq %xmm0, %rsi +; CHECK-SSE2-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; CHECK-SSE2-O3-NEXT: movq %xmm0, %rdx +; CHECK-SSE2-O3-NEXT: movl $3, %ecx +; CHECK-SSE2-O3-NEXT: callq __atomic_store_16@PLT +; CHECK-SSE2-O3-NEXT: popq %rax +; CHECK-SSE2-O3-NEXT: retq +; +; CHECK-SSE4-O3-LABEL: store_atomic_vec4_ptr270_align: +; CHECK-SSE4-O3: # %bb.0: +; CHECK-SSE4-O3-NEXT: movaps %xmm0, (%rdi) +; CHECK-SSE4-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: store_atomic_vec4_ptr270_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps %xmm0, (%rdi) +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-SSE2-O0-LABEL: store_atomic_vec4_ptr270_align: +; CHECK-SSE2-O0: # %bb.0: +; CHECK-SSE2-O0-NEXT: pushq %rax +; CHECK-SSE2-O0-NEXT: movq %xmm0, %rsi +; CHECK-SSE2-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; CHECK-SSE2-O0-NEXT: movq %xmm0, %rdx +; CHECK-SSE2-O0-NEXT: movl $3, %ecx +; CHECK-SSE2-O0-NEXT: callq __atomic_store_16@PLT +; CHECK-SSE2-O0-NEXT: popq %rax +; CHECK-SSE2-O0-NEXT: retq +; +; CHECK-SSE4-O0-LABEL: store_atomic_vec4_ptr270_align: +; CHECK-SSE4-O0: # %bb.0: +; CHECK-SSE4-O0-NEXT: movaps %xmm0, (%rdi) +; CHECK-SSE4-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: store_atomic_vec4_ptr270_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovdqa %xmm0, (%rdi) +; CHECK-AVX-O0-NEXT: retq + store atomic <4 x ptr addrspace(270)> %v, ptr %x release, 
align 16 + ret void +} + define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align: ; CHECK-SSE-O3: # %bb.0: @@ -1084,7 +1177,7 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind { ; CHECK-SSE4-O3-NEXT: pextrq $1, %xmm0, %rcx ; CHECK-SSE4-O3-NEXT: movq %xmm0, %rbx ; CHECK-SSE4-O3-NEXT: .p2align 4 -; CHECK-SSE4-O3-NEXT: .LBB39_1: # %atomicrmw.start +; CHECK-SSE4-O3-NEXT: .LBB41_1: # %atomicrmw.start ; CHECK-SSE4-O3-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-SSE4-O3-NEXT: movq %xmm1, %rax ; CHECK-SSE4-O3-NEXT: pextrq $1, %xmm1, %rdx @@ -1092,7 +1185,7 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind { ; CHECK-SSE4-O3-NEXT: movq %rdx, %xmm0 ; CHECK-SSE4-O3-NEXT: movq %rax, %xmm1 ; CHECK-SSE4-O3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; CHECK-SSE4-O3-NEXT: jne .LBB39_1 +; CHECK-SSE4-O3-NEXT: jne .LBB41_1 ; CHECK-SSE4-O3-NEXT: # %bb.2: # %atomicrmw.end ; CHECK-SSE4-O3-NEXT: popq %rbx ; CHECK-SSE4-O3-NEXT: retq @@ -1120,7 +1213,7 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind { ; CHECK-SSE4-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-SSE4-O0-NEXT: movaps (%rdi), %xmm0 ; CHECK-SSE4-O0-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-SSE4-O0-NEXT: .LBB39_1: # %atomicrmw.start +; CHECK-SSE4-O0-NEXT: .LBB41_1: # %atomicrmw.start ; CHECK-SSE4-O0-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-SSE4-O0-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-SSE4-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload @@ -1137,9 +1230,9 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind { ; CHECK-SSE4-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-SSE4-O0-NEXT: testb $1, %al ; CHECK-SSE4-O0-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-SSE4-O0-NEXT: jne .LBB39_2 -; CHECK-SSE4-O0-NEXT: jmp .LBB39_1 -; 
CHECK-SSE4-O0-NEXT: .LBB39_2: # %atomicrmw.end +; CHECK-SSE4-O0-NEXT: jne .LBB41_2 +; CHECK-SSE4-O0-NEXT: jmp .LBB41_1 +; CHECK-SSE4-O0-NEXT: .LBB41_2: # %atomicrmw.end ; CHECK-SSE4-O0-NEXT: popq %rbx ; CHECK-SSE4-O0-NEXT: retq ; diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll index bb1cb5a0eaee6..142ebefa5439a 100644 --- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll +++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll @@ -646,3 +646,101 @@ define void @store_i128_volatile_syncscope(ptr %p, i128 %x) { store atomic volatile i128 %x, ptr %p syncscope("singlethread") seq_cst, align 16 ret void } + +define void @store_atomic_vec2_ptr_align(ptr %x, <2 x ptr> %v) nounwind { +; CHECK64-LABEL: define void @store_atomic_vec2_ptr_align( +; CHECK64-SAME: ptr [[X:%.*]], <2 x ptr> [[V:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[V]] to <2 x i64> +; CHECK64-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP2]], i32 3) +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @store_atomic_vec2_ptr_align( +; CHECK32-SAME: ptr [[X:%.*]], <2 x ptr> [[V:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: store atomic <2 x ptr> [[V]], ptr [[X]] release, align 16 +; CHECK32-NEXT: ret void +; + store atomic <2 x ptr> %v, ptr %x release, align 16 + ret void +} + +define void @store_atomic_vec4_ptr270_align(ptr %x, <4 x ptr addrspace(270)> %v) nounwind { +; CHECK64-LABEL: define void @store_atomic_vec4_ptr270_align( +; CHECK64-SAME: ptr [[X:%.*]], <4 x ptr addrspace(270)> [[V:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr addrspace(270)> [[V]] to <4 x i32> +; CHECK64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP2]], i32 3) +; CHECK64-NEXT: ret void +; +; 
CHECK32-LABEL: define void @store_atomic_vec4_ptr270_align( +; CHECK32-SAME: ptr [[X:%.*]], <4 x ptr addrspace(270)> [[V:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x ptr addrspace(270)>, align 16 +; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]]) +; CHECK32-NEXT: store <4 x ptr addrspace(270)> [[V]], ptr [[TMP1]], align 16 +; CHECK32-NEXT: call void @__atomic_store(i32 16, ptr [[X]], ptr [[TMP1]], i32 3) +; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]]) +; CHECK32-NEXT: ret void +; + store atomic <4 x ptr addrspace(270)> %v, ptr %x release, align 16 + ret void +} + +define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) nounwind { +; CHECK-LABEL: define void @store_atomic_vec2_i16( +; CHECK-SAME: ptr [[X:%.*]], <2 x i16> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store atomic <2 x i16> [[V]], ptr [[X]] release, align 8 +; CHECK-NEXT: ret void +; + store atomic <2 x i16> %v, ptr %x release, align 8 + ret void +} + +define void @store_atomic_vec2_half(ptr %x, <2 x half> %v) nounwind { +; CHECK-LABEL: define void @store_atomic_vec2_half( +; CHECK-SAME: ptr [[X:%.*]], <2 x half> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store atomic <2 x half> [[V]], ptr [[X]] release, align 8 +; CHECK-NEXT: ret void +; + store atomic <2 x half> %v, ptr %x release, align 8 + ret void +} + +define void @store_atomic_vec4_i32(ptr %x, <4 x i32> %v) nounwind { +; CHECK64-LABEL: define void @store_atomic_vec4_i32( +; CHECK64-SAME: ptr [[X:%.*]], <4 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V]] to i128 +; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP1]], i32 3) +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @store_atomic_vec4_i32( +; CHECK32-SAME: ptr [[X:%.*]], <4 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x i32>, align 16 +; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]]) +; CHECK32-NEXT: store <4 x i32> [[V]], ptr [[TMP1]], align 16 
+; CHECK32-NEXT: call void @__atomic_store(i32 16, ptr [[X]], ptr [[TMP1]], i32 3) +; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]]) +; CHECK32-NEXT: ret void +; + store atomic <4 x i32> %v, ptr %x release, align 16 + ret void +} + +define void @store_atomic_vec4_float(ptr %x, <4 x float> %v) nounwind { +; CHECK64-LABEL: define void @store_atomic_vec4_float( +; CHECK64-SAME: ptr [[X:%.*]], <4 x float> [[V:%.*]]) #[[ATTR0]] { +; CHECK64-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V]] to i128 +; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP1]], i32 3) +; CHECK64-NEXT: ret void +; +; CHECK32-LABEL: define void @store_atomic_vec4_float( +; CHECK32-SAME: ptr [[X:%.*]], <4 x float> [[V:%.*]]) #[[ATTR0]] { +; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x float>, align 16 +; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]]) +; CHECK32-NEXT: store <4 x float> [[V]], ptr [[TMP1]], align 16 +; CHECK32-NEXT: call void @__atomic_store(i32 16, ptr [[X]], ptr [[TMP1]], i32 3) +; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]]) +; CHECK32-NEXT: ret void +; + store atomic <4 x float> %v, ptr %x release, align 16 + ret void +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
