https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716
>From 1a9eae9445c20f8f611964b14315c4d57a93b0cd Mon Sep 17 00:00:00 2001 From: jofrn <jofer...@amd.com> Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Avoid sized call when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change makes vector-typed atomic loads skip the sized libcalls and fall back to the generic, unsized `__atomic_load` libcall (which appears as `___atomic_load` in the X86 test output). commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 5 +- llvm/test/CodeGen/ARM/atomic-load-store.ll | 57 ++++++++++++++++++++ llvm/test/CodeGen/X86/atomic-load-store.ll | 61 ++++++++++++++++++---- 3 files changed, 112 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a75fa688d87a8d..cd9896159a77f3 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1884,7 +1884,10 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( IRBuilder<> Builder(I); IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); - bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); + const bool IsAtomic = + isa<LoadInst>(I) ? 
cast<LoadInst>(I)->isAtomic() : false; + const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) && + canUseSizedAtomicCall(Size, Alignment, DL); Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29d..7f0f3008d2d5c2 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: dmb ish +; ARM-NEXT: bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT: ldr r0, [r0] +; ARMOPTNONE-NEXT: dmb ish +; ARMOPTNONE-NEXT: bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT: ldr r0, [r0] +; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT: push {r7, lr} +; THUMBONE-NEXT: movs r1, #0 +; THUMBONE-NEXT: mov r2, r1 +; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT: pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT: push {r11, lr} +; ARMV4-NEXT: sub sp, sp, #8 +; ARMV4-NEXT: add r2, sp, #4 +; ARMV4-NEXT: mov r1, r0 +; ARMV4-NEXT: mov r0, #4 +; ARMV4-NEXT: mov r3, #2 +; ARMV4-NEXT: bl __atomic_load +; ARMV4-NEXT: ldr r0, [sp, #4] +; ARMV4-NEXT: add sp, sp, #8 +; ARMV4-NEXT: pop {r11, lr} +; ARMV4-NEXT: mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT: ldr r0, [r0] +; ARMV6-NEXT: mov r1, #0 +; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT: bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT: ldr r0, [r0] +; 
THUMBM-NEXT: dmb sy +; THUMBM-NEXT: bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 2242071f2cdf4a..f4adc096af1089 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_ptr_align: +; CHECK3: ## %bb.0: +; CHECK3-NEXT: subq $24, %rsp +; CHECK3-NEXT: movq %rdi, %rsi +; CHECK3-NEXT: movq %rsp, %rdx +; CHECK3-NEXT: movl $16, %edi +; CHECK3-NEXT: movl $2, %ecx +; CHECK3-NEXT: callq ___atomic_load +; CHECK3-NEXT: movaps (%rsp), %xmm0 +; CHECK3-NEXT: addq $24, %rsp +; CHECK3-NEXT: retq +; +; CHECK0-LABEL: atomic_vec2_ptr_align: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: subq $24, %rsp +; CHECK0-NEXT: movq %rdi, %rsi +; CHECK0-NEXT: movl $16, %edi +; CHECK0-NEXT: movq %rsp, %rdx +; CHECK0-NEXT: movl $2, %ecx +; CHECK0-NEXT: callq ___atomic_load +; CHECK0-NEXT: movdqa (%rsp), %xmm0 +; CHECK0-NEXT: addq $24, %rsp +; CHECK0-NEXT: retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_float_align: -; CHECK: ## %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: callq ___atomic_load_16 -; CHECK-NEXT: movq %rdx, %xmm1 -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; CHECK3-LABEL: atomic_vec4_float_align: +; CHECK3: ## %bb.0: +; CHECK3-NEXT: subq $24, %rsp +; CHECK3-NEXT: movq %rdi, %rsi +; CHECK3-NEXT: movq %rsp, %rdx +; CHECK3-NEXT: movl $16, %edi +; CHECK3-NEXT: movl $2, %ecx +; CHECK3-NEXT: callq ___atomic_load +; CHECK3-NEXT: movaps (%rsp), %xmm0 +; CHECK3-NEXT: addq $24, 
%rsp +; CHECK3-NEXT: retq +; +; CHECK0-LABEL: atomic_vec4_float_align: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: subq $24, %rsp +; CHECK0-NEXT: movq %rdi, %rsi +; CHECK0-NEXT: movl $16, %edi +; CHECK0-NEXT: movq %rsp, %rdx +; CHECK0-NEXT: movl $2, %ecx +; CHECK0-NEXT: callq ___atomic_load +; CHECK0-NEXT: movaps (%rsp), %xmm0 +; CHECK0-NEXT: addq $24, %rsp +; CHECK0-NEXT: retq %ret = load atomic <4 x float>, ptr %x acquire, align 16 ret <4 x float> %ret } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits