https://github.com/jofrn created https://github.com/llvm/llvm-project/pull/142320
This commit casts floats to ints in an atomic load during AtomicExpand to support floating-point types. It is also required to support 128-bit vectors in SSE/AVX. --- **Stack**: - #120716 - #142320 ⬅ - #138635 - #120598 - #120387 - #120386 - #120385 - #120384 ⚠️ *Part of a stack created by [spr](https://github.com/ejoffe/spr). Do not merge manually using the UI - doing so may have unexpected results.* From d074cd39e75aed1ad8022669c825de1c5b72dbfb Mon Sep 17 00:00:00 2001 From: jofrn <jofer...@amd.com> Date: Sun, 1 Jun 2025 16:23:26 -0400 Subject: [PATCH] [X86] Cast atomic vectors in IR to support floats This commit casts floats to ints in an atomic load during AtomicExpand to support floating-point types. It is also required to support 128-bit vectors in SSE/AVX. commit-id:80b9b6a7 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 + llvm/lib/Target/X86/X86ISelLowering.h | 2 + llvm/test/CodeGen/X86/atomic-load-store.ll | 181 +++++++++++++++++++-- 3 files changed, 172 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 776d3c0a42e2f..3debf30da0a29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } } +TargetLowering::AtomicExpansionKind +X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { + if (LI->getType()->getScalarType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; +} + LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ?
64 : 32; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 5cb6b3e493a32..43cddb2b53bd6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1839,6 +1839,8 @@ namespace llvm { shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; + TargetLoweringBase::AtomicExpansionKind + shouldCastAtomicLoadInIR(LoadInst *LI) const override; void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 4b818b6cfa57e..039edcbf83544 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -207,19 +207,19 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-O3-LABEL: atomic_vec1_bfloat: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O3-NEXT: movd %eax, %xmm0 ; CHECK-O3-NEXT: retq ; ; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax -; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O3-NEXT: retq ; ; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat: ; CHECK-AVX-O3: # %bb.0: ; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax -; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O3-NEXT: retq ; ; CHECK-O0-LABEL: atomic_vec1_bfloat: @@ -227,8 +227,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-O0-NEXT: movw (%rdi), %cx ; CHECK-O0-NEXT: # implicit-def: $eax ; CHECK-O0-NEXT: movw %cx, %ax -; CHECK-O0-NEXT: # implicit-def: $xmm0 -; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O0-NEXT: movd %eax, %xmm0 ; CHECK-O0-NEXT: retq ; ; CHECK-SSE-O0-LABEL: 
atomic_vec1_bfloat: @@ -236,8 +235,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx ; CHECK-SSE-O0-NEXT: # implicit-def: $eax ; CHECK-SSE-O0-NEXT: movw %cx, %ax -; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 -; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O0-NEXT: retq ; ; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat: @@ -245,8 +243,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-AVX-O0-NEXT: movw (%rdi), %cx ; CHECK-AVX-O0-NEXT: # implicit-def: $eax ; CHECK-AVX-O0-NEXT: movw %cx, %ax -; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 -; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2 ret <1 x bfloat> %ret @@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_half: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_half: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_half: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + 
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK-O3-LABEL: atomic_vec1_ptr: ; CHECK-O3: # %bb.0: @@ -457,19 +522,19 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-O3-LABEL: atomic_vec1_half: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O3-NEXT: movd %eax, %xmm0 ; CHECK-O3-NEXT: retq ; ; CHECK-SSE-O3-LABEL: atomic_vec1_half: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax -; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O3-NEXT: retq ; ; CHECK-AVX-O3-LABEL: atomic_vec1_half: ; CHECK-AVX-O3: # %bb.0: ; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax -; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O3-NEXT: retq ; ; CHECK-O0-LABEL: atomic_vec1_half: @@ -477,8 +542,7 @@ define <1 x half> 
@atomic_vec1_half(ptr %x) { ; CHECK-O0-NEXT: movw (%rdi), %cx ; CHECK-O0-NEXT: # implicit-def: $eax ; CHECK-O0-NEXT: movw %cx, %ax -; CHECK-O0-NEXT: # implicit-def: $xmm0 -; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O0-NEXT: movd %eax, %xmm0 ; CHECK-O0-NEXT: retq ; ; CHECK-SSE-O0-LABEL: atomic_vec1_half: @@ -486,8 +550,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx ; CHECK-SSE-O0-NEXT: # implicit-def: $eax ; CHECK-SSE-O0-NEXT: movw %cx, %ax -; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 -; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O0-NEXT: retq ; ; CHECK-AVX-O0-LABEL: atomic_vec1_half: @@ -495,8 +558,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-AVX-O0-NEXT: movw (%rdi), %cx ; CHECK-AVX-O0-NEXT: # implicit-def: $eax ; CHECK-AVX-O0-NEXT: movw %cx, %ax -; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 -; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x half>, ptr %x acquire, align 2 ret <1 x half> %ret @@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x half> @atomic_vec4_half(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_half: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x half>, ptr %x acquire, align 8 + ret <4 x half> %ret +} + +define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_bfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8 + ret <4 x bfloat> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec4_float_align: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movl $2, %esi +; CHECK-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-O3-NEXT: movq %rdx, %xmm1 +; CHECK-O3-NEXT: movq 
%rax, %xmm0 +; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_float_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rbx +; CHECK-SSE-O3-NEXT: xorl %eax, %eax +; CHECK-SSE-O3-NEXT: xorl %edx, %edx +; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx +; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx +; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi) +; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1 +; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 +; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE-O3-NEXT: popq %rbx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_float_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_float_align: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movl $2, %esi +; CHECK-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-O0-NEXT: movq %rdx, %xmm1 +; CHECK-O0-NEXT: movq %rax, %xmm0 +; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_float_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rbx +; CHECK-SSE-O0-NEXT: xorl %eax, %eax +; CHECK-SSE-O0-NEXT: movl %eax, %ebx +; CHECK-SSE-O0-NEXT: movq %rbx, %rax +; CHECK-SSE-O0-NEXT: movq %rbx, %rdx +; CHECK-SSE-O0-NEXT: movq %rbx, %rcx +; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi) +; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1 +; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 +; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE-O0-NEXT: popq %rbx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_float_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x float>, ptr %x acquire, align 16 + ret <4 x float> %ret +} + define <4 x float> @atomic_vec4_float(ptr %x) nounwind { ; CHECK-O3-LABEL: 
atomic_vec4_float: ; CHECK-O3: # %bb.0: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits