llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: None (jofrn) <details> <summary>Changes</summary> This commit casts floats to ints in an atomic load during AtomicExpand to support floating-point types. It is also required to support 128-bit vectors in SSE/AVX. --- **Stack**: - #<!-- -->120716 - #<!-- -->142320 ⬅ - #<!-- -->138635 - #<!-- -->120598 - #<!-- -->120387 - #<!-- -->120386 - #<!-- -->120385 - #<!-- -->120384 ⚠️ *Part of a stack created by [spr](https://github.com/ejoffe/spr). Do not merge manually using the UI - doing so may have unexpected results.* --- Full diff: https://github.com/llvm/llvm-project/pull/142320.diff 3 Files Affected: - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+7) - (modified) llvm/lib/Target/X86/X86ISelLowering.h (+2) - (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+163-18) ``````````diff diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 776d3c0a42e2f..3debf30da0a29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } } +TargetLowering::AtomicExpansionKind +X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { + if (LI->getType()->getScalarType()->isFloatingPointTy()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; +} + LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ? 
64 : 32; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 5cb6b3e493a32..43cddb2b53bd6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1839,6 +1839,8 @@ namespace llvm { shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const; + TargetLoweringBase::AtomicExpansionKind + shouldCastAtomicLoadInIR(LoadInst *LI) const override; void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 4b818b6cfa57e..039edcbf83544 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -207,19 +207,19 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-O3-LABEL: atomic_vec1_bfloat: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O3-NEXT: movd %eax, %xmm0 ; CHECK-O3-NEXT: retq ; ; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax -; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O3-NEXT: retq ; ; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat: ; CHECK-AVX-O3: # %bb.0: ; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax -; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O3-NEXT: retq ; ; CHECK-O0-LABEL: atomic_vec1_bfloat: @@ -227,8 +227,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-O0-NEXT: movw (%rdi), %cx ; CHECK-O0-NEXT: # implicit-def: $eax ; CHECK-O0-NEXT: movw %cx, %ax -; CHECK-O0-NEXT: # implicit-def: $xmm0 -; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O0-NEXT: movd %eax, %xmm0 ; CHECK-O0-NEXT: retq ; ; CHECK-SSE-O0-LABEL: 
atomic_vec1_bfloat: @@ -236,8 +235,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx ; CHECK-SSE-O0-NEXT: # implicit-def: $eax ; CHECK-SSE-O0-NEXT: movw %cx, %ax -; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 -; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O0-NEXT: retq ; ; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat: @@ -245,8 +243,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { ; CHECK-AVX-O0-NEXT: movw (%rdi), %cx ; CHECK-AVX-O0-NEXT: # implicit-def: $eax ; CHECK-AVX-O0-NEXT: movw %cx, %ax -; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 -; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2 ret <1 x bfloat> %ret @@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_half: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_half: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_half: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_half: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_half: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_half: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x half>, ptr %x acquire, align 4 + ret <2 x half> %ret +} + 
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) { +; CHECK-O3-LABEL: atomic_vec2_bfloat: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec2_bfloat: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4 + ret <2 x bfloat> %ret +} + define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { ; CHECK-O3-LABEL: atomic_vec1_ptr: ; CHECK-O3: # %bb.0: @@ -457,19 +522,19 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-O3-LABEL: atomic_vec1_half: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax -; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O3-NEXT: movd %eax, %xmm0 ; CHECK-O3-NEXT: retq ; ; CHECK-SSE-O3-LABEL: atomic_vec1_half: ; CHECK-SSE-O3: # %bb.0: ; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax -; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O3-NEXT: retq ; ; CHECK-AVX-O3-LABEL: atomic_vec1_half: ; CHECK-AVX-O3: # %bb.0: ; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax -; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O3-NEXT: retq ; ; CHECK-O0-LABEL: atomic_vec1_half: @@ -477,8 +542,7 @@ define <1 x half> 
@atomic_vec1_half(ptr %x) { ; CHECK-O0-NEXT: movw (%rdi), %cx ; CHECK-O0-NEXT: # implicit-def: $eax ; CHECK-O0-NEXT: movw %cx, %ax -; CHECK-O0-NEXT: # implicit-def: $xmm0 -; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-O0-NEXT: movd %eax, %xmm0 ; CHECK-O0-NEXT: retq ; ; CHECK-SSE-O0-LABEL: atomic_vec1_half: @@ -486,8 +550,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-SSE-O0-NEXT: movw (%rdi), %cx ; CHECK-SSE-O0-NEXT: # implicit-def: $eax ; CHECK-SSE-O0-NEXT: movw %cx, %ax -; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0 -; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 ; CHECK-SSE-O0-NEXT: retq ; ; CHECK-AVX-O0-LABEL: atomic_vec1_half: @@ -495,8 +558,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK-AVX-O0-NEXT: movw (%rdi), %cx ; CHECK-AVX-O0-NEXT: # implicit-def: $eax ; CHECK-AVX-O0-NEXT: movw %cx, %ax -; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0 -; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 ; CHECK-AVX-O0-NEXT: retq %ret = load atomic <1 x half>, ptr %x acquire, align 2 ret <1 x half> %ret @@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x half> @atomic_vec4_half(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_half: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x half>, ptr %x acquire, align 8 + ret <4 x half> %ret +} + +define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_bfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq + %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8 + ret <4 x bfloat> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-O3-LABEL: atomic_vec4_float_align: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: pushq %rax +; CHECK-O3-NEXT: movl $2, %esi +; CHECK-O3-NEXT: callq __atomic_load_16@PLT +; CHECK-O3-NEXT: movq %rdx, %xmm1 +; CHECK-O3-NEXT: movq 
%rax, %xmm0 +; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O3-NEXT: popq %rax +; CHECK-O3-NEXT: retq +; +; CHECK-SSE-O3-LABEL: atomic_vec4_float_align: +; CHECK-SSE-O3: # %bb.0: +; CHECK-SSE-O3-NEXT: pushq %rbx +; CHECK-SSE-O3-NEXT: xorl %eax, %eax +; CHECK-SSE-O3-NEXT: xorl %edx, %edx +; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx +; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx +; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi) +; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1 +; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 +; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE-O3-NEXT: popq %rbx +; CHECK-SSE-O3-NEXT: retq +; +; CHECK-AVX-O3-LABEL: atomic_vec4_float_align: +; CHECK-AVX-O3: # %bb.0: +; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O3-NEXT: retq +; +; CHECK-O0-LABEL: atomic_vec4_float_align: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: pushq %rax +; CHECK-O0-NEXT: movl $2, %esi +; CHECK-O0-NEXT: callq __atomic_load_16@PLT +; CHECK-O0-NEXT: movq %rdx, %xmm1 +; CHECK-O0-NEXT: movq %rax, %xmm0 +; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-O0-NEXT: popq %rax +; CHECK-O0-NEXT: retq +; +; CHECK-SSE-O0-LABEL: atomic_vec4_float_align: +; CHECK-SSE-O0: # %bb.0: +; CHECK-SSE-O0-NEXT: pushq %rbx +; CHECK-SSE-O0-NEXT: xorl %eax, %eax +; CHECK-SSE-O0-NEXT: movl %eax, %ebx +; CHECK-SSE-O0-NEXT: movq %rbx, %rax +; CHECK-SSE-O0-NEXT: movq %rbx, %rdx +; CHECK-SSE-O0-NEXT: movq %rbx, %rcx +; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi) +; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1 +; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 +; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-SSE-O0-NEXT: popq %rbx +; CHECK-SSE-O0-NEXT: retq +; +; CHECK-AVX-O0-LABEL: atomic_vec4_float_align: +; CHECK-AVX-O0: # %bb.0: +; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-AVX-O0-NEXT: retq + %ret = load atomic <4 x float>, ptr %x acquire, align 16 + ret <4 x float> %ret +} + define <4 x float> @atomic_vec4_float(ptr %x) nounwind { ; CHECK-O3-LABEL: 
atomic_vec4_float: ; CHECK-O3: # %bb.0: `````````` </details> https://github.com/llvm/llvm-project/pull/142320 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits