llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) <details> <summary>Changes</summary> The HW emulates unsupported PCIe atomics via a CAS loop, so we no longer need to expand them. --- Patch is 73.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152369.diff 5 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+9) - (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+7) - (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+3-2) - (added) llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll (+1486) - (modified) llvm/test/CodeGen/AMDGPU/literal64.ll (+2-18) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index d84f512f4976d..ddeca07e51103 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1013,6 +1013,14 @@ def FeatureAgentScopeFineGrainedRemoteMemoryAtomics "device memory." >; +def FeatureEmulatedSystemScopeAtomics + : SubtargetFeature<"emulated-system-scope-atomics", + "HasEmulatedSystemScopeAtomics", + "true", + "System scope atomics unsupported by the PCI-e are emulated in HW via CAS " + "loop and functional." 
+>; + def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero", "HasDefaultComponentZero", "true", @@ -2062,6 +2070,7 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureAtomicFMinFMaxF64FlatInsts, FeatureFlatBufferGlobalAtomicFaddF64Inst, FeatureMemoryAtomicFAddF32DenormalSupport, + FeatureEmulatedSystemScopeAtomics, FeatureGloballyAddressableScratch, FeatureKernargPreload, FeatureVmemPrefInsts, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 9114f249c92a7..1c3749d81eec8 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -187,6 +187,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasFlatBufferGlobalAtomicFaddF64Inst = false; bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; + bool HasEmulatedSystemScopeAtomics = false; bool HasDefaultComponentBroadcast = false; bool HasXF32Insts = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, @@ -950,6 +951,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return HasAgentScopeFineGrainedRemoteMemoryAtomics; } + /// \return true if HW emulates system scope atomics unsupported by the PCI-e + /// via CAS loop. 
+ bool hasEmulatedSystemScopeAtomics() const { + return HasEmulatedSystemScopeAtomics; + } + bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } bool hasDefaultComponentBroadcast() const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 63826b782a377..8f44c03d95b43 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17695,6 +17695,8 @@ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget, if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() && RMW->hasMetadata("amdgpu.no.remote.memory")) return true; + if (Subtarget.hasEmulatedSystemScopeAtomics()) + return true; } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics()) return true; @@ -17942,8 +17944,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { case AtomicRMWInst::UMax: { if (AMDGPU::isFlatGlobalAddrSpace(AS) || AS == AMDGPUAS::BUFFER_FAT_POINTER) { - // Always expand system scope min/max atomics. 
- if (HasSystemScope) + if (HasSystemScope && !Subtarget->hasEmulatedSystemScopeAtomics()) return AtomicExpansionKind::CmpXChg; } diff --git a/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll b/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll new file mode 100644 index 0000000000000..5fc9f4a0f8038 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll @@ -0,0 +1,1486 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN:llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GFX1250 %s + +define float @global_system_atomic_fadd_f32(ptr addrspace(1) %ptr, float %val) { +; GFX1250-LABEL: global_system_atomic_fadd_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic + ret float %result +} + +define float @global_one_as_atomic_fadd_f32(ptr addrspace(1) %ptr, float %val) { +; GFX1250-LABEL: global_one_as_atomic_fadd_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic + ret float %result +} + +define double @global_system_atomic_fadd_f64(ptr addrspace(1) %ptr, double %val) { +; GFX1250-LABEL: global_system_atomic_fadd_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result 
= atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic + ret double %result +} + +define double @global_one_as_atomic_fadd_f64(ptr addrspace(1) %ptr, double %val) { +; GFX1250-LABEL: global_one_as_atomic_fadd_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic + ret double %result +} + +define float @global_system_atomic_fmin_f32(ptr addrspace(1) %ptr, float %val) { +; GFX1250-LABEL: global_system_atomic_fmin_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val monotonic + ret float %result +} + +define float @global_one_as_atomic_fmin_f32(ptr addrspace(1) %ptr, float %val) { +; GFX1250-LABEL: global_one_as_atomic_fmin_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic + ret float %result +} + +define double @global_system_atomic_fmin_f64(ptr addrspace(1) %ptr, double %val) { +; GFX1250-LABEL: global_system_atomic_fmin_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_num_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 
0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val monotonic + ret double %result +} + +define double @global_one_as_atomic_fmin_f64(ptr addrspace(1) %ptr, double %val) { +; GFX1250-LABEL: global_one_as_atomic_fmin_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_num_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic + ret double %result +} + +define float @global_system_atomic_fmax_f32(ptr addrspace(1) %ptr, float %val) { +; GFX1250-LABEL: global_system_atomic_fmax_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val monotonic + ret float %result +} + +define float @global_one_as_atomic_fmax_f32(ptr addrspace(1) %ptr, float %val) { +; GFX1250-LABEL: global_one_as_atomic_fmax_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic + ret float %result +} + +define double @global_system_atomic_fmax_f64(ptr addrspace(1) %ptr, double %val) { +; GFX1250-LABEL: global_system_atomic_fmax_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_num_f64 v[0:1], v[0:1], v[2:3], off 
th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val monotonic + ret double %result +} + +define double @global_one_as_atomic_fmax_f64(ptr addrspace(1) %ptr, double %val) { +; GFX1250-LABEL: global_one_as_atomic_fmax_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_num_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic + ret double %result +} + +define i32 @global_one_as_atomic_min_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_one_as_atomic_min_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_i32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw min ptr addrspace(1) %ptr, i32 %val syncscope("one-as") monotonic + ret i32 %result +} + +define i32 @global_system_atomic_min_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_system_atomic_min_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_i32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw min ptr addrspace(1) %ptr, i32 %val monotonic + ret i32 %result +} + +define i32 @global_one_as_atomic_max_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_one_as_atomic_max_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_i32 v0, v[0:1], 
v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw max ptr addrspace(1) %ptr, i32 %val syncscope("one-as") monotonic + ret i32 %result +} + +define i32 @global_system_atomic_max_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_system_atomic_max_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_i32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw max ptr addrspace(1) %ptr, i32 %val monotonic + ret i32 %result +} + +define i32 @global_one_as_atomic_umin_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_one_as_atomic_umin_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umin ptr addrspace(1) %ptr, i32 %val syncscope("one-as") monotonic + ret i32 %result +} + +define i32 @global_system_atomic_umin_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_system_atomic_umin_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umin ptr addrspace(1) %ptr, i32 %val monotonic + ret i32 %result +} + +define i32 @global_one_as_atomic_umax_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_one_as_atomic_umax_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_u32 v0, v[0:1], v2, off 
th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umax ptr addrspace(1) %ptr, i32 %val syncscope("one-as") monotonic + ret i32 %result +} + +define i32 @global_system_atomic_umax_i32(ptr addrspace(1) %ptr, i32 %val) { +; GFX1250-LABEL: global_system_atomic_umax_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umax ptr addrspace(1) %ptr, i32 %val monotonic + ret i32 %result +} + +define i64 @global_one_as_atomic_min_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_one_as_atomic_min_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_i64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw min ptr addrspace(1) %ptr, i64 %val syncscope("one-as") monotonic + ret i64 %result +} + +define i64 @global_system_atomic_min_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_system_atomic_min_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_i64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw min ptr addrspace(1) %ptr, i64 %val monotonic + ret i64 %result +} + +define i64 @global_one_as_atomic_max_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_one_as_atomic_max_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_i64 v[0:1], v[0:1], v[2:3], off 
th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw max ptr addrspace(1) %ptr, i64 %val syncscope("one-as") monotonic + ret i64 %result +} + +define i64 @global_system_atomic_max_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_system_atomic_max_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_i64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw max ptr addrspace(1) %ptr, i64 %val monotonic + ret i64 %result +} + +define i64 @global_one_as_atomic_umin_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_one_as_atomic_umin_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umin ptr addrspace(1) %ptr, i64 %val syncscope("one-as") monotonic + ret i64 %result +} + +define i64 @global_system_atomic_umin_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_system_atomic_umin_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_min_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umin ptr addrspace(1) %ptr, i64 %val monotonic + ret i64 %result +} + +define i64 @global_one_as_atomic_umax_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_one_as_atomic_umax_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_u64 v[0:1], v[0:1], 
v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umax ptr addrspace(1) %ptr, i64 %val syncscope("one-as") monotonic + ret i64 %result +} + +define i64 @global_system_atomic_umax_i64(ptr addrspace(1) %ptr, i64 %val) { +; GFX1250-LABEL: global_system_atomic_umax_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_atomic_max_u64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw umax ptr addrspace(1) %ptr, i64 %val monotonic + ret i64 %result +} + +define i16 @global_one_as_atomic_min_i16(ptr addrspace(1) %ptr, i16 %val) { +; GFX1250-LABEL: global_one_as_atomic_min_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v3, v0 +; GFX1250-NEXT: s_mov_b32 s0, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_and_b32_e32 v0, -4, v3 +; GFX1250-NEXT: v_and_b32_e32 v3, 3, v3 +; GFX1250-NEXT: v_lshlrev_b32_e32 v3, 3, v3 +; GFX1250-NEXT: global_load_b32 v5, v[0:1], off +; GFX1250-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_not_b32_e32 v4, v4 +; GFX1250-NEXT: .LBB28_1: ; %atomicrmw.start +; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v7, v5 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_lshrrev_b32_e32 v5, v3, v7 +; GFX1250-NEXT: v_min_i16 v5, v5, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) 
+; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5 +; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX1250-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: s_cbranch_execnz .LBB28_1 +; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1250-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1250-NEXT: v_lshrrev_b32_e32 v0, v3, v5 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] + %result = atomicrmw min ptr addrspace(1) %ptr, i16 %val syncscope("one-as") monotonic + ret i16 %resu... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/152369 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits