Author: Joseph Huber Date: 2026-03-16T08:15:49-05:00 New Revision: 50f471fc62b2abfd102160fadd6855244eb76cca
URL: https://github.com/llvm/llvm-project/commit/50f471fc62b2abfd102160fadd6855244eb76cca DIFF: https://github.com/llvm/llvm-project/commit/50f471fc62b2abfd102160fadd6855244eb76cca.diff LOG: [libclc] Add generic clc_mem_fence instruction (#185889) Summary: This can be made generic, which works as expected on NVPTX and SPIR-V. We do not replace this for AMDGPU because the dedicated built-in has an extra argument that controls whether or not local memory or global memory will be invalidated. It would be correct to use this generic operation there, but we'd lose that minor optimization so we likely should not regress. Added: libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl Modified: libclc/clc/lib/generic/CMakeLists.txt libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt Removed: libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl ################################################################################ diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt index 07200536328f3..70a0863524b19 100644 --- a/libclc/clc/lib/generic/CMakeLists.txt +++ b/libclc/clc/lib/generic/CMakeLists.txt @@ -163,6 +163,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES math/clc_tanpi.cl math/clc_tgamma.cl math/clc_trunc.cl + mem_fence/clc_mem_fence.cl misc/clc_shuffle.cl misc/clc_shuffle2.cl relational/clc_all.cl diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl similarity index 83% rename from libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl rename to libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl index afc72e831cd99..5c5185292184c 100644 --- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl @@ -11,8 +11,6 @@ _CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, int memory_order, __CLC_MemorySemantics memory_semantics) { - (void)memory_order; (void)memory_semantics; - if (memory_scope & 
(__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP)) - __nvvm_membar_cta(); + __scoped_atomic_thread_fence(memory_scope, memory_order); } diff --git a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt index f345007e852e2..6eb0baab1c0bb 100644 --- a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt +++ b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt @@ -4,7 +4,6 @@ libclc_configure_source_list(CLC_PTX_NVIDIACL_SOURCES math/clc_rsqrt.cl math/clc_sinpi.cl math/clc_sqrt.cl - mem_fence/clc_mem_fence.cl relational/clc_isinf.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_global_id.cl _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
