Author: Joseph Huber
Date: 2026-03-16T08:15:49-05:00
New Revision: 50f471fc62b2abfd102160fadd6855244eb76cca

URL: 
https://github.com/llvm/llvm-project/commit/50f471fc62b2abfd102160fadd6855244eb76cca
DIFF: 
https://github.com/llvm/llvm-project/commit/50f471fc62b2abfd102160fadd6855244eb76cca.diff

LOG: [libclc] Add generic clc_mem_fence instruction (#185889)

Summary:
The __clc_mem_fence implementation can be made generic, which works as
expected on NVPTX and SPIR-V. We do not replace the AMDGPU version
because its dedicated built-in has an extra argument that controls
whether local memory or global memory will be invalidated. It would be
correct to use the generic operation there, but we would lose that minor
optimization, so we should likely avoid that regression.

Added: 
    libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl

Modified: 
    libclc/clc/lib/generic/CMakeLists.txt
    libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt

Removed: 
    libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl


################################################################################
diff  --git a/libclc/clc/lib/generic/CMakeLists.txt 
b/libclc/clc/lib/generic/CMakeLists.txt
index 07200536328f3..70a0863524b19 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -163,6 +163,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   math/clc_tanpi.cl
   math/clc_tgamma.cl
   math/clc_trunc.cl
+  mem_fence/clc_mem_fence.cl
   misc/clc_shuffle.cl
   misc/clc_shuffle2.cl
   relational/clc_all.cl

diff  --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl 
b/libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl
similarity index 83%
rename from libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
rename to libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl
index afc72e831cd99..5c5185292184c 100644
--- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
+++ b/libclc/clc/lib/generic/mem_fence/clc_mem_fence.cl
@@ -11,8 +11,6 @@
 _CLC_OVERLOAD _CLC_DEF void
 __clc_mem_fence(int memory_scope, int memory_order,
                 __CLC_MemorySemantics memory_semantics) {
-  (void)memory_order;
   (void)memory_semantics;
-  if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP))
-    __nvvm_membar_cta();
+  __scoped_atomic_thread_fence(memory_scope, memory_order);
 }

diff  --git a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt 
b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
index f345007e852e2..6eb0baab1c0bb 100644
--- a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
+++ b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt
@@ -4,7 +4,6 @@ libclc_configure_source_list(CLC_PTX_NVIDIACL_SOURCES
   math/clc_rsqrt.cl
   math/clc_sinpi.cl
   math/clc_sqrt.cl
-  mem_fence/clc_mem_fence.cl
   relational/clc_isinf.cl
   synchronization/clc_work_group_barrier.cl
   workitem/clc_get_global_id.cl


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to