https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/185190
None >From 9c3c86223234e46f65e30a5a055e9da2acf3b69f Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 7 Mar 2026 14:32:18 +0100 Subject: [PATCH] libclc: Use separate acquire and release fences in work_group_barrier --- .../synchronization/clc_work_group_barrier.h | 2 +- .../synchronization/clc_work_group_barrier.cl | 20 ++++++++++++++++--- .../synchronization/clc_work_group_barrier.cl | 3 +-- .../synchronization/work_group_barrier.cl | 3 +-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h index 34745bd47c068..e98dc38e1b0b3 100644 --- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h +++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h @@ -13,7 +13,7 @@ #include <clc/mem_fence/clc_mem_semantic.h> _CLC_OVERLOAD _CLC_DECL void -__clc_work_group_barrier(int memory_scope, int memory_order, +__clc_work_group_barrier(int memory_scope, __CLC_MemorySemantics memory_semantics); #endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__ diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl index 034e6e7bd8ed4..67b3d9b2f308b 100644 --- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl @@ -10,8 +10,22 @@ #include <clc/synchronization/clc_work_group_barrier.h> _CLC_OVERLOAD _CLC_DEF void -__clc_work_group_barrier(int memory_scope, int memory_order, +__clc_work_group_barrier(int memory_scope, __CLC_MemorySemantics memory_semantics) { - __clc_mem_fence(memory_scope, memory_order, memory_semantics); - __builtin_amdgcn_s_barrier(); + if (memory_semantics == 0) { + __builtin_amdgcn_s_barrier(); + } else { + int memory_order_before = + memory_semantics & (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL) + ? __ATOMIC_SEQ_CST + : __ATOMIC_RELEASE; + int memory_order_after = + memory_semantics & (__CLC_MEMORY_GLOBAL | __CLC_MEMORY_LOCAL) + ? __ATOMIC_SEQ_CST + : __ATOMIC_ACQUIRE; + + __clc_mem_fence(memory_scope, memory_order_before, memory_semantics); + __builtin_amdgcn_s_barrier(); + __clc_mem_fence(memory_scope, memory_order_after, memory_semantics); + } } diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl index 3afc88ca50b15..35b381052367d 100644 --- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl @@ -9,10 +9,9 @@ #include <clc/synchronization/clc_work_group_barrier.h> _CLC_OVERLOAD _CLC_DEF void -__clc_work_group_barrier(int memory_scope, int memory_order, +__clc_work_group_barrier(int memory_scope, __CLC_MemorySemantics memory_semantics) { (void)memory_scope; - (void)memory_order; (void)memory_semantics; __syncthreads(); } diff --git a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl index 14de313c4f582..595c7f8cd95a6 100644 --- a/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl +++ b/libclc/opencl/lib/generic/synchronization/work_group_barrier.cl @@ -12,9 +12,8 @@ _CLC_DEF _CLC_OVERLOAD void work_group_barrier(cl_mem_fence_flags flags, memory_scope scope) { - int memory_order = __ATOMIC_SEQ_CST; __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); - __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_order, + __clc_work_group_barrier(__opencl_get_clang_memory_scope(scope), memory_semantics); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
