https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/181311
>From b025d155d33c7e5bc1561efb6819de94f17a04cf Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Fri, 13 Feb 2026 06:56:53 +0100 Subject: [PATCH 1/2] [libclc] Fix memory_scope and memory_order of *mem_fence builtins See OpenCL spec 6.15.12.5. https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences // Older syntax memory fences are equivalent to atomic_work_item_fence with the // same flags parameter, memory_scope_work_group scope, and ordering as follows: void mem_fence(cl_mem_fence_flags flags) // memory_order_acq_rel void read_mem_fence(cl_mem_fence_flags flags) // memory_order_acquire void write_mem_fence(cl_mem_fence_flags flags) // memory_order_release --- libclc/opencl/lib/amdgcn/mem_fence/fence.cl | 15 ++++++++++----- libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 13 ++++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl index 7e5d97bc6de62..38fb15c2c1de8 100644 --- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl +++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl @@ -10,17 +10,22 @@ #include <clc/opencl/synchronization/utils.h> _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { - int memory_scope = __opencl_get_memory_scope(flags); - int memory_order = __ATOMIC_SEQ_CST; + int memory_scope = __MEMORY_SCOPE_WRKGRP; + int memory_order = __ATOMIC_ACQ_REL; __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); __clc_mem_fence(memory_scope, memory_order, memory_semantics); } -// We don't have separate mechanism for read and write fences _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { - mem_fence(flags); + int memory_scope = __MEMORY_SCOPE_WRKGRP; + int memory_order = __ATOMIC_ACQUIRE; + __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantics); } _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) { - mem_fence(flags); + int memory_scope = __MEMORY_SCOPE_WRKGRP; + int memory_order = __ATOMIC_RELEASE; + __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantics); } diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl index c799cf2ad7dde..5226f0c6edf40 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl @@ -10,17 +10,24 @@ #include <clc/opencl/synchronization/utils.h> _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { - int memory_scope = __opencl_get_memory_scope(flags); - int memory_order = __ATOMIC_SEQ_CST; + int memory_scope = __MEMORY_SCOPE_WRKGRP; + int memory_order = __ATOMIC_ACQ_REL; __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); __clc_mem_fence(memory_scope, memory_order, memory_semantics); } -// We do not have separate mechanism for read and write fences. _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { + int memory_scope = __MEMORY_SCOPE_WRKGRP; + int memory_order = __ATOMIC_ACQUIRE; + __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantics); mem_fence(flags); } _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) { + int memory_scope = __MEMORY_SCOPE_WRKGRP; + int memory_order = __ATOMIC_RELEASE; + __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantics); mem_fence(flags); } >From b0a29573d451733a32a2c2b1a5cb947200cd5bbb Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Fri, 13 Feb 2026 14:04:23 +0800 Subject: [PATCH 2/2] Update libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl Co-authored-by: Copilot <[email protected]> --- libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 1 - 1 file changed, 1 deletion(-) diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl index 5226f0c6edf40..fa7f3c931370d 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl @@ -21,7 +21,6 @@ _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { int memory_order = __ATOMIC_ACQUIRE; __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags); __clc_mem_fence(memory_scope, memory_order, memory_semantics); - mem_fence(flags); } _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
