Author: Wenju He
Date: 2026-02-14T09:47:53+08:00
New Revision: 75fcaef3b60367e293e4208c8d88803f8650f3e1
URL: https://github.com/llvm/llvm-project/commit/75fcaef3b60367e293e4208c8d88803f8650f3e1
DIFF: https://github.com/llvm/llvm-project/commit/75fcaef3b60367e293e4208c8d88803f8650f3e1.diff

LOG: [libclc] Fix memory_scope and memory_order of *mem_fence builtins (#181311)

See OpenCL spec 6.15.12.5.
https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#fences

// Older syntax memory fences are equivalent to atomic_work_item_fence with the
// same flags parameter, memory_scope_work_group scope, and ordering as follows:
void mem_fence(cl_mem_fence_flags flags) // memory_order_acq_rel
void read_mem_fence(cl_mem_fence_flags flags) // memory_order_acquire
void write_mem_fence(cl_mem_fence_flags flags) // memory_order_release

---------

Co-authored-by: Copilot <[email protected]>

Added:

Modified:
    libclc/opencl/lib/amdgcn/mem_fence/fence.cl
    libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl

Removed:


################################################################################
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 7e5d97bc6de62..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -10,17 +10,22 @@
 #include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  int memory_scope = __opencl_get_memory_scope(flags);
-  int memory_order = __ATOMIC_SEQ_CST;
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQ_REL;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
-// We don't have separate mechanism for read and write fences
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQUIRE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_RELEASE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }

diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index c799cf2ad7dde..38fb15c2c1de8 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -10,17 +10,22 @@
 #include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  int memory_scope = __opencl_get_memory_scope(flags);
-  int memory_order = __ATOMIC_SEQ_CST;
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQ_REL;
   __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
   __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
-// We do not have separate mechanism for read and write fences.
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_ACQUIRE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }
 
 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
-  mem_fence(flags);
+  int memory_scope = __MEMORY_SCOPE_WRKGRP;
+  int memory_order = __ATOMIC_RELEASE;
+  __CLC_MemorySemantics memory_semantics = __opencl_get_memory_semantics(flags);
+  __clc_mem_fence(memory_scope, memory_order, memory_semantics);
 }



_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
