Author: Wenju He
Date: 2025-08-06T09:49:28+08:00
New Revision: af16fc2e2a50c1cbac49726ea70739ad6e193729

URL: 
https://github.com/llvm/llvm-project/commit/af16fc2e2a50c1cbac49726ea70739ad6e193729
DIFF: 
https://github.com/llvm/llvm-project/commit/af16fc2e2a50c1cbac49726ea70739ad6e193729.diff

LOG: [libclc] Move mem_fence and barrier to clc library (#151446)

__clc_mem_fence and __clc_work_group_barrier function have two
parameters memory_scope and memory_order. The design allows the clc
functions to implement SPIR-V ControlBarrier and MemoryBarrier
functions in the future.

The default memory ordering in clc is set to __ATOMIC_SEQ_CST, which is
also the default and strongest ordering in OpenCL and C++.

OpenCL cl_mem_fence_flags parameter is converted to combination of
__MEMORY_SCOPE_DEVICE and __MEMORY_SCOPE_WRKGRP, which is passed to clc.

llvm-diff shows no change to nvptx64--nvidiacl.bc.
llvm-diff show a small change to amdgcn--amdhsa.bc and the number of
LLVM IR instruction is reduced by 1: https://alive2.llvm.org/ce/z/_Uhqvt

Added: 
    libclc/clc/include/clc/mem_fence/clc_mem_fence.h
    libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
    libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
    libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
    libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
    libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
    libclc/opencl/include/clc/opencl/synchronization/utils.h

Modified: 
    libclc/clc/lib/amdgcn/SOURCES
    libclc/clc/lib/ptx-nvidiacl/SOURCES
    libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
    libclc/opencl/lib/amdgcn/mem_fence/fence.cl
    libclc/opencl/lib/amdgcn/synchronization/barrier.cl
    libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
    libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl

Removed: 
    


################################################################################
diff  --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h 
b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
new file mode 100644
index 0000000000000..2321634c76842
--- /dev/null
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
+#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
+
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope,
+                                             int memory_order);
+
+#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__

diff  --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h 
b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
new file mode 100644
index 0000000000000..5f864e1057b8b
--- /dev/null
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
+#define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
+
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope,
+                                                      int memory_order);
+
+#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__

diff  --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index d91f08533e149..76c3266e3af7b 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,4 +1,6 @@
 math/clc_ldexp_override.cl
+mem_fence/clc_mem_fence.cl
+synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_offset.cl
 workitem/clc_get_global_size.cl
 workitem/clc_get_group_id.cl

diff  --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl 
b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
new file mode 100644
index 0000000000000..9e6460313718e
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+
+void __clc_amdgcn_s_waitcnt(unsigned flags);
+
+// s_waitcnt takes 16bit argument with a combined number of maximum allowed
+// pending operations:
+// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
+// [7] -- undefined
+// [6:4] -- exports, GDS, and mem write
+// [3:0] -- vector memory operations
+
+// Newer clang supports __builtin_amdgcn_s_waitcnt
+#if __clang_major__ >= 5
+#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
+#else
+#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
+_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
+#endif
+
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
+                                            int memory_order) {
+  if (memory_scope & __MEMORY_SCOPE_DEVICE) {
+    // scalar loads are counted with LGKM but we don't know whether
+    // the compiler turned any loads to scalar
+    __waitcnt(0);
+  } else if (memory_scope & __MEMORY_SCOPE_WRKGRP)
+    __waitcnt(0xff); // LGKM is [12:8]
+}
+#undef __waitcnt

diff  --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl 
b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
new file mode 100644
index 0000000000000..ff3628fa7c339
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
+                                                     int memory_order) {
+  __clc_mem_fence(memory_scope, memory_order);
+  __builtin_amdgcn_s_barrier();
+}

diff  --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES 
b/libclc/clc/lib/ptx-nvidiacl/SOURCES
index 05368c5e4d4e3..b6f50654f89c5 100644
--- a/libclc/clc/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES
@@ -1,3 +1,5 @@
+mem_fence/clc_mem_fence.cl
+synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_id.cl
 workitem/clc_get_group_id.cl
 workitem/clc_get_local_id.cl

diff  --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl 
b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
new file mode 100644
index 0000000000000..b3e2375e755a2
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
+                                            int memory_order) {
+  if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP))
+    __nvvm_membar_cta();
+}

diff  --git 
a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl 
b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
new file mode 100644
index 0000000000000..6cb37a38f06ac
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/synchronization/clc_work_group_barrier.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
+                                                     int memory_order) {
+  __syncthreads();
+}

diff  --git 
a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h 
b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
index 6636515fca47d..7b2f701c1ff99 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
@@ -13,5 +13,6 @@ typedef uint cl_mem_fence_flags;
 
 #define CLK_LOCAL_MEM_FENCE 1
 #define CLK_GLOBAL_MEM_FENCE 2
+#define CLK_IMAGE_MEM_FENCE 4
 
 #endif // __CLC_OPENCL_SYNCHRONIZATION_CL_MEM_FENCE_FLAGS_H__

diff  --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h 
b/libclc/opencl/include/clc/opencl/synchronization/utils.h
new file mode 100644
index 0000000000000..cf3baf28cb5f1
--- /dev/null
+++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
+#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
+
+#include <clc/internal/clc.h>
+#include <clc/opencl/synchronization/cl_mem_fence_flags.h>
+
+_CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) {
+  int memory_scope = 0;
+  if (flag & CLK_GLOBAL_MEM_FENCE)
+    memory_scope |= __MEMORY_SCOPE_DEVICE;
+  if (flag & CLK_LOCAL_MEM_FENCE)
+    memory_scope |= __MEMORY_SCOPE_WRKGRP;
+  return memory_scope;
+}
+
+#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__

diff  --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl 
b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 88b953005aae6..81216d6a26cf2 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -6,34 +6,15 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include <clc/mem_fence/clc_mem_fence.h>
 #include <clc/opencl/explicit_fence/explicit_memory_fence.h>
-
-void __clc_amdgcn_s_waitcnt(unsigned flags);
-
-// s_waitcnt takes 16bit argument with a combined number of maximum allowed
-// pending operations:
-// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
-// [7] -- undefined
-// [6:4] -- exports, GDS, and mem write
-// [3:0] -- vector memory operations
-
-// Newer clang supports __builtin_amdgcn_s_waitcnt
-#if __clang_major__ >= 5
-#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
-#else
-#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
-_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
-#endif
+#include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  if (flags & CLK_GLOBAL_MEM_FENCE) {
-    // scalar loads are counted with LGKM but we don't know whether
-    // the compiler turned any loads to scalar
-    __waitcnt(0);
-  } else if (flags & CLK_LOCAL_MEM_FENCE)
-    __waitcnt(0xff); // LGKM is [12:8]
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_mem_fence(memory_scope, memory_order);
 }
-#undef __waitcnt
 
 // We don't have separate mechanism for read and write fences
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {

diff  --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl 
b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
index 5203db72f484c..c8322e602302c 100644
--- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
+++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
@@ -6,10 +6,12 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
 #include <clc/opencl/synchronization/barrier.h>
+#include <clc/opencl/synchronization/utils.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
-  mem_fence(flags);
-  __builtin_amdgcn_s_barrier();
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_work_group_barrier(memory_scope, memory_order);
 }

diff  --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl 
b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index d24569ecda1bc..e22ed870a7e6b 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -6,11 +6,14 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include <clc/mem_fence/clc_mem_fence.h>
 #include <clc/opencl/explicit_fence/explicit_memory_fence.h>
+#include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
-    __nvvm_membar_cta();
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_mem_fence(memory_scope, memory_order);
 }
 
 // We do not have separate mechanism for read and write fences.

diff  --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl 
b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
index 7c57478795dda..c8322e602302c 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
@@ -7,7 +7,11 @@
 
//===----------------------------------------------------------------------===//
 
 #include <clc/opencl/synchronization/barrier.h>
+#include <clc/opencl/synchronization/utils.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
-  __syncthreads();
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_work_group_barrier(memory_scope, memory_order);
 }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to