https://github.com/wenju-he updated 
https://github.com/llvm/llvm-project/pull/151446

>From eed56d228c0613f563c23f9be23d681ef3d87f2b Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Thu, 31 Jul 2025 05:07:23 +0200
Subject: [PATCH 1/3] [libclc] Move mem_fence and barrier to clc library

The __clc_mem_fence and __clc_barrier functions have two parameters, Scope
and MemorySemantics, which are defined in the SPIR-V spec. This design
allows the clc functions to implement the SPIR-V ControlBarrier and
MemoryBarrier functions in the future.

The default memory ordering in clc is set to SequentiallyConsistent,
which is also the default and strongest ordering in OpenCL and C++.

The default memory scope in clc is set to memory_scope_device for amdgcn
and ptx-nvidiacl since the __opencl_c_atomic_scope_all_devices feature
macro is not defined for these targets.

llvm-diff shows no change to amdgcn--amdhsa.bc and nvptx64--nvidiacl.bc.
---
 .../clc/include/clc/mem_fence/clc_mem_fence.h | 18 +++++++
 .../clc/mem_fence/clc_mem_scope_semantics.h   | 36 +++++++++++++
 .../include/clc/synchronization/clc_barrier.h | 18 +++++++
 libclc/clc/lib/amdgcn/SOURCES                 |  2 +
 .../clc/lib/amdgcn/mem_fence/clc_mem_fence.cl | 37 ++++++++++++++
 .../lib/amdgcn/synchronization/clc_barrier.cl | 16 ++++++
 libclc/clc/lib/ptx-nvidiacl/SOURCES           |  2 +
 .../ptx-nvidiacl/mem_fence/clc_mem_fence.cl   | 15 ++++++
 .../synchronization/clc_barrier.cl            | 14 ++++++
 .../synchronization/cl_mem_fence_flags.h      | 27 ++++++++++
 .../clc/opencl/synchronization/utils.h        | 50 +++++++++++++++++++
 libclc/opencl/lib/amdgcn/mem_fence/fence.cl   | 29 ++---------
 .../lib/amdgcn/synchronization/barrier.cl     |  8 +--
 .../lib/ptx-nvidiacl/mem_fence/fence.cl       |  7 ++-
 .../ptx-nvidiacl/synchronization/barrier.cl   |  6 ++-
 15 files changed, 255 insertions(+), 30 deletions(-)
 create mode 100644 libclc/clc/include/clc/mem_fence/clc_mem_fence.h
 create mode 100644 libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
 create mode 100644 libclc/clc/include/clc/synchronization/clc_barrier.h
 create mode 100644 libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
 create mode 100644 libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl
 create mode 100644 libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
 create mode 100644 libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl
 create mode 100644 libclc/opencl/include/clc/opencl/synchronization/utils.h

diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
new file mode 100644
index 0000000000000..f0bbd136955bd
--- /dev/null
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
+#define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
+
+#include <clc/internal/clc.h>
+#include <clc/mem_fence/clc_mem_scope_semantics.h>
+
+_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(Scope scope,
+                                             MemorySemantics semantics);
+
+#endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h b/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
new file mode 100644
index 0000000000000..7294026386b7a
--- /dev/null
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
+#define __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
+
+// Scope values are defined in SPIR-V spec.
+typedef enum Scope {
+  CrossDevice = 0,
+  Device = 1,
+  Workgroup = 2,
+  Subgroup = 3,
+  Invocation = 4,
+} Scope;
+
+// MemorySemantics values are defined in SPIR-V spec.
+typedef enum MemorySemantics {
+  None = 0x0,
+  Acquire = 0x2,
+  Release = 0x4,
+  AcquireRelease = 0x8,
+  SequentiallyConsistent = 0x10,
+  UniformMemory = 0x40,
+  SubgroupMemory = 0x80,
+  WorkgroupMemory = 0x100,
+  CrossWorkgroupMemory = 0x200,
+  AtomicCounterMemory = 0x400,
+  ImageMemory = 0x800,
+} MemorySemantics;
+
+#endif // __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
diff --git a/libclc/clc/include/clc/synchronization/clc_barrier.h b/libclc/clc/include/clc/synchronization/clc_barrier.h
new file mode 100644
index 0000000000000..d363652c6e14d
--- /dev/null
+++ b/libclc/clc/include/clc/synchronization/clc_barrier.h
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
+#define __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
+
+#include <clc/internal/clc.h>
+#include <clc/mem_fence/clc_mem_scope_semantics.h>
+
+_CLC_OVERLOAD _CLC_DECL void __clc_barrier(Scope scope,
+                                           MemorySemantics semantics);
+
+#endif // __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 7bec1740f7636..f2f58e3124aa8 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,6 +1,8 @@
 math/clc_fmax.cl
 math/clc_fmin.cl
 math/clc_ldexp_override.cl
+mem_fence/clc_mem_fence.cl
+synchronization/clc_barrier.cl
 workitem/clc_get_global_offset.cl
 workitem/clc_get_global_size.cl
 workitem/clc_get_group_id.cl
diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
new file mode 100644
index 0000000000000..12ec6d8d18091
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+
+void __clc_amdgcn_s_waitcnt(unsigned flags);
+
+// s_waitcnt takes 16bit argument with a combined number of maximum allowed
+// pending operations:
+// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
+// [7] -- undefined
+// [6:4] -- exports, GDS, and mem write
+// [3:0] -- vector memory operations
+
+// Newer clang supports __builtin_amdgcn_s_waitcnt
+#if __clang_major__ >= 5
+#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
+#else
+#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
+_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
+#endif
+
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope,
+                                            MemorySemantics semantics) {
+  if (semantics & CrossWorkgroupMemory) {
+    // scalar loads are counted with LGKM but we don't know whether
+    // the compiler turned any loads to scalar
+    __waitcnt(0);
+  } else if (semantics & WorkgroupMemory)
+    __waitcnt(0xff); // LGKM is [12:8]
+}
+#undef __waitcnt
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl
new file mode 100644
index 0000000000000..0299a426e4d21
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+#include <clc/synchronization/clc_barrier.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope,
+                                          MemorySemantics semantics) {
+  __clc_mem_fence(scope, semantics);
+  __builtin_amdgcn_s_barrier();
+}
diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES b/libclc/clc/lib/ptx-nvidiacl/SOURCES
index 05368c5e4d4e3..a0fb861549ebc 100644
--- a/libclc/clc/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES
@@ -1,3 +1,5 @@
+mem_fence/clc_mem_fence.cl
+synchronization/clc_barrier.cl
 workitem/clc_get_global_id.cl
 workitem/clc_get_group_id.cl
 workitem/clc_get_local_id.cl
diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
new file mode 100644
index 0000000000000..4c0d342b7244f
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/mem_fence/clc_mem_fence.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope,
+                                            MemorySemantics semantics) {
+  if (semantics & (CrossWorkgroupMemory | WorkgroupMemory))
+    __nvvm_membar_cta();
+}
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl
new file mode 100644
index 0000000000000..920b17cb02f92
--- /dev/null
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/synchronization/clc_barrier.h>
+
+_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope,
+                                          MemorySemantics semantics) {
+  __syncthreads();
+}
diff --git a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
index 6636515fca47d..18f9a4afb2d5f 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
@@ -11,7 +11,34 @@
 
 typedef uint cl_mem_fence_flags;
 
+// Copied from
+// https://github.com/llvm/llvm-project/blob/08e40c12fa0c/clang/lib/Headers/opencl-c-base.h#L390
+typedef enum memory_scope {
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+#if defined(__opencl_c_atomic_scope_all_devices)
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+  memory_scope_all_devices = memory_scope_all_svm_devices,
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >=
+       // 202100)
+#endif // defined(__opencl_c_atomic_scope_all_devices)
+/**
+ * Subgroups have different requirements on forward progress, so just test
+ * all the relevant macros.
+ * CL 3.0 sub-groups "they are not guaranteed to make independent forward
+ * progress" KHR subgroups "Subgroups within a workgroup are independent, make
+ * forward progress with respect to each other"
+ */
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
+    defined(__opencl_c_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
+} memory_scope;
+
 #define CLK_LOCAL_MEM_FENCE 1
 #define CLK_GLOBAL_MEM_FENCE 2
+#define CLK_IMAGE_MEM_FENCE 4
 
 #endif // __CLC_OPENCL_SYNCHRONIZATION_CL_MEM_FENCE_FLAGS_H__
diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h
new file mode 100644
index 0000000000000..098d96d0a8a32
--- /dev/null
+++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
+#define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
+
+#include <clc/internal/clc.h>
+#include <clc/mem_fence/clc_mem_scope_semantics.h>
+#include <clc/opencl/synchronization/cl_mem_fence_flags.h>
+
+_CLC_INLINE Scope getCLCScope(memory_scope memory_scope) {
+  switch (memory_scope) {
+  case memory_scope_work_item:
+    return Invocation;
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
+    defined(__opencl_c_subgroups)
+  case memory_scope_sub_group:
+    return Subgroup;
+#endif
+  case memory_scope_work_group:
+    return Workgroup;
+  case memory_scope_device:
+    return Device;
+  default:
+    break;
+  }
+#ifdef __opencl_c_atomic_scope_all_devices
+  return CrossDevice;
+#else
+  return Device;
+#endif
+}
+
+_CLC_INLINE MemorySemantics getCLCMemorySemantics(cl_mem_fence_flags flag) {
+  MemorySemantics semantics = AcquireRelease;
+  if (flag & CLK_GLOBAL_MEM_FENCE)
+    semantics |= CrossWorkgroupMemory;
+  if (flag & CLK_LOCAL_MEM_FENCE)
+    semantics |= WorkgroupMemory;
+  if (flag & CLK_IMAGE_MEM_FENCE)
+    semantics |= ImageMemory;
+  return semantics;
+}
+
+#endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 88b953005aae6..10d879d835c06 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -6,34 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <clc/mem_fence/clc_mem_fence.h>
 #include <clc/opencl/explicit_fence/explicit_memory_fence.h>
-
-void __clc_amdgcn_s_waitcnt(unsigned flags);
-
-// s_waitcnt takes 16bit argument with a combined number of maximum allowed
-// pending operations:
-// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
-// [7] -- undefined
-// [6:4] -- exports, GDS, and mem write
-// [3:0] -- vector memory operations
-
-// Newer clang supports __builtin_amdgcn_s_waitcnt
-#if __clang_major__ >= 5
-#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
-#else
-#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
-_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
-#endif
+#include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  if (flags & CLK_GLOBAL_MEM_FENCE) {
-    // scalar loads are counted with LGKM but we don't know whether
-    // the compiler turned any loads to scalar
-    __waitcnt(0);
-  } else if (flags & CLK_LOCAL_MEM_FENCE)
-    __waitcnt(0xff); // LGKM is [12:8]
+  Scope scope = getCLCScope(memory_scope_device);
+  MemorySemantics semantics = getCLCMemorySemantics(flags);
+  __clc_mem_fence(scope, semantics);
 }
-#undef __waitcnt
 
 // We don't have separate mechanism for read and write fences
 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
index 5203db72f484c..b8372d4800bf1 100644
--- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
+++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
@@ -6,10 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/opencl/explicit_fence/explicit_memory_fence.h>
 #include <clc/opencl/synchronization/barrier.h>
+#include <clc/opencl/synchronization/utils.h>
+#include <clc/synchronization/clc_barrier.h>
 
 _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
-  mem_fence(flags);
-  __builtin_amdgcn_s_barrier();
+  Scope scope = getCLCScope(memory_scope_device);
+  MemorySemantics semantics = getCLCMemorySemantics(flags);
+  __clc_barrier(scope, semantics);
 }
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index d24569ecda1bc..2d591c90d63c2 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -6,11 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <clc/mem_fence/clc_mem_fence.h>
 #include <clc/opencl/explicit_fence/explicit_memory_fence.h>
+#include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
-    __nvvm_membar_cta();
+  Scope scope = getCLCScope(memory_scope_device);
+  MemorySemantics semantics = getCLCMemorySemantics(flags);
+  __clc_mem_fence(scope, semantics);
 }
 
 // We do not have separate mechanism for read and write fences.
diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
index 7c57478795dda..b8372d4800bf1 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
@@ -7,7 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include <clc/opencl/synchronization/barrier.h>
+#include <clc/opencl/synchronization/utils.h>
+#include <clc/synchronization/clc_barrier.h>
 
 _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
-  __syncthreads();
+  Scope scope = getCLCScope(memory_scope_device);
+  MemorySemantics semantics = getCLCMemorySemantics(flags);
+  __clc_barrier(scope, semantics);
 }

>From 29ec1763d807d66c5fd3ec19e0ef311e520026e7 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Thu, 31 Jul 2025 07:17:11 +0200
Subject: [PATCH 2/3] default to SequentiallyConsistent

---
 libclc/opencl/include/clc/opencl/synchronization/utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h
index 098d96d0a8a32..bbcfa20a556d5 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/utils.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h
@@ -37,7 +37,7 @@ _CLC_INLINE Scope getCLCScope(memory_scope memory_scope) {
 }
 
 _CLC_INLINE MemorySemantics getCLCMemorySemantics(cl_mem_fence_flags flag) {
-  MemorySemantics semantics = AcquireRelease;
+  MemorySemantics semantics = SequentiallyConsistent;
   if (flag & CLK_GLOBAL_MEM_FENCE)
     semantics |= CrossWorkgroupMemory;
   if (flag & CLK_LOCAL_MEM_FENCE)

>From 86ca62c88512b55cfd772e762e729f52907ee102 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Tue, 5 Aug 2025 12:45:42 +0200
Subject: [PATCH 3/3] rename clc_barrier to clc_work_group_barrier, replace
 SPIR-V constants with clang macros

---
 .../clc/include/clc/mem_fence/clc_mem_fence.h |  5 ++-
 .../clc/mem_fence/clc_mem_scope_semantics.h   | 36 -------------------
 ...clc_barrier.h => clc_work_group_barrier.h} | 11 +++---
 libclc/clc/lib/amdgcn/SOURCES                 |  2 +-
 .../clc/lib/amdgcn/mem_fence/clc_mem_fence.cl |  8 ++---
 ...c_barrier.cl => clc_work_group_barrier.cl} |  8 ++---
 libclc/clc/lib/ptx-nvidiacl/SOURCES           |  2 +-
 .../ptx-nvidiacl/mem_fence/clc_mem_fence.cl   |  6 ++--
 ...c_barrier.cl => clc_work_group_barrier.cl} |  6 ++--
 .../synchronization/cl_mem_fence_flags.h      | 26 --------------
 .../clc/opencl/synchronization/utils.h        | 36 +++----------------
 libclc/opencl/lib/amdgcn/mem_fence/fence.cl   |  6 ++--
 .../lib/amdgcn/synchronization/barrier.cl     |  8 ++---
 .../lib/ptx-nvidiacl/mem_fence/fence.cl       |  6 ++--
 .../ptx-nvidiacl/synchronization/barrier.cl   |  8 ++---
 15 files changed, 42 insertions(+), 132 deletions(-)
 delete mode 100644 libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
 rename libclc/clc/include/clc/synchronization/{clc_barrier.h => clc_work_group_barrier.h} (55%)
 rename libclc/clc/lib/amdgcn/synchronization/{clc_barrier.cl => clc_work_group_barrier.cl} (64%)
 rename libclc/clc/lib/ptx-nvidiacl/synchronization/{clc_barrier.cl => clc_work_group_barrier.cl} (66%)

diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
index f0bbd136955bd..2321634c76842 100644
--- a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
+++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h
@@ -10,9 +10,8 @@
 #define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
 
 #include <clc/internal/clc.h>
-#include <clc/mem_fence/clc_mem_scope_semantics.h>
 
-_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(Scope scope,
-                                             MemorySemantics semantics);
+_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope,
+                                             int memory_order);
 
 #endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__
diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h b/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
deleted file mode 100644
index 7294026386b7a..0000000000000
--- a/libclc/clc/include/clc/mem_fence/clc_mem_scope_semantics.h
+++ /dev/null
@@ -1,36 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
-#define __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
-
-// Scope values are defined in SPIR-V spec.
-typedef enum Scope {
-  CrossDevice = 0,
-  Device = 1,
-  Workgroup = 2,
-  Subgroup = 3,
-  Invocation = 4,
-} Scope;
-
-// MemorySemantics values are defined in SPIR-V spec.
-typedef enum MemorySemantics {
-  None = 0x0,
-  Acquire = 0x2,
-  Release = 0x4,
-  AcquireRelease = 0x8,
-  SequentiallyConsistent = 0x10,
-  UniformMemory = 0x40,
-  SubgroupMemory = 0x80,
-  WorkgroupMemory = 0x100,
-  CrossWorkgroupMemory = 0x200,
-  AtomicCounterMemory = 0x400,
-  ImageMemory = 0x800,
-} MemorySemantics;
-
-#endif // __CLC_MEM_FENCE_CLC_MEM_SCOPE_SEMANTICS_H__
diff --git a/libclc/clc/include/clc/synchronization/clc_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
similarity index 55%
rename from libclc/clc/include/clc/synchronization/clc_barrier.h
rename to libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
index d363652c6e14d..5f864e1057b8b 100644
--- a/libclc/clc/include/clc/synchronization/clc_barrier.h
+++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h
@@ -6,13 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
-#define __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
+#ifndef __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
+#define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
 
 #include <clc/internal/clc.h>
-#include <clc/mem_fence/clc_mem_scope_semantics.h>
 
-_CLC_OVERLOAD _CLC_DECL void __clc_barrier(Scope scope,
-                                           MemorySemantics semantics);
+_CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope,
+                                                      int memory_order);
 
-#endif // __CLC_SYNCHRONIZATION_CLC_BARRIER_H__
+#endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index f2f58e3124aa8..b20d3db50c416 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -2,7 +2,7 @@ math/clc_fmax.cl
 math/clc_fmin.cl
 math/clc_ldexp_override.cl
 mem_fence/clc_mem_fence.cl
-synchronization/clc_barrier.cl
+synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_offset.cl
 workitem/clc_get_global_size.cl
 workitem/clc_get_group_id.cl
diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
index 12ec6d8d18091..9e6460313718e 100644
--- a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
+++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl
@@ -25,13 +25,13 @@ void __clc_amdgcn_s_waitcnt(unsigned flags);
 _CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
 #endif
 
-_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope,
-                                            MemorySemantics semantics) {
-  if (semantics & CrossWorkgroupMemory) {
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
+                                            int memory_order) {
+  if (memory_scope & __MEMORY_SCOPE_DEVICE) {
     // scalar loads are counted with LGKM but we don't know whether
     // the compiler turned any loads to scalar
     __waitcnt(0);
-  } else if (semantics & WorkgroupMemory)
+  } else if (memory_scope & __MEMORY_SCOPE_WRKGRP)
     __waitcnt(0xff); // LGKM is [12:8]
 }
 #undef __waitcnt
diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
similarity index 64%
rename from libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl
rename to libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
index 0299a426e4d21..ff3628fa7c339 100644
--- a/libclc/clc/lib/amdgcn/synchronization/clc_barrier.cl
+++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include <clc/mem_fence/clc_mem_fence.h>
-#include <clc/synchronization/clc_barrier.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
 
-_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope,
-                                          MemorySemantics semantics) {
-  __clc_mem_fence(scope, semantics);
+_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
+                                                     int memory_order) {
+  __clc_mem_fence(memory_scope, memory_order);
   __builtin_amdgcn_s_barrier();
 }
diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES b/libclc/clc/lib/ptx-nvidiacl/SOURCES
index a0fb861549ebc..b6f50654f89c5 100644
--- a/libclc/clc/lib/ptx-nvidiacl/SOURCES
+++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES
@@ -1,5 +1,5 @@
 mem_fence/clc_mem_fence.cl
-synchronization/clc_barrier.cl
+synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_id.cl
 workitem/clc_get_group_id.cl
 workitem/clc_get_local_id.cl
diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
index 4c0d342b7244f..b3e2375e755a2 100644
--- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl
@@ -8,8 +8,8 @@
 
 #include <clc/mem_fence/clc_mem_fence.h>
 
-_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(Scope scope,
-                                            MemorySemantics semantics) {
-  if (semantics & (CrossWorkgroupMemory | WorkgroupMemory))
+_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope,
+                                            int memory_order) {
+  if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP))
     __nvvm_membar_cta();
 }
diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
similarity index 66%
rename from libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl
rename to libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
index 920b17cb02f92..6cb37a38f06ac 100644
--- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_barrier.cl
+++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/synchronization/clc_barrier.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
 
-_CLC_OVERLOAD _CLC_DEF void __clc_barrier(Scope scope,
-                                          MemorySemantics semantics) {
+_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope,
+                                                     int memory_order) {
   __syncthreads();
 }
diff --git a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
index 18f9a4afb2d5f..7b2f701c1ff99 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/cl_mem_fence_flags.h
@@ -11,32 +11,6 @@
 
 typedef uint cl_mem_fence_flags;
 
-// Copied from
-// https://github.com/llvm/llvm-project/blob/08e40c12fa0c/clang/lib/Headers/opencl-c-base.h#L390
-typedef enum memory_scope {
-  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
-  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
-  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
-#if defined(__opencl_c_atomic_scope_all_devices)
-  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
-#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
-  memory_scope_all_devices = memory_scope_all_svm_devices,
-#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >=
-       // 202100)
-#endif // defined(__opencl_c_atomic_scope_all_devices)
-/**
- * Subgroups have different requirements on forward progress, so just test
- * all the relevant macros.
- * CL 3.0 sub-groups "they are not guaranteed to make independent forward
- * progress" KHR subgroups "Subgroups within a workgroup are independent, make
- * forward progress with respect to each other"
- */
-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
-    defined(__opencl_c_subgroups)
-  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
-#endif
-} memory_scope;
-
 #define CLK_LOCAL_MEM_FENCE 1
 #define CLK_GLOBAL_MEM_FENCE 2
 #define CLK_IMAGE_MEM_FENCE 4
diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h
index bbcfa20a556d5..cf3baf28cb5f1 100644
--- a/libclc/opencl/include/clc/opencl/synchronization/utils.h
+++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h
@@ -10,41 +10,15 @@
 #define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
 
 #include <clc/internal/clc.h>
-#include <clc/mem_fence/clc_mem_scope_semantics.h>
 #include <clc/opencl/synchronization/cl_mem_fence_flags.h>
 
-_CLC_INLINE Scope getCLCScope(memory_scope memory_scope) {
-  switch (memory_scope) {
-  case memory_scope_work_item:
-    return Invocation;
-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
-    defined(__opencl_c_subgroups)
-  case memory_scope_sub_group:
-    return Subgroup;
-#endif
-  case memory_scope_work_group:
-    return Workgroup;
-  case memory_scope_device:
-    return Device;
-  default:
-    break;
-  }
-#ifdef __opencl_c_atomic_scope_all_devices
-  return CrossDevice;
-#else
-  return Device;
-#endif
-}
-
-_CLC_INLINE MemorySemantics getCLCMemorySemantics(cl_mem_fence_flags flag) {
-  MemorySemantics semantics = SequentiallyConsistent;
+_CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) {
+  int memory_scope = 0;
   if (flag & CLK_GLOBAL_MEM_FENCE)
-    semantics |= CrossWorkgroupMemory;
+    memory_scope |= __MEMORY_SCOPE_DEVICE;
   if (flag & CLK_LOCAL_MEM_FENCE)
-    semantics |= WorkgroupMemory;
-  if (flag & CLK_IMAGE_MEM_FENCE)
-    semantics |= ImageMemory;
-  return semantics;
+    memory_scope |= __MEMORY_SCOPE_WRKGRP;
+  return memory_scope;
 }
 
 #endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__
diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
index 10d879d835c06..81216d6a26cf2 100644
--- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
+++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl
@@ -11,9 +11,9 @@
 #include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  Scope scope = getCLCScope(memory_scope_device);
-  MemorySemantics semantics = getCLCMemorySemantics(flags);
-  __clc_mem_fence(scope, semantics);
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_mem_fence(memory_scope, memory_order);
 }
 
 // We don't have separate mechanism for read and write fences
diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
index b8372d4800bf1..c8322e602302c 100644
--- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
+++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl
@@ -8,10 +8,10 @@
 
 #include <clc/opencl/synchronization/barrier.h>
 #include <clc/opencl/synchronization/utils.h>
-#include <clc/synchronization/clc_barrier.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
-  Scope scope = getCLCScope(memory_scope_device);
-  MemorySemantics semantics = getCLCMemorySemantics(flags);
-  __clc_barrier(scope, semantics);
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_work_group_barrier(memory_scope, memory_order);
 }
diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
index 2d591c90d63c2..e22ed870a7e6b 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl
@@ -11,9 +11,9 @@
 #include <clc/opencl/synchronization/utils.h>
 
 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
-  Scope scope = getCLCScope(memory_scope_device);
-  MemorySemantics semantics = getCLCMemorySemantics(flags);
-  __clc_mem_fence(scope, semantics);
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_mem_fence(memory_scope, memory_order);
 }
 
 // We do not have separate mechanism for read and write fences.
diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
index b8372d4800bf1..c8322e602302c 100644
--- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
+++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl
@@ -8,10 +8,10 @@
 
 #include <clc/opencl/synchronization/barrier.h>
 #include <clc/opencl/synchronization/utils.h>
-#include <clc/synchronization/clc_barrier.h>
+#include <clc/synchronization/clc_work_group_barrier.h>
 
 _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
-  Scope scope = getCLCScope(memory_scope_device);
-  MemorySemantics semantics = getCLCMemorySemantics(flags);
-  __clc_barrier(scope, semantics);
+  int memory_scope = getCLCMemoryScope(flags);
+  int memory_order = __ATOMIC_SEQ_CST;
+  __clc_work_group_barrier(memory_scope, memory_order);
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to