https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/152275
>From c48a94749e7e4ee261895826f2df2e2c48f040ef Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Wed, 6 Aug 2025 11:07:15 +0200 Subject: [PATCH 1/3] [libclc] update __clc_mem_fence: add MemorySemantic arg and use __builtin_amdgcn_fence for AMDGPU It is necessary to add a MemorySemantic argument, which specifies the memory or address space to which the memory ordering is applied. The MemorySemantic argument is also necessary for implementing the SPIR-V MemoryBarrier instruction. Additionally, the implementation of __clc_mem_fence on Intel GPUs requires the MemorySemantic argument. Using __builtin_amdgcn_fence for AMDGPU is a follow-up to https://github.com/llvm/llvm-project/pull/151446#discussion_r2254006508 llvm-diff shows no change to nvptx64--nvidiacl.bc. --- .../clc/include/clc/mem_fence/clc_mem_fence.h | 5 +- .../include/clc/mem_fence/clc_mem_semantic.h | 21 ++++++ .../synchronization/clc_work_group_barrier.h | 6 +- .../clc/lib/amdgcn/mem_fence/clc_mem_fence.cl | 67 ++++++++++++------- .../synchronization/clc_work_group_barrier.cl | 7 +- .../ptx-nvidiacl/mem_fence/clc_mem_fence.cl | 4 +- .../synchronization/clc_work_group_barrier.cl | 5 +- .../clc/opencl/synchronization/utils.h | 11 +++ libclc/opencl/lib/amdgcn/mem_fence/fence.cl | 3 +- .../lib/amdgcn/synchronization/barrier.cl | 3 +- .../lib/ptx-nvidiacl/mem_fence/fence.cl | 3 +- .../ptx-nvidiacl/synchronization/barrier.cl | 3 +- 12 files changed, 99 insertions(+), 39 deletions(-) create mode 100644 libclc/clc/include/clc/mem_fence/clc_mem_semantic.h diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h index 2321634c76842..92502270c802f 100644 --- a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h +++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h @@ -10,8 +10,9 @@ #define __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ #include <clc/internal/clc.h> +#include <clc/mem_fence/clc_mem_semantic.h> -_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope, - int
memory_order); +_CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope, int memory_order, + MemorySemantic memory_semantic); #endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h new file mode 100644 index 0000000000000..662ef9ce087e9 --- /dev/null +++ b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ +#define __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ + +// The memory or address space to which the memory ordering is applied. +typedef enum MemorySemantic { + MEMORY_PRIVATE = 0x1, + MEMORY_LOCAL = 0x2, + MEMORY_GLOBAL = 0x4, + MEMORY_CONSTANT = 0x8, + MEMORY_GENERIC = 0x10 +} MemorySemantic; + +#endif // __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h index 5f864e1057b8b..bc8627782d3c0 100644 --- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h +++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h @@ -10,8 +10,10 @@ #define __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__ #include <clc/internal/clc.h> +#include <clc/mem_fence/clc_mem_semantic.h> -_CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope, - int memory_order); +_CLC_OVERLOAD _CLC_DECL void +__clc_work_group_barrier(int memory_scope, int memory_order, + MemorySemantic memory_semantic); #endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__ diff --git 
a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl index 9e6460313718e..880db3c2f29a3 100644 --- a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl @@ -8,30 +8,49 @@ #include <clc/mem_fence/clc_mem_fence.h> -void __clc_amdgcn_s_waitcnt(unsigned flags); +#define BUILTIN_FENCE_ORDER(memory_order, ...) \ + switch (memory_order) { \ + case __ATOMIC_ACQUIRE: \ + __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, __VA_ARGS__); \ + break; \ + case __ATOMIC_RELEASE: \ + __builtin_amdgcn_fence(__ATOMIC_RELEASE, __VA_ARGS__); \ + break; \ + case __ATOMIC_ACQ_REL: \ + __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, __VA_ARGS__); \ + break; \ + case __ATOMIC_SEQ_CST: \ + __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, __VA_ARGS__); \ + break; \ + default: \ + __builtin_unreachable(); \ + } \ + break; -// s_waitcnt takes 16bit argument with a combined number of maximum allowed -// pending operations: -// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages -// [7] -- undefined -// [6:4] -- exports, GDS, and mem write -// [3:0] -- vector memory operations +#define BUILTIN_FENCE(memory_scope, memory_order, ...) 
\ + switch (memory_scope) { \ + case __MEMORY_SCOPE_DEVICE: \ + BUILTIN_FENCE_ORDER(memory_order, "agent", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_WRKGRP: \ + BUILTIN_FENCE_ORDER(memory_order, "workgroup", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_WVFRNT: \ + BUILTIN_FENCE_ORDER(memory_order, "wavefront", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_SINGLE: \ + BUILTIN_FENCE_ORDER(memory_order, "singlethread", ##__VA_ARGS__) \ + case __MEMORY_SCOPE_SYSTEM: \ + default: \ + BUILTIN_FENCE_ORDER(memory_order, "", ##__VA_ARGS__) \ + } -// Newer clang supports __builtin_amdgcn_s_waitcnt -#if __clang_major__ >= 5 -#define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x) -#else -#define __waitcnt(x) __clc_amdgcn_s_waitcnt(x) -_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); -#endif - -_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, - int memory_order) { - if (memory_scope & __MEMORY_SCOPE_DEVICE) { - // scalar loads are counted with LGKM but we don't know whether - // the compiler turned any loads to scalar - __waitcnt(0); - } else if (memory_scope & __MEMORY_SCOPE_WRKGRP) - __waitcnt(0xff); // LGKM is [12:8] +_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, int memory_order, + MemorySemantic memory_semantic) { + if (memory_semantic == MEMORY_LOCAL) { + BUILTIN_FENCE(memory_scope, memory_order, "local") + } else if (memory_semantic == MEMORY_GLOBAL) { + BUILTIN_FENCE(memory_scope, memory_order, "global") + } else if (memory_semantic == (MEMORY_LOCAL | MEMORY_GLOBAL)) { + BUILTIN_FENCE(memory_scope, memory_order, "local", "global") + } else { + BUILTIN_FENCE(memory_scope, memory_order) + } } -#undef __waitcnt diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl index ff3628fa7c339..a034ce1855cc1 100644 --- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl @@ 
-9,8 +9,9 @@ #include <clc/mem_fence/clc_mem_fence.h> #include <clc/synchronization/clc_work_group_barrier.h> -_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope, - int memory_order) { - __clc_mem_fence(memory_scope, memory_order); +_CLC_OVERLOAD _CLC_DEF void +__clc_work_group_barrier(int memory_scope, int memory_order, + MemorySemantic memory_semantic) { + __clc_mem_fence(memory_scope, memory_order, memory_semantic); __builtin_amdgcn_s_barrier(); } diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl index b3e2375e755a2..fb8da1690dfed 100644 --- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl @@ -8,8 +8,8 @@ #include <clc/mem_fence/clc_mem_fence.h> -_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, - int memory_order) { +_CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, int memory_order, + MemorySemantic memory_semantic) { if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP)) __nvvm_membar_cta(); } diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl index 6cb37a38f06ac..d3c39c844752f 100644 --- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl @@ -8,7 +8,8 @@ #include <clc/synchronization/clc_work_group_barrier.h> -_CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope, - int memory_order) { +_CLC_OVERLOAD _CLC_DEF void +__clc_work_group_barrier(int memory_scope, int memory_order, + MemorySemantic memory_semantic) { __syncthreads(); } diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h index cf3baf28cb5f1..a0b63ae2a45d9 100644 --- 
a/libclc/opencl/include/clc/opencl/synchronization/utils.h +++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h @@ -10,6 +10,7 @@ #define __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ #include <clc/internal/clc.h> +#include <clc/mem_fence/clc_mem_semantic.h> #include <clc/opencl/synchronization/cl_mem_fence_flags.h> _CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) { @@ -21,4 +22,14 @@ _CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) { return memory_scope; } +_CLC_INLINE MemorySemantic getCLCMemorySemantic(cl_mem_fence_flags flag) { + if ((flag & CLK_LOCAL_MEM_FENCE) && (flag & CLK_GLOBAL_MEM_FENCE)) + return MEMORY_LOCAL | MEMORY_GLOBAL; + if (flag & CLK_LOCAL_MEM_FENCE) + return MEMORY_LOCAL; + if (flag & CLK_GLOBAL_MEM_FENCE) + return MEMORY_GLOBAL; + __builtin_unreachable(); +} + #endif // __CLC_OPENCL_SYNCHRONIZATION_UTILS_H__ diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl index 81216d6a26cf2..ccec510d95141 100644 --- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl +++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl @@ -13,7 +13,8 @@ _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - __clc_mem_fence(memory_scope, memory_order); + MemorySemantic memory_semantic = getCLCMemorySemantic(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantic); } // We don't have separate mechanism for read and write fences diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl index c8322e602302c..13427d2f2102b 100644 --- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl +++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl @@ -13,5 +13,6 @@ _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - __clc_work_group_barrier(memory_scope, 
memory_order); + MemorySemantic memory_semantic = getCLCMemorySemantic(flags); + __clc_work_group_barrier(memory_scope, memory_order, memory_semantic); } diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl index e22ed870a7e6b..6291fbc5b41ad 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl @@ -13,7 +13,8 @@ _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - __clc_mem_fence(memory_scope, memory_order); + MemorySemantic memory_semantic = getCLCMemorySemantic(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantic); } // We do not have separate mechanism for read and write fences. diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl index c8322e602302c..13427d2f2102b 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl @@ -13,5 +13,6 @@ _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - __clc_work_group_barrier(memory_scope, memory_order); + MemorySemantic memory_semantic = getCLCMemorySemantic(flags); + __clc_work_group_barrier(memory_scope, memory_order, memory_semantic); } >From f25de34f8e1f30c4eff2f3e2432eff8637ec65b7 Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Thu, 7 Aug 2025 03:29:50 +0200 Subject: [PATCH 2/3] use << --- libclc/clc/include/clc/mem_fence/clc_mem_semantic.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h index 662ef9ce087e9..b13db65249f19 100644 --- 
a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h +++ b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h @@ -11,11 +11,11 @@ // The memory or address space to which the memory ordering is applied. typedef enum MemorySemantic { - MEMORY_PRIVATE = 0x1, - MEMORY_LOCAL = 0x2, - MEMORY_GLOBAL = 0x4, - MEMORY_CONSTANT = 0x8, - MEMORY_GENERIC = 0x10 + MEMORY_PRIVATE = 1 << 0, + MEMORY_GLOBAL = 1 << 1, + MEMORY_CONSTANT = 1 << 2, + MEMORY_LOCAL = 1 << 3, + MEMORY_GENERIC = 1 << 4, } MemorySemantic; #endif // __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ >From 001cec4941559bbdf0be8a1e0441509cdddbd25c Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Tue, 12 Aug 2025 02:29:47 +0200 Subject: [PATCH 3/3] rename MemorySemantic to MemorySemantics --- libclc/clc/include/clc/mem_fence/clc_mem_fence.h | 2 +- libclc/clc/include/clc/mem_fence/clc_mem_semantic.h | 4 ++-- .../include/clc/synchronization/clc_work_group_barrier.h | 2 +- libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl | 8 ++++---- .../lib/amdgcn/synchronization/clc_work_group_barrier.cl | 4 ++-- libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl | 2 +- .../synchronization/clc_work_group_barrier.cl | 2 +- libclc/opencl/include/clc/opencl/synchronization/utils.h | 2 +- libclc/opencl/lib/amdgcn/mem_fence/fence.cl | 4 ++-- libclc/opencl/lib/amdgcn/synchronization/barrier.cl | 4 ++-- libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl | 4 ++-- libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl | 4 ++-- 12 files changed, 21 insertions(+), 21 deletions(-) diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h index 92502270c802f..725fcd90f4ee8 100644 --- a/libclc/clc/include/clc/mem_fence/clc_mem_fence.h +++ b/libclc/clc/include/clc/mem_fence/clc_mem_fence.h @@ -13,6 +13,6 @@ #include <clc/mem_fence/clc_mem_semantic.h> _CLC_OVERLOAD _CLC_DECL void __clc_mem_fence(int memory_scope, int memory_order, - MemorySemantic memory_semantic); + 
MemorySemantics memory_semantics); #endif // __CLC_MEM_FENCE_CLC_MEM_FENCE_H__ diff --git a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h index b13db65249f19..a54d21cc5b8f1 100644 --- a/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h +++ b/libclc/clc/include/clc/mem_fence/clc_mem_semantic.h @@ -10,12 +10,12 @@ #define __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ // The memory or address space to which the memory ordering is applied. -typedef enum MemorySemantic { +typedef enum MemorySemantics { MEMORY_PRIVATE = 1 << 0, MEMORY_GLOBAL = 1 << 1, MEMORY_CONSTANT = 1 << 2, MEMORY_LOCAL = 1 << 3, MEMORY_GENERIC = 1 << 4, -} MemorySemantic; +} MemorySemantics; #endif // __CLC_MEM_FENCE_CLC_MEM_SEMANTIC_H__ diff --git a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h index bc8627782d3c0..e4ff048ff82d8 100644 --- a/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h +++ b/libclc/clc/include/clc/synchronization/clc_work_group_barrier.h @@ -14,6 +14,6 @@ _CLC_OVERLOAD _CLC_DECL void __clc_work_group_barrier(int memory_scope, int memory_order, - MemorySemantic memory_semantic); + MemorySemantics memory_semantics); #endif // __CLC_SYNCHRONIZATION_CLC_WORK_GROUP_BARRIER_H__ diff --git a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl index 880db3c2f29a3..27e6223f936ff 100644 --- a/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/amdgcn/mem_fence/clc_mem_fence.cl @@ -43,12 +43,12 @@ } _CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, int memory_order, - MemorySemantic memory_semantic) { - if (memory_semantic == MEMORY_LOCAL) { + MemorySemantics memory_semantics) { + if (memory_semantics == MEMORY_LOCAL) { BUILTIN_FENCE(memory_scope, memory_order, "local") - } else if (memory_semantic == MEMORY_GLOBAL) { + } else if (memory_semantics == 
MEMORY_GLOBAL) { BUILTIN_FENCE(memory_scope, memory_order, "global") - } else if (memory_semantic == (MEMORY_LOCAL | MEMORY_GLOBAL)) { + } else if (memory_semantics == (MEMORY_LOCAL | MEMORY_GLOBAL)) { BUILTIN_FENCE(memory_scope, memory_order, "local", "global") } else { BUILTIN_FENCE(memory_scope, memory_order) diff --git a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl index a034ce1855cc1..e8def6994833e 100644 --- a/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/amdgcn/synchronization/clc_work_group_barrier.cl @@ -11,7 +11,7 @@ _CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int memory_scope, int memory_order, - MemorySemantic memory_semantic) { - __clc_mem_fence(memory_scope, memory_order, memory_semantic); + MemorySemantics memory_semantics) { + __clc_mem_fence(memory_scope, memory_order, memory_semantics); __builtin_amdgcn_s_barrier(); } diff --git a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl index fb8da1690dfed..db2900580a638 100644 --- a/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl +++ b/libclc/clc/lib/ptx-nvidiacl/mem_fence/clc_mem_fence.cl @@ -9,7 +9,7 @@ #include <clc/mem_fence/clc_mem_fence.h> _CLC_OVERLOAD _CLC_DEF void __clc_mem_fence(int memory_scope, int memory_order, - MemorySemantic memory_semantic) { + MemorySemantics memory_semantics) { if (memory_scope & (__MEMORY_SCOPE_DEVICE | __MEMORY_SCOPE_WRKGRP)) __nvvm_membar_cta(); } diff --git a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl index d3c39c844752f..5f8dad08db8ef 100644 --- a/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl +++ b/libclc/clc/lib/ptx-nvidiacl/synchronization/clc_work_group_barrier.cl @@ -10,6 +10,6 @@ _CLC_OVERLOAD _CLC_DEF void __clc_work_group_barrier(int 
memory_scope, int memory_order, - MemorySemantic memory_semantic) { + MemorySemantics memory_semantics) { __syncthreads(); } diff --git a/libclc/opencl/include/clc/opencl/synchronization/utils.h b/libclc/opencl/include/clc/opencl/synchronization/utils.h index a0b63ae2a45d9..217fbbb1a50fc 100644 --- a/libclc/opencl/include/clc/opencl/synchronization/utils.h +++ b/libclc/opencl/include/clc/opencl/synchronization/utils.h @@ -22,7 +22,7 @@ _CLC_INLINE int getCLCMemoryScope(cl_mem_fence_flags flag) { return memory_scope; } -_CLC_INLINE MemorySemantic getCLCMemorySemantic(cl_mem_fence_flags flag) { +_CLC_INLINE MemorySemantics getCLCMemorySemantics(cl_mem_fence_flags flag) { if ((flag & CLK_LOCAL_MEM_FENCE) && (flag & CLK_GLOBAL_MEM_FENCE)) return MEMORY_LOCAL | MEMORY_GLOBAL; if (flag & CLK_LOCAL_MEM_FENCE) diff --git a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl index ccec510d95141..838b42ab2049e 100644 --- a/libclc/opencl/lib/amdgcn/mem_fence/fence.cl +++ b/libclc/opencl/lib/amdgcn/mem_fence/fence.cl @@ -13,8 +13,8 @@ _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - MemorySemantic memory_semantic = getCLCMemorySemantic(flags); - __clc_mem_fence(memory_scope, memory_order, memory_semantic); + MemorySemantics memory_semantics = getCLCMemorySemantics(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantics); } // We don't have separate mechanism for read and write fences diff --git a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl index 13427d2f2102b..a06c10445002e 100644 --- a/libclc/opencl/lib/amdgcn/synchronization/barrier.cl +++ b/libclc/opencl/lib/amdgcn/synchronization/barrier.cl @@ -13,6 +13,6 @@ _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - 
MemorySemantic memory_semantic = getCLCMemorySemantic(flags); - __clc_work_group_barrier(memory_scope, memory_order, memory_semantic); + MemorySemantics memory_semantics = getCLCMemorySemantics(flags); + __clc_work_group_barrier(memory_scope, memory_order, memory_semantics); } diff --git a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl index 6291fbc5b41ad..9415802e794dc 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/mem_fence/fence.cl @@ -13,8 +13,8 @@ _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - MemorySemantic memory_semantic = getCLCMemorySemantic(flags); - __clc_mem_fence(memory_scope, memory_order, memory_semantic); + MemorySemantics memory_semantics = getCLCMemorySemantics(flags); + __clc_mem_fence(memory_scope, memory_order, memory_semantics); } // We do not have separate mechanism for read and write fences. diff --git a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl index 13427d2f2102b..a06c10445002e 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl +++ b/libclc/opencl/lib/ptx-nvidiacl/synchronization/barrier.cl @@ -13,6 +13,6 @@ _CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { int memory_scope = getCLCMemoryScope(flags); int memory_order = __ATOMIC_SEQ_CST; - MemorySemantic memory_semantic = getCLCMemorySemantic(flags); - __clc_work_group_barrier(memory_scope, memory_order, memory_semantic); + MemorySemantics memory_semantics = getCLCMemorySemantics(flags); + __clc_work_group_barrier(memory_scope, memory_order, memory_semantics); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits