https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/189328
>From 494cb1c1b5e176e415364a25248ddeb2ddefad33 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Mon, 30 Mar 2026 09:09:55 +0200 Subject: [PATCH 1/5] [libclc][NFC] De-duplicate subgroup workitem function decls and reorganize De-duplicate since some are already declaration in include/clc/workitem. Move subgroup workitem function implementations into individual files to align with other workitem functions. --- .../clc/include/clc/subgroup/clc_subgroup.h | 6 ----- .../clc_get_enqueued_num_sub_groups.h | 16 +++++++++++++ .../clc/workitem/clc_get_max_sub_group_size.h | 2 +- .../clc/workitem/clc_get_num_sub_groups.h | 2 +- .../clc/workitem/clc_get_sub_group_id.h | 2 +- .../clc/workitem/clc_get_sub_group_local_id.h | 2 +- .../clc/workitem/clc_get_sub_group_size.h | 2 +- libclc/clc/lib/amdgpu/CMakeLists.txt | 2 ++ .../lib/amdgpu/subgroup/clc_sub_group_scan.cl | 1 + .../clc/lib/amdgpu/subgroup/clc_subgroup.cl | 10 -------- .../clc_get_enqueued_num_sub_groups.cl | 16 +++++++++++++ .../amdgpu/workitem/clc_get_num_sub_groups.cl | 2 +- .../workitem/clc_get_sub_group_local_id.cl | 13 ++++++++++ .../amdgpu/workitem/clc_get_sub_group_size.cl | 1 + .../collective/clc_work_group_any_all.cl | 3 +++ .../collective/clc_work_group_broadcast.cl | 1 + .../generic/collective/clc_work_group_scan.cl | 1 + .../generic/workitem/clc_get_sub_group_id.cl | 1 + libclc/opencl/lib/generic/CMakeLists.txt | 6 +++++ .../opencl/lib/generic/subgroup/subgroup.cl | 24 ------------------- .../workitem/get_enqueued_num_sub_groups.cl | 13 ++++++++++ .../workitem/get_max_sub_group_size.cl | 13 ++++++++++ .../generic/workitem/get_num_sub_groups.cl | 13 ++++++++++ .../lib/generic/workitem/get_sub_group_id.cl | 13 ++++++++++ .../workitem/get_sub_group_local_id.cl | 13 ++++++++++ .../generic/workitem/get_sub_group_size.cl | 13 ++++++++++ 26 files changed, 145 insertions(+), 46 deletions(-) create mode 100644 libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup.h b/libclc/clc/include/clc/subgroup/clc_subgroup.h index f0a2a11d48445..133ba33644120 100644 --- a/libclc/clc/include/clc/subgroup/clc_subgroup.h +++ b/libclc/clc/include/clc/subgroup/clc_subgroup.h @@ -11,12 +11,6 @@ #include "clc/internal/clc.h" -_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void); -_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_max_sub_group_size(void); -_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void); -_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void); -_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void); -_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void); _CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x); _CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_any(int x); diff --git a/libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h b/libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h new file mode 100644 index 0000000000000..14afdd80ca11f --- /dev/null +++ b/libclc/clc/include/clc/workitem/clc_get_enqueued_num_sub_groups.h @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_WORKITEM_CLC_GET_ENQUEUED_NUM_SUB_GROUPS_H__ +#define __CLC_WORKITEM_CLC_GET_ENQUEUED_NUM_SUB_GROUPS_H__ + +#include "clc/internal/clc.h" + +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void); + +#endif // __CLC_WORKITEM_CLC_GET_ENQUEUED_NUM_SUB_GROUPS_H__ diff --git a/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h b/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h index a5c98aeba94b8..d5a3a13945e7b 100644 --- a/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h +++ b/libclc/clc/include/clc/workitem/clc_get_max_sub_group_size.h @@ -11,6 +11,6 @@ #include "clc/internal/clc.h" -_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_max_sub_group_size(void); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_max_sub_group_size(void); #endif // __CLC_WORKITEM_CLC_GET_MAX_SUB_GROUP_SIZE_H__ diff --git a/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h b/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h index b584df98e44a6..3d1da26e8a02a 100644 --- a/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h +++ b/libclc/clc/include/clc/workitem/clc_get_num_sub_groups.h @@ -11,6 +11,6 @@ #include "clc/internal/clc.h" -_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_num_sub_groups(); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void); #endif // __CLC_WORKITEM_CLC_GET_NUM_SUB_GROUPS_H__ diff --git a/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h b/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h index 44a4459aa48b4..b21e3c1d8df05 100644 --- a/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h +++ b/libclc/clc/include/clc/workitem/clc_get_sub_group_id.h @@ -11,6 +11,6 @@ #include "clc/internal/clc.h" -_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_sub_group_id(); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void); #endif // __CLC_WORKITEM_CLC_GET_SUB_GROUP_ID_H__ diff --git a/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h b/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h index 52e4b3f28083a..0cf8890dd46b5 100644 --- a/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h +++ b/libclc/clc/include/clc/workitem/clc_get_sub_group_local_id.h @@ -11,6 +11,6 @@ #include "clc/internal/clc.h" -_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_sub_group_local_id(); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void); #endif // __CLC_WORKITEM_CLC_GET_SUB_GROUP_LOCAL_ID_H__ diff --git a/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h b/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h index 4603bfdcbeb25..1dd857d16a2bf 100644 --- a/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h +++ b/libclc/clc/include/clc/workitem/clc_get_sub_group_size.h @@ -11,6 +11,6 @@ #include "clc/internal/clc.h" -_CLC_OVERLOAD _CLC_CONST _CLC_DECL uint __clc_get_sub_group_size(); +_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void); #endif // __CLC_WORKITEM_CLC_GET_SUB_GROUP_SIZE_H__ diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index a5cd47fab4462..69af2ebe525ad 100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -35,6 +35,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_enqueued_local_size.cl + workitem/clc_get_enqueued_num_sub_groups.cl workitem/clc_get_global_offset.cl workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl @@ -44,6 +45,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES workitem/clc_get_num_groups.cl workitem/clc_get_num_sub_groups.cl workitem/clc_get_sub_group_id.cl + workitem/clc_get_sub_group_local_id.cl workitem/clc_get_sub_group_size.cl workitem/clc_get_work_dim.cl) diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl index 3ef735aac2aae..573866dee1fa0 100644 --- a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl +++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl @@ -14,6 +14,7 @@ #include "clc/subgroup/clc_sub_group_broadcast.h" #include "clc/subgroup/clc_sub_group_scan.h" #include "clc/subgroup/clc_subgroup.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" #define QUAD_PERM (1 << 15) diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl index 71f4abc42e895..eda7ca2aff394 100644 --- a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl +++ b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl @@ -9,16 +9,6 @@ #include "clc/amdgpu/amdgpu_utils.h" #include "clc/subgroup/clc_subgroup.h" -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { - return (__clc_amdgpu_enqueued_workgroup_size() + - __builtin_amdgcn_wavefrontsize() - 1) >> - __clc_amdgpu_wavesize_log2(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { - return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); -} - _CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x) { return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec(); } diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl new file mode 100644 index 0000000000000..bb702da96f0a1 --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { + return (__clc_amdgpu_enqueued_workgroup_size() + + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl index cb71ef282466b..5dcd3a57b4a4c 100644 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "clc/amdgpu/amdgpu_utils.h" -#include "clc/subgroup/clc_subgroup.h" +#include "clc/workitem/clc_get_num_sub_groups.h" _CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) { uint group_size = __clc_amdgpu_workgroup_size(); diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl new file mode 100644 index 0000000000000..2493cca0c365c --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl index 77c9f8e91d8ee..7ee264f94b0d0 100644 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl @@ -9,6 +9,7 @@ #include "clc/amdgpu/amdgpu_utils.h" #include "clc/shared/clc_min.h" #include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" _CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) { uint wavesize = __builtin_amdgcn_wavefrontsize(); diff --git a/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl b/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl index 4c79ef1f73eba..33fe5f7bd7ddb 100644 --- a/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl +++ b/libclc/clc/lib/generic/collective/clc_work_group_any_all.cl @@ -13,6 +13,9 @@ #include "clc/collective/clc_work_group_any_all.h" #include "clc/subgroup/clc_subgroup.h" #include "clc/synchronization/clc_work_group_barrier.h" +#include "clc/workitem/clc_get_num_sub_groups.h" +#include "clc/workitem/clc_get_sub_group_id.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" #pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable diff --git a/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl b/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl index ebf2d2eb1710f..cdecc39725647 100644 --- a/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl +++ b/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl @@ -13,6 +13,7 @@ #include "clc/subgroup/clc_subgroup.h" #include "clc/synchronization/clc_work_group_barrier.h" #include "clc/workitem/clc_get_local_id.h" +#include "clc/workitem/clc_get_num_sub_groups.h" #pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable diff --git a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl index ae333cd9b8cdf..a4d377c0be964 100644 --- a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl +++ b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl @@ -22,6 +22,7 @@ #include "clc/workitem/clc_get_num_sub_groups.h" #include "clc/workitem/clc_get_sub_group_id.h" #include "clc/workitem/clc_get_sub_group_local_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" #pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl index 02391c52ca813..67b008c312f29 100644 --- a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl +++ b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl @@ -8,6 +8,7 @@ #include "clc/workitem/clc_get_local_linear_id.h" #include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_sub_group_id.h" _CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index 4ad60248139ae..1d0d7ddd705e8 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -215,6 +215,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES synchronization/sub_group_barrier.cl synchronization/work_group_barrier.cl workitem/get_enqueued_local_size.cl + workitem/get_enqueued_num_sub_groups.cl workitem/get_global_id.cl workitem/get_global_linear_id.cl workitem/get_global_offset.cl @@ -223,7 +224,12 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES workitem/get_local_id.cl workitem/get_local_linear_id.cl workitem/get_local_size.cl + workitem/get_max_sub_group_size.cl workitem/get_num_groups.cl + workitem/get_num_sub_groups.cl + workitem/get_sub_group_id.cl + workitem/get_sub_group_local_id.cl + workitem/get_sub_group_size.cl workitem/get_work_dim.cl ) diff --git a/libclc/opencl/lib/generic/subgroup/subgroup.cl b/libclc/opencl/lib/generic/subgroup/subgroup.cl index fd552ada4afaf..dfe9867fd0801 100644 --- a/libclc/opencl/lib/generic/subgroup/subgroup.cl +++ b/libclc/opencl/lib/generic/subgroup/subgroup.cl @@ -8,30 +8,6 @@ #include "clc/subgroup/clc_subgroup.h" -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { - return __clc_get_sub_group_size(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { - return __clc_get_max_sub_group_size(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { - return __clc_get_num_sub_groups(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { - return __clc_get_enqueued_num_sub_groups(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { - return __clc_get_sub_group_id(); -} - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { - return __clc_get_sub_group_local_id(); -} - _CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) { return __clc_sub_group_all(x); } diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl new file mode 100644 index 0000000000000..fee3a588c2bbf --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return __clc_get_enqueued_num_sub_groups(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl new file mode 100644 index 0000000000000..bbd19a88a0165 --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_max_sub_group_size.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { + return __clc_get_max_sub_group_size(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl new file mode 100644 index 0000000000000..77163234fe54d --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_num_sub_groups.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { + return __clc_get_num_sub_groups(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl new file mode 100644 index 0000000000000..a1ad6adb4e2cb --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { + return __clc_get_sub_group_id(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl new file mode 100644 index 0000000000000..33164282165b3 --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { + return __clc_get_sub_group_local_id(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl new file mode 100644 index 0000000000000..62f3382b6d7df --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { + return __clc_get_sub_group_size(); +} >From 65357182f47a30c4d49e343409e969d9fa986175 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Mon, 30 Mar 2026 10:31:35 +0200 Subject: [PATCH 2/5] merge subgroup workitem functions into one file llvm-diff shows only nvptx64 libclc.bc changed. It is caused by different optimization opportunities and pass ordering after the subgroup helpers were merged into one .cl file. --- libclc/clc/lib/amdgpu/CMakeLists.txt | 9 +--- .../clc_get_enqueued_num_sub_groups.cl | 16 ------- .../workitem/clc_get_max_sub_group_size.cl | 16 ------- .../amdgpu/workitem/clc_get_num_sub_groups.cl | 16 ------- .../amdgpu/workitem/clc_get_sub_group_id.cl | 15 ------ .../workitem/clc_get_sub_group_local_id.cl | 13 ----- .../amdgpu/workitem/clc_get_sub_group_size.cl | 19 -------- .../amdgpu/workitem/clc_workitem_sub_group.cl | 48 +++++++++++++++++++ libclc/clc/lib/generic/CMakeLists.txt | 4 +- .../workitem/clc_get_num_sub_groups.cl | 18 ------- .../generic/workitem/clc_get_sub_group_id.cl | 15 ------ ...roup_size.cl => clc_workitem_sub_group.cl} | 12 +++++ libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt | 3 +- .../workitem/clc_get_max_sub_group_size.cl | 13 ----- .../workitem/clc_get_sub_group_local_id.cl | 13 ----- .../workitem/clc_workitem_sub_group.cl | 47 ++++++++++++++++++ libclc/opencl/lib/generic/CMakeLists.txt | 7 +-- .../workitem/get_enqueued_num_sub_groups.cl | 13 ----- .../workitem/get_max_sub_group_size.cl | 13 ----- .../generic/workitem/get_num_sub_groups.cl | 13 ----- .../lib/generic/workitem/get_sub_group_id.cl | 13 ----- .../workitem/get_sub_group_local_id.cl | 13 ----- .../generic/workitem/get_sub_group_size.cl | 13 ----- .../generic/workitem/workitem_sub_group.cl | 38 +++++++++++++++ 24 files changed, 150 insertions(+), 250 deletions(-) delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl delete mode 100644 libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl delete mode 100644 libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl rename libclc/clc/lib/generic/workitem/{clc_get_sub_group_size.cl => clc_workitem_sub_group.cl} (70%) delete mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl delete mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl create mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl create mode 100644 libclc/opencl/lib/generic/workitem/workitem_sub_group.cl diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index 69af2ebe525ad..a65308cc34898 100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -35,19 +35,14 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_enqueued_local_size.cl - workitem/clc_get_enqueued_num_sub_groups.cl workitem/clc_get_global_offset.cl workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl - workitem/clc_get_max_sub_group_size.cl workitem/clc_get_num_groups.cl - workitem/clc_get_num_sub_groups.cl - workitem/clc_get_sub_group_id.cl - workitem/clc_get_sub_group_local_id.cl - workitem/clc_get_sub_group_size.cl - workitem/clc_get_work_dim.cl) + workitem/clc_get_work_dim.cl + workitem/clc_workitem_sub_group.cl) libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func math/clc_native_exp.cl diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl deleted file mode 100644 index bb702da96f0a1..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { - return (__clc_amdgpu_enqueued_workgroup_size() + - __builtin_amdgcn_wavefrontsize() - 1) >> - __clc_amdgpu_wavesize_log2(); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl deleted file mode 100644 index 5eb0166135663..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/shared/clc_min.h" -#include "clc/workitem/clc_get_max_sub_group_size.h" - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) { - return __clc_min(__builtin_amdgcn_wavefrontsize(), - __clc_amdgpu_enqueued_workgroup_size()); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl deleted file mode 100644 index 5dcd3a57b4a4c..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/workitem/clc_get_num_sub_groups.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) { - uint group_size = __clc_amdgpu_workgroup_size(); - return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >> - __clc_amdgpu_wavesize_log2(); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl deleted file mode 100644 index ba3baf98bda14..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/workitem/clc_get_local_linear_id.h" -#include "clc/workitem/clc_get_sub_group_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void) { - return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2(); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl deleted file mode 100644 index 2493cca0c365c..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { - return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl deleted file mode 100644 index 7ee264f94b0d0..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl +++ /dev/null @@ -1,19 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/shared/clc_min.h" -#include "clc/workitem/clc_get_local_linear_id.h" -#include "clc/workitem/clc_get_sub_group_size.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) { - uint wavesize = __builtin_amdgcn_wavefrontsize(); - uint lid = (uint)__clc_get_local_linear_id(); - return __clc_min(wavesize, - __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1))); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl b/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl new file mode 100644 index 0000000000000..e78e955ab5f56 --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/shared/clc_min.h" +#include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_num_sub_groups.h" +#include "clc/workitem/clc_get_sub_group_id.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { + return (__clc_amdgpu_enqueued_workgroup_size() + + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) { + return __clc_min(__builtin_amdgcn_wavefrontsize(), + __clc_amdgpu_enqueued_workgroup_size()); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_num_sub_groups(void) { + uint group_size = __clc_amdgpu_workgroup_size(); + return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_id(void) { + return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_size(void) { + uint wavesize = __builtin_amdgcn_wavefrontsize(); + uint lid = (uint)__clc_get_local_linear_id(); + return __clc_min(wavesize, + __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1))); +} diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt index 168a0f1ff1e84..0f2f46ccdf3c6 100644 --- a/libclc/clc/lib/generic/CMakeLists.txt +++ b/libclc/clc/lib/generic/CMakeLists.txt @@ -204,9 +204,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES workitem/clc_get_global_id.cl workitem/clc_get_global_linear_id.cl workitem/clc_get_local_linear_id.cl - workitem/clc_get_num_sub_groups.cl - workitem/clc_get_sub_group_id.cl - workitem/clc_get_sub_group_size.cl + workitem/clc_workitem_sub_group.cl ) libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func diff --git a/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl deleted file mode 100644 index 7d6d922d52bc4..0000000000000 --- a/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl +++ /dev/null @@ -1,18 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_local_size.h" -#include "clc/workitem/clc_get_max_sub_group_size.h" -#include "clc/workitem/clc_get_num_sub_groups.h" - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() { - size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) * - __clc_get_local_size(2); - uint sg_size = __clc_get_max_sub_group_size(); - return (uint)((linear_size + sg_size - 1) / sg_size); -} diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl deleted file mode 100644 index 67b008c312f29..0000000000000 --- a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl +++ /dev/null @@ -1,15 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_local_linear_id.h" -#include "clc/workitem/clc_get_max_sub_group_size.h" -#include "clc/workitem/clc_get_sub_group_id.h" - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { - return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); -} diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl b/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl similarity index 70% rename from libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl rename to libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl index 7944486aac0f0..c9638c59e2877 100644 --- a/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl +++ b/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl @@ -6,12 +6,24 @@ // //===----------------------------------------------------------------------===// +#include "clc/workitem/clc_get_local_linear_id.h" #include "clc/workitem/clc_get_local_size.h" #include "clc/workitem/clc_get_max_sub_group_size.h" #include "clc/workitem/clc_get_num_sub_groups.h" #include "clc/workitem/clc_get_sub_group_id.h" #include "clc/workitem/clc_get_sub_group_size.h" +_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() { + size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) * + __clc_get_local_size(2); + uint sg_size = __clc_get_max_sub_group_size(); + return (uint)((linear_size + sg_size - 1) / sg_size); +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { + return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); +} + _CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() { if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) { return __clc_get_max_sub_group_size(); diff --git a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt index 6eb0baab1c0bb..6a5860027f0db 100644 --- a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt +++ b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt @@ -11,7 +11,6 @@ libclc_configure_source_list(CLC_PTX_NVIDIACL_SOURCES workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl - workitem/clc_get_max_sub_group_size.cl workitem/clc_get_num_groups.cl - workitem/clc_get_sub_group_local_id.cl + workitem/clc_workitem_sub_group.cl ) diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl deleted file mode 100644 index 9a380c2fc4b8f..0000000000000 --- a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_max_sub_group_size.h" - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() { - return __nvvm_read_ptx_sreg_warpsize(); -} diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl deleted file mode 100644 index 7e61e09bff1e3..0000000000000 --- a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() { - return __nvvm_read_ptx_sreg_laneid(); -} diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl new file mode 100644 index 0000000000000..bdc09c9f61714 --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_local_size.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_num_sub_groups.h" +#include "clc/workitem/clc_get_sub_group_id.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() { + return __nvvm_read_ptx_sreg_warpsize(); +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() { + size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) * + __clc_get_local_size(2); + uint sg_size = __clc_get_max_sub_group_size(); + return (uint)((linear_size + sg_size - 1) / sg_size); +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { + return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() { + return __nvvm_read_ptx_sreg_laneid(); +} + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() { + if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) { + return __clc_get_max_sub_group_size(); + } + size_t size_x = __clc_get_local_size(0); + size_t size_y = __clc_get_local_size(1); + size_t size_z = __clc_get_local_size(2); + size_t linear_size = size_z * size_y * size_x; + size_t uniform_groups = __clc_get_num_sub_groups() - 1; + size_t uniform_size = __clc_get_max_sub_group_size() * uniform_groups; + return linear_size - uniform_size; +} diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index 1d0d7ddd705e8..6877b937e20c5 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -215,7 +215,6 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES synchronization/sub_group_barrier.cl synchronization/work_group_barrier.cl workitem/get_enqueued_local_size.cl - workitem/get_enqueued_num_sub_groups.cl workitem/get_global_id.cl workitem/get_global_linear_id.cl workitem/get_global_offset.cl @@ -224,13 +223,9 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES workitem/get_local_id.cl workitem/get_local_linear_id.cl workitem/get_local_size.cl - workitem/get_max_sub_group_size.cl workitem/get_num_groups.cl - workitem/get_num_sub_groups.cl - workitem/get_sub_group_id.cl - workitem/get_sub_group_local_id.cl - workitem/get_sub_group_size.cl workitem/get_work_dim.cl + workitem/workitem_sub_group.cl ) libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl deleted file mode 100644 index fee3a588c2bbf..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { - return __clc_get_enqueued_num_sub_groups(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl deleted file mode 100644 index bbd19a88a0165..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_max_sub_group_size.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { - return __clc_get_max_sub_group_size(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl deleted file mode 100644 index 77163234fe54d..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_num_sub_groups.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { - return __clc_get_num_sub_groups(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl deleted file mode 100644 index a1ad6adb4e2cb..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { - return __clc_get_sub_group_id(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl deleted file mode 100644 index 33164282165b3..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { - return __clc_get_sub_group_local_id(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl deleted file mode 100644 index 62f3382b6d7df..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_size.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { - return __clc_get_sub_group_size(); -} diff --git a/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl b/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl new file mode 100644 index 0000000000000..970c35f945458 --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_num_sub_groups.h" +#include "clc/workitem/clc_get_sub_group_id.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return __clc_get_enqueued_num_sub_groups(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_max_sub_group_size(void) { + return __clc_get_max_sub_group_size(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_num_sub_groups(void) { + return __clc_get_num_sub_groups(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_id(void) { + return __clc_get_sub_group_id(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_local_id(void) { + return __clc_get_sub_group_local_id(); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_size(void) { + return __clc_get_sub_group_size(); +} >From 1d5d491d5c47198c215126ffef254f709eae8ab2 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Mon, 30 Mar 2026 11:28:32 +0200 Subject: [PATCH 3/5] Revert "merge subgroup workitem functions into one file" This reverts commit 65357182f47a30c4d49e343409e969d9fa986175. --- libclc/clc/lib/amdgpu/CMakeLists.txt | 9 +++- .../clc_get_enqueued_num_sub_groups.cl | 16 +++++++ .../workitem/clc_get_max_sub_group_size.cl | 16 +++++++ .../amdgpu/workitem/clc_get_num_sub_groups.cl | 16 +++++++ .../amdgpu/workitem/clc_get_sub_group_id.cl | 15 ++++++ .../workitem/clc_get_sub_group_local_id.cl | 13 +++++ .../amdgpu/workitem/clc_get_sub_group_size.cl | 19 ++++++++ .../amdgpu/workitem/clc_workitem_sub_group.cl | 48 ------------------- libclc/clc/lib/generic/CMakeLists.txt | 4 +- .../workitem/clc_get_num_sub_groups.cl | 18 +++++++ .../generic/workitem/clc_get_sub_group_id.cl | 15 ++++++ ...sub_group.cl => clc_get_sub_group_size.cl} | 12 ----- libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt | 3 +- .../workitem/clc_get_max_sub_group_size.cl | 13 +++++ .../workitem/clc_get_sub_group_local_id.cl | 13 +++++ .../workitem/clc_workitem_sub_group.cl | 47 ------------------ libclc/opencl/lib/generic/CMakeLists.txt | 7 ++- .../workitem/get_enqueued_num_sub_groups.cl | 13 +++++ .../workitem/get_max_sub_group_size.cl | 13 +++++ .../generic/workitem/get_num_sub_groups.cl | 13 +++++ .../lib/generic/workitem/get_sub_group_id.cl | 13 +++++ .../workitem/get_sub_group_local_id.cl | 13 +++++ .../generic/workitem/get_sub_group_size.cl | 13 +++++ .../generic/workitem/workitem_sub_group.cl | 38 --------------- 24 files changed, 250 insertions(+), 150 deletions(-) create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl create mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl create mode 100644 libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl create mode 100644 libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl rename libclc/clc/lib/generic/workitem/{clc_workitem_sub_group.cl => clc_get_sub_group_size.cl} (70%) create mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl create mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl delete mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl create mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl delete mode 100644 libclc/opencl/lib/generic/workitem/workitem_sub_group.cl diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index a65308cc34898..69af2ebe525ad 100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -35,14 +35,19 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_enqueued_local_size.cl + workitem/clc_get_enqueued_num_sub_groups.cl workitem/clc_get_global_offset.cl workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl + workitem/clc_get_max_sub_group_size.cl workitem/clc_get_num_groups.cl - workitem/clc_get_work_dim.cl - workitem/clc_workitem_sub_group.cl) + workitem/clc_get_num_sub_groups.cl + workitem/clc_get_sub_group_id.cl + workitem/clc_get_sub_group_local_id.cl + workitem/clc_get_sub_group_size.cl + workitem/clc_get_work_dim.cl) libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func math/clc_native_exp.cl diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl new file mode 100644 index 0000000000000..bb702da96f0a1 --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { + return (__clc_amdgpu_enqueued_workgroup_size() + + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl new file mode 100644 index 0000000000000..5eb0166135663 --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_max_sub_group_size.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/shared/clc_min.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) { + return __clc_min(__builtin_amdgcn_wavefrontsize(), + __clc_amdgpu_enqueued_workgroup_size()); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl new file mode 100644 index 0000000000000..5dcd3a57b4a4c --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_num_sub_groups.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/workitem/clc_get_num_sub_groups.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) { + uint group_size = __clc_amdgpu_workgroup_size(); + return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl new file mode 100644 index 0000000000000..ba3baf98bda14 --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_id.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_sub_group_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void) { + return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2(); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl new file mode 100644 index 0000000000000..2493cca0c365c --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl new file mode 100644 index 0000000000000..7ee264f94b0d0 --- /dev/null +++ b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_size.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/amdgpu/amdgpu_utils.h" +#include "clc/shared/clc_min.h" +#include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) { + uint wavesize = __builtin_amdgcn_wavefrontsize(); + uint lid = (uint)__clc_get_local_linear_id(); + return __clc_min(wavesize, + __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1))); +} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl b/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl deleted file mode 100644 index e78e955ab5f56..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_workitem_sub_group.cl +++ /dev/null @@ -1,48 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/shared/clc_min.h" -#include "clc/workitem/clc_get_local_linear_id.h" -#include "clc/workitem/clc_get_max_sub_group_size.h" -#include "clc/workitem/clc_get_num_sub_groups.h" -#include "clc/workitem/clc_get_sub_group_id.h" -#include "clc/workitem/clc_get_sub_group_local_id.h" -#include "clc/workitem/clc_get_sub_group_size.h" - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { - return (__clc_amdgpu_enqueued_workgroup_size() + - __builtin_amdgcn_wavefrontsize() - 1) >> - __clc_amdgpu_wavesize_log2(); -} - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size(void) { - return __clc_min(__builtin_amdgcn_wavefrontsize(), - __clc_amdgpu_enqueued_workgroup_size()); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_num_sub_groups(void) { - uint group_size = __clc_amdgpu_workgroup_size(); - return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >> - __clc_amdgpu_wavesize_log2(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_id(void) { - return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_local_id(void) { - return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint __clc_get_sub_group_size(void) { - uint wavesize = __builtin_amdgcn_wavefrontsize(); - uint lid = (uint)__clc_get_local_linear_id(); - return __clc_min(wavesize, - __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1))); -} diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt index 0f2f46ccdf3c6..168a0f1ff1e84 100644 --- a/libclc/clc/lib/generic/CMakeLists.txt +++ b/libclc/clc/lib/generic/CMakeLists.txt @@ -204,7 +204,9 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES workitem/clc_get_global_id.cl workitem/clc_get_global_linear_id.cl workitem/clc_get_local_linear_id.cl - workitem/clc_workitem_sub_group.cl + workitem/clc_get_num_sub_groups.cl + workitem/clc_get_sub_group_id.cl + workitem/clc_get_sub_group_size.cl ) libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func diff --git a/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl new file mode 100644 index 0000000000000..7d6d922d52bc4 --- /dev/null +++ b/libclc/clc/lib/generic/workitem/clc_get_num_sub_groups.cl @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_local_size.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_num_sub_groups.h" + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() { + size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) * + __clc_get_local_size(2); + uint sg_size = __clc_get_max_sub_group_size(); + return (uint)((linear_size + sg_size - 1) / sg_size); +} diff --git a/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl new file mode 100644 index 0000000000000..67b008c312f29 --- /dev/null +++ b/libclc/clc/lib/generic/workitem/clc_get_sub_group_id.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_local_linear_id.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_sub_group_id.h" + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { + return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); +} diff --git a/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl b/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl similarity index 70% rename from libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl rename to libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl index c9638c59e2877..7944486aac0f0 100644 --- a/libclc/clc/lib/generic/workitem/clc_workitem_sub_group.cl +++ b/libclc/clc/lib/generic/workitem/clc_get_sub_group_size.cl @@ -6,24 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "clc/workitem/clc_get_local_linear_id.h" #include "clc/workitem/clc_get_local_size.h" #include "clc/workitem/clc_get_max_sub_group_size.h" #include "clc/workitem/clc_get_num_sub_groups.h" #include "clc/workitem/clc_get_sub_group_id.h" #include "clc/workitem/clc_get_sub_group_size.h" -_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() { - size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) * - __clc_get_local_size(2); - uint sg_size = __clc_get_max_sub_group_size(); - return (uint)((linear_size + sg_size - 1) / sg_size); -} - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { - return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); -} - _CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() { if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) { return __clc_get_max_sub_group_size(); diff --git a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt index 6a5860027f0db..6eb0baab1c0bb 100644 --- a/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt +++ b/libclc/clc/lib/ptx-nvidiacl/CMakeLists.txt @@ -11,6 +11,7 @@ libclc_configure_source_list(CLC_PTX_NVIDIACL_SOURCES workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl + workitem/clc_get_max_sub_group_size.cl workitem/clc_get_num_groups.cl - workitem/clc_workitem_sub_group.cl + workitem/clc_get_sub_group_local_id.cl ) diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl new file mode 100644 index 0000000000000..9a380c2fc4b8f --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_max_sub_group_size.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_max_sub_group_size.h" + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() { + return __nvvm_read_ptx_sreg_warpsize(); +} diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl new file mode 100644 index 0000000000000..7e61e09bff1e3 --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_sub_group_local_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() { + return __nvvm_read_ptx_sreg_laneid(); +} diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl deleted file mode 100644 index bdc09c9f61714..0000000000000 --- a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_workitem_sub_group.cl +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_local_linear_id.h" -#include "clc/workitem/clc_get_local_size.h" -#include "clc/workitem/clc_get_max_sub_group_size.h" -#include "clc/workitem/clc_get_num_sub_groups.h" -#include "clc/workitem/clc_get_sub_group_id.h" -#include "clc/workitem/clc_get_sub_group_local_id.h" -#include "clc/workitem/clc_get_sub_group_size.h" - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() { - return __nvvm_read_ptx_sreg_warpsize(); -} - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_num_sub_groups() { - size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) * - __clc_get_local_size(2); - uint sg_size = __clc_get_max_sub_group_size(); - return (uint)((linear_size + sg_size - 1) / sg_size); -} - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_id(void) { - return __clc_get_local_linear_id() / __clc_get_max_sub_group_size(); -} - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_local_id() { - return __nvvm_read_ptx_sreg_laneid(); -} - -_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() { - if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) { - return __clc_get_max_sub_group_size(); - } - size_t size_x = __clc_get_local_size(0); - size_t size_y = __clc_get_local_size(1); - size_t size_z = __clc_get_local_size(2); - size_t linear_size = size_z * size_y * size_x; - size_t uniform_groups = __clc_get_num_sub_groups() - 1; - size_t uniform_size = __clc_get_max_sub_group_size() * uniform_groups; - return linear_size - uniform_size; -} diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index 6877b937e20c5..1d0d7ddd705e8 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -215,6 +215,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES synchronization/sub_group_barrier.cl synchronization/work_group_barrier.cl workitem/get_enqueued_local_size.cl + workitem/get_enqueued_num_sub_groups.cl workitem/get_global_id.cl workitem/get_global_linear_id.cl workitem/get_global_offset.cl @@ -223,9 +224,13 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES workitem/get_local_id.cl workitem/get_local_linear_id.cl workitem/get_local_size.cl + workitem/get_max_sub_group_size.cl workitem/get_num_groups.cl + workitem/get_num_sub_groups.cl + workitem/get_sub_group_id.cl + workitem/get_sub_group_local_id.cl + workitem/get_sub_group_size.cl workitem/get_work_dim.cl - workitem/workitem_sub_group.cl ) libclc_configure_source_options(${CMAKE_CURRENT_SOURCE_DIR} -fapprox-func diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl new file mode 100644 index 0000000000000..fee3a588c2bbf --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return __clc_get_enqueued_num_sub_groups(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl new file mode 100644 index 0000000000000..bbd19a88a0165 --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_max_sub_group_size.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { + return __clc_get_max_sub_group_size(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl new file mode 100644 index 0000000000000..77163234fe54d --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_num_sub_groups.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { + return __clc_get_num_sub_groups(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl new file mode 100644 index 0000000000000..a1ad6adb4e2cb --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { + return __clc_get_sub_group_id(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl new file mode 100644 index 0000000000000..33164282165b3 --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_local_id.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { + return __clc_get_sub_group_local_id(); +} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl new file mode 100644 index 0000000000000..62f3382b6d7df --- /dev/null +++ b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl @@ -0,0 +1,13 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { + return __clc_get_sub_group_size(); +} diff --git a/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl b/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl deleted file mode 100644 index 970c35f945458..0000000000000 --- a/libclc/opencl/lib/generic/workitem/workitem_sub_group.cl +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" -#include "clc/workitem/clc_get_max_sub_group_size.h" -#include "clc/workitem/clc_get_num_sub_groups.h" -#include "clc/workitem/clc_get_sub_group_id.h" -#include "clc/workitem/clc_get_sub_group_local_id.h" -#include "clc/workitem/clc_get_sub_group_size.h" - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_enqueued_num_sub_groups(void) { - return __clc_get_enqueued_num_sub_groups(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_max_sub_group_size(void) { - return __clc_get_max_sub_group_size(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_num_sub_groups(void) { - return __clc_get_num_sub_groups(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_id(void) { - return __clc_get_sub_group_id(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_local_id(void) { - return __clc_get_sub_group_local_id(); -} - -_CLC_OVERLOAD _CLC_DEF _CLC_CONST uint get_sub_group_size(void) { - return __clc_get_sub_group_size(); -} >From 2dd30336fc85c9993c817ae84dd14f1d9a7f44db Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Mon, 30 Mar 2026 11:34:53 +0200 Subject: [PATCH 4/5] partially Revert "[libclc][NFC] De-duplicate subgroup workitem function decls and reorganize" Only keep header changes --- libclc/clc/lib/amdgpu/CMakeLists.txt | 2 -- .../clc/lib/amdgpu/subgroup/clc_subgroup.cl | 10 +++++++ .../clc_get_enqueued_num_sub_groups.cl | 16 ---------- .../workitem/clc_get_sub_group_local_id.cl | 13 -------- libclc/opencl/lib/generic/CMakeLists.txt | 6 ---- .../opencl/lib/generic/subgroup/subgroup.cl | 30 +++++++++++++++++++ .../workitem/get_enqueued_num_sub_groups.cl | 13 -------- .../workitem/get_max_sub_group_size.cl | 13 -------- .../generic/workitem/get_num_sub_groups.cl | 13 -------- .../lib/generic/workitem/get_sub_group_id.cl | 13 -------- .../workitem/get_sub_group_local_id.cl | 13 -------- .../generic/workitem/get_sub_group_size.cl | 13 -------- 12 files changed, 40 insertions(+), 115 deletions(-) delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl delete mode 100644 libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_id.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl delete mode 100644 libclc/opencl/lib/generic/workitem/get_sub_group_size.cl diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index 69af2ebe525ad..a5cd47fab4462 100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -35,7 +35,6 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_enqueued_local_size.cl - workitem/clc_get_enqueued_num_sub_groups.cl workitem/clc_get_global_offset.cl workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl @@ -45,7 +44,6 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES workitem/clc_get_num_groups.cl workitem/clc_get_num_sub_groups.cl workitem/clc_get_sub_group_id.cl - workitem/clc_get_sub_group_local_id.cl workitem/clc_get_sub_group_size.cl workitem/clc_get_work_dim.cl) diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl index eda7ca2aff394..71f4abc42e895 100644 --- a/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl +++ b/libclc/clc/lib/amdgpu/subgroup/clc_subgroup.cl @@ -9,6 +9,16 @@ #include "clc/amdgpu/amdgpu_utils.h" #include "clc/subgroup/clc_subgroup.h" +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { + return (__clc_amdgpu_enqueued_workgroup_size() + + __builtin_amdgcn_wavefrontsize() - 1) >> + __clc_amdgpu_wavesize_log2(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { + return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); +} + _CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x) { return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec(); } diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl deleted file mode 100644 index bb702da96f0a1..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_enqueued_num_sub_groups.cl +++ /dev/null @@ -1,16 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/amdgpu/amdgpu_utils.h" -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) { - return (__clc_amdgpu_enqueued_workgroup_size() + - __builtin_amdgcn_wavefrontsize() - 1) >> - __clc_amdgpu_wavesize_log2(); -} diff --git a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl b/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl deleted file mode 100644 index 2493cca0c365c..0000000000000 --- a/libclc/clc/lib/amdgpu/workitem/clc_get_sub_group_local_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) { - return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); -} diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index 1d0d7ddd705e8..4ad60248139ae 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -215,7 +215,6 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES synchronization/sub_group_barrier.cl synchronization/work_group_barrier.cl workitem/get_enqueued_local_size.cl - workitem/get_enqueued_num_sub_groups.cl workitem/get_global_id.cl workitem/get_global_linear_id.cl workitem/get_global_offset.cl @@ -224,12 +223,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES workitem/get_local_id.cl workitem/get_local_linear_id.cl workitem/get_local_size.cl - workitem/get_max_sub_group_size.cl workitem/get_num_groups.cl - workitem/get_num_sub_groups.cl - workitem/get_sub_group_id.cl - workitem/get_sub_group_local_id.cl - workitem/get_sub_group_size.cl workitem/get_work_dim.cl ) diff --git a/libclc/opencl/lib/generic/subgroup/subgroup.cl b/libclc/opencl/lib/generic/subgroup/subgroup.cl index dfe9867fd0801..4fd1f04ca7189 100644 --- a/libclc/opencl/lib/generic/subgroup/subgroup.cl +++ b/libclc/opencl/lib/generic/subgroup/subgroup.cl @@ -7,6 +7,36 @@ //===----------------------------------------------------------------------===// #include "clc/subgroup/clc_subgroup.h" +#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" +#include "clc/workitem/clc_get_max_sub_group_size.h" +#include "clc/workitem/clc_get_num_sub_groups.h" +#include "clc/workitem/clc_get_sub_group_id.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" +#include "clc/workitem/clc_get_sub_group_size.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { + return __clc_get_sub_group_size(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { + return __clc_get_max_sub_group_size(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { + return __clc_get_num_sub_groups(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { + return __clc_get_enqueued_num_sub_groups(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { + return __clc_get_sub_group_id(); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { + return __clc_get_sub_group_local_id(); +} _CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) { return __clc_sub_group_all(x); diff --git a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl deleted file mode 100644 index fee3a588c2bbf..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_enqueued_num_sub_groups.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_enqueued_num_sub_groups.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) { - return __clc_get_enqueued_num_sub_groups(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl deleted file mode 100644 index bbd19a88a0165..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_max_sub_group_size.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_max_sub_group_size.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) { - return __clc_get_max_sub_group_size(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl b/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl deleted file mode 100644 index 77163234fe54d..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_num_sub_groups.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_num_sub_groups.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) { - return __clc_get_num_sub_groups(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl deleted file mode 100644 index a1ad6adb4e2cb..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_sub_group_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) { - return __clc_get_sub_group_id(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl deleted file mode 100644 index 33164282165b3..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_sub_group_local_id.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_local_id.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) { - return __clc_get_sub_group_local_id(); -} diff --git a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl b/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl deleted file mode 100644 index 62f3382b6d7df..0000000000000 --- a/libclc/opencl/lib/generic/workitem/get_sub_group_size.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/workitem/clc_get_sub_group_size.h" - -_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) { - return __clc_get_sub_group_size(); -} >From 8377c14d0bd1a6a145f5a845555bbbeae2c2d917 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Mon, 30 Mar 2026 11:43:02 +0200 Subject: [PATCH 5/5] cleanup include clc_subgroup.h --- libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl | 1 - libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl | 1 - libclc/clc/lib/generic/collective/clc_work_group_scan.cl | 1 - 3 files changed, 3 deletions(-) diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl index 573866dee1fa0..ff1bd2855768c 100644 --- a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl +++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl @@ -13,7 +13,6 @@ #include "clc/shared/clc_min.h" #include "clc/subgroup/clc_sub_group_broadcast.h" #include "clc/subgroup/clc_sub_group_scan.h" -#include "clc/subgroup/clc_subgroup.h" #include "clc/workitem/clc_get_sub_group_local_id.h" #define QUAD_PERM (1 << 15) diff --git a/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl b/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl index cdecc39725647..29ef2a4543690 100644 --- a/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl +++ b/libclc/clc/lib/generic/collective/clc_work_group_broadcast.cl @@ -10,7 +10,6 @@ #include "clc/atomic/clc_atomic_store.h" #include "clc/collective/clc_work_group_broadcast.h" #include "clc/subgroup/clc_sub_group_broadcast.h" -#include "clc/subgroup/clc_subgroup.h" #include "clc/synchronization/clc_work_group_barrier.h" #include "clc/workitem/clc_get_local_id.h" #include "clc/workitem/clc_get_num_sub_groups.h" diff --git a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl index a4d377c0be964..05808b663aad6 100644 --- a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl +++ b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl @@ -17,7 +17,6 @@ #include "clc/shared/clc_min.h" #include "clc/subgroup/clc_sub_group_scan.h" -#include "clc/subgroup/clc_subgroup.h" #include "clc/synchronization/clc_work_group_barrier.h" #include "clc/workitem/clc_get_num_sub_groups.h" #include "clc/workitem/clc_get_sub_group_id.h" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
