https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/185171
>From e070a7ffdffcdfeb58cfb74243c91a2e00db0847 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Fri, 6 Mar 2026 17:55:54 +0100 Subject: [PATCH 1/6] libclc: Avoid duplicated get_local_size/get_global_size functions Move opencl handling on top of clc into opencl generic, delete amdgpu implementations in opencl. --- libclc/opencl/lib/amdgcn-amdhsa/SOURCES | 2 -- .../amdgcn-amdhsa/workitem/get_global_size.cl | 20 -------------- .../amdgcn-amdhsa/workitem/get_local_size.cl | 26 ------------------- libclc/opencl/lib/amdgcn/SOURCES | 2 -- .../lib/amdgcn/workitem/get_global_size.cl | 13 ---------- .../lib/amdgcn/workitem/get_local_size.cl | 22 ---------------- libclc/opencl/lib/generic/SOURCES | 1 + .../workitem/get_local_size.cl | 0 libclc/opencl/lib/ptx-nvidiacl/SOURCES | 1 - 9 files changed, 1 insertion(+), 86 deletions(-) delete mode 100644 libclc/opencl/lib/amdgcn-amdhsa/SOURCES delete mode 100644 libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl delete mode 100644 libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl delete mode 100644 libclc/opencl/lib/amdgcn/workitem/get_global_size.cl delete mode 100644 libclc/opencl/lib/amdgcn/workitem/get_local_size.cl rename libclc/opencl/lib/{ptx-nvidiacl => generic}/workitem/get_local_size.cl (100%) diff --git a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES b/libclc/opencl/lib/amdgcn-amdhsa/SOURCES deleted file mode 100644 index ee3a48ce2c474..0000000000000 --- a/libclc/opencl/lib/amdgcn-amdhsa/SOURCES +++ /dev/null @@ -1,2 +0,0 @@ -workitem/get_global_size.cl -workitem/get_local_size.cl diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl deleted file mode 100644 index f21a060849dbe..0000000000000 --- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_global_size.cl +++ /dev/null @@ -1,20 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <amdhsa_abi.h> -#include <clc/opencl/opencl-base.h> - -_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { - if (dim > 2) - return 1; - __constant amdhsa_implicit_kernarg_v5 *args = - (__constant amdhsa_implicit_kernarg_v5 *) - __builtin_amdgcn_implicitarg_ptr(); - return args->block_count[dim] * (uint)args->group_size[dim] + - (uint)args->remainder[dim]; -} diff --git a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl b/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl deleted file mode 100644 index ed1e17776361e..0000000000000 --- a/libclc/opencl/lib/amdgcn-amdhsa/workitem/get_local_size.cl +++ /dev/null @@ -1,26 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <amdhsa_abi.h> -#include <clc/opencl/opencl-base.h> - -_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { - if (dim > 2) - return 1; - - __constant amdhsa_implicit_kernarg_v5 *args = - (__constant amdhsa_implicit_kernarg_v5 *) - __builtin_amdgcn_implicitarg_ptr(); - - uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(), - __builtin_amdgcn_workgroup_id_y(), - __builtin_amdgcn_workgroup_id_z()}; - - return group_ids[dim] < args->block_count[dim] ? (size_t)args->group_size[dim] - : (size_t)args->remainder[dim]; -} diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES index ac72d8a00c9d0..e52f54789bfab 100644 --- a/libclc/opencl/lib/amdgcn/SOURCES +++ b/libclc/opencl/lib/amdgcn/SOURCES @@ -3,7 +3,5 @@ subgroup/subgroup.cl synchronization/sub_group_barrier.cl workitem/get_global_offset.cl workitem/get_group_id.cl -workitem/get_global_size.cl workitem/get_local_id.cl -workitem/get_local_size.cl workitem/get_work_dim.cl diff --git a/libclc/opencl/lib/amdgcn/workitem/get_global_size.cl b/libclc/opencl/lib/amdgcn/workitem/get_global_size.cl deleted file mode 100644 index eca7199a766fc..0000000000000 --- a/libclc/opencl/lib/amdgcn/workitem/get_global_size.cl +++ /dev/null @@ -1,13 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/workitem/clc_get_global_size.h> - -_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { - return __clc_get_global_size(dim); -} diff --git a/libclc/opencl/lib/amdgcn/workitem/get_local_size.cl b/libclc/opencl/lib/amdgcn/workitem/get_local_size.cl deleted file mode 100644 index 34e4f2f1b4c19..0000000000000 --- a/libclc/opencl/lib/amdgcn/workitem/get_local_size.cl +++ /dev/null @@ -1,22 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include <clc/opencl/opencl-base.h> - -_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { - switch (dim) { - case 0: - return __builtin_amdgcn_workgroup_size_x(); - case 1: - return __builtin_amdgcn_workgroup_size_y(); - case 2: - return __builtin_amdgcn_workgroup_size_z(); - default: - return 1; - } -} diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES index cdc1d8321dfeb..18dd6fd3a10c2 100644 --- a/libclc/opencl/lib/generic/SOURCES +++ b/libclc/opencl/lib/generic/SOURCES @@ -207,4 +207,5 @@ workitem/get_global_id.cl workitem/get_global_linear_id.cl workitem/get_global_size.cl workitem/get_local_linear_id.cl +workitem/get_local_size.cl workitem/get_num_groups.cl diff --git a/libclc/opencl/lib/ptx-nvidiacl/workitem/get_local_size.cl b/libclc/opencl/lib/generic/workitem/get_local_size.cl similarity index 100% rename from libclc/opencl/lib/ptx-nvidiacl/workitem/get_local_size.cl rename to libclc/opencl/lib/generic/workitem/get_local_size.cl diff --git a/libclc/opencl/lib/ptx-nvidiacl/SOURCES b/libclc/opencl/lib/ptx-nvidiacl/SOURCES index b8e8f64b5802a..3ece564c9760e 100644 --- a/libclc/opencl/lib/ptx-nvidiacl/SOURCES +++ b/libclc/opencl/lib/ptx-nvidiacl/SOURCES @@ -3,7 +3,6 @@ workitem/get_global_id.cl workitem/get_group_id.cl workitem/get_local_id.cl workitem/get_local_linear_id.cl -workitem/get_local_size.cl workitem/get_max_sub_group_size.cl workitem/get_num_sub_groups.cl workitem/get_sub_group_id.cl >From dd9f0bf312876bd9c3b766e75c1b044fd55e0efa Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 7 Mar 2026 10:53:01 +0100 Subject: [PATCH 2/6] Use v5 implementation --- .../lib/amdgcn/workitem/clc_get_global_size.cl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl index b1d8f27dc68c8..1886ab6d1a1a0 100644 --- a/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_global_size.cl @@ -6,17 +6,15 @@ // //===----------------------------------------------------------------------===// -#include <clc/workitem/clc_get_global_size.h> +#include "clc/workitem/clc_get_global_size.h" +#include <amdhsa_abi.h> _CLC_DEF _CLC_OVERLOAD size_t __clc_get_global_size(uint dim) { - switch (dim) { - case 0: - return __builtin_amdgcn_grid_size_x(); - case 1: - return __builtin_amdgcn_grid_size_y(); - case 2: - return __builtin_amdgcn_grid_size_z(); - default: + if (dim > 2) return 1; - } + __constant amdhsa_implicit_kernarg_v5 *args = + (__constant amdhsa_implicit_kernarg_v5 *) + __builtin_amdgcn_implicitarg_ptr(); + return args->block_count[dim] * (uint)args->group_size[dim] + + (uint)args->remainder[dim]; } >From 62fe685987a904b94f282d55cc501ec7fe2f2d8e Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 7 Mar 2026 10:56:51 +0100 Subject: [PATCH 3/6] Use __clc_get_global_size --- libclc/opencl/lib/generic/workitem/get_global_size.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libclc/opencl/lib/generic/workitem/get_global_size.cl b/libclc/opencl/lib/generic/workitem/get_global_size.cl index 34d00f8fa809b..2fe343ca48c41 100644 --- a/libclc/opencl/lib/generic/workitem/get_global_size.cl +++ b/libclc/opencl/lib/generic/workitem/get_global_size.cl @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include <clc/opencl/opencl-base.h> +#include "clc/workitem/clc_get_global_size.h" _CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { - return get_num_groups(dim) * get_local_size(dim); + return __clc_get_global_size(dim); } >From 7a34a3a943c5064165b96004b5d410936da74f15 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 7 Mar 2026 11:01:58 +0100 Subject: [PATCH 4/6] Fix ptx build --- libclc/clc/lib/ptx-nvidiacl/SOURCES | 1 + .../ptx-nvidiacl/workitem/clc_get_global_size.cl | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl diff --git a/libclc/clc/lib/ptx-nvidiacl/SOURCES b/libclc/clc/lib/ptx-nvidiacl/SOURCES index cafd90943f22e..9ed25c71a3f35 100644 --- a/libclc/clc/lib/ptx-nvidiacl/SOURCES +++ b/libclc/clc/lib/ptx-nvidiacl/SOURCES @@ -5,6 +5,7 @@ math/clc_sqrt.cl mem_fence/clc_mem_fence.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_global_id.cl +workitem/clc_get_global_size.cl workitem/clc_get_group_id.cl workitem/clc_get_local_id.cl workitem/clc_get_local_size.cl diff --git a/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl new file mode 100644 index 0000000000000..262fd3b1b43a3 --- /dev/null +++ b/libclc/clc/lib/ptx-nvidiacl/workitem/clc_get_global_size.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/workitem/clc_get_global_size.h" +#include "clc/workitem/clc_get_local_size.h" +#include "clc/workitem/clc_get_num_groups.h" + +_CLC_DEF _CLC_OVERLOAD size_t __clc_get_global_size(uint dim) { + return __clc_get_num_groups(dim) * __clc_get_local_size(dim); +} >From faee8e53f24de576b881dcced8f4e4a07726f4ba Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 7 Mar 2026 11:26:59 +0100 Subject: [PATCH 5/6] Move get_local_size implementation --- .../lib/amdgcn/workitem/clc_get_local_size.cl | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl index 1e749404168d8..57d801a2d02d4 100644 --- a/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_local_size.cl @@ -6,17 +6,21 @@ // //===----------------------------------------------------------------------===// -#include <clc/workitem/clc_get_local_size.h> +#include "clc/workitem/clc_get_local_size.h" +#include <amdhsa_abi.h> _CLC_OVERLOAD _CLC_DEF size_t __clc_get_local_size(uint dim) { - switch (dim) { - case 0: - return __builtin_amdgcn_workgroup_size_x(); - case 1: - return __builtin_amdgcn_workgroup_size_y(); - case 2: - return __builtin_amdgcn_workgroup_size_z(); - default: + if (dim > 2) return 1; - } + + __constant amdhsa_implicit_kernarg_v5 *args = + (__constant amdhsa_implicit_kernarg_v5 *) + __builtin_amdgcn_implicitarg_ptr(); + + uint group_ids[3] = {__builtin_amdgcn_workgroup_id_x(), + __builtin_amdgcn_workgroup_id_y(), + __builtin_amdgcn_workgroup_id_z()}; + + return group_ids[dim] < args->block_count[dim] ? (size_t)args->group_size[dim] + : (size_t)args->remainder[dim]; } >From a2c7b88d3adee8c1403329c13ab3e9718d168285 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sat, 7 Mar 2026 10:39:39 +0100 Subject: [PATCH 6/6] libclc: Fix amdgpu get_enqueued_local_size This should not be the same as get_local_size --- .../clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl index c7226241694b3..102294e6dbb25 100644 --- a/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl +++ b/libclc/clc/lib/amdgcn/workitem/clc_get_enqueued_local_size.cl @@ -7,8 +7,11 @@ //===----------------------------------------------------------------------===// #include "clc/workitem/clc_get_enqueued_local_size.h" -#include "clc/workitem/clc_get_local_size.h" +#include <amdhsa_abi.h> _CLC_OVERLOAD _CLC_DEF size_t __clc_get_enqueued_local_size(uint dim) { - return __clc_get_local_size(dim); + __constant amdhsa_implicit_kernarg_v5 *args = + (__constant amdhsa_implicit_kernarg_v5 *) + __builtin_amdgcn_implicitarg_ptr(); + return dim < 3 ? args->group_size[dim] : 1; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
