Author: Matt Arsenault Date: 2026-03-27T09:53:35+01:00 New Revision: 56e1510d21f0782af95b32ee41fe799fa63b93d1
URL: https://github.com/llvm/llvm-project/commit/56e1510d21f0782af95b32ee41fe799fa63b93d1 DIFF: https://github.com/llvm/llvm-project/commit/56e1510d21f0782af95b32ee41fe799fa63b93d1.diff LOG: libclc: Add work group scan functions (#188829) Added: libclc/clc/include/clc/clc_target_defines.h libclc/clc/include/clc/collective/clc_work_group_scan.h libclc/clc/include/clc/collective/clc_work_group_scan_decl.inc libclc/clc/lib/generic/collective/clc_work_group_scan.cl libclc/clc/lib/generic/collective/clc_work_group_scan.inc libclc/opencl/lib/generic/collective/work_group_scan.cl libclc/opencl/lib/generic/collective/work_group_scan.inc Modified: libclc/clc/lib/generic/CMakeLists.txt libclc/opencl/lib/generic/CMakeLists.txt Removed: ################################################################################ diff --git a/libclc/clc/include/clc/clc_target_defines.h b/libclc/clc/include/clc/clc_target_defines.h new file mode 100644 index 0000000000000..2a6aa75409432 --- /dev/null +++ b/libclc/clc/include/clc/clc_target_defines.h @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_CLC_TARGET_DEFINES_H__
+#define __CLC_CLC_TARGET_DEFINES_H__
+// Compile-time limits: 1024 / 32 when targeting AMDGPU or NVPTX, 4096 / 1 otherwise.
+#if defined(__AMDGPU__) || defined(__NVPTX__)
+#define __CLC_MAX_WORK_GROUP_SIZE 1024
+#define __CLC_MIN_NATIVE_SUB_GROUP_SIZE 32
+#else
+#define __CLC_MAX_WORK_GROUP_SIZE 4096
+#define __CLC_MIN_NATIVE_SUB_GROUP_SIZE 1
+#endif
+// Quotient of the two limits above. NOTE(review): despite the name this looks like a bound on sub-groups per work-group - confirm.
+#define __CLC_MAX_NUM_WORK_GROUPS \
+  (__CLC_MAX_WORK_GROUP_SIZE / __CLC_MIN_NATIVE_SUB_GROUP_SIZE)
+
+#endif // __CLC_CLC_TARGET_DEFINES_H__
diff --git a/libclc/clc/include/clc/collective/clc_work_group_scan.h b/libclc/clc/include/clc/collective/clc_work_group_scan.h
new file mode 100644
index 0000000000000..e95c3e81e550c
--- /dev/null
+++ b/libclc/clc/include/clc/collective/clc_work_group_scan.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_COLLECTIVE_CLC_WORK_GROUP_SCAN_H__ +#define __CLC_COLLECTIVE_CLC_WORK_GROUP_SCAN_H__ + +#include "clc/internal/clc.h" + +#define __CLC_BODY "clc/collective/clc_work_group_scan_decl.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "clc/collective/clc_work_group_scan_decl.inc" +#include "clc/math/gentype.inc" + +#endif // __CLC_COLLECTIVE_CLC_WORK_GROUP_SCAN_H__ diff --git a/libclc/clc/include/clc/collective/clc_work_group_scan_decl.inc b/libclc/clc/include/clc/collective/clc_work_group_scan_decl.inc new file mode 100644 index 0000000000000..a7a73bc9f8b95 --- /dev/null +++ b/libclc/clc/include/clc/collective/clc_work_group_scan_decl.inc @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Work-group scan prototypes; emitted only for scalar types that define __CLC_FPSIZE or have a 32/64-bit __CLC_GENSIZE.
+#if defined(__CLC_SCALAR) && \
+    (defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64)
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_work_group_scan_inclusive_add(__CLC_GENTYPE x);
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_work_group_scan_inclusive_min(__CLC_GENTYPE x);
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_work_group_scan_inclusive_max(__CLC_GENTYPE x);
+// Exclusive counterparts of the three declarations above.
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_work_group_scan_exclusive_add(__CLC_GENTYPE x);
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_work_group_scan_exclusive_min(__CLC_GENTYPE x);
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_work_group_scan_exclusive_max(__CLC_GENTYPE x);
+
+#endif
diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt
index 03f4aa3e55a0a..168a0f1ff1e84 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -18,6 +18,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   atomic/clc_atomic_store.cl
   collective/clc_work_group_any_all.cl
   collective/clc_work_group_broadcast.cl
+  collective/clc_work_group_scan.cl
   collective/clc_work_group_reduce.cl
   common/clc_degrees.cl
   common/clc_radians.cl
diff --git a/libclc/clc/lib/generic/collective/clc_work_group_scan.cl b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl
new file mode 100644
index 0000000000000..ae333cd9b8cdf
--- /dev/null
+++ b/libclc/clc/lib/generic/collective/clc_work_group_scan.cl
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clc/collective/clc_work_group_scan.h" + +#include "clc/clc_target_defines.h" + +#include "clc/atomic/clc_atomic_load.h" +#include "clc/atomic/clc_atomic_store.h" +#include "clc/math/clc_fmax.h" +#include "clc/math/clc_fmin.h" +#include "clc/shared/clc_max.h" +#include "clc/shared/clc_min.h" + +#include "clc/subgroup/clc_sub_group_scan.h" +#include "clc/subgroup/clc_subgroup.h" +#include "clc/synchronization/clc_work_group_barrier.h" +#include "clc/workitem/clc_get_num_sub_groups.h" +#include "clc/workitem/clc_get_sub_group_id.h" +#include "clc/workitem/clc_get_sub_group_local_id.h" + +#pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable + +enum __CLC_WORK_GROUP_SCAN_OP { + __CLC_WORK_GROUP_SCAN_ADD, + __CLC_WORK_GROUP_SCAN_MIN, + __CLC_WORK_GROUP_SCAN_MAX +}; + +#define __CLC_BODY "clc_work_group_scan.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "clc_work_group_scan.inc" +#include "clc/math/gentype.inc" diff --git a/libclc/clc/lib/generic/collective/clc_work_group_scan.inc b/libclc/clc/lib/generic/collective/clc_work_group_scan.inc new file mode 100644 index 0000000000000..d3e27aee9c886 --- /dev/null +++ b/libclc/clc/lib/generic/collective/clc_work_group_scan.inc @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) && \ + (defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) + +static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE sub_group_scan_op( + __CLC_GENTYPE x, enum __CLC_WORK_GROUP_SCAN_OP opcode, bool inclusive) { + switch (opcode) { + case __CLC_WORK_GROUP_SCAN_ADD: + return inclusive ? __clc_sub_group_scan_inclusive_add(x) + : __clc_sub_group_scan_exclusive_add(x); + case __CLC_WORK_GROUP_SCAN_MIN: + return inclusive ? __clc_sub_group_scan_inclusive_min(x) + : __clc_sub_group_scan_exclusive_min(x); + case __CLC_WORK_GROUP_SCAN_MAX: + return inclusive ? __clc_sub_group_scan_inclusive_max(x) + : __clc_sub_group_scan_exclusive_max(x); + } +} + +static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE scan_op( + __CLC_GENTYPE x, __CLC_GENTYPE y, enum __CLC_WORK_GROUP_SCAN_OP opcode) { + switch (opcode) { + case __CLC_WORK_GROUP_SCAN_ADD: + return x + y; + case __CLC_WORK_GROUP_SCAN_MIN: + return __clc_min(x, y); + case __CLC_WORK_GROUP_SCAN_MAX: + return __clc_max(x, y); + } +} + +#ifdef __CLC_FPSIZE +static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE work_group_scan_identity_value( + __CLC_GENTYPE x, enum __CLC_WORK_GROUP_SCAN_OP opcode) { + (void)x; + switch (opcode) { + case __CLC_WORK_GROUP_SCAN_ADD: + return __CLC_FP_LIT(0.0); + case __CLC_WORK_GROUP_SCAN_MIN: + return (__CLC_GENTYPE)INFINITY; + case __CLC_WORK_GROUP_SCAN_MAX: + return (__CLC_GENTYPE)-INFINITY; + } +} +#else +static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE work_group_scan_identity_value( + __CLC_GENTYPE x, enum __CLC_WORK_GROUP_SCAN_OP opcode) { + (void)x; + switch (opcode) { + case __CLC_WORK_GROUP_SCAN_ADD: + return (__CLC_GENTYPE)0; + case __CLC_WORK_GROUP_SCAN_MIN: +#ifdef __CLC_GEN_S + return (__CLC_GENTYPE)LONG_MAX; +#else + return (__CLC_GENTYPE)ULONG_MAX; +#endif + case __CLC_WORK_GROUP_SCAN_MAX: +#ifdef __CLC_GEN_S + return 
(__CLC_GENTYPE)LONG_MIN; +#else + return (__CLC_GENTYPE)0; +#endif + } +} +#endif + +static _CLC_OVERLOAD __CLC_GENTYPE __clc_work_group_scan_impl( + __CLC_GENTYPE a, enum __CLC_WORK_GROUP_SCAN_OP opcode, bool inclusive) { + uint n = __clc_get_num_sub_groups(); + __CLC_GENTYPE t = sub_group_scan_op(a, opcode, inclusive); + if (n == 1) + return t; + + __local __CLC_GENTYPE scratch[__CLC_MAX_NUM_WORK_GROUPS]; + uint l = __clc_get_sub_group_local_id(); + uint i = __clc_get_sub_group_id(); + + if (l == __clc_get_sub_group_size() - 1u) { + __CLC_GENTYPE store_val = inclusive ? t : scan_op(a, t, opcode); + __scoped_atomic_store_n(&scratch[i], store_val, __ATOMIC_RELAXED, + __MEMORY_SCOPE_WRKGRP); + } + + __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL); + + if (i == 0) { + __CLC_GENTYPE s = + l < n ? __scoped_atomic_load_n(&scratch[l], __ATOMIC_RELAXED, + __MEMORY_SCOPE_WRKGRP) + : work_group_scan_identity_value(t, opcode); + s = sub_group_scan_op(s, opcode, /*inclusive=*/true); + if (l < n) { + __scoped_atomic_store_n(&scratch[l], s, __ATOMIC_RELAXED, + __MEMORY_SCOPE_WRKGRP); + } + } + + __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL); + + __CLC_GENTYPE ret = t; + if (i != 0) { + __CLC_GENTYPE load_scratch = __scoped_atomic_load_n( + &scratch[i - 1], __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); + ret = scan_op(t, load_scratch, opcode); + } + + __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL); + return ret; +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +__clc_work_group_scan_inclusive_add(__CLC_GENTYPE a) { + return __clc_work_group_scan_impl(a, __CLC_WORK_GROUP_SCAN_ADD, + /*inclusive=*/true); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +__clc_work_group_scan_inclusive_min(__CLC_GENTYPE a) { + return __clc_work_group_scan_impl(a, __CLC_WORK_GROUP_SCAN_MIN, + /*inclusive=*/true); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +__clc_work_group_scan_inclusive_max(__CLC_GENTYPE a) { + return __clc_work_group_scan_impl(a, 
__CLC_WORK_GROUP_SCAN_MAX, + /*inclusive=*/true); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +__clc_work_group_scan_exclusive_add(__CLC_GENTYPE a) { + return __clc_work_group_scan_impl(a, __CLC_WORK_GROUP_SCAN_ADD, + /*inclusive=*/false); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +__clc_work_group_scan_exclusive_min(__CLC_GENTYPE a) { + return __clc_work_group_scan_impl(a, __CLC_WORK_GROUP_SCAN_MIN, + /*inclusive=*/false); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +__clc_work_group_scan_exclusive_max(__CLC_GENTYPE a) { + return __clc_work_group_scan_impl(a, __CLC_WORK_GROUP_SCAN_MAX, + /*inclusive=*/false); +} + +#endif diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index 1b8beb57e34de..e6565c7f9ed67 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -46,6 +46,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES collective/work_group_any_all.cl collective/work_group_broadcast.cl collective/work_group_reduce.cl + collective/work_group_scan.cl common/degrees.cl common/mix.cl common/radians.cl diff --git a/libclc/opencl/lib/generic/collective/work_group_scan.cl b/libclc/opencl/lib/generic/collective/work_group_scan.cl new file mode 100644 index 0000000000000..7794fdd30a676 --- /dev/null +++ b/libclc/opencl/lib/generic/collective/work_group_scan.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/collective/clc_work_group_scan.h"
+
+#define __CLC_BODY "work_group_scan.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "work_group_scan.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/opencl/lib/generic/collective/work_group_scan.inc b/libclc/opencl/lib/generic/collective/work_group_scan.inc
new file mode 100644
index 0000000000000..61035bbef1d2a
--- /dev/null
+++ b/libclc/opencl/lib/generic/collective/work_group_scan.inc
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// NOTE(review): the guard is spelled ">= 32" here but "== 32 || == 64" on the __clc_* declarations - confirm both select the same types.
+#if defined(__CLC_SCALAR) && (defined(__CLC_FPSIZE) || __CLC_GENSIZE >= 32)
+// Each function below forwards its argument to the __clc_-prefixed function of the same name.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+work_group_scan_inclusive_add(__CLC_GENTYPE a) {
+  return __clc_work_group_scan_inclusive_add(a);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+work_group_scan_inclusive_min(__CLC_GENTYPE a) {
+  return __clc_work_group_scan_inclusive_min(a);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+work_group_scan_inclusive_max(__CLC_GENTYPE a) {
+  return __clc_work_group_scan_inclusive_max(a);
+}
+// Exclusive variants.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+work_group_scan_exclusive_add(__CLC_GENTYPE a) {
+  return __clc_work_group_scan_exclusive_add(a);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+work_group_scan_exclusive_min(__CLC_GENTYPE a) {
+  return __clc_work_group_scan_exclusive_min(a);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+work_group_scan_exclusive_max(__CLC_GENTYPE a) {
+  return __clc_work_group_scan_exclusive_max(a);
+}
+
+#endif
_______________________________________________
cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
