https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/185294
>From cb492cc5282c3ffbeff2c4f4c10a1980452aff96 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Sun, 8 Mar 2026 16:07:09 +0100 Subject: [PATCH] libclc: Add sub_group_reduce_* functions --- .../clc/subgroup/clc_sub_group_reduce.h | 20 +++ .../clc/subgroup/clc_sub_group_reduce.inc | 18 +++ libclc/clc/lib/amdgcn/CMakeLists.txt | 1 + .../lib/amdgcn/subgroup/sub_group_reduce.cl | 145 ++++++++++++++++++ libclc/opencl/lib/generic/CMakeLists.txt | 1 + .../lib/generic/subgroup/sub_group_reduce.cl | 15 ++ .../lib/generic/subgroup/sub_group_reduce.inc | 28 ++++ 7 files changed, 228 insertions(+) create mode 100644 libclc/clc/include/clc/subgroup/clc_sub_group_reduce.h create mode 100644 libclc/clc/include/clc/subgroup/clc_sub_group_reduce.inc create mode 100644 libclc/clc/lib/amdgcn/subgroup/sub_group_reduce.cl create mode 100644 libclc/opencl/lib/generic/subgroup/sub_group_reduce.cl create mode 100644 libclc/opencl/lib/generic/subgroup/sub_group_reduce.inc diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_reduce.h b/libclc/clc/include/clc/subgroup/clc_sub_group_reduce.h new file mode 100644 index 0000000000000..6454b9915dffe --- /dev/null +++ b/libclc/clc/include/clc/subgroup/clc_sub_group_reduce.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_REDUCE_H__ +#define __CLC_SUBGROUP_CLC_SUB_GROUP_REDUCE_H__ + +#include "clc/internal/clc.h" + +#define __CLC_BODY <clc/subgroup/clc_sub_group_reduce.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <clc/subgroup/clc_sub_group_reduce.inc> +#include <clc/math/gentype.inc> + +#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_REDUCE_H__ diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_reduce.inc b/libclc/clc/include/clc/subgroup/clc_sub_group_reduce.inc new file mode 100644 index 0000000000000..57ceb31c9ee1d --- /dev/null +++ b/libclc/clc/include/clc/subgroup/clc_sub_group_reduce.inc @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) +_CLC_OVERLOAD _CLC_DECL _CLC_CONST __CLC_GENTYPE +__clc_sub_group_reduce_add(__CLC_GENTYPE x); + +_CLC_OVERLOAD _CLC_DECL _CLC_CONST __CLC_GENTYPE +__clc_sub_group_reduce_min(__CLC_GENTYPE x); + +_CLC_OVERLOAD _CLC_DECL _CLC_CONST __CLC_GENTYPE +__clc_sub_group_reduce_max(__CLC_GENTYPE x); +#endif diff --git a/libclc/clc/lib/amdgcn/CMakeLists.txt b/libclc/clc/lib/amdgcn/CMakeLists.txt index 12bbba2d6566d..0ab4ab691b2ec 100644 --- a/libclc/clc/lib/amdgcn/CMakeLists.txt +++ b/libclc/clc/lib/amdgcn/CMakeLists.txt @@ -5,6 +5,7 @@ libclc_configure_source_list(CLC_AMDGCN_SOURCES mem_fence/clc_mem_fence.cl subgroup/subgroup.cl subgroup/sub_group_broadcast.cl + subgroup/sub_group_reduce.cl synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_enqueued_local_size.cl diff --git a/libclc/clc/lib/amdgcn/subgroup/sub_group_reduce.cl b/libclc/clc/lib/amdgcn/subgroup/sub_group_reduce.cl new file mode 100644 index 0000000000000..7ab49f0fa3b3e --- /dev/null +++ b/libclc/clc/lib/amdgcn/subgroup/sub_group_reduce.cl @@ -0,0 +1,145 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/subgroup/clc_sub_group_reduce.h" + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_sub_group_reduce_add(uint x) { + return __builtin_amdgcn_wave_reduce_add_u32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_reduce_add(int x) { + return (int)__clc_sub_group_reduce_add((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong __clc_sub_group_reduce_add(ulong x) { + return __builtin_amdgcn_wave_reduce_add_u64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST long __clc_sub_group_reduce_add(long x) { + return (long)__clc_sub_group_reduce_add((ulong)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_sub_group_reduce_min(uint x) { + return __builtin_amdgcn_wave_reduce_min_u32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_reduce_min(int x) { + return __builtin_amdgcn_wave_reduce_min_i32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong __clc_sub_group_reduce_min(ulong x) { + return __builtin_amdgcn_wave_reduce_min_u64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST long __clc_sub_group_reduce_min(long x) { + return __builtin_amdgcn_wave_reduce_min_i64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_sub_group_reduce_max(uint x) { + return __builtin_amdgcn_wave_reduce_max_u32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_reduce_max(int x) { + return __builtin_amdgcn_wave_reduce_max_i32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST ulong __clc_sub_group_reduce_max(ulong x) { + return __builtin_amdgcn_wave_reduce_max_u64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST long __clc_sub_group_reduce_max(long x) { + return __builtin_amdgcn_wave_reduce_max_i64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST float __clc_sub_group_reduce_add(float x) { + return __builtin_amdgcn_wave_reduce_fadd_f32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST double 
__clc_sub_group_reduce_add(double x) { + return __builtin_amdgcn_wave_reduce_fadd_f64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST float __clc_sub_group_reduce_min(float x) { + return __builtin_amdgcn_wave_reduce_fmin_f32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST double __clc_sub_group_reduce_min(double x) { + return __builtin_amdgcn_wave_reduce_fmin_f64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST float __clc_sub_group_reduce_max(float x) { + return __builtin_amdgcn_wave_reduce_fmax_f32(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST double __clc_sub_group_reduce_max(double x) { + return __builtin_amdgcn_wave_reduce_fmax_f64(x, 0); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST half __clc_sub_group_reduce_add(half x) { + // FIXME: There should be a direct half builtin available. + return (half)__clc_sub_group_reduce_add((float)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST half __clc_sub_group_reduce_min(half x) { + // FIXME: There should be a direct half builtin available. + return (half)__clc_sub_group_reduce_min((float)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST half __clc_sub_group_reduce_max(half x) { + // FIXME: There should be a direct half builtin available. + return (half)__clc_sub_group_reduce_max((float)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar __clc_sub_group_reduce_add(uchar x) { + return (uchar)__clc_sub_group_reduce_add((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST char __clc_sub_group_reduce_add(char x) { + return (char)__clc_sub_group_reduce_add((int)x); +} + +// FIXME: There should be a direct short builtin available. 
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort __clc_sub_group_reduce_add(ushort x) { + return (ushort)__clc_sub_group_reduce_add((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST short __clc_sub_group_reduce_add(short x) { + return (short)__clc_sub_group_reduce_add((int)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar __clc_sub_group_reduce_min(uchar x) { + return (uchar)__clc_sub_group_reduce_min((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST char __clc_sub_group_reduce_min(char x) { + return (char)__clc_sub_group_reduce_min((int)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort __clc_sub_group_reduce_min(ushort x) { + return (ushort)__clc_sub_group_reduce_min((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST short __clc_sub_group_reduce_min(short x) { + return (short)__clc_sub_group_reduce_min((int)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST uchar __clc_sub_group_reduce_max(uchar x) { + return (uchar)__clc_sub_group_reduce_max((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST char __clc_sub_group_reduce_max(char x) { + return (char)__clc_sub_group_reduce_max((int)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST ushort __clc_sub_group_reduce_max(ushort x) { + return (ushort)__clc_sub_group_reduce_max((uint)x); +} + +_CLC_DEF _CLC_OVERLOAD _CLC_CONST short __clc_sub_group_reduce_max(short x) { + return (short)__clc_sub_group_reduce_max((int)x); +} diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index ea36c741f3fee..d380b8b6becfa 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -205,6 +205,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES shared/vload.cl shared/vstore.cl subgroup/sub_group_broadcast.cl + subgroup/sub_group_reduce.cl subgroup/subgroup.cl subnormal_config.cl synchronization/sub_group_barrier.cl diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_reduce.cl b/libclc/opencl/lib/generic/subgroup/sub_group_reduce.cl new file mode 100644 index 
0000000000000..1614fafd0c6eb --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/sub_group_reduce.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/subgroup/clc_sub_group_reduce.h" + +#define __CLC_BODY <sub_group_reduce.inc> +#include <clc/integer/gentype.inc> + +#define __CLC_BODY <sub_group_reduce.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_reduce.inc b/libclc/opencl/lib/generic/subgroup/sub_group_reduce.inc new file mode 100644 index 0000000000000..b546add0d7601 --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/sub_group_reduce.inc @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) && \ + ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) || \ + defined(cl_khr_subgroup_extended_types)) + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE +sub_group_reduce_add(__CLC_GENTYPE x) { + return __clc_sub_group_reduce_add(x); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE +sub_group_reduce_min(__CLC_GENTYPE x) { + return __clc_sub_group_reduce_min(x); +} + +_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_GENTYPE +sub_group_reduce_max(__CLC_GENTYPE x) { + return __clc_sub_group_reduce_max(x); +} + +#endif _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
