Author: Matt Arsenault
Date: 2026-03-27T09:37:27+01:00
New Revision: 1a32a4185b84bba30474305cd915d1fbcfa1a352
URL: https://github.com/llvm/llvm-project/commit/1a32a4185b84bba30474305cd915d1fbcfa1a352 DIFF: https://github.com/llvm/llvm-project/commit/1a32a4185b84bba30474305cd915d1fbcfa1a352.diff LOG: libclc: Add subgroup scan functions (#188828) Add the base implementation using ds_swizzle which should work on all subtargets. There are at least 2 more paths available for newer targets. Added: libclc/clc/include/clc/subgroup/clc_sub_group_scan.h libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc Modified: libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h libclc/clc/lib/amdgpu/CMakeLists.txt libclc/opencl/lib/generic/CMakeLists.txt Removed: ################################################################################ diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h b/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h index d61f57860fe5b..e9fb566c54ef8 100644 --- a/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h +++ b/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h @@ -19,4 +19,6 @@ #define __CLC_BODY "clc/subgroup/clc_subgroup_broadcast.inc" #include "clc/math/gentype.inc" +#undef __CLC_FUNCTION + #endif // __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__ diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h new file mode 100644 index 0000000000000..a849ffa792758 --- /dev/null +++ b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__ +#define __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__ + +#include "clc/internal/clc.h" + +#define __CLC_BODY "clc/subgroup/clc_sub_group_scan.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "clc/subgroup/clc_sub_group_scan.inc" +#include "clc/math/gentype.inc" + +#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__ diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc new file mode 100644 index 0000000000000..924da27782f32 --- /dev/null +++ b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef __CLC_SCALAR +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE +__clc_sub_group_scan_inclusive_add(__CLC_GENTYPE x); + +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE +__clc_sub_group_scan_inclusive_min(__CLC_GENTYPE x); + +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE +__clc_sub_group_scan_inclusive_max(__CLC_GENTYPE x); + +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE +__clc_sub_group_scan_exclusive_add(__CLC_GENTYPE x); + +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE +__clc_sub_group_scan_exclusive_min(__CLC_GENTYPE x); + +_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE +__clc_sub_group_scan_exclusive_max(__CLC_GENTYPE x); +#endif diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index ea79b2294d991..186b6ab0b85b1 100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -30,6 +30,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES subgroup/clc_subgroup.cl subgroup/clc_sub_group_broadcast.cl subgroup/clc_sub_group_reduce.cl + subgroup/clc_sub_group_scan.cl synchronization/clc_sub_group_barrier.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_enqueued_local_size.cl diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc b/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc new file mode 100644 index 0000000000000..2455b0698aae1 --- /dev/null +++ b/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) + +#if (defined(__CLC_GENSIZE) && __CLC_GENSIZE <= 32) || \ + defined(__CLC_FPSIZE) && __CLC_FPSIZE <= 32 + +#define COERCE_FUNC(func, mask) \ + static _CLC_OVERLOAD __CLC_GENTYPE func(__CLC_GENTYPE x) { \ + __CLC_U_GENTYPE bitcast = __CLC_AS_U_GENTYPE(x); \ + uint ext = __clc_convert_uint(bitcast); \ + uint swizzle = __builtin_amdgcn_ds_swizzle(ext, mask); \ + __CLC_U_GENTYPE trunc = __CLC_CONVERT_U_GENTYPE(swizzle); \ + return __CLC_AS_GENTYPE(trunc); \ + } + +//------------------------------------------------------------------------------ +// Swizzle masks used in inclusive scan +//------------------------------------------------------------------------------ + +COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx2_lane0, SWIZZLE_BCASTX2_LANE0); +COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx4_lane1, SWIZZLE_BCASTX4_LANE1); +COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx8_lane3, SWIZZLE_BCASTX8_LANE3); +COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx16_lane7, SWIZZLE_BCASTX16_LANE7); +COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx32_lane15, SWIZZLE_BCASTX32_LANE15); + +//------------------------------------------------------------------------------ +// Swizzle masks used in exclusive scan adjustment +//------------------------------------------------------------------------------ + +COERCE_FUNC(__clc_amdgpu_ds_swizzle_quad_perm_shift_right1, + SWIZZLE_QUAD_PERM(0, 0, 1, 2)); + +#elif defined(__CLC_GENSIZE) && __CLC_GENSIZE == 64 || \ + defined(__CLC_FPSIZE) && __CLC_FPSIZE == 64 + +#define SPLIT_FUNC_64(func_name) \ + static _CLC_OVERLOAD __CLC_GENTYPE func_name(__CLC_GENTYPE x) { \ + uint2 vec = __clc_as_uint2(x); \ + uint2 r = {func_name(vec.lo), func_name(vec.hi)}; \ + return __CLC_AS_GENTYPE(r); \ + } + +SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx2_lane0) +SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx4_lane1) 
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx8_lane3) +SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx16_lane7) +SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx32_lane15) + +SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_quad_perm_shift_right1) + +#endif + +#endif // __CLC_SCALAR diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl new file mode 100644 index 0000000000000..3ef735aac2aae --- /dev/null +++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/clc_convert.h" +#include "clc/math/clc_fmax.h" +#include "clc/math/clc_fmin.h" +#include "clc/shared/clc_max.h" +#include "clc/shared/clc_min.h" +#include "clc/subgroup/clc_sub_group_broadcast.h" +#include "clc/subgroup/clc_sub_group_scan.h" +#include "clc/subgroup/clc_subgroup.h" + +#define QUAD_PERM (1 << 15) + +// The first basic swizzle mode (when offset[15] == 1) allows full data sharing +// between a group of 4 consecutive threads. 
+#define SWIZZLE_QUAD_PERM(S0, S1, S2, S3) \ + (uint)(QUAD_PERM | (S3 << 6) | (S2 << 4) | (S1 << 2) | S0) + +#define SWIZZLE_PAIRWISE(XOR_MASK, OR_MASK, AND_MASK) \ + (uint)((XOR_MASK << 10) | (OR_MASK << 5) | AND_MASK) + +#define SWIZZLE_BCASTX2_LANE0 SWIZZLE_PAIRWISE(0x00, 0x00, 0x1e) +#define SWIZZLE_BCASTX4_LANE1 SWIZZLE_PAIRWISE(0x00, 0x01, 0x1c) +#define SWIZZLE_BCASTX8_LANE3 SWIZZLE_PAIRWISE(0x00, 0x03, 0x18) +#define SWIZZLE_BCASTX16_LANE7 SWIZZLE_PAIRWISE(0x00, 0x07, 0x10) +#define SWIZZLE_BCASTX32_LANE15 SWIZZLE_PAIRWISE(0x00, 0x0f, 0x00) + +#define __CLC_BODY "clc_amdgpu_ds_swizzle.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "clc_amdgpu_ds_swizzle.inc" +#include "clc/math/gentype.inc" + +//------------------------------------------------------------------------------ +// Integer and fp add +//------------------------------------------------------------------------------ + +#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_add +#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_add +#define __CLC_FUNCTION_IMPL(x, y) ((x) + (y)) +#define __CLC_SUBGROUP_SCAN_ID_VAL (__CLC_GENTYPE)0 +#define __CLC_BODY "clc_sub_group_scan.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "clc_sub_group_scan.inc" +#include "clc/math/gentype.inc" + +#undef __CLC_FUNCTION_INCLUSIVE +#undef __CLC_FUNCTION_EXCLUSIVE +#undef __CLC_FUNCTION_IMPL +#undef __CLC_SUBGROUP_SCAN_ID_VAL + +//------------------------------------------------------------------------------ +// Integer and fp min +//------------------------------------------------------------------------------ + +#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_min +#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_min +#define __CLC_FUNCTION_IMPL(x, y) __clc_min(x, y) +#define __CLC_SUBGROUP_SCAN_ID_VAL __CLC_GEN_MAX +#define __CLC_BODY "clc_sub_group_scan.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY 
"clc_sub_group_scan.inc" +#include "clc/math/gentype.inc" +#undef __CLC_FUNCTION_IMPL +#undef __CLC_FUNCTION_INCLUSIVE +#undef __CLC_FUNCTION_EXCLUSIVE +#undef __CLC_SUBGROUP_SCAN_ID_VAL + +//------------------------------------------------------------------------------ +// Integer and fp max +//------------------------------------------------------------------------------ + +#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_max +#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_max +#define __CLC_FUNCTION_IMPL(x, y) __clc_max(x, y) +#define __CLC_SUBGROUP_SCAN_ID_VAL __CLC_GEN_MIN + +#define __CLC_BODY "clc_sub_group_scan.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "clc_sub_group_scan.inc" +#include "clc/math/gentype.inc" +#undef __CLC_FUNCTION_IMPL +#undef __CLC_FUNCTION_INCLUSIVE +#undef __CLC_FUNCTION_EXCLUSIVE +#undef __CLC_SUBGROUP_SCAN_ID_VAL diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc new file mode 100644 index 0000000000000..af0b3a30a0bfd --- /dev/null +++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc @@ -0,0 +1,83 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) + +#if defined(__CLC_GEN_S) +#define __CLC_GEN_MAX (__CLC_GENTYPE)((1LL << (__CLC_GENSIZE - 1)) - 1LL) +#define __CLC_GEN_MIN (__CLC_GENTYPE)(-(1LL << (__CLC_GENSIZE - 1))) +#elif defined(__CLC_GEN_U) +#define __CLC_GEN_MAX (__CLC_GENTYPE)((1ull << __CLC_GENSIZE) - 1ull) +#define __CLC_GEN_MIN (__CLC_GENTYPE)0 +#elif defined(__CLC_FPSIZE) +#define __CLC_GEN_MIN -INFINITY +#define __CLC_GEN_MAX INFINITY +#endif + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __CLC_FUNCTION_INCLUSIVE(__CLC_GENTYPE x) { + uint l = __clc_get_sub_group_local_id(); + + __CLC_GENTYPE v = __clc_amdgpu_ds_swizzle_bcastx2_lane0(x); + v = (l & 1) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; + __CLC_GENTYPE s = __CLC_FUNCTION_IMPL(x, v); + + v = __clc_amdgpu_ds_swizzle_bcastx4_lane1(s); + v = (l & 2) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; + s = __CLC_FUNCTION_IMPL(s, v); + + v = __clc_amdgpu_ds_swizzle_bcastx8_lane3(s); + v = (l & 4) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; + s = __CLC_FUNCTION_IMPL(s, v); + + v = __clc_amdgpu_ds_swizzle_bcastx16_lane7(s); + v = (l & 8) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; + s = __CLC_FUNCTION_IMPL(s, v); + + v = __clc_amdgpu_ds_swizzle_bcastx32_lane15(s); + v = (l & 16) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; + s = __CLC_FUNCTION_IMPL(s, v); + + if (__builtin_amdgcn_wavefrontsize() == 64) { + v = __clc_sub_group_broadcast(s, 31); + v = l > 31 ? v : __CLC_SUBGROUP_SCAN_ID_VAL; + s = __CLC_FUNCTION_IMPL(s, v); + } + + return s; +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __CLC_FUNCTION_EXCLUSIVE(__CLC_GENTYPE x) { + __CLC_GENTYPE s = __CLC_FUNCTION_INCLUSIVE(x); + __CLC_GENTYPE t = s; + + s = __clc_amdgpu_ds_swizzle_quad_perm_shift_right1(t); + + __CLC_GENTYPE v = __clc_amdgpu_ds_swizzle_bcastx8_lane3(t); + + uint l = __clc_get_sub_group_local_id(); + + s = ((l & 0x7) == 0x4) ? 
v : s; + + v = __clc_amdgpu_ds_swizzle_bcastx16_lane7(t); + s = ((l & 0xf) == 0x8) ? v : s; + + v = __clc_amdgpu_ds_swizzle_bcastx32_lane15(t); + s = ((l & 0x1f) == 0x10) ? v : s; + + if (__builtin_amdgcn_wavefrontsize() == 64) { + v = __clc_sub_group_broadcast(t, 31); + s = (l == 32) ? v : s; + } + + return (l == 0) ? __CLC_SUBGROUP_SCAN_ID_VAL : s; +} + +#undef __CLC_GEN_MIN +#undef __CLC_GEN_MAX + +#endif // __CLC_SCALAR diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt index f30af80e9c65e..1b8beb57e34de 100644 --- a/libclc/opencl/lib/generic/CMakeLists.txt +++ b/libclc/opencl/lib/generic/CMakeLists.txt @@ -207,6 +207,8 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES shared/vstore.cl subgroup/sub_group_broadcast.cl subgroup/sub_group_reduce.cl + subgroup/sub_group_scan_exclusive.cl + subgroup/sub_group_scan_inclusive.cl subgroup/subgroup.cl synchronization/sub_group_barrier.cl synchronization/work_group_barrier.cl diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl new file mode 100644 index 0000000000000..8825a838f3392 --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/subgroup/clc_sub_group_scan.h" + +#define __CLC_BODY "sub_group_scan_exclusive.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "sub_group_scan_exclusive.inc" +#include "clc/math/gentype.inc" diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc new file mode 100644 index 0000000000000..243637d8d824f --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) && \ + ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) || \ + defined(cl_khr_subgroup_extended_types)) + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +sub_group_scan_exclusive_add(__CLC_GENTYPE x) { + return __clc_sub_group_scan_exclusive_add(x); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +sub_group_scan_exclusive_min(__CLC_GENTYPE x) { + return __clc_sub_group_scan_exclusive_min(x); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +sub_group_scan_exclusive_max(__CLC_GENTYPE x) { + return __clc_sub_group_scan_exclusive_max(x); +} + +#endif diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl new file mode 100644 index 0000000000000..15e8120443c61 --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl @@ -0,0 +1,15 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/subgroup/clc_sub_group_scan.h" + +#define __CLC_BODY "sub_group_scan_inclusive.inc" +#include "clc/integer/gentype.inc" + +#define __CLC_BODY "sub_group_scan_inclusive.inc" +#include "clc/math/gentype.inc" diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc new file mode 100644 index 0000000000000..7f0ad6d01b153 --- /dev/null +++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__CLC_SCALAR) && \ + ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) || \ + defined(cl_khr_subgroup_extended_types)) + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +sub_group_scan_inclusive_add(__CLC_GENTYPE x) { + return __clc_sub_group_scan_inclusive_add(x); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +sub_group_scan_inclusive_min(__CLC_GENTYPE x) { + return __clc_sub_group_scan_inclusive_min(x); +} + +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE +sub_group_scan_inclusive_max(__CLC_GENTYPE x) { + return __clc_sub_group_scan_inclusive_max(x); +} + +#endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
