https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/187457
>From ceb301247623d2729849f1bab06329d0413c46d6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Wed, 18 Mar 2026 13:55:07 +0100 Subject: [PATCH] libclc: Really implement half trig functions Previously these just cast to float. --- .../clc/include/clc/math/clc_sincos_helpers.h | 6 ++++ .../clc/math/clc_sincos_helpers_fp16_decl.inc | 19 ++++++++++ libclc/clc/lib/generic/math/clc_cos.inc | 21 +++++++++-- libclc/clc/lib/generic/math/clc_sin.inc | 22 ++++++++++-- libclc/clc/lib/generic/math/clc_sincos.inc | 28 ++++++++++++--- .../lib/generic/math/clc_sincos_helpers.cl | 6 ++++ .../generic/math/clc_sincos_helpers_fp16.inc | 36 +++++++++++++++++++ libclc/clc/lib/generic/math/clc_tan.inc | 25 ++++++++++++- 8 files changed, 154 insertions(+), 9 deletions(-) create mode 100644 libclc/clc/include/clc/math/clc_sincos_helpers_fp16_decl.inc create mode 100644 libclc/clc/lib/generic/math/clc_sincos_helpers_fp16.inc diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.h b/libclc/clc/include/clc/math/clc_sincos_helpers.h index 4dd6ac74354ad..b2111f2d01c85 100644 --- a/libclc/clc/include/clc/math/clc_sincos_helpers.h +++ b/libclc/clc/include/clc/math/clc_sincos_helpers.h @@ -19,4 +19,10 @@ #include "clc/math/gentype.inc" +#ifdef cl_khr_fp16 +#define __CLC_HALF_ONLY +#define __CLC_BODY "clc/math/clc_sincos_helpers_fp16_decl.inc" +#include "clc/math/gentype.inc" +#endif + #endif // __CLC_MATH_CLC_SINCOS_HELPERS_H__ diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers_fp16_decl.inc b/libclc/clc/include/clc/math/clc_sincos_helpers_fp16_decl.inc new file mode 100644 index 0000000000000..a490097c6548f --- /dev/null +++ b/libclc/clc/include/clc/math/clc_sincos_helpers_fp16_decl.inc @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +typedef struct __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) { + __CLC_GENTYPE sin, cos; +} __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE); + +#define __CLC_SINCOS_RET_GENTYPE __CLC_XCONCAT(__clc_sincos_ret_, __CLC_GENTYPE) + +_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE +__clc_sincos_reduced_eval(__CLC_HALFN x); + +_CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_HALFN *r, + __CLC_HALFN x); diff --git a/libclc/clc/lib/generic/math/clc_cos.inc b/libclc/clc/lib/generic/math/clc_cos.inc index 8f6d2391e50c0..9f6125a9da8e3 100644 --- a/libclc/clc/lib/generic/math/clc_cos.inc +++ b/libclc/clc/lib/generic/math/clc_cos.inc @@ -23,8 +23,25 @@ _CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_cos(__CLC_FLOATN x) { #elif __CLC_FPSIZE == 16 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cos(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(__clc_cos(__CLC_CONVERT_FLOATN(x))); +_CLC_OVERLOAD _CLC_DEF __CLC_HALFN __clc_cos(__CLC_HALFN x) { + x = __clc_select(x, __CLC_GENTYPE_NAN, + __CLC_CONVERT_S_GENTYPE(__clc_isinf(x))); + + __CLC_HALFN absx = __clc_fabs(x); + + __CLC_HALFN reduced; + __CLC_INTN n = __clc_argReductionS(&reduced, absx); + + __CLC_SINCOS_RET_GENTYPE eval = __clc_sincos_reduced_eval(reduced); + + __CLC_HALFN c = __CLC_CONVERT_S_GENTYPE((n & 1) == 0) ? eval.cos : -eval.sin; + + __CLC_S_GENTYPE flip = __CLC_CONVERT_S_GENTYPE(n > 1) + ? (__CLC_S_GENTYPE)SIGNBIT_FP16 + : (__CLC_S_GENTYPE)0; + + __CLC_S_GENTYPE result_i = __CLC_AS_SHORTN(c) ^ flip; + return __CLC_AS_HALFN(result_i); } #elif __CLC_FPSIZE == 64 diff --git a/libclc/clc/lib/generic/math/clc_sin.inc b/libclc/clc/lib/generic/math/clc_sin.inc index 3e839fdf43f17..9a3ad8989f45d 100644 --- a/libclc/clc/lib/generic/math/clc_sin.inc +++ b/libclc/clc/lib/generic/math/clc_sin.inc @@ -25,8 +25,26 @@ _CLC_OVERLOAD _CLC_DEF __CLC_FLOATN __clc_sin(__CLC_FLOATN x) { #elif __CLC_FPSIZE == 16 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sin(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(__clc_sin(__CLC_CONVERT_FLOATN(x))); +_CLC_OVERLOAD _CLC_DEF __CLC_HALFN __clc_sin(__CLC_HALFN x) { + x = __clc_select(x, __CLC_GENTYPE_NAN, + __CLC_CONVERT_S_GENTYPE(__clc_isinf(x))); + + __CLC_HALFN absx = __clc_fabs(x); + + __CLC_HALFN reduced; + __CLC_INTN n = __clc_argReductionS(&reduced, absx); + + __CLC_SINCOS_RET_GENTYPE eval = __clc_sincos_reduced_eval(reduced); + + __CLC_HALFN s = __CLC_CONVERT_S_GENTYPE((n & 1) == 0) ? eval.sin : eval.cos; + __CLC_S_GENTYPE flip = __CLC_CONVERT_S_GENTYPE(n > 1) + ? (__CLC_S_GENTYPE)SIGNBIT_FP16 + : (__CLC_S_GENTYPE)0; + + __CLC_S_GENTYPE result_i = + __CLC_AS_SHORTN(s) ^ + (flip ^ (__CLC_AS_SHORTN(x) & (__CLC_S_GENTYPE)SIGNBIT_FP16)); + return __CLC_AS_HALFN(result_i); } #elif __CLC_FPSIZE == 64 diff --git a/libclc/clc/lib/generic/math/clc_sincos.inc b/libclc/clc/lib/generic/math/clc_sincos.inc index 944b8032fae1a..c16c601409ce9 100644 --- a/libclc/clc/lib/generic/math/clc_sincos.inc +++ b/libclc/clc/lib/generic/math/clc_sincos.inc @@ -33,10 +33,30 @@ __clc_sincos(__CLC_FLOATN x, private __CLC_FLOATN *cos_out) { _CLC_OVERLOAD _CLC_DEF __CLC_HALFN __clc_sincos(__CLC_HALFN x, private __CLC_HALFN *cos_out) { - __CLC_FLOATN cos_result; - __CLC_FLOATN sin_result = __clc_sincos(__CLC_CONVERT_FLOATN(x), &cos_result); - *cos_out = __CLC_CONVERT_HALFN(cos_result); - return __CLC_CONVERT_HALFN(sin_result); + x = __clc_select(x, __CLC_GENTYPE_NAN, + __CLC_CONVERT_S_GENTYPE(__clc_isinf(x))); + + __CLC_HALFN absx = __clc_fabs(x); + + __CLC_HALFN reduced; + __CLC_INTN n = __clc_argReductionS(&reduced, absx); + + __CLC_SINCOS_RET_GENTYPE eval = __clc_sincos_reduced_eval(reduced); + + __CLC_HALFN s = __CLC_CONVERT_S_GENTYPE((n & 1) == 0) ? eval.sin : eval.cos; + __CLC_S_GENTYPE flip = __CLC_CONVERT_S_GENTYPE(n > 1) + ? (__CLC_S_GENTYPE)SIGNBIT_FP16 + : (__CLC_S_GENTYPE)0; + + __CLC_S_GENTYPE sin_result_i = + __CLC_AS_SHORTN(s) ^ + (flip ^ (__CLC_AS_SHORTN(x) & (__CLC_S_GENTYPE)SIGNBIT_FP16)); + + __CLC_HALFN c = __CLC_CONVERT_S_GENTYPE((n & 1) == 0) ? eval.cos : -eval.sin; + + __CLC_S_GENTYPE cos_result_i = __CLC_AS_SHORTN(c) ^ flip; + *cos_out = __CLC_AS_HALFN(cos_result_i); + return __CLC_AS_HALFN(sin_result_i); } #elif __CLC_FPSIZE == 64 diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl index 8c899c08d57a3..bb41e6cc67ed0 100644 --- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl @@ -44,3 +44,9 @@ #include "clc/math/gentype.inc" #endif + +#ifdef cl_khr_fp16 +#define __CLC_HALF_ONLY +#define __CLC_BODY "clc_sincos_helpers_fp16.inc" +#include "clc/math/gentype.inc" +#endif diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp16.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp16.inc new file mode 100644 index 0000000000000..31ba31b6e3c30 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp16.inc @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma OPENCL FP_CONTRACT OFF + +_CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_HALFN *r_lo, + __CLC_HALFN x) { + const __CLC_FLOATN twobypi = 0x1.45f306p-1f; + const __CLC_FLOATN pb2_a = 0x1.92p+0f; + const __CLC_FLOATN pb2_b = 0x1.fap-12f; + const __CLC_FLOATN pb2_c = 0x1.54442ep-20f; + + __CLC_FLOATN x_float = __CLC_CONVERT_FLOATN(x); + __CLC_FLOATN rint = __clc_rint(x_float * twobypi); + + __CLC_FLOATN result = __clc_mad( + rint, -pb2_c, __clc_mad(rint, -pb2_b, __clc_mad(rint, -pb2_a, x_float))); + + *r_lo = __CLC_CONVERT_HALFN(result); + return __CLC_CONVERT_INTN(rint) & 0x3; +} + +_CLC_DEF _CLC_OVERLOAD __CLC_SINCOS_RET_GENTYPE +__clc_sincos_reduced_eval(__CLC_HALFN x) { + __CLC_HALFN t = x * x; + + __CLC_SINCOS_RET_GENTYPE ret; + ret.cos = __clc_mad(t, __clc_mad(t, 0x1.4b4p-5h, -0x1.ffcp-2h), 1.0h); + ret.sin = __clc_mad(x, t * __clc_mad(t, 0x1.0bp-7h, -0x1.554p-3h), x); + return ret; +} diff --git a/libclc/clc/lib/generic/math/clc_tan.inc b/libclc/clc/lib/generic/math/clc_tan.inc index e4180d1047651..8b6d440560519 100644 --- a/libclc/clc/lib/generic/math/clc_tan.inc +++ b/libclc/clc/lib/generic/math/clc_tan.inc @@ -60,8 +60,31 @@ _CLC_DEF _CLC_OVERLOAD __CLC_DOUBLEN __clc_tan(__CLC_DOUBLEN x) { #elif __CLC_FPSIZE == 16 +_CLC_DEF _CLC_OVERLOAD __CLC_HALFN __clc_tan_reduced_eval(__CLC_HALFN x, + __CLC_INTN is_odd) { + __CLC_HALFN s = x * x; + + __CLC_HALFN t = + __clc_mad(s, __clc_mad(s, 0x1.794p-4h, 0x1.e3cp-4h), 0x1.57p-2h); + t = __clc_mad(x, s * t, x); + + __CLC_HALFN tr = -1.0h / t; + return __CLC_CONVERT_SHORTN(is_odd) ? tr : t; +} + _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tan(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(__clc_tan(__CLC_CONVERT_FLOATN(x))); + x = __clc_select(x, __CLC_GENTYPE_NAN, __CLC_CONVERT_SHORTN(__clc_isinf(x))); + __CLC_HALFN absx = __clc_fabs(x); + + __CLC_HALFN reduced; + __CLC_INTN n = __clc_argReductionS(&reduced, absx); + + __CLC_HALFN t = __clc_tan_reduced_eval(reduced, (n & 1) != 0); + + __CLC_S_GENTYPE result_i = + (__CLC_AS_SHORTN(t) ^ + (__CLC_AS_SHORTN(x) & (__CLC_S_GENTYPE)SIGNBIT_FP16)); + return __CLC_AS_HALFN(result_i); } #endif _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
