https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/203805
>From f3b713b66b411577d44fceaa8c37b7bf4f36de9b Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Sun, 14 Jun 2026 15:01:28 -0500 Subject: [PATCH 1/2] [libclc] Improve performance and precision of reciprocal functions Summary: Small change to improve the performance and output of functions using the reciprocal. This makes these functions *byte-for-byte* identical with their OCML counterparts in ROCm. --- libclc/clc/lib/amdgpu/CMakeLists.txt | 1 + libclc/clc/lib/amdgpu/math/clc_recip_fast.cl | 14 ++++++++++++++ libclc/clc/lib/amdgpu/math/clc_recip_fast.inc | 16 ++++++++++++++++ libclc/clc/lib/generic/CMakeLists.txt | 1 + 4 files changed, 32 insertions(+) create mode 100644 libclc/clc/lib/amdgpu/math/clc_recip_fast.cl create mode 100644 libclc/clc/lib/amdgpu/math/clc_recip_fast.inc diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index 910a0cf1765df..06fad529180d4 100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -25,6 +25,7 @@ libclc_add_sources(${LIBCLC_CLC_TARGET} FILES math/clc_native_exp.cl math/clc_native_exp2.cl math/clc_native_log10.cl + math/clc_recip_fast.cl mem_fence/clc_mem_fence.cl subgroup/clc_subgroup.cl subgroup/clc_sub_group_broadcast.cl diff --git a/libclc/clc/lib/amdgpu/math/clc_recip_fast.cl b/libclc/clc/lib/amdgpu/math/clc_recip_fast.cl new file mode 100644 index 0000000000000..32a52cf97b8e2 --- /dev/null +++ b/libclc/clc/lib/amdgpu/math/clc_recip_fast.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clc/internal/clc.h" + +#define __CLC_FUNCTION __clc_recip_fast +#define __CLC_BODY "clc_recip_fast.inc" + +#include "clc/math/gentype.inc" diff --git a/libclc/clc/lib/amdgpu/math/clc_recip_fast.inc b/libclc/clc/lib/amdgpu/math/clc_recip_fast.inc new file mode 100644 index 0000000000000..9d635cc700442 --- /dev/null +++ b/libclc/clc/lib/amdgpu/math/clc_recip_fast.inc @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// On AMDGPU the "fast" reciprocal is the hardware v_rcp_f32 approximation. +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_recip_fast(__CLC_GENTYPE x) { +#if defined(__CLC_SCALAR) && __CLC_FPSIZE == 32 + return __builtin_amdgcn_rcpf(x); +#else + return ((__CLC_GENTYPE)1.0) / x; +#endif +} diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt index 40261545fce91..673c82b002b33 100644 --- a/libclc/clc/lib/generic/CMakeLists.txt +++ b/libclc/clc/lib/generic/CMakeLists.txt @@ -231,4 +231,5 @@ libclc_set_source_options(-fapprox-func libclc_set_source_options(-cl-fp32-correctly-rounded-divide-sqrt math/clc_div_cr.cl math/clc_sqrt_cr.cl + math/clc_tanpi.cl ) >From 6161dd207c8191cc9d1e509de7dd3be007ffb4fc Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Sun, 14 Jun 2026 19:41:05 -0500 Subject: [PATCH 2/2] comments --- libclc/clc/lib/amdgpu/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt index 06fad529180d4..1111dc64f605c 
100644 --- a/libclc/clc/lib/amdgpu/CMakeLists.txt +++ b/libclc/clc/lib/amdgpu/CMakeLists.txt @@ -52,4 +52,5 @@ libclc_set_source_options(-fapprox-func math/clc_native_exp.cl math/clc_native_exp2.cl math/clc_native_log10.cl + math/clc_recip_fast.cl ) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
