https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/133696
>From b927766ee9d6e8a19af3cfcd5a05f7dcd2197ced Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Mon, 31 Mar 2025 11:37:43 +0100 Subject: [PATCH] [libclc][amdgpu] Implement native_exp2 via AMD builtin This came up during a discussion on #129679 and has been split out as a preparatory commit. An example of the AMDGPU codegen is: define <2 x float> @_Z10native_expDv2_f(<2 x float> %val) { entry: %mul = fmul afn <2 x float> %val, splat (float 0x3FF7154760000000) %0 = extractelement <2 x float> %mul, i64 0 %1 = tail call float @llvm.amdgcn.exp2.f32(float %0) %vecinit.i = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %mul, i64 1 %3 = tail call float @llvm.amdgcn.exp2.f32(float %2) %vecinit2.i = insertelement <2 x float> %vecinit.i, float %3, i64 1 ret <2 x float> %vecinit2.i } define <2 x float> @_Z11native_exp2Dv2_f(<2 x float> %x) { entry: %0 = extractelement <2 x float> %x, i64 0 %1 = tail call float @llvm.amdgcn.exp2.f32(float %0) %vecinit = insertelement <2 x float> poison, float %1, i64 0 %2 = extractelement <2 x float> %x, i64 1 %3 = tail call float @llvm.amdgcn.exp2.f32(float %2) %vecinit2 = insertelement <2 x float> %vecinit, float %3, i64 1 ret <2 x float> %vecinit2 } --- libclc/amdgpu/lib/SOURCES | 1 + libclc/amdgpu/lib/math/native_exp2.cl | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 libclc/amdgpu/lib/math/native_exp2.cl diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES index d7782a2ae14dc..ed5e45a37c18d 100644 --- a/libclc/amdgpu/lib/SOURCES +++ b/libclc/amdgpu/lib/SOURCES @@ -1,4 +1,5 @@ math/native_exp.cl +math/native_exp2.cl math/native_log.cl math/native_log10.cl math/half_exp.cl diff --git a/libclc/amdgpu/lib/math/native_exp2.cl b/libclc/amdgpu/lib/math/native_exp2.cl new file mode 100644 index 0000000000000..39ae914b19634 --- /dev/null +++ b/libclc/amdgpu/lib/math/native_exp2.cl @@ -0,0 +1,16 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc.h> +#include <clc/clcmacro.h> + +_CLC_OVERLOAD _CLC_DEF float native_exp2(float val) { + return __builtin_amdgcn_exp2f(val); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, native_exp2, float) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits