https://github.com/wenju-he created https://github.com/llvm/llvm-project/pull/152436
Motivation is to upstream use of __ocml_rsqrt_ in
https://github.com/intel/llvm/blob/sycl/libclc/libspirv/lib/amdgcn-amdhsa/math/rsqrt.cl

llvm-diff shows vectorized calls of llvm.sqrt.v2f32 and fdiv are scalarized:
Old:
> %2 = tail call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %0), !fpmath !5
> %3 = fdiv contract <2 x float> splat (float 1.000000e+00), %2, !fpmath !4
!4 = !{float 2.500000e+00}
!5 = !{float 3.000000e+00}
New:
< %2 = extractelement <2 x float> %0, i64 0
< %3 = tail call float @__ocml_rsqrt_f32(float noundef %2)
< %4 = insertelement <2 x float> poison, float %3, i64 0
< %5 = extractelement <2 x float> %0, i64 1
< %6 = tail call float @__ocml_rsqrt_f32(float noundef %5)
< %7 = insertelement <2 x float> %4, float %6, i64 1

>From 96e4768816ffab2bba1d0f836f9453fec406aa7f Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Thu, 7 Aug 2025 06:42:16 +0200
Subject: [PATCH] [libclc] Implement __clc_rsqrt with __ocml_rsqrt_* functions

Motivation is to upstream use of __ocml_rsqrt_ in
https://github.com/intel/llvm/blob/sycl/libclc/libspirv/lib/amdgcn-amdhsa/math/rsqrt.cl

llvm-diff shows vectorized calls of llvm.sqrt.v2f32 and fdiv are scalarized:
Old:
> %2 = tail call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %0), !fpmath !5
> %3 = fdiv contract <2 x float> splat (float 1.000000e+00), %2, !fpmath !4
!4 = !{float 2.500000e+00}
!5 = !{float 3.000000e+00}
New:
< %2 = extractelement <2 x float> %0, i64 0
< %3 = tail call float @__ocml_rsqrt_f32(float noundef %2)
< %4 = insertelement <2 x float> poison, float %3, i64 0
< %5 = extractelement <2 x float> %0, i64 1
< %6 = tail call float @__ocml_rsqrt_f32(float noundef %5)
< %7 = insertelement <2 x float> %4, float %6, i64 1
---
 libclc/clc/lib/amdgcn/SOURCES           |  1 +
 libclc/clc/lib/amdgcn/math/clc_rsqrt.cl | 35 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 libclc/clc/lib/amdgcn/math/clc_rsqrt.cl

diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 76c3266e3af7b..357027b7facd9 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,4 +1,5 @@
 math/clc_ldexp_override.cl
+math/clc_rsqrt.cl
 mem_fence/clc_mem_fence.cl
 synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_offset.cl
diff --git a/libclc/clc/lib/amdgcn/math/clc_rsqrt.cl b/libclc/clc/lib/amdgcn/math/clc_rsqrt.cl
new file mode 100644
index 0000000000000..4a9ae94b744af
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/math/clc_rsqrt.cl
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/math/clc_rsqrt.h>
+
+float __ocml_rsqrt_f32(float);
+
+_CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) { return __ocml_rsqrt_f32(x); }
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+double __ocml_rsqrt_f64(double);
+
+_CLC_OVERLOAD _CLC_DEF double __clc_rsqrt(double x) { return __ocml_rsqrt_f64(x); }
+
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+half __ocml_rsqrt_f16(half);
+
+_CLC_OVERLOAD _CLC_DEF half __clc_rsqrt(half x) {
+  return __ocml_rsqrt_f16(x);
+}
+
+#endif
+
+#define FUNCTION __clc_rsqrt
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#include <clc/math/gentype.inc>

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits