https://github.com/wenju-he created 
https://github.com/llvm/llvm-project/pull/152436

Motivation is to upstream use of __ocml_rsqrt_ in
https://github.com/intel/llvm/blob/sycl/libclc/libspirv/lib/amdgcn-amdhsa/math/rsqrt.cl

llvm-diff shows vectorized calls of llvm.sqrt.v2f32 and fdiv are scalarized: 
Old:
    >   %2 = tail call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %0), !fpmath !5
    >   %3 = fdiv contract <2 x float> splat (float 1.000000e+00), %2, !fpmath !4
    !4 = !{float 2.500000e+00}
    !5 = !{float 3.000000e+00}
New:
    <   %2 = extractelement <2 x float> %0, i64 0
    <   %3 = tail call float @__ocml_rsqrt_f32(float noundef %2)
    <   %4 = insertelement <2 x float> poison, float %3, i64 0
    <   %5 = extractelement <2 x float> %0, i64 1
    <   %6 = tail call float @__ocml_rsqrt_f32(float noundef %5)
    <   %7 = insertelement <2 x float> %4, float %6, i64 1

>From 96e4768816ffab2bba1d0f836f9453fec406aa7f Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Thu, 7 Aug 2025 06:42:16 +0200
Subject: [PATCH] [libclc] Implement __clc_rsqrt with __ocml_rsqrt_* functions

Motivation is to upstream use of __ocml_rsqrt_ in
https://github.com/intel/llvm/blob/sycl/libclc/libspirv/lib/amdgcn-amdhsa/math/rsqrt.cl

llvm-diff shows vectorized calls of llvm.sqrt.v2f32 and fdiv are scalarized:
Old:
    >   %2 = tail call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %0), !fpmath !5
    >   %3 = fdiv contract <2 x float> splat (float 1.000000e+00), %2, !fpmath !4
    !4 = !{float 2.500000e+00}
    !5 = !{float 3.000000e+00}
New:
    <   %2 = extractelement <2 x float> %0, i64 0
    <   %3 = tail call float @__ocml_rsqrt_f32(float noundef %2)
    <   %4 = insertelement <2 x float> poison, float %3, i64 0
    <   %5 = extractelement <2 x float> %0, i64 1
    <   %6 = tail call float @__ocml_rsqrt_f32(float noundef %5)
    <   %7 = insertelement <2 x float> %4, float %6, i64 1
---
 libclc/clc/lib/amdgcn/SOURCES           |  1 +
 libclc/clc/lib/amdgcn/math/clc_rsqrt.cl | 35 +++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 libclc/clc/lib/amdgcn/math/clc_rsqrt.cl

diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 76c3266e3af7b..357027b7facd9 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,4 +1,5 @@
 math/clc_ldexp_override.cl
+math/clc_rsqrt.cl
 mem_fence/clc_mem_fence.cl
 synchronization/clc_work_group_barrier.cl
 workitem/clc_get_global_offset.cl
diff --git a/libclc/clc/lib/amdgcn/math/clc_rsqrt.cl b/libclc/clc/lib/amdgcn/math/clc_rsqrt.cl
new file mode 100644
index 0000000000000..4a9ae94b744af
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/math/clc_rsqrt.cl
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/math/clc_rsqrt.h>
+
+float __ocml_rsqrt_f32(float);
+
+_CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) { return __ocml_rsqrt_f32(x); }
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+double __ocml_rsqrt_f64(double);
+
+_CLC_OVERLOAD _CLC_DEF double __clc_rsqrt(double x) { return __ocml_rsqrt_f64(x); }
+
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+half __ocml_rsqrt_f16(half);
+
+_CLC_OVERLOAD _CLC_DEF half __clc_rsqrt(half x) {
+  return __ocml_rsqrt_f16(x);
+}
+
+#endif
+
+#define FUNCTION __clc_rsqrt
+#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#include <clc/math/gentype.inc>

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to