[libclc] [libclc] Optimize generic CLC fmin/fmax (PR #128506)

Fraser Cormack via cfe-commits Mon, 28 Jul 2025 09:57:58 -0700

https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/128506


>From 414c6cf560248cfdaff8ae1564b7a9313990a087 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 24 Feb 2025 12:25:22 +0000
Subject: [PATCH 1/3] [libclc] Optimize generic CLC fmin/fmax

The CLC fmin/fmax builtins now use clang's
__builtin_elementwise_(min|max) which helps us generate
llvm.(min|max)num intrinsics directly. These intrinsics select the
non-NAN input over the NAN input, which adheres to the OpenCL
specification. Note that the OpenCL specification doesn't require
support for sNAN, so returning qNAN over sNAN is acceptable. Note also
that the intrinsics don't differentiate between -0.0 and +0.0; this does
not appear to be required - going by the OpenCL CTS, at least.

These intrinsics maintain the vector types, as opposed to scalarizing,
which was previously happening. This commit therefore helps to optimize
codegen for those targets.
---
 libclc/clc/lib/generic/math/clc_fmax.cl | 47 ++-----------------------
 libclc/clc/lib/generic/math/clc_fmin.cl | 46 ++----------------------
 2 files changed, 4 insertions(+), 89 deletions(-)

diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl 
b/libclc/clc/lib/generic/math/clc_fmax.cl
index 5ebbf1b28df9c..b334207365b98 100644
--- a/libclc/clc/lib/generic/math/clc_fmax.cl
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -6,53 +6,10 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmaxf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmax
-#define __IMPL_FUNCTION __builtin_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
+#define __IMPL_FUNCTION(x) __builtin_elementwise_maximumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (x < y) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif
diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl 
b/libclc/clc/lib/generic/math/clc_fmin.cl
index 5bddbb8634126..d21bb8d076790 100644
--- a/libclc/clc/lib/generic/math/clc_fmin.cl
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -6,52 +6,10 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-#define __FLOAT_ONLY
-#define __CLC_MIN_VECSIZE 1
 #define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fminf
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-#define __DOUBLE_ONLY
-#define __CLC_MIN_VECSIZE 1
-#define FUNCTION __clc_fmin
-#define __IMPL_FUNCTION __builtin_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef __CLC_MIN_VECSIZE
-#undef FUNCTION
-#undef __IMPL_FUNCTION
+#define __IMPL_FUNCTION(x) __builtin_elementwise_minimumnum
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-
-#define __HALF_ONLY
-#define __CLC_SUPPORTED_VECSIZE_OR_1 2
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>
-
-#endif

>From 001f4276d5f3c00bed275b89a26de10ec8e4559c Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 28 Jul 2025 17:51:44 +0100
Subject: [PATCH 2/3] spirv fmin/fmax

---
 libclc/clc/lib/spirv/SOURCES          |  2 +
 libclc/clc/lib/spirv/math/clc_fmax.cl | 58 +++++++++++++++++++++++++++
 libclc/clc/lib/spirv/math/clc_fmin.cl | 57 ++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)
 create mode 100644 libclc/clc/lib/spirv/math/clc_fmax.cl
 create mode 100644 libclc/clc/lib/spirv/math/clc_fmin.cl

diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
index cd6e0b2ea7088..07bc7aaead8e8 100644
--- a/libclc/clc/lib/spirv/SOURCES
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -1 +1,3 @@
+math/clc_fmax.cl
+math/clc_fmin.cl
 math/clc_runtime_has_hw_fma32.cl
diff --git a/libclc/clc/lib/spirv/math/clc_fmax.cl 
b/libclc/clc/lib/spirv/math/clc_fmax.cl
new file mode 100644
index 0000000000000..5ebbf1b28df9c
--- /dev/null
+++ b/libclc/clc/lib/spirv/math/clc_fmax.cl
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
+#include <clc/relational/clc_isnan.h>
+
+#define __FLOAT_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define FUNCTION __clc_fmax
+#define __IMPL_FUNCTION __builtin_fmaxf
+#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_MIN_VECSIZE
+#undef FUNCTION
+#undef __IMPL_FUNCTION
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define __DOUBLE_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define FUNCTION __clc_fmax
+#define __IMPL_FUNCTION __builtin_fmax
+#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_MIN_VECSIZE
+#undef FUNCTION
+#undef __IMPL_FUNCTION
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
+  if (__clc_isnan(x))
+    return y;
+  if (__clc_isnan(y))
+    return x;
+  return (x < y) ? y : x;
+}
+
+#define __HALF_ONLY
+#define __CLC_SUPPORTED_VECSIZE_OR_1 2
+#define FUNCTION __clc_fmax
+#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef FUNCTION
+
+#endif
diff --git a/libclc/clc/lib/spirv/math/clc_fmin.cl 
b/libclc/clc/lib/spirv/math/clc_fmin.cl
new file mode 100644
index 0000000000000..5bddbb8634126
--- /dev/null
+++ b/libclc/clc/lib/spirv/math/clc_fmin.cl
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clcmacro.h>
+#include <clc/internal/clc.h>
+#include <clc/relational/clc_isnan.h>
+
+#define __FLOAT_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define FUNCTION __clc_fmin
+#define __IMPL_FUNCTION __builtin_fminf
+#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_MIN_VECSIZE
+#undef FUNCTION
+#undef __IMPL_FUNCTION
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define __DOUBLE_ONLY
+#define __CLC_MIN_VECSIZE 1
+#define FUNCTION __clc_fmin
+#define __IMPL_FUNCTION __builtin_fmin
+#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_MIN_VECSIZE
+#undef FUNCTION
+#undef __IMPL_FUNCTION
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
+  if (__clc_isnan(x))
+    return y;
+  if (__clc_isnan(y))
+    return x;
+  return (y < x) ? y : x;
+}
+
+#define __HALF_ONLY
+#define __CLC_SUPPORTED_VECSIZE_OR_1 2
+#define FUNCTION __clc_fmin
+#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
+#include <clc/math/gentype.inc>
+
+#endif

>From 8cd4a8e47e16401993579993efb238bdfd3b6eec Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 28 Jul 2025 17:57:03 +0100
Subject: [PATCH 3/3] amdgcn/r600 simplify

---
 libclc/clc/lib/amdgcn/SOURCES          |  2 --
 libclc/clc/lib/amdgcn/math/clc_fmax.cl | 49 -------------------------
 libclc/clc/lib/amdgcn/math/clc_fmin.cl | 50 --------------------------
 libclc/clc/lib/r600/SOURCES            |  2 --
 libclc/clc/lib/r600/math/clc_fmax.cl   | 41 ---------------------
 libclc/clc/lib/r600/math/clc_fmin.cl   | 42 ----------------------
 6 files changed, 186 deletions(-)
 delete mode 100644 libclc/clc/lib/amdgcn/math/clc_fmax.cl
 delete mode 100644 libclc/clc/lib/amdgcn/math/clc_fmin.cl
 delete mode 100644 libclc/clc/lib/r600/math/clc_fmax.cl
 delete mode 100644 libclc/clc/lib/r600/math/clc_fmin.cl

diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index 7bec1740f7636..d91f08533e149 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,5 +1,3 @@
-math/clc_fmax.cl
-math/clc_fmin.cl
 math/clc_ldexp_override.cl
 workitem/clc_get_global_offset.cl
 workitem/clc_get_global_size.cl
diff --git a/libclc/clc/lib/amdgcn/math/clc_fmax.cl 
b/libclc/clc/lib/amdgcn/math/clc_fmax.cl
deleted file mode 100644
index cea90a7135d5a..0000000000000
--- a/libclc/clc/lib/amdgcn/math/clc_fmax.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. 
Otherwise
-  // fmax instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __builtin_canonicalizef(x);
-  y = __builtin_canonicalizef(y);
-  return __builtin_fmaxf(x, y);
-}
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
-  x = __builtin_canonicalize(x);
-  y = __builtin_canonicalize(y);
-  return __builtin_fmax(x, y);
-}
-
-#endif
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? x : y;
-}
-
-#endif
-
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/amdgcn/math/clc_fmin.cl 
b/libclc/clc/lib/amdgcn/math/clc_fmin.cl
deleted file mode 100644
index 12bb0c64429fd..0000000000000
--- a/libclc/clc/lib/amdgcn/math/clc_fmin.cl
+++ /dev/null
@@ -1,50 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. 
Otherwise
-  // fmin instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __builtin_canonicalizef(x);
-  y = __builtin_canonicalizef(y);
-  return __builtin_fminf(x, y);
-}
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
-  x = __builtin_canonicalize(x);
-  y = __builtin_canonicalize(y);
-  return __builtin_fmin(x, y);
-}
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-
-#endif
-
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/r600/SOURCES b/libclc/clc/lib/r600/SOURCES
index 75d32f4f535bc..8d5caf167aa4e 100644
--- a/libclc/clc/lib/r600/SOURCES
+++ b/libclc/clc/lib/r600/SOURCES
@@ -1,4 +1,2 @@
-math/clc_fmax.cl
-math/clc_fmin.cl
 math/clc_native_rsqrt.cl
 math/clc_rsqrt_override.cl
diff --git a/libclc/clc/lib/r600/math/clc_fmax.cl 
b/libclc/clc/lib/r600/math/clc_fmax.cl
deleted file mode 100644
index 689e51a9829aa..0000000000000
--- a/libclc/clc/lib/r600/math/clc_fmax.cl
+++ /dev/null
@@ -1,41 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/math/math.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmax(float x, float y) {
-  // Flush denormals if not enabled. Otherwise fmax instruction flushes the
-  // values for comparison, but outputs original denormal
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
-  return __builtin_fmaxf(x, y);
-}
-
-#define __FLOAT_ONLY
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmax(double x, double y) {
-  return __builtin_fmax(x, y);
-}
-
-#define __DOUBLE_ONLY
-#define FUNCTION __clc_fmax
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif
diff --git a/libclc/clc/lib/r600/math/clc_fmin.cl 
b/libclc/clc/lib/r600/math/clc_fmin.cl
deleted file mode 100644
index 22cb7046a4ce3..0000000000000
--- a/libclc/clc/lib/r600/math/clc_fmin.cl
+++ /dev/null
@@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/clcmacro.h>
-#include <clc/internal/clc.h>
-#include <clc/math/math.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_fmin(float x, float y) {
-  // fcanonicalize removes sNaNs and flushes denormals if not enabled. 
Otherwise
-  // fmin instruction flushes the values for comparison, but outputs original
-  // denormal
-  x = __clc_flush_denormal_if_not_supported(x);
-  y = __clc_flush_denormal_if_not_supported(y);
-  return __builtin_fminf(x, y);
-}
-
-#define __FLOAT_ONLY
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEF _CLC_OVERLOAD double __clc_fmin(double x, double y) {
-  return __builtin_fmin(x, y);
-}
-
-#define __DOUBLE_ONLY
-#define FUNCTION __clc_fmin
-#define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
-#include <clc/math/gentype.inc>
-#undef FUNCTION
-
-#endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libclc] [libclc] Optimize generic CLC fmin/fmax (PR #128506)

Reply via email to