https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/199497
>From 89c0b81d7f096c36b6b3411610bbf636c677b492 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Mon, 25 May 2026 10:00:31 +0200 Subject: [PATCH 1/2] [libclc] Optimize and vectorize signbit Replace element-wise scalarizing implementation with bitwise masking. For example, define hidden range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 { %2 = bitcast <2 x float> %0 to <2 x i32> %3 = extractelement <2 x i32> %2, i64 0 %4 = lshr i32 %3, 31 %5 = insertelement <2 x i32> poison, i32 %4, i64 0 %6 = extractelement <2 x i32> %2, i64 1 %7 = lshr i32 %6, 31 %8 = insertelement <2 x i32> %5, i32 %7, i64 1 %9 = icmp ne <2 x i32> %8, zeroinitializer %10 = sext <2 x i1> %9 to <2 x i32> ret <2 x i32> %10 } is changed to: define hidden noundef range(i32 -1, 1) <2 x i32> @_Z7signbitDv2_f(<2 x float> noundef %0) #0 { %2 = bitcast <2 x float> %0 to <2 x i32> %3 = ashr <2 x i32> %2, splat (i32 31) ret <2 x i32> %3 } --- .../clc/include/clc/relational/clc_signbit.h | 2 + .../clc/lib/generic/relational/clc_signbit.cl | 89 +------------------ .../lib/generic/relational/clc_signbit.inc | 38 ++++++++ 3 files changed, 44 insertions(+), 85 deletions(-) create mode 100644 libclc/clc/lib/generic/relational/clc_signbit.inc diff --git a/libclc/clc/include/clc/relational/clc_signbit.h b/libclc/clc/include/clc/relational/clc_signbit.h index 45677fba6cb89..1656ba1bcae76 100644 --- a/libclc/clc/include/clc/relational/clc_signbit.h +++ b/libclc/clc/include/clc/relational/clc_signbit.h @@ -9,6 +9,8 @@ #ifndef __CLC_RELATIONAL_CLC_SIGNBIT_H__ #define __CLC_RELATIONAL_CLC_SIGNBIT_H__ +#include "clc/internal/clc.h" + #define __CLC_FUNCTION __clc_signbit #define __CLC_BODY "clc/relational/unary_decl.inc" diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl b/libclc/clc/lib/generic/relational/clc_signbit.cl index 05d2e8a0039ad..20e7a7a14297e 100644 --- a/libclc/clc/lib/generic/relational/clc_signbit.cl +++ b/libclc/clc/lib/generic/relational/clc_signbit.cl @@ -6,89 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clc/internal/clc.h" -#include "clc/relational/relational.h" +#include "clc/math/math.h" +#include "clc/relational/clc_signbit.h" -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, __CLC_FUNCTION, ARG_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.lo), \ - __CLC_FUNCTION(x.hi)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, __CLC_FUNCTION, ARG_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1), \ - __CLC_FUNCTION(x.s2)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, __CLC_FUNCTION, ARG_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1), \ - __CLC_FUNCTION(x.s2), \ - __CLC_FUNCTION(x.s3)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, __CLC_FUNCTION, ARG_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1), \ - __CLC_FUNCTION(x.s2), __CLC_FUNCTION(x.s3), \ - __CLC_FUNCTION(x.s4), __CLC_FUNCTION(x.s5), \ - __CLC_FUNCTION(x.s6), \ - __CLC_FUNCTION(x.s7)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, __CLC_FUNCTION, ARG_TYPE) \ - _CLC_DEF _CLC_OVERLOAD RET_TYPE __CLC_FUNCTION(ARG_TYPE x) { \ - return (RET_TYPE)((RET_TYPE){__CLC_FUNCTION(x.s0), __CLC_FUNCTION(x.s1), \ - __CLC_FUNCTION(x.s2), __CLC_FUNCTION(x.s3), \ - __CLC_FUNCTION(x.s4), __CLC_FUNCTION(x.s5), \ - __CLC_FUNCTION(x.s6), __CLC_FUNCTION(x.s7), \ - __CLC_FUNCTION(x.s8), __CLC_FUNCTION(x.s9), \ - __CLC_FUNCTION(x.sa), __CLC_FUNCTION(x.sb), \ - __CLC_FUNCTION(x.sc), __CLC_FUNCTION(x.sd), \ - __CLC_FUNCTION(x.se), \ - __CLC_FUNCTION(x.sf)} != (RET_TYPE)0); \ - } - -#define _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(RET_TYPE, __CLC_FUNCTION, \ - ARG_TYPE) \ - _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, __CLC_FUNCTION, ARG_TYPE##2) \ - _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, __CLC_FUNCTION, ARG_TYPE##3) \ - _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, __CLC_FUNCTION, ARG_TYPE##4) \ - _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, __CLC_FUNCTION, ARG_TYPE##8) \ - _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, __CLC_FUNCTION, ARG_TYPE##16) - -_CLC_DEF _CLC_OVERLOAD int __clc_signbit(float x) { - return __builtin_signbitf(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(int, __clc_signbit, float) - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -// The scalar version of __clc_signbit(double) returns an int, but the vector -// versions return long. - -_CLC_DEF _CLC_OVERLOAD int __clc_signbit(double x) { - return __builtin_signbit(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, __clc_signbit, double) - -#endif -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -// The scalar version of __clc_signbit(half) returns an int, but the vector -// versions return short. - -_CLC_DEF _CLC_OVERLOAD int __clc_signbit(half x) { - return __builtin_signbit(x); -} - -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, __clc_signbit, half) - -#endif +#define __CLC_BODY "clc_signbit.inc" +#include "clc/math/gentype.inc" diff --git a/libclc/clc/lib/generic/relational/clc_signbit.inc b/libclc/clc/lib/generic/relational/clc_signbit.inc new file mode 100644 index 0000000000000..f74bad93f513e --- /dev/null +++ b/libclc/clc/lib/generic/relational/clc_signbit.inc @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_VECSIZE_OR_1 == 1 +#define __CLC_RETTYPE __CLC_INTN +#else +#define __CLC_RETTYPE __CLC_S_GENTYPE +#endif + +#if __CLC_FPSIZE == 32 +#define __CLC_SIGNBIT_MASK SIGNBIT_SP32 +#elif __CLC_FPSIZE == 64 +#define __CLC_SIGNBIT_MASK SIGNBIT_DP64 +#elif __CLC_FPSIZE == 16 +#define __CLC_SIGNBIT_MASK SIGNBIT_FP16 +#else +#error "Invalid FP size" +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_RETTYPE __clc_signbit(__CLC_GENTYPE x) { +#if __CLC_VECSIZE_OR_1 == 1 + return (__CLC_INTN)((__CLC_AS_S_GENTYPE(x) & + (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) != 0); +#else + return (__CLC_AS_S_GENTYPE(x) & (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) != + (__CLC_S_GENTYPE)0 + ? (__CLC_S_GENTYPE)-1 + : (__CLC_S_GENTYPE)0; +#endif +} + +#undef __CLC_RETTYPE +#undef __CLC_SIGNBIT_MASK >From fdf1c1ee0037811bc87d7e9ba66c2b0fc64f2800 Mon Sep 17 00:00:00 2001 From: Wenju He <[email protected]> Date: Wed, 27 May 2026 07:07:06 +0200 Subject: [PATCH 2/2] bitcast and shift --- .../clc/lib/generic/relational/clc_signbit.cl | 1 - .../lib/generic/relational/clc_signbit.inc | 19 ++----------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/libclc/clc/lib/generic/relational/clc_signbit.cl b/libclc/clc/lib/generic/relational/clc_signbit.cl index 20e7a7a14297e..68faaa1594cbe 100644 --- a/libclc/clc/lib/generic/relational/clc_signbit.cl +++ b/libclc/clc/lib/generic/relational/clc_signbit.cl @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "clc/math/math.h" #include "clc/relational/clc_signbit.h" #define __CLC_BODY "clc_signbit.inc" diff --git a/libclc/clc/lib/generic/relational/clc_signbit.inc b/libclc/clc/lib/generic/relational/clc_signbit.inc index f74bad93f513e..9e3c9c16f41c7 100644 --- a/libclc/clc/lib/generic/relational/clc_signbit.inc +++ b/libclc/clc/lib/generic/relational/clc_signbit.inc @@ -12,27 +12,12 @@ #define __CLC_RETTYPE __CLC_S_GENTYPE #endif -#if __CLC_FPSIZE == 32 -#define __CLC_SIGNBIT_MASK SIGNBIT_SP32 -#elif __CLC_FPSIZE == 64 -#define __CLC_SIGNBIT_MASK SIGNBIT_DP64 -#elif __CLC_FPSIZE == 16 -#define __CLC_SIGNBIT_MASK SIGNBIT_FP16 -#else -#error "Invalid FP size" -#endif - _CLC_OVERLOAD _CLC_DEF __CLC_RETTYPE __clc_signbit(__CLC_GENTYPE x) { #if __CLC_VECSIZE_OR_1 == 1 - return (__CLC_INTN)((__CLC_AS_S_GENTYPE(x) & - (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) != 0); + return (__CLC_INTN)(__CLC_AS_U_GENTYPE(x) >> (__CLC_FPSIZE - 1)); #else - return (__CLC_AS_S_GENTYPE(x) & (__CLC_S_GENTYPE)__CLC_SIGNBIT_MASK) != - (__CLC_S_GENTYPE)0 - ? (__CLC_S_GENTYPE)-1 - : (__CLC_S_GENTYPE)0; + return __CLC_AS_S_GENTYPE(x) >> (__CLC_FPSIZE - 1); #endif } #undef __CLC_RETTYPE -#undef __CLC_SIGNBIT_MASK _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
