Author: Wenju He
Date: 2025-09-18T07:47:35+08:00
New Revision: 7f3661128b1e5dda69586afcff99a8f662e4126f

URL: 
https://github.com/llvm/llvm-project/commit/7f3661128b1e5dda69586afcff99a8f662e4126f
DIFF: 
https://github.com/llvm/llvm-project/commit/7f3661128b1e5dda69586afcff99a8f662e4126f.diff

LOG: [libclc] Remove __attribute__((always_inline)) (#158791)

always_inline doesn't guarantee a performance improvement.
Target-specific optimizations decide whether inlining is profitable.
Changes to amdgcn--amdhsa.bc:
* _Z9__clc_logDv16_f and _Z15__clc_remainderDv16_fS_ are not inlined.
* sincos vector function code size has doubled due to apparent
duplication.


Also replace the mistaken _CLC_DECL with _CLC_DEF on function definitions.

Added: 
    

Modified: 
    libclc/clc/include/clc/clcfunc.h
    libclc/clc/include/clc/misc/shuffle2_def.inc
    libclc/clc/include/clc/misc/shuffle_def.inc
    libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
    libclc/clc/lib/generic/atomic/clc_atomic_def.inc
    libclc/clc/lib/generic/math/clc_sincos_helpers.inc

Removed: 
    


################################################################################
diff --git a/libclc/clc/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h
index 30feaf99a4d31..5457a1892ac87 100644
--- a/libclc/clc/include/clc/clcfunc.h
+++ b/libclc/clc/include/clc/clcfunc.h
@@ -11,17 +11,13 @@
 
 #define _CLC_OVERLOAD __attribute__((overloadable))
 #define _CLC_DECL
-#define _CLC_INLINE __attribute__((always_inline)) inline
+#define _CLC_INLINE inline
 #define _CLC_CONST __attribute__((const))
 
-// avoid inlines for SPIR-V related targets since we'll optimise later in the
-// chain
-#if defined(CLC_SPIRV)
-#define _CLC_DEF
-#elif defined(CLC_CLSPV)
+#if defined(CLC_CLSPV)
 #define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
 #else
-#define _CLC_DEF __attribute__((always_inline))
+#define _CLC_DEF
 #endif
 
 #if __OPENCL_C_VERSION__ == CL_VERSION_2_0 ||                                  \

diff --git a/libclc/clc/include/clc/misc/shuffle2_def.inc b/libclc/clc/include/clc/misc/shuffle2_def.inc
index f25d281af1169..c319f57f880fe 100644
--- a/libclc/clc/include/clc/misc/shuffle2_def.inc
+++ b/libclc/clc/include/clc/misc/shuffle2_def.inc
@@ -18,22 +18,22 @@
 // The return type is same base type as the input type, with the same vector
 // size as the mask. Elements in the mask must be the same size (number of bits)
 // as the input value., e.g. char8 ret = shuffle(char2 x, uchar8 mask);
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
                __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
 }
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
                __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
 }
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
                __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);
 }
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(
     __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
     __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, y, mask);

diff --git a/libclc/clc/include/clc/misc/shuffle_def.inc b/libclc/clc/include/clc/misc/shuffle_def.inc
index 49a47daf821fe..0f0340979c557 100644
--- a/libclc/clc/include/clc/misc/shuffle_def.inc
+++ b/libclc/clc/include/clc/misc/shuffle_def.inc
@@ -18,19 +18,19 @@
 // The return type is same base type as the input type, with the same vector
 // size as the mask. Elements in the mask must be the same size (number of bits)
 // as the input value., e.g. char8 ret = shuffle(char2 x, uchar8 mask);
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
 }
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
 }
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
 __CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
 }
-_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(
     __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask) {
   return __CLC_IMPL_FUNCTION(__CLC_FUNCTION)(x, mask);
 }

diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc b/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
index 32ff9b45b769e..74284fd61024c 100644
--- a/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
+++ b/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
@@ -24,7 +24,7 @@
 #ifdef __CLC_FPSIZE
 
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         
\
-  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_atomic_compare_exchange(         
\
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange(          
\
       volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,         
\
       __CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal,       
\
       int MemoryScope) {                                                       
\
@@ -38,7 +38,7 @@
 #else
 
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         
\
-  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __clc_atomic_compare_exchange(         
\
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange(          
\
       volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,         
\
       __CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal,       
\
       int MemoryScope) {                                                       
\

diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_def.inc b/libclc/clc/lib/generic/atomic/clc_atomic_def.inc
index c1a0731eb8439..14a09b1f09f5c 100644
--- a/libclc/clc/lib/generic/atomic/clc_atomic_def.inc
+++ b/libclc/clc/lib/generic/atomic/clc_atomic_def.inc
@@ -31,7 +31,7 @@
 
 #ifdef __CLC_NO_VALUE_ARG
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         
\
-  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        
\
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         
\
       volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder,                  
\
       int MemoryScope) {                                                       
\
     return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION(                               
\
@@ -39,7 +39,7 @@
   }
 #elif defined(__CLC_INC_DEC)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         
\
-  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        
\
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         
\
       volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder,                  
\
       int MemoryScope) {                                                       
\
     return __CLC_AS_RETTYPE(                                                   
\
@@ -48,7 +48,7 @@
   }
 #elif defined(__CLC_RETURN_VOID)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         
\
-  _CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION(                                 
\
+  _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION(                                  
\
       volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value,              
\
       int MemoryOrder, int MemoryScope) {                                      
\
     __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_PTR_CASTTYPE *)Ptr, Value,            
\
@@ -56,7 +56,7 @@
   }
 #else
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         
\
-  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        
\
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         
\
       volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value,              
\
       int MemoryOrder, int MemoryScope) {                                      
\
     return __CLC_AS_RETTYPE(                                                   
\

diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index bddc0998cf950..9a46170a3db38 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -74,8 +74,8 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
   return ret;
 }
 
-_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
-                                                      __CLC_INTN regn) {
+_CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
+                                                     __CLC_INTN regn) {
   // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
   __CLC_FLOATN r = x * x;
 


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to