https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/137769

>From 8fd3b0cafa7ddd260d073232f93e262a6d508d52 Mon Sep 17 00:00:00 2001
From: anikelal <anike...@amd.com>
Date: Tue, 29 Apr 2025 13:59:48 +0530
Subject: [PATCH] add alwaysinline attribute to stubs

---
 clang/lib/CodeGen/CodeGenModule.cpp           | 13 ++++++++++
 .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl    |  6 ++---
 .../test/CodeGenOpenCL/cl-uniform-wg-size.cl  |  4 ----
 .../CodeGenOpenCL/cl20-device-side-enqueue.cl | 24 +++++++------------
 clang/test/CodeGenOpenCL/convergent.cl        |  9 +++----
 .../enqueue-kernel-non-entry-block.cl         | 11 ++-------
 6 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index e917f3c42da06..2daeb6dbc751a 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -6174,6 +6174,19 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
   CodeGenFunction(*this).GenerateCode(GD, Fn, FI);
 
   setNonAliasAttributes(GD, Fn);
+
+  bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone &&
+                          (CodeGenOpts.OptimizationLevel == 0) &&
+                          !D->hasAttr<MinSizeAttr>();
+
+  if (D->hasAttr<OpenCLKernelAttr>())
+    if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub &&
+        !D->hasAttr<NoInlineAttr>() &&
+        !Fn->hasFnAttribute(llvm::Attribute::NoInline) &&
+        !D->hasAttr<OptimizeNoneAttr>() &&
+        !Fn->hasFnAttribute(llvm::Attribute::OptimizeNone) && !ShouldAddOptNone)
+      Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+
   SetLLVMFunctionAttributesForDefinition(D, Fn);
 
   if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 773daf53b2746..a0e11a1b5997e 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -492,7 +492,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    ret void
 //
 //
-// GFX900: Function Attrs: convergent norecurse nounwind
+// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
 // GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
 // GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual 
[[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] 
!kernel_arg_type_qual [[META13]] {
 // GFX900-NEXT:  [[ENTRY:.*:]]
@@ -640,7 +640,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    ret void
 //
 //
-// GFX900: Function Attrs: convergent norecurse nounwind
+// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
 // GFX900-LABEL: define dso_local void 
@__clang_ocl_kern_imp_test_target_features_kernel(
 // GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] 
!kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] 
!kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] 
!kernel_arg_type_qual [[META25]] {
 // GFX900-NEXT:  [[ENTRY:.*:]]
@@ -832,7 +832,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" }
 // GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind 
"denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc"
 }
 // GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind 
"amdgpu-flat-work-group-size"="1,256" 
"denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc"
 "uniform-work-group-size"="false" }
-// GFX900: attributes #[[ATTR3]] = { convergent norecurse nounwind 
"amdgpu-flat-work-group-size"="1,256" 
"denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc"
 }
+// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse 
nounwind "amdgpu-flat-work-group-size"="1,256" 
"denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc"
 }
 // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind 
willreturn memory(argmem: readwrite) }
 // GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind 
willreturn memory(argmem: readwrite) }
 // GFX900: attributes #[[ATTR6]] = { convergent nounwind 
"denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc"
 }
diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
index 5f32231b18c3d..98587c694619f 100644
--- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
+++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -5,7 +5,6 @@
 
 kernel void ker() {};
 // CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]]
-// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]]
 
 // CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]]
 
@@ -18,6 +17,3 @@ void foo() {};
 
 // CHECK: attributes #[[ATTR1]]
 // CHECK-NOT: uniform-work-group-size
-
-// CHECK: attributes #[[ATTR2]]
-// CHECK-NOT: uniform-work-group-size
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 3355fe1c25819..6c85e734c0eb4 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -1,12 +1,12 @@
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" 
| FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" 
| FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86 
-// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" 
| FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLEX86 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B32,SPIR 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B64,SPIR 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefix=CHECK-LIFETIMES 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B32,SPIR 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefixes=COMMON,B64,SPIR 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | 
FileCheck %s --check-prefix=CHECK-LIFETIMES 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" 
| FileCheck %s --check-prefixes=COMMON,B64,X86 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" 
| FileCheck %s --check-prefixes=COMMON,B64,X86 
+// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 
-ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" 
| FileCheck %s --check-prefix=CHECK-LIFETIMES 
 
 #pragma OPENCL EXTENSION cl_khr_subgroups : enable
 
@@ -39,12 +39,6 @@ void callee(int id, __global int *out) {
   out[id] = id;
 }
 
-// TRIPLESPIR: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 
%{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
-// TRIPLESPIR:    call spir_func void 
@__clang_ocl_kern_imp_device_side_enqueue({{.*}})
-
-// TRIPLEX86: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 
%{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
-// TRIPLEX86:    call void @__clang_ocl_kern_imp_device_side_enqueue({{.*}})
-
 // COMMON-LABEL: define{{.*}} void 
@__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr 
addrspace(1) align 4 %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // SPIR: %default_queue = alloca target("spirv.Queue")
diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl
index 123adba7b40d2..53a35a4f73119 100644
--- a/clang/test/CodeGenOpenCL/convergent.cl
+++ b/clang/test/CodeGenOpenCL/convergent.cl
@@ -127,7 +127,7 @@ void test_not_unroll() {
 // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
 
 // CHECK-LABEL: @assume_convergent_asm
-// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #6
 kernel void assume_convergent_asm()
 {
   __asm__ volatile("s_barrier");
@@ -138,6 +138,7 @@ kernel void assume_convergent_asm()
 // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
 // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
 // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #6 = { {{[^}]*}}nounwind{{[^}]*}} }
-// CHECK: attributes #7 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} }
+// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} }
+// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} }
diff --git a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
index e741cf63f30b5..8e970f121bca8 100644
--- a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
+++ b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl
@@ -9,15 +9,8 @@
 typedef struct {int a;} ndrange_t;
 
 kernel void test(int i) {
-
 // AMDGPU-LABEL: define {{.*}} amdgpu_kernel void @test
-// AMDGPU-LABEL: call void @__clang_ocl_kern_imp_test(i32 noundef %0)
-
 // SPIR-LABEL: define {{.*}} spir_kernel void @test
-// SPIR-LABEL: call spir_func void @__clang_ocl_kern_imp_test(i32 noundef %0)
-
-// AMDGPU-LABEL: define {{.*}} void @__clang_ocl_kern_imp_test
-// SPIR-LABEL: define {{.*}} spir_func void @__clang_ocl_kern_imp_test
 
 // COMMON-LABEL: entry:
 // AMDGPU: %block_sizes = alloca [1 x i64]
@@ -44,5 +37,5 @@ kernel void test(int i) {
 
 // CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "<stdin>"
 // CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", 
linkageName: "__clang_ocl_kern_imp_test", {{.*}} file: ![[TESTFILE]]
-// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 33)
-// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 34, scope: ![[IFSCOPE]])
+// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 26)
+// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 27, scope: ![[IFSCOPE]])

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to