https://github.com/lalaniket8 updated https://github.com/llvm/llvm-project/pull/137769
>From 8fd3b0cafa7ddd260d073232f93e262a6d508d52 Mon Sep 17 00:00:00 2001 From: anikelal <anike...@amd.com> Date: Tue, 29 Apr 2025 13:59:48 +0530 Subject: [PATCH] add alwaysinline attribute to stubs --- clang/lib/CodeGen/CodeGenModule.cpp | 13 ++++++++++ .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 6 ++--- .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 4 ---- .../CodeGenOpenCL/cl20-device-side-enqueue.cl | 24 +++++++------------ clang/test/CodeGenOpenCL/convergent.cl | 9 +++---- .../enqueue-kernel-non-entry-block.cl | 11 ++------- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e917f3c42da06..2daeb6dbc751a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6174,6 +6174,19 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); + + bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone && + (CodeGenOpts.OptimizationLevel == 0) && + !D->hasAttr<MinSizeAttr>(); + + if (D->hasAttr<OpenCLKernelAttr>()) + if (GD.getKernelReferenceKind() == KernelReferenceKind::Stub && + !D->hasAttr<NoInlineAttr>() && + !Fn->hasFnAttribute(llvm::Attribute::NoInline) && + !D->hasAttr<OptimizeNoneAttr>() && + !Fn->hasFnAttribute(llvm::Attribute::OptimizeNone) && !ShouldAddOptNone) + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>()) diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 773daf53b2746..a0e11a1b5997e 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -492,7 +492,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: ret void // // -// GFX900: Function Attrs: convergent norecurse nounwind +// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind // GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test( // GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext [[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13]] { // GFX900-NEXT: [[ENTRY:.*:]] @@ -640,7 +640,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900-NEXT: ret void // // -// GFX900: Function Attrs: convergent norecurse nounwind +// GFX900: Function Attrs: alwaysinline convergent norecurse nounwind // GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test_target_features_kernel( // GFX900-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META22]] !kernel_arg_access_qual [[META23]] !kernel_arg_type [[META24]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25]] { // GFX900-NEXT: [[ENTRY:.*:]] @@ -832,7 +832,7 @@ kernel void test_target_features_kernel(global int *i) { // GFX900: attributes #[[ATTR0:[0-9]+]] = { "objc_arc_inert" } // GFX900: attributes #[[ATTR1]] = { convergent norecurse nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } // GFX900: attributes #[[ATTR2]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" "uniform-work-group-size"="false" } -// GFX900: attributes #[[ATTR3]] = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } +// GFX900: attributes #[[ATTR3]] = { alwaysinline convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,256" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } // GFX900: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } // GFX900: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } // GFX900: attributes #[[ATTR6]] = { convergent nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-sram-ecc" } diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl index 5f32231b18c3d..98587c694619f 100644 --- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl +++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl @@ -5,7 +5,6 @@ kernel void ker() {}; // CHECK: define{{.*}}@ker() #[[ATTR0:[0-9]+]] -// CHECK: call void @__clang_ocl_kern_imp_ker() #[[ATTR2:[0-9]+]] // CHECK: define{{.*}}@__clang_ocl_kern_imp_ker() #[[ATTR1:[0-9]+]] @@ -18,6 +17,3 @@ void foo() {}; // CHECK: attributes #[[ATTR1]] // CHECK-NOT: uniform-work-group-size - -// CHECK: attributes #[[ATTR2]] -// CHECK-NOT: uniform-work-group-size diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl index 3355fe1c25819..6c85e734c0eb4 100644 --- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -1,12 +1,12 @@ -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR,TRIPLESPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR,TRIPLESPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLESPIR -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86 -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86,TRIPLEX86 -// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=CHECK-LIFETIMES,TRIPLEX86 +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86 +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86 +// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES #pragma OPENCL EXTENSION cl_khr_subgroups : enable @@ -39,12 +39,6 @@ void callee(int id, __global int *out) { out[id] = id; } -// TRIPLESPIR: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) -// TRIPLESPIR: call spir_func void @__clang_ocl_kern_imp_device_side_enqueue({{.*}}) - -// TRIPLEX86: define{{.*}} void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) -// TRIPLEX86: call void @__clang_ocl_kern_imp_device_side_enqueue({{.*}}) - // COMMON-LABEL: define{{.*}} void @__clang_ocl_kern_imp_device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i) kernel void device_side_enqueue(global int *a, global int *b, int i) { // SPIR: %default_queue = alloca target("spirv.Queue") diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl index 123adba7b40d2..53a35a4f73119 100644 --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -127,7 +127,7 @@ void test_not_unroll() { // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]] // CHECK-LABEL: @assume_convergent_asm -// CHECK: tail call void asm sideeffect "s_barrier", ""() #5 +// CHECK: tail call void asm sideeffect "s_barrier", ""() #6 kernel void assume_convergent_asm() { __asm__ volatile("s_barrier"); @@ -138,6 +138,7 @@ kernel void assume_convergent_asm() // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} } // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #6 = { {{[^}]*}}nounwind{{[^}]*}} } -// CHECK: attributes #7 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} } +// CHECK: attributes #5 = { {{[^}]*}}alwaysinline convergent{{[^}]*}} } +// CHECK: attributes #6 = { {{[^}]*}}convergent{{[^}]*}} } +// CHECK: attributes #7 = { {{[^}]*}}nounwind{{[^}]*}} } +// CHECK: attributes #8 = { {{[^}]*}}convergent noduplicate nounwind{{[^}]*}} } diff --git a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl index e741cf63f30b5..8e970f121bca8 100644 --- a/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl +++ b/clang/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl @@ -9,15 +9,8 @@ typedef struct {int a;} ndrange_t; kernel void test(int i) { - // AMDGPU-LABEL: define {{.*}} amdgpu_kernel void @test -// AMDGPU-LABEL: call void @__clang_ocl_kern_imp_test(i32 noundef %0) - // SPIR-LABEL: define {{.*}} spir_kernel void @test -// SPIR-LABEL: call spir_func void @__clang_ocl_kern_imp_test(i32 noundef %0) - -// AMDGPU-LABEL: define {{.*}} void @__clang_ocl_kern_imp_test -// SPIR-LABEL: define {{.*}} spir_func void @__clang_ocl_kern_imp_test // COMMON-LABEL: entry: // AMDGPU: %block_sizes = alloca [1 x i64] @@ -44,5 +37,5 @@ kernel void test(int i) { // CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "<stdin>" // CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", linkageName: "__clang_ocl_kern_imp_test", {{.*}} file: ![[TESTFILE]] -// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 33) -// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 34, scope: ![[IFSCOPE]]) +// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 26) +// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 27, scope: ![[IFSCOPE]]) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits