https://github.com/XChy created https://github.com/llvm/llvm-project/pull/170170
Fixes #154772. We previously set `ptx_kernel` for all kernels. However, it is incorrect to add `ptx_kernel` to the stub version of the kernel introduced in #115821. This patch copies the AMDGPU workaround. >From def58994c7e783e50260be3eba888f100956797d Mon Sep 17 00:00:00 2001 From: XChy <[email protected]> Date: Tue, 2 Dec 2025 00:42:10 +0800 Subject: [PATCH 1/2] precommit tests --- clang/test/CodeGenOpenCL/ptx-calls.cl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl index ae187173b1730..0aa7024aa44bf 100644 --- a/clang/test/CodeGenOpenCL/ptx-calls.cl +++ b/clang/test/CodeGenOpenCL/ptx-calls.cl @@ -1,11 +1,22 @@ -// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O1 -o - | FileCheck %s +// CHECK-LABEL: define dso_local void @device_function( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// void device_function() { } -// CHECK-LABEL: define{{.*}} void @device_function() +// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META7]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: unreachable +// __kernel void kernel_function() { device_function(); } -// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function() -// CHECK: call void @device_function() +//. +// CHECK: [[META7]] = !{} +//. 
>From 284f9f7dd2c2275566d7de4e9c51d67cb9a66911 Mon Sep 17 00:00:00 2001 From: XChy <[email protected]> Date: Tue, 2 Dec 2025 00:43:44 +0800 Subject: [PATCH 2/2] [OpenCL][NVPTX] Don't set calling convention for OpenCL kernel --- clang/lib/CodeGen/Targets/NVPTX.cpp | 4 +++- clang/test/CodeGenOpenCL/ptx-calls.cl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index f6715861d91bc..5afef658c840b 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -277,7 +277,9 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } } // Attach kernel metadata directly if compiling for NVPTX. - if (FD->hasAttr<DeviceKernelAttr>()) + // NOTE: Don't set kernel calling convention for handled OpenCL kernel, + // otherwise the stub version of kernel would be incorrect. + if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL) F->setCallingConv(getDeviceKernelCallingConv()); } diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl index 0aa7024aa44bf..d5e27fce426a7 100644 --- a/clang/test/CodeGenOpenCL/ptx-calls.cl +++ b/clang/test/CodeGenOpenCL/ptx-calls.cl @@ -12,7 +12,7 @@ void device_function() { // CHECK-LABEL: define dso_local ptx_kernel void @kernel_function( // CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META7]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: unreachable +// CHECK-NEXT: ret void // __kernel void kernel_function() { device_function(); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
