llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Hongyu Chen (XChy)

<details>
<summary>Changes</summary>

Fixes #<!-- -->154772
We previously set the `ptx_kernel` calling convention on all kernels, but it is 
incorrect to add `ptx_kernel` to the stub version of a kernel, which was 
introduced in #<!-- -->115821. This patch copies the workaround used by AMDGPU.

---
Full diff: https://github.com/llvm/llvm-project/pull/170170.diff


2 Files Affected:

- (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+3-1) 
- (modified) clang/test/CodeGenOpenCL/ptx-calls.cl (+15-4) 


``````````diff
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp 
b/clang/lib/CodeGen/Targets/NVPTX.cpp
index f6715861d91bc..5afef658c840b 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -277,7 +277,9 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
     }
   }
   // Attach kernel metadata directly if compiling for NVPTX.
-  if (FD->hasAttr<DeviceKernelAttr>())
+  // NOTE: Don't set kernel calling convention for handled OpenCL kernel,
+  // otherwise the stub version of kernel would be incorrect.
+  if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
     F->setCallingConv(getDeviceKernelCallingConv());
 }
 
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl 
b/clang/test/CodeGenOpenCL/ptx-calls.cl
index ae187173b1730..d5e27fce426a7 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -1,11 +1,22 @@
-// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | 
FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O1 -o - | 
FileCheck %s
 
+// CHECK-LABEL: define dso_local void @device_function(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+//
 void device_function() {
 }
-// CHECK-LABEL: define{{.*}} void @device_function()
 
+// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] !kernel_arg_addr_space 
[[META7:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META7]] 
!kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META7]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret void
+//
 __kernel void kernel_function() {
   device_function();
 }
-// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
-// CHECK: call void @device_function()
+//.
+// CHECK: [[META7]] = !{}
+//.

``````````

</details>


https://github.com/llvm/llvm-project/pull/170170
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to