[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-08 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu closed 
https://github.com/llvm/llvm-project/pull/83870
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-05 Thread Yaxun Liu via cfe-commits


@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// XEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v

yxsamliu wrote:

fixed

https://github.com/llvm/llvm-project/pull/83870
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-05 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/83870

>From 902f09d9124b387ad02bd758e9c54bf44746b0fd Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Mon, 4 Mar 2024 11:38:06 -0500
Subject: [PATCH] [HIP] fix host-used external kernel

In -fgpu-rdc mode, when an external kernel is used by a host function
with weak_odr linkage (e.g. explicitly instantiated template function),
the kernel should not be marked as host-used external kernel, since
the host function may be dropped by the linker. Marking the external
kernel as a host-used external kernel would force a reference to
the external kernel, which the user may not define in another TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771
---
 clang/lib/Sema/SemaCUDA.cpp|  5 -
 clang/lib/Sema/SemaExpr.cpp|  5 -
 clang/test/CodeGenCUDA/host-used-extern.cu | 13 +
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c17..4d4f4b6a2d4d95 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl 
*Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
 // For -fgpu-rdc, keep track of external kernels used by host functions.
 if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
+Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
+(!Caller || (!Caller->getDescribedFunctionTemplate() &&
+ getASTContext().GetGVALinkageForFunction(Caller) ==
+ GVA_StrongExternal)))
   getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
 return true;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 47bb263f56aade..93f82e68ab6440 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, 
Sema &SemaRef,
   // externalize the static device side variable ODR-used by host code.
   if (!Var->hasExternalStorage())
 SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
-  else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
+  else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
+   (!FD || (!FD->getDescribedFunctionTemplate() &&
+SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+GVA_StrongExternal)))
 
SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
 }
   }
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu 
b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d1..1ae644ae981aaf 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with weak_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller();

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-05 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/83870

>From 9c6991bbcdce6f24c8f99c8f2a6ff0e5b6c2ac5a Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Mon, 4 Mar 2024 11:38:06 -0500
Subject: [PATCH] [HIP] fix host-used external kernel

In -fgpu-rdc mode, when an external kernel is used by a host function
with weak_odr linkage (e.g. explicitly instantiated template function),
the kernel should not be marked as host-used external kernel, since
the host function may be dropped by the linker. Mark the external
kernel as host-used external kernel will force a reference to
the external kernel, which the user may not define in other TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771
---
 clang/lib/Sema/SemaCUDA.cpp|  5 -
 clang/lib/Sema/SemaExpr.cpp|  5 -
 clang/test/CodeGenCUDA/host-used-extern.cu | 13 +
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c17..4d4f4b6a2d4d95 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl 
*Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
 // For -fgpu-rdc, keep track of external kernels used by host functions.
 if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-Callee->hasAttr() && !Callee->isDefined())
+Callee->hasAttr() && !Callee->isDefined() &&
+(!Caller || (!Caller->getDescribedFunctionTemplate() &&
+ getASTContext().GetGVALinkageForFunction(Caller) ==
+ GVA_StrongExternal)))
   getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
 return true;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 47bb263f56aade..93f82e68ab6440 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, 
Sema ,
   // externalize the static device side variable ODR-used by host code.
   if (!Var->hasExternalStorage())
 SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
-  else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
+  else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
+   (!FD || (!FD->getDescribedFunctionTemplate() &&
+SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+GVA_StrongExternal)))
 
SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
 }
   }
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu 
b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d1..b82b2d65aa741f 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with linkonce_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller();

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-04 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/83870

>From dc94bb78adb323a539d195b791e50cf69c774246 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Mon, 4 Mar 2024 11:38:06 -0500
Subject: [PATCH] [HIP] fix host-used external kernel

In -fgpu-rdc mode, when an external kernel is used by a host function
with weak_odr linkage (e.g. explicitly instantiated template function),
the kernel should not be marked as host-used external kernel, since
the host function may be dropped by the linker. Mark the external
kernel as host-used external kernel will force a reference to
the external kernel, which the user may not define in other TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771
---
 clang/lib/Sema/SemaCUDA.cpp|  4 +++-
 clang/lib/Sema/SemaExpr.cpp|  5 -
 clang/test/CodeGenCUDA/host-used-extern.cu | 13 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c17..0acfb0d564694f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,9 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl 
*Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
 // For -fgpu-rdc, keep track of external kernels used by host functions.
 if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-Callee->hasAttr() && !Callee->isDefined())
+Callee->hasAttr() && !Callee->isDefined() &&
+!Caller->getDescribedFunctionTemplate() &&
+getASTContext().GetGVALinkageForFunction(Caller) == GVA_StrongExternal)
   getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
 return true;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0a449fc1082bd4..299f066a084c05 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19211,7 +19211,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, 
Sema ,
 } else if (VarTarget == Sema::CVT_Device &&
!Var->hasAttr() &&
(UserTarget == Sema::CFT_Host ||
-UserTarget == Sema::CFT_HostDevice)) {
+UserTarget == Sema::CFT_HostDevice) &&
+   !FD->getDescribedFunctionTemplate() &&
+   SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+   GVA_StrongExternal) {
   // Record a CUDA/HIP device side variable if it is ODR-used
   // by host code. This is done conservatively, when the variable is
   // referenced in any of the following contexts:
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu 
b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d1..b82b2d65aa741f 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with linkonce_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller();

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-04 Thread Artem Belevich via cfe-commits


@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// XEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v

Artem-B wrote:

Did you mean `NEG-NOT` ?

https://github.com/llvm/llvm-project/pull/83870
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-04 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B edited 
https://github.com/llvm/llvm-project/pull/83870
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-04 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B approved this pull request.

LGTM in principle, but I'd run it by someone with more familiarity with linking 
quirks.

@MaskRay PTAL, when you get a chance.

https://github.com/llvm/llvm-project/pull/83870
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-04 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Yaxun (Sam) Liu (yxsamliu)


Changes

In -fgpu-rdc mode, when an external kernel is used by a host function with 
weak_odr linkage (e.g. explicitly instantiated template function), the kernel 
should not be marked as host-used external kernel, since the host function may 
be dropped by the linker. Mark the external kernel as host-used external kernel 
will force a reference to the external kernel, which the user may not define in 
other TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771

---
Full diff: https://github.com/llvm/llvm-project/pull/83870.diff


3 Files Affected:

- (modified) clang/lib/Sema/SemaCUDA.cpp (+2-1) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+3-1) 
- (modified) clang/test/CodeGenCUDA/host-used-extern.cu (+13) 


``diff
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c17..40a2d47e4da127 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,8 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl 
*Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
 // For -fgpu-rdc, keep track of external kernels used by host functions.
 if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-Callee->hasAttr() && !Callee->isDefined())
+Callee->hasAttr() && !Callee->isDefined() &&
+getASTContext().GetGVALinkageForFunction(Caller) == GVA_StrongExternal)
   getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
 return true;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0a449fc1082bd4..dd16fc4eeffb72 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19211,7 +19211,9 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, 
Sema ,
 } else if (VarTarget == Sema::CVT_Device &&
!Var->hasAttr() &&
(UserTarget == Sema::CFT_Host ||
-UserTarget == Sema::CFT_HostDevice)) {
+UserTarget == Sema::CFT_HostDevice) &&
+   SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+   GVA_StrongExternal) {
   // Record a CUDA/HIP device side variable if it is ODR-used
   // by host code. This is done conservatively, when the variable is
   // referenced in any of the following contexts:
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu 
b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d1..1db6123916c700 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// XEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with linkonce_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller();

``




https://github.com/llvm/llvm-project/pull/83870
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [HIP] fix host-used external kernel (PR #83870)

2024-03-04 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu created 
https://github.com/llvm/llvm-project/pull/83870

In -fgpu-rdc mode, when an external kernel is used by a host function with 
weak_odr linkage (e.g. an explicitly instantiated template function), the kernel 
should not be marked as a host-used external kernel, since the host function may 
be dropped by the linker. Marking the external kernel as a host-used external 
kernel would force a reference to the external kernel, which the user may not 
define in another TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771

>From 3097ccf002464823262f73e53087ea9129f8e7ee Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Mon, 4 Mar 2024 11:38:06 -0500
Subject: [PATCH] [HIP] fix host-used external kernel

In -fgpu-rdc mode, when an external kernel is used by a host function
with weak_odr linkage (e.g. explicitly instantiated template function),
the kernel should not be marked as host-used external kernel, since
the host function may be dropped by the linker. Mark the external
kernel as host-used external kernel will force a reference to
the external kernel, which the user may not define in other TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771
---
 clang/lib/Sema/SemaCUDA.cpp|  3 ++-
 clang/lib/Sema/SemaExpr.cpp|  4 +++-
 clang/test/CodeGenCUDA/host-used-extern.cu | 13 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c17..40a2d47e4da127 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,8 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl 
*Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
 // For -fgpu-rdc, keep track of external kernels used by host functions.
 if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-Callee->hasAttr() && !Callee->isDefined())
+Callee->hasAttr() && !Callee->isDefined() &&
+getASTContext().GetGVALinkageForFunction(Caller) == GVA_StrongExternal)
   getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
 return true;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0a449fc1082bd4..dd16fc4eeffb72 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19211,7 +19211,9 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, 
Sema ,
 } else if (VarTarget == Sema::CVT_Device &&
!Var->hasAttr() &&
(UserTarget == Sema::CFT_Host ||
-UserTarget == Sema::CFT_HostDevice)) {
+UserTarget == Sema::CFT_HostDevice) &&
+   SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+   GVA_StrongExternal) {
   // Record a CUDA/HIP device side variable if it is ODR-used
   // by host code. This is done conservatively, when the variable is
   // referenced in any of the following contexts:
diff --git a/clang/test/CodeGenCUDA/host-used-extern.cu 
b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d1..1db6123916c700 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// XEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with linkonce_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller();

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits