https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/177292

>From c20cbf4152ac2f463cbe27a8869ef11938728fe7 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <[email protected]>
Date: Wed, 21 Jan 2026 11:43:53 -0500
Subject: [PATCH] [CUDA/HIP] Externalize __device__ const variables accessed by
 host code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In standard C++, const variables at namespace scope have internal
linkage. For __device__ const variables, this makes them invisible to
runtime symbol lookup APIs (cudaGetSymbolAddress/hipGetSymbolAddress).

Reading a __device__ const variable from host code is a valid usage
pattern — the host may need to know the value at runtime. This is
also needed by libcudacxx's cuda::get_device_address.

This patch extends the existing CUDADeviceVarODRUsedByHost tracking
to cover __device__ const variables. When host code references such a
variable, it gets externalized (same mechanism used for static device
vars). Variables only used in device code keep internal linkage and
can still be constant-folded.

The fix is in SemaExpr: __device__ const variables are classified as
CVT_Both (due to an implicit CUDAConstantAttr), so the ODR-use
tracking is extended to include CVT_Both variables with an explicit
CUDADeviceAttr, distinguishing them from plain const variables.
---
 clang/lib/Sema/SemaExpr.cpp                   | 10 ++-
 .../CodeGenCUDA/device-const-var-linkage.cu   | 86 +++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenCUDA/device-const-var-linkage.cu

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 82da5dc032237..58f2746834303 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19108,7 +19108,15 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, 
Sema &SemaRef,
                                ? diag::note_cuda_const_var_unpromoted
                                : diag::note_cuda_host_var);
       }
-    } else if (VarTarget == SemaCUDA::CVT_Device &&
+    } else if ((VarTarget == SemaCUDA::CVT_Device ||
+                // Also capture __device__ const variables, which are 
classified
+                // as CVT_Both due to an implicit CUDAConstantAttr. We check 
for
+                // an explicit CUDADeviceAttr to distinguish them from plain
+                // const variables (no __device__), which also get CVT_Both but
+                // only have an implicit CUDADeviceAttr.
+                (VarTarget == SemaCUDA::CVT_Both &&
+                 Var->hasAttr<CUDADeviceAttr>() &&
+                 !Var->getAttr<CUDADeviceAttr>()->isImplicit())) &&
                !Var->hasAttr<CUDASharedAttr>() &&
                (UserTarget == CUDAFunctionTarget::Host ||
                 UserTarget == CUDAFunctionTarget::HostDevice)) {
diff --git a/clang/test/CodeGenCUDA/device-const-var-linkage.cu 
b/clang/test/CodeGenCUDA/device-const-var-linkage.cu
new file mode 100644
index 0000000000000..90f908d627a26
--- /dev/null
+++ b/clang/test/CodeGenCUDA/device-const-var-linkage.cu
@@ -0,0 +1,86 @@
+// Test that __device__ const variables are externalized only when referenced
+// by host code. Variables only used in device code retain internal linkage.
+
+// Non-RDC mode
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++17 -emit-llvm -o - | FileCheck -check-prefix=DEV %s
+
+// RDC mode
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++17 -fgpu-rdc -emit-llvm -o - | FileCheck -check-prefix=RDC %s
+
+// With -fvisibility=hidden
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++17 -fvisibility=hidden -fapply-global-visibility-to-externs \
+// RUN:   -emit-llvm -o - | FileCheck -check-prefix=HIDDEN %s
+
+// Negative test: const device vars NOT referenced by host should not be
+// externalized.
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++17 -emit-llvm -o - | FileCheck -check-prefix=NEG %s
+
+#include "Inputs/cuda.h"
+
+// Case 1: __device__ const referenced by host — should be externalized so
+// the host can access it via hipGetSymbolAddress/hipMemcpyFromSymbol. In C++,
+// namespace-scope const has internal linkage, but the ODR-use by host code
+// triggers externalization to make the symbol visible to the runtime.
+// DEV-DAG: @_ZL18const_host_visible = addrspace(4) constant i32 42
+// HIDDEN-DAG: @_ZL18const_host_visible = addrspace(4) constant i32 42
+// RDC-DAG: @_ZL18const_host_visible.static.{{[0-9a-f_]+}} = addrspace(4) 
constant i32 42
+__device__ const int const_host_visible = 42;
+
+// Case 2: __device__ const NOT referenced by host — should retain internal
+// linkage and be optimized away. Only host-referenced const device vars need
+// externalization; blindly externalizing all would bloat the symbol table.
+// NEG-NOT: @{{.*}}const_dev_only
+__device__ const int const_dev_only = 100;
+
+// Case 3: __device__ non-const — always externalized (baseline comparison).
+// Non-const device vars already have external linkage by default.
+// DEV-DAG: @nonconst_val = addrspace(1) externally_initialized global i32 42
+// HIDDEN-DAG: @nonconst_val = protected addrspace(1) externally_initialized 
global i32 42
+__device__ int nonconst_val = 42;
+
+// Case 4: __constant__ const referenced by host — same as Case 1 but in
+// constant address space. __constant__ const also gets internal linkage in
+// C++ and needs externalization when host code takes its address.
+// DEV-DAG: @_ZL17constant_host_ref = addrspace(4) constant i32 200
+// HIDDEN-DAG: @_ZL17constant_host_ref = addrspace(4) constant i32 200
+// RDC-DAG: @_ZL17constant_host_ref.static.{{[0-9a-f_]+}} = addrspace(4) 
constant i32 200
+__constant__ const int constant_host_ref = 200;
+
+// Case 5: __constant__ const NOT referenced by host — same as Case 2 but
+// for __constant__. Should not be externalized.
+// NEG-NOT: @{{.*}}constant_no_host
+__constant__ const int constant_no_host = 201;
+
+// Case 6: Plain const (no __device__) referenced by host — should NOT be
+// externalized on the device side. It gets an implicit CUDAConstantAttr
+// (making it CVT_Both) but has no explicit CUDADeviceAttr. The ODR-use
+// tracking in SemaExpr checks for an explicit CUDADeviceAttr to distinguish
+// this from __device__ const vars.
+// NEG-NOT: @{{.*}}plain_const
+const int plain_const = 300;
+
+__global__ void kernel(int* out) {
+  out[0] = const_host_visible;
+  out[1] = const_dev_only;
+  out[2] = nonconst_val;
+  out[3] = constant_host_ref;
+  out[4] = constant_no_host;
+  out[5] = plain_const;
+}
+
+__host__ __device__ void use(const int *p);
+void host_uses() {
+  use(&const_host_visible);
+  use(&nonconst_val);
+  use(&constant_host_ref);
+  use(&plain_const);
+}
+
+// Verify compiler.used contains the externalized vars.
+// DEV: @llvm.compiler.used = {{.*}}@nonconst_val{{.*}}@_ZL1{{[78]}}
+// plain_const should NOT be in compiler.used.
+// NEG-NOT: @llvm.compiler.used = {{.*}}plain_const

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to