https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/177292
>From c20cbf4152ac2f463cbe27a8869ef11938728fe7 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <[email protected]> Date: Wed, 21 Jan 2026 11:43:53 -0500 Subject: [PATCH] [CUDA/HIP] Externalize __device__ const variables accessed by host code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In standard C++, const variables at namespace scope have internal linkage. For __device__ const variables, this makes them invisible to runtime symbol lookup APIs (cudaGetSymbolAddress/hipGetSymbolAddress). Reading a __device__ const variable from host code is a valid usage pattern — the host may need to know the value at runtime. This is also needed by libcudacxx's cuda::get_device_address. This patch extends the existing CUDADeviceVarODRUsedByHost tracking to cover __device__ const variables. When host code references such a variable, it gets externalized (same mechanism used for static device vars). Variables only used in device code keep internal linkage and can still be constant-folded. The fix is in SemaExpr: __device__ const variables are classified as CVT_Both (due to an implicit CUDAConstantAttr), so the ODR-use tracking is extended to include CVT_Both variables with an explicit CUDADeviceAttr, distinguishing them from plain const variables. --- clang/lib/Sema/SemaExpr.cpp | 10 ++- .../CodeGenCUDA/device-const-var-linkage.cu | 86 +++++++++++++++++++ 2 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenCUDA/device-const-var-linkage.cu diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 82da5dc032237..58f2746834303 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -19108,7 +19108,15 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef, ? diag::note_cuda_const_var_unpromoted : diag::note_cuda_host_var); } - } else if (VarTarget == SemaCUDA::CVT_Device && + } else if ((VarTarget == SemaCUDA::CVT_Device || + // Also capture __device__ const variables, which are classified + // as CVT_Both due to an implicit CUDAConstantAttr. We check for + // an explicit CUDADeviceAttr to distinguish them from plain + // const variables (no __device__), which also get CVT_Both but + // only have an implicit CUDADeviceAttr. + (VarTarget == SemaCUDA::CVT_Both && + Var->hasAttr<CUDADeviceAttr>() && + !Var->getAttr<CUDADeviceAttr>()->isImplicit())) && !Var->hasAttr<CUDASharedAttr>() && (UserTarget == CUDAFunctionTarget::Host || UserTarget == CUDAFunctionTarget::HostDevice)) { diff --git a/clang/test/CodeGenCUDA/device-const-var-linkage.cu b/clang/test/CodeGenCUDA/device-const-var-linkage.cu new file mode 100644 index 0000000000000..90f908d627a26 --- /dev/null +++ b/clang/test/CodeGenCUDA/device-const-var-linkage.cu @@ -0,0 +1,86 @@ +// Test that __device__ const variables are externalized only when referenced +// by host code. Variables only used in device code retain internal linkage. + +// Non-RDC mode +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -std=c++17 -emit-llvm -o - | FileCheck -check-prefix=DEV %s + +// RDC mode +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -std=c++17 -fgpu-rdc -emit-llvm -o - | FileCheck -check-prefix=RDC %s + +// With -fvisibility=hidden +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -std=c++17 -fvisibility=hidden -fapply-global-visibility-to-externs \ +// RUN: -emit-llvm -o - | FileCheck -check-prefix=HIDDEN %s + +// Negative test: const device vars NOT referenced by host should not be +// externalized. +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \ +// RUN: -std=c++17 -emit-llvm -o - | FileCheck -check-prefix=NEG %s + +#include "Inputs/cuda.h" + +// Case 1: __device__ const referenced by host — should be externalized so +// the host can access it via hipGetSymbolAddress/hipMemcpyFromSymbol. In C++, +// namespace-scope const has internal linkage, but the ODR-use by host code +// triggers externalization to make the symbol visible to the runtime. +// DEV-DAG: @_ZL18const_host_visible = addrspace(4) constant i32 42 +// HIDDEN-DAG: @_ZL18const_host_visible = addrspace(4) constant i32 42 +// RDC-DAG: @_ZL18const_host_visible.static.{{[0-9a-f_]+}} = addrspace(4) constant i32 42 +__device__ const int const_host_visible = 42; + +// Case 2: __device__ const NOT referenced by host — should retain internal +// linkage and be optimized away. Only host-referenced const device vars need +// externalization; blindly externalizing all would bloat the symbol table. +// NEG-NOT: @{{.*}}const_dev_only +__device__ const int const_dev_only = 100; + +// Case 3: __device__ non-const — always externalized (baseline comparison). +// Non-const device vars already have external linkage by default. +// DEV-DAG: @nonconst_val = addrspace(1) externally_initialized global i32 42 +// HIDDEN-DAG: @nonconst_val = protected addrspace(1) externally_initialized global i32 42 +__device__ int nonconst_val = 42; + +// Case 4: __constant__ const referenced by host — same as Case 1 but in +// constant address space. __constant__ const also gets internal linkage in +// C++ and needs externalization when host code takes its address. +// DEV-DAG: @_ZL17constant_host_ref = addrspace(4) constant i32 200 +// HIDDEN-DAG: @_ZL17constant_host_ref = addrspace(4) constant i32 200 +// RDC-DAG: @_ZL17constant_host_ref.static.{{[0-9a-f_]+}} = addrspace(4) constant i32 200 +__constant__ const int constant_host_ref = 200; + +// Case 5: __constant__ const NOT referenced by host — same as Case 2 but +// for __constant__. Should not be externalized. +// NEG-NOT: @{{.*}}constant_no_host +__constant__ const int constant_no_host = 201; + +// Case 6: Plain const (no __device__) referenced by host — should NOT be +// externalized on the device side. It gets an implicit CUDAConstantAttr +// (making it CVT_Both) but has no explicit CUDADeviceAttr. The ODR-use +// tracking in SemaExpr checks for an explicit CUDADeviceAttr to distinguish +// this from __device__ const vars. +// NEG-NOT: @{{.*}}plain_const +const int plain_const = 300; + +__global__ void kernel(int* out) { + out[0] = const_host_visible; + out[1] = const_dev_only; + out[2] = nonconst_val; + out[3] = constant_host_ref; + out[4] = constant_no_host; + out[5] = plain_const; +} + +__host__ __device__ void use(const int *p); +void host_uses() { + use(&const_host_visible); + use(&nonconst_val); + use(&constant_host_ref); + use(&plain_const); +} + +// Verify compiler.used contains the externalized vars. +// DEV: @llvm.compiler.used = {{.*}}@nonconst_val{{.*}}@_ZL1{{[78]}} +// plain_const should NOT be in compiler.used. +// NEG-NOT: @llvm.compiler.used = {{.*}}plain_const _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
