[clang] [llvm] [CIR][CUDA] Shadow variables, internalize device side variables, lower poison attribute (PR #190087)

David Rivera via cfe-commits Tue, 05 May 2026 23:21:22 -0700

================
@@ -346,3 +367,121 @@ mlir::Operation 
*CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
 
   return globalOp;
 }
+
+void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
+    const VarDecl *d, cir::GlobalLinkageKind &linkage) {
+  if (cgm.getLangOpts().GPURelocatableDeviceCode)
+    cgm.errorNYI(d->getSourceRange(),
+                 "internalizeDeviceSideVar: GPU Relocatable Device Code 
(RDC)");
+
+  // __shared__ variables are odd. Shadows do get created, but
+  // they are not registered with the CUDA runtime, so they
+  // can't really be used to access their device-side
+  // counterparts. It's not clear yet whether it's nvcc's bug or
+  // a feature, but we've got to do the same for compatibility.
+  if (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
+      d->hasAttr<CUDASharedAttr>()) {
+    linkage = cir::GlobalLinkageKind::InternalLinkage;
+  }
+
+  if (d->getType()->isCUDADeviceBuiltinSurfaceType() ||
+      d->getType()->isCUDADeviceBuiltinTextureType())
+    cgm.errorNYI(d->getSourceRange(),
+                 "internalizeDeviceSideVar: CUDA Surface/Texture support");
+}
+
+std::string CIRGenNVCUDARuntime::getDeviceSideName(const NamedDecl *nd) {
+  GlobalDecl gd;
+  // nd could be either a kernel or a variable.
+  if (auto *fd = dyn_cast<FunctionDecl>(nd))
+    gd = GlobalDecl(fd, KernelReferenceKind::Kernel);
+  else
+    gd = GlobalDecl(nd);
+  std::string deviceSideName;
+  MangleContext *mc;
+  if (cgm.getLangOpts().CUDAIsDevice)
+    mc = &cgm.getCXXABI().getMangleContext();
+  else
+    mc = deviceMC.get();
+  if (mc->shouldMangleDeclName(nd)) {
+    SmallString<256> buffer;
+    llvm::raw_svector_ostream out(buffer);
+    mc->mangleName(gd, out);
+    deviceSideName = std::string(out.str());
+  } else
+    deviceSideName = std::string(nd->getIdentifier()->getName());
+
+  // Make unique name for device side static file-scope variable for HIP.
+  if (cgm.getASTContext().shouldExternalize(nd) &&
+      cgm.getLangOpts().GPURelocatableDeviceCode) {
+    SmallString<256> buffer;
+    llvm::raw_svector_ostream out(buffer);
+    out << deviceSideName;
+    cgm.printPostfixForExternalizedDecl(out, nd);
+    deviceSideName = std::string(out.str());
+  }
+  return deviceSideName;
+}
+
+void CIRGenNVCUDARuntime::handleVarRegistration(const VarDecl *vd,
+                                                cir::GlobalOp var) {
+  if (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>()) {
+    // Shadow variables and their properties must be registered with CUDA
+    // runtime. Skip Extern global variables, which will be registered in
+    // the TU where they are defined.
+    //
+    // Don't register a C++17 inline variable. The local symbol can be
+    // discarded and referencing a discarded local symbol from outside the
+    // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+    //
+    // HIP managed variables need to be always recorded in device and host
+    // compilations for transformation.
+    //
+    // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
+    // added to llvm.compiler-used, therefore they are safe to be registered.
+    if ((!vd->hasExternalStorage() && !vd->isInline()) ||
+        cgm.getASTContext().CUDADeviceVarODRUsedByHost.contains(vd) ||
+        vd->hasAttr<HIPManagedAttr>()) {
+      registerDeviceVar(vd, var, !vd->hasDefinition(),
+                        vd->hasAttr<CUDAConstantAttr>());
+    }
+  } else if (vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+             vd->getType()->isCUDADeviceBuiltinTextureType()) {
+    // Builtin surfaces and textures and their template arguments are
+    // also registered with CUDA runtime.
+    cgm.errorNYI(vd->getSourceRange(),
+                 "handleVarRegistration: Surface and Texture registration");
+  }
+}
+
+void CIRGenNVCUDARuntime::finalizeModule() {
+  if (!cgm.getLangOpts().CUDAIsDevice)
+    return;
+
+  // Mark ODR-used device variables as compiler used to prevent them from being
+  // eliminated by optimization. This is necessary for device variables
+  // ODR-used by host functions. Sema correctly marks them as ODR-used no
+  // matter whether they are ODR-used by device or host functions.
+  //
+  // We do not need to do this if the variable has used attribute since it
+  // has already been added.
+  //
+  // Static device variables have been externalized at this point, therefore
+  // variables with private or internal linkage need not be added.
+  for (auto globalOp : cgm.getModule().getOps<cir::GlobalOp>()) {
+    auto regAttr = globalOp->getAttrOfType<cir::CUDAVarRegistrationInfoAttr>(
+        cir::CUDAVarRegistrationInfoAttr::getMnemonic());
+    if (!regAttr)
+      continue;
+
+    auto kind = regAttr.getKind();
+    if (!globalOp.isDeclaration() &&
+        !cir::isLocalLinkage(globalOp.getLinkage()) &&
+        (kind == cir::CUDADeviceVarKind::Variable ||
+         kind == cir::CUDADeviceVarKind::Surface ||
+         kind == cir::CUDADeviceVarKind::Texture)) {
----------------
RiverDave wrote:


I think we can make this closer to OG.

OG's condition depends on AST-side state (`Info.D->isUsed()` and 
`!Info.D->hasAttr<UsedAttr>()` - see here: 
https://github.com/llvm/llvm-project/blob/3053a3c7b8646c5a9892c339befcb61802488b9e/clang/lib/CodeGen/CGCUDANV.cpp#L1281),
 and that information is not represented in `cu.var_registration`.

 Could we keep an OG-style `DeviceVars` side table in `CIRGenNVCUDARuntime` 
containing the `cir::GlobalOp`, `VarDecl *`, and kind? The attr can still be 
attached for later lowering, but `finalizeModule` should use the side table 
referencing the AST decl so this stays equivalent to OG.


https://github.com/llvm/llvm-project/pull/190087
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [CIR][CUDA] Shadow variables, internalize device side variables, lower poison attribute (PR #190087)

Reply via email to