https://github.com/ZakyHermawan updated 
https://github.com/llvm/llvm-project/pull/190087

>From a30d3830c28be0ee50f76c836c06393709dfb8c4 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Thu, 2 Apr 2026 06:49:09 +0700
Subject: [PATCH 1/5] [CIR][CUDA] Shadow variables, lower poison attribute,
 improve test readability

Poison PoisonAtt already been introduced, but no lowering exist, this commit 
address that,
Improve readability of checks in address-spaces.cu by removing information that 
we did not care,

Note:
- CIR->LLVM initialize global variables with poison,
while OGCG initialize global variables with undef.

Signed-off-by: ZakyHermawan <[email protected]>
---
 .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td      | 18 ++++++
 clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp        | 60 ++++++++++++++++++
 clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp   | 19 ++++++
 clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h     | 13 +++-
 clang/lib/CIR/CodeGen/CIRGenModule.cpp        | 49 ++++++++++++++-
 clang/lib/CIR/CodeGen/CIRGenModule.h          |  7 +++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 24 ++++---
 clang/test/CIR/CodeGenCUDA/address-spaces.cu  | 63 ++++++++++++++-----
 8 files changed, 225 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td 
b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index 5932db8323196..d68fb61fd115c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -37,6 +37,24 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", 
"cu.kernel_name"> {
   let canHaveIllegalCXXABIType = 0;
 }
 
+def CUDAShadowNameAttr : CIR_Attr<"CUDAShadowName",
+                                  "cu.shadow_name"> {
+  let summary = "Device-side global variable name for this shadow.";
+  let description =
+  [{
+    This attribute is attached to global variable definitions and records the
+    mangled name of the global variable used on the device.
+
+    In CUDA, __device__, __constant__ and __shared__ variables, as well as 
+    surface and texture variables, will generate a shadow symbol on host.
+    We must preserve the correspodence in order to generate registration
+    functions.
+  }];
+
+  let parameters = (ins "std::string":$device_side_name);
+  let assemblyFormat = "`<` $device_side_name `>`";
+}
+
 def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized",
                                              "cu.externally_initialized"> {
   let summary = "The marked variable is externally initialized.";
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
index 8b8e99023eceb..8fb7191e7a89e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
@@ -12,6 +12,7 @@
 
//===----------------------------------------------------------------------===//
 
 #include "CIRGenCUDARuntime.h"
+#include "CIRGenCXXABI.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
 #include "mlir/IR/Operation.h"
@@ -64,6 +65,11 @@ class CIRGenNVCUDARuntime : public CIRGenCUDARuntime {
 
   void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn,
                       FunctionArgList &args) override;
+
+  void internalizeDeviceSideVar(const VarDecl *d,
+                                cir::GlobalLinkageKind &linkage) override;
+
+  std::string getDeviceSideName(const NamedDecl *nd) override;
 };
 
 } // namespace
@@ -342,3 +348,57 @@ mlir::Operation 
*CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
 
   return globalOp;
 }
+
+void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
+    const VarDecl *d, cir::GlobalLinkageKind &linkage) {
+  if (cgm.getLangOpts().GPURelocatableDeviceCode)
+    cgm.errorNYI(
+        "internalizeDeviceSideVar: GPU Relocatable Deviced Code (RDC)");
+
+  // __shared__ variables are odd. Shadows do get created, but
+  // they are not registered with the CUDA runtime, so they
+  // can't really be used to access their device-side
+  // counterparts. It's not clear yet whether it's nvcc's bug or
+  // a feature, but we've got to do the same for compatibility.
+  if (d->hasAttr<CUDADeviceAttr>() || d->hasAttr<CUDAConstantAttr>() ||
+      d->hasAttr<CUDASharedAttr>()) {
+    linkage = cir::GlobalLinkageKind::InternalLinkage;
+  }
+
+  if (d->getType()->isCUDADeviceBuiltinSurfaceType() ||
+      d->getType()->isCUDADeviceBuiltinTextureType())
+    cgm.errorNYI("internalizeDeviceSideVar: CUDA Surface/Texture support");
+}
+
+std::string CIRGenNVCUDARuntime::getDeviceSideName(const NamedDecl *nd) {
+  GlobalDecl gd;
+  // nd could be either a kernel or a variable.
+  if (auto *fd = dyn_cast<FunctionDecl>(nd))
+    gd = GlobalDecl(fd, KernelReferenceKind::Kernel);
+  else
+    gd = GlobalDecl(nd);
+  std::string deviceSideName;
+  MangleContext *mc;
+  if (cgm.getLangOpts().CUDAIsDevice)
+    mc = &cgm.getCXXABI().getMangleContext();
+  else
+    mc = deviceMC.get();
+  if (mc->shouldMangleDeclName(nd)) {
+    SmallString<256> buffer;
+    llvm::raw_svector_ostream out(buffer);
+    mc->mangleName(gd, out);
+    deviceSideName = std::string(out.str());
+  } else
+    deviceSideName = std::string(nd->getIdentifier()->getName());
+
+  // Make unique name for device side static file-scope variable for HIP.
+  if (cgm.getASTContext().shouldExternalize(nd) &&
+      cgm.getLangOpts().GPURelocatableDeviceCode) {
+    SmallString<256> buffer;
+    llvm::raw_svector_ostream out(buffer);
+    out << deviceSideName;
+    cgm.printPostfixForExternalizedDecl(out, nd);
+    deviceSideName = std::string(out.str());
+  }
+  return deviceSideName;
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp
index 25d981ef2f64b..8898071a35c12 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.cpp
@@ -20,6 +20,25 @@
 using namespace clang;
 using namespace CIRGen;
 
+static std::unique_ptr<MangleContext> initDeviceMC(CIRGenModule &cgm) {
+  // If the host and device have different C++ ABIs, mark it as the device
+  // mangle context so that the mangling needs to retrieve the additional
+  // device lambda mangling number instead of the regular host one.
+  if (cgm.getASTContext().getAuxTargetInfo() &&
+      cgm.getASTContext().getTargetInfo().getCXXABI().isMicrosoft() &&
+      cgm.getASTContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily()) {
+    return std::unique_ptr<MangleContext>(
+        cgm.getASTContext().createDeviceMangleContext(
+            *cgm.getASTContext().getAuxTargetInfo()));
+  }
+
+  return 
std::unique_ptr<MangleContext>(cgm.getASTContext().createMangleContext(
+      cgm.getASTContext().getAuxTargetInfo()));
+}
+
+CIRGenCUDARuntime::CIRGenCUDARuntime(CIRGenModule &cgm)
+    : cgm(cgm), deviceMC(initDeviceMC(cgm)) {}
+
 CIRGenCUDARuntime::~CIRGenCUDARuntime() {}
 
 RValue CIRGenCUDARuntime::emitCUDAKernelCallExpr(CIRGenFunction &cgf,
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h 
b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
index ba33602511e3b..39b6571849f29 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDARuntime.h
@@ -33,8 +33,11 @@ class CIRGenCUDARuntime {
 protected:
   CIRGenModule &cgm;
 
+  /// Mangle context for device.
+  std::unique_ptr<MangleContext> deviceMC;
+
 public:
-  CIRGenCUDARuntime(CIRGenModule &cgm) : cgm(cgm) {}
+  CIRGenCUDARuntime(CIRGenModule &cgm);
   virtual ~CIRGenCUDARuntime();
 
   virtual void emitDeviceStub(CIRGenFunction &cgf, cir::FuncOp fn,
@@ -47,6 +50,14 @@ class CIRGenCUDARuntime {
   virtual mlir::Operation *getKernelHandle(cir::FuncOp fn, GlobalDecl gd) = 0;
 
   virtual mlir::Operation *getKernelStub(mlir::Operation *handle) = 0;
+
+  /// Adjust linkage of shadow variables in host compilation
+  virtual void internalizeDeviceSideVar(const VarDecl *d,
+                                        cir::GlobalLinkageKind &linkage) = 0;
+
+  /// Returns function or variable name on device side even if the current
+  /// compilation is for host.
+  virtual std::string getDeviceSideName(const NamedDecl *nd) = 0;
 };
 
 CIRGenCUDARuntime *createNVCUDARuntime(CIRGenModule &cgm);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 2bc33c191bb32..88c4ac5d27115 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -434,6 +434,26 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl 
*global) const {
          global->getType()->isCUDADeviceBuiltinTextureType();
 }
 
+void CIRGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &os,
+                                                   const Decl *d) {
+  // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
+  // postfix beginning with '.' since the symbol name can be demangled.
+  if (langOpts.HIP)
+    os << (isa<VarDecl>(d) ? ".static." : ".intern.");
+  else
+    os << (isa<VarDecl>(d) ? "__static__" : "__intern__");
+
+  // If the CUID is not specified we try to generate a unique postfix.
+  if (getLangOpts().CUID.empty()) {
+    // TODO: Once we add 'PreprocessorOpts' into CIRGenModule this part can be
+    // brought in from OG.
+    errorNYI(d->getSourceRange(),
+             "printPostfixForExternalizedDecl: CUID is not specified");
+  } else {
+    os << getASTContext().getCUIDHash();
+  }
+}
+
 void CIRGenModule::emitGlobal(clang::GlobalDecl gd) {
   if (const auto *cd = dyn_cast<clang::OpenACCConstructDecl>(gd.getDecl())) {
     emitGlobalOpenACCDecl(cd);
@@ -1233,15 +1253,40 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
                     
cir::CUDAExternallyInitializedAttr::get(&getMLIRContext()));
       }
     } else {
-      // TODO(cir):
       // Adjust linkage of shadow variables in host compilation
-      // getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
+      getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
     }
     // TODO(cir):
     // Handle variable registration
     // getCUDARuntime().handleVarRegistration(vd, gv);
   }
 
+  // Decorate CUDA shadow variables with the cu.shadow_name attribute so we 
know
+  // how to register them when lowering.
+  if (langOpts.CUDA && !langOpts.CUDAIsDevice &&
+      (vd->hasAttr<CUDAConstantAttr>() || vd->hasAttr<CUDADeviceAttr>())) {
+    // Shadow variables and their properties must be registered with CUDA
+    // runtime. Skip Extern global variables, which will be registered in
+    // the TU where they are defined.
+    //
+    // Don't register a C++17 inline variable. The local symbol can be
+    // discarded and referencing a discarded local symbol from outside the
+    // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+    //
+    // HIP managed variables need to be always recorded in device and host
+    // compilations for transformation.
+    //
+    // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
+    // added to llvm.compiler-used, therefore they are safe to be registered.
+    if ((!vd->hasExternalStorage() && !vd->isInline()) ||
+        getASTContext().CUDADeviceVarODRUsedByHost.contains(vd) ||
+        vd->hasAttr<HIPManagedAttr>()) {
+      auto shadowName = cudaRuntime->getDeviceSideName(cast<NamedDecl>(vd));
+      auto attr = cir::CUDAShadowNameAttr::get(&getMLIRContext(), shadowName);
+      gv->setAttr(cir::CUDAShadowNameAttr::getMnemonic(), attr);
+    }
+  }
+
   // Set initializer and finalize emission
   CIRGenModule::setInitializer(gv, init);
   if (emitter)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h 
b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 266510de84fd0..388b78f2a75e6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -632,6 +632,13 @@ class CIRGenModule : public CIRGenTypeCache {
   // related attributes.
   bool shouldEmitCUDAGlobalVar(const VarDecl *global) const;
 
+  /// Print the postfix for externalized static variable or kernels for single
+  /// source offloading languages CUDA and HIP. The unique postfix is created
+  /// using either the CUID argument, or the file's UniqueID and active macros.
+  /// The fallback method without a CUID requires that the offloading toolchain
+  /// does not define separate macros via the -cc1 options.
+  void printPostfixForExternalizedDecl(llvm::raw_ostream &os, const Decl *d);
+
   /// Replace all uses of the old global with the new global, updating types
   /// and references as needed. Erases the old global when done.
   void replaceGlobal(cir::GlobalOp oldGV, cir::GlobalOp newGV);
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index ba89fbe3091bc..6881fef5d9f49 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -25,6 +25,7 @@
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinDialect.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Location.h"
 #include "mlir/IR/Types.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
@@ -390,7 +391,7 @@ class CIRAttrToValue {
         .Case<cir::BoolAttr, cir::IntAttr, cir::FPAttr, cir::ConstComplexAttr,
               cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr,
               cir::ConstPtrAttr, cir::GlobalViewAttr, cir::TypeInfoAttr,
-              cir::UndefAttr, cir::VTableAttr, cir::ZeroAttr>(
+              cir::UndefAttr, cir::PoisonAttr, cir::VTableAttr, cir::ZeroAttr>(
             [&](auto attrT) { return visitCirAttr(attrT); })
         .Default([&](auto attrT) { return mlir::Value(); });
   }
@@ -406,6 +407,7 @@ class CIRAttrToValue {
   mlir::Value visitCirAttr(cir::GlobalViewAttr attr);
   mlir::Value visitCirAttr(cir::TypeInfoAttr attr);
   mlir::Value visitCirAttr(cir::UndefAttr attr);
+  mlir::Value visitCirAttr(cir::PoisonAttr attr);
   mlir::Value visitCirAttr(cir::VTableAttr attr);
   mlir::Value visitCirAttr(cir::ZeroAttr attr);
 
@@ -767,6 +769,13 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::UndefAttr 
undefAttr) {
       rewriter, loc, converter->convertType(undefAttr.getType()));
 }
 
+/// PoisonAttr visitor.
+mlir::Value CIRAttrToValue::visitCirAttr(cir::PoisonAttr poisonAttr) {
+  mlir::Location loc = parentOp->getLoc();
+  return mlir::LLVM::PoisonOp::create(
+      rewriter, loc, converter->convertType(poisonAttr.getType()));
+}
+
 // VTableAttr visitor.
 mlir::Value CIRAttrToValue::visitCirAttr(cir::VTableAttr vtableArr) {
   mlir::Type llvmTy = converter->convertType(vtableArr.getType());
@@ -2603,11 +2612,10 @@ 
CIRToLLVMGlobalOpLowering::matchAndRewriteRegionInitializedGlobal(
     cir::GlobalOp op, mlir::Attribute init,
     mlir::ConversionPatternRewriter &rewriter) const {
   // TODO: Generalize this handling when more types are needed here.
-  assert(
-      (isa<cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr,
-           cir::ConstPtrAttr, cir::ConstComplexAttr, cir::GlobalViewAttr,
-           cir::TypeInfoAttr, cir::UndefAttr, cir::VTableAttr, cir::ZeroAttr>(
-          init)));
+  assert((isa<cir::ConstArrayAttr, cir::ConstRecordAttr, cir::ConstVectorAttr,
+              cir::ConstPtrAttr, cir::ConstComplexAttr, cir::GlobalViewAttr,
+              cir::TypeInfoAttr, cir::UndefAttr, cir::PoisonAttr,
+              cir::VTableAttr, cir::ZeroAttr>(init)));
 
   // TODO(cir): once LLVM's dialect has proper equivalent attributes this
   // should be updated. For now, we use a custom op to initialize globals
@@ -2674,8 +2682,8 @@ mlir::LogicalResult 
CIRToLLVMGlobalOpLowering::matchAndRewrite(
     } else if (mlir::isa<cir::ConstArrayAttr, cir::ConstVectorAttr,
                          cir::ConstRecordAttr, cir::ConstPtrAttr,
                          cir::ConstComplexAttr, cir::GlobalViewAttr,
-                         cir::TypeInfoAttr, cir::UndefAttr, cir::VTableAttr,
-                         cir::ZeroAttr>(init.value())) {
+                         cir::TypeInfoAttr, cir::UndefAttr, cir::PoisonAttr,
+                         cir::VTableAttr, cir::ZeroAttr>(init.value())) {
       // TODO(cir): once LLVM's dialect has proper equivalent attributes this
       // should be updated. For now, we use a custom op to initialize globals
       // to the appropriate value.
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index 1ed52378b99ac..65fa86ac4790c 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -15,20 +15,49 @@
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
 
-// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j = 
#cir.undef : !s32i {alignment = 4 : i64}
-// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.cir %s
+
+// CIR-DEVICE: cir.global {{.*}} @_ZZ2fnvE1j = #cir.undef
+// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef
 
 __device__ int a;
-// CIR-DEVICE: cir.global external lang_address_space(offload_global) 
@[[DEV:.*]] = #cir.int<0> : !s32i {alignment = 4 : i64, 
cu.externally_initialized = #cir.cu.externally_initialized}
-// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4
-// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
+// CIR-DEVICE: cir.global external lang_address_space(offload_global) @a = 
#cir.int<0> : !s32i {{{.*}}, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @a = externally_initialized global i32 0
+// OGCG-DEVICE: @a = addrspace(1) externally_initialized global i32 0
+// CIR-HOST: cir.global {{.*}} @a = #cir.poison : !s32i {{{.*}}, 
cu.shadow_name = #cir.cu.shadow_name<a>}
+// LLVM-HOST: @a = internal global i32 poison
+// OGCG-HOST: @a = internal global i32 undef
+
+__shared__ int b;
+// CIR-DEVICE: cir.global external  lang_address_space(offload_local) @b = 
#cir.poison {{.*}}
+// LLVM-DEVICE: @b = global i32 poison
+// OGCG-DEVICE: @b = addrspace(3) global i32 undef
+// CIR-HOST: cir.global {{.*}} @b = #cir.poison
+// LLVM-HOST: @b = internal global i32 poison
+// OGCG-HOST: @b = internal global i32 undef
 
 __constant__ int c;
-// CIR-DEVICE: cir.global constant external 
lang_address_space(offload_constant) @[[CONST:.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
-// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, 
align 4
-// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized 
constant i32 0, align 4
+// CIR-DEVICE: cir.global constant external 
lang_address_space(offload_constant) @c = #cir.int<0> : !s32i {{{.*}}, 
cu.externally_initialized = #cir.cu.externally_initialized}
+// LLVM-DEVICE: @c = externally_initialized constant i32 0
+// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0
+// CIR-HOST: cir.global {{.*}} @c = #cir.poison : !s32i {{{.*}}, 
cu.shadow_name = #cir.cu.shadow_name<c>}
+// LLVM-HOST: @c = internal global i32 poison
+// OGCG-HOST: @c = internal global i32 undef
 
-// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
+// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef
 
 __global__ void fn() {
   int i = 0;
@@ -46,16 +75,16 @@ __global__ void fn() {
 // CIR-DEVICE:   cir.return
 
 // LLVM-DEVICE: define dso_local void @_Z2fnv()
-// LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
-// LLVM-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
-// LLVM-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4
-// LLVM-DEVICE:   store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4
+// LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1
+// LLVM-DEVICE:   store i32 0, ptr %[[ALLOCA]]
+// LLVM-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]]
+// LLVM-DEVICE:   store i32 %[[VAL]], ptr @_ZZ2fnvE1j
 // LLVM-DEVICE:   ret void
 
 // OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
 // OGCG-DEVICE: entry:
-// OGCG-DEVICE:   %[[ALLOCA:.*]] = alloca i32, align 4
-// OGCG-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
-// OGCG-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4
-// OGCG-DEVICE:   store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) 
@_ZZ2fnvE1j to ptr), align 4
+// OGCG-DEVICE:   %[[ALLOCA:.*]] = alloca i32
+// OGCG-DEVICE:   store i32 0, ptr %[[ALLOCA]]
+// OGCG-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]]
+// OGCG-DEVICE:   store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) 
@_ZZ2fnvE1j to ptr)
 // OGCG-DEVICE:   ret void

>From af76d01740c17941d67c67b0f8b8001cf7efb682 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Thu, 2 Apr 2026 07:17:17 +0700
Subject: [PATCH 2/5] [CIR][CUDA][NFC] Add source range to NYI diagnostics for
 internalizeDeviceSideVar

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
index 8fb7191e7a89e..773924e6ca301 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
@@ -352,8 +352,8 @@ mlir::Operation 
*CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
 void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
     const VarDecl *d, cir::GlobalLinkageKind &linkage) {
   if (cgm.getLangOpts().GPURelocatableDeviceCode)
-    cgm.errorNYI(
-        "internalizeDeviceSideVar: GPU Relocatable Deviced Code (RDC)");
+    cgm.errorNYI(d->getSourceRange(),
+                 "internalizeDeviceSideVar: GPU Relocatable Device Code 
(RDC)");
 
   // __shared__ variables are odd. Shadows do get created, but
   // they are not registered with the CUDA runtime, so they
@@ -367,7 +367,8 @@ void CIRGenNVCUDARuntime::internalizeDeviceSideVar(
 
   if (d->getType()->isCUDADeviceBuiltinSurfaceType() ||
       d->getType()->isCUDADeviceBuiltinTextureType())
-    cgm.errorNYI("internalizeDeviceSideVar: CUDA Surface/Texture support");
+    cgm.errorNYI(d->getSourceRange(),
+                 "internalizeDeviceSideVar: CUDA Surface/Texture support");
 }
 
 std::string CIRGenNVCUDARuntime::getDeviceSideName(const NamedDecl *nd) {

>From c590ae596917604ef17d50f50762ed9e36e6029b Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Fri, 3 Apr 2026 03:07:33 +0700
Subject: [PATCH 3/5] [CIR][NFC] Try resolve conflict

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/test/CIR/CodeGenCUDA/address-spaces.cu | 79 ++++++++++++--------
 1 file changed, 46 insertions(+), 33 deletions(-)

diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index b2b20c3a7771b..38ebef92f4fc4 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -10,8 +10,9 @@
 // RUN:   -mmlir -mlir-print-ir-before=cir-target-lowering %s -o %t.cir 2> 
%t-pre.cir
 // RUN: FileCheck --check-prefix=CIR-PRE --input-file=%t-pre.cir %s
 
-// TODO: Add CIR (post target lowering) and LLVM checks once NVPTX 
TargetLoweringInfo
-// is implemented.
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -x cuda \
+// RUN:   -fcuda-is-device -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-POST --input-file=%t.cir %s
 
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
@@ -44,44 +45,56 @@
 
 // Verifies CIR emits correct address spaces for CUDA globals.
 
-// CIR-DEVICE: cir.global {{.*}} @_ZZ2fnvE1j = #cir.undef
+// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j = #cir.undef
 // LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef
 
-__device__ int a;
-// CIR-PRE: cir.global external lang_address_space(offload_global) @a = 
#cir.int<0> : !s32i {{{.*}}, cu.externally_initialized = 
#cir.cu.externally_initialized}
-// LLVM-DEVICE: @a = externally_initialized global i32 0
-// OGCG-DAG: @a = addrspace(1) externally_initialized global i32 0
-// OGCG-DEVICE: @a = addrspace(1) externally_initialized global i32 0
-// CIR-HOST: cir.global {{.*}} @a = #cir.poison : !s32i {{{.*}}, 
cu.shadow_name = #cir.cu.shadow_name<a>}
-// LLVM-HOST: @a = internal global i32 poison
-// OGCG-HOST: @a = internal global i32 undef
-
-__shared__ int b;
-// CIR-PRE: cir.global external  lang_address_space(offload_local) @b = 
#cir.poison {{.*}}
-// LLVM-DEVICE: @b = global i32 poison
-// OGCG-DEVICE: @b = addrspace(3) global i32 undef
-// CIR-HOST: cir.global {{.*}} @b = #cir.poison
-// LLVM-HOST: @b = internal global i32 poison
-// OGCG-HOST: @b = internal global i32 undef
-
-__constant__ int c;
-// CIR-PRE: cir.global constant external lang_address_space(offload_constant) 
@c = #cir.int<0> : !s32i {{{.*}}, cu.externally_initialized = 
#cir.cu.externally_initialized}
-// LLVM-DEVICE: @c = externally_initialized constant i32 0
-// OGCG-DAG: @c = addrspace(4) externally_initialized constant i32 0
-// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0
-// CIR-HOST: cir.global {{.*}} @c = #cir.poison : !s32i {{{.*}}, 
cu.shadow_name = #cir.cu.shadow_name<c>}
-// LLVM-HOST: @c = internal global i32 poison
-// OGCG-HOST: @c = internal global i32 undef
+// CIR-PRE: cir.global external  lang_address_space(offload_global) @i = 
#cir.int<0> : !s32i
+// CIR-POST: cir.global external  target_address_space(1) @i = #cir.int<0> : 
!s32i
+// LLVM-DEVICE-DAG: @i = addrspace(1) {{.*}}global i32 0
+// OGCG-DAG: @i = addrspace(1) externally_initialized global i32 0
+// CIR-HOST: cir.global {{.*}} @i = #cir.poison : !s32i {{{.*}}, 
cu.shadow_name = #cir.cu.shadow_name<a>}
+// LLVM-HOST: @i = internal global i32 poison
+// OGCG-HOST: @i = internal global i32 undef
+__device__ int i;
+
+// CIR-PRE: cir.global constant external  lang_address_space(offload_constant) 
@j = #cir.int<0> : !s32i
+// CIR-POST: cir.global constant external  target_address_space(4) @j = 
#cir.int<0> : !s32i
+// LLVM-DEVICE-DAG: @j = addrspace(4) {{.*}}constant i32 0
+// OGCG-DAG: @j = addrspace(4) externally_initialized constant i32 0
+// CIR-HOST: cir.global {{.*}} @j = #cir.poison : !s32i {{{.*}}, 
cu.shadow_name = #cir.cu.shadow_name<c>}
+// LLVM-HOST: @j = internal global i32 poison
+// OGCG-HOST: @j = internal global i32 undef
+__constant__ int j;
+
+// CIR-PRE: cir.global external  lang_address_space(offload_local) @k = 
#cir.poison : !s32i
+// CIR-POST: cir.global external  target_address_space(3) @k = #cir.poison : 
!s32i
+// LLVM-DEVICE-DAG: @k = addrspace(3) global i32 {{undef|poison}}
+// OGCG-DAG: @k = addrspace(3) global i32 undef
+// CIR-HOST: cir.global {{.*}} @k = #cir.poison
+// LLVM-HOST: @k = internal global i32 poison
+// OGCG-HOST: @k = internal global i32 undef
+__shared__ int k;
+
+// CIR-PRE: cir.global external  lang_address_space(offload_local) @b = 
#cir.poison : !cir.float
+// CIR-POST: cir.global external  target_address_space(3) @b = #cir.poison : 
!cir.float
+// LLVM-DEVICE-DAG: @b = addrspace(3) global float {{undef|poison}}
+// OGCG-DAG: @b = addrspace(3) global float undef
+__shared__ float b;
 
 __device__ void foo() {
-  // CIR-PRE: cir.get_global @a : !cir.ptr<!s32i, 
lang_address_space(offload_global)>
-  a++;
+  // CIR-PRE: cir.get_global @i : !cir.ptr<!s32i, 
lang_address_space(offload_global)>
+  // CIR-POST: cir.get_global @i : !cir.ptr<!s32i, target_address_space(1)>
+  i++;
+
+  // CIR-PRE: cir.get_global @j : !cir.ptr<!s32i, 
lang_address_space(offload_constant)>
+  // CIR-POST: cir.get_global @j : !cir.ptr<!s32i, target_address_space(4)>
+  j++;
 
-  // CIR-PRE: cir.get_global @c : !cir.ptr<!s32i, 
lang_address_space(offload_constant)>
-  c++;
+  // CIR-PRE: cir.get_global @k : !cir.ptr<!s32i, 
lang_address_space(offload_local)>
+  // CIR-POST: cir.get_global @k : !cir.ptr<!s32i, target_address_space(3)>
+  k++;
 }
 
-// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef
 __global__ void fn() {
   int i = 0;
   __shared__ int j;

>From 803287de08ee0ca8c2a8dc8617674f69f6cab740 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Fri, 3 Apr 2026 14:15:59 +0700
Subject: [PATCH 4/5] trigger GitHub actions

Signed-off-by: ZakyHermawan <[email protected]>

>From 6f9744b8c14713c1694962130db5a902b2436077 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Sat, 4 Apr 2026 18:46:40 +0700
Subject: [PATCH 5/5] trigger GitHub actions

Signed-off-by: ZakyHermawan <[email protected]>

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to