https://github.com/ZakyHermawan updated 
https://github.com/llvm/llvm-project/pull/184248

>From 809121a3d2fe82e146da5cc8177e134b862b2a01 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Tue, 3 Mar 2026 05:16:23 +0700
Subject: [PATCH 1/5] [CIR][CUDA] Handle __device__ and __shared__ variables

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/lib/CIR/CodeGen/CIRGenDecl.cpp         | 14 ++--
 clang/lib/CIR/CodeGen/CIRGenModule.cpp       | 53 ++++++++++++-
 clang/lib/CIR/CodeGen/CIRGenModule.h         | 10 +++
 clang/lib/CIR/CodeGen/TargetInfo.cpp         |  9 +++
 clang/lib/CIR/CodeGen/TargetInfo.h           |  7 ++
 clang/test/CIR/CodeGenCUDA/address-spaces.cu | 78 ++++++++++++++++++++
 clang/test/CIR/CodeGenCUDA/global-vars.cu    | 47 ++++++++++++
 7 files changed, 206 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenCUDA/address-spaces.cu
 create mode 100644 clang/test/CIR/CodeGenCUDA/global-vars.cu

diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp 
b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index bb3117dfb2c98..b19e48d0f51d4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -433,12 +433,15 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &d,
   mlir::Type lty = getTypes().convertTypeForMem(ty);
   assert(!cir::MissingFeatures::addressSpace());
 
-  if (d.hasAttr<LoaderUninitializedAttr>() || d.hasAttr<CUDASharedAttr>())
+  mlir::Attribute init = nullptr;
+  if (d.hasAttr<LoaderUninitializedAttr>())
     errorNYI(d.getSourceRange(),
              "getOrCreateStaticVarDecl: LoaderUninitializedAttr");
-  assert(!cir::MissingFeatures::addressSpace());
+  else if (ty.getAddressSpace() != LangAS::opencl_local &&
+           !d.hasAttr<CUDASharedAttr>())
+    init = builder.getZeroInitAttr(convertType(ty));
 
-  mlir::Attribute init = builder.getZeroInitAttr(convertType(ty));
+  assert(!cir::MissingFeatures::addressSpace());
 
   cir::GlobalOp gv = builder.createVersionedGlobal(
       getModule(), getLoc(d.getLocation()), name, lty, false, linkage);
@@ -665,11 +668,6 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
 
   var.setAlignment(alignment.getAsAlign().value());
 
-  // There are a lot of attributes that need to be handled here. Until
-  // we start to support them, we just report an error if there are any.
-  if (d.hasAttrs())
-    cgm.errorNYI(d.getSourceRange(), "static var with attrs");
-
   if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
     cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 223b53731359a..1517058af8782 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -797,6 +797,22 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
                "external const declaration with initializer");
   }
 
+  // TODO(cir): if this method is used to handle functions we must have
+  // something closer to GlobalValue::isDeclaration instead of checking for
+  // initializer.
+  if (gv.isDeclaration()) {
+    // TODO(cir): set target attributes
+
+    // External HIP managed variables needed to be recorded for transformation
+    // in both device and host compilations.
+    // External HIP managed variables needed to be recorded for transformation
+    // in both device and host compilations.
+    if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
+        d->hasExternalStorage())
+      llvm_unreachable("NYI");
+  }
+
+  // TODO(cir): address space cast when needed for DAddrSpace.
   return gv;
 }
 
@@ -947,10 +963,6 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
     errorNYI(vd->getSourceRange(), "annotate global variable");
   }
 
-  if (langOpts.CUDA) {
-    errorNYI(vd->getSourceRange(), "CUDA global variable");
-  }
-
   // Set initializer and finalize emission
   CIRGenModule::setInitializer(gv, init);
   if (emitter)
@@ -1563,6 +1575,39 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const 
StringLiteral *s,
   return builder.getGlobalViewAttr(ptrTy, gv);
 }
 
+LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) {
+  if (langOpts.OpenCL) {
+    LangAS as = d ? d->getType().getAddressSpace() : LangAS::opencl_global;
+    assert(as == LangAS::opencl_global || as == LangAS::opencl_global_device ||
+           as == LangAS::opencl_global_host || as == LangAS::opencl_constant ||
+           as == LangAS::opencl_local || as >= 
LangAS::FirstTargetAddressSpace);
+    return as;
+  }
+
+  if (langOpts.SYCLIsDevice &&
+      (!d || d->getType().getAddressSpace() == LangAS::Default))
+    llvm_unreachable("NYI");
+
+  if (langOpts.CUDA && langOpts.CUDAIsDevice) {
+    if (d) {
+      if (d->hasAttr<CUDAConstantAttr>())
+        return LangAS::cuda_constant;
+      if (d->hasAttr<CUDASharedAttr>())
+        return LangAS::cuda_shared;
+      if (d->hasAttr<CUDADeviceAttr>())
+        return LangAS::cuda_device;
+      if (d->getType().isConstQualified())
+        return LangAS::cuda_constant;
+    }
+    return LangAS::cuda_device;
+  }
+
+  if (langOpts.OpenMP)
+    llvm_unreachable("NYI");
+
+  return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
+}
+
 // TODO(cir): this could be a common AST helper for both CIR and LLVM codegen.
 LangAS CIRGenModule::getLangTempAllocaAddressSpace() const {
   if (getLangOpts().OpenCL)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h 
b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 52464a8bc30c4..d9173234868ee 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -359,6 +359,16 @@ class CIRGenModule : public CIRGenTypeCache {
   getAddrOfConstantStringFromLiteral(const StringLiteral *s,
                                      llvm::StringRef name = ".str");
 
+  /// Return the AST address space of the underlying global variable for D, as
+  /// determined by its declaration. Normally this is the same as the address
+  /// space of D's type, but in CUDA, address spaces are associated with
+  /// declarations, not types. If D is nullptr, return the default address
+  /// space for global variable.
+  ///
+  /// For languages without explicit address spaces, if D has default address
+  /// space, target-specific global or constant address space may be returned.
+  LangAS getGlobalVarAddressSpace(const VarDecl *d);
+
   /// Returns the address space for temporary allocations in the language. This
   /// ensures that the allocated variable's address space matches the
   /// expectations of the AST, rather than using the target's allocation 
address
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp 
b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 2f3824d3d47a7..70ffb46050ea1 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -91,3 +91,12 @@ bool TargetCIRGenInfo::isNoProtoCallVariadic(
   // For everything else, we just prefer false unless we opt out.
   return false;
 }
+
+clang::LangAS
+TargetCIRGenInfo::getGlobalVarAddressSpace(CIRGenModule &cgm,
+                                           const clang::VarDecl *d) const {
+  assert(!cgm.getLangOpts().OpenCL &&
+         !(cgm.getLangOpts().CUDA && cgm.getLangOpts().CUDAIsDevice) &&
+         "Address space agnostic languages only");
+  return d ? d->getType().getAddressSpace() : LangAS::Default;
+}
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h 
b/clang/lib/CIR/CodeGen/TargetInfo.h
index f4792d5309e36..8db2cbbce5d23 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -49,6 +49,13 @@ class TargetCIRGenInfo {
   /// Returns ABI info helper for the target.
   const ABIInfo &getABIInfo() const { return *info; }
 
+  /// Get target favored AST address space of a global variable for languages
+  /// other than OpenCL and CUDA.
+  /// If \p d is nullptr, returns the default target favored address space
+  /// for global variable.
+  virtual clang::LangAS getGlobalVarAddressSpace(CIRGenModule &cgm,
+                                                 const clang::VarDecl *d) 
const;
+
   /// Get the address space for alloca.
   virtual mlir::ptr::MemorySpaceAttrInterface getCIRAllocaAddressSpace() const 
{
     return cir::LangAddressSpaceAttr::get(&info->cgt.getMLIRContext(),
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
new file mode 100644
index 0000000000000..68905a6616ca7
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -0,0 +1,78 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__global__ void fn() {
+  int i = 0;
+  __shared__ int j;
+  j = i;
+}
+
+// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j : !s32i
+// CIR-DEVICE: cir.func {{.*}}@_Z2fnv() {{.*}} {
+// CIR-DEVICE:   %[[I:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
+// CIR-DEVICE:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-DEVICE:   cir.store {{.*}}%[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE:   %[[J:.*]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i>
+// CIR-DEVICE:   %[[VAL:.*]] = cir.load {{.*}}%[[I]] : !cir.ptr<!s32i>, !s32i
+// CIR-DEVICE:   cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE:   cir.return
+
+// CIR-HOST: cir.func private dso_local @__cudaPopCallConfiguration
+// CIR-HOST: cir.func private dso_local @cudaLaunchKernel
+// CIR-HOST: cir.func {{.*}}@_Z17__device_stub__fnv()
+
+// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
+// LLVM-DEVICE: define dso_local void @_Z2fnv()
+// LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
+// LLVM-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
+// LLVM-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4
+// LLVM-DEVICE:   store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4
+// LLVM-DEVICE:   ret void
+
+// LLVM-HOST: %struct.dim3 = type { i32, i32, i32 }
+// LLVM-HOST: declare {{.*}}i32 @__cudaPopCallConfiguration(ptr, ptr, ptr, ptr)
+// LLVM-HOST: declare {{.*}}i32 @cudaLaunchKernel(ptr, %struct.dim3, 
%struct.dim3, ptr, i64, ptr)
+// LLVM-HOST: define dso_local void @_Z17__device_stub__fnv()
+
+// OGCG-HOST: define dso_local void @_Z17__device_stub__fnv()
+// OGCG-HOST: entry:
+// OGCG-HOST:   call i32 @__cudaPopCallConfiguration
+// OGCG-HOST:   call {{.*}}i32 @cudaLaunchKernel
+
+// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
+// OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
+// OGCG-DEVICE: entry:
+// OGCG-DEVICE:   %[[I:.*]] = alloca i32, align 4
+// OGCG-DEVICE:   store i32 0, ptr %[[I]], align 4
+// OGCG-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[I]], align 4
+// OGCG-DEVICE:   store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) 
@_ZZ2fnvE1j to ptr), align 4
+// OGCG-DEVICE:   ret void
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu 
b/clang/test/CIR/CodeGenCUDA/global-vars.cu
new file mode 100644
index 0000000000000..f497d0e7f5f64
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu
@@ -0,0 +1,47 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+__shared__ int a;
+// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4
+// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4
+// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
+// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4
+
+__device__ int b;
+// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4
+// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4
+// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4

>From 512dd3acf7be7d06505243603fa2af9106a2b58b Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Fri, 6 Mar 2026 02:39:06 +0700
Subject: [PATCH 2/5] [CIR][CUDA] handle __constant__ variable Remove CIR-HOST
 LLVM-HOST and OGCG-HOST from global-vars.cu because shadow variables did not
 handled properly, yet Make few changes to handle __device__, __shared__, and
 __constant__ global variables using reference from OGCG Create and call a
 hook (setTargetAttributes) if the variable is global and declaration only.

Signed-off-by: ZakyHermawan <[email protected]>
---
 .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td      | 14 +++-
 clang/lib/CIR/CodeGen/CIRGenDecl.cpp          | 22 ++++++
 clang/lib/CIR/CodeGen/CIRGenModule.cpp        | 68 ++++++++++++++-----
 clang/lib/CIR/CodeGen/TargetInfo.h            |  9 +++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  7 ++
 clang/test/CIR/CodeGenCUDA/global-vars.cu     | 39 ++++-------
 6 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td 
b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
index cf6635fc893fa..257cf396abce7 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td
@@ -36,5 +36,17 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", 
"cu.kernel_name"> {
   let assemblyFormat = "`<` $kernel_name `>`";
 }
 
+def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized",
+                                             "cu.externally_initialized"> {
+  let summary = "The marked variable is externally initialized.";
+  let description =
+  [{
+    CUDA __device__ and __constant__ variables, along with surface and
+    textures, might be initialized by host, hence "externally initialized".
+    Therefore they must be emitted even if they are not referenced.
+
+    The attribute corresponds to the attribute on LLVM with the same name.
+  }];
+}
 
-#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
\ No newline at end of file
+#endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp 
b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index b19e48d0f51d4..a636c07876964 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -14,10 +14,12 @@
 #include "CIRGenFunction.h"
 #include "mlir/IR/Location.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Attrs.inc"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclOpenACC.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/Basic/Cuda.h"
 #include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
@@ -668,6 +670,26 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
 
   var.setAlignment(alignment.getAsAlign().value());
 
+  // There are a lot of attributes that need to be handled here. Until
+  // we start to support them, we just report an error if there are any.
+  if (d.hasAttr<AnnotateAttr>())
+    cgm.errorNYI(d.getSourceRange(), "Global annotations are NYI");
+  if (d.getAttr<PragmaClangBSSSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(), "CIR global BSS section attribute is 
NYI");
+  if (d.getAttr<PragmaClangDataSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global Data section attribute is NYI");
+  if (d.getAttr<PragmaClangRodataSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global Rodata section attribute is NYI");
+  if (d.getAttr<PragmaClangRelroSectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global Relro section attribute is NYI");
+
+  if (d.getAttr<SectionAttr>())
+    cgm.errorNYI(d.getSourceRange(),
+                 "CIR global object file section attribute is NYI");
+
   if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
     cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 1517058af8782..bd4d2d4e5c1a5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -18,6 +18,7 @@
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
+#include "clang/AST/Attrs.inc"
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/DeclOpenACC.h"
 #include "clang/AST/GlobalDecl.h"
@@ -797,22 +798,19 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
                "external const declaration with initializer");
   }
 
-  // TODO(cir): if this method is used to handle functions we must have
-  // something closer to GlobalValue::isDeclaration instead of checking for
-  // initializer.
-  if (gv.isDeclaration()) {
+  if (d &&
+      d->isThisDeclarationADefinition(astContext) == VarDecl::DeclarationOnly) 
{
+    getTargetCIRGenInfo().setTargetAttributes(d, gv, *this);
     // TODO(cir): set target attributes
-
-    // External HIP managed variables needed to be recorded for transformation
-    // in both device and host compilations.
     // External HIP managed variables needed to be recorded for transformation
     // in both device and host compilations.
     if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
         d->hasExternalStorage())
-      llvm_unreachable("NYI");
+      errorNYI(d->getSourceRange(), "HIP managed attribute");
   }
 
   // TODO(cir): address space cast when needed for DAddrSpace.
+  assert(!cir::MissingFeatures::addressSpace());
   return gv;
 }
 
@@ -896,9 +894,18 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
 
   assert(!cir::MissingFeatures::cudaSupport());
 
-  if (vd->hasAttr<LoaderUninitializedAttr>()) {
+  // CUDA E.2.4.1 "__shared__ variables cannot have an initialization
+  // as part of their declaration."  Sema has already checked for
+  // error cases, so we just need to set Init to UndefValue.
+  bool isCUDASharedVar =
+      getLangOpts().CUDAIsDevice && vd->hasAttr<CUDASharedAttr>();
+  // TODO(cir): implement isCUDAShadowVar and isCUDADeviceShadowVar, reference:
+  // OGCG
+
+  if (getLangOpts().CUDA && isCUDASharedVar) {
+    init = cir::UndefAttr::get(&getMLIRContext(), convertType(vd->getType()));
+  } else if (vd->hasAttr<LoaderUninitializedAttr>()) {
     errorNYI(vd->getSourceRange(), "loader uninitialized attribute");
-    return;
   } else if (!initExpr) {
     // This is a tentative definition; tentative definitions are
     // implicitly initialized with { 0 }.
@@ -963,6 +970,39 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
     errorNYI(vd->getSourceRange(), "annotate global variable");
   }
 
+  // Set CIR's linkage type as appropriate.
+  cir::GlobalLinkageKind linkage =
+      getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
+
+  // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+  // the device. [...]"
+  // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+  // __device__, declares a variable that: [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+  if (langOpts.CUDA) {
+    if (langOpts.CUDAIsDevice) {
+      // __shared__ variables is not marked as externally initialized,
+      // because they must not be initialized.
+      if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
+          !vd->isConstexpr() && !vd->getType().isConstQualified() &&
+          (vd->hasAttr<CUDADeviceAttr>() || vd->hasAttr<CUDAConstantAttr>() ||
+           vd->getType()->isCUDADeviceBuiltinSurfaceType() ||
+           vd->getType()->isCUDADeviceBuiltinTextureType())) {
+        gv->setAttr(cir::CUDAExternallyInitializedAttr::getMnemonic(),
+                    
cir::CUDAExternallyInitializedAttr::get(&getMLIRContext()));
+      }
+    } else {
+      // TODO(cir):
+      // Adjust linkage of shadow variables in host compilation
+      // getCUDARuntime().internalizeDeviceSideVar(vd, linkage);
+    }
+    // TODO(cir):
+    // Handle variable registration
+    // getCUDARuntime().handleVarRegistration(vd, gv);
+  }
+
   // Set initializer and finalize emission
   CIRGenModule::setInitializer(gv, init);
   if (emitter)
@@ -977,10 +1017,6 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
                                                   /*ExcludeDtor=*/true)));
   assert(!cir::MissingFeatures::opGlobalSection());
 
-  // Set CIR's linkage type as appropriate.
-  cir::GlobalLinkageKind linkage =
-      getCIRLinkageVarDefinition(vd, /*IsConstant=*/false);
-
   // Set CIR linkage and DLL storage class.
   gv.setLinkage(linkage);
   // FIXME(cir): setLinkage should likely set MLIR's visibility automatically.
@@ -1586,7 +1622,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const 
VarDecl *d) {
 
   if (langOpts.SYCLIsDevice &&
       (!d || d->getType().getAddressSpace() == LangAS::Default))
-    llvm_unreachable("NYI");
+    errorNYI(d->getSourceRange(), "global as for SYCL device");
 
   if (langOpts.CUDA && langOpts.CUDAIsDevice) {
     if (d) {
@@ -1603,7 +1639,7 @@ LangAS CIRGenModule::getGlobalVarAddressSpace(const 
VarDecl *d) {
   }
 
   if (langOpts.OpenMP)
-    llvm_unreachable("NYI");
+    errorNYI(d->getSourceRange(), "global as for OpenMP");
 
   return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
 }
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h 
b/clang/lib/CIR/CodeGen/TargetInfo.h
index 8db2cbbce5d23..9ba155b220fbc 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -106,6 +106,15 @@ class TargetCIRGenInfo {
   /// right thing when calling a function with no know signature.
   virtual bool isNoProtoCallVariadic(const FunctionNoProtoType *fnType) const;
 
+  /// Provides a convenient hook to handle extra target-specific attributes
+  /// for the given global.
+  /// In OG, the function receives an llvm::GlobalValue. However, functions
+  /// and global variables are separate types in Clang IR, so we use a general
+  /// mlir::Operation*.
+  virtual void setTargetAttributes(const clang::Decl *decl,
+                                   mlir::Operation *global,
+                                   CIRGenModule &module) const {}
+
   virtual bool isScalarizableAsmOperand(CIRGenFunction &cgf,
                                         mlir::Type ty) const {
     return false;
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 03085ad29ab78..eda07dab4d97b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -2557,6 +2557,13 @@ mlir::LogicalResult 
CIRToLLVMGlobalOpLowering::matchAndRewrite(
   const StringRef symbol = op.getSymName();
   SmallVector<mlir::NamedAttribute> attributes;
 
+  // Mark externally_initialized for __device__ and __constant__
+  if (auto extInit =
+          op->getAttr(CUDAExternallyInitializedAttr::getMnemonic())) {
+    attributes.push_back(rewriter.getNamedAttr("externally_initialized",
+                                               rewriter.getUnitAttr()));
+  }
+
   if (init.has_value()) {
     if (mlir::isa<cir::FPAttr, cir::IntAttr, cir::BoolAttr>(init.value())) {
       GlobalInitAttrRewriter initRewriter(llvmType, rewriter);
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu 
b/clang/test/CIR/CodeGenCUDA/global-vars.cu
index f497d0e7f5f64..4791f145d1bae 100644
--- a/clang/test/CIR/CodeGenCUDA/global-vars.cu
+++ b/clang/test/CIR/CodeGenCUDA/global-vars.cu
@@ -5,43 +5,28 @@
 // RUN:            -I%S/Inputs/ %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
-
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
-
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
-// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
 
-__shared__ int a;
-// CIR-DEVICE: cir.global external [[SHARED:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// CIR-HOST: cir.global external [[SHARED_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 0, align 4
-// LLVM-HOST: @[[SHARED_LH:.*]] = global i32 0, align 4
+__device__ int a;
+// CIR-DEVICE: cir.global external @[[DEV:.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
+
+__shared__ int b;
+// CIR-DEVICE: cir.global external @[[SHARED:.*]] = #cir.undef : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 undef, align 4
 // OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
-// OGCG-HOST: @[[SHARED_OH:.*]] = internal global i32 undef, align 4
 
-__device__ int b;
-// CIR-DEVICE: cir.global external [[DEV:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// CIR-HOST: cir.global external [[DEV_HOST:@.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64}
-// LLVM-DEVICE: @[[DEV_LD:.*]] = global i32 0, align 4
-// LLVM-HOST: @[[DEV_LH:.*]] = global i32 0, align 4
-// OGCG-HOST: @[[DEV_OH:.*]] = internal global i32 undef, align 4
-// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
+__constant__ int c;
+// CIR-DEVICE: cir.global constant external @[[CONST:.*]] = #cir.int<0> : 
!s32i {alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, 
align 4
+// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized 
constant i32 0, align 4

>From 42105b08feed53b5e637b5e04a550dde21b594f4 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Fri, 6 Mar 2026 04:01:25 +0700
Subject: [PATCH 3/5] [CIR][CUDA][NFC] Remove unnecessary comment

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/lib/CIR/CodeGen/CIRGenModule.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index bd4d2d4e5c1a5..2da4ed7b79da2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -809,7 +809,6 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
       errorNYI(d->getSourceRange(), "HIP managed attribute");
   }
 
-  // TODO(cir): address space cast when needed for DAddrSpace.
   assert(!cir::MissingFeatures::addressSpace());
   return gv;
 }

>From f58883bda75af9e0d2707287aa22afc902ef5b63 Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Sat, 7 Mar 2026 07:02:17 +0700
Subject: [PATCH 4/5] [CIR][CUDA][NFC] Remove *-HOST lit checks and rename some
 captured variables

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/test/CIR/CodeGenCUDA/address-spaces.cu | 41 +++-----------------
 1 file changed, 6 insertions(+), 35 deletions(-)

diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index 68905a6616ca7..2eefe8fafa727 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -5,26 +5,11 @@
 // RUN:            -I%S/Inputs/ %s -o %t.cir
 // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
-
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
-// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
-// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
-
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
 // RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
 // RUN:            -I%S/Inputs/ %s -o %t.ll
@@ -38,18 +23,14 @@ __global__ void fn() {
 
 // CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j : !s32i
 // CIR-DEVICE: cir.func {{.*}}@_Z2fnv() {{.*}} {
-// CIR-DEVICE:   %[[I:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
+// CIR-DEVICE:   %[[ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", 
init]
 // CIR-DEVICE:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
-// CIR-DEVICE:   cir.store {{.*}}%[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i>
+// CIR-DEVICE:   cir.store {{.*}}%[[ZERO]], %[[ALLOCA]] : !s32i, 
!cir.ptr<!s32i>
 // CIR-DEVICE:   %[[J:.*]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i>
-// CIR-DEVICE:   %[[VAL:.*]] = cir.load {{.*}}%[[I]] : !cir.ptr<!s32i>, !s32i
+// CIR-DEVICE:   %[[VAL:.*]] = cir.load {{.*}}%[[ALLOCA]] : !cir.ptr<!s32i>, 
!s32i
 // CIR-DEVICE:   cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i>
 // CIR-DEVICE:   cir.return
 
-// CIR-HOST: cir.func private dso_local @__cudaPopCallConfiguration
-// CIR-HOST: cir.func private dso_local @cudaLaunchKernel
-// CIR-HOST: cir.func {{.*}}@_Z17__device_stub__fnv()
-
 // LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
 // LLVM-DEVICE: define dso_local void @_Z2fnv()
 // LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
@@ -58,21 +39,11 @@ __global__ void fn() {
 // LLVM-DEVICE:   store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4
 // LLVM-DEVICE:   ret void
 
-// LLVM-HOST: %struct.dim3 = type { i32, i32, i32 }
-// LLVM-HOST: declare {{.*}}i32 @__cudaPopCallConfiguration(ptr, ptr, ptr, ptr)
-// LLVM-HOST: declare {{.*}}i32 @cudaLaunchKernel(ptr, %struct.dim3, 
%struct.dim3, ptr, i64, ptr)
-// LLVM-HOST: define dso_local void @_Z17__device_stub__fnv()
-
-// OGCG-HOST: define dso_local void @_Z17__device_stub__fnv()
-// OGCG-HOST: entry:
-// OGCG-HOST:   call i32 @__cudaPopCallConfiguration
-// OGCG-HOST:   call {{.*}}i32 @cudaLaunchKernel
-
 // OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
 // OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
 // OGCG-DEVICE: entry:
-// OGCG-DEVICE:   %[[I:.*]] = alloca i32, align 4
-// OGCG-DEVICE:   store i32 0, ptr %[[I]], align 4
-// OGCG-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[I]], align 4
+// OGCG-DEVICE:   %[[ALLOCA:.*]] = alloca i32, align 4
+// OGCG-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
+// OGCG-DEVICE:   %[[VAL:.*]] = load i32, ptr %[[ALLOCA]], align 4
 // OGCG-DEVICE:   store i32 %[[VAL]], ptr addrspacecast (ptr addrspace(3) 
@_ZZ2fnvE1j to ptr), align 4
 // OGCG-DEVICE:   ret void

>From 5af50ea02045f8a439087833f683b03b93bc99cf Mon Sep 17 00:00:00 2001
From: ZakyHermawan <[email protected]>
Date: Wed, 11 Mar 2026 08:33:04 +0700
Subject: [PATCH 5/5] [CIR][CUDA] fix initial value, remove unused variable,
 and improve diagnostics

Set initial value for opencl local variables to undef.
Set initial value for variable with attributes CUDASharedAttr and 
LoaderUninitializedAttr to undef.
Remove unused LangAS getGlobalVarAddressSpace(const VarDecl *d);
Improve diagnostics for NYI: print function name where the diagnostics are 
being emitted.

Signed-off-by: ZakyHermawan <[email protected]>
---
 clang/lib/CIR/CodeGen/CIRGenDecl.cpp         | 42 +++++-----
 clang/lib/CIR/CodeGen/CIRGenModule.cpp       | 82 ++++++++------------
 clang/lib/CIR/CodeGen/CIRGenModule.h         | 10 ---
 clang/test/CIR/CodeGenCUDA/address-spaces.cu | 23 +++++-
 clang/test/CIR/CodeGenCUDA/global-vars.cu    | 32 --------
 5 files changed, 75 insertions(+), 114 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGenCUDA/global-vars.cu

diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp 
b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index a636c07876964..d8676fe5dc3c4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/Basic/Cuda.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
@@ -40,7 +41,7 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d,
   emission.isEscapingByRef = d.isEscapingByref();
   if (emission.isEscapingByRef)
     cgm.errorNYI(d.getSourceRange(),
-                 "emitAutoVarDecl: decl escaping by reference");
+                 "emitAutoVarAlloca: decl escaping by reference");
 
   CharUnits alignment = getContext().getDeclAlign(&d);
 
@@ -365,7 +366,7 @@ void CIRGenFunction::emitVarDecl(const VarDecl &d) {
     if (d.getType()->isSamplerT()) {
       // Nothing needs to be done here, but let's flag it as an error until we
       // have a test. It requires OpenCL support.
-      cgm.errorNYI(d.getSourceRange(), "emitVarDecl static sampler type");
+      cgm.errorNYI(d.getSourceRange(), "emitVarDecl: static sampler type");
       return;
     }
 
@@ -380,7 +381,7 @@ void CIRGenFunction::emitVarDecl(const VarDecl &d) {
   }
 
   if (d.getType().getAddressSpace() == LangAS::opencl_local)
-    cgm.errorNYI(d.getSourceRange(), "emitVarDecl openCL address space");
+    cgm.errorNYI(d.getSourceRange(), "emitVarDecl: openCL address space");
 
   assert(d.hasLocalStorage());
 
@@ -401,11 +402,14 @@ static std::string getStaticDeclName(CIRGenModule &cgm, 
const VarDecl &d) {
   if (const auto *fd = dyn_cast<FunctionDecl>(dc))
     contextName = std::string(cgm.getMangledName(fd));
   else if (isa<BlockDecl>(dc))
-    cgm.errorNYI(d.getSourceRange(), "block decl context for static var");
+    cgm.errorNYI(d.getSourceRange(),
+                 "getStaticDeclName: block decl context for static var");
   else if (isa<ObjCMethodDecl>(dc))
-    cgm.errorNYI(d.getSourceRange(), "ObjC decl context for static var");
+    cgm.errorNYI(d.getSourceRange(),
+                 "getStaticDeclName: ObjC decl context for static var");
   else
-    cgm.errorNYI(d.getSourceRange(), "Unknown context for static var decl");
+    cgm.errorNYI(d.getSourceRange(),
+                 "getStaticDeclName: Unknown context for static var decl");
 
   contextName += "." + d.getNameAsString();
   return contextName;
@@ -435,16 +439,15 @@ CIRGenModule::getOrCreateStaticVarDecl(const VarDecl &d,
   mlir::Type lty = getTypes().convertTypeForMem(ty);
   assert(!cir::MissingFeatures::addressSpace());
 
+  // OpenCL variables in local address space and CUDA shared
+  // variables cannot have an initializer.
   mlir::Attribute init = nullptr;
-  if (d.hasAttr<LoaderUninitializedAttr>())
-    errorNYI(d.getSourceRange(),
-             "getOrCreateStaticVarDecl: LoaderUninitializedAttr");
-  else if (ty.getAddressSpace() != LangAS::opencl_local &&
-           !d.hasAttr<CUDASharedAttr>())
+  if (ty.getAddressSpace() == LangAS::opencl_local ||
+      d.hasAttr<CUDASharedAttr>() || d.hasAttr<LoaderUninitializedAttr>())
+    init = cir::UndefAttr::get(lty);
+  else
     init = builder.getZeroInitAttr(convertType(ty));
 
-  assert(!cir::MissingFeatures::addressSpace());
-
   cir::GlobalOp gv = builder.createVersionedGlobal(
       getModule(), getLoc(d.getLocation()), name, lty, false, linkage);
   // TODO(cir): infer visibility from linkage in global op builder.
@@ -673,22 +676,23 @@ void CIRGenFunction::emitStaticVarDecl(const VarDecl &d,
   // There are a lot of attributes that need to be handled here. Until
   // we start to support them, we just report an error if there are any.
   if (d.hasAttr<AnnotateAttr>())
-    cgm.errorNYI(d.getSourceRange(), "Global annotations are NYI");
+    cgm.errorNYI(d.getSourceRange(), "emitStaticVarDecl: Global annotations");
   if (d.getAttr<PragmaClangBSSSectionAttr>())
-    cgm.errorNYI(d.getSourceRange(), "CIR global BSS section attribute is 
NYI");
+    cgm.errorNYI(d.getSourceRange(),
+                 "emitStaticVarDecl: CIR global BSS section attribute");
   if (d.getAttr<PragmaClangDataSectionAttr>())
     cgm.errorNYI(d.getSourceRange(),
-                 "CIR global Data section attribute is NYI");
+                 "emitStaticVarDecl: CIR global Data section attribute");
   if (d.getAttr<PragmaClangRodataSectionAttr>())
     cgm.errorNYI(d.getSourceRange(),
-                 "CIR global Rodata section attribute is NYI");
+                 "emitStaticVarDecl: CIR global Rodata section attribute");
   if (d.getAttr<PragmaClangRelroSectionAttr>())
     cgm.errorNYI(d.getSourceRange(),
-                 "CIR global Relro section attribute is NYI");
+                 "emitStaticVarDecl: CIR global Relro section attribute");
 
   if (d.getAttr<SectionAttr>())
     cgm.errorNYI(d.getSourceRange(),
-                 "CIR global object file section attribute is NYI");
+                 "emitStaticVarDecl: CIR global object file section 
attribute");
 
   if (cgm.getCodeGenOpts().KeepPersistentStorageVariables)
     cgm.errorNYI(d.getSourceRange(), "static var keep persistent storage");
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 2da4ed7b79da2..eb2da604e4328 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -58,7 +58,7 @@ static CIRGenCXXABI *createCXXABI(CIRGenModule &cgm) {
   case TargetCXXABI::WebAssembly:
   case TargetCXXABI::XL:
   case TargetCXXABI::Microsoft:
-    cgm.errorNYI("C++ ABI kind not yet implemented");
+    cgm.errorNYI("createCXXABI: C++ ABI kind");
     return nullptr;
   }
 
@@ -696,7 +696,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
   cir::GlobalOp entry;
   if (mlir::Operation *v = getGlobalValue(mangledName)) {
     if (!isa<cir::GlobalOp>(v))
-      errorNYI(d->getSourceRange(), "global with non-GlobalOp type");
+      errorNYI(d->getSourceRange(),
+               "getOrCreateCIRGlobal: global with non-GlobalOp type");
     entry = cast<cir::GlobalOp>(v);
   }
 
@@ -717,7 +718,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
     // recognizing the global as a declaration, for now only check if
     // initializer is present.
     if (isForDefinition && !entry.isDeclaration()) {
-      errorNYI(d->getSourceRange(), "global with conflicting type");
+      errorNYI(d->getSourceRange(),
+               "getOrCreateCIRGlobal: global with conflicting type");
     }
 
     // Address space check removed because it is unnecessary because CIR 
records
@@ -762,7 +764,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
   // Handle things which are present even on external declarations.
   if (d) {
     if (langOpts.OpenMP && !langOpts.OpenMPSimd)
-      errorNYI(d->getSourceRange(), "OpenMP target global variable");
+      errorNYI(d->getSourceRange(),
+               "getOrCreateCIRGlobal: OpenMP target global variable");
 
     gv.setAlignmentAttr(getSize(astContext.getDeclAlign(d)));
 
@@ -770,7 +773,7 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
 
     if (d->getTLSKind()) {
       if (d->getTLSKind() == VarDecl::TLS_Dynamic)
-        errorNYI(d->getSourceRange(), "TLS dynamic");
+        errorNYI(d->getSourceRange(), "getOrCreateCIRGlobal: TLS dynamic");
       setTLSMode(gv, *d);
     }
 
@@ -779,14 +782,16 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
     // If required by the ABI, treat declarations of static data members with
     // inline initializers as definitions.
     if (astContext.isMSStaticDataMemberInlineDefinition(d))
-      errorNYI(d->getSourceRange(), "MS static data member inline definition");
+      errorNYI(d->getSourceRange(),
+               "getOrCreateCIRGlobal: MS static data member inline 
definition");
 
     assert(!cir::MissingFeatures::opGlobalSection());
     gv.setGlobalVisibilityAttr(getGlobalVisibilityAttrFromDecl(d));
 
     // Handle XCore specific ABI requirements.
     if (getTriple().getArch() == llvm::Triple::xcore)
-      errorNYI(d->getSourceRange(), "XCore specific ABI requirements");
+      errorNYI(d->getSourceRange(),
+               "getOrCreateCIRGlobal: XCore specific ABI requirements");
 
     // Check if we a have a const declaration with an initializer, we may be
     // able to emit it as available_externally to expose it's value to the
@@ -794,8 +799,9 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
     if (getLangOpts().CPlusPlus && gv.isPublic() &&
         d->getType().isConstQualified() && gv.isDeclaration() &&
         !d->hasDefinition() && d->hasInit() && !d->hasAttr<DLLImportAttr>())
-      errorNYI(d->getSourceRange(),
-               "external const declaration with initializer");
+      errorNYI(
+          d->getSourceRange(),
+          "getOrCreateCIRGlobal: external const declaration with initializer");
   }
 
   if (d &&
@@ -806,7 +812,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, 
mlir::Type ty,
     // in both device and host compilations.
     if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
         d->hasExternalStorage())
-      errorNYI(d->getSourceRange(), "HIP managed attribute");
+      errorNYI(d->getSourceRange(),
+               "getOrCreateCIRGlobal: HIP managed attribute");
   }
 
   assert(!cir::MissingFeatures::addressSpace());
@@ -861,7 +868,8 @@ cir::GlobalViewAttr 
CIRGenModule::getAddrOfGlobalVarAttr(const VarDecl *d) {
 void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
                                            bool isTentative) {
   if (getLangOpts().OpenCL || getLangOpts().OpenMPIsTargetDevice) {
-    errorNYI(vd->getSourceRange(), "emit OpenCL/OpenMP global variable");
+    errorNYI(vd->getSourceRange(),
+             "emitGlobalVarDefinition: emit OpenCL/OpenMP global variable");
     return;
   }
 
@@ -904,7 +912,8 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
   if (getLangOpts().CUDA && isCUDASharedVar) {
     init = cir::UndefAttr::get(&getMLIRContext(), convertType(vd->getType()));
   } else if (vd->hasAttr<LoaderUninitializedAttr>()) {
-    errorNYI(vd->getSourceRange(), "loader uninitialized attribute");
+    errorNYI(vd->getSourceRange(),
+             "emitGlobalVarDefinition: loader uninitialized attribute");
   } else if (!initExpr) {
     // This is a tentative definition; tentative definitions are
     // implicitly initialized with { 0 }.
@@ -927,12 +936,14 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
 
       if (getLangOpts().CPlusPlus) {
         if (initDecl->hasFlexibleArrayInit(astContext))
-          errorNYI(vd->getSourceRange(), "flexible array initializer");
+          errorNYI(vd->getSourceRange(),
+                   "emitGlobalVarDefinition: flexible array initializer");
         init = builder.getZeroInitAttr(convertType(qt));
         if (!isDefinitionAvailableExternally)
           needsGlobalCtor = true;
       } else {
-        errorNYI(vd->getSourceRange(), "static initializer");
+        errorNYI(vd->getSourceRange(),
+                 "emitGlobalVarDefinition: static initializer");
       }
     } else {
       init = initializer;
@@ -945,7 +956,9 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
 
   mlir::Type initType;
   if (mlir::isa<mlir::SymbolRefAttr>(init)) {
-    errorNYI(vd->getSourceRange(), "global initializer is a symbol reference");
+    errorNYI(
+        vd->getSourceRange(),
+        "emitGlobalVarDefinition: global initializer is a symbol reference");
     return;
   } else {
     assert(mlir::isa<mlir::TypedAttr>(init) && "This should have a type");
@@ -959,14 +972,16 @@ void CIRGenModule::emitGlobalVarDefinition(const 
clang::VarDecl *vd,
   // TODO(cir): Strip off pointer casts from Entry if we get them?
 
   if (!gv || gv.getSymType() != initType) {
-    errorNYI(vd->getSourceRange(), "global initializer with type mismatch");
+    errorNYI(vd->getSourceRange(),
+             "emitGlobalVarDefinition: global initializer with type mismatch");
     return;
   }
 
   assert(!cir::MissingFeatures::maybeHandleStaticInExternC());
 
   if (vd->hasAttr<AnnotateAttr>()) {
-    errorNYI(vd->getSourceRange(), "annotate global variable");
+    errorNYI(vd->getSourceRange(),
+             "emitGlobalVarDefinition: annotate global variable");
   }
 
   // Set CIR's linkage type as appropriate.
@@ -1610,39 +1625,6 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const 
StringLiteral *s,
   return builder.getGlobalViewAttr(ptrTy, gv);
 }
 
-LangAS CIRGenModule::getGlobalVarAddressSpace(const VarDecl *d) {
-  if (langOpts.OpenCL) {
-    LangAS as = d ? d->getType().getAddressSpace() : LangAS::opencl_global;
-    assert(as == LangAS::opencl_global || as == LangAS::opencl_global_device ||
-           as == LangAS::opencl_global_host || as == LangAS::opencl_constant ||
-           as == LangAS::opencl_local || as >= 
LangAS::FirstTargetAddressSpace);
-    return as;
-  }
-
-  if (langOpts.SYCLIsDevice &&
-      (!d || d->getType().getAddressSpace() == LangAS::Default))
-    errorNYI(d->getSourceRange(), "global as for SYCL device");
-
-  if (langOpts.CUDA && langOpts.CUDAIsDevice) {
-    if (d) {
-      if (d->hasAttr<CUDAConstantAttr>())
-        return LangAS::cuda_constant;
-      if (d->hasAttr<CUDASharedAttr>())
-        return LangAS::cuda_shared;
-      if (d->hasAttr<CUDADeviceAttr>())
-        return LangAS::cuda_device;
-      if (d->getType().isConstQualified())
-        return LangAS::cuda_constant;
-    }
-    return LangAS::cuda_device;
-  }
-
-  if (langOpts.OpenMP)
-    errorNYI(d->getSourceRange(), "global as for OpenMP");
-
-  return getTargetCIRGenInfo().getGlobalVarAddressSpace(*this, d);
-}
-
 // TODO(cir): this could be a common AST helper for both CIR and LLVM codegen.
 LangAS CIRGenModule::getLangTempAllocaAddressSpace() const {
   if (getLangOpts().OpenCL)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h 
b/clang/lib/CIR/CodeGen/CIRGenModule.h
index d9173234868ee..52464a8bc30c4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -359,16 +359,6 @@ class CIRGenModule : public CIRGenTypeCache {
   getAddrOfConstantStringFromLiteral(const StringLiteral *s,
                                      llvm::StringRef name = ".str");
 
-  /// Return the AST address space of the underlying global variable for D, as
-  /// determined by its declaration. Normally this is the same as the address
-  /// space of D's type, but in CUDA, address spaces are associated with
-  /// declarations, not types. If D is nullptr, return the default address
-  /// space for global variable.
-  ///
-  /// For languages without explicit address spaces, if D has default address
-  /// space, target-specific global or constant address space may be returned.
-  LangAS getGlobalVarAddressSpace(const VarDecl *d);
-
   /// Returns the address space for temporary allocations in the language. This
   /// ensures that the allocated variable's address space matches the
   /// expectations of the AST, rather than using the target's allocation 
address
diff --git a/clang/test/CIR/CodeGenCUDA/address-spaces.cu 
b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
index 2eefe8fafa727..166da94fa905d 100644
--- a/clang/test/CIR/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CIR/CodeGenCUDA/address-spaces.cu
@@ -15,13 +15,32 @@
 // RUN:            -I%S/Inputs/ %s -o %t.ll
 // RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
 
+// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j = 
#cir.undef : !s32i {alignment = 4 : i64}
+// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
+
+__device__ int a;
+// CIR-DEVICE: cir.global external @[[DEV:.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4
+// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
+
+__shared__ int b;
+// CIR-DEVICE: cir.global external @[[SHARED:.*]] = #cir.undef : !s32i 
{alignment = 4 : i64}
+// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 undef, align 4
+// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
+
+__constant__ int c;
+// CIR-DEVICE: cir.global constant external @[[CONST:.*]] = #cir.int<0> : 
!s32i {alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
+// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, 
align 4
+// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized 
constant i32 0, align 4
+
+// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
+
 __global__ void fn() {
   int i = 0;
   __shared__ int j;
   j = i;
 }
 
-// CIR-DEVICE: cir.global "private" internal dso_local @_ZZ2fnvE1j : !s32i
 // CIR-DEVICE: cir.func {{.*}}@_Z2fnv() {{.*}} {
 // CIR-DEVICE:   %[[ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", 
init]
 // CIR-DEVICE:   %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
@@ -31,7 +50,6 @@ __global__ void fn() {
 // CIR-DEVICE:   cir.store {{.*}}%[[VAL]], %[[J]] : !s32i, !cir.ptr<!s32i>
 // CIR-DEVICE:   cir.return
 
-// LLVM-DEVICE: @_ZZ2fnvE1j = internal global i32 undef, align 4
 // LLVM-DEVICE: define dso_local void @_Z2fnv()
 // LLVM-DEVICE:   %[[ALLOCA:.*]] = alloca i32, i64 1, align 4
 // LLVM-DEVICE:   store i32 0, ptr %[[ALLOCA]], align 4
@@ -39,7 +57,6 @@ __global__ void fn() {
 // LLVM-DEVICE:   store i32 %[[VAL]], ptr @_ZZ2fnvE1j, align 4
 // LLVM-DEVICE:   ret void
 
-// OGCG-DEVICE: @_ZZ2fnvE1j = internal addrspace(3) global i32 undef, align 4
 // OGCG-DEVICE: define dso_local ptx_kernel void @_Z2fnv()
 // OGCG-DEVICE: entry:
 // OGCG-DEVICE:   %[[ALLOCA:.*]] = alloca i32, align 4
diff --git a/clang/test/CIR/CodeGenCUDA/global-vars.cu 
b/clang/test/CIR/CodeGenCUDA/global-vars.cu
deleted file mode 100644
index 4791f145d1bae..0000000000000
--- a/clang/test/CIR/CodeGenCUDA/global-vars.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "Inputs/cuda.h"
-
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
-// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
-// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
-// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
-// RUN:            -I%S/Inputs/ %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
-
-
-__device__ int a;
-// CIR-DEVICE: cir.global external @[[DEV:.*]] = #cir.int<0> : !s32i 
{alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
-// LLVM-DEVICE: @[[DEV_LD:.*]] = externally_initialized global i32 0, align 4
-// OGCG-DEVICE: @[[DEV_OD:.*]] = addrspace(1) externally_initialized global 
i32 0, align 4
-
-__shared__ int b;
-// CIR-DEVICE: cir.global external @[[SHARED:.*]] = #cir.undef : !s32i 
{alignment = 4 : i64}
-// LLVM-DEVICE: @[[SHARED_LL:.*]] = global i32 undef, align 4
-// OGCG-DEVICE: @[[SHARED_OD:.*]] = addrspace(3) global i32 undef, align 4
-
-__constant__ int c;
-// CIR-DEVICE: cir.global constant external @[[CONST:.*]] = #cir.int<0> : 
!s32i {alignment = 4 : i64, cu.externally_initialized = 
#cir.cu.externally_initialized}
-// LLVM-DEVICE: @[[CONST_LL:.*]] = externally_initialized constant i32 0, 
align 4
-// OGCG-DEVICE: @[[CONST_OD:.*]] = addrspace(4) externally_initialized 
constant i32 0, align 4

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to