https://github.com/andykaylor updated 
https://github.com/llvm/llvm-project/pull/200227

>From cc5549a71214ea7155e45607efc2b173e0f35ef6 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Wed, 27 May 2026 14:03:16 -0700
Subject: [PATCH 1/8] [CIR] Implement destruction of TLS and static global
 references

This implements destruction of lifetime-extended reference temporaries
used to initialize TLS or static duration reference variables.

Assisted-by: Cursor / claude-opus-4.7
---
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          |  49 +++-
 clang/lib/CIR/CodeGen/CIRGenModule.cpp        |  18 +-
 .../Dialect/Transforms/LoweringPrepare.cpp    |  15 +-
 clang/test/CIR/CodeGen/global-temp-dtor.cpp   | 220 ++++++++++++++++++
 .../test/CIR/CodeGen/self-ref-temporaries.cpp |   6 +-
 clang/test/CIR/CodeGenCXX/global-refs.cpp     |   4 +-
 6 files changed, 291 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/global-temp-dtor.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 0f40516ee3537..ba9d0b7cc9728 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1931,8 +1931,37 @@ static void pushTemporaryCleanup(CIRGenFunction &cgf,
     if (!referenceTemporaryDtor)
       return;
 
-    cgf.cgm.errorNYI(e->getSourceRange(), "pushTemporaryCleanup: static/thread "
-                                          "storage duration with destructors");
+    // Classic codegen calls registerGlobalDtor here, passing either the
+    // destructor or a generated array-destroy helper. CIR handles globals with
+    // non-trivial destructors by attaching a dtor region to the cir.global op.
+    CIRGenModule &cgm = cgf.cgm;
+    auto globalOp =
+        mlir::cast<cir::GlobalOp>(cgm.getAddrOfGlobalTemporary(m, e));
+
+    CIRGenBuilderTy &builder = cgm.getBuilder();
+    mlir::OpBuilder::InsertionGuard guard(builder);
+    assert(globalOp.getDtorRegion().empty() &&
+           "global temporary already has a dtor region");
+    mlir::Block *block = builder.createBlock(&globalOp.getDtorRegion());
+    builder.setInsertionPointToStart(block);
+
+    mlir::Location loc = cgm.getLoc(m->getSourceRange());
+    mlir::Value tempAddr = builder.createGetGlobal(globalOp);
+
+    if (e->getType()->isArrayType()) {
+      // emitDestroy will produce a cir.array.dtor here. LoweringPrepare's
+      // getOrCreateDtorFunc recognizes the non-trivial dtor region and
+      // hoists it into a __cxx_global_array_dtor helper.
+      Address addr{tempAddr, cgf.convertTypeForMem(e->getType()),
+                   referenceTemporary.getAlignment()};
+      cgf.emitDestroy(addr, e->getType(), CIRGenFunction::destroyCXXObject);
+    } else {
+      GlobalDecl gd(referenceTemporaryDtor, Dtor_Complete);
+      cir::FuncOp dtorFn = cgm.getAddrAndTypeOfCXXStructor(gd).second;
+      builder.createCallOp(loc, dtorFn, mlir::ValueRange{tempAddr});
+    }
+
+    cir::YieldOp::create(builder, loc);
     break;
   }
 
@@ -1985,11 +2014,17 @@ LValue CIRGenFunction::emitMaterializeTemporaryExpr(
 
   // Create and initialize the reference temporary.
   Address object = createReferenceTemporary(*this, m, e);
-
-  if (auto var = object.getPointer().getDefiningOp<cir::GlobalOp>()) {
-    // TODO(cir): add something akin to stripPointerCasts() to ptr above
-    cgm.errorNYI(e->getSourceRange(), "emitMaterializeTemporaryExpr: GlobalOp");
-    return {};
+  cir::GlobalOp var = nullptr;
+  if (auto getGlobalOp = object.getPointer().getDefiningOp<cir::GetGlobalOp>())
+    var = mlir::dyn_cast_or_null<cir::GlobalOp>(
+        cgm.getGlobalValue(getGlobalOp.getName()));
+
+  if (var) {
+    if (!var.getInitialValue().has_value()) {
+      var.setInitialValueAttr(cir::ZeroAttr::get(var.getSymType()));
+      assert(!cir::MissingFeatures::pointerAuthentication());
+      emitAnyExprToMem(e, object, Qualifiers(), /*isInitializer=*/true);
+    }
   } else {
     assert(!cir::MissingFeatures::emitLifetimeMarkers());
     emitAnyExprToMem(e, object, Qualifiers(), /*isInitializer=*/true);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index defa5eb12d136..cd5675ffc8881 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -2934,6 +2934,16 @@ void CIRGenModule::setTLSMode(mlir::Operation *op, const 
VarDecl &d) {
   if (d.isStaticLocal() || tlm != cir::TLS_Model::GeneralDynamic)
     return;
 
+  // The wrapper/init machinery belongs to d's own user-facing cir.global.
+  // When we're setting the TLS mode on a different global whose extending
+  // declaration happens to be d (e.g. a lifetime-extended reference
+  // temporary, whose symbol is _ZGR... rather than d's mangled name), the
+  // wrapper plumbing will be attached separately when d's own cir.global
+  // is emitted; attaching it here would point the wrapper alias at the
+  // wrong global.
+  if (global.getSymName() != getMangledName(GlobalDecl(&d)))
+    return;
+
   setGlobalTlsReferences(d, global);
 }
 
@@ -3886,7 +3896,10 @@ CIRGenModule::getAddrOfGlobalTemporary(const 
MaterializeTemporaryExpr *mte,
     }
   }
   cir::GlobalOp gv = createGlobalOp(loc, name, type, isConstant);
-  gv.setInitialValueAttr(initialValue);
+  if (initialValue)
+    gv.setInitialValueAttr(initialValue);
+  gv.setLinkage(linkage);
+  gv.setVisibility(getMLIRVisibilityFromCIRLinkage(linkage));
 
   if (emitter)
     emitter->finalize(gv);
@@ -3901,8 +3914,7 @@ CIRGenModule::getAddrOfGlobalTemporary(const 
MaterializeTemporaryExpr *mte,
     errorNYI(mte->getSourceRange(),
              "Global temporary with comdat/weak linkage");
   if (varDecl->getTLSKind())
-    errorNYI(mte->getSourceRange(),
-             "Global temporary with thread local storage");
+    setTLSMode(gv, *varDecl);
   mlir::Operation *cv = gv;
 
   assert(!cir::MissingFeatures::addressSpace());
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp 
b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index fa970158058e0..731fe78eed28c 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1597,16 +1597,19 @@ void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
       // a guard variable for them (since they cannot use the global guard), so
       // we differentiate them that way.
 
-      if (op.getDynTlsRefs()->getGuardName()) {
+      // Some TLS globals (e.g. internal-linkage lifetime-extended reference
+      // temporaries) have no wrapper at all, so there are no dyn_tls_refs to
+      // associate them with. They still need to participate in the ordered
+      // __tls_init flow, but no wrapper alias is required.
+      if (op.getDynTlsRefs() && op.getDynTlsRefs()->getGuardName()) {
         // Unordered: the alias is the function we just generated.
         initAlias = defineGlobalThreadLocalInitAlias(op, f);
       } else {
-        // Ordered: Get the __tls_init, and make the alias to that.
-        initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
-        // Ordered inits also need to get called from the __tls_init function,
-        // so we add the init function to the list, so that we can add them to
-        // it later.
+        // Ordered: add the init function to the list so __tls_init picks it
+        // up later. If a wrapper exists, also point its alias at __tls_init.
         globalThreadLocalInitializers.push_back(f);
+        if (op.getDynTlsRefs())
+          initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
       }
     } else {
       dynamicInitializers.push_back(f);
diff --git a/clang/test/CIR/CodeGen/global-temp-dtor.cpp 
b/clang/test/CIR/CodeGen/global-temp-dtor.cpp
new file mode 100644
index 0000000000000..408abda29a1c4
--- /dev/null
+++ b/clang/test/CIR/CodeGen/global-temp-dtor.cpp
@@ -0,0 +1,220 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir 
-emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> 
%t-before.cir
+// RUN: FileCheck --input-file=%t-before.cir %s --check-prefixes=CIR-BEFORE
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir 
-emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM,LLVMCIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s 
-o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=LLVM,OGCG
+
+// Exercises lifetime-extended reference temporaries with non-trivial
+// destructors where the extending declaration has static or thread storage
+// duration, for both non-array and array temporary types.
+
+struct NonTrivial {
+  ~NonTrivial();
+  int x;
+};
+
+const NonTrivial &static_ref = NonTrivial();
+thread_local const NonTrivial &thread_ref = NonTrivial();
+
+typedef NonTrivial NonTrivialArr[2];
+
+const NonTrivialArr &static_arr_ref = NonTrivialArr{};
+thread_local const NonTrivialArr &thread_arr_ref = NonTrivialArr{};
+
+//===----------------------------------------------------------------------===//
+// CIR dialect (post-LoweringPrepare): each temporary is emitted as a private
+// internal cir.global, with its destructor registered through __cxa_atexit
+// or __cxa_thread_atexit. Arrays go through a generated
+// __cxx_global_array_dtor helper. The runtime helpers themselves are
+// declared exactly once.
+//===----------------------------------------------------------------------===//
+
+// CIR-BEFORE: cir.global external @static_ref = ctor : 
!cir.ptr<!rec_NonTrivial> {
+// CIR-BEFORE:   %[[STATIC_REF:.*]] = cir.get_global @static_ref
+// CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10static_ref_
+// CIR-BEFORE:   %[[ZERO:.*]] = cir.const #cir.zero : !rec_NonTrivial
+// CIR-BEFORE:   cir.store{{.*}} %[[ZERO]], %[[REF_TEMP]]
+// CIR-BEFORE:   cir.store{{.*}} %[[REF_TEMP]], %[[STATIC_REF]]
+// CIR-BEFORE: }
+
+// CIR-BEFORE: cir.global "private" internal @_ZGR10static_ref_ = #cir.zero : 
!rec_NonTrivial dtor {
+// CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10static_ref_
+// CIR-BEFORE:   cir.call @_ZN10NonTrivialD1Ev(%[[REF_TEMP]])
+// CIR-BEFORE: }
+
+// CIR: cir.global "private" internal @_ZGR10static_ref_ = #cir.zero : 
!rec_NonTrivial
+// CIR: cir.func internal private @__cxx_global_var_init.1()
+// CIR:   cir.get_global @_ZGR10static_ref_
+// CIR:   cir.get_global @_ZN10NonTrivialD1Ev
+// CIR:   cir.get_global @__dso_handle
+// CIR:   cir.call @__cxa_atexit
+
+// CIR-BEFORE: cir.global external tls_dyn dyn_tls_refs = <"_ZTW10thread_ref", 
"_ZTH10thread_ref"> @thread_ref = ctor : !cir.ptr<!rec_NonTrivial> {
+// CIR-BEFORE:   %[[THREAD_REF:.*]] = cir.get_global thread_local @thread_ref
+// CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10thread_ref_
+// CIR-BEFORE:   %[[ZERO:.*]] = cir.const #cir.zero : !rec_NonTrivial
+// CIR-BEFORE:   cir.store{{.*}} %[[ZERO]], %[[REF_TEMP]]
+// CIR-BEFORE:   cir.store{{.*}} %[[REF_TEMP]], %[[THREAD_REF]]
+// CIR-BEFORE: }
+
+// CIR-BEFORE: cir.global "private" internal tls_dyn @_ZGR10thread_ref_ = 
#cir.zero : !rec_NonTrivial dtor {
+// CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10thread_ref_
+// CIR-BEFORE:   cir.call @_ZN10NonTrivialD1Ev(%[[REF_TEMP]])
+// CIR-BEFORE: }
+
+// CIR: cir.global "private" internal tls_dyn @_ZGR10thread_ref_ = #cir.zero : 
!rec_NonTrivial
+// CIR: cir.func internal private @__cxx_global_var_init.3()
+// CIR:   cir.get_global @_ZGR10thread_ref_
+// CIR:   cir.get_global @_ZN10NonTrivialD1Ev
+// CIR:   cir.get_global @__dso_handle
+// CIR:   cir.call @__cxa_thread_atexit
+
+// CIR-BEFORE: cir.global external @static_arr_ref = ctor : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
+// CIR-BEFORE:   %[[ARRAY_INIT_TEMP:.*]] = cir.alloca {{.*}}"arrayinit.temp"
+// CIR-BEFORE:   %[[STATIC_ARR_REF:.*]] = cir.get_global @static_arr_ref
+// CIR-BEFORE:   %[[STATIC_ARR_REF_TEMP:.*]] = cir.get_global 
@_ZGR14static_arr_ref_
+// CIR-BEFORE:   %[[DECAY:.*]] = cir.cast array_to_ptrdecay 
%[[STATIC_ARR_REF_TEMP]]
+// CIR-BEFORE:   cir.store{{.*}} %[[DECAY]], %[[ARRAY_INIT_TEMP]]
+// CIR-BEFORE:   %[[TWO:.*]] = cir.const #cir.int<2> : !s64i
+// CIR-BEFORE:   %[[NEXT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO]] : 
(!cir.ptr<!rec_NonTrivial>, !s64i) -> !cir.ptr<!rec_NonTrivial>
+// CIR-BEFORE:   cir.do {
+// CIR-BEFORE:   } while {
+// CIR-BEFORE:     cir.condition
+// CIR-BEFORE:   }
+// CIR-BEFORE:   cir.store{{.*}} %[[STATIC_ARR_REF_TEMP]], %[[STATIC_ARR_REF]] 
: !cir.ptr<!cir.array<!rec_NonTrivial x 2>>, 
!cir.ptr<!cir.ptr<!cir.array<!rec_NonTrivial x 2>>>
+// CIR-BEFORE: }
+
+// CIR-BEFORE: cir.global "private" internal @_ZGR14static_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2> dtor {
+// CIR-BEFORE:   %[[STATIC_ARR_REF_TEMP:.*]] = cir.get_global 
@_ZGR14static_arr_ref_
+// CIR-BEFORE:   cir.array.dtor %[[STATIC_ARR_REF_TEMP]] : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
+// CIR-BEFORE:   ^bb0(%[[ELEMENT:.*]]: !cir.ptr<!rec_NonTrivial>):
+// CIR-BEFORE:     cir.call @_ZN10NonTrivialD1Ev(%[[ELEMENT]])
+// CIR-BEFORE:   }
+// CIR-BEFORE: }
+
+// CIR: cir.global "private" internal @_ZGR14static_arr_ref_ = #cir.zero : 
!cir.array<!rec_NonTrivial x 2>
+// CIR: cir.func internal private @__cxx_global_array_dtor(
+// CIR:   cir.call @_ZN10NonTrivialD1Ev
+
+// CIR: cir.func internal private @__cxx_global_var_init.5()
+// CIR:   cir.get_global @_ZGR14static_arr_ref_
+// CIR:   cir.get_global @__cxx_global_array_dtor
+// CIR:   cir.call @__cxa_atexit
+
+// CIR-BEFORE: cir.global external tls_dyn dyn_tls_refs = 
<"_ZTW14thread_arr_ref", "_ZTH14thread_arr_ref"> @thread_arr_ref = ctor : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
+// CIR-BEFORE:   %[[ARRAY_INIT_TEMP:.*]] = cir.alloca {{.*}}"arrayinit.temp"
+// CIR-BEFORE:   %[[THREAD_ARR_REF:.*]] = cir.get_global thread_local 
@thread_arr_ref
+// CIR-BEFORE:   %[[THREAD_ARR_REF_TEMP:.*]] = cir.get_global 
@_ZGR14thread_arr_ref_
+// CIR-BEFORE:   %[[DECAY:.*]] = cir.cast array_to_ptrdecay 
%[[THREAD_ARR_REF_TEMP]]
+// CIR-BEFORE:   cir.store{{.*}} %[[DECAY]], %[[ARRAY_INIT_TEMP]]
+// CIR-BEFORE:   %[[TWO:.*]] = cir.const #cir.int<2> : !s64i
+// CIR-BEFORE:   %[[NEXT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO]] : 
(!cir.ptr<!rec_NonTrivial>, !s64i) -> !cir.ptr<!rec_NonTrivial>
+// CIR-BEFORE:   cir.do {
+// CIR-BEFORE:   } while {
+// CIR-BEFORE:     cir.condition
+// CIR-BEFORE:   }
+// CIR-BEFORE:   cir.store{{.*}} %[[THREAD_ARR_REF_TEMP]], %[[THREAD_ARR_REF]] 
: !cir.ptr<!cir.array<!rec_NonTrivial x 2>>, 
!cir.ptr<!cir.ptr<!cir.array<!rec_NonTrivial x 2>>>
+// CIR-BEFORE: }
+
+// CIR-BEFORE: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2> dtor
+// CIR-BEFORE:   %[[THREAD_ARR_REF_TEMP:.*]] = cir.get_global 
@_ZGR14thread_arr_ref_
+// CIR-BEFORE:   cir.array.dtor %[[THREAD_ARR_REF_TEMP]] : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
+// CIR-BEFORE:   ^bb0(%[[ELEMENT:.*]]: !cir.ptr<!rec_NonTrivial>):
+// CIR-BEFORE:     cir.call @_ZN10NonTrivialD1Ev(%[[ELEMENT]])
+// CIR-BEFORE:   }
+
+// CIR: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2>
+// CIR: cir.func internal private @__cxx_global_array_dtor.1(
+// CIR:   cir.call @_ZN10NonTrivialD1Ev
+
+// CIR: cir.func internal private @__cxx_global_var_init.7()
+// CIR:   cir.get_global @_ZGR14thread_arr_ref_
+// CIR:   cir.get_global @__cxx_global_array_dtor.1
+// CIR:   cir.call @__cxa_thread_atexit
+
+//===----------------------------------------------------------------------===//
+// LLVM IR: the shared declarations are identical between both pipelines.
+//===----------------------------------------------------------------------===//
+
+// LLVM-DAG: @static_ref = global ptr null
+// LLVM-DAG: @_ZGR10static_ref_ = internal global %struct.NonTrivial 
zeroinitializer
+// LLVM-DAG: @thread_ref = thread_local global ptr null
+// LLVM-DAG: @_ZGR10thread_ref_ = internal thread_local global 
%struct.NonTrivial zeroinitializer
+// LLVM-DAG: @static_arr_ref = global ptr null
+// LLVM-DAG: @_ZGR14static_arr_ref_ = internal global [2 x %struct.NonTrivial] 
zeroinitializer
+// LLVM-DAG: @thread_arr_ref = thread_local global ptr null
+// LLVM-DAG: @_ZGR14thread_arr_ref_ = internal thread_local global [2 x 
%struct.NonTrivial] zeroinitializer
+
+//===----------------------------------------------------------------------===//
+// LLVM IR: function-body shape diverges between the pipelines. OGCG emits a
+// single __cxx_global_var_init per reference variable that initializes the
+// temporary, registers the cleanup and stores into the reference. CIR-lowered
+// IR splits the binding code and the cleanup registration across two
+// __cxx_global_var_init functions per reference.
+//===----------------------------------------------------------------------===//
+
+// Static, non-array.
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init()
+// LLVMCIR:         store %struct.NonTrivial zeroinitializer, ptr 
@_ZGR10static_ref_
+// LLVMCIR:         store ptr @_ZGR10static_ref_, ptr @static_ref
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.1()
+// LLVMCIR:         call void @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
+
+// OGCG-LABEL: define internal void @__cxx_global_var_init()
+// OGCG:         call void @llvm.memset.{{[^(]+}}(ptr 
{{.*}}@_ZGR10static_ref_, i8 0, i64 4, i1 false)
+// OGCG:         call i32 @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
+// OGCG:         store ptr @_ZGR10static_ref_, ptr @static_ref
+
+// Thread, non-array.
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.2()
+// LLVMCIR:         call {{.*}}ptr @llvm.threadlocal.address.p0(ptr 
{{.*}}@thread_ref)
+// LLVMCIR:         store %struct.NonTrivial zeroinitializer, ptr 
@_ZGR10thread_ref_
+// LLVMCIR:         store ptr @_ZGR10thread_ref_
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.3()
+// LLVMCIR:         call void @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, 
ptr @_ZGR10thread_ref_, ptr @__dso_handle)
+
+// OGCG-LABEL: define internal void @__cxx_global_var_init.1()
+// OGCG:         call void @llvm.memset.{{[^(]+}}(ptr 
{{.*}}@_ZGR10thread_ref_, i8 0, i64 4, i1 false)
+// OGCG:         call i32 @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10thread_ref_, ptr @__dso_handle)
+// OGCG:         %[[THREAD_REF_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
+// OGCG:         store ptr @_ZGR10thread_ref_, ptr %[[THREAD_REF_ADDR]]
+
+// Static, array: a generated array-destroy helper is registered with
+// __cxa_atexit instead of the destructor itself. CIR passes the array
+// pointer as the second argument; OGCG passes null and the helper hard-codes
+// the global reference.
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor(ptr {{.*}})
+// LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.5()
+// LLVMCIR:         call void @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr 
@_ZGR14static_arr_ref_, ptr @__dso_handle)
+
+// OGCG-LABEL: define internal void @__cxx_global_var_init.2()
+// OGCG:         call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr 
null, ptr @__dso_handle)
+// OGCG:         store ptr @_ZGR14static_arr_ref_, ptr @static_arr_ref
+
+// OGCG-LABEL: define internal void @__cxx_global_array_dtor(ptr {{.*}})
+// OGCG:         call void @_ZN10NonTrivialD1Ev(ptr
+
+// Thread, array.
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor.1(ptr {{.*}})
+// LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
+
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.7()
+// LLVMCIR:         call void @__cxa_thread_atexit(ptr 
@__cxx_global_array_dtor.1, ptr @_ZGR14thread_arr_ref_, ptr @__dso_handle)
+
+// OGCG-LABEL: define internal void @__cxx_global_var_init.3()
+// OGCG:         call i32 @__cxa_thread_atexit(ptr @__cxx_global_array_dtor.4, 
ptr null, ptr @__dso_handle)
+// OGCG:         %[[THREAD_ARR_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_arr_ref)
+// OGCG:         store ptr @_ZGR14thread_arr_ref_, ptr %[[THREAD_ARR_ADDR]]
+
+// OGCG-LABEL: define internal void @__cxx_global_array_dtor.4(ptr {{.*}})
+// OGCG:         call void @_ZN10NonTrivialD1Ev(ptr
diff --git a/clang/test/CIR/CodeGen/self-ref-temporaries.cpp 
b/clang/test/CIR/CodeGen/self-ref-temporaries.cpp
index a8fb4a8bcbbe8..e1e82620d2725 100644
--- a/clang/test/CIR/CodeGen/self-ref-temporaries.cpp
+++ b/clang/test/CIR/CodeGen/self-ref-temporaries.cpp
@@ -3,7 +3,7 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | 
FileCheck %s --check-prefix=LLVM
 
   constexpr const int &normal = 42;
-// CIR: cir.global "private" constant external @_ZGR6normal_ = #cir.int<42> : 
!s32i
+// CIR: cir.global "private" constant internal @_ZGR6normal_ = #cir.int<42> : 
!s32i
 // CIR: cir.global constant external @normal = #cir.global_view<@_ZGR6normal_> 
: !cir.ptr<!s32i>
 // LLVM: @_ZGR6normal_ = {{.*}}constant i32 42, align 4
 // LLVM: @normal = constant ptr @_ZGR6normal_, align 8
@@ -13,7 +13,7 @@ struct SelfRef {
   int ints[3] = {1, 2, 3};
 };
 constexpr const SelfRef &sr = SelfRef();
-// CIR: cir.global "private" constant external @_ZGR2sr_ = 
#cir.const_record<{#cir.global_view<@_ZGR2sr_, [1 : i32]> : !cir.ptr<!s32i>, 
#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : 
!s32i]> : !cir.array<!s32i x 3>}> 
+// CIR: cir.global "private" constant internal @_ZGR2sr_ = 
#cir.const_record<{#cir.global_view<@_ZGR2sr_, [1 : i32]> : !cir.ptr<!s32i>, 
#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : 
!s32i]> : !cir.array<!s32i x 3>}> 
 // CIR: cir.global constant external @sr = #cir.global_view<@_ZGR2sr_> : 
!cir.ptr<!rec_SelfRef>
 // LLVM: @_ZGR2sr_ = {{.*}}constant { ptr, [3 x i32] } { ptr getelementptr 
{{.*}}(i8, ptr @_ZGR2sr_, i64 8), [3 x i32] [i32 1, i32 2, i32 3] }, align 8
 // LLVM: @sr = constant ptr @_ZGR2sr_, align 8
@@ -25,7 +25,7 @@ struct MultiSelfRef {
 };
 
 constexpr const MultiSelfRef &msr = MultiSelfRef();
-// CIR: cir.global "private" constant external @_ZGR3msr_ = 
#cir.const_record<{#cir.global_view<@_ZGR3msr_, [2 : i32]> : !cir.ptr<!s32i>, 
#cir.global_view<@_ZGR3msr_, [2 : i32]> : !cir.ptr<!s32i>, 
#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : 
!s32i]> : !cir.array<!s32i x 3>}>
+// CIR: cir.global "private" constant internal @_ZGR3msr_ = 
#cir.const_record<{#cir.global_view<@_ZGR3msr_, [2 : i32]> : !cir.ptr<!s32i>, 
#cir.global_view<@_ZGR3msr_, [2 : i32]> : !cir.ptr<!s32i>, 
#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : 
!s32i]> : !cir.array<!s32i x 3>}>
 // CIR: cir.global constant external @msr = #cir.global_view<@_ZGR3msr_> : 
!cir.ptr<!rec_MultiSelfRef>
 // LLVM: @_ZGR3msr_ = {{.*}}constant { ptr, ptr, [3 x i32] } { ptr 
getelementptr {{.*}}(i8, ptr @_ZGR3msr_, i64 16), ptr getelementptr {{.*}}(i8, 
ptr @_ZGR3msr_, i64 16), [3 x i32] [i32 1, i32 2, i32 3] }, align 8
 // LLVM: @msr = constant ptr @_ZGR3msr_, align 8
diff --git a/clang/test/CIR/CodeGenCXX/global-refs.cpp 
b/clang/test/CIR/CodeGenCXX/global-refs.cpp
index 50dd9432f1ef2..38cc9bf7daa46 100644
--- a/clang/test/CIR/CodeGenCXX/global-refs.cpp
+++ b/clang/test/CIR/CodeGenCXX/global-refs.cpp
@@ -26,7 +26,7 @@ int &globalIntRef = globalInt;
 // LLVM: @globalIntRef = constant ptr @globalInt, align 8
 
 const int &constGlobalIntRef = 5;
-// CIR: cir.global "private" constant external @_ZGR17constGlobalIntRef_ = 
#cir.int<5> : !s32i {alignment = 4 : i64}
+// CIR: cir.global "private" constant internal @_ZGR17constGlobalIntRef_ = 
#cir.int<5> : !s32i {alignment = 4 : i64}
 // CIR: cir.global constant external @constGlobalIntRef = 
#cir.global_view<@_ZGR17constGlobalIntRef_> : !cir.ptr<!s32i> {alignment = 8 : 
i64}
 // LLVM: @_ZGR17constGlobalIntRef_ = {{.*}}constant i32 5, align 4
 // LLVM: @constGlobalIntRef = constant ptr @_ZGR17constGlobalIntRef_, align 8
@@ -40,7 +40,7 @@ DefCtor &defCtorRef = defCtor;
 // LLVM: @defCtorRef = constant ptr @defCtor, align 8
 
 const DefCtor &constDefCtorRef{};
-// CIR: cir.global "private" constant external @_ZGR15constDefCtorRef_ = 
#cir.undef : !rec_DefCtor {alignment = 1 : i64}
+// CIR: cir.global "private" constant internal @_ZGR15constDefCtorRef_ = 
#cir.undef : !rec_DefCtor {alignment = 1 : i64}
 // CIR: cir.global constant external @constDefCtorRef = 
#cir.global_view<@_ZGR15constDefCtorRef_> : !cir.ptr<!rec_DefCtor> {alignment = 
8 : i64}
 // LLVM: @_ZGR15constDefCtorRef_ = {{.*}}constant %struct.DefCtor undef, align 
1
 // LLVM: @constDefCtorRef = constant ptr @_ZGR15constDefCtorRef_, align 8

>From c0204df6c5b41a4e3c5b85ccacb31c6fc3a83123 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Thu, 28 May 2026 14:07:36 -0700
Subject: [PATCH 2/8] Use parameter for setTLSMode special-case handling

---
 clang/lib/CIR/CodeGen/CIRGenModule.cpp | 17 +++++++----------
 clang/lib/CIR/CodeGen/CIRGenModule.h   |  6 ++++--
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index cd5675ffc8881..1bfe491c3a88c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -2917,7 +2917,8 @@ cir::TLS_Model CIRGenModule::getDefaultCIRTLSModel() 
const {
   llvm_unreachable("Invalid TLS model!");
 }
 
-void CIRGenModule::setTLSMode(mlir::Operation *op, const VarDecl &d) {
+void CIRGenModule::setTLSMode(mlir::Operation *op, const VarDecl &d,
+                              bool isExtendingDecl) {
   assert(d.getTLSKind() && "setting TLS mode on non-TLS var!");
 
   cir::TLS_Model tlm = getDefaultCIRTLSModel();
@@ -2934,14 +2935,10 @@ void CIRGenModule::setTLSMode(mlir::Operation *op, 
const VarDecl &d) {
   if (d.isStaticLocal() || tlm != cir::TLS_Model::GeneralDynamic)
     return;
 
-  // The wrapper/init machinery belongs to d's own user-facing cir.global.
-  // When we're setting the TLS mode on a different global whose extending
-  // declaration happens to be d (e.g. a lifetime-extended reference
-  // temporary, whose symbol is _ZGR... rather than d's mangled name), the
-  // wrapper plumbing will be attached separately when d's own cir.global
-  // is emitted; attaching it here would point the wrapper alias at the
-  // wrong global.
-  if (global.getSymName() != getMangledName(GlobalDecl(&d)))
+  // If this function was called to set the TLS mode for a temporary whose
+  // lifetime is extended by the variable declared by `d`, don't emit the
+  // wrapper, init, and guard info.
+  if (isExtendingDecl)
     return;
 
   setGlobalTlsReferences(d, global);
@@ -3914,7 +3911,7 @@ CIRGenModule::getAddrOfGlobalTemporary(const 
MaterializeTemporaryExpr *mte,
     errorNYI(mte->getSourceRange(),
              "Global temporary with comdat/weak linkage");
   if (varDecl->getTLSKind())
-    setTLSMode(gv, *varDecl);
+    setTLSMode(gv, *varDecl, /*isExtendingDecl=*/true);
   mlir::Operation *cv = gv;
 
   assert(!cir::MissingFeatures::addressSpace());
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h 
b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 38436fa0ea5db..a0ddd89bc67c2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -626,8 +626,10 @@ class CIRGenModule : public CIRGenTypeCache {
   void setGVPropertiesAux(mlir::Operation *op, const NamedDecl *d) const;
 
   /// Set TLS mode for the given operation based on the given variable
-  /// declaration.
-  void setTLSMode(mlir::Operation *op, const VarDecl &d);
+  /// declaration. If `isExtendingDecl` is true, then the operation is a
+  /// temporary whose lifetime is extended by the variable declared by `d`.
+  void setTLSMode(mlir::Operation *op, const VarDecl &d,
+                  bool isExtendingDecl = false);
 
   /// Get TLS mode from CodeGenOptions.
   cir::TLS_Model getDefaultCIRTLSModel() const;

>From 1b7a76bcb276f5a0b3eaea753d5126318a6951ad Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Thu, 28 May 2026 15:15:56 -0700
Subject: [PATCH 3/8] Added interesting ctor in test, better init checks

---
 clang/test/CIR/CodeGen/global-temp-dtor.cpp | 111 +++++++++++++++-----
 1 file changed, 84 insertions(+), 27 deletions(-)

diff --git a/clang/test/CIR/CodeGen/global-temp-dtor.cpp 
b/clang/test/CIR/CodeGen/global-temp-dtor.cpp
index 408abda29a1c4..485bd529fc275 100644
--- a/clang/test/CIR/CodeGen/global-temp-dtor.cpp
+++ b/clang/test/CIR/CodeGen/global-temp-dtor.cpp
@@ -11,6 +11,7 @@
 // duration, for both non-array and array temporary types.
 
 struct NonTrivial {
+  NonTrivial();
   ~NonTrivial();
   int x;
 };
@@ -34,8 +35,7 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // CIR-BEFORE: cir.global external @static_ref = ctor : 
!cir.ptr<!rec_NonTrivial> {
 // CIR-BEFORE:   %[[STATIC_REF:.*]] = cir.get_global @static_ref
 // CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10static_ref_
-// CIR-BEFORE:   %[[ZERO:.*]] = cir.const #cir.zero : !rec_NonTrivial
-// CIR-BEFORE:   cir.store{{.*}} %[[ZERO]], %[[REF_TEMP]]
+// CIR-BEFORE:   cir.call @_ZN10NonTrivialC1Ev(%[[REF_TEMP]])
 // CIR-BEFORE:   cir.store{{.*}} %[[REF_TEMP]], %[[STATIC_REF]]
 // CIR-BEFORE: }
 
@@ -54,8 +54,7 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // CIR-BEFORE: cir.global external tls_dyn dyn_tls_refs = <"_ZTW10thread_ref", 
"_ZTH10thread_ref"> @thread_ref = ctor : !cir.ptr<!rec_NonTrivial> {
 // CIR-BEFORE:   %[[THREAD_REF:.*]] = cir.get_global thread_local @thread_ref
 // CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10thread_ref_
-// CIR-BEFORE:   %[[ZERO:.*]] = cir.const #cir.zero : !rec_NonTrivial
-// CIR-BEFORE:   cir.store{{.*}} %[[ZERO]], %[[REF_TEMP]]
+// CIR-BEFORE:   cir.call @_ZN10NonTrivialC1Ev(%[[REF_TEMP]])
 // CIR-BEFORE:   cir.store{{.*}} %[[REF_TEMP]], %[[THREAD_REF]]
 // CIR-BEFORE: }
 
@@ -80,6 +79,7 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // CIR-BEFORE:   %[[TWO:.*]] = cir.const #cir.int<2> : !s64i
 // CIR-BEFORE:   %[[NEXT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO]] : 
(!cir.ptr<!rec_NonTrivial>, !s64i) -> !cir.ptr<!rec_NonTrivial>
 // CIR-BEFORE:   cir.do {
+// CIR-BEFORE:     cir.call @_ZN10NonTrivialC1Ev
 // CIR-BEFORE:   } while {
 // CIR-BEFORE:     cir.condition
 // CIR-BEFORE:   }
@@ -96,7 +96,11 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 
 // CIR: cir.global "private" internal @_ZGR14static_arr_ref_ = #cir.zero : 
!cir.array<!rec_NonTrivial x 2>
 // CIR: cir.func internal private @__cxx_global_array_dtor(
-// CIR:   cir.call @_ZN10NonTrivialD1Ev
+// CIR:   cir.do {
+// CIR:     cir.call @_ZN10NonTrivialD1Ev
+// CIR:   } while {
+// CIR:     cir.condition
+// CIR:   }
 
 // CIR: cir.func internal private @__cxx_global_var_init.5()
 // CIR:   cir.get_global @_ZGR14static_arr_ref_
@@ -112,6 +116,7 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // CIR-BEFORE:   %[[TWO:.*]] = cir.const #cir.int<2> : !s64i
 // CIR-BEFORE:   %[[NEXT:.*]] = cir.ptr_stride %[[DECAY]], %[[TWO]] : 
(!cir.ptr<!rec_NonTrivial>, !s64i) -> !cir.ptr<!rec_NonTrivial>
 // CIR-BEFORE:   cir.do {
+// CIR-BEFORE:     cir.call @_ZN10NonTrivialC1Ev
 // CIR-BEFORE:   } while {
 // CIR-BEFORE:     cir.condition
 // CIR-BEFORE:   }
@@ -127,7 +132,11 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 
 // CIR: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2>
 // CIR: cir.func internal private @__cxx_global_array_dtor.1(
-// CIR:   cir.call @_ZN10NonTrivialD1Ev
+// CIR:   cir.do {
+// CIR:     cir.call @_ZN10NonTrivialD1Ev
+// CIR:   } while {
+// CIR:     cir.condition
+// CIR:   }
 
 // CIR: cir.func internal private @__cxx_global_var_init.7()
 // CIR:   cir.get_global @_ZGR14thread_arr_ref_
@@ -157,64 +166,112 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 
 // Static, non-array.
 
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init()
-// LLVMCIR:         store %struct.NonTrivial zeroinitializer, ptr 
@_ZGR10static_ref_
-// LLVMCIR:         store ptr @_ZGR10static_ref_, ptr @static_ref
+// LLVM-LABEL: define internal void @__cxx_global_var_init()
+// LLVM:         call void @_ZN10NonTrivialC1Ev(ptr {{.*}} @_ZGR10static_ref_)
+// OGCG:         call i32 @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
+// LLVM:         store ptr @_ZGR10static_ref_, ptr @static_ref
 
 // LLVMCIR-LABEL: define internal void @__cxx_global_var_init.1()
 // LLVMCIR:         call void @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
 
-// OGCG-LABEL: define internal void @__cxx_global_var_init()
-// OGCG:         call void @llvm.memset.{{[^(]+}}(ptr 
{{.*}}@_ZGR10static_ref_, i8 0, i64 4, i1 false)
-// OGCG:         call i32 @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
-// OGCG:         store ptr @_ZGR10static_ref_, ptr @static_ref
-
 // Thread, non-array.
 
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.2()
-// LLVMCIR:         call {{.*}}ptr @llvm.threadlocal.address.p0(ptr 
{{.*}}@thread_ref)
-// LLVMCIR:         store %struct.NonTrivial zeroinitializer, ptr 
@_ZGR10thread_ref_
-// LLVMCIR:         store ptr @_ZGR10thread_ref_
+// LLVM-LABEL: define internal void @__cxx_global_var_init{{.*}}()
+// LLVMCIR:         %[[TLS_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
+// LLVM:            call void @_ZN10NonTrivialC1Ev(ptr {{.*}} 
@_ZGR10thread_ref_)
+// OGCG:            call i32 @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, 
ptr @_ZGR10thread_ref_, ptr @__dso_handle)
+// OGCG:            %[[TLS_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
+// LLVM:            store ptr @_ZGR10thread_ref_, ptr %[[TLS_ADDR]]
 
 // LLVMCIR-LABEL: define internal void @__cxx_global_var_init.3()
 // LLVMCIR:         call void @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, 
ptr @_ZGR10thread_ref_, ptr @__dso_handle)
 
-// OGCG-LABEL: define internal void @__cxx_global_var_init.1()
-// OGCG:         call void @llvm.memset.{{[^(]+}}(ptr 
{{.*}}@_ZGR10thread_ref_, i8 0, i64 4, i1 false)
-// OGCG:         call i32 @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10thread_ref_, ptr @__dso_handle)
-// OGCG:         %[[THREAD_REF_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
-// OGCG:         store ptr @_ZGR10thread_ref_, ptr %[[THREAD_REF_ADDR]]
+// Static, array.
 
-// Static, array: a generated array-destroy helper is registered with
-// __cxa_atexit instead of the destructor itself. CIR passes the array
-// pointer as the second argument; OGCG passes null and the helper hard-codes
-// the global reference.
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.4()
+// LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
+// LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
+// LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
+// LLVMCIR:       [[LOOP_BODY_BLOCK]]:
+// LLVMCIR:         call void @_ZN10NonTrivialC1Ev
+// LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
+// LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
+// LLVMCIR:         store ptr @_ZGR14static_arr_ref_, ptr @static_arr_ref
 
 // LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor(ptr {{.*}})
+// LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
+// LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
+// LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
+// LLVMCIR:       [[LOOP_BODY_BLOCK]]:
 // LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
+// LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
+// LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
 
 // LLVMCIR-LABEL: define internal void @__cxx_global_var_init.5()
 // LLVMCIR:         call void @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr 
@_ZGR14static_arr_ref_, ptr @__dso_handle)
 
 // OGCG-LABEL: define internal void @__cxx_global_var_init.2()
+// OGCG:       [[ENTRY:.*]]:
+// OGCG:         br label %[[LOOP_BODY_BLOCK:.*]]
+// OGCG:       [[LOOP_BODY_BLOCK]]:
+// OGCG:         call void @_ZN10NonTrivialC1Ev
+// OGCG:         %[[DONE:.*]] = icmp eq ptr
+// OGCG:         br i1 %[[DONE]], label %[[LOOP_EXIT_BLOCK:.*]], label 
%[[LOOP_BODY_BLOCK]]
+// OGCG:       [[LOOP_EXIT_BLOCK]]:
 // OGCG:         call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr 
null, ptr @__dso_handle)
 // OGCG:         store ptr @_ZGR14static_arr_ref_, ptr @static_arr_ref
 
 // OGCG-LABEL: define internal void @__cxx_global_array_dtor(ptr {{.*}})
+// OGCG:       [[ENTRY:.*]]:
+// OGCG:         br label %[[LOOP_BODY_BLOCK:.*]]
+// OGCG:       [[LOOP_BODY_BLOCK]]:
 // OGCG:         call void @_ZN10NonTrivialD1Ev(ptr
+// OGCG:         %[[DONE:.*]] = icmp eq ptr
+// OGCG:         br i1 %[[DONE]], label %[[LOOP_EXIT_BLOCK:.*]], label 
%[[LOOP_BODY_BLOCK]]
+// OGCG:       [[LOOP_EXIT_BLOCK]]:
 
 // Thread, array.
 
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.6()
+// LLVMCIR:         %[[THREAD_ARR_REF:.*]] = call ptr @_ZTW14thread_arr_ref()
+// LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
+// LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
+// LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
+// LLVMCIR:       [[LOOP_BODY_BLOCK]]:
+// LLVMCIR:         call void @_ZN10NonTrivialC1Ev
+// LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
+// LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
+// LLVMCIR:         store ptr @_ZGR14thread_arr_ref_, ptr %[[THREAD_ARR_REF]]
+
 // LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor.1(ptr {{.*}})
+// LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
+// LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
+// LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
+// LLVMCIR:       [[LOOP_BODY_BLOCK]]:
 // LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
+// LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
+// LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
 
 // LLVMCIR-LABEL: define internal void @__cxx_global_var_init.7()
 // LLVMCIR:         call void @__cxa_thread_atexit(ptr 
@__cxx_global_array_dtor.1, ptr @_ZGR14thread_arr_ref_, ptr @__dso_handle)
 
 // OGCG-LABEL: define internal void @__cxx_global_var_init.3()
+// OGCG:       [[ENTRY:.*]]:
+// OGCG:         br label %[[LOOP_BODY_BLOCK:.*]]
+// OGCG:       [[LOOP_BODY_BLOCK]]:
+// OGCG:         call void @_ZN10NonTrivialC1Ev
+// OGCG:         %[[DONE:.*]] = icmp eq ptr
+// OGCG:         br i1 %[[DONE]], label %[[LOOP_EXIT_BLOCK:.*]], label 
%[[LOOP_BODY_BLOCK]]
+// OGCG:       [[LOOP_EXIT_BLOCK]]:
 // OGCG:         call i32 @__cxa_thread_atexit(ptr @__cxx_global_array_dtor.4, 
ptr null, ptr @__dso_handle)
 // OGCG:         %[[THREAD_ARR_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_arr_ref)
 // OGCG:         store ptr @_ZGR14thread_arr_ref_, ptr %[[THREAD_ARR_ADDR]]
 
 // OGCG-LABEL: define internal void @__cxx_global_array_dtor.4(ptr {{.*}})
+// OGCG:       [[ENTRY:.*]]:
+// OGCG:         br label %[[LOOP_BODY_BLOCK:.*]]
+// OGCG:       [[LOOP_BODY_BLOCK]]:
 // OGCG:         call void @_ZN10NonTrivialD1Ev(ptr
+// OGCG:         %[[DONE:.*]] = icmp eq ptr
+// OGCG:         br i1 %[[DONE]], label %[[LOOP_EXIT_BLOCK:.*]], label 
%[[LOOP_BODY_BLOCK]]
+// OGCG:       [[LOOP_EXIT_BLOCK]]:

>From 3794d54049491819126f894053aa6961f760a6f8 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Mon, 1 Jun 2026 11:24:37 -0700
Subject: [PATCH 4/8] Move reference temporary destruction to the main object
 dtor

---
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp        |  14 +-
 clang/test/CIR/CodeGen/global-temp-dtor.cpp | 144 +++++++++-----------
 2 files changed, 77 insertions(+), 81 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index ba9d0b7cc9728..a31aa6f78374c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1938,11 +1938,19 @@ static void pushTemporaryCleanup(CIRGenFunction &cgf,
     auto globalOp =
         mlir::cast<cir::GlobalOp>(cgm.getAddrOfGlobalTemporary(m, e));
 
+    // The destruction of the reference temporary is done in the dtor
+    // region of the global object it is associated with.
+    const auto *extendingDecl = cast<VarDecl>(m->getExtendingDecl());
+    cir::GlobalOp extendingGlobalOp =
+        cgm.getOrCreateCIRGlobal(extendingDecl, /*ty=*/nullptr,
+                                 NotForDefinition);
+
     CIRGenBuilderTy &builder = cgm.getBuilder();
     mlir::OpBuilder::InsertionGuard guard(builder);
-    assert(globalOp.getDtorRegion().empty() &&
-           "global temporary already has a dtor region");
-    mlir::Block *block = builder.createBlock(&globalOp.getDtorRegion());
+    assert(extendingGlobalOp.getDtorRegion().empty() &&
+           "extending global already has a dtor region");
+    mlir::Block *block =
+        builder.createBlock(&extendingGlobalOp.getDtorRegion());
     builder.setInsertionPointToStart(block);
 
     mlir::Location loc = cgm.getLoc(m->getSourceRange());
diff --git a/clang/test/CIR/CodeGen/global-temp-dtor.cpp 
b/clang/test/CIR/CodeGen/global-temp-dtor.cpp
index 485bd529fc275..ca13c8db2fca3 100644
--- a/clang/test/CIR/CodeGen/global-temp-dtor.cpp
+++ b/clang/test/CIR/CodeGen/global-temp-dtor.cpp
@@ -24,51 +24,51 @@ typedef NonTrivial NonTrivialArr[2];
 const NonTrivialArr &static_arr_ref = NonTrivialArr{};
 thread_local const NonTrivialArr &thread_arr_ref = NonTrivialArr{};
 
-//===----------------------------------------------------------------------===//
-// CIR dialect (post-LoweringPrepare): each temporary is emitted as a private
-// internal cir.global, with its destructor registered through __cxa_atexit
-// or __cxa_thread_atexit. Arrays go through a generated
-// __cxx_global_array_dtor helper. The runtime helpers themselves are
-// declared exactly once.
-//===----------------------------------------------------------------------===//
-
 // CIR-BEFORE: cir.global external @static_ref = ctor : 
!cir.ptr<!rec_NonTrivial> {
 // CIR-BEFORE:   %[[STATIC_REF:.*]] = cir.get_global @static_ref
 // CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10static_ref_
 // CIR-BEFORE:   cir.call @_ZN10NonTrivialC1Ev(%[[REF_TEMP]])
 // CIR-BEFORE:   cir.store{{.*}} %[[REF_TEMP]], %[[STATIC_REF]]
+// CIR-BEFORE: } dtor {
+// CIR-BEFORE:   %[[REF_TEMP_DTOR:.*]] = cir.get_global @_ZGR10static_ref_
+// CIR-BEFORE:   cir.call @_ZN10NonTrivialD1Ev(%[[REF_TEMP_DTOR]])
 // CIR-BEFORE: }
+// CIR-BEFORE: cir.global "private" internal @_ZGR10static_ref_ = #cir.zero : 
!rec_NonTrivial
 
-// CIR-BEFORE: cir.global "private" internal @_ZGR10static_ref_ = #cir.zero : 
!rec_NonTrivial dtor {
-// CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10static_ref_
-// CIR-BEFORE:   cir.call @_ZN10NonTrivialD1Ev(%[[REF_TEMP]])
-// CIR-BEFORE: }
-
-// CIR: cir.global "private" internal @_ZGR10static_ref_ = #cir.zero : 
!rec_NonTrivial
-// CIR: cir.func internal private @__cxx_global_var_init.1()
+// CIR: cir.global external @static_ref = #cir.ptr<null> : 
!cir.ptr<!rec_NonTrivial>
+// CIR: cir.func internal private @__cxx_global_var_init()
+// CIR:   cir.get_global @static_ref
+// CIR:   cir.get_global @_ZGR10static_ref_
+// CIR:   cir.call @_ZN10NonTrivialC1Ev
+// CIR:   cir.store
 // CIR:   cir.get_global @_ZGR10static_ref_
 // CIR:   cir.get_global @_ZN10NonTrivialD1Ev
 // CIR:   cir.get_global @__dso_handle
 // CIR:   cir.call @__cxa_atexit
+// CIR: cir.global "private" internal @_ZGR10static_ref_ = #cir.zero : 
!rec_NonTrivial
 
 // CIR-BEFORE: cir.global external tls_dyn dyn_tls_refs = <"_ZTW10thread_ref", 
"_ZTH10thread_ref"> @thread_ref = ctor : !cir.ptr<!rec_NonTrivial> {
 // CIR-BEFORE:   %[[THREAD_REF:.*]] = cir.get_global thread_local @thread_ref
 // CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10thread_ref_
 // CIR-BEFORE:   cir.call @_ZN10NonTrivialC1Ev(%[[REF_TEMP]])
 // CIR-BEFORE:   cir.store{{.*}} %[[REF_TEMP]], %[[THREAD_REF]]
+// CIR-BEFORE: } dtor {
+// CIR-BEFORE:   %[[REF_TEMP_DTOR:.*]] = cir.get_global @_ZGR10thread_ref_
+// CIR-BEFORE:   cir.call @_ZN10NonTrivialD1Ev(%[[REF_TEMP_DTOR]])
 // CIR-BEFORE: }
+// CIR-BEFORE: cir.global "private" internal tls_dyn @_ZGR10thread_ref_ = 
#cir.zero : !rec_NonTrivial
 
-// CIR-BEFORE: cir.global "private" internal tls_dyn @_ZGR10thread_ref_ = 
#cir.zero : !rec_NonTrivial dtor {
-// CIR-BEFORE:   %[[REF_TEMP:.*]] = cir.get_global @_ZGR10thread_ref_
-// CIR-BEFORE:   cir.call @_ZN10NonTrivialD1Ev(%[[REF_TEMP]])
-// CIR-BEFORE: }
-
-// CIR: cir.global "private" internal tls_dyn @_ZGR10thread_ref_ = #cir.zero : 
!rec_NonTrivial
-// CIR: cir.func internal private @__cxx_global_var_init.3()
+// CIR: cir.global external tls_dyn dyn_tls_refs = <"_ZTW10thread_ref", 
"_ZTH10thread_ref"> @thread_ref = #cir.ptr<null> : !cir.ptr<!rec_NonTrivial>
+// CIR: cir.func internal private @__cxx_global_var_init.1()
+// CIR:   cir.get_global thread_local @thread_ref
+// CIR:   cir.get_global @_ZGR10thread_ref_
+// CIR:   cir.call @_ZN10NonTrivialC1Ev
+// CIR:   cir.store
 // CIR:   cir.get_global @_ZGR10thread_ref_
 // CIR:   cir.get_global @_ZN10NonTrivialD1Ev
 // CIR:   cir.get_global @__dso_handle
 // CIR:   cir.call @__cxa_thread_atexit
+// CIR: cir.global "private" internal tls_dyn @_ZGR10thread_ref_ = #cir.zero : 
!rec_NonTrivial
 
 // CIR-BEFORE: cir.global external @static_arr_ref = ctor : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
 // CIR-BEFORE:   %[[ARRAY_INIT_TEMP:.*]] = cir.alloca {{.*}}"arrayinit.temp"
@@ -84,28 +84,29 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // CIR-BEFORE:     cir.condition
 // CIR-BEFORE:   }
 // CIR-BEFORE:   cir.store{{.*}} %[[STATIC_ARR_REF_TEMP]], %[[STATIC_ARR_REF]] 
: !cir.ptr<!cir.array<!rec_NonTrivial x 2>>, 
!cir.ptr<!cir.ptr<!cir.array<!rec_NonTrivial x 2>>>
-// CIR-BEFORE: }
-
-// CIR-BEFORE: cir.global "private" internal @_ZGR14static_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2> dtor {
-// CIR-BEFORE:   %[[STATIC_ARR_REF_TEMP:.*]] = cir.get_global 
@_ZGR14static_arr_ref_
-// CIR-BEFORE:   cir.array.dtor %[[STATIC_ARR_REF_TEMP]] : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
+// CIR-BEFORE: } dtor {
+// CIR-BEFORE:   %[[STATIC_ARR_REF_TEMP_DTOR:.*]] = cir.get_global 
@_ZGR14static_arr_ref_
+// CIR-BEFORE:   cir.array.dtor %[[STATIC_ARR_REF_TEMP_DTOR]] : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
 // CIR-BEFORE:   ^bb0(%[[ELEMENT:.*]]: !cir.ptr<!rec_NonTrivial>):
 // CIR-BEFORE:     cir.call @_ZN10NonTrivialD1Ev(%[[ELEMENT]])
 // CIR-BEFORE:   }
 // CIR-BEFORE: }
+// CIR-BEFORE: cir.global "private" internal @_ZGR14static_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2>
 
-// CIR: cir.global "private" internal @_ZGR14static_arr_ref_ = #cir.zero : 
!cir.array<!rec_NonTrivial x 2>
+// CIR: cir.global external @static_arr_ref = #cir.ptr<null> : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>>
 // CIR: cir.func internal private @__cxx_global_array_dtor(
 // CIR:   cir.do {
 // CIR:     cir.call @_ZN10NonTrivialD1Ev
 // CIR:   } while {
 // CIR:     cir.condition
 // CIR:   }
-
-// CIR: cir.func internal private @__cxx_global_var_init.5()
-// CIR:   cir.get_global @_ZGR14static_arr_ref_
+// CIR: cir.func internal private @__cxx_global_var_init.2()
+// CIR:   cir.call @_ZN10NonTrivialC1Ev
+// CIR:   cir.store
 // CIR:   cir.get_global @__cxx_global_array_dtor
+// CIR:   cir.get_global @__dso_handle
 // CIR:   cir.call @__cxa_atexit
+// CIR: cir.global "private" internal @_ZGR14static_arr_ref_ = #cir.zero : 
!cir.array<!rec_NonTrivial x 2>
 
 // CIR-BEFORE: cir.global external tls_dyn dyn_tls_refs = 
<"_ZTW14thread_arr_ref", "_ZTH14thread_arr_ref"> @thread_arr_ref = ctor : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
 // CIR-BEFORE:   %[[ARRAY_INIT_TEMP:.*]] = cir.alloca {{.*}}"arrayinit.temp"
@@ -121,31 +122,29 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // CIR-BEFORE:     cir.condition
 // CIR-BEFORE:   }
 // CIR-BEFORE:   cir.store{{.*}} %[[THREAD_ARR_REF_TEMP]], %[[THREAD_ARR_REF]] 
: !cir.ptr<!cir.array<!rec_NonTrivial x 2>>, 
!cir.ptr<!cir.ptr<!cir.array<!rec_NonTrivial x 2>>>
-// CIR-BEFORE: }
-
-// CIR-BEFORE: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2> dtor
-// CIR-BEFORE:   %[[THREAD_ARR_REF_TEMP:.*]] = cir.get_global 
@_ZGR14thread_arr_ref_
-// CIR-BEFORE:   cir.array.dtor %[[THREAD_ARR_REF_TEMP]] : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
+// CIR-BEFORE: } dtor {
+// CIR-BEFORE:   %[[THREAD_ARR_REF_TEMP_DTOR:.*]] = cir.get_global 
@_ZGR14thread_arr_ref_
+// CIR-BEFORE:   cir.array.dtor %[[THREAD_ARR_REF_TEMP_DTOR]] : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>> {
 // CIR-BEFORE:   ^bb0(%[[ELEMENT:.*]]: !cir.ptr<!rec_NonTrivial>):
 // CIR-BEFORE:     cir.call @_ZN10NonTrivialD1Ev(%[[ELEMENT]])
 // CIR-BEFORE:   }
+// CIR-BEFORE: }
+// CIR-BEFORE: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2>
 
-// CIR: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2>
+// CIR: cir.global external tls_dyn dyn_tls_refs = <"_ZTW14thread_arr_ref", 
"_ZTH14thread_arr_ref"> @thread_arr_ref = #cir.ptr<null> : 
!cir.ptr<!cir.array<!rec_NonTrivial x 2>>
 // CIR: cir.func internal private @__cxx_global_array_dtor.1(
 // CIR:   cir.do {
 // CIR:     cir.call @_ZN10NonTrivialD1Ev
 // CIR:   } while {
 // CIR:     cir.condition
 // CIR:   }
-
-// CIR: cir.func internal private @__cxx_global_var_init.7()
-// CIR:   cir.get_global @_ZGR14thread_arr_ref_
+// CIR: cir.func internal private @__cxx_global_var_init.3()
+// CIR:   cir.call @_ZN10NonTrivialC1Ev
+// CIR:   cir.store
 // CIR:   cir.get_global @__cxx_global_array_dtor.1
+// CIR:   cir.get_global @__dso_handle
 // CIR:   cir.call @__cxa_thread_atexit
-
-//===----------------------------------------------------------------------===//
-// LLVM IR: the shared declarations are identical between both pipelines.
-//===----------------------------------------------------------------------===//
+// CIR: cir.global "private" internal tls_dyn @_ZGR14thread_arr_ref_ = 
#cir.zero : !cir.array<!rec_NonTrivial x 2>
 
 // LLVM-DAG: @static_ref = global ptr null
 // LLVM-DAG: @_ZGR10static_ref_ = internal global %struct.NonTrivial 
zeroinitializer
@@ -156,58 +155,49 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 // LLVM-DAG: @thread_arr_ref = thread_local global ptr null
 // LLVM-DAG: @_ZGR14thread_arr_ref_ = internal thread_local global [2 x 
%struct.NonTrivial] zeroinitializer
 
-//===----------------------------------------------------------------------===//
-// LLVM IR: function-body shape diverges between the pipelines. OGCG emits a
-// single __cxx_global_var_init per reference variable that initializes the
-// temporary, registers the cleanup and stores into the reference. CIR-lowered
-// IR splits the binding code and the cleanup registration across two
-// __cxx_global_var_init functions per reference.
-//===----------------------------------------------------------------------===//
-
 // Static, non-array.
 
 // LLVM-LABEL: define internal void @__cxx_global_var_init()
 // LLVM:         call void @_ZN10NonTrivialC1Ev(ptr {{.*}} @_ZGR10static_ref_)
+// LLVMCIR:      store ptr @_ZGR10static_ref_, ptr @static_ref
+// LLVMCIR:      call void @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
 // OGCG:         call i32 @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
-// LLVM:         store ptr @_ZGR10static_ref_, ptr @static_ref
-
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.1()
-// LLVMCIR:         call void @__cxa_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10static_ref_, ptr @__dso_handle)
+// OGCG:         store ptr @_ZGR10static_ref_, ptr @static_ref
 
 // Thread, non-array.
 
-// LLVM-LABEL: define internal void @__cxx_global_var_init{{.*}}()
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.1()
 // LLVMCIR:         %[[TLS_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
-// LLVM:            call void @_ZN10NonTrivialC1Ev(ptr {{.*}} 
@_ZGR10thread_ref_)
-// OGCG:            call i32 @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, 
ptr @_ZGR10thread_ref_, ptr @__dso_handle)
-// OGCG:            %[[TLS_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
-// LLVM:            store ptr @_ZGR10thread_ref_, ptr %[[TLS_ADDR]]
-
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.3()
+// LLVMCIR:         call void @_ZN10NonTrivialC1Ev(ptr {{.*}} 
@_ZGR10thread_ref_)
+// LLVMCIR:         store ptr @_ZGR10thread_ref_, ptr %[[TLS_ADDR]]
 // LLVMCIR:         call void @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, 
ptr @_ZGR10thread_ref_, ptr @__dso_handle)
 
+// OGCG-LABEL: define internal void @__cxx_global_var_init.1()
+// OGCG:         call void @_ZN10NonTrivialC1Ev(ptr {{.*}} @_ZGR10thread_ref_)
+// OGCG:         call i32 @__cxa_thread_atexit(ptr @_ZN10NonTrivialD1Ev, ptr 
@_ZGR10thread_ref_, ptr @__dso_handle)
+// OGCG:         %[[TLS_ADDR:.*]] = call {{.*}}ptr 
@llvm.threadlocal.address.p0(ptr {{.*}}@thread_ref)
+// OGCG:         store ptr @_ZGR10thread_ref_, ptr %[[TLS_ADDR]]
+
 // Static, array.
 
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.4()
+// LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor(ptr {{.*}})
 // LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
 // LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
 // LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
 // LLVMCIR:       [[LOOP_BODY_BLOCK]]:
-// LLVMCIR:         call void @_ZN10NonTrivialC1Ev
+// LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
 // LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
 // LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
-// LLVMCIR:         store ptr @_ZGR14static_arr_ref_, ptr @static_arr_ref
 
-// LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor(ptr {{.*}})
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.2()
 // LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
 // LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
 // LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
 // LLVMCIR:       [[LOOP_BODY_BLOCK]]:
-// LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
+// LLVMCIR:         call void @_ZN10NonTrivialC1Ev
 // LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
 // LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
-
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.5()
+// LLVMCIR:         store ptr @_ZGR14static_arr_ref_, ptr @static_arr_ref
 // LLVMCIR:         call void @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr 
@_ZGR14static_arr_ref_, ptr @__dso_handle)
 
 // OGCG-LABEL: define internal void @__cxx_global_var_init.2()
@@ -232,27 +222,25 @@ thread_local const NonTrivialArr &thread_arr_ref = 
NonTrivialArr{};
 
 // Thread, array.
 
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.6()
-// LLVMCIR:         %[[THREAD_ARR_REF:.*]] = call ptr @_ZTW14thread_arr_ref()
+// LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor.1(ptr {{.*}})
 // LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
 // LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
 // LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
 // LLVMCIR:       [[LOOP_BODY_BLOCK]]:
-// LLVMCIR:         call void @_ZN10NonTrivialC1Ev
+// LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
 // LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
 // LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
-// LLVMCIR:         store ptr @_ZGR14thread_arr_ref_, ptr %[[THREAD_ARR_REF]]
 
-// LLVMCIR-LABEL: define internal void @__cxx_global_array_dtor.1(ptr {{.*}})
+// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.3()
+// LLVMCIR:         %[[THREAD_ARR_REF:.*]] = call ptr @_ZTW14thread_arr_ref()
 // LLVMCIR:       [[LOOP_CONDITION_BLOCK:.*]]:
 // LLVMCIR:         %[[DONE:.*]] = icmp ne ptr
 // LLVMCIR:         br i1 %[[DONE]], label %[[LOOP_BODY_BLOCK:.*]], label 
%[[LOOP_EXIT_BLOCK:.*]]
 // LLVMCIR:       [[LOOP_BODY_BLOCK]]:
-// LLVMCIR:         call void @_ZN10NonTrivialD1Ev(ptr
+// LLVMCIR:         call void @_ZN10NonTrivialC1Ev
 // LLVMCIR:         br label %[[LOOP_CONDITION_BLOCK]]
 // LLVMCIR:       [[LOOP_EXIT_BLOCK]]:
-
-// LLVMCIR-LABEL: define internal void @__cxx_global_var_init.7()
+// LLVMCIR:         store ptr @_ZGR14thread_arr_ref_, ptr %[[THREAD_ARR_REF]]
 // LLVMCIR:         call void @__cxa_thread_atexit(ptr 
@__cxx_global_array_dtor.1, ptr @_ZGR14thread_arr_ref_, ptr @__dso_handle)
 
 // OGCG-LABEL: define internal void @__cxx_global_var_init.3()

>From 035cace25fd442fa041678f54677076a0aff7aaf Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Mon, 1 Jun 2026 11:42:07 -0700
Subject: [PATCH 5/8] Fix formatting

---
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index a31aa6f78374c..c3fce7dd5383b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1941,9 +1941,8 @@ static void pushTemporaryCleanup(CIRGenFunction &cgf,
     // The destruction of the reference temporary is done in the dtor
     // region of the global object it is associated with.
     const auto *extendingDecl = cast<VarDecl>(m->getExtendingDecl());
-    cir::GlobalOp extendingGlobalOp =
-        cgm.getOrCreateCIRGlobal(extendingDecl, /*ty=*/nullptr,
-                                 NotForDefinition);
+    cir::GlobalOp extendingGlobalOp = cgm.getOrCreateCIRGlobal(
+        extendingDecl, /*ty=*/nullptr, NotForDefinition);
 
     CIRGenBuilderTy &builder = cgm.getBuilder();
     mlir::OpBuilder::InsertionGuard guard(builder);

>From ab100ecf177c0a2997b035cfaf3be982af763fdd Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Mon, 1 Jun 2026 13:23:10 -0700
Subject: [PATCH 6/8] Revert LoweringPrepare changes

---
 .../CIR/Dialect/Transforms/LoweringPrepare.cpp    | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp 
b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 731fe78eed28c..fa970158058e0 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1597,19 +1597,16 @@ void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
       // a guard variable for them (since they cannot use the global guard), so
       // we differentiate them that way.
 
-      // Some TLS globals (e.g. internal-linkage lifetime-extended reference
-      // temporaries) have no wrapper at all, so there are no dyn_tls_refs to
-      // associate them with. They still need to participate in the ordered
-      // __tls_init flow, but no wrapper alias is required.
-      if (op.getDynTlsRefs() && op.getDynTlsRefs()->getGuardName()) {
+      if (op.getDynTlsRefs()->getGuardName()) {
         // Unordered: the alias is the function we just generated.
         initAlias = defineGlobalThreadLocalInitAlias(op, f);
       } else {
-        // Ordered: add the init function to the list so __tls_init picks it
-        // up later. If a wrapper exists, also point its alias at __tls_init.
+        // Ordered: Get the __tls_init, and make the alias to that.
+        initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
+        // Ordered inits also need to get called from the __tls_init function,
+        // so we add the init function to the list, so that we can add them to
+        // it later.
         globalThreadLocalInitializers.push_back(f);
-        if (op.getDynTlsRefs())
-          initAlias = defineGlobalThreadLocalInitAlias(op, getTlsInitFn());
       }
     } else {
       dynamicInitializers.push_back(f);

>From 3f2c56e953e137388e0c3ef751ed34f1259d8a16 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Tue, 2 Jun 2026 15:33:41 -0700
Subject: [PATCH 7/8] Remove unnecessary condition

---
 clang/lib/CIR/CodeGen/CIRGenModule.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 1bfe491c3a88c..11e3813bafa73 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -3893,8 +3893,7 @@ CIRGenModule::getAddrOfGlobalTemporary(const 
MaterializeTemporaryExpr *mte,
     }
   }
   cir::GlobalOp gv = createGlobalOp(loc, name, type, isConstant);
-  if (initialValue)
-    gv.setInitialValueAttr(initialValue);
+  gv.setInitialValueAttr(initialValue);
   gv.setLinkage(linkage);
   gv.setVisibility(getMLIRVisibilityFromCIRLinkage(linkage));
 

>From e3fe3b717c7f5ae7dd318e7fd7a9ad68140e2b7d Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Tue, 2 Jun 2026 15:53:57 -0700
Subject: [PATCH 8/8] Fix typo

---
 clang/lib/CIR/CodeGen/CIRGenModule.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 11e3813bafa73..d20ca027ad320 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -2937,7 +2937,7 @@ void CIRGenModule::setTLSMode(mlir::Operation *op, const 
VarDecl &d,
 
   // If this function was called to set the TLS mode for a temporary whose
   // lifetime is extended by the variable declared by `d`, don't emit the
-  // wrapperm init and guard info.
+  // wrapper, init, and guard info.
   if (isExtendingDecl)
     return;
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to