https://github.com/Andres-Salamanca updated 
https://github.com/llvm/llvm-project/pull/203802

>From d31ccf9f29235e2e184987bf64d4793209bd6a30 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <[email protected]>
Date: Sun, 14 Jun 2026 17:32:20 -0500
Subject: [PATCH 1/2] [CIR] Implement FlattenCFG for coroutine AwaitOp,
 CoroBodyOp, and CoReturnOp

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |   9 +-
 clang/include/clang/CIR/MissingFeatures.h     |   1 +
 clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp     |  23 +-
 clang/lib/CIR/Dialect/IR/CIRDialect.cpp       |  13 +-
 .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 219 ++++++++-
 .../CIR/CodeGenCoroutines/Inputs/coroutine.h  | 118 +++++
 .../coro-exceptions.cpp                       |   0
 .../CIR/CodeGenCoroutines/coro-flatten.cpp    | 437 ++++++++++++++++++
 .../coro-task.cpp                             | 132 +-----
 9 files changed, 820 insertions(+), 132 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenCoroutines/Inputs/coroutine.h
 rename clang/test/CIR/{CodeGen => CodeGenCoroutines}/coro-exceptions.cpp (100%)
 create mode 100644 clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp
 rename clang/test/CIR/{CodeGen => CodeGenCoroutines}/coro-task.cpp (91%)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td 
b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index c86322b049207..5666e446a5dea 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -585,6 +585,10 @@ def CIR_AllocaOp : CIR_Op<"alloca", [
     The `cleanup_dest_slot` attribute indicates that this was a temporary
     alloca generated by the compiler to handle cleanup exit dispatching.
 
+    The `coroutine_suspend_point` attribute indicates that this alloca is
+    used to track the coroutine suspend destination. The use of this alloca
+    identifies where control should continue when the coroutine suspends.
+
     The result type is a pointer to the input's type.
 
     Example:
@@ -605,6 +609,7 @@ def CIR_AllocaOp : CIR_Op<"alloca", [
     UnitAttr:$init,
     UnitAttr:$constant,
     UnitAttr:$cleanup_dest_slot,
+    UnitAttr:$coroutine_suspend_point,
     ConfinedAttr<I64Attr, [IntMinValue<1>]>:$alignment,
     OptionalAttr<CIR_AnnotationArrayAttr>:$annotations
   );
@@ -643,7 +648,8 @@ def CIR_AllocaOp : CIR_Op<"alloca", [
     `align` `(` $alignment `)`
     oilist( `init`              $init
           | `const`             $constant
-          | `cleanup_dest_slot` $cleanup_dest_slot)
+          | `cleanup_dest_slot` $cleanup_dest_slot
+          | `coroutine_suspend_point` $coroutine_suspend_point)
     (`size` `(` $dynAllocSize^ `)`)? `:` qualified(type($addr))
     ($annotations^)? attr-dict
   }];
@@ -3909,6 +3915,7 @@ def CIR_FuncOp : CIR_Op<"func", [
     TypeAttrOf<CIR_FuncType>:$function_type,
     UnitAttr:$builtin,
     UnitAttr:$coroutine,
+    UnitAttr:$flatten_coroutine,
     OptionalAttr<CIR_InlineKind>:$inline_kind,
     UnitAttr:$lambda,
     UnitAttr:$no_proto,
diff --git a/clang/include/clang/CIR/MissingFeatures.h 
b/clang/include/clang/CIR/MissingFeatures.h
index c09db49a955ac..3ab209fe26c6f 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -125,6 +125,7 @@ struct MissingFeatures {
 
   // Coroutines
   static bool coroOutsideFrameMD() { return false; }
+  static bool coroutineGroManager() { return false; }
 
   // Various handling of deferred processing in CIRGenModule.
   static bool cgmRelease() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
index 3dd71a8ad3b6c..c2da4407cc533 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
@@ -16,7 +16,6 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
-#include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -49,6 +48,14 @@ struct clang::CIRGen::CGCoroData {
   // body must be skipped. If the promise type does not define an exception
   // handler, this is null.
   Address resumeEHVar = Address::invalid();
+
+  // This alloca must have a single use that represents the coroutine suspend
+  // destination. A cir.await operation uses this destination when the
+  // coroutine is suspended.
+  //
+  // Currently the suspend destination always corresponds to the return block.
+  // In the future it may also represent the GRO path.
+  cir::AllocaOp suspendPoint = nullptr;
 };
 
 // Defining these here allows to keep CGCoroData private to this file.
@@ -351,6 +358,13 @@ CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt 
&s) {
   cir::CallOp coroId = emitCoroIDBuiltinCall(openCurlyLoc, nullPtrCst);
   createCoroData(*this, curCoro, coroId);
 
+  uint64_t alignment = cgm.getDataLayout().getAlignment(sInt32Ty, 
true).value();
+
+  auto allocaOp = cir::AllocaOp::create(
+      builder, openCurlyLoc, builder.getPointerTo(sInt32Ty),
+      "__coroutine_suspend_point", builder.getI64IntegerAttr(alignment));
+  curCoro.data->suspendPoint = allocaOp;
+  allocaOp.setCoroutineSuspendPoint(true);
   // Backend is allowed to elide memory allocations, to help it, emit
   // auto mem = coro.alloc() ? 0 : ... allocation code ...;
   cir::CallOp coroAlloc = emitCoroAllocBuiltinCall(openCurlyLoc);
@@ -512,6 +526,13 @@ CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt 
&s) {
     }
   }
 
+  assert(!cir::MissingFeatures::coroutineGroManager());
+
+  cir::StoreOp::create(builder, openCurlyLoc,
+                       builder.getSignedInt(openCurlyLoc, 1, 32),
+                       curCoro.data->suspendPoint, false, {} /*alignment*/,
+                       {} /*sync_scope*/, {} /*mem_order*/);
+
   emitCoroEndBuiltinCall(
       openCurlyLoc, builder.getNullPtr(builder.getVoidPtrTy(), openCurlyLoc));
   if (auto *ret = cast_or_null<ReturnStmt>(s.getReturnStmt())) {
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp 
b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 660bed1544aac..3008b785ca00b 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2335,6 +2335,8 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, 
OperationState &state) {
 
   mlir::StringAttr builtinNameAttr = getBuiltinAttrName(state.name);
   mlir::StringAttr coroutineNameAttr = getCoroutineAttrName(state.name);
+  mlir::StringAttr flattenCoroutineNameAttr =
+      getFlattenCoroutineAttrName(state.name);
   mlir::StringAttr inlineKindNameAttr = getInlineKindAttrName(state.name);
   mlir::StringAttr lambdaNameAttr = getLambdaAttrName(state.name);
   mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name);
@@ -2348,7 +2350,10 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, 
OperationState &state) {
   if (::mlir::succeeded(
           parser.parseOptionalKeyword(coroutineNameAttr.strref())))
     state.addAttribute(coroutineNameAttr, parser.getBuilder().getUnitAttr());
-
+  if (::mlir::succeeded(
+          parser.parseOptionalKeyword(flattenCoroutineNameAttr.strref())))
+    state.addAttribute(flattenCoroutineNameAttr,
+                       parser.getBuilder().getUnitAttr());
   // Parse optional inline kind attribute
   cir::InlineKindAttr inlineKindAttr;
   if (failed(parseInlineKindAttr(parser, inlineKindAttr)))
@@ -2657,6 +2662,9 @@ void cir::FuncOp::print(OpAsmPrinter &p) {
   if (getCoroutine())
     p << " coroutine";
 
+  if (getFlattenCoroutine())
+    p << " flatten-coroutine";
+
   printInlineKindAttr(p, getInlineKindAttr());
 
   if (getLambda())
@@ -3193,7 +3201,8 @@ cir::CoroBodyOp::getSuccessorInputs(RegionSuccessor 
successor) {
 }
 
 LogicalResult cir::CoroBodyOp::verify() {
-  if (!getOperation()->getParentOfType<FuncOp>().getCoroutine())
+  auto funcOp = getOperation()->getParentOfType<FuncOp>();
+  if (!funcOp.getCoroutine() && !funcOp.getFlattenCoroutine())
     return emitOpError("enclosing function must be a coroutine");
   return success();
 }
diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp 
b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
index ddeeb98fee820..2823da3e1e33e 100644
--- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
@@ -835,7 +835,8 @@ class CIRCleanupScopeOpFlattening
         } else if (isa<cir::LoopOpInterface>(nestedOp)) {
           collectExitsInLoop(nestedOp);
           return mlir::WalkResult::skip();
-        } else if (isa<cir::ReturnOp, cir::ContinueOp>(nestedOp)) {
+        } else if (isa<cir::CoReturnOp, cir::ReturnOp, cir::ContinueOp>(
+                       nestedOp)) {
           exits.emplace_back(nestedOp, nextId++);
         } else if (isGotoThatExitsCleanup(nestedOp)) {
           exits.emplace_back(nestedOp, nextId++);
@@ -857,7 +858,7 @@ class CIRCleanupScopeOpFlattening
         // the nested cleanup.
         if (!ignoreBreak && isa<cir::BreakOp>(op)) {
           exits.emplace_back(op, nextId++);
-        } else if (isa<cir::ContinueOp, cir::ReturnOp>(op)) {
+        } else if (isa<cir::CoReturnOp, cir::ContinueOp, cir::ReturnOp>(op)) {
           exits.emplace_back(op, nextId++);
         } else if (isGotoThatExitsCleanup(op)) {
           exits.emplace_back(op, nextId++);
@@ -1010,6 +1011,13 @@ class CIRCleanupScopeOpFlattening
           cir::ContinueOp::create(rewriter, loc);
           return mlir::success();
         })
+        .Case<cir::CoReturnOp>([&](auto) {
+          // CoReturnOp does not carry a destination operand. The continuation
+          // block determines whether execution proceeds to another cleanup or
+          // to the coroutine's final suspend path.
+          cir::CoReturnOp::create(rewriter, loc);
+          return mlir::success();
+        })
         .Case<cir::ReturnOp>([&](auto returnOp) {
           // Return from the cleanup exit. Note, if this is a return inside a
           // nested cleanup scope, the flattening of the outer scope will 
handle
@@ -1805,11 +1813,212 @@ class CIRTryOpFlattening : public 
mlir::OpRewritePattern<cir::TryOp> {
   }
 };
 
+/// Returns the block where control continues once the coroutine has been
+/// suspended, creating it on first use.
+///
+/// The block is found through the entry-block alloca that carries the
+/// `coroutine_suspend_point` attribute: the alloca's only user is expected to
+/// be a store, and the constant being stored marks where the suspend path
+/// begins. When that constant is not yet the first operation of its block, the
+/// block is split in front of it and the leading half is terminated with a
+/// branch to the new block, so the returned block always starts at the
+/// constant.
+static mlir::Block *getOrCreateBlockForSuspendPoint(
+    cir::FuncOp funcOp, mlir::PatternRewriter &rewriter, mlir::Location loc) {
+  mlir::Block &entryBlock = funcOp.getBody().front();
+
+  auto it = llvm::find_if(entryBlock, [](mlir::Operation &op) {
+    auto allocaOp = mlir::dyn_cast<cir::AllocaOp>(&op);
+    return allocaOp && allocaOp.getCoroutineSuspendPoint();
+  });
+
+  // Validate the search result before dereferencing it: without the marker
+  // alloca `it` is the end iterator of the entry block.
+  assert(it != entryBlock.end() &&
+         "coroutine function is missing its suspend point alloca");
+  assert(it->hasOneUse() &&
+         "coroutine suspend point alloca must have exactly one use");
+  auto storeOp = cast<cir::StoreOp>(*it->getUses().begin()->getOwner());
+  auto suspendPoint =
+      cast<cir::ConstantOp>(storeOp.getValue().getDefiningOp());
+  mlir::Block *suspendBlock = suspendPoint->getBlock();
+  // Nothing to do if the constant already leads its block (e.g. after an
+  // earlier call performed the split).
+  if (&suspendBlock->front() == suspendPoint)
+    return suspendBlock;
+
+  mlir::OpBuilder::InsertionGuard guard(rewriter);
+  mlir::Block *remainingBlock =
+      rewriter.splitBlock(suspendBlock, suspendPoint->getIterator());
+  rewriter.setInsertionPointToEnd(suspendBlock);
+  cir::BrOp::create(rewriter, loc, remainingBlock);
+  return remainingBlock;
+}
+
+/// Flattens a structured `cir.await` into explicit control flow.
+///
+/// The ready, suspend and resume regions are inlined, in that order, in front
+/// of the operations that followed the await:
+///   - the `cir.condition` ending the ready region becomes a conditional
+///     branch to the resume region (true) or the suspend region (false);
+///   - the suspend region receives an `llvm.coro.save` call at its start and
+///     an `llvm.coro.suspend` call at its end, whose result is dispatched by a
+///     `cir.switch.flat`: 0 goes to the resume region, 1 to a block that only
+///     holds a `cir.yield`, and every other value to the block returned by
+///     getOrCreateBlockForSuspendPoint();
+///   - the `cir.yield` ending the resume region becomes a branch to the
+///     operations that followed the await.
+/// Finally the enclosing function is switched from `coroutine` to
+/// `flatten_coroutine`.
+class CIRAwaitOpFlattening : public mlir::OpRewritePattern<cir::AwaitOp> {
+public:
+  using OpRewritePattern<cir::AwaitOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::AwaitOp awaitOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    // Split at the insertion point; the regions of the await are inlined in
+    // front of `remainingOpsBlock`.
+    mlir::Block *awaitBlock = rewriter.getInsertionBlock();
+    mlir::Block *remainingOpsBlock =
+        rewriter.splitBlock(awaitBlock, rewriter.getInsertionPoint());
+
+    mlir::Location loc = awaitOp.getLoc();
+
+    mlir::Region &readyRegion = awaitOp.getReady();
+    mlir::Block &beforeReady = awaitOp.getReady().front();
+    mlir::Region &suspendRegion = awaitOp.getSuspend();
+    mlir::Region &resumeRegion = awaitOp.getResume();
+    // Ready region: the condition selects resume (true) or suspend (false).
+    auto conditionOp =
+        cast<cir::ConditionOp>(readyRegion.back().getTerminator());
+    {
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPoint(conditionOp);
+      rewriter.replaceOpWithNewOp<cir::BrCondOp>(
+          conditionOp, conditionOp.getCondition(), &resumeRegion.front(),
+          &suspendRegion.front());
+    }
+    rewriter.inlineRegionBefore(readyRegion, remainingOpsBlock);
+
+    {
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPointToEnd(awaitBlock);
+      cir::BrOp::create(rewriter, loc, mlir::ValueRange(), &beforeReady);
+    }
+
+    auto suspendYield =
+        cast<cir::YieldOp>(suspendRegion.back().getTerminator());
+    cir::LLVMIntrinsicCallOp coroSuspendIntri = nullptr;
+    {
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPoint(&suspendRegion.front().front());
+
+      // Insert coro.save at the beginning of the suspend region.
+      // This captures the current coroutine state before suspension.
+      auto voidPtrTy = cir::PointerType::get(cir::VoidType::get(getContext()));
+      auto nullPtr = cir::ConstantOp::create(
+          rewriter, loc,
+          cir::ConstPtrAttr::get(voidPtrTy, rewriter.getI64IntegerAttr(0)));
+      auto coroSaveIntri = cir::LLVMIntrinsicCallOp::create(
+          rewriter, loc, mlir::StringAttr::get(getContext(), "llvm.coro.save"),
+          cir::IntType::get(getContext(), 32, false),
+          mlir::ValueRange{nullPtr});
+      rewriter.setInsertionPoint(suspendYield);
+
+      bool isFinalSuspend = awaitOp.getKind() == cir::AwaitKind::Final;
+      auto isFinalCoroSuspend = cir::ConstantOp::create(
+          rewriter, loc, cir::BoolAttr::get(getContext(), isFinalSuspend));
+
+      // llvm.coro.suspend returns:
+      //  -1 : coroutine suspended
+      //   0 : coroutine resumed
+      //   1 : coroutine destroyed
+      coroSuspendIntri = cir::LLVMIntrinsicCallOp::create(
+          rewriter, loc,
+          mlir::StringAttr::get(getContext(), "llvm.coro.suspend"),
+          cir::IntType::get(getContext(), 32, false),
+          mlir::ValueRange{coroSaveIntri.getResult(), isFinalCoroSuspend});
+    }
+    rewriter.inlineRegionBefore(suspendRegion, remainingOpsBlock);
+
+    auto func = awaitOp->getParentOfType<cir::FuncOp>();
+
+    {
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPoint(suspendYield);
+      llvm::SmallVector<mlir::APInt, 2> caseValues{mlir::APInt(32, 0),
+                                                   mlir::APInt(32, 1)};
+
+      // One (empty) operand list per case destination.
+      llvm::SmallVector<mlir::ValueRange, 2> caseOperands{mlir::ValueRange(),
+                                                          mlir::ValueRange()};
+
+      llvm::SmallVector<mlir::Block *, 2> caseDestinations;
+
+      // In Classic CodeGen, the destroy path reaches the coroutine cleanup by
+      // emitting an EmitBranchThroughCleanup(), ensuring that all nested
+      // cleanup scopes are executed before control reaches the coro.free
+      // cleanup.
+      //
+      // We achieve the same effect by creating a block that only contains a
+      // cir.yield. createExitTerminator() then propagates control through
+      // every enclosing cleanup scope until the parent coroutine cleanup
+      // (coro.free) is reached, after which execution continues to the return
+      // block.
+      mlir::Block *cleanupBlock = nullptr;
+      {
+        mlir::OpBuilder::InsertionGuard guard(rewriter);
+        cleanupBlock = rewriter.createBlock(remainingOpsBlock);
+        cir::YieldOp::create(rewriter, loc);
+      }
+      // Case 0 (resumed) and case 1 (destroyed), in the order of caseValues.
+      caseDestinations.push_back(&resumeRegion.front());
+      caseDestinations.push_back(cleanupBlock);
+
+      assert(!cir::MissingFeatures::coroutineGroManager());
+
+      // The default destination must be the suspend BB (the return block or
+      // the pre-GRO conversion block).
+      auto coroSuspendSwitch = cir::SwitchFlatOp::create(
+          rewriter, loc, coroSuspendIntri.getResult(),
+          getOrCreateBlockForSuspendPoint(func, rewriter, loc),
+          mlir::ValueRange(), caseValues, caseDestinations, caseOperands);
+
+      rewriter.replaceOp(suspendYield, coroSuspendSwitch);
+    }
+
+    // Resume region: fall through to the code that followed the await.
+    auto resumeYield = cast<cir::YieldOp>(resumeRegion.back().getTerminator());
+    {
+      mlir::OpBuilder::InsertionGuard guard(rewriter);
+      rewriter.setInsertionPoint(resumeYield);
+      rewriter.replaceOpWithNewOp<cir::BrOp>(resumeYield, remainingOpsBlock);
+    }
+    rewriter.inlineRegionBefore(resumeRegion, remainingOpsBlock);
+
+    rewriter.eraseOp(awaitOp);
+
+    // Mark the enclosing function as a flattened coroutine. The attributes
+    // are changed through the rewriter so the driver is notified of the
+    // in-place modification.
+    rewriter.modifyOpInPlace(func, [&] {
+      func.setCoroutine(false);
+      func.setFlattenCoroutine(true);
+    });
+
+    return mlir::success();
+  }
+};
+
+/// Flattens `cir.coro.body` by splicing its region into the block sequence of
+/// the parent.
+///
+/// The pattern fails (and is expected to be retried) while the body still
+/// contains nested operations that need flattening. Once it applies, every
+/// `cir.co_return` found in the body is turned into a branch to the block
+/// created by splitting the parent block at the rewriter's insertion point.
+class CIRCoroBodyOpFlattening
+    : public mlir::OpRewritePattern<cir::CoroBodyOp> {
+public:
+  using OpRewritePattern<cir::CoroBodyOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::CoroBodyOp coroBodyOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Region &bodyRegion = coroBodyOp.getBody();
+    if (hasNestedOpsToFlatten(bodyRegion))
+      return mlir::failure();
+
+    // Gather the co_returns up front, while they still live in the region.
+    llvm::SmallVector<cir::CoReturnOp> pendingCoReturns;
+    bodyRegion.walk<mlir::WalkOrder::PreOrder>(
+        [&](cir::CoReturnOp coReturn) {
+          pendingCoReturns.push_back(coReturn);
+        });
+
+    mlir::OpBuilder::InsertionGuard guard(rewriter);
+    mlir::Location loc = coroBodyOp.getLoc();
+
+    // Split the parent block at the insertion point; the body is inlined in
+    // front of the new block.
+    mlir::Block *parentBlock = rewriter.getInsertionBlock();
+    mlir::Block *afterBodyBlock =
+        rewriter.splitBlock(parentBlock, rewriter.getInsertionPoint());
+
+    // Remember the entry block before inlining empties the region.
+    mlir::Block *bodyEntry = &bodyRegion.front();
+    rewriter.inlineRegionBefore(bodyRegion, afterBodyBlock);
+
+    rewriter.setInsertionPointToEnd(parentBlock);
+    cir::BrOp::create(rewriter, loc, mlir::ValueRange(), bodyEntry);
+
+    // CIR CodeGen always emits the final-suspend path right after the
+    // CoroBodyOp, so the split-off block is where a co_return must land.
+    for (cir::CoReturnOp coReturn : pendingCoReturns) {
+      rewriter.setInsertionPoint(coReturn);
+      rewriter.replaceOpWithNewOp<cir::BrOp>(coReturn, afterBodyBlock);
+    }
+
+    rewriter.replaceOp(coroBodyOp, afterBodyBlock->getArguments());
+
+    return mlir::success();
+  }
+};
+
 void populateFlattenCFGPatterns(RewritePatternSet &patterns) {
   patterns
       .add<CIRIfFlattening, CIRLoopOpInterfaceFlattening, CIRScopeOpFlattening,
            CIRSwitchOpFlattening, CIRTernaryOpFlattening,
-           CIRCleanupScopeOpFlattening, CIRTryOpFlattening>(
+           CIRCleanupScopeOpFlattening, CIRTryOpFlattening,
+           CIRAwaitOpFlattening, CIRCoroBodyOpFlattening>(
           patterns.getContext());
 }
 
@@ -1820,8 +2029,8 @@ void CIRFlattenCFGPass::runOnOperation() {
   // Collect operations to apply patterns.
   llvm::SmallVector<Operation *, 16> ops;
   getOperation()->walk<mlir::WalkOrder::PostOrder>([&](Operation *op) {
-    if (isa<IfOp, ScopeOp, SwitchOp, LoopOpInterface, TernaryOp, 
CleanupScopeOp,
-            TryOp>(op))
+    if (isa<AwaitOp, CoroBodyOp, IfOp, ScopeOp, SwitchOp, LoopOpInterface,
+            TernaryOp, CleanupScopeOp, TryOp>(op))
       ops.push_back(op);
   });
 
diff --git a/clang/test/CIR/CodeGenCoroutines/Inputs/coroutine.h 
b/clang/test/CIR/CodeGenCoroutines/Inputs/coroutine.h
new file mode 100644
index 0000000000000..1c0cee22af2fc
--- /dev/null
+++ b/clang/test/CIR/CodeGenCoroutines/Inputs/coroutine.h
@@ -0,0 +1,118 @@
+
+namespace std { // Hand-written subset of std, enough for the coroutine tests.
+
+template<typename T> struct remove_reference       { typedef T type; };
+template<typename T> struct remove_reference<T &>  { typedef T type; };
+template<typename T> struct remove_reference<T &&> { typedef T type; };
+
+template<typename T>
+typename remove_reference<T>::type &&move(T &&t) noexcept; // Declared only.
+
+template <class Ret, typename... T>
+struct coroutine_traits { using promise_type = typename Ret::promise_type; };
+
+template <class Promise = void>
+struct coroutine_handle {
+  static coroutine_handle from_address(void *) noexcept;
+};
+template <>
+struct coroutine_handle<void> {
+  template <class PromiseType>
+  coroutine_handle(coroutine_handle<PromiseType>) noexcept;
+  static coroutine_handle from_address(void *);
+};
+
+struct suspend_always { // Awaitable whose await_ready() is always false.
+  bool await_ready() noexcept { return false; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+struct suspend_never { // Awaitable whose await_ready() is always true.
+  bool await_ready() noexcept { return true; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+struct string { // Members are declared only.
+  int size() const;
+  string();
+  string(char const *s);
+};
+
+template<typename T>
+struct optional { // Members are declared only.
+  optional();
+  optional(const T&);
+  T &operator*() &;
+  T &&operator*() &&;
+  T &value() &;
+  T &&value() &&;
+};
+} // namespace std
+
+namespace folly { // Test-only declarations in the folly::coro namespace.
+namespace coro {
+
+using std::suspend_always;
+using std::suspend_never;
+using std::coroutine_handle;
+
+using SemiFuture = int; // Return type of Task<void>::semi().
+
+template<class T>
+struct Task {
+    struct promise_type { // Suspends at both the initial and final suspend.
+        Task<T> get_return_object() noexcept;
+        suspend_always initial_suspend() noexcept;
+        suspend_always final_suspend() noexcept;
+        void return_value(T);
+        void unhandled_exception();
+        auto yield_value(Task<T>) noexcept { return final_suspend(); }
+    };
+    bool await_ready() noexcept { return false; } // Always "not ready".
+    void await_suspend(coroutine_handle<>) noexcept {}
+    T await_resume();
+};
+
+template<>
+struct Task<void> {
+    struct promise_type { // Uses return_void() instead of return_value().
+        Task<void> get_return_object() noexcept;
+        suspend_always initial_suspend() noexcept;
+        suspend_always final_suspend() noexcept;
+        void return_void() noexcept;
+        void unhandled_exception() noexcept;
+        auto yield_value(Task<void>) noexcept { return final_suspend(); }
+    };
+    bool await_ready() noexcept { return false; } // Always "not ready".
+    void await_suspend(coroutine_handle<>) noexcept {}
+    void await_resume() noexcept {}
+    SemiFuture semi();
+};
+
+// FIXME: add CIRGen support here.
+// struct blocking_wait_fn {
+//   template <typename T>
+//   T operator()(Task<T>&& awaitable) const {
+//     return T();
+//   }
+// };
+
+// inline constexpr blocking_wait_fn blocking_wait{};
+// static constexpr blocking_wait_fn const& blockingWait = blocking_wait;
+template <typename T>
+T blockingWait(Task<T>&& awaitable) { // Ignores the task; returns T().
+  return T();
+}
+
+struct co_invoke_fn {
+  template <typename F, typename... A>
+  Task<void> operator()(F&& f, A&&... a) const { // Arguments are ignored.
+    return Task<void>();
+  }
+};
+
+co_invoke_fn co_invoke; // Global function object (non-inline definition).
+
+}} // namespace folly::coro
diff --git a/clang/test/CIR/CodeGen/coro-exceptions.cpp 
b/clang/test/CIR/CodeGenCoroutines/coro-exceptions.cpp
similarity index 100%
rename from clang/test/CIR/CodeGen/coro-exceptions.cpp
rename to clang/test/CIR/CodeGenCoroutines/coro-exceptions.cpp
diff --git a/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp 
b/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp
new file mode 100644
index 0000000000000..fc5ca2e4d43ba
--- /dev/null
+++ b/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp
@@ -0,0 +1,437 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir 
-emit-cir %s -o %t.cir
+// RUN: cir-opt --cir-flatten-cfg %t.cir -o %t.flat.cir
+// RUN: FileCheck --input-file=%t.flat.cir %s -check-prefix=CIR-FLAT
+
+#include "Inputs/coroutine.h"
+
+using VoidTask = folly::coro::Task<void>;
+
+VoidTask silly_task() { // Coroutine with one explicit co_await and no locals.
+  co_await std::suspend_always();
+}
+
+// CIR-FLAT: cir.func flatten-coroutine {{.*}} @_Z10silly_taskv
+
+// CIR-FLAT: %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
+// CIR-FLAT: %[[NullPtr:.*]] = cir.const #cir.ptr<null>
+// CIR-FLAT: %[[Align:.*]] = cir.const #cir.int<16>
+// CIR-FLAT: %[[CoroId:.*]] = cir.call @__builtin_coro_id(%[[Align]], 
%[[NullPtr]], %[[NullPtr]], %[[NullPtr]])
+// CIR-FLAT: %[[SUSPEND_POINT:.*]] = cir.alloca "__coroutine_suspend_point"
+// CIR-FLAT: %[[SavedFrameAddr:.*]] = cir.alloca "__coro_frame_addr"
+// CIR-FLAT: %[[SuspendAlwaysAddr:.*]] = cir.alloca "ref.tmp0"
+// CIR-FLAT: %[[ShouldAlloc:.*]] = cir.call @__builtin_coro_alloc(%[[CoroId]]) 
: (!u32i) -> !cir.bool
+// CIR-FLAT: cir.store %[[NullPtr]], %[[SavedFrameAddr]]
+// CIR-FLAT: cir.brcond %[[ShouldAlloc]] ^[[CORO_ALLOC:.*]], ^[[CORO_INIT:.*]]
+// CIR-FLAT: ^[[CORO_ALLOC]]:
+// CIR-FLAT:   %[[CoroSize:.*]] = cir.call @__builtin_coro_size()
+// CIR-FLAT:   %[[AllocAddr:.*]] = cir.call @_Znwm(%[[CoroSize]])
+// CIR-FLAT:   cir.store %[[AllocAddr]], %[[SavedFrameAddr]] : 
!cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR-FLAT:   cir.br ^[[CORO_INIT]]
+// CIR-FLAT: ^[[CORO_INIT]]:
+// CIR-FLAT:   %[[LOAD_CORO_FRAME:.*]] = cir.load %[[SavedFrameAddr]]
+// CIR-FLAT:   %[[CoroFrameAddr:.*]] = cir.call 
@__builtin_coro_begin(%[[CoroId]], %[[LOAD_CORO_FRAME]])
+// CIR-FLAT:   cir.br ^[[INIT_AWAIT_READY:.*]]
+// CIR-FLAT: ^[[INIT_AWAIT_READY]]:
+// CIR-FLAT:  %[[RetObj:.*]] = cir.call 
@_ZN5folly4coro4TaskIvE12promise_type17get_return_objectEv(%[[VoidPromisseAddr:.*]])
+// CIR-FLAT:   cir.store {{.*}} %[[RetObj]], %[[VoidTaskAddr:.*]]
+// CIR-FLAT:   %[[Tmp0:.*]] = cir.call 
@_ZN5folly4coro4TaskIvE12promise_type15initial_suspendEv(%[[VoidPromisseAddr]])
+// CIR-FLAT:   cir.store {{.*}} %[[Tmp0]], %[[SuspendAlwaysAddr]]
+// CIR-FLAT:   cir.br ^[[INIT_AWAIT_READY_CONT:.*]]
+// CIR-FLAT: ^[[INIT_AWAIT_READY_CONT]]:
+// CIR-FLAT:   %[[ShouldSuspend:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv(%[[SuspendAlwaysAddr]])
+// CIR-FLAT:   cir.brcond %[[ShouldSuspend]] ^[[AWAIT_INIT_RESUME:.*]], 
^[[AWAIT_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_SUSPEND]]:
+// CIR-FLAT:   %[[NULLPTR2:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+
+// TODO (cir): We should use a cir.token or mlir.token instead of returning 
!u32i.
+// CIR-FLAT:   %[[SAVE_TOKEN:.*]] = cir.call_llvm_intrinsic "llvm.coro.save" 
%[[NULLPTR2]] : (!cir.ptr<!void>) -> !u32i
+// CIR-FLAT:   %[[HANDLE:[0-9]+]] = cir.call @_ZNSt16coroutine_handleIN5folly4coro4TaskIvE12promise_typeEE12from_addressEPv(%{{[0-9]+}})
+// CIR-FLAT:   cir.store align(1) %[[HANDLE]], %[[HANDLE_ADDR:[0-9]+]]
+// CIR-FLAT:   %[[HANDLE_VAL:[0-9]+]] = cir.load align(1) %[[HANDLE_ADDR]]
+// CIR-FLAT:   cir.call @_ZNSt16coroutine_handleIvEC1IN5folly4coro4TaskIvE12promise_typeEEES_IT_E(%[[VOID_HANDLE_ADDR:[0-9]+]], %[[HANDLE_VAL]])
+// CIR-FLAT:   %[[VOID_HANDLE:[0-9]+]] = cir.load align(1) %[[VOID_HANDLE_ADDR]]
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE(%{{[0-9]+}}, %[[VOID_HANDLE]])
+// CIR-FLAT:   %[[IS_FINAL_SUSPEND:.*]] = cir.const #false
+// CIR-FLAT:   %[[SUSPEND_RESULT:.*]] = cir.call_llvm_intrinsic 
"llvm.coro.suspend" %[[SAVE_TOKEN]], %[[IS_FINAL_SUSPEND]]
+// CIR-FLAT:   cir.switch.flat %[[SUSPEND_RESULT]] : !u32i, ^[[CORO_RET:.*]] [
+// CIR-FLAT:     0: ^[[AWAIT_INIT_RESUME]],
+// CIR-FLAT:     1: ^[[INIT_CLEANUP_DESTROY:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[INIT_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.const #cir.int<0>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE:.*]]
+// CIR-FLAT: ^[[AWAIT_INIT_RESUME]]:
+// CIR-FLAT:   cir.call 
@_ZNSt14suspend_always12await_resumeEv(%[[SuspendAlwaysAddr]])
+// CIR-FLAT:   cir.br ^[[CORO_BODY:.*]]
+
+// The remaining await operations follow the same lowering pattern as above,
+// so we only check a few key instructions here instead of matching the entire 
IR
+
+// CIR-FLAT: ^[[CORO_BODY:.*]]:
+// CIR-FLAT:   cir.br ^[[USER_AWAIT:.*]]
+// CIR-FLAT: ^[[USER_AWAIT]]:
+// CIR-FLAT:   cir.br ^[[USER_AWAIT_READY_CONT:.*]]
+// CIR-FLAT: ^[[USER_AWAIT_READY_CONT]]:
+// CIR-FLAT:   %[[ShouldSuspend2:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv
+// CIR-FLAT:   cir.brcond %[[ShouldSuspend2]] ^[[AWAIT_USER_RESUME:.*]], 
^[[AWAIT_USER_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_USER_SUSPEND]]
+// CIR-FLAT:   %[[NULLPTR3:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CIR-FLAT:   %[[SAVE_TOKEN2:.*]] = cir.call_llvm_intrinsic "llvm.coro.save" 
%[[NULLPTR3]]
+// CIR-FLAT:   %[[IS_FINAL_SUSPEND2:.*]] = cir.const #false
+// CIR-FLAT:   %[[SUSPEND_RESULT2:.*]] = cir.call_llvm_intrinsic 
"llvm.coro.suspend" %[[SAVE_TOKEN2]], %[[IS_FINAL_SUSPEND2]]
+// CIR-FLAT:   cir.switch.flat %[[SUSPEND_RESULT2]] : !u32i, ^[[CORO_RET]] [
+// CIR-FLAT:     0: ^[[AWAIT_USER_RESUME]],
+// CIR-FLAT:     1: ^[[user_cleanup_destroy:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[user_cleanup_destroy]]:
+// CIR-FLAT:   cir.const #cir.int<1>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE:.*]]
+// CIR-FLAT: ^[[AWAIT_USER_RESUME]]:
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv
+// CIR-FLAT:   cir.br ^[[CO_RETURN:.*]]
+// CIR-FLAT: ^[[CO_RETURN]]:
+// CIR-FLAT:   cir.call 
@_ZN5folly4coro4TaskIvE12promise_type11return_voidEv(%[[VoidPromisseAddr]])
+
+// This corresponds to the implicit cir.co_return path, which exits the
+// cir.coro.body and branches to the final suspend block.
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB:.*]]
+// CIR-FLAT: ^[[FINAL_SUSPEND_BB]]:
+// CIR-FLAT:   %[[final_suspend:.*]] = cir.call 
@_ZN5folly4coro4TaskIvE12promise_type13final_suspendEv(%[[VoidPromisseAddr]])
+// CIR-FLAT:   cir.store {{.*}} %[[final_suspend]], %{{.*}}
+// CIR-FLAT:   cir.br ^[[FINAL_AWAIT_READY_CONT:.*]]
+// CIR-FLAT: ^[[FINAL_AWAIT_READY_CONT]]:
+// CIR-FLAT:   %[[ShouldSuspend3:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv
+// CIR-FLAT:   cir.brcond %[[ShouldSuspend3]] ^[[AWAIT_FINAL_RESUME:.*]], 
^[[AWAIT_FINAL_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_FINAL_SUSPEND]]:
+// CIR-FLAT:   %[[NULLPTR4:.*]] = cir.const #cir.ptr<null>
+// CIR-FLAT:   %[[SAVE_TOKEN3:.*]] = cir.call_llvm_intrinsic "llvm.coro.save" 
%[[NULLPTR4]]
+// CIR-FLAT:   %[[IS_FINAL_SUSPEND3:.*]] = cir.const #true
+// CIR-FLAT:   %[[SUSPEND_RESULT3:.*]] = cir.call_llvm_intrinsic 
"llvm.coro.suspend" %[[SAVE_TOKEN3]], %[[IS_FINAL_SUSPEND3]]
+// CIR-FLAT:   cir.switch.flat %[[SUSPEND_RESULT3]] : !u32i, ^[[CORO_RET]] [
+// CIR-FLAT:     0: ^[[AWAIT_FINAL_RESUME]],
+// CIR-FLAT:     1: ^[[final_cleanup_destroy:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[final_cleanup_destroy]]:
+// CIR-FLAT:   cir.const #cir.int<2>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE:.*]]
+// CIR-FLAT: ^[[AWAIT_FINAL_RESUME]]:
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv
+// CIR-FLAT:   cir.br ^[[FIANL_CLEANUP_EXIT:.*]]
+// CIR-FLAT: ^[[FIANL_CLEANUP_EXIT]]:
+// CIR-FLAT:   cir.const #cir.int<3> : !s32i
+// CIR-FLAT:   cir.store %{{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE]]
+
+// Check whether llvm.coro.free returned a non-null pointer, indicating
+// that the coroutine frame must be deallocated.
+// CIR-FLAT: ^[[CLEANUP_CORO_FREE]]:
+// CIR-FLAT:   %[[SHOULD_FREE:.*]] = cir.call 
@__builtin_coro_free(%[[CoroId]], %[[CoroFrameAddr]])
+// CIR-FLAT:   %[[NULLPTR5:.*]] = cir.const #cir.ptr<null>
+// CIR-FLAT:   %[[NEEDS_FREE:.*]] = cir.cmp ne %[[SHOULD_FREE]], %[[NULLPTR5]]
+// CIR-FLAT:   cir.brcond %[[NEEDS_FREE]] ^[[FREE_FRAME:.*]], 
^[[EXIT_CLEANUP:.*]]
+// CIR-FLAT: ^[[FREE_FRAME]]:
+// CIR-FLAT:   %[[CoroSize2:.*]] = cir.call @__builtin_coro_size() : () -> 
(!u64i {llvm.noundef})
+// CIR-FLAT:   cir.call @_ZdlPvm(%[[SHOULD_FREE]], %[[CoroSize2]])
+// CIR-FLAT:   cir.br ^[[EXIT_CLEANUP]]
+// CIR-FLAT: ^[[EXIT_CLEANUP]]:
+// CIR-FLAT:   cir.br ^[[EXIT_CLEANUP_SWITCH:.*]]
+// CIR-FLAT: ^[[EXIT_CLEANUP_SWITCH]]:
+// CIR-FLAT:   %[[LOAD_DEST_SLOT:.*]] = cir.load %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.switch.flat %[[LOAD_DEST_SLOT]] : !s32i, 
^[[DEFAULT_EXIT:.*]] [
+// CIR-FLAT:     0: ^[[EXIT1:.*]],
+// CIR-FLAT:     1: ^[[EXIT2:.*]],
+// CIR-FLAT:     2: ^[[EXIT3:.*]],
+// CIR-FLAT:     3: ^[[EXIT4:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[EXIT1]]:
+// CIR-FLAT:   cir.br ^[[TO_RET:.*]]
+// CIR-FLAT: ^[[EXIT2]]:
+// CIR-FLAT:   cir.br ^[[TO_RET]]
+// CIR-FLAT: ^[[EXIT3]]:
+// CIR-FLAT:   cir.br ^[[TO_RET]]
+// CIR-FLAT: ^[[EXIT4]]:
+// CIR-FLAT:   cir.br ^[[TO_RET]]
+// CIR-FLAT: ^[[DEFAULT_EXIT]]:
+// CIR-FLAT:   cir.unreachable
+// CIR-FLAT: ^[[TO_RET]]:
+// CIR-FLAT:   cir.br ^[[CORO_RET]]
+// CIR-FLAT: ^[[CORO_RET]]:
+// CIR-FLAT:   %[[CORO_RET_ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CIR-FLAT:   cir.store %[[CORO_RET_ONE]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+// CIR-FLAT:   %[[NULLPTR6:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CIR-FLAT:   %[[UNWIND:.*]] = cir.const #false
+// CIR-FLAT:   cir.call @__builtin_coro_end(%[[NULLPTR6]], %[[UNWIND]])
+// CIR-FLAT:   cir.return
+// CIR-FLAT: }
+
+struct HasDtor {
+  ~HasDtor();
+};
+
+VoidTask silly_task_with_dtor() {
+  HasDtor local;
+  co_await std::suspend_always();
+}
+
+// CIR-FLAT: cir.func flatten-coroutine {{.*}} @_Z20silly_task_with_dtorv
+// CIR-FLAT:   %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
+// CIR-FLAT:   %[[SuspendAlwaysAddr:.*]] = cir.alloca "ref.tmp0"
+
+// CIR-FLAT:   %[[ShouldSuspend:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv(%[[SuspendAlwaysAddr]])
+// CIR-FLAT:   cir.brcond %[[ShouldSuspend]]  ^[[AWAIT_INIT_RESUME:.*]], 
^[[AWAIT_INIT_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_INIT_SUSPEND]]:
+// CIR-FLAT:   %[[NULLPTR:.*]] = cir.const #cir.ptr<null>
+// CIR-FLAT:   %[[SAVE_TOKEN:.*]] = cir.call_llvm_intrinsic "llvm.coro.save" 
%[[NULLPTR]]
+// CIR-FLAT:   %[[IS_FINAL_SUSPEND:.*]] = cir.const #false
+// CIR-FLAT:   %[[SUSPEND_RESULT:.*]] = cir.call_llvm_intrinsic 
"llvm.coro.suspend" %[[SAVE_TOKEN]], %[[IS_FINAL_SUSPEND]]
+// CIR-FLAT:   cir.switch.flat %[[SUSPEND_RESULT]] : !u32i, ^[[CORO_RET:.*]] [
+// CIR-FLAT:     0: ^[[AWAIT_INIT_RESUME]],
+// CIR-FLAT:     1: ^[[INIT_CLEANUP_DESTROY:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[INIT_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.const #cir.int<0>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE:.*]]
+// CIR-FLAT: ^[[AWAIT_INIT_RESUME]]:
+// CIR-FLAT:   cir.call 
@_ZNSt14suspend_always12await_resumeEv(%[[SuspendAlwaysAddr]])
+// CIR-FLAT:   cir.br ^[[AWAIT_INIT_RESUME_CONT:.*]]
+// CIR-FLAT: ^[[AWAIT_INIT_RESUME_CONT]]:
+// CIR-FLAT:   cir.br ^[[CORO_BODY:.*]]
+// CIR-FLAT: ^[[CORO_BODY]]:
+// CIR-FLAT:   cir.br ^[[HAS_DTOR_CLEANUP_SCOPE:.*]]
+// CIR-FLAT: ^[[HAS_DTOR_CLEANUP_SCOPE]]:
+// CIR-FLAT:   cir.br ^[[USER_AWAIT_READY:.*]]
+// CIR-FLAT: ^[[USER_AWAIT_READY]]:
+// CIR-FLAT:   %[[ShouldSuspend2:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv
+// CIR-FLAT:   cir.brcond %[[ShouldSuspend2]] ^[[AWAIT_USER_RESUME:.*]], 
^[[AWAIT_USER_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_USER_SUSPEND]]:
+// CIR-FLAT:   %[[NULLPTR2:.*]] = cir.const #cir.ptr<null>
+// CIR-FLAT:   %[[SAVE_TOKEN2:.*]] = cir.call_llvm_intrinsic "llvm.coro.save" 
%[[NULLPTR2]]
+// CIR-FLAT:   %[[IS_FINAL_SUSPEND2:.*]] = cir.const #false
+// CIR-FLAT:   %[[SUSPEND_RESULT2:.*]] = cir.call_llvm_intrinsic 
"llvm.coro.suspend" %[[SAVE_TOKEN2]], %[[IS_FINAL_SUSPEND2]]
+
+// The destroy branch cannot jump directly to the coroutine cleanup.
+// Since this await is nested within the lifetime of a local HasDtor object,
+// destruction must first run HasDtor::~HasDtor() before proceeding to the
+// coroutine-frame cleanup.
+//
+// Destroy path:
+// USER_CLEANUP_DESTROY -> HasDtor::~HasDtor() -> coro.free cleanup
+
+// CIR-FLAT:   cir.switch.flat %[[SUSPEND_RESULT2]] : !u32i, ^[[CORO_RET]] [
+// CIR-FLAT:     0: ^[[AWAIT_USER_RESUME]],
+// CIR-FLAT:     1: ^[[USER_CLEANUP_DESTROY:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[USER_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.const #cir.int<0>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[HAS_DTR:.*]]
+// CIR-FLAT: ^[[AWAIT_USER_RESUME]]:
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv
+// CIR-FLAT:   cir.br ^[[CO_RETURN:.*]]
+// CIR-FLAT: ^[[CO_RETURN]]:
+// CIR-FLAT:   cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv
+// CIR-FLAT:   cir.const #cir.int<1>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[HAS_DTR]]
+// CIR-FLAT: ^[[HAS_DTR]]:
+// CIR-FLAT:   cir.call @_ZN7HasDtorD1Ev
+// CIR-FLAT:   cir.br ^[[HAS_DTR_CONT:.*]]
+// CIR-FLAT: ^[[HAS_DTR_CONT]]:
+// CIR-FLAT:   %[[LOAD_DEST_SLOT:.*]] = cir.load %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.switch.flat %[[LOAD_DEST_SLOT]] : !s32i, ^[[DEFAULT:.*]] [
+// CIR-FLAT:     0: ^[[FROM_USER_CLEANUP_DESTROY:.*]],
+// CIR-FLAT:     1: ^[[FROM_CO_RETURN:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[FROM_USER_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.br ^[[EXIT_TO_CORO_FREE:.*]]
+// CIR-FLAT: ^[[FROM_CO_RETURN]]:
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB:.*]]
+// CIR-FLAT: ^[[DEFAULT]]:
+// CIR-FLAT:   cir.unreachable
+// CIR-FLAT: ^[[EXIT_TO_CORO_FREE]]:
+// CIR-FLAT:   cir.const #cir.int<1>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE]]
+// CIR-FLAT: ^[[FINAL_SUSPEND_BB]]:
+// CIR-FLAT:   cir.call @_ZN5folly4coro4TaskIvE12promise_type13final_suspendEv
+// CIR-FLAT:   cir.br ^[[FINAL_AWAIT_READY_CONT:.*]]
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always11await_readyEv
+// CIR-FLAT:   cir.brcond {{.*}} ^[[AWAIT_FINAL_RESUME:.*]], 
^[[AWAIT_FINAL_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_FINAL_SUSPEND]]:
+// CIR-FLAT:   cir.call_llvm_intrinsic "llvm.coro.save"
+// CIR-FLAT:   cir.const #true
+// CIR-FLAT:   %[[SUSPEND_RESULT3:.*]] = cir.call_llvm_intrinsic 
"llvm.coro.suspend"
+// CIR-FLAT:   cir.switch.flat %[[SUSPEND_RESULT3]] : !u32i, ^[[CORO_RET]] [
+// CIR-FLAT:     0: ^[[AWAIT_FINAL_RESUME]],
+// CIR-FLAT:     1: ^[[FINAL_CLEANUP_DESTROY:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[FINAL_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.const #cir.int<2>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE]]
+// CIR-FLAT: ^[[AWAIT_FINAL_RESUME]]:
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv
+// CIR-FLAT:   cir.br ^[[CORO_BODY_EXIT:.*]]
+// CIR-FLAT: ^[[CORO_BODY_EXIT]]:
+// CIR-FLAT:   cir.const #cir.int<3> : !s32i
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE]]
+// CIR-FLAT: ^[[CLEANUP_CORO_FREE]]:
+// CIR-FLAT:   %[[SHOULD_FREE:.*]] = cir.call @__builtin_coro_free
+// CIR-FLAT:   %[[NULLPTR5:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CIR-FLAT:   %[[NEEDS_FREE:.*]] = cir.cmp ne %[[SHOULD_FREE]], %[[NULLPTR5]]
+// CIR-FLAT:   cir.brcond %[[NEEDS_FREE]] ^[[FREE_FRAME:.*]], 
^[[EXIT_CLEANUP:.*]]
+// CIR-FLAT: ^[[FREE_FRAME]]:
+// CIR-FLAT:   %[[CoroSize:.*]] = cir.call @__builtin_coro_size() : () -> (!u64i {llvm.noundef})
+// CIR-FLAT:   cir.call @_ZdlPvm(%[[SHOULD_FREE]], %[[CoroSize]]) nothrow : (!cir.ptr<!void> {llvm.noundef}, !u64i {llvm.noundef}) -> ()
+// CIR-FLAT:   cir.br ^[[EXIT_CLEANUP]]
+// CIR-FLAT: ^[[EXIT_CLEANUP]]:
+// CIR-FLAT:   cir.br ^[[EXIT_CLEANUP_SWITCH:.*]]
+// CIR-FLAT: ^[[EXIT_CLEANUP_SWITCH]]:
+// CIR-FLAT:   %[[LOAD_DEST_SLOT:.*]] = cir.load %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.switch.flat %[[LOAD_DEST_SLOT]]
+// CIR-FLAT: ^[[CORO_RET]]:
+// CIR-FLAT:   cir.return
+
+folly::coro::Task<int> co_returns(int flag) {
+  if (flag == 1) {
+    co_return 1;
+  } else if (flag == 2) {
+    co_return 2;
+  }
+  co_return 3;
+}
+
+// CIR-FLAT: cir.func flatten-coroutine {{.*}} @_Z10co_returnsi
+// CIR-FLAT:   %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
+// CIR-FLAT:   %[[SUSPEND_POINT:.*]] = cir.alloca "__coroutine_suspend_point"
+
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv
+// CIR-FLAT:   %[[LOAD_FLAG:.*]] = cir.load {{.*}} %[[FLAG:.*]]
+// CIR-FLAT:   %[[ONE:.*]] = cir.const #cir.int<1>
+// CIR-FLAT:   %[[EQ_ONE:.*]] = cir.cmp eq %[[LOAD_FLAG]], %[[ONE]]
+// CIR-FLAT:   cir.brcond %[[EQ_ONE]] ^[[IF_EQ_ONE:.*]], ^[[ELSE_IF:.*]]
+// CIR-FLAT: ^[[IF_EQ_ONE]]:
+// CIR-FLAT:   %[[ONE:.*]] = cir.const #cir.int<1>
+// CIR-FLAT:   cir.call 
@_ZN5folly4coro4TaskIiE12promise_type12return_valueEi(%[[PROMISE:.*]], %[[ONE]])
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB:.*]]
+// CIR-FLAT: ^[[ELSE_IF]]:
+// CIR-FLAT:   cir.br ^[[ELSE_IF_CONT:.*]]
+// CIR-FLAT: ^[[ELSE_IF_CONT]]:
+// CIR-FLAT:   %[[LOAD_FLAG2:.*]] = cir.load {{.*}} %[[FLAG]]
+// CIR-FLAT:   %[[TWO:.*]] = cir.const #cir.int<2>
+// CIR-FLAT:   %[[EQ_TWO:.*]] = cir.cmp eq %[[LOAD_FLAG2]], %[[TWO]]
+// CIR-FLAT:   cir.brcond %[[EQ_TWO]] ^[[IF_EQ_TWO:.*]], ^[[ELSE:.*]]
+// CIR-FLAT: ^[[IF_EQ_TWO]]:
+// CIR-FLAT:   %[[TWO:.*]] = cir.const #cir.int<2>
+// CIR-FLAT:   cir.call 
@_ZN5folly4coro4TaskIiE12promise_type12return_valueEi(%[[PROMISE]], %[[TWO]])
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB]]
+// CIR-FLAT: ^[[ELSE]]:
+// CIR-FLAT:   cir.br ^[[ELSE_CONT:.*]]
+// CIR-FLAT: ^[[ELSE_CONT]]:
+// CIR-FLAT:   %[[THREE:.*]] = cir.const #cir.int<3>
+// CIR-FLAT:   cir.call 
@_ZN5folly4coro4TaskIiE12promise_type12return_valueEi(%[[PROMISE]], %[[THREE]])
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB]]
+// CIR-FLAT: ^[[FINAL_SUSPEND_BB]]:
+// CIR-FLAT:   cir.call @_ZN5folly4coro4TaskIiE12promise_type13final_suspendEv
+
+
+
+folly::coro::Task<int> co_return_with_dtor(int flag) {
+  HasDtor local;
+  if (flag)
+    co_return 1;        // local dtor must run here
+  co_return 2;
+}
+
+// CIR-FLAT:  cir.func flatten-coroutine {{.*}} @_Z19co_return_with_dtori
+// CIR-FLAT:  %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
+// CIR-FLAT:  %[[SUSPEND_POINT:.*]] = cir.alloca "__coroutine_suspend_point"
+// CIR-FLAT:  %[[ShouldSuspend:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv
+// CIR-FLAT:  cir.brcond %[[ShouldSuspend]] ^[[AWAIT_INIT_RESUME:.*]], 
^[[AWAIT_INIT_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_INIT_SUSPEND]]:
+// CIR-FLAT:   cir.switch.flat {{.*}} : !u32i, ^[[CORO_RET:.*]] [
+// CIR-FLAT:     0: ^[[AWAIT_INIT_RESUME]],
+// CIR-FLAT:     1: ^[[INIT_CLEANUP_DESTROY:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[INIT_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.const #cir.int<0> : !s32i
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE:.*]]
+// CIR-FLAT: ^[[AWAIT_INIT_RESUME]]:
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv(%{{.*}})
+// CIR-FLAT:   cir.br ^[[CORO_BODY:.*]]
+// CIR-FLAT: ^[[CORO_BODY]]:
+// CIR-FLAT:   cir.br ^[[HAS_DTR_CLEANUP_SCOPE:.*]]
+// CIR-FLAT: ^[[HAS_DTR_CLEANUP_SCOPE]]:
+// CIR-FLAT:   cir.br ^[[IF_SCOPE:.*]]
+// CIR-FLAT: ^[[IF_SCOPE]]:
+// CIR-FLAT:   cir.br ^[[IF_SCOPE_CONT:.*]]
+// CIR-FLAT: ^[[IF_SCOPE_CONT]]:
+// CIR-FLAT:   %[[LOAD_FLAG:.*]] = cir.load {{.*}} %[[FLAG_ARG:.*]]
+// CIR-FLAT:   %[[CAST_TO_BOOL:.*]] = cir.cast int_to_bool %[[LOAD_FLAG]]
+// CIR-FLAT:   cir.brcond %[[CAST_TO_BOOL]] ^[[IF_BODY:.*]], ^[[IF_CONT:.*]]
+// CIR-FLAT: ^[[IF_BODY]]:
+// CIR-FLAT:   %[[ONE:.*]] = cir.const #cir.int<1>
+// CIR-FLAT:   cir.call 
@_ZN5folly4coro4TaskIiE12promise_type12return_valueEi(%[[PROMISE:.*]], %[[ONE]])
+// CIR-FLAT:   cir.const #cir.int<0>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+
+// This cir.co_return exits through the cleanup path. Control is transferred
+// to the destructor cleanup block before reaching the final suspend point.
+// co_return -> cleanup -> final suspend
+// CIR-FLAT:   cir.br ^[[HAS_DTOR_CLEANUP:.*]]
+// CIR-FLAT: ^[[IF_CONT]]:
+// CIR-FLAT:   %[[TWO:.*]] = cir.const #cir.int<2>
+// CIR-FLAT:   cir.call 
@_ZN5folly4coro4TaskIiE12promise_type12return_valueEi(%[[PROMISE]], %[[TWO]])
+// CIR-FLAT:   cir.const #cir.int<1>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+
+// This cir.co_return exits through the cleanup path. Control is transferred
+// to the destructor cleanup block before reaching the final suspend point.
+// co_return -> cleanup -> final suspend
+// CIR-FLAT:   cir.br ^[[HAS_DTOR_CLEANUP]]
+// CIR-FLAT: ^[[HAS_DTOR_CLEANUP]]:
+// CIR-FLAT:   cir.call @_ZN7HasDtorD1Ev
+// CIR-FLAT:   cir.br ^[[CLEANUP_EXIT:.*]]
+// CIR-FLAT: ^[[CLEANUP_EXIT]]:
+// CIR-FLAT:   %[[LOAD_CLEANUP:.*]] = cir.load %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.switch.flat %[[LOAD_CLEANUP]] : !s32i, ^[[DEFAULT:.*]] [
+// CIR-FLAT:     0: ^[[EXIT1:.*]],
+// CIR-FLAT:     1: ^[[EXIT2:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[EXIT1]]:
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB:.*]]
+// CIR-FLAT: ^[[EXIT2]]:
+// CIR-FLAT:   cir.br ^[[FINAL_SUSPEND_BB]]
+// CIR-FLAT: ^[[DEFAULT]]:
+// CIR-FLAT:   cir.unreachable
+// CIR-FLAT: ^[[FINAL_SUSPEND_BB]]:
+// CIR-FLAT:   cir.call @_ZN5folly4coro4TaskIiE12promise_type13final_suspendEv
+// CIR-FLAT:   %[[ShouldSuspend2:.*]] = cir.call 
@_ZNSt14suspend_always11await_readyEv
+// CIR-FLAT:   cir.brcond %[[ShouldSuspend2]] ^[[AWAIT_FINAL_RESUME:.*]], 
^[[AWAIT_FINAL_SUSPEND:.*]]
+// CIR-FLAT: ^[[AWAIT_FINAL_SUSPEND]]:
+// CIR-FLAT:   %[[IS_FINAL_SUSPEND:.*]] = cir.const #true
+// CIR-FLAT:   cir.call_llvm_intrinsic "llvm.coro.suspend" {{.*}}, 
%[[IS_FINAL_SUSPEND]]
+// CIR-FLAT:   cir.switch.flat {{.*}} : !u32i, ^[[CORO_RET]] [
+// CIR-FLAT:     0: ^[[AWAIT_FINAL_RESUME]],
+// CIR-FLAT:     1: ^[[FINAL_CLEANUP_DESTROY:.*]]
+// CIR-FLAT:   ]
+// CIR-FLAT: ^[[FINAL_CLEANUP_DESTROY]]:
+// CIR-FLAT:   cir.const #cir.int<2>
+// CIR-FLAT:   cir.store {{.*}}, %[[CLEANUP_DEST_SLOT]]
+// CIR-FLAT:   cir.br ^[[CLEANUP_CORO_FREE]]
+// CIR-FLAT: ^[[AWAIT_FINAL_RESUME]]:
+// CIR-FLAT:   cir.call @_ZNSt14suspend_always12await_resumeEv
+
+// CIR-FLAT: ^[[CLEANUP_CORO_FREE]]:
+// CIR-FLAT:   cir.call @__builtin_coro_free
+// CIR-FLAT:   cir.const #cir.ptr<null>
+// CIR-FLAT:   cir.brcond %{{.*}} ^[[FREE_FRAME:.*]], ^[[EXIT_CLEANUP:.*]]
+// CIR-FLAT: ^[[FREE_FRAME]]:
+// CIR-FLAT:   cir.call @__builtin_coro_size()
+// CIR-FLAT:   cir.call @_ZdlPvm
diff --git a/clang/test/CIR/CodeGen/coro-task.cpp 
b/clang/test/CIR/CodeGenCoroutines/coro-task.cpp
similarity index 91%
rename from clang/test/CIR/CodeGen/coro-task.cpp
rename to clang/test/CIR/CodeGenCoroutines/coro-task.cpp
index 8d093180ae307..5236bac065dae 100644
--- a/clang/test/CIR/CodeGen/coro-task.cpp
+++ b/clang/test/CIR/CodeGenCoroutines/coro-task.cpp
@@ -3,123 +3,7 @@
 // RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm  
-disable-llvm-passes %s -o %t-cir.ll
 // RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=OGCG
 
-namespace std {
-
-template<typename T> struct remove_reference       { typedef T type; };
-template<typename T> struct remove_reference<T &>  { typedef T type; };
-template<typename T> struct remove_reference<T &&> { typedef T type; };
-
-template<typename T>
-typename remove_reference<T>::type &&move(T &&t) noexcept;
-
-template <class Ret, typename... T>
-struct coroutine_traits { using promise_type = typename Ret::promise_type; };
-
-template <class Promise = void>
-struct coroutine_handle {
-  static coroutine_handle from_address(void *) noexcept;
-};
-template <>
-struct coroutine_handle<void> {
-  template <class PromiseType>
-  coroutine_handle(coroutine_handle<PromiseType>) noexcept;
-  static coroutine_handle from_address(void *);
-};
-
-struct suspend_always {
-  bool await_ready() noexcept { return false; }
-  void await_suspend(coroutine_handle<>) noexcept {}
-  void await_resume() noexcept {}
-};
-
-struct suspend_never {
-  bool await_ready() noexcept { return true; }
-  void await_suspend(coroutine_handle<>) noexcept {}
-  void await_resume() noexcept {}
-};
-
-struct string {
-  int size() const;
-  string();
-  string(char const *s);
-};
-
-template<typename T>
-struct optional {
-  optional();
-  optional(const T&);
-  T &operator*() &;
-  T &&operator*() &&;
-  T &value() &;
-  T &&value() &&;
-};
-} // namespace std
-
-namespace folly {
-namespace coro {
-
-using std::suspend_always;
-using std::suspend_never;
-using std::coroutine_handle;
-
-using SemiFuture = int;
-
-template<class T>
-struct Task {
-    struct promise_type {
-        Task<T> get_return_object() noexcept;
-        suspend_always initial_suspend() noexcept;
-        suspend_always final_suspend() noexcept;
-        void return_value(T);
-        void unhandled_exception();
-        auto yield_value(Task<T>) noexcept { return final_suspend(); }
-    };
-    bool await_ready() noexcept { return false; }
-    void await_suspend(coroutine_handle<>) noexcept {}
-    T await_resume();
-};
-
-template<>
-struct Task<void> {
-    struct promise_type {
-        Task<void> get_return_object() noexcept;
-        suspend_always initial_suspend() noexcept;
-        suspend_always final_suspend() noexcept;
-        void return_void() noexcept;
-        void unhandled_exception() noexcept;
-        auto yield_value(Task<void>) noexcept { return final_suspend(); }
-    };
-    bool await_ready() noexcept { return false; }
-    void await_suspend(coroutine_handle<>) noexcept {}
-    void await_resume() noexcept {}
-    SemiFuture semi();
-};
-
-// FIXME: add CIRGen support here.
-// struct blocking_wait_fn {
-//   template <typename T>
-//   T operator()(Task<T>&& awaitable) const {
-//     return T();
-//   }
-// };
-
-// inline constexpr blocking_wait_fn blocking_wait{};
-// static constexpr blocking_wait_fn const& blockingWait = blocking_wait;
-template <typename T>
-T blockingWait(Task<T>&& awaitable) {
-  return T();
-}
-
-struct co_invoke_fn {
-  template <typename F, typename... A>
-  Task<void> operator()(F&& f, A&&... a) const {
-    return Task<void>();
-  }
-};
-
-co_invoke_fn co_invoke;
-
-}} // namespace folly::coro
+#include "Inputs/coroutine.h"
 
 // CIR-DAG: ![[VoidTask:.*]] = !cir.struct<"folly::coro::Task<void>" padded 
{!u8i}>
 // CIR-DAG: ![[IntTask:.*]] = !cir.struct<"folly::coro::Task<int>" padded 
{!u8i}>
@@ -151,11 +35,7 @@ VoidTask silly_task() {
 
 // CIR: cir.func coroutine {{.*}} @_Z10silly_taskv() -> ![[VoidTask]]
 // CIR: %[[VoidTaskAddr:.*]] = cir.alloca "__retval" {{.*}} : 
!cir.ptr<![[VoidTask]]>
-// CIR: %[[SavedFrameAddr:.*]] = cir.alloca "__coro_frame_addr" {{.*}} : 
!cir.ptr<!cir.ptr<!void>>
-// CIR: %[[VoidPromisseAddr:.*]] = cir.alloca "__promise" {{.*}} : 
!cir.ptr<![[VoidPromisse]]>
-// CIR: %[[SuspendAlwaysAddr:.*]] = cir.alloca "ref.tmp0" {{.*}} : 
!cir.ptr<![[SuspendAlways]]>
-// CIR: %[[CoroHandleVoidAddr:.*]] = cir.alloca "agg.tmp0" {{.*}} : 
!cir.ptr<![[CoroHandleVoid]]>
-// CIR: %[[CoroHandlePromiseAddr:.*]] = cir.alloca "agg.tmp1" {{.*}} : 
!cir.ptr<![[CoroHandlePromiseVoid]]>
+
 
 // OGCG: %[[VoidPromisseAddr:.*]] = alloca %[[VoidPromisse]], align 1
 // OGCG: %[[VoidTaskAddr:.*]] = alloca %[[VoidTask]], align 1
@@ -166,6 +46,12 @@ VoidTask silly_task() {
 // CIR: %[[NullPtr:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
 // CIR: %[[Align:.*]] = cir.const #cir.int<16> : !u32i
 // CIR: %[[CoroId:.*]] = cir.call @__builtin_coro_id(%[[Align]], %[[NullPtr]], 
%[[NullPtr]], %[[NullPtr]])
+// CIR: %[[CORO_SUSPEND:.*]] = cir.alloca "__coroutine_suspend_point"
+// CIR: %[[SavedFrameAddr:.*]] = cir.alloca "__coro_frame_addr" {{.*}} : 
!cir.ptr<!cir.ptr<!void>>
+// CIR: %[[VoidPromisseAddr:.*]] = cir.alloca "__promise" {{.*}} : 
!cir.ptr<![[VoidPromisse]]>
+// CIR: %[[SuspendAlwaysAddr:.*]] = cir.alloca "ref.tmp0" {{.*}} : 
!cir.ptr<![[SuspendAlways]]>
+// CIR: %[[CoroHandleVoidAddr:.*]] = cir.alloca "agg.tmp0" {{.*}} : 
!cir.ptr<![[CoroHandleVoid]]>
+// CIR: %[[CoroHandlePromiseAddr:.*]] = cir.alloca "agg.tmp1" {{.*}} : 
!cir.ptr<![[CoroHandlePromiseVoid]]>
 
 // OGCG: %[[CoroId:.*]] = call token @llvm.coro.id(i32 16, ptr 
%[[VoidPromisseAddr]], ptr null, ptr null)
 
@@ -353,13 +239,13 @@ folly::coro::Task<int> byRef(const std::string& s) {
 // CIR:  cir.func coroutine {{.*}} @_Z5byRefRKSt6string(%[[ARG:.*]]: 
!cir.ptr<![[StdString]]> {{.*}}) -> ![[IntTask]]
 // CIR:    %[[AllocaParam:.*]] = cir.alloca "s" {{.*}} init const : 
!cir.ptr<!cir.ptr<![[StdString]]>>
 // CIR:    %[[IntTaskAddr:.*]] = cir.alloca "__retval" {{.*}} : 
!cir.ptr<![[IntTask]]>
+// CIR:    cir.store %[[ARG]], %[[AllocaParam]] : !cir.ptr<![[StdString]]>, 
{{.*}}
 // CIR:    %[[SavedFrameAddr:.*]]  = cir.alloca "__coro_frame_addr" {{.*}} : 
!cir.ptr<!cir.ptr<!void>>
 // CIR:    %[[AllocaFnUse:.*]] = cir.alloca "s" {{.*}} init const : 
!cir.ptr<!cir.ptr<![[StdString]]>>
 // CIR:    %[[IntPromisseAddr:.*]] = cir.alloca "__promise" {{.*}} : 
!cir.ptr<![[IntPromisse]]>
 // CIR:    %[[SuspendAlwaysAddr:.*]] = cir.alloca "ref.tmp0" {{.*}} : 
!cir.ptr<![[SuspendAlways]]>
 // CIR:    %[[CoroHandleVoidAddr:.*]] = cir.alloca "agg.tmp0" {{.*}} : 
!cir.ptr<![[CoroHandleVoid]]>
 // CIR:    %[[CoroHandlePromiseAddr:.*]] = cir.alloca "agg.tmp1" {{.*}} : 
!cir.ptr<![[CoroHandlePromiseInt]]>
-// CIR:    cir.store %[[ARG]], %[[AllocaParam]] : !cir.ptr<![[StdString]]>, 
{{.*}}
 
 // CIR:    cir.cleanup.scope {
 // Call promise.get_return_object() to retrieve the task object.

>From 4b57bb9df7c00a70c53248d3941a3dc7ab8b3672 Mon Sep 17 00:00:00 2001
From: Andres Salamanca <[email protected]>
Date: Tue, 16 Jun 2026 21:02:57 -0500
Subject: [PATCH 2/2] Address some review comments

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  2 +-
 clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp     |  2 +-
 clang/lib/CIR/Dialect/IR/CIRDialect.cpp       | 14 ++--
 .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 67 +++++++++++++++----
 .../CIR/CodeGenCoroutines/coro-flatten.cpp    |  8 +--
 5 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td 
b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 5666e446a5dea..cef7feaf99c67 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3915,7 +3915,7 @@ def CIR_FuncOp : CIR_Op<"func", [
     TypeAttrOf<CIR_FuncType>:$function_type,
     UnitAttr:$builtin,
     UnitAttr:$coroutine,
-    UnitAttr:$flatten_coroutine,
+    UnitAttr:$flattened_coroutine,
     OptionalAttr<CIR_InlineKind>:$inline_kind,
     UnitAttr:$lambda,
     UnitAttr:$no_proto,
diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
index c2da4407cc533..0819690d16ff2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
@@ -529,7 +529,7 @@ CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt 
&s) {
   assert(!cir::MissingFeatures::coroutineGroManager());
 
   cir::StoreOp::create(builder, openCurlyLoc,
-                       builder.getSignedInt(openCurlyLoc, 1, 32),
+                       builder.getSInt32(1, openCurlyLoc),
                        curCoro.data->suspendPoint, false, {} /*alignment*/,
                        {} /*sync_scope*/, {} /*mem_order*/);
 
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp 
b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 3008b785ca00b..15cbe97748bec 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2335,8 +2335,8 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, 
OperationState &state) {
 
   mlir::StringAttr builtinNameAttr = getBuiltinAttrName(state.name);
   mlir::StringAttr coroutineNameAttr = getCoroutineAttrName(state.name);
-  mlir::StringAttr flattenCoroutineNameAttr =
-      getFlattenCoroutineAttrName(state.name);
+  mlir::StringAttr flattenedCoroutineNameAttr =
+      getFlattenedCoroutineAttrName(state.name);
   mlir::StringAttr inlineKindNameAttr = getInlineKindAttrName(state.name);
   mlir::StringAttr lambdaNameAttr = getLambdaAttrName(state.name);
   mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name);
@@ -2351,8 +2351,8 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, 
OperationState &state) {
           parser.parseOptionalKeyword(coroutineNameAttr.strref())))
     state.addAttribute(coroutineNameAttr, parser.getBuilder().getUnitAttr());
   if (::mlir::succeeded(
-          parser.parseOptionalKeyword(flattenCoroutineNameAttr.strref())))
-    state.addAttribute(flattenCoroutineNameAttr,
+          parser.parseOptionalKeyword(flattenedCoroutineNameAttr.strref())))
+    state.addAttribute(flattenedCoroutineNameAttr,
                        parser.getBuilder().getUnitAttr());
   // Parse optional inline kind attribute
   cir::InlineKindAttr inlineKindAttr;
@@ -2662,8 +2662,8 @@ void cir::FuncOp::print(OpAsmPrinter &p) {
   if (getCoroutine())
     p << " coroutine";
 
-  if (getFlattenCoroutine())
-    p << " flatten-coroutine";
+  if (getFlattenedCoroutine())
+    p << " flattened_coroutine";
 
   printInlineKindAttr(p, getInlineKindAttr());
 
@@ -3202,7 +3202,7 @@ cir::CoroBodyOp::getSuccessorInputs(RegionSuccessor 
successor) {
 
 LogicalResult cir::CoroBodyOp::verify() {
   auto funcOp = getOperation()->getParentOfType<FuncOp>();
-  if (!funcOp.getCoroutine() && !funcOp.getFlattenCoroutine())
+  if (!funcOp.getCoroutine() && !funcOp.getFlattenedCoroutine())
     return emitOpError("enclosing function must be a coroutine");
   return success();
 }
diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp 
b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
index 2823da3e1e33e..ea9f76f5101c4 100644
--- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
@@ -1842,10 +1842,37 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
 public:
   using OpRewritePattern<cir::AwaitOp>::OpRewritePattern;
 
+  // Flatten `cir.await` into the following control flow:
+  //
+  //                    awaitBlock
+  //                        |
+  //                        v
+  //                      ready
+  //                     /     \
+  //                    /       \
+  //                   v         v
+  //                resume    suspend
+  //                   |          |
+  //                   |   llvm.coro.suspend
+  //                   |          |
+  //                   |     +----+----+
+  //                   |     |    |    |
+  //                   |     |    |    v
+  //                   |     |    | suspend point
+  //                   |     |    |
+  //                   |     |    v
+  //                   |     | destroy
+  //                   |     |
+  //                   +--> resume
+  //                          |
+  //                          v
+  //                    continuation
   mlir::LogicalResult
   matchAndRewrite(cir::AwaitOp awaitOp,
                   mlir::PatternRewriter &rewriter) const override {
     mlir::Block *awaitBlock = rewriter.getInsertionBlock();
+    // Split the current block before the AwaitOp to create the inlining
+    // point.
     mlir::Block *remainingOpsBlock =
         rewriter.splitBlock(awaitBlock, rewriter.getInsertionPoint());
 
@@ -1860,6 +1887,9 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
     {
       mlir::OpBuilder::InsertionGuard guard(rewriter);
       rewriter.setInsertionPoint(conditionOp);
+      // The condition is the result of `await_ready()`. If true, execution
+      // continues in the resume region; otherwise, control transfers to the
+      // suspend region.
       rewriter.replaceOpWithNewOp<cir::BrCondOp>(
           conditionOp, conditionOp.getCondition(), &resumeRegion.front(),
           &suspendRegion.front());
@@ -1869,6 +1899,8 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
     {
       mlir::OpBuilder::InsertionGuard guard(rewriter);
       rewriter.setInsertionPointToEnd(awaitBlock);
+      // After inlining the ready region, branch from the original await block
+      // to the beginning of the inlined ready region.
       cir::BrOp::create(rewriter, loc, mlir::ValueRange(), &beforeReady);
     }
 
@@ -1885,12 +1917,16 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
       auto nullPtr = cir::ConstantOp::create(
           rewriter, loc,
           cir::ConstPtrAttr::get(voidPtrTy, rewriter.getI64IntegerAttr(0)));
+      auto int32Ty = cir::IntType::get(getContext(), 32, false);
       auto coroSaveIntri = cir::LLVMIntrinsicCallOp::create(
-          rewriter, loc, mlir::StringAttr::get(getContext(), "llvm.coro.save"),
-          cir::IntType::get(getContext(), 32, false),
+          rewriter, loc, rewriter.getStringAttr("llvm.coro.save"), int32Ty,
           mlir::ValueRange{nullPtr});
       rewriter.setInsertionPoint(suspendYield);
 
+      // The second argument to `llvm.coro.suspend` indicates whether this is
+      // the final suspend point. Coroutines suspended at a final suspend point
+      // are considered done (`llvm.coro.done` returns true) and may only be
+      // destroyed; resuming them is undefined behavior.
       bool isFinalSuspend = awaitOp.getKind() == cir::AwaitKind::Final;
       auto isFinalCoroSuspend = cir::ConstantOp::create(
           rewriter, loc, cir::BoolAttr::get(getContext(), isFinalSuspend));
@@ -1901,8 +1937,7 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
       //   1 : coroutine destroyed
       coroSuspendIntri = cir::LLVMIntrinsicCallOp::create(
           rewriter, loc,
-          mlir::StringAttr::get(getContext(), "llvm.coro.suspend"),
-          cir::IntType::get(getContext(), 32, false),
+          mlir::StringAttr::get(getContext(), "llvm.coro.suspend"), int32Ty,
           mlir::ValueRange{coroSaveIntri.getResult(), isFinalCoroSuspend});
     }
     rewriter.inlineRegionBefore(suspendRegion, remainingOpsBlock);
@@ -1912,13 +1947,10 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
     {
       mlir::OpBuilder::InsertionGuard guard(rewriter);
       rewriter.setInsertionPoint(suspendYield);
-      llvm::SmallVector<mlir::APInt, 2> caseValues{mlir::APInt(32, 0),
-                                                   mlir::APInt(32, 1)};
-
-      llvm::SmallVector<mlir::ValueRange, 8> caseOperands{
-          mlir::ValueRange(), mlir::ValueRange(), mlir::ValueRange()};
+      llvm::SmallVector<mlir::APInt> caseValues{mlir::APInt(32, 0),
+                                                mlir::APInt(32, 1)};
 
-      llvm::SmallVector<mlir::Block *, 8> caseDestinations;
+      llvm::SmallVector<mlir::ValueRange> caseOperands{3};
 
       // In Classic CodeGen, the destroy path reaches the coroutine cleanup by
       // emitting an EmitBranchThroughCleanup(), ensuring that all nested
@@ -1935,13 +1967,17 @@ class CIRAwaitOpFlattening : public 
mlir::OpRewritePattern<cir::AwaitOp> {
         cleanupBlock = rewriter.createBlock(remainingOpsBlock);
         cir::YieldOp::create(rewriter, loc);
       }
+      // The destroy case dispatches to cleanupBlock, which propagates through
+      // enclosing cleanup scopes before reaching the coroutine-frame cleanup.
+      llvm::SmallVector<mlir::Block *> caseDestinations{&resumeRegion.front(),
+                                                        cleanupBlock};
       caseDestinations.push_back(&resumeRegion.front());
       caseDestinations.push_back(cleanupBlock);
 
       assert(!cir::MissingFeatures::coroutineGroManager());
 
-      // Default destination must be de suspend BB (the return block or the pre
-      // gro conv)
+      // Dispatch to the appropriate coroutine path based on the result of
+      // `llvm.coro.suspend`: resume, destroy, or suspend.
       auto coroSuspendSwitch = cir::SwitchFlatOp::create(
           rewriter, loc, coroSuspendIntri.getResult(),
           getOrCreateBlockForSuspendPoint(func, rewriter, loc),
@@ -1954,14 +1990,19 @@ class CIRAwaitOpFlattening : public mlir::OpRewritePattern<cir::AwaitOp> {
     {
       mlir::OpBuilder::InsertionGuard guard(rewriter);
       rewriter.setInsertionPoint(resumeYield);
+      // Once the coroutine resumes, continue with the operations that
+      // originally followed the await.
       rewriter.replaceOpWithNewOp<cir::BrOp>(resumeYield, remainingOpsBlock);
     }
     rewriter.inlineRegionBefore(resumeRegion, remainingOpsBlock);
 
     rewriter.eraseOp(awaitOp);
 
+    // The coroutine regions have been flattened, so the original coroutine
+    // attribute no longer verifies. Preserve the information that this function
+    // originated from a coroutine for LLVM lowering.
     func.setCoroutine(false);
-    func.setFlattenCoroutine(true);
+    func.setFlattenedCoroutine(true);
 
     return mlir::success();
   }
diff --git a/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp b/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp
index fc5ca2e4d43ba..58babff2760dd 100644
--- a/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp
+++ b/clang/test/CIR/CodeGenCoroutines/coro-flatten.cpp
@@ -10,7 +10,7 @@ VoidTask silly_task() {
   co_await std::suspend_always();
 }
 
-// CIR-FLAT: cir.func flatten-coroutine {{.*}} @_Z10silly_taskv
+// CIR-FLAT: cir.func flattened_coroutine {{.*}} @_Z10silly_taskv
 
 // CIR-FLAT: %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
 // CIR-FLAT: %[[NullPtr:.*]] = cir.const #cir.ptr<null>
@@ -176,7 +176,7 @@ VoidTask silly_task_with_dtor() {
   co_await std::suspend_always();
 }
 
-// CIR-FLAT: cir.func flatten-coroutine {{.*}} @_Z20silly_task_with_dtorv
+// CIR-FLAT: cir.func flattened_coroutine {{.*}} @_Z20silly_task_with_dtorv
 // CIR-FLAT:   %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
 // CIR-FLAT:   %[[SuspendAlwaysAddr:.*]] = cir.alloca "ref.tmp0"
 
@@ -306,7 +306,7 @@ folly::coro::Task<int> co_returns(int flag) {
   co_return 3;
 }
 
-// CIR-FLAT: cir.func flatten-coroutine {{.*}} @_Z10co_returnsi
+// CIR-FLAT: cir.func flattened_coroutine {{.*}} @_Z10co_returnsi
 // CIR-FLAT:   %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
 // CIR-FLAT:   %[[SUSPEND_POINT:.*]] = cir.alloca "__coroutine_suspend_point"
 
@@ -348,7 +348,7 @@ folly::coro::Task<int> co_return_with_dtor(int flag) {
   co_return 2;
 }
 
-// CIR-FLAT:  cir.func flatten-coroutine {{.*}} @_Z19co_return_with_dtori
+// CIR-FLAT:  cir.func flattened_coroutine {{.*}} @_Z19co_return_with_dtori
 // CIR-FLAT:  %[[CLEANUP_DEST_SLOT:.*]] = cir.alloca "__cleanup_dest_slot"
 // CIR-FLAT:  %[[SUSPEND_POINT:.*]] = cir.alloca "__coroutine_suspend_point"
 // CIR-FLAT:  %[[ShouldSuspend:.*]] = cir.call @_ZNSt14suspend_always11await_readyEv

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to