[clang] [CIR][OpenMP] Enable lowering of the OpenMP dialect to LLVM IR (PR #178515)

Jan Leyonberg via cfe-commits Wed, 04 Feb 2026 07:33:55 -0800

https://github.com/jsjodin updated 
https://github.com/llvm/llvm-project/pull/178515


>From ba8ef2f036e3f32a46021db19a8fbec197eb35e2 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <[email protected]>
Date: Wed, 28 Jan 2026 15:59:48 -0500
Subject: [PATCH 1/5] [CIR][OpenMP] Enable lowering of the OpenMP dialect to
 LLVM IR

This patch makes the OpenMP dialect part of the lowering to LLVM IR.
A couple of minor changes were made to compensate for not-yet-implemented
features.
---
 clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp    | 12 ++-
 clang/lib/CIR/CodeGen/CIRGenerator.cpp        |  4 +-
 .../CIR/Lowering/DirectToLLVM/CMakeLists.txt  |  1 +
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  2 +
 .../CIR/CodeGenOpenMP/not-yet-implemented.c   |  3 +-
 clang/test/CIR/CodeGenOpenMP/parallel.c       | 14 +++-
 clang/test/CIR/Lowering/omp.cir               | 80 +++++++++++++++++++
 clang/tools/cir-opt/cir-opt.cpp               |  3 +-
 clang/tools/cir-translate/cir-translate.cpp   |  4 +-
 9 files changed, 110 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/CIR/Lowering/omp.cir

diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp 
b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp
index ee25f7caec619..0d3b44db98307 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp
@@ -14,7 +14,7 @@
 #include "CIRGenFunction.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "clang/AST/StmtOpenMP.h"
-
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
 using namespace clang;
 using namespace clang::CIRGen;
 
@@ -53,9 +53,13 @@ CIRGenFunction::emitOMPParallelDirective(const 
OMPParallelDirective &s) {
     if (s.getTaskReductionRefExpr())
       getCIRGenModule().errorNYI(s.getBeginLoc(),
                                  "OpenMP Parallel with Task Reduction");
-
-    res = emitStmt(s.getAssociatedStmt(), /*useCurrentScope=*/true);
-
+    // Don't lower the captured statement directly, since this will be
+    // special-cased depending on the kind of the parent OpenMP directive.
+    // Also, the lowering of captured statements in a non-OpenMP context
+    // does not apply directly.
+    const CapturedStmt *cs = s.getCapturedStmt(llvm::omp::OMPD_parallel);
+    const Stmt *bodyStmt = cs->getCapturedStmt();
+    res = emitStmt(bodyStmt, /*useCurrentScope=*/true);
     mlir::omp::TerminatorOp::create(builder, end);
   }
   return res;
diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp 
b/clang/lib/CIR/CodeGen/CIRGenerator.cpp
index 8c5d81bd61505..afa1599af8e40 100644
--- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp
@@ -149,9 +149,7 @@ void CIRGenerator::HandleTagDeclDefinition(TagDecl *d) {
   // inline initializers as definitions.
   if (astContext->getTargetInfo().getCXXABI().isMicrosoft())
     cgm->errorNYI(d->getSourceRange(), "HandleTagDeclDefinition: MSABI");
-  // For OpenMP emit declare reduction functions, if required.
-  if (astContext->getLangOpts().OpenMP)
-    cgm->errorNYI(d->getSourceRange(), "HandleTagDeclDefinition: OpenMP");
+  // TODO(OMP): For OpenMP emit declare reduction functions, if required.
 }
 
 void CIRGenerator::HandleTagDeclRequiredDefinition(const TagDecl *D) {
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt 
b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt
index 2525e02ae8f85..c7467fe40ba30 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt
@@ -21,6 +21,7 @@ add_clang_library(clangCIRLoweringDirectToLLVM
   MLIRCIRTargetLowering
   MLIRBuiltinToLLVMIRTranslation
   MLIRLLVMToLLVMIRTranslation
+  MLIROpenMPToLLVMIRTranslation
   MLIRIR
   )
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 4877508b1c3da..6e3fc705ea350 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -29,6 +29,7 @@
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
@@ -4431,6 +4432,7 @@ lowerDirectlyFromCIRToLLVMIR(mlir::ModuleOp mlirModule, 
LLVMContext &llvmCtx) {
 
   mlir::registerBuiltinDialectTranslation(*mlirCtx);
   mlir::registerLLVMDialectTranslation(*mlirCtx);
+  mlir::registerOpenMPDialectTranslation(*mlirCtx);
   mlir::registerCIRDialectTranslation(*mlirCtx);
 
   llvm::TimeTraceScope translateScope("translateModuleToLLVMIR");
diff --git a/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c 
b/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c
index 78a0bc8b2d416..6d59f45d6e5e4 100644
--- a/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c
+++ b/clang/test/CIR/CodeGenOpenMP/not-yet-implemented.c
@@ -12,8 +12,7 @@ void do_things() {
   int i;
   // TODO(OMP): We might consider overloading operator<< for OMPClauseKind in
   // the future if we want to improve this.
-  // expected-error@+2{{ClangIR code gen Not Yet Implemented: OpenMPClause : 
if}}
-  // expected-error@+2{{ClangIR code gen Not Yet Implemented: emitStmt: 
CapturedStmt}}
+  // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenMPClause : 
if}}
 #pragma omp parallel if(i)
   {}
 }
diff --git a/clang/test/CIR/CodeGenOpenMP/parallel.c 
b/clang/test/CIR/CodeGenOpenMP/parallel.c
index a2bfc8f4ce82e..3b43fff62c5a4 100644
--- a/clang/test/CIR/CodeGenOpenMP/parallel.c
+++ b/clang/test/CIR/CodeGenOpenMP/parallel.c
@@ -23,6 +23,8 @@ void emit_simple_parallel() {
     during(i);
   }
   // CHECK-NEXT: omp.parallel {
+  // CHECK-NEXT: {{.*}} = cir.load align(4) %{{.*}} : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: cir.call @during(%{{.*}}) : (!s32i) -> ()
   // CHECK-NEXT: omp.terminator
   // CHECK-NEXT: }
 
@@ -41,10 +43,18 @@ void parallel_with_operations() {
   // lines will need updating.
 #pragma omp parallel shared(a) firstprivate(b)
   {
-   ++a;
-   ++b;
+   a = a + 1;
+   b = b + 1;
   }
   // CHECK-NEXT: omp.parallel {
+  // CHECK-NEXT: cir.load align(4) %{{.*}}
+  // CHECK-NEXT: cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: cir.binop(add, %{{.*}}, %{{.*}}) nsw : !s32i
+  // CHECK-NEXT: cir.store align(4) %{{.*}}, %{{.*}} : !s32i, !cir.ptr<!s32i>
+  // CHECK-NEXT: cir.load align(4) %{{.*}}
+  // CHECK-NEXT: cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: cir.binop(add, %{{.*}}, %{{.*}}) nsw : !s32i
+  // CHECK-NEXT: cir.store align(4) %{{.*}}, %{{.*}} : !s32i, !cir.ptr<!s32i>
   // CHECK-NEXT: omp.terminator
   // CHECK-NEXT: }
 }
diff --git a/clang/test/CIR/Lowering/omp.cir b/clang/test/CIR/Lowering/omp.cir
new file mode 100644
index 0000000000000..78ff4cf6444c4
--- /dev/null
+++ b/clang/test/CIR/Lowering/omp.cir
@@ -0,0 +1,80 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --target x86_64-unknown-linux-gnu 
--disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+!s32i = !cir.int<s, 32>
+
+// MLIR-LABEL: llvm.func @main() -> i32
+// MLIR-SAME: attributes {dso_local, no_inline, no_proto}
+// MLIR: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// MLIR: %[[ALLOCA1:.*]] = llvm.alloca %[[C1]] x i32 {alignment = 4 : i64}
+// MLIR: %[[ALLOCA2:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
+// MLIR: %[[ALLOCA3:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
+// MLIR: omp.parallel {
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[ZERO1:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR: llvm.store %[[ZERO1]], %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[LOAD1:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
+// MLIR: %[[C10000:.*]] = llvm.mlir.constant(10000 : i32) : i32
+// MLIR: %[[CMP:.*]] = llvm.icmp "slt" %[[LOAD1]], %[[C10000]] : i32
+// MLIR: llvm.cond_br %[[CMP]], ^bb{{[0-9]+}}, ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[ZERO2:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR: llvm.store %[[ZERO2]], %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[LOAD2:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
+// MLIR: %[[C1_I32:.*]] = llvm.mlir.constant(1 : i32) : i32
+// MLIR: %[[ADD:.*]] = llvm.add %[[LOAD2]], %[[C1_I32]] overflow<nsw> : i32
+// MLIR: llvm.store %[[ADD]], %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: omp.terminator
+// MLIR: %[[ZERO3:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR: llvm.store %[[ZERO3]], %{{.*}} {alignment = 4 : i64}
+// MLIR: %[[RETVAL:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.return %[[RETVAL]] : i32
+
+// Test only key runtime calls for LLVM IR CodeGen
+// LLVM: call i32 @__kmpc_global_thread_num
+// LLVM: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr 
@main..omp_par, ptr %structArg)
+// LLVM: define internal void @main..omp_par(ptr noalias %tid.addr, ptr 
noalias %zero.addr, ptr %{{.*}})
+
+module {
+  cir.func no_inline no_proto dso_local @main() -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["j"] {alignment = 4 : i64}
+    omp.parallel {
+      cir.scope {
+        %4 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : 
i64}
+        %5 = cir.const #cir.int<0> : !s32i
+        cir.store align(4) %5, %4 : !s32i, !cir.ptr<!s32i>
+        cir.for : cond {
+          %6 = cir.load align(4) %4 : !cir.ptr<!s32i>, !s32i
+          %7 = cir.const #cir.int<10000> : !s32i
+          %8 = cir.cmp(lt, %6, %7) : !s32i, !cir.bool
+          cir.condition(%8)
+        } body {
+          %6 = cir.const #cir.int<0> : !s32i
+          cir.store align(4) %6, %1 : !s32i, !cir.ptr<!s32i>
+          cir.yield
+        } step {
+          %6 = cir.load align(4) %4 : !cir.ptr<!s32i>, !s32i
+          %7 = cir.const #cir.int<1> : !s32i
+          %8 = cir.binop(add, %6, %7) nsw : !s32i
+          cir.store align(4) %8, %4 : !s32i, !cir.ptr<!s32i>
+          cir.yield
+        }
+      }
+      omp.terminator
+    }
+    %2 = cir.const #cir.int<0> : !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %3 : !s32i
+  }
+}
+
diff --git a/clang/tools/cir-opt/cir-opt.cpp b/clang/tools/cir-opt/cir-opt.cpp
index ee42015bb38e9..edadfeec09a2a 100644
--- a/clang/tools/cir-opt/cir-opt.cpp
+++ b/clang/tools/cir-opt/cir-opt.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/BuiltinDialect.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassOptions.h"
@@ -35,7 +36,7 @@ int main(int argc, char **argv) {
   mlir::DialectRegistry registry;
   registry.insert<mlir::BuiltinDialect, cir::CIRDialect,
                   mlir::memref::MemRefDialect, mlir::LLVM::LLVMDialect,
-                  mlir::DLTIDialect>();
+                  mlir::DLTIDialect, mlir::omp::OpenMPDialect>();
 
   ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
     return mlir::createCIRCanonicalizePass();
diff --git a/clang/tools/cir-translate/cir-translate.cpp 
b/clang/tools/cir-translate/cir-translate.cpp
index 29a310a89de09..2b00d1bd62e4a 100644
--- a/clang/tools/cir-translate/cir-translate.cpp
+++ b/clang/tools/cir-translate/cir-translate.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/InitAllTranslations.h"
@@ -105,7 +106,8 @@ llvm::LogicalResult 
prepareCIRModuleDataLayout(mlir::ModuleOp mod,
   std::string layoutString = targetInfo->getDataLayoutString();
 
   // Registered dialects may not be loaded yet, ensure they are.
-  context->loadDialect<mlir::DLTIDialect, mlir::LLVM::LLVMDialect>();
+  context->loadDialect<mlir::DLTIDialect, mlir::LLVM::LLVMDialect,
+                       mlir::omp::OpenMPDialect>();
 
   mlir::DataLayoutSpecInterface dlSpec =
       mlir::translateDataLayout(llvm::DataLayout(layoutString), context);

>From 8cd4678270c21dea43d11541e1c2ac7ef38e4c15 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <[email protected]>
Date: Fri, 30 Jan 2026 10:49:19 -0500
Subject: [PATCH 2/5] Refine the OpenMP checks so that we error out only if
 the specific unimplemented cases are encountered.

---
 clang/lib/CIR/CodeGen/CIRGenerator.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp 
b/clang/lib/CIR/CodeGen/CIRGenerator.cpp
index afa1599af8e40..6453f3565c33d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp
@@ -149,7 +149,22 @@ void CIRGenerator::HandleTagDeclDefinition(TagDecl *d) {
   // inline initializers as definitions.
   if (astContext->getTargetInfo().getCXXABI().isMicrosoft())
     cgm->errorNYI(d->getSourceRange(), "HandleTagDeclDefinition: MSABI");
-  // TODO(OMP): For OpenMP emit declare reduction functions, if required.
+
+  // For OpenMP emit declare reduction functions or declare mapper, if
+  // required.
+  if (astContext->getLangOpts().OpenMP) {
+    for (Decl *member : d->decls()) {
+      if (auto *drd = dyn_cast<OMPDeclareReductionDecl>(member)) {
+        if (astContext->DeclMustBeEmitted(drd))
+          cgm->errorNYI(d->getSourceRange(),
+                        "HandleTagDeclDefinition: OMPDeclareReductionDecl");
+      } else if (auto *dmd = dyn_cast<OMPDeclareMapperDecl>(member)) {
+        if (astContext->DeclMustBeEmitted(dmd))
+          cgm->errorNYI(d->getSourceRange(),
+                        "HandleTagDeclDefinition: OMPDeclareMapperDecl");
+      }
+    }
+  }
 }
 
 void CIRGenerator::HandleTagDeclRequiredDefinition(const TagDecl *D) {

>From 445a842c0daaf1a45b08d3ac9bb9b98cff8d6be1 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <[email protected]>
Date: Fri, 30 Jan 2026 17:27:49 -0500
Subject: [PATCH 3/5] Add LLVM-IR codegen test for CIR and OGCG.

---
 clang/test/CIR/Lowering/omp.c | 134 ++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 clang/test/CIR/Lowering/omp.c

diff --git a/clang/test/CIR/Lowering/omp.c b/clang/test/CIR/Lowering/omp.c
new file mode 100644
index 0000000000000..a2fd82e16cdac
--- /dev/null
+++ b/clang/test/CIR/Lowering/omp.c
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fopenmp 
-emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck %s -check-prefix=CIR --input-file %t-cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -emit-llvm %s -o 
%t.ll
+// RUN: FileCheck %s -check-prefix=OGCG --input-file %t.ll
+
+// CIR-LABEL: define dso_local i32 @main()
+// CIR: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
+// CIR: %[[VAR1:.*]] = alloca i32, i64 1, align 4
+// CIR: %[[VAR2:.*]] = alloca i32, i64 1, align 4
+// CIR: %[[VAR3:.*]] = alloca i32, i64 1, align 4
+// CIR: br label %[[ENTRY:.*]]
+
+// CIR: [[ENTRY]]:
+// CIR: %[[THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CIR: br label %[[OMP_PARALLEL:.*]]
+
+// CIR: [[OMP_PARALLEL]]:
+// CIR: %[[GEP1:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, 
i32 0
+// CIR: store ptr %[[VAR1]], ptr %[[GEP1]], align 8
+// CIR: %[[GEP2:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, 
i32 1
+// CIR: store ptr %[[VAR3]], ptr %[[GEP2]], align 8
+// CIR: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr 
@main..omp_par, ptr %[[STRUCTARG]])
+// CIR: br label %[[OMP_PAR_EXIT:.*]]
+
+// CIR: [[OMP_PAR_EXIT]]:
+// CIR: store i32 0, ptr %[[VAR2]], align 4
+// CIR: %[[LOAD:.*]] = load i32, ptr %[[VAR2]], align 4
+// CIR: ret i32 %[[LOAD]]
+
+// CIR-LABEL: define internal void @main..omp_par(ptr noalias %{{.*}}, ptr 
noalias %{{.*}}, ptr %{{.*}})
+// CIR: [[PAR_ENTRY:.*]]:
+// CIR: %[[GEP_A:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 0
+// CIR: %[[LOAD_A:.*]] = load ptr, ptr %[[GEP_A]], align 8
+// CIR: %[[GEP_B:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 1
+// CIR: %[[LOAD_B:.*]] = load ptr, ptr %[[GEP_B]], align 8
+// CIR: %[[TID_LOCAL:.*]] = alloca i32, align 4
+// CIR: %[[TID_VAL:.*]] = load i32, ptr %{{.*}}, align 4
+// CIR: store i32 %[[TID_VAL]], ptr %[[TID_LOCAL]], align 4
+// CIR: %{{.*}} = load i32, ptr %[[TID_LOCAL]], align 4
+// CIR: br label %[[AFTER_ALLOCA:.*]]
+
+// CIR: [[AFTER_ALLOCA]]:
+// CIR: br label %[[PAR_REGION:.*]]
+
+// CIR: [[PAR_REGION]]:
+// CIR: br label %[[PAR_REGION1:.*]]
+
+// CIR: [[PAR_REGION1]]:
+// CIR: br label %[[PAR_REGION2:.*]]
+
+// CIR: [[PAR_REGION2]]:
+// CIR: store i32 0, ptr %[[LOAD_A]], align 4
+// CIR: br label %[[PAR_REGION3:.*]]
+
+// CIR: [[PAR_REGION3]]:
+// CIR: %[[I_LOAD:.*]] = load i32, ptr %[[LOAD_A]], align 4
+// CIR: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000
+// CIR: br i1 %[[CMP]], label %[[PAR_REGION4:.*]], label %[[PAR_REGION6:.*]]
+
+// CIR: [[PAR_REGION6]]:
+// CIR: br label %[[PAR_REGION7:.*]]
+
+// CIR: [[PAR_REGION7]]:
+// CIR: br label %[[REGION_CONT:.*]]
+
+// CIR: [[REGION_CONT]]:
+// CIR: br label %[[PRE_FINALIZE:.*]]
+
+// CIR: [[PRE_FINALIZE]]:
+// CIR: br label %[[FINI:.*]]
+
+// CIR: [[FINI]]:
+// CIR: br label %[[EXIT_STUB:.*]]
+
+// CIR: [[PAR_REGION4]]:
+// CIR: store i32 0, ptr %[[LOAD_B]], align 4
+// CIR: br label %[[PAR_REGION5:.*]]
+
+// CIR: [[PAR_REGION5]]:
+// CIR: %[[I_LOAD2:.*]] = load i32, ptr %[[LOAD_A]], align 4
+// CIR: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1
+// CIR: store i32 %[[ADD]], ptr %[[LOAD_A]], align 4
+// CIR: br label %[[PAR_REGION3]]
+
+// CIR: [[EXIT_STUB]]:
+// CIR: ret void
+
+// OGCG-LABEL: define dso_local i32 @main()
+// OGCG: [[ENTRY:.*]]:
+// OGCG: %[[RETVAL:.*]] = alloca i32, align 4
+// OGCG: %[[J:.*]] = alloca i32, align 4
+// OGCG: store i32 0, ptr %[[RETVAL]], align 4
+// OGCG: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr 
@main.omp_outlined, ptr %[[J]])
+// OGCG: ret i32 0
+
+// OGCG-LABEL: define internal void @main.omp_outlined(ptr noalias noundef 
%{{.*}}, ptr noalias noundef %{{.*}}, ptr noundef nonnull align 4 
dereferenceable(4) %{{.*}})
+// OGCG: [[OUTLINED_ENTRY:.*]]:
+// OGCG: %[[GLOBAL_TID_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[BOUND_TID_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[J_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[I:.*]] = alloca i32, align 4
+// OGCG: store ptr %{{.*}}, ptr %[[GLOBAL_TID_ADDR]], align 8
+// OGCG: store ptr %{{.*}}, ptr %[[BOUND_TID_ADDR]], align 8
+// OGCG: store ptr %{{.*}}, ptr %[[J_ADDR]], align 8
+// OGCG: %[[J_LOAD:.*]] = load ptr, ptr %[[J_ADDR]], align 8
+// OGCG: store i32 0, ptr %[[I]], align 4
+// OGCG: br label %[[FOR_COND:.*]]
+
+// OGCG: [[FOR_COND]]:
+// OGCG: %[[I_LOAD:.*]] = load i32, ptr %[[I]], align 4
+// OGCG: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000
+// OGCG: br i1 %[[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+
+// OGCG: [[FOR_BODY]]:
+// OGCG: store i32 0, ptr %[[J_LOAD]], align 4
+// OGCG: br label %[[FOR_INC:.*]]
+
+// OGCG: [[FOR_INC]]:
+// OGCG: %[[I_LOAD2:.*]] = load i32, ptr %[[I]], align 4
+// OGCG: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1
+// OGCG: store i32 %[[ADD]], ptr %[[I]], align 4
+// OGCG: br label %[[FOR_COND]]
+
+// OGCG: [[FOR_END]]:
+// OGCG: ret void
+
+int main() {
+  int j;
+#pragma omp parallel
+  for (int i = 0; i < 10000; i=i+1)
+    j = 0;
+
+  return 0;
+}

>From 139f63e87cd83ed9b32c2b586e41418baa30b1b3 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <[email protected]>
Date: Tue, 3 Feb 2026 11:20:44 -0500
Subject: [PATCH 4/5] Merge tests into a single file

---
 clang/test/CIR/Lowering/omp.c   | 39 ++++++++++++++++
 clang/test/CIR/Lowering/omp.cir | 80 ---------------------------------
 2 files changed, 39 insertions(+), 80 deletions(-)
 delete mode 100644 clang/test/CIR/Lowering/omp.cir

diff --git a/clang/test/CIR/Lowering/omp.c b/clang/test/CIR/Lowering/omp.c
index a2fd82e16cdac..ea6b33be9880a 100644
--- a/clang/test/CIR/Lowering/omp.c
+++ b/clang/test/CIR/Lowering/omp.c
@@ -1,8 +1,47 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fopenmp 
-emit-cir %s -o %t.cir
+// RUN: cir-opt %t.cir -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %t.cir -cir-to-llvmir --target x86_64-unknown-linux-gnu 
--disable-cc-lowering  | FileCheck %s -check-prefix=CIR
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fopenmp 
-emit-llvm %s -o %t-cir.ll
 // RUN: FileCheck %s -check-prefix=CIR --input-file %t-cir.ll
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -emit-llvm %s -o 
%t.ll
 // RUN: FileCheck %s -check-prefix=OGCG --input-file %t.ll
 
+// MLIR-LABEL: llvm.func @main() -> i32
+// MLIR-SAME: attributes {dso_local, no_inline, no_proto}
+// MLIR: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// MLIR: %[[ALLOCA1:.*]] = llvm.alloca %[[C1]] x i32 {alignment = 4 : i64}
+// MLIR: %[[ALLOCA2:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
+// MLIR: %[[ALLOCA3:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
+// MLIR: omp.parallel {
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[ZERO1:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR: llvm.store %[[ZERO1]], %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[LOAD1:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
+// MLIR: %[[C10000:.*]] = llvm.mlir.constant(10000 : i32) : i32
+// MLIR: %[[CMP:.*]] = llvm.icmp "slt" %[[LOAD1]], %[[C10000]] : i32
+// MLIR: llvm.cond_br %[[CMP]], ^bb{{[0-9]+}}, ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[ZERO2:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR: llvm.store %[[ZERO2]], %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: %[[LOAD2:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
+// MLIR: %[[C1_I32:.*]] = llvm.mlir.constant(1 : i32) : i32
+// MLIR: %[[ADD:.*]] = llvm.add %[[LOAD2]], %[[C1_I32]] overflow<nsw> : i32
+// MLIR: llvm.store %[[ADD]], %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: llvm.br ^bb{{[0-9]+}}
+// MLIR: ^bb{{[0-9]+}}:
+// MLIR: omp.terminator
+// MLIR: %[[ZERO3:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR: llvm.store %[[ZERO3]], %{{.*}} {alignment = 4 : i64}
+// MLIR: %[[RETVAL:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
+// MLIR: llvm.return %[[RETVAL]] : i32
+
 // CIR-LABEL: define dso_local i32 @main()
 // CIR: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
 // CIR: %[[VAR1:.*]] = alloca i32, i64 1, align 4
diff --git a/clang/test/CIR/Lowering/omp.cir b/clang/test/CIR/Lowering/omp.cir
deleted file mode 100644
index 78ff4cf6444c4..0000000000000
--- a/clang/test/CIR/Lowering/omp.cir
+++ /dev/null
@@ -1,80 +0,0 @@
-// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
-// RUN: cir-translate %s -cir-to-llvmir --target x86_64-unknown-linux-gnu 
--disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
-!s32i = !cir.int<s, 32>
-
-// MLIR-LABEL: llvm.func @main() -> i32
-// MLIR-SAME: attributes {dso_local, no_inline, no_proto}
-// MLIR: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-// MLIR: %[[ALLOCA1:.*]] = llvm.alloca %[[C1]] x i32 {alignment = 4 : i64}
-// MLIR: %[[ALLOCA2:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
-// MLIR: %[[ALLOCA3:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
-// MLIR: omp.parallel {
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[ZERO1:.*]] = llvm.mlir.constant(0 : i32) : i32
-// MLIR: llvm.store %[[ZERO1]], %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[LOAD1:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
-// MLIR: %[[C10000:.*]] = llvm.mlir.constant(10000 : i32) : i32
-// MLIR: %[[CMP:.*]] = llvm.icmp "slt" %[[LOAD1]], %[[C10000]] : i32
-// MLIR: llvm.cond_br %[[CMP]], ^bb{{[0-9]+}}, ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[ZERO2:.*]] = llvm.mlir.constant(0 : i32) : i32
-// MLIR: llvm.store %[[ZERO2]], %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[LOAD2:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
-// MLIR: %[[C1_I32:.*]] = llvm.mlir.constant(1 : i32) : i32
-// MLIR: %[[ADD:.*]] = llvm.add %[[LOAD2]], %[[C1_I32]] overflow<nsw> : i32
-// MLIR: llvm.store %[[ADD]], %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: omp.terminator
-// MLIR: %[[ZERO3:.*]] = llvm.mlir.constant(0 : i32) : i32
-// MLIR: llvm.store %[[ZERO3]], %{{.*}} {alignment = 4 : i64}
-// MLIR: %[[RETVAL:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.return %[[RETVAL]] : i32
-
-// Test only key runtime calls for LLVM IR CodeGen
-// LLVM: call i32 @__kmpc_global_thread_num
-// LLVM: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr 
@main..omp_par, ptr %structArg)
-// LLVM: define internal void @main..omp_par(ptr noalias %tid.addr, ptr 
noalias %zero.addr, ptr %{{.*}})
-
-module {
-  cir.func no_inline no_proto dso_local @main() -> !s32i {
-    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
-    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["j"] {alignment = 4 : i64}
-    omp.parallel {
-      cir.scope {
-        %4 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : 
i64}
-        %5 = cir.const #cir.int<0> : !s32i
-        cir.store align(4) %5, %4 : !s32i, !cir.ptr<!s32i>
-        cir.for : cond {
-          %6 = cir.load align(4) %4 : !cir.ptr<!s32i>, !s32i
-          %7 = cir.const #cir.int<10000> : !s32i
-          %8 = cir.cmp(lt, %6, %7) : !s32i, !cir.bool
-          cir.condition(%8)
-        } body {
-          %6 = cir.const #cir.int<0> : !s32i
-          cir.store align(4) %6, %1 : !s32i, !cir.ptr<!s32i>
-          cir.yield
-        } step {
-          %6 = cir.load align(4) %4 : !cir.ptr<!s32i>, !s32i
-          %7 = cir.const #cir.int<1> : !s32i
-          %8 = cir.binop(add, %6, %7) nsw : !s32i
-          cir.store align(4) %8, %4 : !s32i, !cir.ptr<!s32i>
-          cir.yield
-        }
-      }
-      omp.terminator
-    }
-    %2 = cir.const #cir.int<0> : !s32i
-    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
-    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
-    cir.return %3 : !s32i
-  }
-}
-

>From 35116ba4d8cdd089d892c56baf7f6e7337c022a4 Mon Sep 17 00:00:00 2001
From: Jan Leyonberg <[email protected]>
Date: Wed, 4 Feb 2026 10:26:55 -0500
Subject: [PATCH 5/5] Move test and make check labels consistent with other
 tests. Remove checking of LLVM dialect.

---
 clang/test/CIR/CodeGenOpenMP/omp-llvmir.c | 169 +++++++++++++++++++++
 clang/test/CIR/CodeGenOpenMP/parallel.c   |   2 -
 clang/test/CIR/Lowering/omp.c             | 173 ----------------------
 3 files changed, 169 insertions(+), 175 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenOpenMP/omp-llvmir.c
 delete mode 100644 clang/test/CIR/Lowering/omp.c

diff --git a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c 
b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c
new file mode 100644
index 0000000000000..d32753ae4475b
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c
@@ -0,0 +1,169 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir 
-emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir 
-emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -emit-llvm %s -o 
%t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// CIR-LABEL: cir.func no_inline no_proto dso_local @main() -> !s32i {
+// CIR: [[RETVAL:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] 
{alignment = 4 : i64}
+// CIR: [[J:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["j"] {alignment = 4 : 
i64}
+// CIR:   omp.parallel {
+// CIR:     cir.scope {
+// CIR:       [[I:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] 
{alignment = 4 : i64}
+// CIR:       [[ZERO1:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:       cir.store align(4) [[ZERO1]], [[I]] : !s32i, !cir.ptr<!s32i>
+// CIR:       cir.for : cond {
+// CIR:         [[LOAD1:%.*]] = cir.load align(4) [[I]] : !cir.ptr<!s32i>, 
!s32i
+// CIR:         [[LIMIT:%.*]] = cir.const #cir.int<10000> : !s32i
+// CIR:         [[CMP:%.*]] = cir.cmp(lt, [[LOAD1]], [[LIMIT]]) : !s32i, 
!cir.bool
+// CIR:         cir.condition([[CMP]])
+// CIR:       } body {
+// CIR:         [[ZERO2:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:         cir.store align(4) [[ZERO2]], [[J]] : !s32i, !cir.ptr<!s32i>
+// CIR:         cir.yield
+// CIR:       } step {
+// CIR:         [[LOAD2:%.*]] = cir.load align(4) [[I]] : !cir.ptr<!s32i>, 
!s32i
+// CIR:         [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:         [[ADD:%.*]] = cir.binop(add, [[LOAD2]], [[ONE]]) nsw : !s32i
+// CIR:         cir.store align(4) [[ADD]], [[I]] : !s32i, !cir.ptr<!s32i>
+// CIR:         cir.yield
+// CIR:       }
+// CIR:     }
+// CIR:     omp.terminator
+// CIR:   }
+// CIR:   [[ZERO3:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:   cir.store [[ZERO3]], [[RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CIR:   [[RET:%.*]] = cir.load [[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR:   cir.return [[RET]] : !s32i
+// CIR: }
+
+// LLVM-LABEL: define dso_local i32 @main()
+// LLVM: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
+// LLVM: %[[VAR1:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[VAR2:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[VAR3:.*]] = alloca i32, i64 1, align 4
+// LLVM: br label %[[ENTRY:.*]]
+
+// LLVM: [[ENTRY]]:
+// LLVM: %[[THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// LLVM: br label %[[OMP_PARALLEL:.*]]
+
+// LLVM: [[OMP_PARALLEL]]:
+// LLVM: %[[GEP1:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, 
i32 0
+// LLVM: store ptr %[[VAR1]], ptr %[[GEP1]], align 8
+// LLVM: %[[GEP2:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, 
i32 1
+// LLVM: store ptr %[[VAR3]], ptr %[[GEP2]], align 8
+// LLVM: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @main..omp_par, ptr %[[STRUCTARG]])
+// LLVM: br label %[[OMP_PAR_EXIT:.*]]
+
+// LLVM: [[OMP_PAR_EXIT]]:
+// LLVM: store i32 0, ptr %[[VAR2]], align 4
+// LLVM: %[[LOAD:.*]] = load i32, ptr %[[VAR2]], align 4
+// LLVM: ret i32 %[[LOAD]]
+
+// LLVM-LABEL: define internal void @main..omp_par(ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %{{.*}})
+// LLVM: [[PAR_ENTRY:.*]]:
+// LLVM: %[[GEP_A:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 0
+// LLVM: %[[LOAD_A:.*]] = load ptr, ptr %[[GEP_A]], align 8
+// LLVM: %[[GEP_B:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 1
+// LLVM: %[[LOAD_B:.*]] = load ptr, ptr %[[GEP_B]], align 8
+// LLVM: %[[TID_LOCAL:.*]] = alloca i32, align 4
+// LLVM: %[[TID_VAL:.*]] = load i32, ptr %{{.*}}, align 4
+// LLVM: store i32 %[[TID_VAL]], ptr %[[TID_LOCAL]], align 4
+// LLVM: %{{.*}} = load i32, ptr %[[TID_LOCAL]], align 4
+// LLVM: br label %[[AFTER_ALLOCA:.*]]
+
+// LLVM: [[AFTER_ALLOCA]]:
+// LLVM: br label %[[PAR_REGION:.*]]
+
+// LLVM: [[PAR_REGION]]:
+// LLVM: br label %[[PAR_REGION1:.*]]
+
+// LLVM: [[PAR_REGION1]]:
+// LLVM: br label %[[PAR_REGION2:.*]]
+
+// LLVM: [[PAR_REGION2]]:
+// LLVM: store i32 0, ptr %[[LOAD_A]], align 4
+// LLVM: br label %[[PAR_REGION3:.*]]
+
+// LLVM: [[PAR_REGION3]]:
+// LLVM: %[[I_LOAD:.*]] = load i32, ptr %[[LOAD_A]], align 4
+// LLVM: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000
+// LLVM: br i1 %[[CMP]], label %[[PAR_REGION4:.*]], label %[[PAR_REGION6:.*]]
+
+// LLVM: [[PAR_REGION6]]:
+// LLVM: br label %[[PAR_REGION7:.*]]
+
+// LLVM: [[PAR_REGION7]]:
+// LLVM: br label %[[REGION_CONT:.*]]
+
+// LLVM: [[REGION_CONT]]:
+// LLVM: br label %[[PRE_FINALIZE:.*]]
+
+// LLVM: [[PRE_FINALIZE]]:
+// LLVM: br label %[[FINI:.*]]
+
+// LLVM: [[FINI]]:
+// LLVM: br label %[[EXIT_STUB:.*]]
+
+// LLVM: [[PAR_REGION4]]:
+// LLVM: store i32 0, ptr %[[LOAD_B]], align 4
+// LLVM: br label %[[PAR_REGION5:.*]]
+
+// LLVM: [[PAR_REGION5]]:
+// LLVM: %[[I_LOAD2:.*]] = load i32, ptr %[[LOAD_A]], align 4
+// LLVM: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1
+// LLVM: store i32 %[[ADD]], ptr %[[LOAD_A]], align 4
+// LLVM: br label %[[PAR_REGION3]]
+
+// LLVM: [[EXIT_STUB]]:
+// LLVM: ret void
+
+// OGCG-LABEL: define dso_local i32 @main()
+// OGCG: [[ENTRY:.*]]:
+// OGCG: %[[RETVAL:.*]] = alloca i32, align 4
+// OGCG: %[[J:.*]] = alloca i32, align 4
+// OGCG: store i32 0, ptr %[[RETVAL]], align 4
+// OGCG: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @main.omp_outlined, ptr %[[J]])
+// OGCG: ret i32 0
+
+// OGCG-LABEL: define internal void @main.omp_outlined(ptr noalias noundef %{{.*}}, ptr noalias noundef %{{.*}}, ptr noundef nonnull align 4 dereferenceable(4) %{{.*}})
+// OGCG: [[OUTLINED_ENTRY:.*]]:
+// OGCG: %[[GLOBAL_TID_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[BOUND_TID_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[J_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[I:.*]] = alloca i32, align 4
+// OGCG: store ptr %{{.*}}, ptr %[[GLOBAL_TID_ADDR]], align 8
+// OGCG: store ptr %{{.*}}, ptr %[[BOUND_TID_ADDR]], align 8
+// OGCG: store ptr %{{.*}}, ptr %[[J_ADDR]], align 8
+// OGCG: %[[J_LOAD:.*]] = load ptr, ptr %[[J_ADDR]], align 8
+// OGCG: store i32 0, ptr %[[I]], align 4
+// OGCG: br label %[[FOR_COND:.*]]
+
+// OGCG: [[FOR_COND]]:
+// OGCG: %[[I_LOAD:.*]] = load i32, ptr %[[I]], align 4
+// OGCG: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000
+// OGCG: br i1 %[[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+
+// OGCG: [[FOR_BODY]]:
+// OGCG: store i32 0, ptr %[[J_LOAD]], align 4
+// OGCG: br label %[[FOR_INC:.*]]
+
+// OGCG: [[FOR_INC]]:
+// OGCG: %[[I_LOAD2:.*]] = load i32, ptr %[[I]], align 4
+// OGCG: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1
+// OGCG: store i32 %[[ADD]], ptr %[[I]], align 4
+// OGCG: br label %[[FOR_COND]]
+
+// OGCG: [[FOR_END]]:
+// OGCG: ret void
+
+int main() {
+  int j;
+#pragma omp parallel
+  for (int i = 0; i < 10000; i=i+1)
+    j = 0;
+
+  return 0;
+}
diff --git a/clang/test/CIR/CodeGenOpenMP/parallel.c b/clang/test/CIR/CodeGenOpenMP/parallel.c
index 3b43fff62c5a4..fcf3683cfb6c4 100644
--- a/clang/test/CIR/CodeGenOpenMP/parallel.c
+++ b/clang/test/CIR/CodeGenOpenMP/parallel.c
@@ -18,8 +18,6 @@ void emit_simple_parallel() {
   // CHECK-NEXT: }
 #pragma omp parallel
   {
-    // TODO(OMP): We don't yet emit captured stmt, so the body of this is lost,x
-    // thus we don't emit the 'during' call.
     during(i);
   }
   // CHECK-NEXT: omp.parallel {
diff --git a/clang/test/CIR/Lowering/omp.c b/clang/test/CIR/Lowering/omp.c
deleted file mode 100644
index ea6b33be9880a..0000000000000
--- a/clang/test/CIR/Lowering/omp.c
+++ /dev/null
@@ -1,173 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fopenmp -emit-cir %s -o %t.cir
-// RUN: cir-opt %t.cir -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
-// RUN: cir-translate %t.cir -cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering  | FileCheck %s -check-prefix=CIR
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fopenmp -emit-llvm %s -o %t-cir.ll
-// RUN: FileCheck %s -check-prefix=CIR --input-file %t-cir.ll
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -emit-llvm %s -o %t.ll
-// RUN: FileCheck %s -check-prefix=OGCG --input-file %t.ll
-
-// MLIR-LABEL: llvm.func @main() -> i32
-// MLIR-SAME: attributes {dso_local, no_inline, no_proto}
-// MLIR: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-// MLIR: %[[ALLOCA1:.*]] = llvm.alloca %[[C1]] x i32 {alignment = 4 : i64}
-// MLIR: %[[ALLOCA2:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
-// MLIR: %[[ALLOCA3:.*]] = llvm.alloca %{{.*}} x i32 {alignment = 4 : i64}
-// MLIR: omp.parallel {
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[ZERO1:.*]] = llvm.mlir.constant(0 : i32) : i32
-// MLIR: llvm.store %[[ZERO1]], %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[LOAD1:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
-// MLIR: %[[C10000:.*]] = llvm.mlir.constant(10000 : i32) : i32
-// MLIR: %[[CMP:.*]] = llvm.icmp "slt" %[[LOAD1]], %[[C10000]] : i32
-// MLIR: llvm.cond_br %[[CMP]], ^bb{{[0-9]+}}, ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[ZERO2:.*]] = llvm.mlir.constant(0 : i32) : i32
-// MLIR: llvm.store %[[ZERO2]], %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: %[[LOAD2:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
-// MLIR: %[[C1_I32:.*]] = llvm.mlir.constant(1 : i32) : i32
-// MLIR: %[[ADD:.*]] = llvm.add %[[LOAD2]], %[[C1_I32]] overflow<nsw> : i32
-// MLIR: llvm.store %[[ADD]], %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: llvm.br ^bb{{[0-9]+}}
-// MLIR: ^bb{{[0-9]+}}:
-// MLIR: omp.terminator
-// MLIR: %[[ZERO3:.*]] = llvm.mlir.constant(0 : i32) : i32
-// MLIR: llvm.store %[[ZERO3]], %{{.*}} {alignment = 4 : i64}
-// MLIR: %[[RETVAL:.*]] = llvm.load %{{.*}} {alignment = 4 : i64}
-// MLIR: llvm.return %[[RETVAL]] : i32
-
-// CIR-LABEL: define dso_local i32 @main()
-// CIR: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8
-// CIR: %[[VAR1:.*]] = alloca i32, i64 1, align 4
-// CIR: %[[VAR2:.*]] = alloca i32, i64 1, align 4
-// CIR: %[[VAR3:.*]] = alloca i32, i64 1, align 4
-// CIR: br label %[[ENTRY:.*]]
-
-// CIR: [[ENTRY]]:
-// CIR: %[[THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
-// CIR: br label %[[OMP_PARALLEL:.*]]
-
-// CIR: [[OMP_PARALLEL]]:
-// CIR: %[[GEP1:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
-// CIR: store ptr %[[VAR1]], ptr %[[GEP1]], align 8
-// CIR: %[[GEP2:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1
-// CIR: store ptr %[[VAR3]], ptr %[[GEP2]], align 8
-// CIR: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @main..omp_par, ptr %[[STRUCTARG]])
-// CIR: br label %[[OMP_PAR_EXIT:.*]]
-
-// CIR: [[OMP_PAR_EXIT]]:
-// CIR: store i32 0, ptr %[[VAR2]], align 4
-// CIR: %[[LOAD:.*]] = load i32, ptr %[[VAR2]], align 4
-// CIR: ret i32 %[[LOAD]]
-
-// CIR-LABEL: define internal void @main..omp_par(ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %{{.*}})
-// CIR: [[PAR_ENTRY:.*]]:
-// CIR: %[[GEP_A:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 0
-// CIR: %[[LOAD_A:.*]] = load ptr, ptr %[[GEP_A]], align 8
-// CIR: %[[GEP_B:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 1
-// CIR: %[[LOAD_B:.*]] = load ptr, ptr %[[GEP_B]], align 8
-// CIR: %[[TID_LOCAL:.*]] = alloca i32, align 4
-// CIR: %[[TID_VAL:.*]] = load i32, ptr %{{.*}}, align 4
-// CIR: store i32 %[[TID_VAL]], ptr %[[TID_LOCAL]], align 4
-// CIR: %{{.*}} = load i32, ptr %[[TID_LOCAL]], align 4
-// CIR: br label %[[AFTER_ALLOCA:.*]]
-
-// CIR: [[AFTER_ALLOCA]]:
-// CIR: br label %[[PAR_REGION:.*]]
-
-// CIR: [[PAR_REGION]]:
-// CIR: br label %[[PAR_REGION1:.*]]
-
-// CIR: [[PAR_REGION1]]:
-// CIR: br label %[[PAR_REGION2:.*]]
-
-// CIR: [[PAR_REGION2]]:
-// CIR: store i32 0, ptr %[[LOAD_A]], align 4
-// CIR: br label %[[PAR_REGION3:.*]]
-
-// CIR: [[PAR_REGION3]]:
-// CIR: %[[I_LOAD:.*]] = load i32, ptr %[[LOAD_A]], align 4
-// CIR: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000
-// CIR: br i1 %[[CMP]], label %[[PAR_REGION4:.*]], label %[[PAR_REGION6:.*]]
-
-// CIR: [[PAR_REGION6]]:
-// CIR: br label %[[PAR_REGION7:.*]]
-
-// CIR: [[PAR_REGION7]]:
-// CIR: br label %[[REGION_CONT:.*]]
-
-// CIR: [[REGION_CONT]]:
-// CIR: br label %[[PRE_FINALIZE:.*]]
-
-// CIR: [[PRE_FINALIZE]]:
-// CIR: br label %[[FINI:.*]]
-
-// CIR: [[FINI]]:
-// CIR: br label %[[EXIT_STUB:.*]]
-
-// CIR: [[PAR_REGION4]]:
-// CIR: store i32 0, ptr %[[LOAD_B]], align 4
-// CIR: br label %[[PAR_REGION5:.*]]
-
-// CIR: [[PAR_REGION5]]:
-// CIR: %[[I_LOAD2:.*]] = load i32, ptr %[[LOAD_A]], align 4
-// CIR: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1
-// CIR: store i32 %[[ADD]], ptr %[[LOAD_A]], align 4
-// CIR: br label %[[PAR_REGION3]]
-
-// CIR: [[EXIT_STUB]]:
-// CIR: ret void
-
-// OGCG-LABEL: define dso_local i32 @main()
-// OGCG: [[ENTRY:.*]]:
-// OGCG: %[[RETVAL:.*]] = alloca i32, align 4
-// OGCG: %[[J:.*]] = alloca i32, align 4
-// OGCG: store i32 0, ptr %[[RETVAL]], align 4
-// OGCG: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @main.omp_outlined, ptr %[[J]])
-// OGCG: ret i32 0
-
-// OGCG-LABEL: define internal void @main.omp_outlined(ptr noalias noundef %{{.*}}, ptr noalias noundef %{{.*}}, ptr noundef nonnull align 4 dereferenceable(4) %{{.*}})
-// OGCG: [[OUTLINED_ENTRY:.*]]:
-// OGCG: %[[GLOBAL_TID_ADDR:.*]] = alloca ptr, align 8
-// OGCG: %[[BOUND_TID_ADDR:.*]] = alloca ptr, align 8
-// OGCG: %[[J_ADDR:.*]] = alloca ptr, align 8
-// OGCG: %[[I:.*]] = alloca i32, align 4
-// OGCG: store ptr %{{.*}}, ptr %[[GLOBAL_TID_ADDR]], align 8
-// OGCG: store ptr %{{.*}}, ptr %[[BOUND_TID_ADDR]], align 8
-// OGCG: store ptr %{{.*}}, ptr %[[J_ADDR]], align 8
-// OGCG: %[[J_LOAD:.*]] = load ptr, ptr %[[J_ADDR]], align 8
-// OGCG: store i32 0, ptr %[[I]], align 4
-// OGCG: br label %[[FOR_COND:.*]]
-
-// OGCG: [[FOR_COND]]:
-// OGCG: %[[I_LOAD:.*]] = load i32, ptr %[[I]], align 4
-// OGCG: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000
-// OGCG: br i1 %[[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-
-// OGCG: [[FOR_BODY]]:
-// OGCG: store i32 0, ptr %[[J_LOAD]], align 4
-// OGCG: br label %[[FOR_INC:.*]]
-
-// OGCG: [[FOR_INC]]:
-// OGCG: %[[I_LOAD2:.*]] = load i32, ptr %[[I]], align 4
-// OGCG: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1
-// OGCG: store i32 %[[ADD]], ptr %[[I]], align 4
-// OGCG: br label %[[FOR_COND]]
-
-// OGCG: [[FOR_END]]:
-// OGCG: ret void
-
-int main() {
-  int j;
-#pragma omp parallel
-  for (int i = 0; i < 10000; i=i+1)
-    j = 0;
-
-  return 0;
-}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][OpenMP] Enable lowering of the OpenMP dialect to LLVM IR (PR #178515)

Reply via email to