https://github.com/Parigi created https://github.com/llvm/llvm-project/pull/181841
OMPForDirective emits omp.wsloop with omp.loop_nest, using CIR ops for bounds/step and converting them to standard MLIR integers via UnrealizedConversionCastOp. Add the reconcile-unrealized-casts pass to the CIR-to-LLVM pipeline. Add CIR-level and LLVM IR lowering tests. From b937e1e308df7de84265fd57282cd2d33c40ecc2 Mon Sep 17 00:00:00 2001 From: Luca Parigi <[email protected]> Date: Tue, 17 Feb 2026 16:41:08 +0100 Subject: [PATCH] [CIR][OpenMP] Emit #pragma omp for as omp.wsloop + omp.loop_nest OMPForDirective emits omp.wsloop with omp.loop_nest, using CIR ops for bounds/step and converting them to standard MLIR integers via UnrealizedConversionCastOp. Add the reconcile-unrealized-casts pass to the CIR-to-LLVM pipeline. Add CIR-level and LLVM IR lowering tests. --- clang/lib/CIR/CodeGen/CIRGenFunction.h | 13 + clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 170 ++++++--- clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp | 202 ++++++++++- .../CIR/Lowering/DirectToLLVM/CMakeLists.txt | 1 + .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 6 + clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c | 326 ++++++++++++++++++ clang/test/CIR/Lowering/pragma-omp-for.c | 188 ++++++++++ 7 files changed, 854 insertions(+), 52 deletions(-) create mode 100644 clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c create mode 100644 clang/test/CIR/Lowering/pragma-omp-for.c diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index cc0087ba2d6bd..32e3044dc08a5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -63,6 +63,19 @@ class CIRGenFunction : public CIRGenTypeCache { /// is where the next operations will be introduced. CIRGenBuilderTy &builder; + /// State used to communicate OpenMP loop bounds from `emitOMPForDirective` + /// to `emitForStmt`. + struct LoopBounds { + mlir::Value lowerBound; + mlir::Value upperBound; + mlir::Value step; + mlir::Type inductionVarType; + const VarDecl *inductionVar; + bool inclusive; + }; + + std::optional<LoopBounds> currentOMPLoopBounds; + /// A jump destination is an abstract label, branching to which may /// require a jump out through normal cleanups. struct JumpDest { diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index db3827340c455..f6b3b976ac499 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -22,6 +22,10 @@ #include "clang/AST/StmtOpenMP.h" #include "clang/CIR/MissingFeatures.h" +// Required to construct OpenMP operations such as `omp.wsloop` and +// `omp.loop_nest` during lowering. +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" + using namespace clang; using namespace clang::CIRGen; using namespace cir; @@ -939,16 +943,36 @@ CIRGenFunction::emitCXXForRangeStmt(const CXXForRangeStmt &s, return mlir::success(); } +/// Emit a `for` statement as either a CIR `cir.for` or, when inside an +/// OpenMP `#pragma omp for`, an `omp.loop_nest` within the wsloop created +/// by emitOMPForDirective. + mlir::LogicalResult CIRGenFunction::emitForStmt(const ForStmt &s) { + + // CIR for-loop operation (used in the non-OpenMP case). cir::ForOp forOp; + // OpenMP loop nest operation (used when inside `omp.wsloop`). + mlir::omp::LoopNestOp loopNestOp; + + auto scopeLoc = getLoc(s.getSourceRange()); + bool isOpenMPFor = currentOMPLoopBounds.has_value(); + + // This lambda emits either an OpenMP `omp.loop_nest` or a regular CIR + // `cir.for`, depending on whether we are inside an OpenMP for directive. // TODO: pass in an array of attributes.
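+  // For illustration, under `#pragma omp for` a canonical loop such as
+  //   for (int i = 0; i < n; i++) body(i);
+  // is emitted roughly as (SSA names illustrative, not literal output):
+  //   omp.wsloop {
+  //     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+  //       // store %iv into the induction variable alloca, emit the body
+  //       omp.yield
+  //     }
+  //   }
+  // whereas the non-OpenMP path still produces a plain `cir.for`.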
auto forStmtBuilder = [&]() -> mlir::LogicalResult { mlir::LogicalResult loopRes = mlir::success(); - // Evaluate the first part before the loop. - if (s.getInit()) - if (emitStmt(s.getInit(), /*useCurrentScope=*/true).failed()) - return mlir::failure(); + + // For OpenMP loops, init is emitted by emitOMPForDirective before the + // wsloop so that the alloca lives outside the loop region. + if (!isOpenMPFor) { + // Evaluate the first part before the loop. + if (s.getInit()) + if (emitStmt(s.getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + } + assert(!cir::MissingFeatures::loopInfoStack()); // In the classic codegen, if there are any cleanups between here and the // loop-exit scope, a block is created to stage the loop exit. We probably @@ -956,58 +980,110 @@ mlir::LogicalResult CIRGenFunction::emitForStmt(const ForStmt &s) { // to be sure we handle all cases. assert(!cir::MissingFeatures::requiresCleanups()); - forOp = builder.createFor( - getLoc(s.getSourceRange()), - /*condBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - assert(!cir::MissingFeatures::createProfileWeightsForLoop()); - assert(!cir::MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); - mlir::Value condVal; - if (s.getCond()) { - // If the for statement has a condition scope, - // emit the local variable declaration. - if (s.getConditionVariable()) - emitDecl(*s.getConditionVariable()); - // C99 6.8.5p2/p4: The first substatement is executed if the - // expression compares unequal to 0. The condition must be a - // scalar type. - condVal = evaluateExprAsBool(s.getCond()); - } else { - condVal = cir::ConstantOp::create(b, loc, builder.getTrueAttr()); - } - builder.createCondition(condVal); - }, - /*bodyBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - // The scope of the for loop body is nested within the scope of the - // for loop's init-statement and condition. - if (emitStmt(s.getBody(), /*useCurrentScope=*/false).failed()) - loopRes = mlir::failure(); - emitStopPoint(&s); - }, - /*stepBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - if (s.getInc()) - if (emitStmt(s.getInc(), /*useCurrentScope=*/true).failed()) + // OpenMP path: emit omp.loop_nest using bounds from emitOMPForDirective. + if (isOpenMPFor) { + mlir::OpBuilder::InsertionGuard guard(builder); + + mlir::Type loopBoundsType = currentOMPLoopBounds->inductionVarType; + mlir::Value lb = currentOMPLoopBounds->lowerBound; + mlir::Value ub = currentOMPLoopBounds->upperBound; + mlir::Value step = currentOMPLoopBounds->step; + bool inclusive = currentOMPLoopBounds->inclusive; + const VarDecl *inductionVar = currentOMPLoopBounds->inductionVar; + + loopNestOp = loopNestOp.create(builder, scopeLoc, 1, lb, ub, step, + inclusive, nullptr); + + mlir::Region ®ion = loopNestOp.getRegion(); + mlir::Block *block = new mlir::Block(); + region.push_back(block); + + block->addArgument(loopBoundsType, scopeLoc); + builder.setInsertionPointToStart(block); + + // Store the IV block argument into the loop variable alloca, converting + // back from standard integer to CIR integer type. 
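+      // E.g. (SSA names illustrative):
+      //   %civ = builtin.unrealized_conversion_cast %iv : i32 to !s32i
+      //   cir.store %civ, %i_alloca : !s32i, !cir.ptr<!s32i>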
+ mlir::Value iv = block->getArgument(0); + Address inductionAddr = getAddrOfLocalVar(inductionVar); + mlir::Value civVal = + mlir::UnrealizedConversionCastOp::create( + builder, scopeLoc, inductionAddr.getElementType(), iv) + .getResult(0); + cir::StoreOp::create(builder, scopeLoc, civVal, + inductionAddr.getPointer(), + /*is_volatile=*/nullptr, /*alignment=*/nullptr, + /*sync_scope=*/nullptr, /*mem_order=*/nullptr); + + // Emit the loop body. + if (s.getBody()) { + if (emitStmt(s.getBody(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + } + + mlir::omp::YieldOp::create(builder, getLoc(s.getEndLoc())); + } else { + forOp = builder.createFor( + getLoc(s.getSourceRange()), + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + assert(!cir::MissingFeatures::createProfileWeightsForLoop()); + assert( + !cir::MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + mlir::Value condVal; + if (s.getCond()) { + // If the for statement has a condition scope, + // emit the local variable declaration. + if (s.getConditionVariable()) + emitDecl(*s.getConditionVariable()); + // C99 6.8.5p2/p4: The first substatement is executed if the + // expression compares unequal to 0. The condition must be a + // scalar type. + condVal = evaluateExprAsBool(s.getCond()); + } else { + condVal = cir::ConstantOp::create(b, loc, builder.getTrueAttr()); + } + builder.createCondition(condVal); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // The scope of the for loop body is nested within the scope of the + // for loop's init-statement and condition. + if (emitStmt(s.getBody(), /*useCurrentScope=*/false).failed()) loopRes = mlir::failure(); - builder.createYield(loc); - }); + emitStopPoint(&s); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + if (s.getInc()) + if (emitStmt(s.getInc(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + builder.createYield(loc); + }); + } return loopRes; }; auto res = mlir::success(); - auto scopeLoc = getLoc(s.getSourceRange()); - cir::ScopeOp::create(builder, scopeLoc, /*scopeBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - LexicalScope lexScope{*this, loc, - builder.getInsertionBlock()}; - res = forStmtBuilder(); - }); + + if (isOpenMPFor) { + res = forStmtBuilder(); + } else { + cir::ScopeOp::create(builder, scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, loc, + builder.getInsertionBlock()}; + res = forStmtBuilder(); + }); + } if (res.failed()) return res; - terminateBody(builder, forOp.getBody(), getLoc(s.getEndLoc())); + // Only regular CIR loops require explicit termination. + // OpenMP wsloop/loop_nest regions terminate via omp.yield. 
+ if (!isOpenMPFor) { + terminateBody(builder, forOp.getBody(), getLoc(s.getEndLoc())); + } return mlir::success(); } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp index 0d3b44db98307..a3eab79fbba64 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp @@ -13,8 +13,11 @@ #include "CIRGenBuilder.h" #include "CIRGenFunction.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" + using namespace clang; using namespace clang::CIRGen; @@ -65,6 +68,200 @@ CIRGenFunction::emitOMPParallelDirective(const OMPParallelDirective &s) { return res; } +// Helpers and implementation for emitOMPForDirective, which lowers an +// OMPForDirective into an omp.wsloop + omp.loop_nest. + +namespace { +/// Extract integer literal value from an expression, if present. +static std::optional<int64_t> getIntLiteralValue(const Expr *expr) { + if (const auto *intLit = dyn_cast<IntegerLiteral>(expr->IgnoreImpCasts())) + return intLit->getValue().getSExtValue(); + return std::nullopt; +} + +/// Ensure a CIR value has the given CIR integer type, inserting an integral +/// cast if necessary. If the value is a CIR pointer, its pointee is loaded +/// first. +static mlir::Value ensureCIRIntType(CIRGenBuilderTy &builder, + mlir::Location loc, mlir::Value cirValue, + cir::IntType targetCIRType) { + if (mlir::isa<cir::PointerType>(cirValue.getType())) + cirValue = cir::LoadOp::create(builder, loc, cirValue).getResult(); + + if (cirValue.getType() == targetCIRType) + return cirValue; + + return builder.createCast(loc, cir::CastKind::integral, cirValue, + targetCIRType); +} + +/// Convert a CIR integer value to a standard MLIR integer type suitable for +/// use as an omp.loop_nest operand. +static mlir::Value cirIntToStdInt(mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value cirValue) { + auto cirIntType = mlir::cast<cir::IntType>(cirValue.getType()); + mlir::Type stdIntType = builder.getIntegerType(cirIntType.getWidth()); + return mlir::UnrealizedConversionCastOp::create(builder, loc, stdIntType, + cirValue) + .getResult(0); +} +} // anonymous namespace + +mlir::LogicalResult +CIRGenFunction::emitOMPForDirective(const OMPForDirective &s) { + + mlir::LogicalResult res = mlir::success(); + mlir::Location begin = getLoc(s.getBeginLoc()); + + // Extract the underlying canonical `for` loop from the CapturedStmt. + const CapturedStmt *capturedStmt = s.getInnermostCapturedStmt(); + const ForStmt *forStmt = dyn_cast<ForStmt>(capturedStmt->getCapturedStmt()); + + if (!forStmt) + return mlir::failure(); + + // Loop bounds are first built as CIR integer values, then converted to + // standard MLIR integers via UnrealizedConversionCastOp before being + // passed to omp.loop_nest (which requires IntLikeType operands). + mlir::Value lowerBound; + mlir::Value upperBound; + mlir::Value step; + bool inclusive = false; + + // Extract loop variable type and lower bound. + const auto *declStmt = dyn_cast_or_null<DeclStmt>(forStmt->getInit()); + const auto *varDecl = + declStmt ? dyn_cast<VarDecl>(declStmt->getSingleDecl()) : nullptr; + + if (!varDecl) + return mlir::failure(); + + // The loop variable's CIR integer type is the canonical type for all bounds.
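+  // E.g. for `for (int i = 0; i < ub; i = i + step)` with `long ub` and
+  // `short step`, both the bound and the step are first brought to !s32i
+  // (the type of `i`) via `cir.cast integral` before any conversion to a
+  // standard integer (see emit_for_with_vars in the CIR test below).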
+ QualType loopVarQType = varDecl->getType(); + auto cirType = convertType(loopVarQType); + auto cirIntType = mlir::cast<cir::IntType>(cirType); + + // Extract lower bound. + if (!varDecl->hasInit()) + return mlir::failure(); + + if (auto constVal = getIntLiteralValue(varDecl->getInit())) { + lowerBound = builder.getConstInt(begin, cirIntType, *constVal); + } else { + mlir::Value cirValue = emitScalarExpr(varDecl->getInit()); + lowerBound = ensureCIRIntType(builder, begin, cirValue, cirIntType); + } + + // Extract upper bound and comparison operator. + const auto *condBinOp = dyn_cast_or_null<BinaryOperator>(forStmt->getCond()); + if (!condBinOp) + return mlir::failure(); + + BinaryOperatorKind opKind = condBinOp->getOpcode(); + + // Determine which side of the comparison holds the upper bound. + // Canonical forms: `i < ub`, `i <= ub` (var on LHS, bound on RHS) + // `ub > i`, `ub >= i` (bound on LHS, var on RHS) + const Expr *boundExpr = nullptr; + if (opKind == BO_LT || opKind == BO_LE) { + boundExpr = condBinOp->getRHS(); + inclusive = (opKind == BO_LE); + } else if (opKind == BO_GT || opKind == BO_GE) { + boundExpr = condBinOp->getLHS(); + inclusive = (opKind == BO_GE); + } else { + return mlir::failure(); + } + + if (auto constVal = getIntLiteralValue(boundExpr)) { + upperBound = builder.getConstInt(begin, cirIntType, *constVal); + } else { + mlir::Value cirValue = emitScalarExpr(boundExpr); + upperBound = ensureCIRIntType(builder, begin, cirValue, cirIntType); + } + + // Extract step. + if (const auto *unaryOp = + dyn_cast_or_null<UnaryOperator>(forStmt->getInc())) { + int64_t val = unaryOp->isIncrementOp() ? 1 : -1; + step = builder.getConstInt(begin, cirIntType, val); + } else if (const auto *binOp = + dyn_cast_or_null<BinaryOperator>(forStmt->getInc())) { + const Expr *stepExpr = nullptr; + + if (binOp->isCompoundAssignmentOp()) { + stepExpr = binOp->getRHS(); + } else if (binOp->isAssignmentOp()) { + // i = i + step or i = step + i + if (auto *subBinOp = + dyn_cast<BinaryOperator>(binOp->getRHS()->IgnoreImpCasts())) { + const Expr *lhs = subBinOp->getLHS()->IgnoreImpCasts(); + const Expr *rhs = subBinOp->getRHS()->IgnoreImpCasts(); + // Identify which operand is the loop variable and which is the step. + if (auto *lhsRef = dyn_cast<DeclRefExpr>(lhs)) { + stepExpr = (lhsRef->getDecl() == varDecl) ? rhs : lhs; + } else if (auto *rhsRef = dyn_cast<DeclRefExpr>(rhs)) { + stepExpr = (rhsRef->getDecl() == varDecl) ? lhs : rhs; + } + } + } + + if (stepExpr) { + if (auto constVal = getIntLiteralValue(stepExpr)) { + step = builder.getConstInt(begin, cirIntType, *constVal); + } else { + mlir::Value cirValue = emitScalarExpr(stepExpr); + step = ensureCIRIntType(builder, begin, cirValue, cirIntType); + } + } + } + + // Default to unit step if not recognized. + if (!step) + step = builder.getConstInt(begin, cirIntType, 1); + + // Emit init, convert bounds to std integers, and create the wsloop. + + // Emit the loop init statement (e.g. `int i = 0`) to create the alloca + // for the induction variable *before* the wsloop. + if (forStmt->getInit()) + if (emitStmt(forStmt->getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + // Convert CIR integer bounds to standard MLIR integers at the boundary. + // omp.loop_nest requires IntLikeType (AnyInteger | Index), not CIR types. 
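+  // E.g. an !s32i lower bound becomes (SSA names illustrative):
+  //   %lb_std = builtin.unrealized_conversion_cast %lb_cir : !s32i to i32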
+ mlir::Value stdLB = cirIntToStdInt(builder, begin, lowerBound); + mlir::Value stdUB = cirIntToStdInt(builder, begin, upperBound); + mlir::Value stdStep = cirIntToStdInt(builder, begin, step); + mlir::Type loopBoundsType = stdLB.getType(); + + currentOMPLoopBounds = + LoopBounds{stdLB, stdUB, stdStep, loopBoundsType, varDecl, inclusive}; + + // Create wsloop with empty region + llvm::SmallVector<mlir::Type> retTy; + llvm::SmallVector<mlir::Value> operands; + auto wsloopOp = mlir::omp::WsloopOp::create(builder, begin, retTy, operands); + + mlir::Region ®ion = wsloopOp.getRegion(); + mlir::Block *block = new mlir::Block(); + region.push_back(block); + + // Emit the ForStmt body (will create loop_nest when it detects OpenMP + // context) + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(block); + + if (emitStmt(forStmt, /*useCurrentScope=*/false).failed()) { + res = mlir::failure(); + } + + // Clear loop-bound state + currentOMPLoopBounds = std::nullopt; + + return res; +} + mlir::LogicalResult CIRGenFunction::emitOMPTaskwaitDirective(const OMPTaskwaitDirective &s) { getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPTaskwaitDirective"); @@ -113,11 +310,6 @@ CIRGenFunction::emitOMPFuseDirective(const OMPFuseDirective &s) { return mlir::failure(); } mlir::LogicalResult -CIRGenFunction::emitOMPForDirective(const OMPForDirective &s) { - getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPForDirective"); - return mlir::failure(); -} -mlir::LogicalResult CIRGenFunction::emitOMPForSimdDirective(const OMPForSimdDirective &s) { getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPForSimdDirective"); return mlir::failure(); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt index c7467fe40ba30..49864dcdb62d5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt +++ b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt @@ -22,6 +22,7 @@ add_clang_library(clangCIRLoweringDirectToLLVM MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIROpenMPToLLVMIRTranslation + MLIRReconcileUnrealizedCasts MLIRIR ) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 28b3454d20613..0feeaf748fd75 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -17,6 +17,7 @@ #include <optional> #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -3562,6 +3563,10 @@ void ConvertCIRToLLVMPass::runOnOperation() { target.addIllegalDialect<mlir::BuiltinDialect, cir::CIRDialect, mlir::func::FuncDialect>(); + // Allow unrealized conversion casts to survive CIR-to-LLVM conversion. + // They are resolved by the reconcile-unrealized-casts pass that runs after. 
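+  // E.g. a round-trip pair such as (SSA names illustrative)
+  //   %a = builtin.unrealized_conversion_cast %x : !s32i to i32
+  //   %b = builtin.unrealized_conversion_cast %a : i32 to !s32i
+  // folds away once both sides have been type-converted, leaving direct
+  // uses of the original value.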
+ target.addLegalOp<mlir::UnrealizedConversionCastOp>(); + llvm::SmallVector<mlir::Operation *> ops; ops.push_back(module); collectUnreachable(module, ops); @@ -4800,6 +4805,7 @@ std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() { void populateCIRToLLVMPasses(mlir::OpPassManager &pm) { mlir::populateCIRPreLoweringPasses(pm); pm.addPass(createConvertCIRToLLVMPass()); + pm.addPass(mlir::createReconcileUnrealizedCastsPass()); } std::unique_ptr<llvm::Module> diff --git a/clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c b/clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c new file mode 100644 index 0000000000000..49a046f358e10 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c @@ -0,0 +1,326 @@ +// RUN: %clang_cc1 -fopenmp -emit-cir -fclangir %s -o - | FileCheck %s + +void before(int); +void during(int); +void after(int); + +void emit_simple_for() { + // CHECK: cir.func{{.*}}@{{.*}}emit_simple_for + int j = 5; + before(j); + // CHECK: cir.call @{{.*}}before +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(j); + } + } + // CHECK: omp.parallel { + + // CIR constants for bounds, then cast to std integer + // CHECK: %[[C0_CIR:.*]] = cir.const #cir.int<0> : !s32i + // CHECK: %[[C10_CIR:.*]] = cir.const #cir.int<10> : !s32i + // CHECK: %[[C1_CIR:.*]] = cir.const #cir.int<1> : !s32i + + // induction variable alloca (emitted before wsloop) + // CHECK: %[[I_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer for omp.loop_nest + // CHECK: %[[C0:.*]] = builtin.unrealized_conversion_cast %[[C0_CIR]] : !s32i to i32 + // CHECK: %[[C10:.*]] = builtin.unrealized_conversion_cast %[[C10_CIR]] : !s32i to i32 + // CHECK: %[[C1:.*]] = builtin.unrealized_conversion_cast %[[C1_CIR]] : !s32i to i32 + + // omp loop + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[IV:.*]]) : i32 = (%[[C0]]) to (%[[C10]]) step (%[[C1]]) { + + // store induction variable block arg into alloca + // CHECK: %[[IV_CIR:.*]] = builtin.unrealized_conversion_cast %[[IV]] : i32 to !s32i + // CHECK: cir.store %[[IV_CIR]], %[[I_ALLOCA]] : !s32i, !cir.ptr<!s32i> + + // during(j) + // CHECK: cir.load {{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i + // CHECK: cir.call @{{.*}}during + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + + // CHECK: omp.terminator + // CHECK: } + after(j); + // CHECK: cir.call @{{.*}}after +} + +void emit_for_with_vars() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_with_vars + int j = 5; + before(j); + // CHECK: cir.call @{{.*}}before +#pragma omp parallel + { + int lb = 1; + long ub = 10; + short step = 1; +#pragma omp for + for (int i = 0; i < ub; i=i+step) { + during(j); + } + } + + // CHECK: omp.parallel { + + // allocas + // CHECK: %[[LB:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["lb", init] + // CHECK: %[[UB:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["ub", init] + // CHECK: %[[STEP:.*]] = cir.alloca !s16i, !cir.ptr<!s16i>, ["step", init] + + // stores + // CHECK: cir.store {{.*}}, %[[LB]] : !s32i, !cir.ptr<!s32i> + // CHECK: cir.store {{.*}}, %[[UB]] : !s64i, !cir.ptr<!s64i> + // CHECK: cir.store {{.*}}, %[[STEP]] : !s16i, !cir.ptr<!s16i> + + // lower bound (CIR constant + cast to i32) + // CHECK: %[[LB0_CIR:.*]] = cir.const #cir.int<0> : !s32i + + // upper bound: load, integral cast to i32, then unrealized cast + // CHECK: %[[UBLOAD:.*]] = cir.load {{.*}} %[[UB]] : !cir.ptr<!s64i>, !s64i + // CHECK: %[[UBCAST:.*]] = cir.cast integral %[[UBLOAD]] : !s64i -> !s32i + + // step: load, integral cast to i32, then unrealized 
cast + // CHECK: %[[STEPLOAD:.*]] = cir.load {{.*}} %[[STEP]] : !cir.ptr<!s16i>, !s16i + // CHECK: %[[STEPCONV:.*]] = cir.cast integral %[[STEPLOAD]] : !s16i -> !s32i + + // induction variable alloca (emitted before wsloop) + // CHECK: %[[I2_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer for omp.loop_nest + // CHECK: %[[LB0:.*]] = builtin.unrealized_conversion_cast %[[LB0_CIR]] : !s32i to i32 + // CHECK: %[[UBSTD:.*]] = builtin.unrealized_conversion_cast %[[UBCAST]] : !s32i to i32 + // CHECK: %[[STEPSTD:.*]] = builtin.unrealized_conversion_cast %[[STEPCONV]] : !s32i to i32 + + // omp loop + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[IV2:.*]]) : i32 = (%[[LB0]]) to (%[[UBSTD]]) step (%[[STEPSTD]]) { + + // store induction variable block arg into alloca + // CHECK: %[[IV2_CIR:.*]] = builtin.unrealized_conversion_cast %[[IV2]] : i32 to !s32i + // CHECK: cir.store %[[IV2_CIR]], %[[I2_ALLOCA]] : !s32i, !cir.ptr<!s32i> + + // during(j) + // CHECK: cir.load {{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i + // CHECK: cir.call @{{.*}}during + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + + // CHECK: omp.terminator + // CHECK: } + + after(j); + // CHECK: cir.call @{{.*}}after +} + +void emit_for_with_induction_var() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_with_induction_var +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CIR constants + // CHECK: %[[IC0_CIR:.*]] = cir.const #cir.int<0> : !s32i + // CHECK: %[[IC10_CIR:.*]] = cir.const #cir.int<10> : !s32i + // CHECK: %[[IC1_CIR:.*]] = cir.const #cir.int<1> : !s32i + + // induction variable alloca + // CHECK: %[[IV_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer + // CHECK: %[[IC0:.*]] = builtin.unrealized_conversion_cast %[[IC0_CIR]] : !s32i to i32 + // CHECK: %[[IC10:.*]] = builtin.unrealized_conversion_cast %[[IC10_CIR]] : !s32i to i32 + // CHECK: %[[IC1:.*]] = builtin.unrealized_conversion_cast %[[IC1_CIR]] : !s32i to i32 + + // omp loop + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[IV3:.*]]) : i32 = (%[[IC0]]) to (%[[IC10]]) step (%[[IC1]]) { + + // store induction variable into alloca + // CHECK: %[[IV3_CIR:.*]] = builtin.unrealized_conversion_cast %[[IV3]] : i32 to !s32i + // CHECK: cir.store %[[IV3_CIR]], %[[IV_ALLOCA]] : !s32i, !cir.ptr<!s32i> + + // during(i) - loads the induction variable from the alloca + // CHECK: %[[I_VAL:.*]] = cir.load %[[IV_ALLOCA]] : !cir.ptr<!s32i>, !s32i + // CHECK: cir.call @{{.*}}during(%[[I_VAL]]) + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + + // CHECK: omp.terminator + // CHECK: } +} + +// Test inclusive upper bound (i <= 9) +void emit_for_inclusive_bound() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_inclusive_bound +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i <= 9; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<9> : !s32i + // CHECK: cir.const #cir.int<1> : !s32i + // CHECK: %[[INC_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[INC_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[INC_C9:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[INC_C1:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // inclusive = true + // CHECK-NEXT: omp.loop_nest (%[[INC_IV:.*]]) : i32 = 
(%[[INC_C0]]) to (%[[INC_C9]]) inclusive step (%[[INC_C1]]) { + + // CHECK: builtin.unrealized_conversion_cast %[[INC_IV]] : i32 to !s32i + // CHECK: cir.store + // CHECK: cir.call @{{.*}}during + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test reversed comparison (10 > i) +void emit_for_reversed_cmp() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_reversed_cmp +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 10 > i; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<10> : !s32i + // CHECK: cir.const #cir.int<1> : !s32i + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[REV_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[REV_C10:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[REV_C1:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[REV_C0]]) to (%[[REV_C10]]) step (%[[REV_C1]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test reversed inclusive comparison (9 >= i) +void emit_for_reversed_inclusive_cmp() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_reversed_inclusive_cmp +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 9 >= i; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<9> : !s32i + // CHECK: cir.const #cir.int<1> : !s32i + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[RI_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[RI_C9:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[RI_C1:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[RI_C0]]) to (%[[RI_C9]]) inclusive step (%[[RI_C1]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test compound assignment step (i += 2) +void emit_for_compound_step() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_compound_step +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 20; i += 2) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<20> : !s32i + // CHECK: cir.const #cir.int<2> : !s32i + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[CS_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CS_C20:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CS_C2:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[CS_C0]]) to (%[[CS_C20]]) step (%[[CS_C2]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test commuted step expression (i = step + i) +void emit_for_commuted_step() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_commuted_step + short step = 3; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 30; i = step + i) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<30> : !s32i + + // step is loaded and cast to the loop variable type (i32) in CIR + // CHECK: 
%[[CM_STEP_LOAD:.*]] = cir.load {{.*}} : !cir.ptr<!s16i>, !s16i + // CHECK: %[[CM_STEP_CIR:.*]] = cir.cast integral %[[CM_STEP_LOAD]] : !s16i -> !s32i + + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer + // CHECK: %[[CM_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CM_C30:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CM_STEP:.*]] = builtin.unrealized_conversion_cast %[[CM_STEP_CIR]] : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[CM_C0]]) to (%[[CM_C30]]) step (%[[CM_STEP]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} diff --git a/clang/test/CIR/Lowering/pragma-omp-for.c b/clang/test/CIR/Lowering/pragma-omp-for.c new file mode 100644 index 0000000000000..76f069a4cd9a9 --- /dev/null +++ b/clang/test/CIR/Lowering/pragma-omp-for.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck %s --input-file %t-cir.ll + +void before(int); +void during(int); +void after(int); + +// Test simple for loop with constant bounds: for (int i = 0; i < 10; i++) +void emit_simple_for() { + int j = 5; + before(j); +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(j); + } + } + after(j); +} + +// CHECK-LABEL: define dso_local void @emit_simple_for() +// CHECK: call void @before(i32 %{{.*}}) +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @emit_simple_for..omp_par, ptr %{{.*}}) +// CHECK: call void @after(i32 %{{.*}}) + +// CHECK-LABEL: define internal void @emit_simple_for..omp_par( +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: store i32 1, ptr %p.stride +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp_loop.body: +// CHECK: omp.loop_nest.region: +// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 4 +// CHECK: call void @during(i32 %{{.*}}) +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_barrier( + +// Test for loop with variable bounds and type conversions +void emit_for_with_vars() { + int j = 5; + before(j); +#pragma omp parallel + { + int lb = 1; + long ub = 10; + short step = 1; +#pragma omp for + for (int i = 0; i < ub; i = i + step) { + during(j); + } + } + after(j); +} + +// CHECK-LABEL: define dso_local void @emit_for_with_vars() +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @emit_for_with_vars..omp_par, ptr %{{.*}}) + +// CHECK-LABEL: define internal void @emit_for_with_vars..omp_par( +// variable upper bound: loaded and truncated from i64 to i32 +// CHECK: %{{.*}} = trunc i64 %{{.*}} to i32 +// variable step: loaded and sign-extended from i16 to i32 +// CHECK: %{{.*}} = sext i16 %{{.*}} to i32 +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) +// CHECK: call void @__kmpc_for_static_fini( + +// Test induction variable is accessible in the loop body: during(i) +void emit_for_with_induction_var() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_with_induction_var..omp_par( +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: omp.loop_nest.region: +// IV is stored to the alloca and then loaded for during(i) +// CHECK: store i32 %{{.*}}, ptr %[[IV_PTR:.*]], align 4 +// CHECK: %[[IV_LOAD:.*]] = load i32, ptr %[[IV_PTR]], align 4 +// CHECK: call void @during(i32 %[[IV_LOAD]]) + +// Test inclusive upper bound: for (int i = 0; i <= 9; i++) +void emit_for_inclusive_bound() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i <= 9; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_inclusive_bound..omp_par( +// inclusive i <= 9 has same trip count as i < 10 +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) + +// Test reversed comparison: for (int i = 0; 10 > i; i++) +void emit_for_reversed_cmp() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 10 > i; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_reversed_cmp..omp_par( +// reversed cmp (10 > i) produces same bounds as (i < 10) +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: call void @__kmpc_for_static_init_4u( + +// Test reversed inclusive comparison: for (int i = 0; 9 >= i; i++) +void emit_for_reversed_inclusive_cmp() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 9 >= i; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_reversed_inclusive_cmp..omp_par( +// reversed inclusive cmp (9 >= i) produces same bounds as (i <= 9) +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: call void @__kmpc_for_static_init_4u( + +// Test compound assignment step: for (int i = 0; i < 20; i += 2) +void emit_for_compound_step() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 20; i += 2) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_compound_step..omp_par( +// step = 2 visible in the loop body IV computation +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp_loop.body: +// CHECK: %{{.*}} = mul i32 %{{.*}}, 2 +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) + +// Test commuted step expression: for (int i = 0; i < 30; i = step + i) +void emit_for_commuted_step() { + short step = 3; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 30; i = step + i) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_commuted_step..omp_par( +// variable step loaded and sign-extended from i16 +// 
CHECK: %{{.*}} = sext i16 %{{.*}} to i32 + // CHECK: call void @__kmpc_for_static_init_4u( + // CHECK: omp_loop.body: + // step is variable, multiplied into IV + // CHECK: %{{.*}} = mul i32 %{{.*}}, %{{.*}} + // CHECK: omp.loop_nest.region: + // CHECK: call void @during(i32 %{{.*}}) + + // Verify OpenMP runtime declarations + // CHECK: declare i32 @__kmpc_global_thread_num(ptr) + // CHECK: declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32) + // CHECK: declare void @__kmpc_for_static_fini(ptr, i32) + // CHECK: declare void @__kmpc_barrier(ptr, i32) + // CHECK: declare {{.*}}void @__kmpc_fork_call(ptr, i32, ptr, ...)
