https://github.com/ivanradanov updated https://github.com/llvm/llvm-project/pull/101446
>From 62057f90e1e6e9e89df1bb666a3676421e2e52ac Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 2 Aug 2024 16:10:25 +0900 Subject: [PATCH 1/9] Add custom omp loop wrapper --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5199ff50abb95..76f0c472cfdb1 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -308,6 +308,17 @@ def WorkshareOp : OpenMP_Op<"workshare", clauses = [ let hasVerifier = 1; } +def WorkshareLoopWrapperOp : OpenMP_Op<"workshare_loop_wrapper", traits = [ + DeclareOpInterfaceMethods<LoopWrapperInterface>, + RecursiveMemoryEffects, SingleBlock + ], singleRegion = true> { + let summary = "contains loop nests to be parallelized by workshare"; + + let builders = [ + OpBuilder<(ins), [{ build($_builder, $_state, {}); }]> + ]; +} + //===----------------------------------------------------------------------===// // Loop Nest //===----------------------------------------------------------------------===// >From d882f2b7413a9ad306334cc69691671b498985fc Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 2 Aug 2024 16:08:58 +0900 Subject: [PATCH 2/9] Add recursive memory effects trait to workshare --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 76f0c472cfdb1..7d1c80333855e 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -290,7 +290,9 @@ def SingleOp : OpenMP_Op<"single", traits = [ // 2.8.3 Workshare Construct //===----------------------------------------------------------------------===// -def WorkshareOp : OpenMP_Op<"workshare", clauses = [ +def WorkshareOp : OpenMP_Op<"workshare", traits = [ + RecursiveMemoryEffects, + ], clauses = [ OpenMP_NowaitClause, ], singleRegion = true> { let summary = "workshare directive"; >From 14878e80f5bcf8dac5100951de803ce584a33b25 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Wed, 31 Jul 2024 14:11:47 +0900 Subject: [PATCH 3/9] [flang][omp] Emit omp.workshare in frontend --- flang/lib/Lower/OpenMP/OpenMP.cpp | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2b1839b5270d4..f7bc565ea8cbc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1270,6 +1270,15 @@ static void genTaskwaitClauses(lower::AbstractConverter &converter, loc, llvm::omp::Directive::OMPD_taskwait); } +static void genWorkshareClauses(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::StatementContext &stmtCtx, + const List<Clause> &clauses, mlir::Location loc, + mlir::omp::WorkshareOperands &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processNowait(clauseOps); +} + static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, @@ -1890,6 +1899,22 @@ genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc); } +static 
mlir::omp::WorkshareOp +genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::iterator item) { + lower::StatementContext stmtCtx; + mlir::omp::WorkshareOperands clauseOps; + genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); + + return genOpWithBody<mlir::omp::WorkshareOp>( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, + llvm::omp::Directive::OMPD_workshare) + .setClauses(&item->clauses), + queue, item, clauseOps); +} + static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2249,10 +2274,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: - // FIXME: Workshare is not a commonly used OpenMP construct, an - // implementation for this feature will come later. For the codes - // that use this construct, add a single construct for now. - genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); + genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item); break; // Composite constructs >From 16f7146a45ee9b31c00d9d54be4859df312dcb1b Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Wed, 31 Jul 2024 14:12:34 +0900 Subject: [PATCH 4/9] [flang] Introduce ws loop nest generation for HLFIR lowering --- .../flang/Optimizer/Builder/HLFIRTools.h | 12 +++-- flang/lib/Lower/ConvertCall.cpp | 2 +- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 4 +- flang/lib/Optimizer/Builder/HLFIRTools.cpp | 52 ++++++++++++++----- .../HLFIR/Transforms/BufferizeHLFIR.cpp | 3 +- .../LowerHLFIROrderedAssignments.cpp | 30 +++++------ .../Transforms/OptimizedBufferization.cpp | 6 +-- 7 files changed, 69 insertions(+), 40 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 6b41025eea078..14e42c6f358e4 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -357,8 +357,8 @@ hlfir::ElementalOp genElementalOp( /// Structure to describe a loop nest. struct LoopNest { - fir::DoLoopOp outerLoop; - fir::DoLoopOp innerLoop; + mlir::Operation *outerOp; + mlir::Block *body; llvm::SmallVector<mlir::Value> oneBasedIndices; }; @@ -366,11 +366,13 @@ struct LoopNest { /// \p isUnordered specifies whether the loops in the loop nest /// are unordered. 
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered = false); + mlir::ValueRange extents, bool isUnordered = false, + bool emitWsLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::Value shape, bool isUnordered = false) { + mlir::Value shape, bool isUnordered = false, + bool emitWsLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered); + isUnordered, emitWsLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fd873f55dd844..0689d6e033dd9 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -2128,7 +2128,7 @@ class ElementalCallBuilder { hlfir::genLoopNest(loc, builder, shape, !mustBeOrdered); mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices; auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); callContext.stmtCtx.pushScope(); for (auto &preparedActual : loweredActuals) if (preparedActual) diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c3c1f363033c2..72a90dd0d6f29 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -375,7 +375,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, // know this won't miss any opportuinties for clever elemental inlining hlfir::LoopNest nest = hlfir::genLoopNest( loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); - builder.setInsertionPointToStart(nest.innerLoop.getBody()); + builder.setInsertionPointToStart(nest.body); mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); auto lhsEleAddr = builder.create<fir::ArrayCoorOp>( loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{}, @@ -389,7 +389,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, builder, loc, redId, refTy, lhsEle, rhsEle); builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr); - builder.setInsertionPointAfter(nest.outerLoop); + builder.setInsertionPointAfter(nest.outerOp); builder.create<mlir::omp::YieldOp>(loc, lhsAddr); } diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index 8d0ae2f195178..cd07cb741eb4b 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -20,6 +20,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/TypeSwitch.h" +#include <mlir/Dialect/OpenMP/OpenMPDialect.h> #include <optional> // Return explicit extents. If the base is a fir.box, this won't read it to @@ -855,26 +856,51 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, - mlir::ValueRange extents, bool isUnordered) { + mlir::ValueRange extents, bool isUnordered, + bool emitWsLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must have at least one extent"); - auto insPt = builder.saveInsertionPoint(); + mlir::OpBuilder::InsertionGuard guard(builder); loopNest.oneBasedIndices.assign(extents.size(), mlir::Value{}); // Build loop nest from column to row. 
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1); mlir::Type indexType = builder.getIndexType(); - unsigned dim = extents.size() - 1; - for (auto extent : llvm::reverse(extents)) { - auto ub = builder.createConvert(loc, indexType, extent); - loopNest.innerLoop = - builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); - // Reverse the indices so they are in column-major order. - loopNest.oneBasedIndices[dim--] = loopNest.innerLoop.getInductionVar(); - if (!loopNest.outerLoop) - loopNest.outerLoop = loopNest.innerLoop; + if (emitWsLoop) { + auto wsloop = builder.create<mlir::omp::WsloopOp>( + loc, mlir::ArrayRef<mlir::NamedAttribute>()); + loopNest.outerOp = wsloop; + builder.createBlock(&wsloop.getRegion()); + mlir::omp::LoopNestOperands lnops; + lnops.loopInclusive = builder.getUnitAttr(); + for (auto extent : llvm::reverse(extents)) { + lnops.loopLowerBounds.push_back(one); + lnops.loopUpperBounds.push_back(extent); + lnops.loopSteps.push_back(one); + } + auto lnOp = builder.create<mlir::omp::LoopNestOp>(loc, lnops); + builder.create<mlir::omp::TerminatorOp>(loc); + mlir::Block *block = builder.createBlock(&lnOp.getRegion()); + for (auto extent : llvm::reverse(extents)) + block->addArgument(extent.getType(), extent.getLoc()); + loopNest.body = block; + builder.create<mlir::omp::YieldOp>(loc); + for (unsigned dim = 0; dim < extents.size(); dim++) + loopNest.oneBasedIndices[extents.size() - dim - 1] = + lnOp.getRegion().front().getArgument(dim); + } else { + unsigned dim = extents.size() - 1; + for (auto extent : llvm::reverse(extents)) { + auto ub = builder.createConvert(loc, indexType, extent); + auto doLoop = + builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered); + loopNest.body = doLoop.getBody(); + builder.setInsertionPointToStart(loopNest.body); + // Reverse the indices so they are in column-major order. 
+ loopNest.oneBasedIndices[dim--] = doLoop.getInductionVar(); + if (!loopNest.outerOp) + loopNest.outerOp = doLoop; + } } - builder.restoreInsertionPoint(insPt); return loopNest; } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index a70a6b388c4b1..b608677c52631 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -31,6 +31,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/ADT/TypeSwitch.h" namespace hlfir { @@ -793,7 +794,7 @@ struct ElementalOpConversion hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); auto insPt = builder.saveInsertionPoint(); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); hlfir::Entity elementValue(yield.getElementValue()); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp index 85dd517cb5791..645abf65d10a3 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp @@ -464,7 +464,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { // if the LHS is not). mlir::Value shape = hlfir::genShape(loc, builder, lhsEntity); elementalLoopNest = hlfir::genLoopNest(loc, builder, shape); - builder.setInsertionPointToStart(elementalLoopNest->innerLoop.getBody()); + builder.setInsertionPointToStart(elementalLoopNest->body); lhsEntity = hlfir::getElementAt(loc, builder, lhsEntity, elementalLoopNest->oneBasedIndices); rhsEntity = hlfir::getElementAt(loc, builder, rhsEntity, @@ -484,7 +484,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { for (auto &cleanupConversion : argConversionCleanups) cleanupConversion(); if (elementalLoopNest) - builder.setInsertionPointAfter(elementalLoopNest->outerLoop); + builder.setInsertionPointAfter(elementalLoopNest->outerOp); } else { // TODO: preserve allocatable assignment aspects for forall once // they are conveyed in hlfir.region_assign. 
@@ -493,7 +493,7 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) { generateCleanupIfAny(loweredLhs.elementalCleanup); if (loweredLhs.vectorSubscriptLoopNest) builder.setInsertionPointAfter( - loweredLhs.vectorSubscriptLoopNest->outerLoop); + loweredLhs.vectorSubscriptLoopNest->outerOp); generateCleanupIfAny(oldRhsYield); generateCleanupIfAny(loweredLhs.nonElementalCleanup); } @@ -518,8 +518,8 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) { hlfir::Entity savedMask{maybeSaved->first}; mlir::Value shape = hlfir::genShape(loc, builder, savedMask); whereLoopNest = hlfir::genLoopNest(loc, builder, shape); - constructStack.push_back(whereLoopNest->outerLoop.getOperation()); - builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody()); + constructStack.push_back(whereLoopNest->outerOp); + builder.setInsertionPointToStart(whereLoopNest->body); mlir::Value cdt = hlfir::getElementAt(loc, builder, savedMask, whereLoopNest->oneBasedIndices); generateMaskIfOp(cdt); @@ -527,7 +527,7 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) { // If this is the same run as the one that saved the value, the clean-up // was left-over to be done now. auto insertionPoint = builder.saveInsertionPoint(); - builder.setInsertionPointAfter(whereLoopNest->outerLoop); + builder.setInsertionPointAfter(whereLoopNest->outerOp); generateCleanupIfAny(maybeSaved->second); builder.restoreInsertionPoint(insertionPoint); } @@ -539,8 +539,8 @@ void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) { mask.generateNoneElementalPart(builder, mapper); mlir::Value shape = mask.generateShape(builder, mapper); whereLoopNest = hlfir::genLoopNest(loc, builder, shape); - constructStack.push_back(whereLoopNest->outerLoop.getOperation()); - builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody()); + constructStack.push_back(whereLoopNest->outerOp); + builder.setInsertionPointToStart(whereLoopNest->body); mlir::Value cdt = generateMaskedEntity(mask); generateMaskIfOp(cdt); return; @@ -754,7 +754,7 @@ OrderedAssignmentRewriter::generateYieldedLHS( loweredLhs.vectorSubscriptLoopNest = hlfir::genLoopNest( loc, builder, loweredLhs.vectorSubscriptShape.value()); builder.setInsertionPointToStart( - loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody()); + loweredLhs.vectorSubscriptLoopNest->body); } loweredLhs.lhs = temp->second.fetch(loc, builder); return loweredLhs; @@ -772,7 +772,7 @@ OrderedAssignmentRewriter::generateYieldedLHS( hlfir::genLoopNest(loc, builder, *loweredLhs.vectorSubscriptShape, !elementalAddrLhs.isOrdered()); builder.setInsertionPointToStart( - loweredLhs.vectorSubscriptLoopNest->innerLoop.getBody()); + loweredLhs.vectorSubscriptLoopNest->body); mapper.map(elementalAddrLhs.getIndices(), loweredLhs.vectorSubscriptLoopNest->oneBasedIndices); for (auto &op : elementalAddrLhs.getBody().front().without_terminator()) @@ -798,11 +798,11 @@ OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) { if (!maskedExpr.noneElementalPartWasGenerated) { // Generate none elemental part before the where loops (but inside the // current forall loops if any). - builder.setInsertionPoint(whereLoopNest->outerLoop); + builder.setInsertionPoint(whereLoopNest->outerOp); maskedExpr.generateNoneElementalPart(builder, mapper); } // Generate the none elemental part cleanup after the where loops. 
- builder.setInsertionPointAfter(whereLoopNest->outerLoop); + builder.setInsertionPointAfter(whereLoopNest->outerOp); maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper); // Generate the value of the current element for the masked expression // at the current insertion point (inside the where loops, and any fir.if @@ -1242,7 +1242,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide( LhsValueAndCleanUp loweredLhs = generateYieldedLHS(loc, region); fir::factory::TemporaryStorage *temp = nullptr; if (loweredLhs.vectorSubscriptLoopNest) - constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerLoop); + constructStack.push_back(loweredLhs.vectorSubscriptLoopNest->outerOp); if (loweredLhs.vectorSubscriptLoopNest && !rhsIsArray(regionAssignOp)) { // Vector subscripted entity for which the shape must also be saved on top // of the element addresses (e.g. the shape may change in each forall @@ -1265,7 +1265,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide( // subscripted LHS. auto &vectorTmp = temp->cast<fir::factory::AnyVectorSubscriptStack>(); auto insertionPoint = builder.saveInsertionPoint(); - builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerLoop); + builder.setInsertionPoint(loweredLhs.vectorSubscriptLoopNest->outerOp); vectorTmp.pushShape(loc, builder, shape); builder.restoreInsertionPoint(insertionPoint); } else { @@ -1291,7 +1291,7 @@ void OrderedAssignmentRewriter::saveLeftHandSide( if (loweredLhs.vectorSubscriptLoopNest) { constructStack.pop_back(); builder.setInsertionPointAfter( - loweredLhs.vectorSubscriptLoopNest->outerLoop); + loweredLhs.vectorSubscriptLoopNest->outerOp); } } diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp index c5b809514c54c..c4aed6b79df92 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp @@ -483,7 +483,7 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite( // hlfir.elemental region inside the inner loop hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, loopNest.oneBasedIndices); hlfir::Entity elementValue{yield.getElementValue()}; @@ -554,7 +554,7 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite( hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); auto arrayElement = hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices); builder.create<hlfir::AssignOp>(loc, rhs, arrayElement); @@ -649,7 +649,7 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite( hlfir::getIndexExtents(loc, builder, shape); hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); - builder.setInsertionPointToStart(loopNest.innerLoop.getBody()); + builder.setInsertionPointToStart(loopNest.body); auto rhsArrayElement = hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices); rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement); >From decd0c5b35dcd5175e06319a793fedd2935b14ca Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov 
<ivanov.i...@m.titech.ac.jp> Date: Fri, 2 Aug 2024 16:08:34 +0900 Subject: [PATCH 5/9] Emit loop nests in a custom wrapper --- flang/include/flang/Optimizer/Builder/HLFIRTools.h | 6 +++--- flang/lib/Optimizer/Builder/HLFIRTools.cpp | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h index 14e42c6f358e4..6987471957218 100644 --- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h @@ -367,12 +367,12 @@ struct LoopNest { /// are unordered. LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, bool isUnordered = false, - bool emitWsLoop = false); + bool emitWorkshareLoop = false); inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape, bool isUnordered = false, - bool emitWsLoop = false) { + bool emitWorkshareLoop = false) { return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape), - isUnordered, emitWsLoop); + isUnordered, emitWorkshareLoop); } /// Inline the body of an hlfir.elemental at the current insertion point diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp index cd07cb741eb4b..91b1b3d774a01 100644 --- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp @@ -857,7 +857,7 @@ mlir::Value hlfir::inlineElementalOp( hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents, bool isUnordered, - bool emitWsLoop) { + bool emitWorkshareLoop) { hlfir::LoopNest loopNest; assert(!extents.empty() && "must have at least one extent"); mlir::OpBuilder::InsertionGuard guard(builder); @@ -865,11 +865,10 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc, // Build loop nest from column to row. 
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1); mlir::Type indexType = builder.getIndexType(); - if (emitWsLoop) { - auto wsloop = builder.create<mlir::omp::WsloopOp>( - loc, mlir::ArrayRef<mlir::NamedAttribute>()); - loopNest.outerOp = wsloop; - builder.createBlock(&wsloop.getRegion()); + if (emitWorkshareLoop) { + auto wslw = builder.create<mlir::omp::WorkshareLoopWrapperOp>(loc); + loopNest.outerOp = wslw; + builder.createBlock(&wslw.getRegion()); mlir::omp::LoopNestOperands lnops; lnops.loopInclusive = builder.getUnitAttr(); for (auto extent : llvm::reverse(extents)) { >From 3ab40e1600aecd5e39f9379941dabf67667a32e9 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Wed, 31 Jul 2024 14:44:31 +0900 Subject: [PATCH 6/9] [flang] Lower omp.workshare to other omp constructs --- flang/include/flang/Optimizer/CMakeLists.txt | 1 + .../flang/Optimizer/OpenMP/CMakeLists.txt | 4 + flang/include/flang/Optimizer/OpenMP/Passes.h | 30 ++ .../include/flang/Optimizer/OpenMP/Passes.td | 18 ++ flang/include/flang/Tools/CLOptions.inc | 2 + flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Optimizer/CMakeLists.txt | 1 + .../HLFIR/Transforms/BufferizeHLFIR.cpp | 6 +- flang/lib/Optimizer/OpenMP/CMakeLists.txt | 26 ++ flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 259 ++++++++++++++++++ flang/test/HLFIR/bufferize-workshare.fir | 58 ++++ .../Transforms/OpenMP/lower-workshare.mlir | 81 ++++++ flang/tools/bbc/CMakeLists.txt | 1 + flang/tools/fir-opt/CMakeLists.txt | 1 + flang/tools/fir-opt/fir-opt.cpp | 2 + flang/tools/tco/CMakeLists.txt | 1 + 16 files changed, 490 insertions(+), 2 deletions(-) create mode 100644 flang/include/flang/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.h create mode 100644 flang/include/flang/Optimizer/OpenMP/Passes.td create mode 100644 flang/lib/Optimizer/OpenMP/CMakeLists.txt create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp create mode 100644 flang/test/HLFIR/bufferize-workshare.fir create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt index 89e43a9ee8d62..3336ac935e101 100644 --- a/flang/include/flang/Optimizer/CMakeLists.txt +++ b/flang/include/flang/Optimizer/CMakeLists.txt @@ -2,3 +2,4 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenMP) diff --git a/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt new file mode 100644 index 0000000000000..d59573f0f7fd9 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name FlangOpenMP) + +add_public_tablegen_target(FlangOpenMPPassesIncGen) diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h b/flang/include/flang/Optimizer/OpenMP/Passes.h new file mode 100644 index 0000000000000..95a05b3005073 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.h @@ -0,0 +1,30 @@ +//===- Passes.h - OpenMP pass entry points ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header declares OpenMP pass entry points. +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_OPENMP_PASSES_H +#define FORTRAN_OPTIMIZER_OPENMP_PASSES_H + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include <memory> + +namespace flangomp { +#define GEN_PASS_DECL +#define GEN_PASS_REGISTRATION +#include "flang/Optimizer/OpenMP/Passes.h.inc" + +bool shouldUseWorkshareLowering(mlir::Operation *op); + +} // namespace flangomp + +#endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td new file mode 100644 index 0000000000000..6f636ec1df616 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenMP/Passes.td @@ -0,0 +1,18 @@ +//===-- Passes.td - HLFIR pass definition file -------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_DIALECT_OPENMP_PASSES +#define FORTRAN_DIALECT_OPENMP_PASSES + +include "mlir/Pass/PassBase.td" + +def LowerWorkshare : Pass<"lower-workshare"> { + let summary = "Lower workshare construct"; +} + +#endif //FORTRAN_DIALECT_OPENMP_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 7df5044949463..594369fc2ffe5 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -17,6 +17,7 @@ #include "mlir/Transforms/Passes.h" #include "flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Transforms/Passes.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Support/CommandLine.h" @@ -344,6 +345,7 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); + pm.addPass(flangomp::createLowerWorkshare()); } /// Create a pass pipeline for handling certain OpenMP transformations needed diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index c20b9096aff49..ecdcc73d61ec1 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -38,6 +38,7 @@ add_flang_library(flangFrontend FIRTransforms HLFIRDialect HLFIRTransforms + FlangOpenMPTransforms MLIRTransforms MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation diff --git a/flang/lib/Optimizer/CMakeLists.txt b/flang/lib/Optimizer/CMakeLists.txt index 4a602162ed2b7..dd153ac33c0fb 100644 --- a/flang/lib/Optimizer/CMakeLists.txt +++ b/flang/lib/Optimizer/CMakeLists.txt @@ -5,3 +5,4 @@ add_subdirectory(HLFIR) add_subdirectory(Support) add_subdirectory(Transforms) add_subdirectory(Analysis) +add_subdirectory(OpenMP) diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp index b608677c52631..1848dbe2c7a2c 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp +++ 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp @@ -26,12 +26,13 @@ #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Dominance.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/ADT/TypeSwitch.h" namespace hlfir { @@ -792,7 +793,8 @@ struct ElementalOpConversion // Generate a loop nest looping around the fir.elemental shape and clone // fir.elemental region inside the inner loop. hlfir::LoopNest loopNest = - hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered()); + hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(), + flangomp::shouldUseWorkshareLowering(elemental)); auto insPt = builder.saveInsertionPoint(); builder.setInsertionPointToStart(loopNest.body); auto yield = hlfir::inlineElementalOp(loc, builder, elemental, diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt new file mode 100644 index 0000000000000..74419327d76d0 --- /dev/null +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -0,0 +1,26 @@ +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) + +add_flang_library(FlangOpenMPTransforms + LowerWorkshare.cpp + + DEPENDS + FIRDialect + FlangOpenMPPassesIncGen + ${dialect_libs} + + LINK_LIBS + FIRAnalysis + FIRDialect + FIRBuilder + FIRDialectSupport + FIRSupport + FIRTransforms + HLFIRDialect + MLIRIR + ${dialect_libs} + + LINK_COMPONENTS + AsmParser + AsmPrinter + Remarks +) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp new file mode 100644 index 0000000000000..40975552d1fe3 --- /dev/null +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -0,0 +1,259 @@ +//===- LowerWorkshare.cpp - special cases for bufferization -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Lower omp workshare construct. 
+//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/OpenMP/Passes.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" + +#include <variant> + +namespace flangomp { +#define GEN_PASS_DEF_LOWERWORKSHARE +#include "flang/Optimizer/OpenMP/Passes.h.inc" +} // namespace flangomp + +#define DEBUG_TYPE "lower-workshare" + +using namespace mlir; + +namespace flangomp { +bool shouldUseWorkshareLowering(Operation *op) { + auto workshare = dyn_cast<omp::WorkshareOp>(op->getParentOp()); + if (!workshare) + return false; + return workshare->getParentOfType<omp::ParallelOp>(); +} +} // namespace flangomp + +namespace { + +struct SingleRegion { + Block::iterator begin, end; +}; + +static bool isSupportedByFirAlloca(Type ty) { + return !isa<fir::ReferenceType>(ty); +} + +static bool isSafeToParallelize(Operation *op) { + if (isa<fir::DeclareOp>(op)) + return true; + + llvm::SmallVector<MemoryEffects::EffectInstance> effects; + MemoryEffectOpInterface interface = dyn_cast<MemoryEffectOpInterface>(op); + if (!interface) { + return false; + } + interface.getEffects(effects); + if (effects.empty()) + return true; + + return false; +} + +/// Lowers workshare to a sequence of single-thread regions and parallel loops +/// +/// For example: +/// +/// omp.workshare { +/// %a = fir.allocmem +/// omp.wsloop {} +/// fir.call Assign %b %a +/// fir.freemem %a +/// } +/// +/// becomes +/// +/// omp.single { +/// %a = fir.allocmem +/// fir.store %a %tmp +/// } +/// %a_reloaded = fir.load %tmp +/// omp.wsloop {} +/// omp.single { +/// fir.call Assign %b %a_reloaded +/// fir.freemem %a_reloaded +/// } +/// +/// Note that we allocate temporary memory for values in omp.single's which need +/// to be accessed in all threads in the closest omp.parallel +/// +/// TODO currently we need to be able to access the encompassing omp.parallel so +/// that we can allocate temporaries accessible by all threads outside of it. +/// In case we do not find it, we fall back to converting the omp.workshare to +/// omp.single. +/// To better handle this we should probably enable yielding values out of an +/// omp.single which will be supported by the omp runtime. 
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { + assert(wsOp.getRegion().getBlocks().size() == 1); + + Location loc = wsOp->getLoc(); + + omp::ParallelOp parallelOp = wsOp->getParentOfType<omp::ParallelOp>(); + if (!parallelOp) { + wsOp.emitWarning("cannot handle workshare, converting to single"); + Operation *terminator = wsOp.getRegion().front().getTerminator(); + wsOp->getBlock()->getOperations().splice( + wsOp->getIterator(), wsOp.getRegion().front().getOperations()); + terminator->erase(); + return; + } + + OpBuilder allocBuilder(parallelOp); + OpBuilder rootBuilder(wsOp); + IRMapping rootMapping; + + omp::SingleOp singleOp = nullptr; + + auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, + IRMapping singleMapping) { + if (auto reloaded = rootMapping.lookupOrNull(v)) + return; + Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext()); + Type ty = v.getType(); + Value alloc, reloaded; + if (isSupportedByFirAlloca(ty)) { + alloc = allocBuilder.create<fir::AllocaOp>(loc, ty); + singleBuilder.create<fir::StoreOp>(loc, singleMapping.lookup(v), alloc); + reloaded = rootBuilder.create<fir::LoadOp>(loc, ty, alloc); + } else { + auto one = allocBuilder.create<LLVM::ConstantOp>( + loc, allocBuilder.getI32Type(), 1); + alloc = + allocBuilder.create<LLVM::AllocaOp>(loc, llvmPtrTy, llvmPtrTy, one); + Value toStore = singleBuilder + .create<UnrealizedConversionCastOp>( + loc, llvmPtrTy, singleMapping.lookup(v)) + .getResult(0); + singleBuilder.create<LLVM::StoreOp>(loc, toStore, alloc); + reloaded = rootBuilder.create<LLVM::LoadOp>(loc, llvmPtrTy, alloc); + reloaded = + rootBuilder.create<UnrealizedConversionCastOp>(loc, ty, reloaded) + .getResult(0); + } + rootMapping.map(v, reloaded); + }; + + auto moveToSingle = [&](SingleRegion sr, OpBuilder singleBuilder) { + IRMapping singleMapping = rootMapping; + + for (Operation &op : llvm::make_range(sr.begin, sr.end)) { + singleBuilder.clone(op, singleMapping); + if (isSafeToParallelize(&op)) { + rootBuilder.clone(op, rootMapping); + } else { + // Prepare reloaded values for results of operations that cannot be + // safely parallelized and which are used after the region `sr` + for (auto res : op.getResults()) { + for (auto &use : res.getUses()) { + Operation *user = use.getOwner(); + while (user->getParentOp() != wsOp) + user = user->getParentOp(); + if (!user->isBeforeInBlock(&*sr.end)) { + // We need to reload + mapReloadedValue(use.get(), singleBuilder, singleMapping); + } + } + } + } + } + singleBuilder.create<omp::TerminatorOp>(loc); + }; + + Block *wsBlock = &wsOp.getRegion().front(); + assert(wsBlock->getTerminator()->getNumOperands() == 0); + Operation *terminator = wsBlock->getTerminator(); + + SmallVector<std::variant<SingleRegion, omp::WsloopOp>> regions; + + auto it = wsBlock->begin(); + auto getSingleRegion = [&]() { + if (&*it == terminator) + return false; + if (auto pop = dyn_cast<omp::WsloopOp>(&*it)) { + regions.push_back(pop); + it++; + return true; + } + SingleRegion sr; + sr.begin = it; + while (&*it != terminator && !isa<omp::WsloopOp>(&*it)) + it++; + sr.end = it; + assert(sr.begin != sr.end); + regions.push_back(sr); + return true; + }; + while (getSingleRegion()) + ; + + for (auto [i, loopOrSingle] : llvm::enumerate(regions)) { + bool isLast = i + 1 == regions.size(); + if (std::holds_alternative<SingleRegion>(loopOrSingle)) { + omp::SingleOperands singleOperands; + if (isLast) + singleOperands.nowait = rootBuilder.getUnitAttr(); + singleOp = rootBuilder.create<omp::SingleOp>(loc, singleOperands); 
+ OpBuilder singleBuilder(singleOp); + singleBuilder.createBlock(&singleOp.getRegion()); + moveToSingle(std::get<SingleRegion>(loopOrSingle), singleBuilder); + } else { + rootBuilder.clone(*std::get<omp::WsloopOp>(loopOrSingle), rootMapping); + if (!isLast) + rootBuilder.create<omp::BarrierOp>(loc); + } + } + + if (!wsOp.getNowait()) + rootBuilder.create<omp::BarrierOp>(loc); + + wsOp->erase(); + + return; +} + +class LowerWorksharePass + : public flangomp::impl::LowerWorkshareBase<LowerWorksharePass> { +public: + void runOnOperation() override { + SmallPtrSet<Operation *, 8> parents; + getOperation()->walk([&](mlir::omp::WorkshareOp wsOp) { + Operation *isolatedParent = + wsOp->getParentWithTrait<OpTrait::IsIsolatedFromAbove>(); + parents.insert(isolatedParent); + + lowerWorkshare(wsOp); + }); + + // Do folding + for (Operation *isolatedParent : parents) { + RewritePatternSet patterns(&getContext()); + GreedyRewriteConfig config; + // prevent the pattern driver form merging blocks + config.enableRegionSimplification = + mlir::GreedySimplifyRegionLevel::Disabled; + if (failed(applyPatternsAndFoldGreedily(isolatedParent, + std::move(patterns), config))) { + emitError(isolatedParent->getLoc(), "error in lower workshare\n"); + signalPassFailure(); + } + } + } +}; +} // namespace diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir new file mode 100644 index 0000000000000..86a2f031478dd --- /dev/null +++ b/flang/test/HLFIR/bufferize-workshare.fir @@ -0,0 +1,58 @@ +// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s + +// CHECK-LABEL: func.func @simple( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) { +// CHECK: omp.parallel { +// CHECK: omp.workshare { +// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) +// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""} +// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) +// CHECK: %[[VAL_7:.*]] = arith.constant true +// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: omp.wsloop { +// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) { +// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32> +// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32> +// CHECK: omp.yield +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : 
(tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> +// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>> +// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>> +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } +func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) { + omp.parallel { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %shape = fir.shape %c42 : (index) -> !fir.shape<1> + %array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> { + ^bb0(%i: index): + %ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> + %val = fir.load %ref : !fir.ref<i32> + %sub = arith.subi %val, %c1_i32 : i32 + hlfir.yield_element %sub : i32 + } + hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>> + hlfir.destroy %elemental : !hlfir.expr<42xi32> + omp.terminator + } + omp.terminator + } + return +} diff --git a/flang/test/Transforms/OpenMP/lower-workshare.mlir b/flang/test/Transforms/OpenMP/lower-workshare.mlir new file mode 100644 index 0000000000000..a8d36443f08bd --- /dev/null +++ b/flang/test/Transforms/OpenMP/lower-workshare.mlir @@ -0,0 +1,81 @@ +// RUN: fir-opt --lower-workshare %s | FileCheck %s + +module { +// CHECK-LABEL: func.func @simple( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) { +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_3:.*]] = arith.constant 42 : index +// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.ptr : (i32) -> !llvm.ptr +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.heap<!fir.array<42xi32>> +// CHECK: omp.parallel { +// CHECK: omp.single { +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_7]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) +// CHECK: %[[VAL_9:.*]] = builtin.unrealized_conversion_cast %[[VAL_8]]#0 : !fir.ref<!fir.array<42xi32>> to !llvm.ptr +// CHECK: llvm.store %[[VAL_9]], %[[VAL_5]] : !llvm.ptr, !llvm.ptr +// CHECK: %[[VAL_10:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""} +// CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_7]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) +// CHECK: fir.store %[[VAL_11]]#0 to %[[VAL_6]] : !fir.ref<!fir.heap<!fir.array<42xi32>>> +// CHECK: omp.terminator +// CHECK: } +// CHECK: %[[VAL_12:.*]] = llvm.load %[[VAL_5]] : !llvm.ptr -> !llvm.ptr +// CHECK: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.ptr to !fir.ref<!fir.array<42xi32>> +// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_6]] : !fir.ref<!fir.heap<!fir.array<42xi32>>> +// CHECK: omp.wsloop { +// CHECK: omp.loop_nest (%[[VAL_15:.*]]) : index = (%[[VAL_1]]) to (%[[VAL_3]]) inclusive step (%[[VAL_1]]) { +// CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_13]] (%[[VAL_15]]) : 
(!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32> +// CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_17]], %[[VAL_2]] : i32 +// CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_14]] (%[[VAL_15]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32> +// CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_19]] temporary_lhs : i32, !fir.ref<i32> +// CHECK: omp.yield +// CHECK: } +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.barrier +// CHECK: omp.single nowait { +// CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_13]] : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>> +// CHECK: fir.freemem %[[VAL_14]] : !fir.heap<!fir.array<42xi32>> +// CHECK: omp.terminator +// CHECK: } +// CHECK: omp.barrier +// CHECK: omp.terminator +// CHECK: } +// CHECK: return +// CHECK: } + func.func @simple(%arg0: !fir.ref<!fir.array<42xi32>>) { + omp.parallel { + omp.workshare { + %c42 = arith.constant 42 : index + %c1_i32 = arith.constant 1 : i32 + %0 = fir.shape %c42 : (index) -> !fir.shape<1> + %1:2 = hlfir.declare %arg0(%0) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>) + %2 = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""} + %3:2 = hlfir.declare %2(%0) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) + %true = arith.constant true + %c1 = arith.constant 1 : index + omp.wsloop { + omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) { + %7 = hlfir.designate %1#0 (%arg1) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> + %8 = fir.load %7 : !fir.ref<i32> + %9 = arith.subi %8, %c1_i32 : i32 + %10 = hlfir.designate %3#0 (%arg1) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32> + hlfir.assign %9 to %10 temporary_lhs : i32, !fir.ref<i32> + omp.yield + } + omp.terminator + } + %4 = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1> + %5 = fir.insert_value %4, %true, [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> + %6 = fir.insert_value %5, %3#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> + hlfir.assign %3#0 to %1#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>> + fir.freemem %3#0 : !fir.heap<!fir.array<42xi32>> + omp.terminator + } + omp.terminator + } + return + } +} diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt index 9410fd0056600..69316d4dc61de 100644 --- a/flang/tools/bbc/CMakeLists.txt +++ b/flang/tools/bbc/CMakeLists.txt @@ -25,6 +25,7 @@ FIRTransforms FIRBuilder HLFIRDialect HLFIRTransforms +FlangOpenMPTransforms ${dialect_libs} ${extension_libs} MLIRAffineToStandard diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt index 43679a9d53578..4c6dbf7d9c8c3 100644 --- a/flang/tools/fir-opt/CMakeLists.txt +++ b/flang/tools/fir-opt/CMakeLists.txt @@ -19,6 +19,7 @@ target_link_libraries(fir-opt PRIVATE FIRCodeGen HLFIRDialect HLFIRTransforms + FlangOpenMPTransforms FIRAnalysis ${test_libs} ${dialect_libs} diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp index 1846c1b317848..f75fba27c68f0 100644 --- a/flang/tools/fir-opt/fir-opt.cpp +++ b/flang/tools/fir-opt/fir-opt.cpp @@ -14,6 +14,7 @@ #include "mlir/Tools/mlir-opt/MlirOptMain.h" #include 
"flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Transforms/Passes.h" @@ -34,6 +35,7 @@ int main(int argc, char **argv) { fir::registerOptCodeGenPasses(); fir::registerOptTransformPasses(); hlfir::registerHLFIRPasses(); + flangomp::registerFlangOpenMPPasses(); #ifdef FLANG_INCLUDE_TESTS fir::test::registerTestFIRAliasAnalysisPass(); mlir::registerSideEffectTestPasses(); diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index 808219ac361f2..698a398547c77 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -17,6 +17,7 @@ target_link_libraries(tco PRIVATE FIRBuilder HLFIRDialect HLFIRTransforms + FlangOpenMPTransforms ${dialect_libs} ${extension_libs} MLIRIR >From db4fc37ae430a138d1401b587aa1c54184e84dc9 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 2 Aug 2024 16:41:09 +0900 Subject: [PATCH 7/9] Change to workshare loop wrapper op --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 24 ++++++++++++------- flang/test/HLFIR/bufferize-workshare.fir | 4 ++-- .../Transforms/OpenMP/lower-workshare.mlir | 5 ++-- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index 40975552d1fe3..cb342b60de4e8 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" +#include <mlir/Dialect/OpenMP/OpenMPClauseOperands.h> #include <variant> namespace flangomp { @@ -73,7 +74,7 @@ static bool isSafeToParallelize(Operation *op) { /// /// omp.workshare { /// %a = fir.allocmem -/// omp.wsloop {} +/// omp.workshare_loop_wrapper {} /// fir.call Assign %b %a /// fir.freemem %a /// } @@ -85,7 +86,7 @@ static bool isSafeToParallelize(Operation *op) { /// fir.store %a %tmp /// } /// %a_reloaded = fir.load %tmp -/// omp.wsloop {} +/// omp.workshare_loop_wrapper {} /// omp.single { /// fir.call Assign %b %a_reloaded /// fir.freemem %a_reloaded @@ -180,20 +181,20 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { assert(wsBlock->getTerminator()->getNumOperands() == 0); Operation *terminator = wsBlock->getTerminator(); - SmallVector<std::variant<SingleRegion, omp::WsloopOp>> regions; + SmallVector<std::variant<SingleRegion, omp::WorkshareLoopWrapperOp>> regions; auto it = wsBlock->begin(); auto getSingleRegion = [&]() { if (&*it == terminator) return false; - if (auto pop = dyn_cast<omp::WsloopOp>(&*it)) { + if (auto pop = dyn_cast<omp::WorkshareLoopWrapperOp>(&*it)) { regions.push_back(pop); it++; return true; } SingleRegion sr; sr.begin = it; - while (&*it != terminator && !isa<omp::WsloopOp>(&*it)) + while (&*it != terminator && !isa<omp::WorkshareLoopWrapperOp>(&*it)) it++; sr.end = it; assert(sr.begin != sr.end); @@ -214,9 +215,16 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { singleBuilder.createBlock(&singleOp.getRegion()); moveToSingle(std::get<SingleRegion>(loopOrSingle), singleBuilder); } else { - rootBuilder.clone(*std::get<omp::WsloopOp>(loopOrSingle), rootMapping); - if (!isLast) - rootBuilder.create<omp::BarrierOp>(loc); + omp::WsloopOperands wsloopOperands; + if (isLast) + wsloopOperands.nowait = rootBuilder.getUnitAttr(); + auto wsloop = + rootBuilder.create<mlir::omp::WsloopOp>(loc, wsloopOperands); + auto wslw = 
std::get<omp::WorkshareLoopWrapperOp>(loopOrSingle); + auto clonedWslw = cast<omp::WorkshareLoopWrapperOp>( + rootBuilder.clone(*wslw, rootMapping)); + wsloop.getRegion().takeBody(clonedWslw.getRegion()); + clonedWslw->erase(); } } diff --git a/flang/test/HLFIR/bufferize-workshare.fir b/flang/test/HLFIR/bufferize-workshare.fir index 86a2f031478dd..33b368a62eaab 100644 --- a/flang/test/HLFIR/bufferize-workshare.fir +++ b/flang/test/HLFIR/bufferize-workshare.fir @@ -12,7 +12,7 @@ // CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) // CHECK: %[[VAL_7:.*]] = arith.constant true // CHECK: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK: omp.wsloop { +// CHECK: "omp.workshare_loop_wrapper"() ({ // CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) { // CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> // CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32> @@ -22,7 +22,7 @@ // CHECK: omp.yield // CHECK: } // CHECK: omp.terminator -// CHECK: } +// CHECK: }) : () -> () // CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1> // CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> // CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> diff --git a/flang/test/Transforms/OpenMP/lower-workshare.mlir b/flang/test/Transforms/OpenMP/lower-workshare.mlir index a8d36443f08bd..cb5791d35916a 100644 --- a/flang/test/Transforms/OpenMP/lower-workshare.mlir +++ b/flang/test/Transforms/OpenMP/lower-workshare.mlir @@ -34,7 +34,6 @@ module { // CHECK: } // CHECK: omp.terminator // CHECK: } -// CHECK: omp.barrier // CHECK: omp.single nowait { // CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_13]] : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>> // CHECK: fir.freemem %[[VAL_14]] : !fir.heap<!fir.array<42xi32>> @@ -56,7 +55,7 @@ module { %3:2 = hlfir.declare %2(%0) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>) %true = arith.constant true %c1 = arith.constant 1 : index - omp.wsloop { + "omp.workshare_loop_wrapper"() ({ omp.loop_nest (%arg1) : index = (%c1) to (%c42) inclusive step (%c1) { %7 = hlfir.designate %1#0 (%arg1) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32> %8 = fir.load %7 : !fir.ref<i32> @@ -66,7 +65,7 @@ module { omp.yield } omp.terminator - } + }) : () -> () %4 = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1> %5 = fir.insert_value %4, %true, [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> %6 = fir.insert_value %5, %3#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1> >From 1dfdab56b0250bc3e4b0869451f7c21c847f9aee Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 2 Aug 2024 16:47:27 +0900 Subject: [PATCH 8/9] Move single op declaration --- flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 
deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp index cb342b60de4e8..2322d2acbc013 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp @@ -120,8 +120,6 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { OpBuilder rootBuilder(wsOp); IRMapping rootMapping; - omp::SingleOp singleOp = nullptr; - auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder, IRMapping singleMapping) { if (auto reloaded = rootMapping.lookupOrNull(v)) @@ -210,7 +208,8 @@ void lowerWorkshare(mlir::omp::WorkshareOp wsOp) { omp::SingleOperands singleOperands; if (isLast) singleOperands.nowait = rootBuilder.getUnitAttr(); - singleOp = rootBuilder.create<omp::SingleOp>(loc, singleOperands); + omp::SingleOp singleOp = + rootBuilder.create<omp::SingleOp>(loc, singleOperands); OpBuilder singleBuilder(singleOp); singleBuilder.createBlock(&singleOp.getRegion()); moveToSingle(std::get<SingleRegion>(loopOrSingle), singleBuilder); >From 386157c154d25d22e4e3ea083d0421496072316a Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov <ivanov.i...@m.titech.ac.jp> Date: Fri, 2 Aug 2024 17:13:58 +0900 Subject: [PATCH 9/9] Schedule pass properly --- flang/include/flang/Tools/CLOptions.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 594369fc2ffe5..2edaf0c6a0ae8 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -345,7 +345,7 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); pm.addPass(hlfir::createConvertHLFIRtoFIR()); - pm.addPass(flangomp::createLowerWorkshare()); + addNestedPassToAllTopLevelOperations(pm, flangomp::createLowerWorkshare); } /// Create a pass pipeline for handling certain OpenMP transformations needed