https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/77761
>From 1b5524ae8874e389d373a55417919afa56beb2b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Mon, 8 Jan 2024 15:53:07 -0600 Subject: [PATCH 1/3] [Flang][OpenMP] Restructure recursive lowering in `createBodyOfOp` This brings `createBodyOfOp` to its final intended form. First, input privatization is performed, then the recursive lowering takes place, and finally the output privatization (lastprivate) is done. This enables fixing a known issue with infinite loops inside of an OpenMP region, and the fix is included in this patch. Fixes https://github.com/llvm/llvm-project/issues/74348. Recursive lowering [5/5] --- flang/include/flang/Lower/OpenMP.h | 4 +- flang/lib/Lower/OpenMP.cpp | 133 ++++++++++++------ flang/test/Lower/OpenMP/FIR/sections.f90 | 6 +- .../OpenMP/infinite-loop-in-construct.f90 | 19 +++ flang/test/Lower/OpenMP/sections.f90 | 6 +- 5 files changed, 119 insertions(+), 49 deletions(-) create mode 100644 flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index 6e772c43d8c46e..477d3e7d9da3a8 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -50,8 +50,8 @@ struct Variable; } // namespace pft // Generate the OpenMP terminator for Operation at Location. -void genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *, - mlir::Location); +mlir::Operation *genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *, + mlir::Location); void genOpenMPConstruct(AbstractConverter &, Fortran::lower::SymMap &, semantics::SemanticsContext &, pft::Evaluation &, diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 6e93387fbff7cc..bb92fdce4ac56e 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -383,7 +383,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { // construct mlir::OpBuilder::InsertPoint unstructuredSectionsIP = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(&op->getRegion(0).back()); + mlir::Operation *lastOper = op->getRegion(0).back().getTerminator(); + firOpBuilder.setInsertionPoint(lastOper); lastPrivIP = firOpBuilder.saveInsertionPoint(); firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); } @@ -2133,15 +2134,6 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); } -static void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder, - mlir::Operation *storeOp, - mlir::Block &block) { - if (storeOp) - firOpBuilder.setInsertionPointAfter(storeOp); - else - firOpBuilder.setInsertionPointToStart(&block); -} - static mlir::Operation * createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, mlir::Location loc, mlir::Value indexVal, @@ -2183,11 +2175,43 @@ static void createBodyOfOp( const llvm::SmallVector<const Fortran::semantics::Symbol *> &args = {}, bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + auto insertMarker = [](fir::FirOpBuilder &builder) { + mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(), + builder.getIndexType()); + return undef.getDefiningOp(); + }; + + // Find the block where the OMP terminator should go. In simple cases + // it is the single block in the operation's region. When the region + // is more complicated, especially with unstructured control flow, there + // may be multiple blocks, and some of them may have non-OMP terminators + // resulting from lowering of the code contained within the operation. + // By OpenMP rules, there should be a single exit point from the region: + // here exit means transfering control to the code following the operation. + // STOP statement is allowed and does not count as exit for the purpose of + // inserting terminators. + auto findExitBlock = [&](mlir::Region ®ion) -> mlir::Block * { + auto isTerminated = [](mlir::Block &block) -> bool { + if (block.empty()) + return false; + return block.back().hasTrait<mlir::OpTrait::IsTerminator>(); + }; + + mlir::Block *exit = nullptr; + for (auto &block : region) { + if (!isTerminated(block)) { + assert(exit == nullptr && "Multiple exit block in OpenMP region"); + exit = █ + } + } + return exit; + }; + // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further // uses of the induction variable should use this mlir value. - mlir::Operation *storeOp = nullptr; if (args.size()) { std::size_t loopVarTypeSize = 0; for (const Fortran::semantics::Symbol *arg : args) @@ -2198,20 +2222,20 @@ static void createBodyOfOp( firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs); // The argument is not currently in memory, so make a temporary for the // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { mlir::Value indexVal = fir::getBase(op.getRegion().front().getArgument(argIndex)); storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } + firOpBuilder.setInsertionPointAfter(storeOp); } else { firOpBuilder.createBlock(&op.getRegion()); } - // Set the insert for the terminator operation to go at the end of the - // block - this is either empty or the block with the stores above, - // the end of the block works for both. - mlir::Block &block = op.getRegion().back(); - firOpBuilder.setInsertionPointToEnd(&block); + + // Mark the earliest insertion point. + mlir::Operation *marker = insertMarker(firOpBuilder); // If it is an unstructured region and is not the outer region of a combined // construct, create empty blocks for all evaluations. @@ -2220,37 +2244,64 @@ static void createBodyOfOp( mlir::omp::YieldOp>( firOpBuilder, eval.getNestedEvaluations()); - // Insert the terminator. - Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc); - // Reset the insert point to before the terminator. - resetBeforeTerminator(firOpBuilder, storeOp, block); + // Start with privatization, so that the lowering of the nested + // code will use the right symbols. + constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> || + std::is_same_v<Op, mlir::omp::SimdLoopOp>; + bool privatize = clauses && !outerCombined; - // Handle privatization. Do not privatize if this is the outer operation. - if (clauses && !outerCombined) { - constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> || - std::is_same_v<Op, mlir::omp::SimdLoopOp>; + firOpBuilder.setInsertionPoint(marker); + std::optional<DataSharingProcessor> tempDsp; + if (privatize) { if (!dsp) { - DataSharingProcessor proc(converter, *clauses, eval); - proc.processStep1(); - proc.processStep2(op, isLoop); - } else { - if (isLoop && args.size() > 0) - dsp->setLoopIV(converter.getSymbolAddress(*args[0])); - dsp->processStep2(op, isLoop); + tempDsp.emplace(converter, *clauses, eval); + tempDsp->processStep1(); } - - if (storeOp) - firOpBuilder.setInsertionPointAfter(storeOp); } if constexpr (std::is_same_v<Op, mlir::omp::ParallelOp>) { threadPrivatizeVars(converter, eval); - if (clauses) + if (clauses) { + firOpBuilder.setInsertionPoint(marker); ClauseProcessor(converter, *clauses).processCopyin(); + } } - if (genNested) + if (genNested) { + // genFIR(Evaluation&) tries to patch up unterminated blocks, causing + // a lot of trouble if the terminator generation is delayed past this + // point. Insert a temporary terminator here, then delete it. + firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); + auto *temp = Fortran::lower::genOpenMPTerminator(firOpBuilder, + op.getOperation(), loc); + firOpBuilder.setInsertionPointAfter(marker); genNestedEvaluations(converter, eval); + temp->erase(); + } + + if (auto *exitBlock = findExitBlock(op.getRegion())) { + firOpBuilder.setInsertionPointToEnd(exitBlock); + auto *term = Fortran::lower::genOpenMPTerminator(firOpBuilder, + op.getOperation(), loc); + // Only insert lastprivate code when there actually is an exit block. + // Such a block may not exist if the nested code produced an infinite + // loop (this may not make sense in production code, but a user could + // write that and we should handle it). + firOpBuilder.setInsertionPoint(term); + if (privatize) { + if (!dsp) { + assert(tempDsp.has_value()); + tempDsp->processStep2(op, isLoop); + } else { + if (isLoop && args.size() > 0) + dsp->setLoopIV(converter.getSymbolAddress(*args[0])); + dsp->processStep2(op, isLoop); + } + } + } + + firOpBuilder.setInsertionPointAfter(marker); + marker->erase(); } static void genBodyOfTargetDataOp( @@ -3664,14 +3715,14 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Public functions //===----------------------------------------------------------------------===// -void Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, - mlir::Operation *op, - mlir::Location loc) { +mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, + mlir::Operation *op, + mlir::Location loc) { if (mlir::isa<mlir::omp::WsLoopOp, mlir::omp::ReductionDeclareOp, mlir::omp::AtomicUpdateOp, mlir::omp::SimdLoopOp>(op)) - builder.create<mlir::omp::YieldOp>(loc); + return builder.create<mlir::omp::YieldOp>(loc); else - builder.create<mlir::omp::TerminatorOp>(loc); + return builder.create<mlir::omp::TerminatorOp>(loc); } void Fortran::lower::genOpenMPConstruct( diff --git a/flang/test/Lower/OpenMP/FIR/sections.f90 b/flang/test/Lower/OpenMP/FIR/sections.f90 index 87d34e58321be0..640ec34a05bc21 100644 --- a/flang/test/Lower/OpenMP/FIR/sections.f90 +++ b/flang/test/Lower/OpenMP/FIR/sections.f90 @@ -126,11 +126,11 @@ subroutine lastprivate() x = x * 10 !CHECK: omp.section { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} -!CHECK: %[[true:.*]] = arith.constant true !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: %[[const:.*]] = arith.constant 1 : i32 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32> +!CHECK: %[[true:.*]] = arith.constant true !CHECK: fir.if %[[true]] { !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32> @@ -163,11 +163,11 @@ subroutine lastprivate() !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32> !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: omp.barrier -!CHECK: %[[true:.*]] = arith.constant true !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: %[[const:.*]] = arith.constant 1 : i32 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32> +!CHECK: %[[true:.*]] = arith.constant true !CHECK: fir.if %true { !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32> @@ -200,11 +200,11 @@ subroutine lastprivate() !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref<i32> !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: omp.barrier -!CHECK: %[[true:.*]] = arith.constant true !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: %[[const:.*]] = arith.constant 1 : i32 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref<i32> +!CHECK: %[[true:.*]] = arith.constant true !CHECK: fir.if %true { !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref<i32> !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref<i32> diff --git a/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 new file mode 100644 index 00000000000000..d44b440a72d90d --- /dev/null +++ b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 @@ -0,0 +1,19 @@ +! RUN: bbc -fopenmp -o - %s | FileCheck %s + +! Check that this test can be lowered successfully. +! See https://github.com/llvm/llvm-project/issues/74348 + +! CHECK-LABEL: func.func @_QPsb +! CHECK: omp.parallel +subroutine sb(ninter, numnod) + integer :: ninter, numnod + integer, dimension(:), allocatable :: indx_nm + + !$omp parallel + if (ninter>0) then + allocate(indx_nm(numnod)) + endif + 220 continue + goto 220 + !$omp end parallel +end subroutine diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90 index 16426d070d9a94..6bad688058d282 100644 --- a/flang/test/Lower/OpenMP/sections.f90 +++ b/flang/test/Lower/OpenMP/sections.f90 @@ -141,11 +141,11 @@ subroutine lastprivate() !CHECK: omp.section { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32> !CHECK: %[[CONST:.*]] = arith.constant 1 : i32 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32> +!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: fir.if %[[TRUE]] { !CHECK: %[[TEMP1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32> !CHECK: hlfir.assign %[[TEMP1]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32> @@ -180,11 +180,11 @@ subroutine lastprivate() !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32> !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32> !CHECK: omp.barrier -!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32> !CHECK: %[[CONST:.*]] = arith.constant 1 : i32 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32> +!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: fir.if %[[TRUE]] { !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32> !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32> @@ -219,11 +219,11 @@ subroutine lastprivate() !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32> !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32> !CHECK: omp.barrier -!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32> !CHECK: %[[CONST:.*]] = arith.constant 1 : i32 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32> +!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: fir.if %[[TRUE]] { !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32> !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32> >From a1b59bfe0783fb1ebf90344a1261b4efe073aafe Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Mon, 15 Jan 2024 13:00:03 -0600 Subject: [PATCH 2/3] Add more detailed checks to testcase --- .../test/Lower/OpenMP/infinite-loop-in-construct.f90 | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 index d44b440a72d90d..16b400a2318609 100644 --- a/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 +++ b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 @@ -3,8 +3,15 @@ ! Check that this test can be lowered successfully. ! See https://github.com/llvm/llvm-project/issues/74348 -! CHECK-LABEL: func.func @_QPsb -! CHECK: omp.parallel +! CHECK-LABEL: func.func @_QPsb +! CHECK: omp.parallel +! CHECK: cf.cond_br %{{[0-9]+}}, ^bb1, ^bb2 +! CHECK-NEXT: ^bb1: // pred: ^bb0 +! CHECK: cf.br ^bb2 +! CHECK-NEXT: ^bb2: // 3 preds: ^bb0, ^bb1, ^bb2 +! CHECK-NEXT: cf.br ^bb2 +! CHECK-NEXT: } + subroutine sb(ninter, numnod) integer :: ninter, numnod integer, dimension(:), allocatable :: indx_nm >From 8ead6c5ae0df086c04ff6738fcec72525bd0f6d0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek <krzysztof.parzys...@amd.com> Date: Mon, 15 Jan 2024 13:10:54 -0600 Subject: [PATCH 3/3] Add testcase from https://github.com/llvm/llvm-project/pull/77329 --- .../test/Lower/OpenMP/wsloop-unstructured.f90 | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 flang/test/Lower/OpenMP/wsloop-unstructured.f90 diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 new file mode 100644 index 00000000000000..7fe63a1fe607c2 --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 @@ -0,0 +1,61 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s + +subroutine sub(imax, jmax, x, y) + integer, intent(in) :: imax, jmax + real, intent(in), dimension(1:imax, 1:jmax) :: x, y + + integer :: i, j, ii + + ! collapse(2) is needed to reproduce the issue + !$omp parallel do collapse(2) + do j = 1, jmax + do i = 1, imax + do ii = 1, imax ! note that this loop is not collapsed + if (x(i,j) < y(ii,j)) then + ! exit needed to force unstructured control flow + exit + endif + enddo + enddo + enddo +end subroutine sub + +! this is testing that we don't crash generating code for this: in particular +! that all blocks are terminated + +! CHECK-LABEL: func.func @_QPsub( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<i32> {fir.bindc_name = "imax"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "jmax"}, +! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "y"}) { +! [...] +! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! [...] +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: cf.br ^bb2 +! CHECK: ^bb2: +! [...] +! CHECK: cf.br ^bb3 +! CHECK: ^bb3: +! [...] +! CHECK: %[[VAL_63:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32 +! CHECK: cf.cond_br %[[VAL_63]], ^bb4, ^bb7 +! CHECK: ^bb4: +! [...] +! CHECK: %[[VAL_76:.*]] = arith.cmpf olt, %{{.*}}, %{{.*}} fastmath<contract> : f32 +! CHECK: cf.cond_br %[[VAL_76]], ^bb5, ^bb6 +! CHECK: ^bb5: +! CHECK: cf.br ^bb7 +! CHECK: ^bb6: +! [...] +! CHECK: cf.br ^bb3 +! CHECK: ^bb7: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: return +! CHECK: } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits