llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-flang-fir-hlfir Author: Sergio Afonso (skatrak) <details> <summary>Changes</summary> This patch moves the creation of `DataSharingProcessor` instances for loop constructs out of `genOMPDispatch()` and into their corresponding codegen functions. This is a necessary first step to enable a proper handling of privatization on composite constructs. Some tests are updated due to a change of order between clause processing and privatization. --- Patch is 34.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106066.diff 6 Files Affected: - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+84-58) - (modified) flang/test/Lower/OpenMP/parallel-reduction3.f90 (+7-7) - (modified) flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 (+7-7) - (modified) flang/test/Lower/OpenMP/wsloop-reduction-array.f90 (+9-9) - (modified) flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 (+9-9) - (modified) flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90 (+11-11) ``````````diff diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d614db8b68ef65..307cf47247b743 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1044,7 +1044,6 @@ static void genDistributeClauses(lower::AbstractConverter &converter, cp.processAllocate(clauseOps); cp.processDistSchedule(stmtCtx, clauseOps); cp.processOrder(clauseOps); - // TODO Support delayed privatization. } static void genFlushClauses(lower::AbstractConverter &converter, @@ -1128,7 +1127,6 @@ static void genSimdClauses(lower::AbstractConverter &converter, cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); - // TODO Support delayed privatization. cp.processTODO<clause::Linear, clause::Nontemporal>( loc, llvm::omp::Directive::OMPD_simd); } @@ -1299,7 +1297,6 @@ static void genWsloopClauses( cp.processOrdered(clauseOps); cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); cp.processSchedule(stmtCtx, clauseOps); - // TODO Support delayed privatization. cp.processTODO<clause::Allocate, clause::Linear>( loc, llvm::omp::Directive::OMPD_do); @@ -1918,17 +1915,25 @@ genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, // also be a leaf of a composite construct //===----------------------------------------------------------------------===// -static void genStandaloneDistribute( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genStandaloneDistribute(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::DistributeOperands distributeClauseOps; genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, distributeClauseOps); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector<const semantics::Symbol *> iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, @@ -1949,8 +1954,7 @@ static void genStandaloneDo(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, - DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::WsloopOperands wsloopClauseOps; @@ -1959,6 +1963,12 @@ static void genStandaloneDo(lower::AbstractConverter &converter, genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, wsloopClauseOps, reductionTypes, reductionSyms); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector<const semantics::Symbol *> iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, @@ -1998,11 +2008,16 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, - DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector<const semantics::Symbol *> iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, @@ -2018,11 +2033,13 @@ static void genStandaloneSimd(lower::AbstractConverter &converter, llvm::omp::Directive::OMPD_simd, dsp); } -static void genStandaloneTaskloop( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genStandaloneTaskloop(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { TODO(loc, "Taskloop construct"); } @@ -2034,7 +2051,7 @@ static void genCompositeDistributeParallelDo( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); TODO(loc, "Composite DISTRIBUTE PARALLEL DO"); } @@ -2043,16 +2060,18 @@ static void genCompositeDistributeParallelDoSimd( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD"); } -static void genCompositeDistributeSimd( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genCompositeDistributeSimd(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); @@ -2067,6 +2086,12 @@ static void genCompositeDistributeSimd( mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2103,8 +2128,7 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, - DataSharingProcessor &dsp) { + ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); @@ -2121,6 +2145,12 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps); + // TODO: Support delayed privatization. + DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, &symTable); + dsp.processStep1(); + // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. mlir::omp::LoopNestOperands loopNestClauseOps; @@ -2151,11 +2181,13 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, llvm::omp::Directive::OMPD_do_simd, dsp); } -static void genCompositeTaskloopSimd( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static void genCompositeTaskloopSimd(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } @@ -2164,30 +2196,35 @@ static void genCompositeTaskloopSimd( // Dispatch //===----------------------------------------------------------------------===// -static bool genOMPCompositeDispatch( - lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { +static bool genOMPCompositeDispatch(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { using llvm::omp::Directive; using lower::omp::matchLeafSequence; + // TODO: Privatization for composite constructs is currently only done based + // on the clauses for their last leaf construct, which may not always be + // correct. Consider per-leaf privatization of composite constructs once + // delayed privatization is supported by all participating ops. if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do)) genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, - queue, item, dsp); + queue, item); else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do_simd)) genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, - loc, queue, item, dsp); + loc, queue, item); else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd)) genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, - item, dsp); + item); else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd)) - genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, - dsp); + genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item); else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd)) genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, - item, dsp); + item); else return false; @@ -2202,20 +2239,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter, ConstructQueue::const_iterator item) { assert(item != queue.end()); - std::optional<DataSharingProcessor> loopDsp; bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) == llvm::omp::Association::Loop; if (loopLeaf) { symTable.pushScope(); - // TODO: Use one DataSharingProcessor for each leaf of a composite - // construct. - loopDsp.emplace(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, &symTable); - loopDsp->processStep1(); - if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp)) { + item)) { symTable.popScope(); return; } @@ -2227,11 +2256,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_distribute: genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); + item); break; case llvm::omp::Directive::OMPD_do: - genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); + genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_loop: TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir)); @@ -2260,8 +2288,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // in genBodyOfOp break; case llvm::omp::Directive::OMPD_simd: - genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); + genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); @@ -2291,8 +2318,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskloop: - genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); + genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskwait: genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item); diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90 index 441dff34553d4f..591f41cb946602 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction3.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90 @@ -69,19 +69,19 @@ ! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32 ! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box<!fir.array<?xi32>> ! CHECK: omp.parallel { -! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.array<?xi32>> -! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref<!fir.box<!fir.array<?xi32>>> +! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>> +! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>> +! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} +! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32 ! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) { +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) { ! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) { ! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>) -! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref<i32> +! CHECK: fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32> ! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>> -! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32> ! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index) ! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1> diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 index c984ab61bedb3b..d881ff8c1a026a 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 @@ -79,18 +79,18 @@ subroutine reduce(r) ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>) ! CHECK: omp.parallel { -! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}} -! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf64>> -! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf64>>> +! CHECK: ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/106066 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits