https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/138534
>From 0d309682fa886e3481f95ed0b0ba9a6ca9a5c9f1 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Mon, 5 May 2025 07:15:52 -0500 Subject: [PATCH 1/2] [flang][fir] Basic PFT to MLIR lowering for do concurrent locality specifiers Extends support for `fir.do_concurrent` locality specifiers to the PFT to MLIR level. This adds code-gen for generating the newly added `fir.local` ops and referencing these ops from `fir.do_concurrent.loop` ops that have locality specifiers attached to them. This reuses the `DataSharingProcessor` component and generalizes it a bit more to allow for handling `omp.private` ops and `fir.local` ops as well. --- flang/include/flang/Lower/AbstractConverter.h | 4 + .../include/flang/Optimizer/Dialect/FIROps.h | 4 + .../include/flang/Optimizer/Dialect/FIROps.td | 15 +++ flang/lib/Lower/Bridge.cpp | 59 ++++++++-- .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 104 +++++++++++++----- flang/lib/Lower/OpenMP/DataSharingProcessor.h | 14 ++- .../Lower/do_concurrent_delayed_locality.f90 | 49 +++++++++ 7 files changed, 209 insertions(+), 40 deletions(-) create mode 100644 flang/test/Lower/do_concurrent_delayed_locality.f90 diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 1d1323642bf9c..8ae68e143cd2f 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -348,6 +348,10 @@ class AbstractConverter { virtual Fortran::lower::SymbolBox lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + /// Find the symbol in the inner-most level of the local map or return null. + virtual Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0; + /// Return the mlir::SymbolTable associated to the ModuleOp. 
/// Look-ups are faster using it than using module.lookup<>, /// but the module op should be queried in case of failure diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index 1bed227afb50d..62ef8b4b502f2 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -147,6 +147,10 @@ class CoordinateIndicesAdaptor { mlir::ValueRange values; }; +struct LocalitySpecifierOperands { + llvm::SmallVector<::mlir::Value> privateVars; + llvm::SmallVector<::mlir::Attribute> privateSyms; +}; } // namespace fir #endif // FORTRAN_OPTIMIZER_DIALECT_FIROPS_H diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 4b97e5ffc6007..c36daa4419c14 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3600,6 +3600,21 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> { ]; let extraClassDeclaration = [{ + mlir::BlockArgument getInitMoldArg() { + auto &region = getInitRegion(); + return region.empty() ? nullptr : region.getArgument(0); + } + mlir::BlockArgument getInitPrivateArg() { + auto &region = getInitRegion(); + return region.empty() ? nullptr : region.getArgument(1); + } + + /// Returns true if the init region might read from the mold argument + bool initReadsFromMold() { + mlir::BlockArgument moldArg = getInitMoldArg(); + return moldArg && !moldArg.use_empty(); + } + /// Get the type for arguments to nested regions. This should /// generally be either the same as getType() or some pointer /// type (pointing to the type allocated by this op). 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0a61f61ab8f75..bf55402ec4714 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -12,6 +12,8 @@ #include "flang/Lower/Bridge.h" +#include "OpenMP/DataSharingProcessor.h" +#include "OpenMP/Utils.h" #include "flang/Lower/Allocatable.h" #include "flang/Lower/CallInterface.h" #include "flang/Lower/Coarray.h" @@ -1144,6 +1146,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { return name; } + /// Find the symbol in the inner-most level of the local map or return null. + Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override { + if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym)) + return v; + return {}; + } + private: FirConverter() = delete; FirConverter(const FirConverter &) = delete; @@ -1218,14 +1228,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { return {}; } - /// Find the symbol in the inner-most level of the local map or return null. - Fortran::lower::SymbolBox - shallowLookupSymbol(const Fortran::semantics::Symbol &sym) { - if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym)) - return v; - return {}; - } - /// Find the symbol in one level up of symbol map such as for host-association /// in OpenMP code or return null. 
Fortran::lower::SymbolBox @@ -2029,9 +2031,30 @@ class FirConverter : public Fortran::lower::AbstractConverter { void handleLocalitySpecs(const IncrementLoopInfo &info) { Fortran::semantics::SemanticsContext &semanticsContext = bridge.getSemanticsContext(); - for (const Fortran::semantics::Symbol *sym : info.localSymList) + Fortran::lower::omp::DataSharingProcessor dsp( + *this, semanticsContext, getEval(), + /*useDelayedPrivatization=*/true, localSymbols); + fir::LocalitySpecifierOperands privateClauseOps; + auto doConcurrentLoopOp = + mlir::dyn_cast_if_present<fir::DoConcurrentLoopOp>(info.loopOp); + bool useDelayedPriv = + enableDelayedPrivatizationStaging && doConcurrentLoopOp; + + for (const Fortran::semantics::Symbol *sym : info.localSymList) { + if (useDelayedPriv) { + dsp.privatizeSymbol<fir::LocalitySpecifierOp>(sym, &privateClauseOps); + continue; + } + createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false); + } + for (const Fortran::semantics::Symbol *sym : info.localInitSymList) { + if (useDelayedPriv) { + dsp.privatizeSymbol<fir::LocalitySpecifierOp>(sym, &privateClauseOps); + continue; + } + createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true); const auto *hostDetails = sym->detailsIf<Fortran::semantics::HostAssocDetails>(); @@ -2050,6 +2073,24 @@ class FirConverter : public Fortran::lower::AbstractConverter { sym->detailsIf<Fortran::semantics::HostAssocDetails>(); copySymbolBinding(hostDetails->symbol(), *sym); } + + if (useDelayedPriv) { + doConcurrentLoopOp.getLocalVarsMutable().assign( + privateClauseOps.privateVars); + doConcurrentLoopOp.setLocalSymsAttr( + builder->getArrayAttr(privateClauseOps.privateSyms)); + + for (auto [sym, privateVar] : llvm::zip_equal( + dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) { + auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument( + privateVar.getType(), doConcurrentLoopOp.getLoc()); + bindSymbol(*sym, hlfir::translateToExtendedValue( + privateVar.getLoc(), *builder, 
hlfir::Entity{arg}, + /*contiguousHint=*/true) + .first); + } + } + // Note that allocatable, types with ultimate components, and type // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130), // so no clean-up needs to be generated for these entities. diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index b88454c45da85..bedd8864a0bc2 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -20,6 +20,7 @@ #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIRDialect.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Semantics/attr.h" @@ -53,6 +54,15 @@ DataSharingProcessor::DataSharingProcessor( }); } +DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, + lower::SymMap &symTable) + : DataSharingProcessor(converter, semaCtx, {}, eval, + /*shouldCollectPreDeterminedSymols=*/false, + useDelayedPrivatization, symTable) {} + void DataSharingProcessor::processStep1( mlir::omp::PrivateClauseOps *clauseOps) { collectSymbolsForPrivatization(); @@ -172,7 +182,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) { void DataSharingProcessor::copyFirstPrivateSymbol( const semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *copyAssignIP) { - if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate)) + if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || + sym->test(semantics::Symbol::Flag::LocalityLocalInit)) converter.copyHostAssociateVar(*sym, copyAssignIP); } @@ -485,9 +496,9 @@ void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) { if (const auto *commonDet = 
sym->detailsIf<semantics::CommonBlockDetails>()) { for (const auto &mem : commonDet->objects()) - doPrivatize(&*mem, clauseOps); + privatizeSymbol(&*mem, clauseOps); } else - doPrivatize(sym, clauseOps); + privatizeSymbol(sym, clauseOps); } } @@ -504,22 +515,30 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) { } } -void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, - mlir::omp::PrivateClauseOps *clauseOps) { +template <typename OpType, typename OperandsStructType> +void DataSharingProcessor::privatizeSymbol( + const semantics::Symbol *symToPrivatize, OperandsStructType *clauseOps) { if (!useDelayedPrivatization) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + cloneSymbol(symToPrivatize); + copyFirstPrivateSymbol(symToPrivatize); return; } - lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + const semantics::Symbol *sym = symToPrivatize->HasLocalLocality() + ? &symToPrivatize->GetUltimate() + : symToPrivatize; + lower::SymbolBox hsb = symToPrivatize->HasLocalLocality() + ? 
converter.shallowLookupSymbol(*sym) + : converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); hlfir::Entity entity{hsb.getAddr()}; bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds(); mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; - bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate); + bool isFirstPrivate = + symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) || + symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit); mlir::Value privVal = hsb.getAddr(); mlir::Type allocType = privVal.getType(); @@ -553,7 +572,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, mlir::Type argType = privVal.getType(); - mlir::omp::PrivateClauseOp privatizerOp = [&]() { + OpType privatizerOp = [&]() { auto moduleOp = firOpBuilder.getModule(); auto uniquePrivatizerName = fir::getTypeAsString( allocType, converter.getKindMap(), @@ -561,16 +580,25 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, (isFirstPrivate ? "_firstprivate" : "_private")); if (auto existingPrivatizer = - moduleOp.lookupSymbol<mlir::omp::PrivateClauseOp>( - uniquePrivatizerName)) + moduleOp.lookupSymbol<OpType>(uniquePrivatizerName)) return existingPrivatizer; mlir::OpBuilder::InsertionGuard guard(firOpBuilder); firOpBuilder.setInsertionPointToStart(moduleOp.getBody()); - auto result = firOpBuilder.create<mlir::omp::PrivateClauseOp>( - symLoc, uniquePrivatizerName, allocType, - isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate - : mlir::omp::DataSharingClauseType::Private); + OpType result; + + if constexpr (std::is_same_v<OpType, mlir::omp::PrivateClauseOp>) { + result = firOpBuilder.create<OpType>( + symLoc, uniquePrivatizerName, allocType, + isFirstPrivate ? 
mlir::omp::DataSharingClauseType::FirstPrivate + : mlir::omp::DataSharingClauseType::Private); + } else { + result = firOpBuilder.create<OpType>( + symLoc, uniquePrivatizerName, allocType, + isFirstPrivate ? fir::LocalitySpecifierType::LocalInit + : fir::LocalitySpecifierType::Local); + } + fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym); lower::SymMapScope outerScope(symTable); @@ -613,27 +641,36 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, &copyRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc}); firOpBuilder.setInsertionPointToEnd(copyEntryBlock); - auto addSymbol = [&](unsigned argIdx, bool force = false) { + auto addSymbol = [&](unsigned argIdx, const semantics::Symbol *symToMap, + bool force = false) { symExV.match( [&](const fir::MutableBoxValue &box) { symTable.addSymbol( - *sym, fir::substBase(box, copyRegion.getArgument(argIdx)), - force); + *symToMap, + fir::substBase(box, copyRegion.getArgument(argIdx)), force); }, [&](const auto &box) { - symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force); + symTable.addSymbol(*symToMap, copyRegion.getArgument(argIdx), + force); }); }; - addSymbol(0, true); + addSymbol(0, sym, true); lower::SymMapScope innerScope(symTable); - addSymbol(1); + addSymbol(1, symToPrivatize); auto ip = firOpBuilder.saveInsertionPoint(); - copyFirstPrivateSymbol(sym, &ip); - - firOpBuilder.create<mlir::omp::YieldOp>( - hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr()); + copyFirstPrivateSymbol(symToPrivatize, &ip); + + if constexpr (std::is_same_v<OpType, mlir::omp::PrivateClauseOp>) { + firOpBuilder.create<mlir::omp::YieldOp>( + hsb.getAddr().getLoc(), + symTable.shallowLookupSymbol(*symToPrivatize).getAddr()); + } else { + firOpBuilder.create<fir::YieldOp>( + hsb.getAddr().getLoc(), + symTable.shallowLookupSymbol(*symToPrivatize).getAddr()); + } } return result; @@ -644,9 +681,22 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, 
clauseOps->privateVars.push_back(privVal); } - symToPrivatizer[sym] = privatizerOp; + if (symToPrivatize->HasLocalLocality()) + allPrivatizedSymbols.insert(symToPrivatize); } +template void +DataSharingProcessor::privatizeSymbol<mlir::omp::PrivateClauseOp, + mlir::omp::PrivateClauseOps>( + const semantics::Symbol *symToPrivatize, + mlir::omp::PrivateClauseOps *clauseOps); + +template void +DataSharingProcessor::privatizeSymbol<fir::LocalitySpecifierOp, + fir::LocalitySpecifierOperands>( + const semantics::Symbol *symToPrivatize, + fir::LocalitySpecifierOperands *clauseOps); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 54a42fd199831..d3f543c3db75e 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -77,8 +77,6 @@ class DataSharingProcessor { llvm::SetVector<const semantics::Symbol *> preDeterminedSymbols; llvm::SetVector<const semantics::Symbol *> allPrivatizedSymbols; - llvm::DenseMap<const semantics::Symbol *, mlir::omp::PrivateClauseOp> - symToPrivatizer; lower::AbstractConverter &converter; semantics::SemanticsContext &semaCtx; fir::FirOpBuilder &firOpBuilder; @@ -105,8 +103,6 @@ class DataSharingProcessor { void collectImplicitSymbols(); void collectPreDeterminedSymbols(); void privatize(mlir::omp::PrivateClauseOps *clauseOps); - void doPrivatize(const semantics::Symbol *sym, - mlir::omp::PrivateClauseOps *clauseOps); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const semantics::Symbol *sym); @@ -125,6 +121,11 @@ class DataSharingProcessor { bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization, lower::SymMap &symTable); + DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, 
lower::SymMap &symTable); + // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for // firstprivates. Step1 is performed at the place where process/processStep1 @@ -151,6 +152,11 @@ class DataSharingProcessor { ? allPrivatizedSymbols.getArrayRef() : llvm::ArrayRef<const semantics::Symbol *>(); } + + template <typename OpType = mlir::omp::PrivateClauseOp, + typename OperandsStructType = mlir::omp::PrivateClauseOps> + void privatizeSymbol(const semantics::Symbol *symToPrivatize, + OperandsStructType *clauseOps); }; } // namespace omp diff --git a/flang/test/Lower/do_concurrent_delayed_locality.f90 b/flang/test/Lower/do_concurrent_delayed_locality.f90 new file mode 100644 index 0000000000000..9b234087ed4be --- /dev/null +++ b/flang/test/Lower/do_concurrent_delayed_locality.f90 @@ -0,0 +1,49 @@ +! RUN: %flang_fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s + +subroutine do_concurrent_with_locality_specs + implicit none + integer :: i, local_var, local_init_var + + do concurrent (i=1:10) local(local_var) local_init(local_init_var) + if (i < 5) then + local_var = 42 + else + local_init_var = 84 + end if + end do +end subroutine + +! CHECK: fir.local {type = local_init} @[[LOCAL_INIT_SYM:.*]] : i32 copy { +! CHECK: ^bb0(%[[ORIG_VAL:.*]]: !fir.ref<i32>, %[[LOCAL_VAL:.*]]: !fir.ref<i32>): +! CHECK: %[[ORIG_VAL_LD:.*]] = fir.load %[[ORIG_VAL]] : !fir.ref<i32> +! CHECK: hlfir.assign %[[ORIG_VAL_LD]] to %[[LOCAL_VAL]] : i32, !fir.ref<i32> +! CHECK: fir.yield(%[[LOCAL_VAL]] : !fir.ref<i32>) +! CHECK: } + +! CHECK: fir.local {type = local} @[[LOCAL_SYM:.*]] : i32 + +! CHECK-LABEL: func.func @_QPdo_concurrent_with_locality_specs() { +! CHECK: %[[ORIG_LOCAL_INIT_ALLOC:.*]] = fir.alloca i32 {bindc_name = "local_init_var", {{.*}}} +! CHECK: %[[ORIG_LOCAL_INIT_DECL:.*]]:2 = hlfir.declare %[[ORIG_LOCAL_INIT_ALLOC]] + +! 
CHECK: %[[ORIG_LOCAL_ALLOC:.*]] = fir.alloca i32 {bindc_name = "local_var", {{.*}}} +! CHECK: %[[ORIG_LOCAL_DECL:.*]]:2 = hlfir.declare %[[ORIG_LOCAL_ALLOC]] + +! CHECK: fir.do_concurrent { +! CHECK: %[[IV_DECL:.*]]:2 = hlfir.declare %{{.*}} + +! CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) local(@[[LOCAL_SYM]] %[[ORIG_LOCAL_DECL]]#0 -> %[[LOCAL_ARG:.*]], @[[LOCAL_INIT_SYM]] %[[ORIG_LOCAL_INIT_DECL]]#0 -> %[[LOCAL_INIT_ARG:.*]] : !fir.ref<i32>, !fir.ref<i32>) { +! CHECK: %[[LOCAL_DECL:.*]]:2 = hlfir.declare %[[LOCAL_ARG]] +! CHECK: %[[LOCAL_INIT_DECL:.*]]:2 = hlfir.declare %[[LOCAL_INIT_ARG]] + +! CHECK: fir.if %{{.*}} { +! CHECK: %[[C42:.*]] = arith.constant 42 : i32 +! CHECK: hlfir.assign %[[C42]] to %[[LOCAL_DECL]]#0 : i32, !fir.ref<i32> +! CHECK: } else { +! CHECK: %[[C84:.*]] = arith.constant 84 : i32 +! CHECK: hlfir.assign %[[C84]] to %[[LOCAL_INIT_DECL]]#0 : i32, !fir.ref<i32> +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: return +! CHECK: } >From 82e94aa9afe6e02fd66018587c764d769df07a17 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Wed, 7 May 2025 02:24:56 -0500 Subject: [PATCH 2/2] add todo --- flang/lib/Lower/Bridge.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index bf55402ec4714..8529bef5d9c5e 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2037,6 +2037,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { fir::LocalitySpecifierOperands privateClauseOps; auto doConcurrentLoopOp = mlir::dyn_cast_if_present<fir::DoConcurrentLoopOp>(info.loopOp); + // TODO Promote to using `enableDelayedPrivatization` (which is enabled by + // default unlike the staging flag) once the implementation of this is more + // complete. 
bool useDelayedPriv = enableDelayedPrivatizationStaging && doConcurrentLoopOp; _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits