https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/138512
>From a4acb56894a9e5ec95f857a671e672b126e979c5 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Mon, 5 May 2025 06:50:49 -0500 Subject: [PATCH 1/2] [flang][fir] Basic lowering `fir.do_concurrent` locality specs to `fir.do_loop ... unordered` Extends lowering `fir.do_concurrent` to `fir.do_loop ... unordered` by adding support for locality specifiers. In particular, for `local` specifiers, a `fir.alloca` op is created using the localizer type. For `local_init` specifiers, the `copy` region is additionally inlined in the `do concurrent` loop's body. --- .../Transforms/SimplifyFIROperations.cpp | 58 +++++++++++++++++- .../do_concurrent-to-do_loop-unodered.fir | 61 +++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp index 6d106046b70f2..e2dc4e14ff650 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp @@ -149,6 +149,17 @@ mlir::LogicalResult BoxTotalElementsConversion::matchAndRewrite( class DoConcurrentConversion : public mlir::OpRewritePattern<fir::DoConcurrentOp> { + /// Looks up from the operation from and returns the LocalitySpecifierOp with + /// name symbolName + static fir::LocalitySpecifierOp + findLocalizer(mlir::Operation *from, mlir::SymbolRefAttr symbolName) { + fir::LocalitySpecifierOp localizer = + mlir::SymbolTable::lookupNearestSymbolFrom<fir::LocalitySpecifierOp>( + from, symbolName); + assert(localizer && "localizer not found in the symbol table"); + return localizer; + } + public: using mlir::OpRewritePattern<fir::DoConcurrentOp>::OpRewritePattern; @@ -162,7 +173,52 @@ class DoConcurrentConversion assert(loop.getRegion().hasOneBlock()); mlir::Block &loopBlock = loop.getRegion().getBlocks().front(); - // Collect iteration variable(s) allocations do that we can move them + // Handle localization + if 
(!loop.getLocalVars().empty()) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(&loop.getRegion().front()); + + std::optional<mlir::ArrayAttr> localSyms = loop.getLocalSyms(); + + for (auto [localVar, localArg, localizerSym] : llvm::zip_equal( + loop.getLocalVars(), loop.getRegionLocalArgs(), *localSyms)) { + mlir::SymbolRefAttr localizerName = + llvm::cast<mlir::SymbolRefAttr>(localizerSym); + fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName); + + mlir::Value localAlloc = + rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType()); + + if (localizer.getLocalitySpecifierType() == + fir::LocalitySpecifierType::LocalInit) { + // It is reasonable to make this assumption since, at this stage, + // control-flow ops are not converted yet. Therefore, things like `if` + // conditions will still be represented by their encapsulating `fir` + // dialect ops. + assert(localizer.getCopyRegion().hasOneBlock() && + "Expected localizer to have a single block."); + mlir::Block *beforeLocalInit = rewriter.getInsertionBlock(); + mlir::Block *afterLocalInit = rewriter.splitBlock( + rewriter.getInsertionBlock(), rewriter.getInsertionPoint()); + rewriter.cloneRegionBefore(localizer.getCopyRegion(), afterLocalInit); + mlir::Block *copyRegionBody = beforeLocalInit->getNextNode(); + + rewriter.eraseOp(copyRegionBody->getTerminator()); + rewriter.mergeBlocks(afterLocalInit, copyRegionBody); + rewriter.mergeBlocks(copyRegionBody, beforeLocalInit, + {localVar, localArg}); + } + + rewriter.replaceAllUsesWith(localArg, localAlloc); + } + + loop.getRegion().front().eraseArguments(loop.getNumInductionVars(), + loop.getNumLocalOperands()); + loop.getLocalVarsMutable().clear(); + loop.setLocalSymsAttr(nullptr); + } + + // Collect iteration variable(s) allocations so that we can move them // outside the `fir.do_concurrent` wrapper. 
llvm::SmallVector<mlir::Operation *> opsToMove; for (mlir::Operation &op : llvm::drop_end(wrapperBlock)) diff --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir index d2ceafdda5b22..d9ef36b175598 100644 --- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir +++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir @@ -121,3 +121,64 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, // CHECK: } // CHECK: return // CHECK: } + +// ----- + +fir.local {type = local} @local_localizer : i32 + +fir.local {type = local_init} @local_init_localizer : i32 copy { +^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<i32>): + %0 = fir.load %arg0 : !fir.ref<i32> + fir.store %0 to %arg1 : !fir.ref<i32> + fir.yield(%arg1 : !fir.ref<i32>) +} + +func.func @do_concurrent_locality_specs() { + %3 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrentElocal_init_var"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %5 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrentElocal_var"} + %6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %c1 = arith.constant 1 : index + %c10 = arith.constant 1 : index + fir.do_concurrent { + %9 = fir.alloca i32 {bindc_name = "i"} + %10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) local(@local_localizer %6#0 -> %arg1, @local_init_localizer %4#0 -> %arg2 : !fir.ref<i32>, !fir.ref<i32>) { + %11 = fir.convert %arg0 : (index) -> i32 + fir.store %11 to %10#0 : !fir.ref<i32> + %13:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %15:2 = hlfir.declare %arg2 {uniq_name = 
"_QFdo_concurrentElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + %17 = fir.load %10#0 : !fir.ref<i32> + %c5_i32 = arith.constant 5 : i32 + %18 = arith.cmpi slt, %17, %c5_i32 : i32 + fir.if %18 { + %c42_i32 = arith.constant 42 : i32 + hlfir.assign %c42_i32 to %13#0 : i32, !fir.ref<i32> + } else { + %c84_i32 = arith.constant 84 : i32 + hlfir.assign %c84_i32 to %15#0 : i32, !fir.ref<i32> + } + } + } + return +} + +// CHECK-LABEL: func.func @do_concurrent_locality_specs() { +// CHECK: %[[LOC_INIT_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Elocal_init_var"} +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// Verify localization of the `local` var. +// CHECK: %[[PRIV_LOC_ALLOC:.*]] = fir.alloca i32 + +// Verify localization of the `local_init` var. +// CHECK: %[[PRIV_LOC_INIT_ALLOC:.*]] = fir.alloca i32 +// CHECK: %[[LOC_INIT_VAL:.*]] = fir.load %[[LOC_INIT_DECL]]#0 : !fir.ref<i32> +// CHECK: fir.store %[[LOC_INIT_VAL]] to %[[PRIV_LOC_INIT_ALLOC]] : !fir.ref<i32> + +// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[PRIV_LOC_ALLOC]] +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[PRIV_LOC_INIT_ALLOC]] + +// CHECK: hlfir.assign %{{.*}} to %[[VAL_15]]#0 : i32, !fir.ref<i32> +// CHECK: hlfir.assign %{{.*}} to %[[VAL_16]]#0 : i32, !fir.ref<i32> +// CHECK: } +// CHECK: return +// CHECK: } >From f94ef63f4f64ab86abcdf0a61f4fff961c7f6135 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Wed, 7 May 2025 02:18:05 -0500 Subject: [PATCH 2/2] add todos --- flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp index e2dc4e14ff650..43ffed914ffe9 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp @@ -186,6 +186,8 @@ class DoConcurrentConversion 
llvm::cast<mlir::SymbolRefAttr>(localizerSym); fir::LocalitySpecifierOp localizer = findLocalizer(loop, localizerName); + // TODO Should this be a heap allocation instead? For now, we allocate + // on the stack for each loop iteration. mlir::Value localAlloc = rewriter.create<fir::AllocaOp>(loop.getLoc(), localizer.getType()); @@ -210,6 +212,9 @@ class DoConcurrentConversion } rewriter.replaceAllUsesWith(localArg, localAlloc); + + // TODO localizers with `init` and `dealloc` regions are not handled + // yet. } loop.getRegion().front().eraseArguments(loop.getNumInductionVars(), _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits