https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/156837
>From c5dde7cbcece549d0996a6671d1ae1b53b9cd63b Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Thu, 4 Sep 2025 01:06:21 -0500 Subject: [PATCH 1/3] [flang][OpenMP] Support multi-block reduction combiner regions on the GPU Fixes a bug related to insertion points when inlining multi-block combiner reduction regions. The IP at the end of the inlined region was not used resulting in emitting BBs with multiple terminators. --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 3 + .../omptarget-multi-block-reduction.mlir | 85 +++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 220eee3cb8b087..b516c3c3f4efee 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3507,6 +3507,8 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction( return AfterIP.takeError(); if (!Builder.GetInsertBlock()) return ReductionFunc; + + Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint()); Builder.CreateStore(Reduced, LHSPtr); } } @@ -3751,6 +3753,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced); if (!AfterIP) return AfterIP.takeError(); + Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint()); Builder.CreateStore(Reduced, LHS, false); } } diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir new file mode 100644 index 00000000000000..aaf06d2d0e0c22 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir @@ -0,0 +1,85 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// Verifies that the IR builder can handle reductions with multi-block combiner +// regions on the GPU. 
+ +module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { + llvm.func @bar() {} + llvm.func @baz() {} + + omp.declare_reduction @add_reduction_byref_box_5xf32 : !llvm.ptr alloc { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr<5> + %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr + omp.yield(%2 : !llvm.ptr) + } init { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + omp.yield(%arg1 : !llvm.ptr) + } combiner { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + llvm.call @bar() : () -> () + llvm.br ^bb3 + + ^bb3: // pred: ^bb0 + llvm.call @baz() : () -> () + omp.yield(%arg0 : !llvm.ptr) + } + llvm.func @foo_() { + %c1 = llvm.mlir.constant(1 : i64) : i64 + %10 = llvm.alloca %c1 x !llvm.array<5 x f32> {bindc_name = "x"} : (i64) -> !llvm.ptr<5> + %11 = llvm.addrspacecast %10 : !llvm.ptr<5> to !llvm.ptr + %74 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<5 x f32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} + omp.target map_entries(%74 -> %arg0 : !llvm.ptr) { + %c1_2 = llvm.mlir.constant(1 : i32) : i32 + %c10 = llvm.mlir.constant(10 : i32) : i32 + omp.teams reduction(byref @add_reduction_byref_box_5xf32 %arg0 -> %arg2 : !llvm.ptr) { + omp.parallel { + omp.distribute { + omp.wsloop { + omp.loop_nest (%arg5) : i32 = (%c1_2) to (%c10) inclusive step (%c1_2) { + omp.yield + } + } {omp.composite} + } {omp.composite} + omp.terminator + } {omp.composite} + omp.terminator + } + omp.terminator + } + llvm.return + } +} + +// CHECK: call void @__kmpc_parallel_51({{.*}}, i32 1, i32 -1, i32 -1, +// CHECK-SAME: ptr @[[PAR_OUTLINED:.*]], ptr null, ptr %2, i64 1) + +// CHECK: define internal void @[[PAR_OUTLINED]]{{.*}} { +// CHECK: .omp.reduction.then: +// CHECK: br label 
%omp.reduction.nonatomic.body + +// CHECK: omp.reduction.nonatomic.body: +// CHECK: call void @bar() +// CHECK: br label %[[BODY_2ND_BB:.*]] + +// CHECK: [[BODY_2ND_BB]]: +// CHECK: call void @baz() +// CHECK: br label %[[CONT_BB:.*]] + +// CHECK: [[CONT_BB]]: +// CHECK: br label %.omp.reduction.done +// CHECK: } + +// CHECK: define internal void @"{{.*}}$reduction$reduction_func"(ptr noundef %0, ptr noundef %1) #0 { +// CHECK: br label %omp.reduction.nonatomic.body + +// CHECK: [[BODY_2ND_BB:.*]]: +// CHECK: call void @baz() +// CHECK: br label %omp.region.cont + + +// CHECK: omp.reduction.nonatomic.body: +// CHECK: call void @bar() +// CHECK: br label %[[BODY_2ND_BB]] + +// CHECK: } >From 03c698d73e985362b22fe0f066228eeebc4eba72 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Sat, 13 Sep 2025 06:44:15 -0500 Subject: [PATCH 2/3] review comments --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index b516c3c3f4efee..6d948f184392db 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3508,7 +3508,7 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction( if (!Builder.GetInsertBlock()) return ReductionFunc; - Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint()); + Builder.restoreIP(*AfterIP); Builder.CreateStore(Reduced, LHSPtr); } } @@ -3753,7 +3753,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced); if (!AfterIP) return AfterIP.takeError(); - Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint()); + Builder.restoreIP(*AfterIP); Builder.CreateStore(Reduced, LHS, false); } } >From ee4945d874adfa2f651e85b7296898e16305d878 Mon Sep 17 00:00:00 2001 From: ergawy <kareem.erg...@amd.com> Date: Tue, 16 Sep 2025 07:42:59 -0500 
Subject: [PATCH 3/3] more test checks --- mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir index aaf06d2d0e0c22..87ff0ba7866481 100644 --- a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir @@ -67,7 +67,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : // CHECK: br label %[[CONT_BB:.*]] // CHECK: [[CONT_BB]]: -// CHECK: br label %.omp.reduction.done +// CHECK-NEXT: %[[RED_RHS:.*]] = phi ptr [ %final.rhs, %{{.*}} ] +// CHECK-NEXT: store ptr %[[RED_RHS]], ptr %{{.*}}, align 8 +// CHECK-NEXT: br label %.omp.reduction.done // CHECK: } // CHECK: define internal void @"{{.*}}$reduction$reduction_func"(ptr noundef %0, ptr noundef %1) #0 { _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits