https://github.com/jsjodin updated https://github.com/llvm/llvm-project/pull/206168
>From 1094e5eccd9a9669cc8ec58c747c1d839ff11d68 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg <[email protected]> Date: Fri, 26 Jun 2026 16:10:39 -0400 Subject: [PATCH 1/2] [CIR][OpenMP] Prevent HoistAllocas pass from hoisting above OpenMP regions This patch modifies the HoistAllocas pass to prevent it from hoisting allocas outside OpenMP regions, since this may break isolated from above requirements and affect privatization semantics. Assisted-by: Cursor / claude-4.8-opus-medium --- .../CIR/Dialect/Transforms/HoistAllocas.cpp | 26 ++++-- clang/test/CIR/Transforms/hoist-allocas.cir | 85 +++++++++++++++++++ 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index 74b22faadc8ae..af682c159daa2 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -8,6 +8,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" @@ -33,18 +34,31 @@ struct HoistAllocasPass : public impl::HoistAllocasBase<HoistAllocasPass> { void runOnOperation() override; }; +// Find the block that an alloca should be hoisted into. Allocas are normally +// hoisted to the entry block of the enclosing function. However, an alloca may +// be nested inside an OpenMP region such as omp.parallel, omp.teams +// etc. Hoisting it out of these ops breaks the isolated from above requirement +// for omp.teams and it changes privatization semantics. +static mlir::Block *getHoistDestBlock(cir::AllocaOp alloca) { + mlir::Region *region = alloca->getParentRegion(); + while (true) { + mlir::Operation *parentOp = region->getParentOp(); + if (mlir::isa<cir::FuncOp>(parentOp) || + mlir::isa<mlir::omp::OutlineableOpenMPOpInterface>(parentOp)) + return ®ion->front(); + region = parentOp->getParentRegion(); + } +} + static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (func.getRegion().empty()) return; - // Hoist all static allocas to the entry block. - mlir::Block &entryBlock = func.getRegion().front(); - mlir::Operation *insertPoint = &*entryBlock.begin(); - // Post-order is the default, but the code below requires it, so // let's not depend on the default staying that way. func.getBody().walk<mlir::WalkOrder::PostOrder>([&](cir::AllocaOp alloca) { - if (alloca->getBlock() == &entryBlock) + mlir::Block *destBlock = getHoistDestBlock(alloca); + if (alloca->getBlock() == destBlock) return; // Don't hoist allocas with dynamic alloca size. if (alloca.getDynAllocSize()) @@ -62,7 +76,7 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (alloca.getConstant()) alloca.setConstant(false); - alloca->moveBefore(insertPoint); + alloca->moveBefore(&*destBlock->begin()); }); } diff --git a/clang/test/CIR/Transforms/hoist-allocas.cir b/clang/test/CIR/Transforms/hoist-allocas.cir index 6c71c00532f56..db04030412252 100644 --- a/clang/test/CIR/Transforms/hoist-allocas.cir +++ b/clang/test/CIR/Transforms/hoist-allocas.cir @@ -110,4 +110,89 @@ module { // CHECK-NEXT: } // CHECK-NEXT: cir.return // CHECK-NEXT: } + + cir.func @l4() { + omp.target kernel_type(generic) { + omp.parallel { + cir.scope { + %0 = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr<!s32i> + } + omp.terminator + } + omp.terminator + } + cir.return + } + // CHECK: cir.func{{.*}} @l4 + // CHECK-NEXT: omp.target kernel_type(generic) { + // CHECK-NEXT: omp.parallel { + // CHECK-NEXT: %[[I:.*]] = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: cir.return + // CHECK-NEXT: } + + cir.func @l5() { + omp.target kernel_type(generic) { + cir.scope { + %0 = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr<!s32i> + } + omp.terminator + } + cir.return + } + // CHECK: cir.func{{.*}} @l5 + // CHECK-NEXT: omp.target kernel_type(generic) { + // CHECK-NEXT: %[[I:.*]] = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: cir.return + // CHECK-NEXT: } + + cir.func @l6() { + cir.scope { + %0 = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr<!s32i> + } + omp.target kernel_type(generic) { + cir.scope { + %2 = cir.alloca "j" align(4) init : !cir.ptr<!s32i> + %3 = cir.const #cir.int<0> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr<!s32i> + } + omp.terminator + } + cir.return + } + // CHECK: cir.func{{.*}} @l6 + // CHECK-NEXT: %[[I:.*]] = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.target kernel_type(generic) { + // CHECK-NEXT: %[[J:.*]] = cir.alloca "j" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO2:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO2]], %[[J]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: cir.return + // CHECK-NEXT: } } >From bfa73482d3bd47419b8a78f755db249d0bcf4702 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg <[email protected]> Date: Fri, 26 Jun 2026 16:55:19 -0400 Subject: [PATCH 2/2] Keep the order of the allocas intact --- .../CIR/Dialect/Transforms/HoistAllocas.cpp | 7 +++- clang/test/CIR/CodeGenOpenMP/omp-llvmir.c | 34 ++++++++----------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index af682c159daa2..14f938c8bb0b5 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -54,6 +54,9 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (func.getRegion().empty()) return; + // Keep track of destination so that the order of allocas is preserved. + llvm::DenseMap<mlir::Block *, mlir::Operation *> insertPoints; + // Post-order is the default, but the code below requires it, so // let's not depend on the default staying that way. func.getBody().walk<mlir::WalkOrder::PostOrder>([&](cir::AllocaOp alloca) { @@ -76,7 +79,9 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (alloca.getConstant()) alloca.setConstant(false); - alloca->moveBefore(&*destBlock->begin()); + mlir::Operation *&insertPoint = + insertPoints.try_emplace(destBlock, &*destBlock->begin()).first->second; + alloca->moveBefore(insertPoint); }); } diff --git a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c index a0343d567f938..1e74000c5173b 100644 --- a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c +++ b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c @@ -39,38 +39,34 @@ // CIR: } // LLVM-LABEL: define dso_local i32 @main() -// LLVM: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8 -// LLVM: %[[VAR1:.*]] = alloca i32, i64 1, align 4 -// LLVM: %[[VAR2:.*]] = alloca i32, i64 1, align 4 -// LLVM: %[[VAR3:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[STRUCTARG:.*]] = alloca { ptr }, align 8 +// LLVM: %[[RETVAL:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[J:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[ENTRY:.*]] // LLVM: [[ENTRY]]: // LLVM: br label %[[OMP_PARALLEL:.*]] // LLVM: [[OMP_PARALLEL]]: -// LLVM: %[[GEP1:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 -// LLVM: store ptr %[[VAR1]], ptr %[[GEP1]], align 8 -// LLVM: %[[GEP2:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1 -// LLVM: store ptr %[[VAR3]], ptr %[[GEP2]], align 8 +// LLVM: %[[GEP1:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 +// LLVM: store ptr %[[J]], ptr %[[GEP1]], align 8 // LLVM: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @main..omp_par, ptr %[[STRUCTARG]]) // LLVM: br label %[[OMP_PAR_EXIT:.*]] // LLVM: [[OMP_PAR_EXIT]]: -// LLVM: store i32 0, ptr %[[VAR2]], align 4 -// LLVM: %[[LOAD:.*]] = load i32, ptr %[[VAR2]], align 4 +// LLVM: store i32 0, ptr %[[RETVAL]], align 4 +// LLVM: %[[LOAD:.*]] = load i32, ptr %[[RETVAL]], align 4 // LLVM: ret i32 %[[LOAD]] // LLVM-LABEL: define internal void @main..omp_par(ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %{{.*}}) // LLVM: [[PAR_ENTRY:.*]]: -// LLVM: %[[GEP_A:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 0 -// LLVM: %[[LOAD_A:.*]] = load ptr, ptr %[[GEP_A]], align 8 -// LLVM: %[[GEP_B:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 1 -// LLVM: %[[LOAD_B:.*]] = load ptr, ptr %[[GEP_B]], align 8 +// LLVM: %[[GEP_J:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0 +// LLVM: %[[LOAD_J:.*]] = load ptr, ptr %[[GEP_J]], align 8 // LLVM: %[[TID_LOCAL:.*]] = alloca i32, align 4 // LLVM: %[[TID_VAL:.*]] = load i32, ptr %{{.*}}, align 4 // LLVM: store i32 %[[TID_VAL]], ptr %[[TID_LOCAL]], align 4 // LLVM: %{{.*}} = load i32, ptr %[[TID_LOCAL]], align 4 +// LLVM: %[[I:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[AFTER_ALLOCA:.*]] // LLVM: [[AFTER_ALLOCA]]: @@ -83,11 +79,11 @@ // LLVM: br label %[[PAR_REGION2:.*]] // LLVM: [[PAR_REGION2]]: -// LLVM: store i32 0, ptr %[[LOAD_A]], align 4 +// LLVM: store i32 0, ptr %[[I]], align 4 // LLVM: br label %[[PAR_REGION3:.*]] // LLVM: [[PAR_REGION3]]: -// LLVM: %[[I_LOAD:.*]] = load i32, ptr %[[LOAD_A]], align 4 +// LLVM: %[[I_LOAD:.*]] = load i32, ptr %[[I]], align 4 // LLVM: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000 // LLVM: br i1 %[[CMP]], label %[[PAR_REGION4:.*]], label %[[PAR_REGION6:.*]] @@ -107,13 +103,13 @@ // LLVM: br label %[[EXIT_STUB:.*]] // LLVM: [[PAR_REGION4]]: -// LLVM: store i32 0, ptr %[[LOAD_B]], align 4 +// LLVM: store i32 0, ptr %[[LOAD_J]], align 4 // LLVM: br label %[[PAR_REGION5:.*]] // LLVM: [[PAR_REGION5]]: -// LLVM: %[[I_LOAD2:.*]] = load i32, ptr %[[LOAD_A]], align 4 +// LLVM: %[[I_LOAD2:.*]] = load i32, ptr %[[I]], align 4 // LLVM: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1 -// LLVM: store i32 %[[ADD]], ptr %[[LOAD_A]], align 4 +// LLVM: store i32 %[[ADD]], ptr %[[I]], align 4 // LLVM: br label %[[PAR_REGION3]] // LLVM: [[EXIT_STUB]]: _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
