https://github.com/jsjodin updated https://github.com/llvm/llvm-project/pull/206168
>From cd37f17795343ec8c579c6ebdd91a54e36bef10b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg <[email protected]> Date: Fri, 26 Jun 2026 16:10:39 -0400 Subject: [PATCH 1/3] [CIR][OpenMP] Prevent HoistAllocas pass from hoisting above OpenMP regions This patch modifies the HoistAllocas pass to prevent it from hoisting allocas outside OpenMP regions, since this may break isolated from above requirements and affect privatization semantics. Assisted-by: Cursor / claude-4.8-opus-medium --- .../CIR/Dialect/Transforms/HoistAllocas.cpp | 26 ++++-- clang/test/CIR/Transforms/hoist-allocas.cir | 85 +++++++++++++++++++ 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index 74b22faadc8ae..af682c159daa2 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -8,6 +8,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" @@ -33,18 +34,31 @@ struct HoistAllocasPass : public impl::HoistAllocasBase<HoistAllocasPass> { void runOnOperation() override; }; +// Find the block that an alloca should be hoisted into. Allocas are normally +// hoisted to the entry block of the enclosing function. However, an alloca may +// be nested inside an OpenMP region such as omp.parallel, omp.teams +// etc. Hoisting it out of these ops breaks the isolated from above requirement +// for omp.teams and it changes privatization semantics. +static mlir::Block *getHoistDestBlock(cir::AllocaOp alloca) { + mlir::Region *region = alloca->getParentRegion(); + while (true) { + mlir::Operation *parentOp = region->getParentOp(); + if (mlir::isa<cir::FuncOp>(parentOp) || + mlir::isa<mlir::omp::OutlineableOpenMPOpInterface>(parentOp)) + return ®ion->front(); + region = parentOp->getParentRegion(); + } +} + static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (func.getRegion().empty()) return; - // Hoist all static allocas to the entry block. - mlir::Block &entryBlock = func.getRegion().front(); - mlir::Operation *insertPoint = &*entryBlock.begin(); - // Post-order is the default, but the code below requires it, so // let's not depend on the default staying that way. func.getBody().walk<mlir::WalkOrder::PostOrder>([&](cir::AllocaOp alloca) { - if (alloca->getBlock() == &entryBlock) + mlir::Block *destBlock = getHoistDestBlock(alloca); + if (alloca->getBlock() == destBlock) return; // Don't hoist allocas with dynamic alloca size. if (alloca.getDynAllocSize()) @@ -62,7 +76,7 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (alloca.getConstant()) alloca.setConstant(false); - alloca->moveBefore(insertPoint); + alloca->moveBefore(&*destBlock->begin()); }); } diff --git a/clang/test/CIR/Transforms/hoist-allocas.cir b/clang/test/CIR/Transforms/hoist-allocas.cir index 6c71c00532f56..db04030412252 100644 --- a/clang/test/CIR/Transforms/hoist-allocas.cir +++ b/clang/test/CIR/Transforms/hoist-allocas.cir @@ -110,4 +110,89 @@ module { // CHECK-NEXT: } // CHECK-NEXT: cir.return // CHECK-NEXT: } + + cir.func @l4() { + omp.target kernel_type(generic) { + omp.parallel { + cir.scope { + %0 = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr<!s32i> + } + omp.terminator + } + omp.terminator + } + cir.return + } + // CHECK: cir.func{{.*}} @l4 + // CHECK-NEXT: omp.target kernel_type(generic) { + // CHECK-NEXT: omp.parallel { + // CHECK-NEXT: %[[I:.*]] = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: cir.return + // CHECK-NEXT: } + + cir.func @l5() { + omp.target kernel_type(generic) { + cir.scope { + %0 = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr<!s32i> + } + omp.terminator + } + cir.return + } + // CHECK: cir.func{{.*}} @l5 + // CHECK-NEXT: omp.target kernel_type(generic) { + // CHECK-NEXT: %[[I:.*]] = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: cir.return + // CHECK-NEXT: } + + cir.func @l6() { + cir.scope { + %0 = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + %1 = cir.const #cir.int<0> : !s32i + cir.store %1, %0 : !s32i, !cir.ptr<!s32i> + } + omp.target kernel_type(generic) { + cir.scope { + %2 = cir.alloca "j" align(4) init : !cir.ptr<!s32i> + %3 = cir.const #cir.int<0> : !s32i + cir.store %3, %2 : !s32i, !cir.ptr<!s32i> + } + omp.terminator + } + cir.return + } + // CHECK: cir.func{{.*}} @l6 + // CHECK-NEXT: %[[I:.*]] = cir.alloca "i" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO]], %[[I]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.target kernel_type(generic) { + // CHECK-NEXT: %[[J:.*]] = cir.alloca "j" align(4) init : !cir.ptr<!s32i> + // CHECK-NEXT: cir.scope { + // CHECK-NEXT: %[[ZERO2:.*]] = cir.const #cir.int<0> : !s32i + // CHECK-NEXT: cir.store %[[ZERO2]], %[[J]] : !s32i, !cir.ptr<!s32i> + // CHECK-NEXT: } + // CHECK-NEXT: omp.terminator + // CHECK-NEXT: } + // CHECK-NEXT: cir.return + // CHECK-NEXT: } } >From 158d8c87de0aed5b698126ce00b03478422256f1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg <[email protected]> Date: Fri, 26 Jun 2026 16:55:19 -0400 Subject: [PATCH 2/3] Keep the order of the allocas intact --- .../CIR/Dialect/Transforms/HoistAllocas.cpp | 7 +++- clang/test/CIR/CodeGenOpenMP/omp-llvmir.c | 34 ++++++++----------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index af682c159daa2..14f938c8bb0b5 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -54,6 +54,9 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (func.getRegion().empty()) return; + // Keep track of destination so that the order of allocas is preserved. + llvm::DenseMap<mlir::Block *, mlir::Operation *> insertPoints; + // Post-order is the default, but the code below requires it, so // let's not depend on the default staying that way. func.getBody().walk<mlir::WalkOrder::PostOrder>([&](cir::AllocaOp alloca) { @@ -76,7 +79,9 @@ static void process(mlir::ModuleOp mod, cir::FuncOp func) { if (alloca.getConstant()) alloca.setConstant(false); - alloca->moveBefore(&*destBlock->begin()); + mlir::Operation *&insertPoint = + insertPoints.try_emplace(destBlock, &*destBlock->begin()).first->second; + alloca->moveBefore(insertPoint); }); } diff --git a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c index a0343d567f938..1e74000c5173b 100644 --- a/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c +++ b/clang/test/CIR/CodeGenOpenMP/omp-llvmir.c @@ -39,38 +39,34 @@ // CIR: } // LLVM-LABEL: define dso_local i32 @main() -// LLVM: %[[STRUCTARG:.*]] = alloca { ptr, ptr }, align 8 -// LLVM: %[[VAR1:.*]] = alloca i32, i64 1, align 4 -// LLVM: %[[VAR2:.*]] = alloca i32, i64 1, align 4 -// LLVM: %[[VAR3:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[STRUCTARG:.*]] = alloca { ptr }, align 8 +// LLVM: %[[RETVAL:.*]] = alloca i32, i64 1, align 4 +// LLVM: %[[J:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[ENTRY:.*]] // LLVM: [[ENTRY]]: // LLVM: br label %[[OMP_PARALLEL:.*]] // LLVM: [[OMP_PARALLEL]]: -// LLVM: %[[GEP1:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 -// LLVM: store ptr %[[VAR1]], ptr %[[GEP1]], align 8 -// LLVM: %[[GEP2:.*]] = getelementptr { ptr, ptr }, ptr %[[STRUCTARG]], i32 0, i32 1 -// LLVM: store ptr %[[VAR3]], ptr %[[GEP2]], align 8 +// LLVM: %[[GEP1:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 +// LLVM: store ptr %[[J]], ptr %[[GEP1]], align 8 // LLVM: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @main..omp_par, ptr %[[STRUCTARG]]) // LLVM: br label %[[OMP_PAR_EXIT:.*]] // LLVM: [[OMP_PAR_EXIT]]: -// LLVM: store i32 0, ptr %[[VAR2]], align 4 -// LLVM: %[[LOAD:.*]] = load i32, ptr %[[VAR2]], align 4 +// LLVM: store i32 0, ptr %[[RETVAL]], align 4 +// LLVM: %[[LOAD:.*]] = load i32, ptr %[[RETVAL]], align 4 // LLVM: ret i32 %[[LOAD]] // LLVM-LABEL: define internal void @main..omp_par(ptr noalias %{{.*}}, ptr noalias %{{.*}}, ptr %{{.*}}) // LLVM: [[PAR_ENTRY:.*]]: -// LLVM: %[[GEP_A:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 0 -// LLVM: %[[LOAD_A:.*]] = load ptr, ptr %[[GEP_A]], align 8 -// LLVM: %[[GEP_B:.*]] = getelementptr { ptr, ptr }, ptr %{{.*}}, i32 0, i32 1 -// LLVM: %[[LOAD_B:.*]] = load ptr, ptr %[[GEP_B]], align 8 +// LLVM: %[[GEP_J:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0 +// LLVM: %[[LOAD_J:.*]] = load ptr, ptr %[[GEP_J]], align 8 // LLVM: %[[TID_LOCAL:.*]] = alloca i32, align 4 // LLVM: %[[TID_VAL:.*]] = load i32, ptr %{{.*}}, align 4 // LLVM: store i32 %[[TID_VAL]], ptr %[[TID_LOCAL]], align 4 // LLVM: %{{.*}} = load i32, ptr %[[TID_LOCAL]], align 4 +// LLVM: %[[I:.*]] = alloca i32, i64 1, align 4 // LLVM: br label %[[AFTER_ALLOCA:.*]] // LLVM: [[AFTER_ALLOCA]]: @@ -83,11 +79,11 @@ // LLVM: br label %[[PAR_REGION2:.*]] // LLVM: [[PAR_REGION2]]: -// LLVM: store i32 0, ptr %[[LOAD_A]], align 4 +// LLVM: store i32 0, ptr %[[I]], align 4 // LLVM: br label %[[PAR_REGION3:.*]] // LLVM: [[PAR_REGION3]]: -// LLVM: %[[I_LOAD:.*]] = load i32, ptr %[[LOAD_A]], align 4 +// LLVM: %[[I_LOAD:.*]] = load i32, ptr %[[I]], align 4 // LLVM: %[[CMP:.*]] = icmp slt i32 %[[I_LOAD]], 10000 // LLVM: br i1 %[[CMP]], label %[[PAR_REGION4:.*]], label %[[PAR_REGION6:.*]] @@ -107,13 +103,13 @@ // LLVM: br label %[[EXIT_STUB:.*]] // LLVM: [[PAR_REGION4]]: -// LLVM: store i32 0, ptr %[[LOAD_B]], align 4 +// LLVM: store i32 0, ptr %[[LOAD_J]], align 4 // LLVM: br label %[[PAR_REGION5:.*]] // LLVM: [[PAR_REGION5]]: -// LLVM: %[[I_LOAD2:.*]] = load i32, ptr %[[LOAD_A]], align 4 +// LLVM: %[[I_LOAD2:.*]] = load i32, ptr %[[I]], align 4 // LLVM: %[[ADD:.*]] = add nsw i32 %[[I_LOAD2]], 1 -// LLVM: store i32 %[[ADD]], ptr %[[LOAD_A]], align 4 +// LLVM: store i32 %[[ADD]], ptr %[[I]], align 4 // LLVM: br label %[[PAR_REGION3]] // LLVM: [[EXIT_STUB]]: >From f96584c49c4c88740b80d911ca257f6587119be1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg <[email protected]> Date: Sat, 27 Jun 2026 12:35:03 -0400 Subject: [PATCH 3/3] Add comment and make check more general. --- clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp index 14f938c8bb0b5..5c41541ec76bf 100644 --- a/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp +++ b/clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp @@ -43,7 +43,10 @@ static mlir::Block *getHoistDestBlock(cir::AllocaOp alloca) { mlir::Region *region = alloca->getParentRegion(); while (true) { mlir::Operation *parentOp = region->getParentOp(); - if (mlir::isa<cir::FuncOp>(parentOp) || + + // Note: We may want some kind of interface in the future for blocking + // alloca hoisting since other dialects may have similar restrictions. + if (parentOp->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>() || mlir::isa<mlir::omp::OutlineableOpenMPOpInterface>(parentOp)) return ®ion->front(); region = parentOp->getParentRegion(); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
