https://github.com/tblah updated 
https://github.com/llvm/llvm-project/pull/174588

>From 0e94907969d784a5783f026474be5e4a144b44b3 Mon Sep 17 00:00:00 2001
From: Tom Eccles <[email protected]>
Date: Tue, 6 Jan 2026 13:29:30 +0000
Subject: [PATCH] [mlir][OpenMP] Don't allocate task context structure if not
 needed

Don't allocate a task context structure if none of the private variables
need it. This was already skipped when there were no private variables
at all.
---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 11 +++--
 .../LLVMIR/openmp-task-no-context-struct.mlir | 48 +++++++++++++++++++
 .../openmp-taskloop-no-context-struct.mlir    | 46 ++++++------------
 3 files changed, 70 insertions(+), 35 deletions(-)
 create mode 100644 mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cd210757261bf..43c503757ddf5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2278,6 +2278,9 @@ void TaskContextStructManager::generateTaskContextStruct() {
     privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
   }
 
+  if (privateVarTypes.empty())
+    return;
+
   structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(),
                                    privateVarTypes);
 
@@ -2315,10 +2318,10 @@ SmallVector<llvm::Value *> TaskContextStructManager::createGEPsToPrivateVars(
 }
 
 void TaskContextStructManager::createGEPsToPrivateVars() {
-  if (!structPtr) {
+  if (!structPtr)
     assert(privateVarTypes.empty());
-    return;
-  }
+  // Still need to run createGEPsToPrivateVars to populate llvmPrivateVarGEPs
+  // with null values for skipped private decls
 
   llvmPrivateVarGEPs = createGEPsToPrivateVars(structPtr);
 }
@@ -2767,7 +2770,7 @@ convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
   }
 
   llvm::OpenMPIRBuilder::TaskDupCallbackTy taskDupOrNull = nullptr;
-  if (!taskStructMgr.getLLVMPrivateVarGEPs().empty())
+  if (taskStructMgr.getStructPtr())
     taskDupOrNull = taskDupCB;
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
diff --git a/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
new file mode 100644
index 0000000000000..32ccac8296696
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-task-no-context-struct.mlir
@@ -0,0 +1,48 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Check that we don't allocate a task context structure when none of the private
+// vars need it.
+
+omp.private {type = private} @_QFtestEp_private_i32 : i32
+llvm.func @_QPtest() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "p"} : (i64) -> !llvm.ptr
+  omp.task private(@_QFtestEp_private_i32 %1 -> %arg0 : !llvm.ptr) {
+    llvm.call @_QPdo_something(%arg0) {fastmathFlags = #llvm.fastmath<contract>} : (!llvm.ptr) -> ()
+    omp.terminator
+  }
+  llvm.return
+}
+llvm.func @_QPdo_something(!llvm.ptr) attributes {sym_visibility = "private"}
+
+// CHECK-LABEL: define void @_QPtest()
+// CHECK:         %[[VAL_0:.*]] = alloca i32, i64 1, align 4
+// CHECK:         br label %[[VAL_1:.*]]
+// CHECK:       entry:                                            ; preds = 
%[[VAL_2:.*]]
+// CHECK:         br label %[[VAL_3:.*]]
+// CHECK:       omp.private.init:                                 ; preds = 
%[[VAL_1]]
+// CHECK-NOT:     @malloc
+// CHECK:         br label %[[VAL_4:.*]]
+// CHECK:       omp.private.copy:                                 ; preds = 
%[[VAL_3]]
+// CHECK:         br label %[[VAL_5:.*]]
+// CHECK:       omp.task.start:                                   ; preds = 
%[[VAL_4]]
+// CHECK:         br label %[[VAL_6:.*]]
+// CHECK:       codeRepl:                                         ; preds = 
%[[VAL_5]]
+// CHECK:         %[[VAL_7:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         %[[VAL_8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 
%[[VAL_7]], i32 1, i64 40, i64 0, ptr @_QPtest..omp_par)
+// CHECK:         %[[VAL_9:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 
%[[VAL_7]], ptr %[[VAL_8]])
+// CHECK:         br label %[[VAL_10:.*]]
+// CHECK:       task.exit:                                        ; preds = 
%[[VAL_6]]
+// CHECK:         ret void
+
+// CHECK-LABEL: define internal void @_QPtest..omp_par
+// CHECK:       task.alloca:
+// CHECK:         %[[VAL_11:.*]] = alloca i32, align 4
+// CHECK:         br label %[[VAL_12:.*]]
+// CHECK:       task.body:                                        ; preds = 
%[[VAL_13:.*]]
+// CHECK:         br label %[[VAL_14:.*]]
+// CHECK:       omp.task.region:                                  ; preds = 
%[[VAL_12]]
+// CHECK:         call void @_QPdo_something(ptr %[[VAL_11]])
+// CHECK:         br label %[[VAL_15:.*]]
+// CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_14]]
+// CHECK-NOT:     @free
diff --git a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
index 43b50e7a3206c..69a2edb2a7c0f 100644
--- a/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-taskloop-no-context-struct.mlir
@@ -22,7 +22,8 @@ llvm.func @_QPtest() {
   llvm.return
 }
 // CHECK-LABEL: define void @_QPtest() {
-// CHECK:         %[[STRUCTARG:.*]] = alloca { i64, i64, i64, ptr }, align 8
+// No task context structure:
+// CHECK:         %[[STRUCTARG:.*]] = alloca { i64, i64, i64 }, align 8
 // CHECK:         %[[VAL_0:.*]] = alloca i32, i64 1, align 4
 // CHECK:         %[[VAL_1:.*]] = alloca i32, i64 1, align 4
 // CHECK:         %[[VAL_2:.*]] = alloca i32, i64 1, align 4
@@ -30,31 +31,28 @@ llvm.func @_QPtest() {
 // CHECK:       entry:                                            ; preds = 
%[[VAL_4:.*]]
 // CHECK:         br label %[[VAL_5:.*]]
 // CHECK:       omp.private.init:                                 ; preds = 
%[[VAL_3]]
-// CHECK:         %[[VAL_6:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr 
getelementptr ({}, ptr null, i32 1) to i64))
 // CHECK:         br label %[[VAL_7:.*]]
 // CHECK:       omp.private.copy:                                 ; preds = 
%[[VAL_5]]
 // CHECK:         br label %[[VAL_8:.*]]
 // CHECK:       omp.taskloop.start:                               ; preds = 
%[[VAL_7]]
 // CHECK:         br label %[[VAL_9:.*]]
 // CHECK:       codeRepl:                                         ; preds = 
%[[VAL_8]]
-// CHECK:         %[[VAL_10:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[STRUCTARG]], i32 0, i32 0
+// CHECK:         %[[VAL_10:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[STRUCTARG]], i32 0, i32 0
 // CHECK:         store i64 1, ptr %[[VAL_10]], align 4
-// CHECK:         %[[VAL_11:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[STRUCTARG]], i32 0, i32 1
+// CHECK:         %[[VAL_11:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[STRUCTARG]], i32 0, i32 1
 // CHECK:         store i64 20, ptr %[[VAL_11]], align 4
-// CHECK:         %[[VAL_12:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[STRUCTARG]], i32 0, i32 2
+// CHECK:         %[[VAL_12:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[STRUCTARG]], i32 0, i32 2
 // CHECK:         store i64 1, ptr %[[VAL_12]], align 4
-// CHECK:         %[[VAL_13:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[STRUCTARG]], i32 0, i32 3
-// CHECK:         store ptr %[[VAL_6]], ptr %[[VAL_13]], align 8
 // CHECK:         %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_taskgroup(ptr @1, i32 %[[VAL_14]])
-// CHECK:         %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 
%[[VAL_14]], i32 1, i64 40, i64 32, ptr @_QPtest..omp_par)
+// CHECK:         %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 
%[[VAL_14]], i32 1, i64 40, i64 24, ptr @_QPtest..omp_par)
 // CHECK:         %[[VAL_16:.*]] = load ptr, ptr %[[VAL_15]], align 8
-// CHECK:         call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], 
ptr align 1 %[[STRUCTARG]], i64 32, i1 false)
-// CHECK:         %[[VAL_17:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_16]], i32 0, i32 0
-// CHECK:         %[[VAL_18:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_16]], i32 0, i32 1
-// CHECK:         %[[VAL_19:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_16]], i32 0, i32 2
+// CHECK:         call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[VAL_16]], 
ptr align 1 %[[STRUCTARG]], i64 24, i1 false)
+// CHECK:         %[[VAL_17:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[VAL_16]], i32 0, i32 0
+// CHECK:         %[[VAL_18:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[VAL_16]], i32 0, i32 1
+// CHECK:         %[[VAL_19:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[VAL_16]], i32 0, i32 2
 // CHECK:         %[[VAL_20:.*]] = load i64, ptr %[[VAL_19]], align 4
-// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr 
%[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, 
i32 0, i64 0, ptr @omp_taskloop_dup)
+// CHECK:         call void @__kmpc_taskloop(ptr @1, i32 %[[VAL_14]], ptr 
%[[VAL_15]], i32 1, ptr %[[VAL_17]], ptr %[[VAL_18]], i64 %[[VAL_20]], i32 1, 
i32 0, i64 0, ptr null)
 // CHECK:         call void @__kmpc_end_taskgroup(ptr @1, i32 %[[VAL_14]])
 // CHECK:         br label %[[VAL_21:.*]]
 // CHECK:       taskloop.exit:                                    ; preds = 
%[[VAL_9]]
@@ -63,14 +61,12 @@ llvm.func @_QPtest() {
 // CHECK-LABEL: define internal void @_QPtest..omp_par
 // CHECK:       taskloop.alloca:
 // CHECK:         %[[VAL_22:.*]] = load ptr, ptr %[[VAL_23:.*]], align 8
-// CHECK:         %[[VAL_24:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_22]], i32 0, i32 0
+// CHECK:         %[[VAL_24:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[VAL_22]], i32 0, i32 0
 // CHECK:         %[[VAL_25:.*]] = load i64, ptr %[[VAL_24]], align 4
-// CHECK:         %[[VAL_26:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_22]], i32 0, i32 1
+// CHECK:         %[[VAL_26:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[VAL_22]], i32 0, i32 1
 // CHECK:         %[[VAL_27:.*]] = load i64, ptr %[[VAL_26]], align 4
-// CHECK:         %[[VAL_28:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_22]], i32 0, i32 2
+// CHECK:         %[[VAL_28:.*]] = getelementptr { i64, i64, i64 }, ptr 
%[[VAL_22]], i32 0, i32 2
 // CHECK:         %[[VAL_29:.*]] = load i64, ptr %[[VAL_28]], align 4
-// CHECK:         %[[VAL_30:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_22]], i32 0, i32 3
-// CHECK:         %[[VAL_31:.*]] = load ptr, ptr %[[VAL_30]], align 8, !align 
!1
 // CHECK:         %[[VAL_32:.*]] = alloca i32, align 4
 // CHECK:         %[[VAL_33:.*]] = alloca i32, align 4
 // CHECK:         %[[VAL_34:.*]] = alloca i32, align 4
@@ -97,7 +93,6 @@ llvm.func @_QPtest() {
 // CHECK:       omp_loop.after:                                   ; preds = 
%[[VAL_51]]
 // CHECK:         br label %[[VAL_53:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_52]]
-// CHECK:         tail call void @free(ptr %[[VAL_31]])
 // CHECK:         br label %[[VAL_54:.*]]
 // CHECK:       omp_loop.body:                                    ; preds = 
%[[VAL_48]]
 // CHECK:         %[[VAL_55:.*]] = mul i32 %[[VAL_46]], 1
@@ -114,15 +109,4 @@ llvm.func @_QPtest() {
 // CHECK:       taskloop.exit.exitStub:                           ; preds = 
%[[VAL_53]]
 // CHECK:         ret void
 
-// CHECK-LABEL: define internal void @omp_taskloop_dup(
-// CHECK:       entry:
-// CHECK:         %[[VAL_59:.*]] = getelementptr { %[[VAL_60:.*]], { i64, i64, 
i64, ptr } }, ptr %[[VAL_61:.*]], i32 0, i32 1
-// CHECK:         %[[VAL_62:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_59]], i32 0, i32 3
-// CHECK:         %[[VAL_63:.*]] = getelementptr { %[[VAL_60]], { i64, i64, 
i64, ptr } }, ptr %[[VAL_64:.*]], i32 0, i32 1
-// CHECK:         %[[VAL_65:.*]] = getelementptr { i64, i64, i64, ptr }, ptr 
%[[VAL_63]], i32 0, i32 3
-// CHECK:         %[[VAL_66:.*]] = load ptr, ptr %[[VAL_65]], align 8
-// TODO: don't generate allocation for empty task context struct (for later 
patch)
-// CHECK:         %[[VAL_67:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr 
getelementptr ({}, ptr null, i32 1) to i64))
-// CHECK:         store ptr %[[VAL_67]], ptr %[[VAL_62]], align 8
-// CHECK:         ret void
-
+// CHECK-NOT: define internal void @omp_taskloop_dup

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to