[clang] [llvm] [OpenMP][Clang] Add no-loop SPMD kernel promotion (PR #205325)

Nicole Aschenbrenner via cfe-commits Thu, 25 Jun 2026 04:10:51 -0700

https://github.com/nicebert updated 
https://github.com/llvm/llvm-project/pull/205325


>From c325575772f795c75c3447c0a7363efbea14872c Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <[email protected]>
Date: Fri, 19 Jun 2026 05:50:00 -0500
Subject: [PATCH] [OpenMP][Clang] Add no-loop SPMD kernel promotion

Promote target teams distribute parallel for kernels to no-loop SPMD
mode when teams and threads oversubscription are assumed and no
num_teams or reduction clause is present, mirroring Flang's MLIR
promotion. Adds a C offload test alongside the existing Fortran one.
---
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 23 +++++-
 offload/test/offloading/target-no-loop.c | 92 ++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 3 deletions(-)
 create mode 100644 offload/test/offloading/target-no-loop.c

diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index dec3bca0ef3eb..999b7569d338a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -701,6 +701,18 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
       "Unknown programming model for OpenMP directive on NVPTX target.");
 }
 
+/// Check whether a target kernel can be promoted to a "no-loop" SPMD kernel,
+/// mirroring Flang's MLIR promotion path.
+static bool canPromoteToNoLoop(const LangOptions &LangOpts,
+                               const OMPExecutableDirective &D) {
+  OpenMPDirectiveKind DKind = D.getDirectiveKind();
+  return (DKind == OMPD_target_teams_distribute_parallel_for ||
+          DKind == OMPD_target_teams_distribute_parallel_for_simd) &&
+         LangOpts.OpenMPTeamSubscription && LangOpts.OpenMPThreadSubscription 
&&
+         !D.hasClausesOfKind<OMPNumTeamsClause>() &&
+         !D.hasClausesOfKind<OMPReductionClause>();
+}
+
 void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
                                              StringRef ParentName,
                                              llvm::Function *&OutlinedFn,
@@ -746,9 +758,14 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const 
OMPExecutableDirective &D,
                                         CodeGenFunction &CGF,
                                         EntryFunctionState &EST, bool IsSPMD) {
   llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;
-  Attrs.ExecFlags =
-      IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD
-             : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;
+  if (IsSPMD && canPromoteToNoLoop(CGM.getLangOpts(), D))
+    Attrs.ExecFlags =
+        llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP;
+  else
+    Attrs.ExecFlags =
+        IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD
+               : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;
+
   computeMinAndMaxThreadsAndTeams(D, CGF, Attrs);
 
   CGBuilderTy &Bld = CGF.Builder;
diff --git a/offload/test/offloading/target-no-loop.c 
b/offload/test/offloading/target-no-loop.c
new file mode 100644
index 0000000000000..25e973cef68c8
--- /dev/null
+++ b/offload/test/offloading/target-no-loop.c
@@ -0,0 +1,92 @@
+// clang-format off
+// C counterpart of fortran/target-no-loop.f90.
+
+// RUN: %libomptarget-compile-generic -O3 
-fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription
+// RUN: env LIBOMPTARGET_INFO=16 OMP_NUM_TEAMS=16 OMP_TEAMS_THREAD_LIMIT=16 
%libomptarget-run-generic 2>&1 | %fcheck-generic
+// REQUIRES: gpu
+// XFAIL: intelgpu
+
+#include <stdio.h>
+
+static int check_errors(int *array) {
+  int errors = 0;
+  for (int i = 0; i < 1024; ++i)
+    if (array[i] != i + 1)
+      ++errors;
+  return errors;
+}
+
+int main(void) {
+  int array[1024];
+  int errors = 0;
+  int red;
+
+  for (int i = 0; i < 1024; ++i)
+    array[i] = 1;
+
+  // No-loop kernel
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < 1024; ++i)
+    array[i] = i + 1;
+  errors += check_errors(array);
+
+  // SPMD kernel (num_teams clause blocks promotion to no-loop)
+  for (int i = 0; i < 1024; ++i)
+    array[i] = 1;
+#pragma omp target teams distribute parallel for num_teams(3)
+  for (int i = 0; i < 1024; ++i)
+    array[i] = i + 1;
+  errors += check_errors(array);
+
+  // No-loop kernel
+  for (int i = 0; i < 1024; ++i)
+    array[i] = 1;
+#pragma omp target teams distribute parallel for num_threads(64)
+  for (int i = 0; i < 1024; ++i)
+    array[i] = i + 1;
+  errors += check_errors(array);
+
+  // SPMD kernel
+  for (int i = 0; i < 1024; ++i)
+    array[i] = 1;
+#pragma omp target parallel for
+  for (int i = 0; i < 1024; ++i)
+    array[i] = i + 1;
+  errors += check_errors(array);
+
+  // Generic kernel
+  for (int i = 0; i < 1024; ++i)
+    array[i] = 1;
+#pragma omp target teams distribute
+  for (int i = 0; i < 1024; ++i)
+    array[i] = i + 1;
+  errors += check_errors(array);
+
+  // SPMD kernel (reduction clause blocks promotion to no-loop)
+  for (int i = 0; i < 1024; ++i)
+    array[i] = 1;
+  red = 0;
+#pragma omp target teams distribute parallel for reduction(+ : red)
+  for (int i = 0; i < 1024; ++i)
+    red += array[i];
+  if (red != 1024)
+    ++errors;
+
+  printf("number of errors: %d\n", errors);
+
+  return 0;
+}
+
+// CHECK:  PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} 
SPMD-No-Loop mode
+// CHECK:  info: #Args: 2 Teams x Thrds:   64x  16
+// CHECK:  PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} 
SPMD mode
+// CHECK:  info: #Args: 2 Teams x Thrds:   3x  16 {{.*}}
+// CHECK:  PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} 
SPMD-No-Loop mode
+// CHECK:  info: #Args: 2 Teams x Thrds:   64x  16 {{.*}}
+// CHECK:  PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} 
SPMD mode
+// CHECK:  info: #Args: 2 Teams x Thrds:   1x  16
+// CHECK:  PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} 
Generic-SPMD mode
+// CHECK:  info: #Args: 2 Teams x Thrds:   16x  16 {{.*}}
+// CHECK:  PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} 
SPMD mode
+// CHECK:  info: #Args: 3 Teams x Thrds:   16x  16 {{.*}}
+// CHECK:  number of errors: 0

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [OpenMP][Clang] Add no-loop SPMD kernel promotion (PR #205325)

Reply via email to