https://github.com/nicebert updated https://github.com/llvm/llvm-project/pull/205325
>From c325575772f795c75c3447c0a7363efbea14872c Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <[email protected]> Date: Fri, 19 Jun 2026 05:50:00 -0500 Subject: [PATCH] [OpenMP][Clang] Add no-loop SPMD kernel promotion Promote target teams distribute parallel for kernels to no-loop SPMD mode when teams and threads oversubscription are assumed and no num_teams or reduction clause is present, mirroring Flang's MLIR promotion. Adds a C offload test alongside the existing Fortran one. --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 23 +++++- offload/test/offloading/target-no-loop.c | 92 ++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 offload/test/offloading/target-no-loop.c diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index dec3bca0ef3eb..999b7569d338a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -701,6 +701,18 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, "Unknown programming model for OpenMP directive on NVPTX target."); } +/// Check whether a target kernel can be promoted to a "no-loop" SPMD kernel, +/// mirroring Flang's MLIR promotion path. +static bool canPromoteToNoLoop(const LangOptions &LangOpts, + const OMPExecutableDirective &D) { + OpenMPDirectiveKind DKind = D.getDirectiveKind(); + return (DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd) && + LangOpts.OpenMPTeamSubscription && LangOpts.OpenMPThreadSubscription && + !D.hasClausesOfKind<OMPNumTeamsClause>() && + !D.hasClausesOfKind<OMPReductionClause>(); +} + void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, @@ -746,9 +758,14 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs; - Attrs.ExecFlags = - IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD - : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC; + if (IsSPMD && canPromoteToNoLoop(CGM.getLangOpts(), D)) + Attrs.ExecFlags = + llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP; + else + Attrs.ExecFlags = + IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD + : llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC; + computeMinAndMaxThreadsAndTeams(D, CGF, Attrs); CGBuilderTy &Bld = CGF.Builder; diff --git a/offload/test/offloading/target-no-loop.c b/offload/test/offloading/target-no-loop.c new file mode 100644 index 0000000000000..25e973cef68c8 --- /dev/null +++ b/offload/test/offloading/target-no-loop.c @@ -0,0 +1,92 @@ +// clang-format off +// C counterpart of fortran/target-no-loop.f90. + +// RUN: %libomptarget-compile-generic -O3 -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription +// RUN: env LIBOMPTARGET_INFO=16 OMP_NUM_TEAMS=16 OMP_TEAMS_THREAD_LIMIT=16 %libomptarget-run-generic 2>&1 | %fcheck-generic +// REQUIRES: gpu +// XFAIL: intelgpu + +#include <stdio.h> + +static int check_errors(int *array) { + int errors = 0; + for (int i = 0; i < 1024; ++i) + if (array[i] != i + 1) + ++errors; + return errors; +} + +int main(void) { + int array[1024]; + int errors = 0; + int red; + + for (int i = 0; i < 1024; ++i) + array[i] = 1; + + // No-loop kernel +#pragma omp target teams distribute parallel for + for (int i = 0; i < 1024; ++i) + array[i] = i + 1; + errors += check_errors(array); + + // SPMD kernel (num_teams clause blocks promotion to no-loop) + for (int i = 0; i < 1024; ++i) + array[i] = 1; +#pragma omp target teams distribute parallel for num_teams(3) + for (int i = 0; i < 1024; ++i) + array[i] = i + 1; + errors += check_errors(array); + + // No-loop kernel + for (int i = 0; i < 1024; ++i) + array[i] = 1; +#pragma omp target teams distribute parallel for num_threads(64) + for (int i = 0; i < 1024; ++i) + array[i] = i + 1; + errors += check_errors(array); + + // SPMD kernel + for (int i = 0; i < 1024; ++i) + array[i] = 1; +#pragma omp target parallel for + for (int i = 0; i < 1024; ++i) + array[i] = i + 1; + errors += check_errors(array); + + // Generic kernel + for (int i = 0; i < 1024; ++i) + array[i] = 1; +#pragma omp target teams distribute + for (int i = 0; i < 1024; ++i) + array[i] = i + 1; + errors += check_errors(array); + + // SPMD kernel (reduction clause blocks promotion to no-loop) + for (int i = 0; i < 1024; ++i) + array[i] = 1; + red = 0; +#pragma omp target teams distribute parallel for reduction(+ : red) + for (int i = 0; i < 1024; ++i) + red += array[i]; + if (red != 1024) + ++errors; + + printf("number of errors: %d\n", errors); + + return 0; +} + +// CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} SPMD-No-Loop mode +// CHECK: info: #Args: 2 Teams x Thrds: 64x 16 +// CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} SPMD mode +// CHECK: info: #Args: 2 Teams x Thrds: 3x 16 {{.*}} +// CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} SPMD-No-Loop mode +// CHECK: info: #Args: 2 Teams x Thrds: 64x 16 {{.*}} +// CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} SPMD mode +// CHECK: info: #Args: 2 Teams x Thrds: 1x 16 +// CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} Generic-SPMD mode +// CHECK: info: #Args: 2 Teams x Thrds: 16x 16 {{.*}} +// CHECK: PluginInterface device {{[0-9]+}} info: Launching kernel {{.*}} SPMD mode +// CHECK: info: #Args: 3 Teams x Thrds: 16x 16 {{.*}} +// CHECK: number of errors: 0 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
