https://github.com/nicebert updated https://github.com/llvm/llvm-project/pull/178914
>From 6f5b121a24275a671e8027baa5a6e9196fd70fb5 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <[email protected]> Date: Fri, 6 Feb 2026 01:52:29 -0600 Subject: [PATCH 1/2] [clang][OpenMP] Add negative flag variants for assume options Add -fno-openmp-assume-no-thread-state and -fno-openmp-assume-no-nested-parallelism flags to allow explicit disabling of these assumptions. Currently these flags only exist as MarshallingInfoNegativeFlag which doesn't generate separate OPT_* constants needed for driver code. Making them standalone flags enables proper last-wins behavior and allows them to be used in implication logic. --- clang/include/clang/Options/Options.td | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 421208a812bbc..7bbe833910096 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -3986,9 +3986,13 @@ def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume- def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, HelpText<"Assert no thread in a parallel region modifies an ICV">, MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>; +def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, + HelpText<"Assert that a thread in a parallel region may modify an ICV">; def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, HelpText<"Assert no nested parallel regions in the GPU">, MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; +def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, + HelpText<"Assert that a nested parallel region may be used in the GPU">; } // let Group = f_Group } // let Visibility = [ClangOption, CC1Option, FC1Option] >From debfa7485ce9530210565ea5e49e5becd92441e9 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <[email 
protected]> Date: Fri, 6 Feb 2026 01:52:53 -0600 Subject: [PATCH 2/2] [clang][OpenMP] Add -fopenmp-target-fast convenience flag Add meta-flag that implies multiple GPU optimization flags: - -fopenmp-assume-no-thread-state - -fopenmp-assume-no-nested-parallelism The flag is automatically enabled by -Ofast and can be explicitly disabled with -fno-openmp-target-fast. Individual implied flags can be selectively overridden while keeping others enabled. --- clang/include/clang/Options/Options.td | 7 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 18 +++++++++-- clang/test/Driver/openmp-target-fast-flag.c | 35 +++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/openmp-target-fast-flag.c diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 7bbe833910096..36f416d3fb210 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4016,6 +4016,13 @@ def fopenmp_target_new_runtime : Flag<["-"], "fopenmp-target-new-runtime">, Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fno_openmp_target_new_runtime : Flag<["-"], "fno-openmp-target-new-runtime">, Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>; +def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">, + Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, + Visibility<[ClangOption]>, + HelpText<"Assert common GPU usage patterns to enable OpenMP runtime optimizations">; +def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">, + Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, + Visibility<[ClangOption]>; defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse", LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0293b04217673..6967d1d7520bd 100644 
--- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6675,6 +6675,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_offload_via_llvm, false) && (JA.isDeviceOffloading(Action::OFK_None) || JA.isDeviceOffloading(Action::OFK_OpenMP))) { + + // Determine if target-fast optimizations should be enabled + bool TargetFastUsed = + Args.hasFlag(options::OPT_fopenmp_target_fast, + options::OPT_fno_openmp_target_fast, OFastEnabled); switch (D.getOpenMPRuntime(Args)) { case Driver::OMPRT_OMP: case Driver::OMPRT_IOMP5: @@ -6725,10 +6730,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_openmp_assume_threads_oversubscription, /*Default=*/false)) CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); - if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) + + // Handle -fopenmp-assume-no-thread-state (implied by target-fast) + if (Args.hasFlag(options::OPT_fopenmp_assume_no_thread_state, + options::OPT_fno_openmp_assume_no_thread_state, + /*Default=*/TargetFastUsed)) CmdArgs.push_back("-fopenmp-assume-no-thread-state"); - if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) + + // Handle -fopenmp-assume-no-nested-parallelism (implied by target-fast) + if (Args.hasFlag(options::OPT_fopenmp_assume_no_nested_parallelism, + options::OPT_fno_openmp_assume_no_nested_parallelism, + /*Default=*/TargetFastUsed)) CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); + if (Args.hasArg(options::OPT_fopenmp_offload_mandatory)) CmdArgs.push_back("-fopenmp-offload-mandatory"); if (Args.hasArg(options::OPT_fopenmp_force_usm)) diff --git a/clang/test/Driver/openmp-target-fast-flag.c b/clang/test/Driver/openmp-target-fast-flag.c new file mode 100644 index 0000000000000..4ded4be341801 --- /dev/null +++ b/clang/test/Driver/openmp-target-fast-flag.c @@ -0,0 +1,35 @@ +// REQUIRES: x86-registered-target, amdgpu-registered-target + +// RUN: %clang -### -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib %s -O0 2>&1 \ +// RUN: | FileCheck -check-prefixes=DefaultTFast,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 -fopenmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=TState,NestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=O3,DefaultTFast,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 -fno-openmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=O3,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=OFast,TState,NestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast -fno-openmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=OFast,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 -fno-openmp-target-fast -fopenmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=TState,NestParallel %s + +// O3: -O3 +// OFast: -Ofast + +// DefaultTFast-NOT: {{"-f(no-)?openmp-target-fast"}} + +// TState: "-fopenmp-assume-no-thread-state" +// TState-NOT: "-fno-openmp-assume-no-thread-state" +// DefaultTState-NOT: {{"-f(no-)?openmp-assume-no-thread-state"}} + +// NestParallel: "-fopenmp-assume-no-nested-parallelism" +// NestParallel-NOT: "-fno-openmp-assume-no-nested-parallelism" +// 
DefaultNoNestParallel-NOT: {{"-f(no-)?openmp-assume-no-nested-parallelism"}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
