https://github.com/nicebert updated 
https://github.com/llvm/llvm-project/pull/178914

>From 6f5b121a24275a671e8027baa5a6e9196fd70fb5 Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <[email protected]>
Date: Fri, 6 Feb 2026 01:52:29 -0600
Subject: [PATCH 1/2] [clang][OpenMP] Add negative flag variants for assume
 options

Add -fno-openmp-assume-no-thread-state and
-fno-openmp-assume-no-nested-parallelism flags to allow explicit
disabling of these assumptions.

Currently these flags only exist as MarshallingInfoNegativeFlag,
which doesn't generate the separate OPT_* constants needed for driver
code. Making them standalone flags enables proper last-wins behavior
and allows them to be used in implication logic.
---
 clang/include/clang/Options/Options.td | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 421208a812bbc..7bbe833910096 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -3986,9 +3986,13 @@ def fno_openmp_assume_threads_oversubscription : 
Flag<["-"], "fno-openmp-assume-
 def fopenmp_assume_no_thread_state : Flag<["-"], 
"fopenmp-assume-no-thread-state">,
   HelpText<"Assert no thread in a parallel region modifies an ICV">,
   MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
+def fno_openmp_assume_no_thread_state : Flag<["-"], 
"fno-openmp-assume-no-thread-state">,
+  HelpText<"Do not assume that no thread in a parallel region modifies an ICV">;
 def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested-parallelism">,
   HelpText<"Assert no nested parallel regions in the GPU">,
   MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
+def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
+  HelpText<"Do not assume the absence of nested parallel regions in the GPU">;
 
 } // let Group = f_Group
 } // let Visibility = [ClangOption, CC1Option, FC1Option]

>From debfa7485ce9530210565ea5e49e5becd92441e9 Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <[email protected]>
Date: Fri, 6 Feb 2026 01:52:53 -0600
Subject: [PATCH 2/2] [clang][OpenMP] Add -fopenmp-target-fast convenience flag

Add a meta-flag that implies multiple GPU optimization flags:
- -fopenmp-assume-no-thread-state
- -fopenmp-assume-no-nested-parallelism

The flag is automatically enabled by -Ofast and can be explicitly
disabled with -fno-openmp-target-fast. Individual implied flags can
be selectively overridden while keeping others enabled.
---
 clang/include/clang/Options/Options.td      |  7 +++++
 clang/lib/Driver/ToolChains/Clang.cpp       | 18 +++++++++--
 clang/test/Driver/openmp-target-fast-flag.c | 35 +++++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Driver/openmp-target-fast-flag.c

diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 7bbe833910096..36f416d3fb210 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4016,6 +4016,13 @@ def fopenmp_target_new_runtime : Flag<["-"], 
"fopenmp-target-new-runtime">,
   Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fno_openmp_target_new_runtime : Flag<["-"], 
"fno-openmp-target-new-runtime">,
   Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+  Visibility<[ClangOption]>,
+  HelpText<"Assert common GPU usage patterns to enable OpenMP runtime 
optimizations">;
+def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+  Visibility<[ClangOption]>;
 defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
   LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option]>,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 0293b04217673..6967d1d7520bd 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6675,6 +6675,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
                     options::OPT_fno_offload_via_llvm, false) &&
       (JA.isDeviceOffloading(Action::OFK_None) ||
        JA.isDeviceOffloading(Action::OFK_OpenMP))) {
+
+    // Determine if target-fast optimizations should be enabled
+    bool TargetFastUsed =
+        Args.hasFlag(options::OPT_fopenmp_target_fast,
+                     options::OPT_fno_openmp_target_fast, OFastEnabled);
     switch (D.getOpenMPRuntime(Args)) {
     case Driver::OMPRT_OMP:
     case Driver::OMPRT_IOMP5:
@@ -6725,10 +6730,19 @@ void Clang::ConstructJob(Compilation &C, const 
JobAction &JA,
                        options::OPT_fno_openmp_assume_threads_oversubscription,
                        /*Default=*/false))
         CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
-      if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
+
+      // Handle -fopenmp-assume-no-thread-state (implied by target-fast)
+      if (Args.hasFlag(options::OPT_fopenmp_assume_no_thread_state,
+                       options::OPT_fno_openmp_assume_no_thread_state,
+                       /*Default=*/TargetFastUsed))
         CmdArgs.push_back("-fopenmp-assume-no-thread-state");
-      if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
+
+      // Handle -fopenmp-assume-no-nested-parallelism (implied by target-fast)
+      if (Args.hasFlag(options::OPT_fopenmp_assume_no_nested_parallelism,
+                       options::OPT_fno_openmp_assume_no_nested_parallelism,
+                       /*Default=*/TargetFastUsed))
         CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
+
       if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
         CmdArgs.push_back("-fopenmp-offload-mandatory");
       if (Args.hasArg(options::OPT_fopenmp_force_usm))
diff --git a/clang/test/Driver/openmp-target-fast-flag.c 
b/clang/test/Driver/openmp-target-fast-flag.c
new file mode 100644
index 0000000000000..4ded4be341801
--- /dev/null
+++ b/clang/test/Driver/openmp-target-fast-flag.c
@@ -0,0 +1,35 @@
+// REQUIRES: x86-registered-target, amdgpu-registered-target
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib %s -O0 2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=DefaultTFast,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 
-fopenmp-target-fast %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=TState,NestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 %s 2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=O3,DefaultTFast,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 
-fno-openmp-target-fast %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=O3,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=OFast,TState,NestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast 
-fno-openmp-target-fast %s 2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=OFast,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 
-fno-openmp-target-fast -fopenmp-target-fast %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=TState,NestParallel %s
+
+// O3: -O3
+// OFast: -Ofast
+
+// DefaultTFast-NOT: {{"-f(no-)?openmp-target-fast"}}
+
+// TState: "-fopenmp-assume-no-thread-state"
+// TState-NOT: "-fno-openmp-assume-no-thread-state"
+// DefaultTState-NOT: {{"-f(no-)?openmp-assume-no-thread-state"}}
+
+// NestParallel: "-fopenmp-assume-no-nested-parallelism"
+// NestParallel-NOT: "-fno-openmp-assume-no-nested-parallelism"
+// DefaultNoNestParallel-NOT: 
{{"-f(no-)?openmp-assume-no-nested-parallelism"}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to