https://github.com/nicebert updated 
https://github.com/llvm/llvm-project/pull/178914

>From 5b683d32de54aaeae1629595a922a93579d0f484 Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <[email protected]>
Date: Tue, 27 Jan 2026 09:54:06 -0600
Subject: [PATCH 1/3] [clang][OpenMP] Add negative flag variants for assume
 options

Add -fno-openmp-assume-no-thread-state and
-fno-openmp-assume-no-nested-parallelism flags to allow explicit
disabling of these assumptions.

Currently these flags only exist as MarshallingInfoNegativeFlag,
which doesn't generate the separate OPT_* constants needed for driver
code. Making them standalone flags enables proper last-wins behavior
and allows them to be used in implication logic.
---
 clang/include/clang/Options/Options.td | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 421208a812bbc..d1944ed1e46db 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -3983,12 +3983,30 @@ def fno_openmp_assume_teams_oversubscription : 
Flag<["-"], "fno-openmp-assume-te
   HelpText<"Do not assume teams oversubscription.">;
 def fno_openmp_assume_threads_oversubscription : Flag<["-"], 
"fno-openmp-assume-threads-oversubscription">,
   HelpText<"Do not assume threads oversubscription.">;
+def fno_openmp_assume_no_thread_state : Flag<["-"], 
"fno-openmp-assume-no-thread-state">,
+  HelpText<"Assert that a thread in a parallel region may modify an ICV">;
+def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested-parallelism">,
+  HelpText<"Assert no nested parallel regions in the GPU">,
+  MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
+def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
+  HelpText<"Assert that a nested parallel region may be used in the GPU">;
 def fopenmp_assume_no_thread_state : Flag<["-"], 
"fopenmp-assume-no-thread-state">,
   HelpText<"Assert no thread in a parallel region modifies an ICV">,
   MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
+def fno_openmp_assume_no_thread_state : Flag<["-"], 
"fno-openmp-assume-no-thread-state">,
+  HelpText<"Assert that a thread in a parallel region may modify an ICV">;
+def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested-parallelism">,
+  HelpText<"Assert no nested parallel regions in the GPU">,
+  MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
+def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
+  HelpText<"Assert that a nested parallel region may be used in the GPU">;
+def fno_openmp_assume_no_thread_state : Flag<["-"], 
"fno-openmp-assume-no-thread-state">,
+  HelpText<"Assert that a thread in a parallel region may modify an ICV">;
 def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested-parallelism">,
   HelpText<"Assert no nested parallel regions in the GPU">,
   MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
+def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
+  HelpText<"Assert that a nested parallel region may be used in the GPU">;
 
 } // let Group = f_Group
 } // let Visibility = [ClangOption, CC1Option, FC1Option]

>From a59c5d60187d3619324213d959eec385dba97956 Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <[email protected]>
Date: Tue, 27 Jan 2026 10:06:12 -0600
Subject: [PATCH 2/3] [clang][OpenMP] Add -fopenmp-target-ignore-env-vars flag

Add a new flag to indicate that the OpenMP runtime can ignore
environment variables during code generation for GPU offload.

This allows the compiler to generate more efficient GPU code by
assuming environment variables won't affect runtime behavior, enabling
optimizations like skipping runtime checks and eliminating conditional
branches. The flag includes both positive and negative variants and
will be used by the -fopenmp-target-fast meta-flag.
---
 clang/include/clang/Basic/LangOptions.def |  1 +
 clang/include/clang/Options/Options.td    |  8 ++++++++
 clang/lib/Driver/ToolChains/Clang.cpp     | 11 +++++++++++
 3 files changed, 20 insertions(+)

diff --git a/clang/include/clang/Basic/LangOptions.def 
b/clang/include/clang/Basic/LangOptions.def
index ba12e522f331f..a2cc47a6528c4 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -239,6 +239,7 @@ LANGOPT(OpenMPThreadSubscription  , 1, 0, NotCompatible, 
"Assume work-shared loo
 LANGOPT(OpenMPTeamSubscription  , 1, 0, NotCompatible, "Assume distributed 
loops do not have more iterations than participating teams.")
 LANGOPT(OpenMPNoThreadState  , 1, 0, NotCompatible, "Assume that no thread in 
a parallel region will modify an ICV.")
 LANGOPT(OpenMPNoNestedParallelism  , 1, 0, NotCompatible, "Assume that no 
thread in a parallel region will encounter a parallel region")
+LANGOPT(OpenMPTargetIgnoreEnvVars, 1, 0, NotCompatible, "Assume that the 
OpenMP runtime can ignore environment variables during code generation for GPU 
offload")
 LANGOPT(OpenMPOffloadMandatory  , 1, 0, NotCompatible, "Assert that offloading 
is mandatory and do not create a host fallback.")
 LANGOPT(OpenMPForceUSM     , 1, 0, NotCompatible, "Enable OpenMP unified 
shared memory mode via compiler.")
 LANGOPT(NoGPULib  , 1, 0, NotCompatible, "Indicate a build without the 
standard GPU libraries.")
diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index d1944ed1e46db..ec8339ca40eff 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -4030,6 +4030,14 @@ def fopenmp_target_new_runtime : Flag<["-"], 
"fopenmp-target-new-runtime">,
   Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
 def fno_openmp_target_new_runtime : Flag<["-"], 
"fno-openmp-target-new-runtime">,
   Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
+def fopenmp_target_ignore_env_vars : Flag<["-"], 
"fopenmp-target-ignore-env-vars">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Assume that the OpenMP runtime can ignore environment variables 
during code generation for GPU offload">,
+  MarshallingInfoFlag<LangOpts<"OpenMPTargetIgnoreEnvVars">>;
+def fno_openmp_target_ignore_env_vars : Flag<["-"], 
"fno-openmp-target-ignore-env-vars">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+  Visibility<[ClangOption, CC1Option]>;
 defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
   LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option]>,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 0293b04217673..ff8c55a702c11 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6725,10 +6725,21 @@ void Clang::ConstructJob(Compilation &C, const 
JobAction &JA,
                        options::OPT_fno_openmp_assume_threads_oversubscription,
                        /*Default=*/false))
         CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
+      if (Args.hasArg(options::OPT_fopenmp_target_ignore_env_vars))
+        CmdArgs.push_back("-fopenmp-target-ignore-env-vars");
+      else if (Args.hasArg(options::OPT_fno_openmp_target_ignore_env_vars))
+        CmdArgs.push_back("-fno-openmp-target-ignore-env-vars");
+
       if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
         CmdArgs.push_back("-fopenmp-assume-no-thread-state");
+      else if (Args.hasArg(options::OPT_fno_openmp_assume_no_thread_state))
+        CmdArgs.push_back("-fno-openmp-assume-no-thread-state");
+
       if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
         CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
+      else if 
(Args.hasArg(options::OPT_fno_openmp_assume_no_nested_parallelism))
+        CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism");
+
       if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
         CmdArgs.push_back("-fopenmp-offload-mandatory");
       if (Args.hasArg(options::OPT_fopenmp_force_usm))

>From 4b140f4586b400219c5c9fa88b1f57ce96a6cf66 Mon Sep 17 00:00:00 2001
From: Nicole Aschenbrenner <[email protected]>
Date: Wed, 28 Jan 2026 01:47:32 -0600
Subject: [PATCH 3/3] [clang][OpenMP] Add -fopenmp-target-fast convenience flag

Add meta-flag that implies multiple GPU optimization flags:
- -fopenmp-target-ignore-env-vars
- -fopenmp-assume-no-thread-state
- -fopenmp-assume-no-nested-parallelism

The flag is automatically enabled by -Ofast and can be explicitly
disabled with -fno-openmp-target-fast. Individual implied flags can
be selectively overridden while keeping others enabled.
---
 clang/include/clang/Options/Options.td      | 21 +++-----
 clang/lib/Driver/ToolChains/Clang.cpp       | 57 +++++++++++++++++----
 clang/test/Driver/openmp-target-fast-flag.c | 48 +++++++++++++++++
 3 files changed, 103 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Driver/openmp-target-fast-flag.c

diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index ec8339ca40eff..2080cfa5fc091 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -3983,13 +3983,6 @@ def fno_openmp_assume_teams_oversubscription : 
Flag<["-"], "fno-openmp-assume-te
   HelpText<"Do not assume teams oversubscription.">;
 def fno_openmp_assume_threads_oversubscription : Flag<["-"], 
"fno-openmp-assume-threads-oversubscription">,
   HelpText<"Do not assume threads oversubscription.">;
-def fno_openmp_assume_no_thread_state : Flag<["-"], 
"fno-openmp-assume-no-thread-state">,
-  HelpText<"Assert that a thread in a parallel region may modify an ICV">;
-def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested-parallelism">,
-  HelpText<"Assert no nested parallel regions in the GPU">,
-  MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
-def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
-  HelpText<"Assert that a nested parallel region may be used in the GPU">;
 def fopenmp_assume_no_thread_state : Flag<["-"], 
"fopenmp-assume-no-thread-state">,
   HelpText<"Assert no thread in a parallel region modifies an ICV">,
   MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
@@ -4000,13 +3993,6 @@ def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested
   MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
 def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
   HelpText<"Assert that a nested parallel region may be used in the GPU">;
-def fno_openmp_assume_no_thread_state : Flag<["-"], 
"fno-openmp-assume-no-thread-state">,
-  HelpText<"Assert that a thread in a parallel region may modify an ICV">;
-def fopenmp_assume_no_nested_parallelism : Flag<["-"], 
"fopenmp-assume-no-nested-parallelism">,
-  HelpText<"Assert no nested parallel regions in the GPU">,
-  MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
-def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">,
-  HelpText<"Assert that a nested parallel region may be used in the GPU">;
 
 } // let Group = f_Group
 } // let Visibility = [ClangOption, CC1Option, FC1Option]
@@ -4038,6 +4024,13 @@ def fopenmp_target_ignore_env_vars : Flag<["-"], 
"fopenmp-target-ignore-env-vars
 def fno_openmp_target_ignore_env_vars : Flag<["-"], 
"fno-openmp-target-ignore-env-vars">,
   Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
   Visibility<[ClangOption, CC1Option]>;
+def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+  Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Convenience flag to enable aggressive OpenMP GPU optimizations">;
+def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">,
+  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
+  Visibility<[ClangOption, CC1Option]>;
 defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
   LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option]>,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index ff8c55a702c11..06f8e1d3c834d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6675,6 +6675,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA,
                     options::OPT_fno_offload_via_llvm, false) &&
       (JA.isDeviceOffloading(Action::OFK_None) ||
        JA.isDeviceOffloading(Action::OFK_OpenMP))) {
+
+    // Determine if target-fast optimizations should be enabled
+    bool TargetFastUsed =
+        Args.hasFlag(options::OPT_fopenmp_target_fast,
+                     options::OPT_fno_openmp_target_fast, OFastEnabled);
     switch (D.getOpenMPRuntime(Args)) {
     case Driver::OMPRT_OMP:
     case Driver::OMPRT_IOMP5:
@@ -6725,20 +6730,54 @@ void Clang::ConstructJob(Compilation &C, const 
JobAction &JA,
                        options::OPT_fno_openmp_assume_threads_oversubscription,
                        /*Default=*/false))
         CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
-      if (Args.hasArg(options::OPT_fopenmp_target_ignore_env_vars))
+
+      // Handle -fopenmp-target-fast
+      if (Arg *A = Args.getLastArg(options::OPT_fopenmp_target_fast,
+                                   options::OPT_fno_openmp_target_fast)) {
+        if (A->getOption().matches(options::OPT_fopenmp_target_fast))
+          CmdArgs.push_back("-fopenmp-target-fast");
+        else
+          CmdArgs.push_back("-fno-openmp-target-fast");
+      } else if (OFastEnabled) {
+        CmdArgs.push_back("-fopenmp-target-fast");
+      }
+
+      // Handle -fopenmp-target-ignore-env-vars (implied by target-fast)
+      if (Arg *A =
+              Args.getLastArg(options::OPT_fopenmp_target_ignore_env_vars,
+                              options::OPT_fno_openmp_target_ignore_env_vars)) 
{
+        if 
(A->getOption().matches(options::OPT_fopenmp_target_ignore_env_vars))
+          CmdArgs.push_back("-fopenmp-target-ignore-env-vars");
+        else
+          CmdArgs.push_back("-fno-openmp-target-ignore-env-vars");
+      } else if (TargetFastUsed) {
         CmdArgs.push_back("-fopenmp-target-ignore-env-vars");
-      else if (Args.hasArg(options::OPT_fno_openmp_target_ignore_env_vars))
-        CmdArgs.push_back("-fno-openmp-target-ignore-env-vars");
+      }
 
-      if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
+      // Handle -fopenmp-assume-no-thread-state (implied by target-fast)
+      if (Arg *A =
+              Args.getLastArg(options::OPT_fopenmp_assume_no_thread_state,
+                              options::OPT_fno_openmp_assume_no_thread_state)) 
{
+        if 
(A->getOption().matches(options::OPT_fopenmp_assume_no_thread_state))
+          CmdArgs.push_back("-fopenmp-assume-no-thread-state");
+        else
+          CmdArgs.push_back("-fno-openmp-assume-no-thread-state");
+      } else if (TargetFastUsed) {
         CmdArgs.push_back("-fopenmp-assume-no-thread-state");
-      else if (Args.hasArg(options::OPT_fno_openmp_assume_no_thread_state))
-        CmdArgs.push_back("-fno-openmp-assume-no-thread-state");
+      }
 
-      if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
+      // Handle -fopenmp-assume-no-nested-parallelism (implied by target-fast)
+      if (Arg *A = Args.getLastArg(
+              options::OPT_fopenmp_assume_no_nested_parallelism,
+              options::OPT_fno_openmp_assume_no_nested_parallelism)) {
+        if (A->getOption().matches(
+                options::OPT_fopenmp_assume_no_nested_parallelism))
+          CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
+        else
+          CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism");
+      } else if (TargetFastUsed) {
         CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
-      else if 
(Args.hasArg(options::OPT_fno_openmp_assume_no_nested_parallelism))
-        CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism");
+      }
 
       if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
         CmdArgs.push_back("-fopenmp-offload-mandatory");
diff --git a/clang/test/Driver/openmp-target-fast-flag.c 
b/clang/test/Driver/openmp-target-fast-flag.c
new file mode 100644
index 0000000000000..b098c39c9f19a
--- /dev/null
+++ b/clang/test/Driver/openmp-target-fast-flag.c
@@ -0,0 +1,48 @@
+// REQUIRES: x86-registered-target, amdgpu-registered-target
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a %s -O0 2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=DefaultTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -O0 -fopenmp-target-fast %s 
2>&1 \
+// RUN:   | FileCheck -check-prefixes=TFast,EnV,TState,NestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -O3 %s 2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=O3,DefaultTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel 
%s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -O3 -fno-openmp-target-fast %s 
2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=O3,NoTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -Ofast %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=OFast,TFast,EnV,TState,NestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -Ofast -fno-openmp-target-fast 
%s 2>&1 \
+// RUN:   | FileCheck 
-check-prefixes=OFast,NoTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -fopenmp-target-fast 
-fno-openmp-target-ignore-env-vars %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=TFast,NoEnV,TState,NestParallel %s
+
+// RUN:   %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa 
-Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -O0 -fno-openmp-target-fast 
-fopenmp-target-fast %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=TFast,EnV,TState,NestParallel %s
+
+// O3: -O3
+// OFast: -Ofast
+
+// TFast: "-fopenmp-target-fast"
+// TFast-NOT: "-fno-openmp-target-fast"
+// NoTFast: "-fno-openmp-target-fast"
+// NoTFast-NOT: "-fopenmp-target-fast"
+// DefaultTFast-NOT: {{"-f(no-)?openmp-target-fast"}}
+
+// EnV: "-fopenmp-target-ignore-env-vars"
+// EnV-NOT: "-fno-openmp-target-ignore-env-vars"
+// NoEnV: "-fno-openmp-target-ignore-env-vars"
+// NoEnV-NOT: "-fopenmp-target-ignore-env-vars"
+// DefaultEnV-NOT: {{"-f(no-)?openmp-target-ignore-env-vars"}}
+
+// TState: "-fopenmp-assume-no-thread-state"
+// TState-NOT: "-fno-openmp-assume-no-thread-state"
+// DefaultTState-NOT: {{"-f(no-)?openmp-assume-no-thread-state"}}
+
+// NestParallel: "-fopenmp-assume-no-nested-parallelism"
+// NestParallel-NOT: "-fno-openmp-assume-no-nested-parallelism"
+// DefaultNoNestParallel-NOT: 
{{"-f(no-)?openmp-assume-no-nested-parallelism"}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to