https://github.com/nicebert updated https://github.com/llvm/llvm-project/pull/178914
>From 5b683d32de54aaeae1629595a922a93579d0f484 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <[email protected]> Date: Tue, 27 Jan 2026 09:54:06 -0600 Subject: [PATCH 1/3] [clang][OpenMP] Add negative flag variants for assume options Add -fno-openmp-assume-no-thread-state and -fno-openmp-assume-no-nested-parallelism flags to allow explicit disabling of these assumptions. Currently these flags only exist as MarshallingInfoNegativeFlag which doesn't generate separate OPT_* constants needed for driver code. Making them standalone flags enables proper last-wins behavior and allows them to be used in implication logic. --- clang/include/clang/Options/Options.td | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 421208a812bbc..d1944ed1e46db 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -3983,12 +3983,30 @@ def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-te HelpText<"Do not assume teams oversubscription.">; def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, HelpText<"Do not assume threads oversubscription.">; +def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, + HelpText<"Assert that a thread in a parallel region may modify an ICV">; +def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, + HelpText<"Assert no nested parallel regions in the GPU">, + MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; +def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, + HelpText<"Assert that a nested parallel region may be used in the GPU">; def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, HelpText<"Assert no thread in a parallel region modifies an ICV">, 
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>; +def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, + HelpText<"Assert that a thread in a parallel region may modify an ICV">; +def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, + HelpText<"Assert no nested parallel regions in the GPU">, + MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; +def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, + HelpText<"Assert that a nested parallel region may be used in the GPU">; +def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, + HelpText<"Assert that a thread in a parallel region may modify an ICV">; def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, HelpText<"Assert no nested parallel regions in the GPU">, MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; +def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, + HelpText<"Assert that a nested parallel region may be used in the GPU">; } // let Group = f_Group } // let Visibility = [ClangOption, CC1Option, FC1Option] >From a59c5d60187d3619324213d959eec385dba97956 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <[email protected]> Date: Tue, 27 Jan 2026 10:06:12 -0600 Subject: [PATCH 2/3] [clang][OpenMP] Add -fopenmp-target-ignore-env-vars flag Add a new flag to indicate that the OpenMP runtime can ignore environment variables during code generation for GPU offload. This allows the compiler to generate more efficient GPU code by assuming environment variables won't affect runtime behavior, enabling optimizations like skipping runtime checks and eliminating conditional branches. The flag includes both positive and negative variants and will be used by the -fopenmp-target-fast meta-flag. 
--- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Options/Options.td | 8 ++++++++ clang/lib/Driver/ToolChains/Clang.cpp | 11 +++++++++++ 3 files changed, 20 insertions(+) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index ba12e522f331f..a2cc47a6528c4 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -239,6 +239,7 @@ LANGOPT(OpenMPThreadSubscription , 1, 0, NotCompatible, "Assume work-shared loo LANGOPT(OpenMPTeamSubscription , 1, 0, NotCompatible, "Assume distributed loops do not have more iterations than participating teams.") LANGOPT(OpenMPNoThreadState , 1, 0, NotCompatible, "Assume that no thread in a parallel region will modify an ICV.") LANGOPT(OpenMPNoNestedParallelism , 1, 0, NotCompatible, "Assume that no thread in a parallel region will encounter a parallel region") +LANGOPT(OpenMPTargetIgnoreEnvVars, 1, 0, NotCompatible, "Assume that the OpenMP runtime can ignore environment variables during code generation for GPU offload") LANGOPT(OpenMPOffloadMandatory , 1, 0, NotCompatible, "Assert that offloading is mandatory and do not create a host fallback.") LANGOPT(OpenMPForceUSM , 1, 0, NotCompatible, "Enable OpenMP unified shared memory mode via compiler.") LANGOPT(NoGPULib , 1, 0, NotCompatible, "Indicate a build without the standard GPU libraries.") diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index d1944ed1e46db..ec8339ca40eff 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4030,6 +4030,14 @@ def fopenmp_target_new_runtime : Flag<["-"], "fopenmp-target-new-runtime">, Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fno_openmp_target_new_runtime : Flag<["-"], "fno-openmp-target-new-runtime">, Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>; +def 
fopenmp_target_ignore_env_vars : Flag<["-"], "fopenmp-target-ignore-env-vars">, + Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Assume that the OpenMP runtime can ignore environment variables during code generation for GPU offload">, + MarshallingInfoFlag<LangOpts<"OpenMPTargetIgnoreEnvVars">>; +def fno_openmp_target_ignore_env_vars : Flag<["-"], "fno-openmp-target-ignore-env-vars">, + Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, + Visibility<[ClangOption, CC1Option]>; defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse", LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0293b04217673..ff8c55a702c11 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6725,10 +6725,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_openmp_assume_threads_oversubscription, /*Default=*/false)) CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); + if (Args.hasArg(options::OPT_fopenmp_target_ignore_env_vars)) + CmdArgs.push_back("-fopenmp-target-ignore-env-vars"); + else if (Args.hasArg(options::OPT_fno_openmp_target_ignore_env_vars)) + CmdArgs.push_back("-fno-openmp-target-ignore-env-vars"); + if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) CmdArgs.push_back("-fopenmp-assume-no-thread-state"); + else if (Args.hasArg(options::OPT_fno_openmp_assume_no_thread_state)) + CmdArgs.push_back("-fno-openmp-assume-no-thread-state"); + if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); + else if (Args.hasArg(options::OPT_fno_openmp_assume_no_nested_parallelism)) + CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism"); + if (Args.hasArg(options::OPT_fopenmp_offload_mandatory)) 
CmdArgs.push_back("-fopenmp-offload-mandatory"); if (Args.hasArg(options::OPT_fopenmp_force_usm)) >From 4f8d1261e59d5ff11bde1402268cf277dcadbb0b Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <[email protected]> Date: Wed, 28 Jan 2026 01:47:32 -0600 Subject: [PATCH 3/3] [clang][OpenMP] Add -fopenmp-target-fast convenience flag Add meta-flag that implies multiple GPU optimization flags: - -fopenmp-target-ignore-env-vars - -fopenmp-assume-no-thread-state - -fopenmp-assume-no-nested-parallelism The flag is automatically enabled by -Ofast and can be explicitly disabled with -fno-openmp-target-fast. Individual implied flags can be selectively overridden while keeping others enabled. --- clang/include/clang/Options/Options.td | 21 +++----- clang/lib/Driver/ToolChains/Clang.cpp | 57 +++++++++++++++++---- clang/test/Driver/openmp-target-fast-flag.c | 48 +++++++++++++++++ 3 files changed, 103 insertions(+), 23 deletions(-) create mode 100644 clang/test/Driver/openmp-target-fast-flag.c diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index ec8339ca40eff..2080cfa5fc091 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -3983,13 +3983,6 @@ def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-te HelpText<"Do not assume teams oversubscription.">; def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, HelpText<"Do not assume threads oversubscription.">; -def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, - HelpText<"Assert that a thread in a parallel region may modify an ICV">; -def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, - HelpText<"Assert no nested parallel regions in the GPU">, - MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; -def fno_openmp_assume_no_nested_parallelism : Flag<["-"], 
"fno-openmp-assume-no-nested-parallelism">, - HelpText<"Assert that a nested parallel region may be used in the GPU">; def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, HelpText<"Assert no thread in a parallel region modifies an ICV">, MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>; @@ -4000,13 +3993,6 @@ def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, HelpText<"Assert that a nested parallel region may be used in the GPU">; -def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, - HelpText<"Assert that a thread in a parallel region may modify an ICV">; -def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">, - HelpText<"Assert no nested parallel regions in the GPU">, - MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>; -def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, - HelpText<"Assert that a nested parallel region may be used in the GPU">; } // let Group = f_Group } // let Visibility = [ClangOption, CC1Option, FC1Option] @@ -4038,6 +4024,13 @@ def fopenmp_target_ignore_env_vars : Flag<["-"], "fopenmp-target-ignore-env-vars def fno_openmp_target_ignore_env_vars : Flag<["-"], "fno-openmp-target-ignore-env-vars">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; +def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">, + Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Convenience flag to enable aggressive OpenMP GPU optimizations">; +def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">, + Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, + Visibility<[ClangOption, CC1Option]>; 
defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse", LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option]>, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ff8c55a702c11..06f8e1d3c834d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6675,6 +6675,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_offload_via_llvm, false) && (JA.isDeviceOffloading(Action::OFK_None) || JA.isDeviceOffloading(Action::OFK_OpenMP))) { + + // Determine if target-fast optimizations should be enabled + bool TargetFastUsed = + Args.hasFlag(options::OPT_fopenmp_target_fast, + options::OPT_fno_openmp_target_fast, OFastEnabled); switch (D.getOpenMPRuntime(Args)) { case Driver::OMPRT_OMP: case Driver::OMPRT_IOMP5: @@ -6725,20 +6730,54 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_openmp_assume_threads_oversubscription, /*Default=*/false)) CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); - if (Args.hasArg(options::OPT_fopenmp_target_ignore_env_vars)) + + // Handle -fopenmp-target-fast + if (Arg *A = Args.getLastArg(options::OPT_fopenmp_target_fast, + options::OPT_fno_openmp_target_fast)) { + if (A->getOption().matches(options::OPT_fopenmp_target_fast)) + CmdArgs.push_back("-fopenmp-target-fast"); + else + CmdArgs.push_back("-fno-openmp-target-fast"); + } else if (OFastEnabled) { + CmdArgs.push_back("-fopenmp-target-fast"); + } + + // Handle -fopenmp-target-ignore-env-vars (implied by target-fast) + if (Arg *A = + Args.getLastArg(options::OPT_fopenmp_target_ignore_env_vars, + options::OPT_fno_openmp_target_ignore_env_vars)) { + if (A->getOption().matches(options::OPT_fopenmp_target_ignore_env_vars)) + CmdArgs.push_back("-fopenmp-target-ignore-env-vars"); + else + CmdArgs.push_back("-fno-openmp-target-ignore-env-vars"); + } else if (TargetFastUsed) { 
CmdArgs.push_back("-fopenmp-target-ignore-env-vars"); - else if (Args.hasArg(options::OPT_fno_openmp_target_ignore_env_vars)) - CmdArgs.push_back("-fno-openmp-target-ignore-env-vars"); + } - if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) + // Handle -fopenmp-assume-no-thread-state (implied by target-fast) + if (Arg *A = + Args.getLastArg(options::OPT_fopenmp_assume_no_thread_state, + options::OPT_fno_openmp_assume_no_thread_state)) { + if (A->getOption().matches(options::OPT_fopenmp_assume_no_thread_state)) + CmdArgs.push_back("-fopenmp-assume-no-thread-state"); + else + CmdArgs.push_back("-fno-openmp-assume-no-thread-state"); + } else if (TargetFastUsed) { CmdArgs.push_back("-fopenmp-assume-no-thread-state"); - else if (Args.hasArg(options::OPT_fno_openmp_assume_no_thread_state)) - CmdArgs.push_back("-fno-openmp-assume-no-thread-state"); + } - if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) + // Handle -fopenmp-assume-no-nested-parallelism (implied by target-fast) + if (Arg *A = Args.getLastArg( + options::OPT_fopenmp_assume_no_nested_parallelism, + options::OPT_fno_openmp_assume_no_nested_parallelism)) { + if (A->getOption().matches( + options::OPT_fopenmp_assume_no_nested_parallelism)) + CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); + else + CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism"); + } else if (TargetFastUsed) { CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); - else if (Args.hasArg(options::OPT_fno_openmp_assume_no_nested_parallelism)) - CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism"); + } if (Args.hasArg(options::OPT_fopenmp_offload_mandatory)) CmdArgs.push_back("-fopenmp-offload-mandatory"); diff --git a/clang/test/Driver/openmp-target-fast-flag.c b/clang/test/Driver/openmp-target-fast-flag.c new file mode 100644 index 0000000000000..009e7ea6250d7 --- /dev/null +++ b/clang/test/Driver/openmp-target-fast-flag.c @@ -0,0 +1,48 @@ +// REQUIRES: x86-registered-target, 
amdgpu-registered-target + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib %s -O0 2>&1 \ +// RUN: | FileCheck -check-prefixes=DefaultTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 -fopenmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=TFast,EnV,TState,NestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=O3,DefaultTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O3 -fno-openmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=O3,NoTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=OFast,TFast,EnV,TState,NestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -Ofast -fno-openmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=OFast,NoTFast,DefaultEnV,DefaultTState,DefaultNoNestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -fopenmp-target-fast -fno-openmp-target-ignore-env-vars %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=TFast,NoEnV,TState,NestParallel %s + +// RUN: %clang -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -nogpulib -O0 -fno-openmp-target-fast -fopenmp-target-fast %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=TFast,EnV,TState,NestParallel %s 
+ +// O3: -O3 +// OFast: -Ofast + +// TFast: "-fopenmp-target-fast" +// TFast-NOT: "-fno-openmp-target-fast" +// NoTFast: "-fno-openmp-target-fast" +// NoTFast-NOT: "-fopenmp-target-fast" +// DefaultTFast-NOT: {{"-f(no-)?openmp-target-fast"}} + +// EnV: "-fopenmp-target-ignore-env-vars" +// EnV-NOT: "-fno-openmp-target-ignore-env-vars" +// NoEnV: "-fno-openmp-target-ignore-env-vars" +// NoEnV-NOT: "-fopenmp-target-ignore-env-vars" +// DefaultEnV-NOT: {{"-f(no-)?openmp-target-ignore-env-vars"}} + +// TState: "-fopenmp-assume-no-thread-state" +// TState-NOT: "-fno-openmp-assume-no-thread-state" +// DefaultTState-NOT: {{"-f(no-)?openmp-assume-no-thread-state"}} + +// NestParallel: "-fopenmp-assume-no-nested-parallelism" +// NestParallel-NOT: "-fno-openmp-assume-no-nested-parallelism" +// DefaultNoNestParallel-NOT: {{"-f(no-)?openmp-assume-no-nested-parallelism"}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
