This revision was automatically updated to reflect the committed changes.
Closed by commit rG0870a4f59aef: [OpenMP] Add flag for disabling thread state 
in runtime (authored by jhuber6).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120106/new/

https://reviews.llvm.org/D120106

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/OpenMP/target_globals_codegen.cpp
  openmp/libomptarget/DeviceRTL/include/Configuration.h
  openmp/libomptarget/DeviceRTL/src/Configuration.cpp
  openmp/libomptarget/DeviceRTL/src/State.cpp

Index: openmp/libomptarget/DeviceRTL/src/State.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/State.cpp
+++ openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -285,7 +285,8 @@
 #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
 
 uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
-  if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0))
+  if (OMP_LIKELY(!config::mayUseThreadStates() ||
+                 TeamState.ICVState.LevelVar == 0))
     return TeamState.ICVState.*Var;
   uint32_t TId = mapping::getThreadIdInBlock();
   if (!ThreadStates[TId]) {
@@ -299,13 +300,13 @@
 
 uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
   uint32_t TId = mapping::getThreadIdInBlock();
-  if (OMP_UNLIKELY(ThreadStates[TId]))
+  if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
     return ThreadStates[TId]->ICVState.*Var;
   return TeamState.ICVState.*Var;
 }
 uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
   uint64_t TId = mapping::getThreadIdInBlock();
-  if (OMP_UNLIKELY(ThreadStates[TId]))
+  if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
     return ThreadStates[TId]->ICVState.*Var;
   return TeamState.ICVState.*Var;
 }
@@ -380,6 +381,9 @@
 }
 
 void state::enterDataEnvironment(IdentTy *Ident) {
+  ASSERT(config::mayUseThreadStates() &&
+         "Thread state modified while explicitly disabled!");
+
   unsigned TId = mapping::getThreadIdInBlock();
   ThreadStateTy *NewThreadState =
       static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
@@ -388,6 +392,9 @@
 }
 
 void state::exitDataEnvironment() {
+  ASSERT(config::mayUseThreadStates() &&
+         "Thread state modified while explicitly disabled!");
+
   unsigned TId = mapping::getThreadIdInBlock();
   resetStateForThread(TId);
 }
Index: openmp/libomptarget/DeviceRTL/src/Configuration.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -20,7 +20,9 @@
 
 #pragma omp declare target
 
-extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
+// defined by CGOpenMPRuntimeGPU
+extern uint32_t __omp_rtl_debug_kind;
+extern uint32_t __omp_rtl_assume_no_thread_state;
 
 // TODO: We want to change the name as soon as the old runtime is gone.
 // This variable should be visibile to the plugin so we override the default
@@ -48,4 +50,6 @@
   return config::getDebugKind() & Kind;
 }
 
+bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }
+
 #pragma omp end declare target
Index: openmp/libomptarget/DeviceRTL/include/Configuration.h
===================================================================
--- openmp/libomptarget/DeviceRTL/include/Configuration.h
+++ openmp/libomptarget/DeviceRTL/include/Configuration.h
@@ -38,8 +38,13 @@
 /// Return the amount of dynamic shared memory that was allocated at launch.
 uint64_t getDynamicMemorySize();
 
+/// Return if debugging is enabled for the given debug kind.
 bool isDebugMode(DebugKind Level);
 
+/// Indicates if this kernel may require thread-specific states, or if it was
+/// explicitly disabled by the user.
+bool mayUseThreadStates();
+
 } // namespace config
 } // namespace _OMP
 
Index: clang/test/OpenMP/target_globals_codegen.cpp
===================================================================
--- clang/test/OpenMP/target_globals_codegen.cpp
+++ clang/test/OpenMP/target_globals_codegen.cpp
@@ -6,6 +6,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-no-thread-state -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-STATE
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - | FileCheck %s --check-prefix=CHECK-RUNTIME
 // expected-no-diagnostics
 
@@ -16,26 +17,37 @@
 // CHECK: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1
 // CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
 // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 //.
 // CHECK-EQ: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111
 // CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
 // CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK-EQ: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 //.
 // CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
 // CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
 // CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK-DEFAULT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 //.
 // CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
 // CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
 // CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1
+// CHECK-THREADS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 //.
 // CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
 // CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
 // CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK-TEAMS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
+//.
+// CHECK-STATE: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
+// CHECK-STATE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0
+// CHECK-STATE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK-STATE: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
 //.
 // CHECK-RUNTIME-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
 // CHECK-RUNTIME-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
 // CHECK-RUNTIME-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
+// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0
 //.
 void foo() {
 #pragma omp target
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5995,6 +5995,8 @@
                        options::OPT_fno_openmp_assume_threads_oversubscription,
                        /*Default=*/false))
         CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
+      if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
+        CmdArgs.push_back("-fopenmp-assume-no-thread-state");
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1210,6 +1210,8 @@
                                 "__omp_rtl_assume_teams_oversubscription");
     OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
                                 "__omp_rtl_assume_threads_oversubscription");
+    OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
+                                "__omp_rtl_assume_no_thread_state");
   }
 }
 
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2473,6 +2473,10 @@
   Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
   Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">, Group<f_Group>, 
+  Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, 
+  HelpText<"Assert no thread in a parallel region modifies an ICV">,
+  MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
 defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
   LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue,
   PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -246,6 +246,7 @@
 LANGOPT(OpenMPOptimisticCollapse  , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
 LANGOPT(OpenMPThreadSubscription  , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
 LANGOPT(OpenMPTeamSubscription  , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
+LANGOPT(OpenMPNoThreadState  , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
 LANGOPT(CUDAIsDevice      , 1, 0, "compiling for CUDA device")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to