jhuber6 updated this revision to Diff 295342.
jhuber6 added a comment.

Moving the Clang error checks into D88594 <https://reviews.llvm.org/D88594>.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88430/new/

https://reviews.llvm.org/D88430

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CodeGenModule.h
  clang/test/OpenMP/nvptx_parallel_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/test/Transforms/OpenMP/add_attributes.ll

Index: llvm/test/Transforms/OpenMP/add_attributes.ll
===================================================================
--- llvm/test/Transforms/OpenMP/add_attributes.ll
+++ llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -888,313 +888,313 @@
 ; CHECK: declare dso_local i32 @omp_pause_resource_all(i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0
+; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0
+; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0
+; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0
+; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0
+; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0
+; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0
+; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0
+; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0
+; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0
+; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0
+; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0
+; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0
+; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0
+; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0
+; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0
+; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0
+; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0
+; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0
+; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0
+; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*)
 
-; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0
+; CHECK: ; Function Attrs: convergent nounwind
+; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0
+; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0
+; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0
+; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0
+; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0
+; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0
+; CHECK-NEXT: declare void @__tgt_register_requires(i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
+; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0
+; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0
+; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0
+; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0
+; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
 ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32)
@@ -1212,52 +1212,52 @@
 ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads()
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local void @use_int(i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
-; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2
+; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority()
@@ -1326,7 +1326,7 @@
 ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation()
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device()
@@ -1356,25 +1356,25 @@
 ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places()
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind
-; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2
+; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
-; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1
+; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*)
 
 ; OPTIMISTIC-NOT: Function Attrs
 ; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*)
@@ -1419,7 +1419,7 @@
 ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
-; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1
+; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly)
@@ -1427,7 +1427,7 @@
 ; OPTIMISTIC: ; Function Attrs: nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
@@ -1451,13 +1451,13 @@
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@@ -1466,22 +1466,22 @@
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
@@ -1523,10 +1523,10 @@
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@@ -1535,10 +1535,10 @@
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
@@ -1598,7 +1598,7 @@
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@@ -1622,7 +1622,7 @@
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@@ -1634,16 +1634,16 @@
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
 ; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly)
 
-; OPTIMISTIC: ; Function Attrs: nounwind
+; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32)
 
 ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -220,6 +220,9 @@
 __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr)
 __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32,
                     /* kmp_task_t */ VoidPtr)
+__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16)
+__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32)
+__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr)
 
 #undef __OMP_FUNCTION_TYPE
 #undef OMP_FUNCTION_TYPE
@@ -295,8 +298,6 @@
 __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
           /* Int */ Int32)
 __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
-__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
 __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32,
           /* kmp_task_t */ VoidPtr, Int32,
           /* kmp_task_affinity_info_t */ VoidPtr)
@@ -502,17 +503,42 @@
 __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
           /* Int */ Int32, /* kmp_task_t */ VoidPtr)
 
+/// OpenMP Device runtime functions
+__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16)
+__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16)
+__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16)
+__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16)
+__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
+__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
+__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, )
+__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16)
+__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
+          Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr)
+__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32)
+__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
+          VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr,
+          GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) 
+
+__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16)
 __OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, )
-__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) 
-__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy,
-          Int16)
+__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
+
+__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16)
 __OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16)
 __OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr)
-
-/// Note that device runtime functions (in the following) do not necessarily
-/// need attributes as we expect to see the definitions.
-__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
-__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
+__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy)
+__OMP_RTL(__kmpc_end_sharing_variables, false, Void, )
+__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr)
+__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32)
+__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, )
+__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy,
+          Int16, VoidPtrPtr)
+__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
+__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, )
+__OMP_RTL(__kmpc_syncwarp, false, Void, Int32)
 
 __OMP_RTL(__last, false, Void, )
 
@@ -561,8 +587,8 @@
 
 __OMP_ATTRS_SET(BarrierAttrs,
                 OptimisticAttributes
-                    ? AttributeSet(EnumAttr(NoUnwind))
-                    : AttributeSet(EnumAttr(NoUnwind)))
+                    ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))
+                    : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)))
 
 __OMP_ATTRS_SET(InaccessibleArgOnlyAttrs,
                 OptimisticAttributes
@@ -634,6 +660,11 @@
 
 __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(),
                 ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(),
+                ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(),
+                ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs())
 __OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(),
                 ParamAttrs(ReadOnlyPtrAttrs))
 __OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(),
Index: clang/test/OpenMP/nvptx_parallel_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -91,7 +91,7 @@
 // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
 //
 // CHECK: [[AWAIT_WORK]]
-// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]]
+// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
 // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
@@ -321,10 +321,10 @@
 // CHECK: define internal void [[PARALLEL_FN4]](
 // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
 // CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
-// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]]
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
 // CHECK: ret void
 
-// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]]
+// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]]
 
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker()
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}(
@@ -377,6 +377,6 @@
 // CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]]
 // CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]]
 
-// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}}
+// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}}
 
 #endif
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1068,16 +1068,6 @@
                         llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
                         bool Local = false, bool AssumeConvergent = false);
 
-  /// Create or return a runtime function declaration with the specified type
-  /// and name. This will automatically add the convergent attribute to the
-  /// function declaration.
-  llvm::FunctionCallee CreateConvergentRuntimeFunction(
-      llvm::FunctionType *Ty, StringRef Name,
-      llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
-      bool Local = false) {
-    return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true);
-  }
-
   /// Create a new runtime global variable with the specified type and name.
   llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,
                                         StringRef Name);
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -28,96 +28,6 @@
 using namespace llvm::omp;
 
 namespace {
-enum OpenMPRTLFunctionNVPTX {
-  /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
-  /// int16_t RequiresOMPRuntime);
-  OMPRTL_NVPTX__kmpc_kernel_init,
-  /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
-  OMPRTL_NVPTX__kmpc_kernel_deinit,
-  /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
-  /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
-  OMPRTL_NVPTX__kmpc_spmd_kernel_init,
-  /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
-  OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
-  /// Call to void __kmpc_kernel_prepare_parallel(void
-  /// *outlined_function);
-  OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
-  /// Call to bool __kmpc_kernel_parallel(void **outlined_function);
-  OMPRTL_NVPTX__kmpc_kernel_parallel,
-  /// Call to void __kmpc_kernel_end_parallel();
-  OMPRTL_NVPTX__kmpc_kernel_end_parallel,
-  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL_NVPTX__kmpc_serialized_parallel,
-  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL_NVPTX__kmpc_end_serialized_parallel,
-  /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
-  /// int16_t lane_offset, int16_t warp_size);
-  OMPRTL_NVPTX__kmpc_shuffle_int32,
-  /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
-  /// int16_t lane_offset, int16_t warp_size);
-  OMPRTL_NVPTX__kmpc_shuffle_int64,
-  /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
-  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
-  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
-  /// lane_offset, int16_t shortCircuit),
-  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
-  OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
-  /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
-  /// global_tid, void *global_buffer, int32_t num_of_records, void*
-  /// reduce_data,
-  /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
-  /// lane_offset, int16_t shortCircuit),
-  /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
-  /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
-  /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
-  /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
-  /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
-  /// *buffer, int idx, void *reduce_data));
-  OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
-  /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
-  OMPRTL_NVPTX__kmpc_end_reduce_nowait,
-  /// Call to void __kmpc_data_sharing_init_stack();
-  OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
-  /// Call to void __kmpc_data_sharing_init_stack_spmd();
-  OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
-  /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
-  /// int16_t UseSharedMemory);
-  OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
-  /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
-  /// UseSharedMemory);
-  OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
-  /// Call to void __kmpc_data_sharing_pop_stack(void *a);
-  OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
-  /// Call to void __kmpc_begin_sharing_variables(void ***args,
-  /// size_t n_args);
-  OMPRTL_NVPTX__kmpc_begin_sharing_variables,
-  /// Call to void __kmpc_end_sharing_variables();
-  OMPRTL_NVPTX__kmpc_end_sharing_variables,
-  /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
-  OMPRTL_NVPTX__kmpc_get_shared_variables,
-  /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL_NVPTX__kmpc_parallel_level,
-  /// Call to int8_t __kmpc_is_spmd_exec_mode();
-  OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
-  /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
-  /// const void *buf, size_t size, int16_t is_shared, const void **res);
-  OMPRTL_NVPTX__kmpc_get_team_static_memory,
-  /// Call to void __kmpc_restore_team_static_memory(int16_t
-  /// isSPMDExecutionMode, int16_t is_shared);
-  OMPRTL_NVPTX__kmpc_restore_team_static_memory,
-  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
-  OMPRTL__kmpc_barrier,
-  /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
-  /// global_tid);
-  OMPRTL__kmpc_barrier_simple_spmd,
-  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
-  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
-  /// Call to void __kmpc_syncwarp(int32_t Mask);
-  OMPRTL_NVPTX__kmpc_syncwarp,
-};
 
 /// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
 class NVPTXActionTy final : public PrePostActionTy {
@@ -1243,13 +1153,13 @@
   // TODO: Optimize runtime initialization and pass in correct value.
   llvm::Value *Args[] = {getThreadLimit(CGF),
                          Bld.getInt16(/*RequiresOMPRuntime=*/1)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_kernel_init),
+                      Args);
 
   // For data sharing, we need to initialize the stack.
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+      CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack));
 
   emitGenericVarsProlog(CGF, WST.Loc);
 }
@@ -1272,8 +1182,9 @@
   // Signal termination condition.
   // TODO: Optimize runtime initialization and pass in correct value.
   llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_kernel_deinit),
+                      Args);
   // Barrier to terminate worker threads.
   syncCTAThreads(CGF);
   // Master thread jumps to exit point.
@@ -1347,13 +1258,14 @@
                          /*RequiresOMPRuntime=*/
                          Bld.getInt16(RequiresFullRuntime ? 1 : 0),
                          /*RequiresDataSharing=*/Bld.getInt16(0)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init),
+                      Args);
 
   if (RequiresFullRuntime) {
     // For data sharing, we need to initialize the stack.
-    CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-        OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+        CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd));
   }
 
   CGF.EmitBranch(ExecuteBB);
@@ -1379,9 +1291,9 @@
   // DeInitialize the OMP state in the runtime; called by all active threads.
   llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
                          CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(
-          OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2),
+                      Args);
   CGF.EmitBranch(EST.ExitBB);
 
   CGF.EmitBlock(EST.ExitBB);
@@ -1415,7 +1327,7 @@
 }
 
 void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
-                                          WorkerFunctionState &WST) {
+                                        WorkerFunctionState &WST) {
   //
   // The workers enter this loop and wait for parallel work from the master.
   // When the master encounters a parallel region it sets up the work + variable
@@ -1450,8 +1362,10 @@
 
   // TODO: Optimize runtime initialization and pass in correct value.
   llvm::Value *Args[] = {WorkFn.getPointer()};
-  llvm::Value *Ret = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
+  llvm::Value *Ret =
+      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                              CGM.getModule(), OMPRTL___kmpc_kernel_parallel),
+                          Args);
   Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
 
   // On termination condition (workid == 0), exit loop.
@@ -1516,9 +1430,9 @@
 
   // Signal end of parallel region.
   CGF.EmitBlock(TerminateBB);
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
-      llvm::None);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel),
+                      llvm::None);
   CGF.EmitBranch(BarrierBB);
 
   // All active and inactive workers wait at a barrier after parallel region.
@@ -1533,328 +1447,6 @@
   clearLocThreadIdInsertPt(CGF);
 }
 
-/// Returns specified OpenMP runtime function for the current OpenMP
-/// implementation.  Specialized for the NVPTX device.
-/// \param Function OpenMP runtime function.
-/// \return Specified function.
-llvm::FunctionCallee
-CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) {
-  llvm::FunctionCallee RTLFn = nullptr;
-  switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
-  case OMPRTL_NVPTX__kmpc_kernel_init: {
-    // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
-    // RequiresOMPRuntime);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_deinit: {
-    // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
-    // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
-    // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
-    // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
-    /// Build void __kmpc_kernel_prepare_parallel(
-    /// void *outlined_function);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
-    /// Build bool __kmpc_kernel_parallel(void **outlined_function);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
-    llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
-    auto *FnTy =
-        llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
-    /// Build void __kmpc_kernel_end_parallel();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_serialized_parallel: {
-    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
-    // global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
-    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
-    // global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_shuffle_int32: {
-    // Build int32_t __kmpc_shuffle_int32(int32_t element,
-    // int16_t lane_offset, int16_t warp_size);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_shuffle_int64: {
-    // Build int64_t __kmpc_shuffle_int64(int64_t element,
-    // int16_t lane_offset, int16_t warp_size);
-    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
-    // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
-    // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
-    // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
-    // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
-    // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
-    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
-                                             CGM.Int16Ty, CGM.Int16Ty};
-    auto *ShuffleReduceFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
-    auto *InterWarpCopyFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
-                                CGM.Int32Ty,
-                                CGM.Int32Ty,
-                                CGM.SizeTy,
-                                CGM.VoidPtrTy,
-                                ShuffleReduceFnTy->getPointerTo(),
-                                InterWarpCopyFnTy->getPointerTo()};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
-    // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
-    // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
-    // global_tid, void *global_buffer, int32_t num_of_records, void*
-    // reduce_data,
-    // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
-    // lane_offset, int16_t shortCircuit),
-    // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
-    // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
-    // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
-    // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
-    // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
-    // *buffer, int idx, void *reduce_data));
-    llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
-                                             CGM.Int16Ty, CGM.Int16Ty};
-    auto *ShuffleReduceFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
-    auto *InterWarpCopyFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy,
-                                          CGM.VoidPtrTy};
-    auto *GlobalListFnTy =
-        llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams,
-                                /*isVarArg=*/false);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
-                                CGM.Int32Ty,
-                                CGM.VoidPtrTy,
-                                CGM.Int32Ty,
-                                CGM.VoidPtrTy,
-                                ShuffleReduceFnTy->getPointerTo(),
-                                InterWarpCopyFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo(),
-                                GlobalListFnTy->getPointerTo()};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
-    /// Build void __kmpc_data_sharing_init_stack();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
-    /// Build void __kmpc_data_sharing_init_stack_spmd();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn =
-        CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
-    // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
-    // int16_t UseSharedMemory);
-    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
-    // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t
-    // UseSharedMemory);
-    llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(
-        FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
-    // Build void __kmpc_data_sharing_pop_stack(void *a);
-    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy,
-                                      /*Name=*/"__kmpc_data_sharing_pop_stack");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
-    /// Build void __kmpc_begin_sharing_variables(void ***args,
-    /// size_t n_args);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
-    /// Build void __kmpc_end_sharing_variables();
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_get_shared_variables: {
-    /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
-    llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_parallel_level: {
-    // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
-    // Build int8_t __kmpc_is_spmd_exec_mode();
-    auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
-    // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
-    // const void *buf, size_t size, int16_t is_shared, const void **res);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
-                                CGM.Int16Ty, CGM.VoidPtrPtrTy};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
-    // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
-    // int16_t is_shared);
-    llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
-    RTLFn =
-        CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
-    break;
-  }
-  case OMPRTL__kmpc_barrier: {
-    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn =
-        CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
-    break;
-  }
-  case OMPRTL__kmpc_barrier_simple_spmd: {
-    // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
-    // global_tid);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateConvergentRuntimeFunction(
-        FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
-    // Build int32_t __kmpc_warp_active_thread_mask(void);
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
-    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
-    break;
-  }
-  case OMPRTL_NVPTX__kmpc_syncwarp: {
-    // Build void __kmpc_syncwarp(kmp_int32 Mask);
-    auto *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
-    RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp");
-    break;
-  }
-  }
-  return RTLFn;
-}
-
 void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
                                               llvm::Constant *Addr,
                                               uint64_t Size, int32_t,
@@ -2157,12 +1749,14 @@
         llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
         llvm::Value *ThreadID = getThreadID(CGF, Loc);
         llvm::Value *PL = CGF.EmitRuntimeCall(
-            createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+            OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+                                                  OMPRTL___kmpc_parallel_level),
             {RTLoc, ThreadID});
         IsTTD = Bld.CreateIsNull(PL);
       }
-      llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+      llvm::Value *IsSPMD = Bld.CreateIsNotNull(
+          CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode)));
       Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
       // There is no need to emit line number for unconditional branch.
       (void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -2196,8 +1790,8 @@
       llvm::Value *GlobalRecordSizeArg[] = {
           Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
       llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(
-              OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+          OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack),
           GlobalRecordSizeArg);
       GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
           GlobalRecValue, GlobalRecPtrTy);
@@ -2259,9 +1853,10 @@
                 CGM.Int16Ty,
                 getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
             StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
-        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-                                OMPRTL_NVPTX__kmpc_get_team_static_memory),
-                            GlobalRecordSizeArg);
+        CGF.EmitRuntimeCall(
+            OMPBuilder.getOrCreateRuntimeFunction(
+                CGM.getModule(), OMPRTL___kmpc_get_team_static_memory),
+            GlobalRecordSizeArg);
         GlobalizedRecords.back().Buffer = StaticGlobalized;
         GlobalizedRecords.back().RecSize = RecSize;
         GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
@@ -2288,10 +1883,10 @@
           llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
           CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
       llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(
-              IsInTTDRegion
-                  ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
-                  : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+          OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(),
+              IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack
+                            : OMPRTL___kmpc_data_sharing_coalesced_push_stack),
           GlobalRecordSizeArg);
       GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
           GlobalRecValue, GlobalRecPtrTy);
@@ -2390,8 +1985,8 @@
     llvm::Value *GlobalRecordSizeArg[] = {
         Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
     llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(
-            OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack),
         GlobalRecordSizeArg);
     llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
         GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
@@ -2419,7 +2014,8 @@
     for (llvm::Value *Addr :
          llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
       CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+          OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
           Addr);
     }
     if (I->getSecond().GlobalRecordAddr) {
@@ -2434,8 +2030,8 @@
         (void)ApplyDebugLocation::CreateEmpty(CGF);
         CGF.EmitBlock(NonSPMDBB);
         CGF.EmitRuntimeCall(
-            createNVPTXRuntimeFunction(
-                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+            OMPBuilder.getOrCreateRuntimeFunction(
+                CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
             CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
         CGF.EmitBlock(ExitBB);
       } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
@@ -2456,14 +2052,15 @@
                   getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
               IsInSharedMemory};
           CGF.EmitRuntimeCall(
-              createNVPTXRuntimeFunction(
-                  OMPRTL_NVPTX__kmpc_restore_team_static_memory),
+              OMPBuilder.getOrCreateRuntimeFunction(
+                  CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory),
               Args);
         }
       } else {
-        CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-                                OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
-                            I->getSecond().GlobalRecordAddr);
+        CGF.EmitRuntimeCall(
+            OMPBuilder.getOrCreateRuntimeFunction(
+                CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
+            I->getSecond().GlobalRecordAddr);
       }
     }
   }
@@ -2535,9 +2132,11 @@
     llvm::Value *Args[] = {RTLoc, ThreadID};
 
     NVPTXActionTy Action(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_serialized_parallel),
         Args,
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel),
         Args);
     RCG.setAction(Action);
     RCG(CGF);
@@ -2553,7 +2152,8 @@
     // Prepare for parallel region. Indicate the outlined function.
     llvm::Value *Args[] = {ID};
     CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel),
         Args);
 
     // Create a private scope that will globalize the arguments
@@ -2570,9 +2170,10 @@
       llvm::Value *DataSharingArgs[] = {
           SharedArgsPtr,
           llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
-      CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
-                              OMPRTL_NVPTX__kmpc_begin_sharing_variables),
-                          DataSharingArgs);
+      CGF.EmitRuntimeCall(
+          OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables),
+          DataSharingArgs);
 
       // Store variable address in a list of references to pass to workers.
       unsigned Idx = 0;
@@ -2606,8 +2207,8 @@
     syncCTAThreads(CGF);
 
     if (!CapturedVars.empty())
-      CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
+      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+          CGM.getModule(), OMPRTL___kmpc_end_sharing_variables));
 
     // Remember for post-processing in worker loop.
     Work.emplace_back(WFn);
@@ -2631,8 +2232,9 @@
       llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
       llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
       llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
-      llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+      llvm::Value *IsSPMD = Bld.CreateIsNotNull(
+          CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode)));
       Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
       // There is no need to emit line number for unconditional branch.
       (void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -2640,7 +2242,8 @@
       llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
       llvm::Value *ThreadID = getThreadID(CGF, Loc);
       llvm::Value *PL = CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+                                                OMPRTL___kmpc_parallel_level),
           {RTLoc, ThreadID});
       llvm::Value *Res = Bld.CreateIsNotNull(PL);
       Bld.CreateCondBr(Res, SeqBB, MasterBB);
@@ -2704,9 +2307,11 @@
     llvm::Value *Args[] = {RTLoc, ThreadID};
 
     NVPTXActionTy Action(
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_serialized_parallel),
         Args,
-        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel),
         Args);
     RCG.setAction(Action);
     RCG(CGF);
@@ -2736,9 +2341,9 @@
       llvm::ConstantPointerNull::get(
           cast<llvm::PointerType>(getIdentTyPointerTy())),
       llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
-  llvm::CallInst *Call = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
-  Call->setConvergent();
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd),
+                      Args);
 }
 
 void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
@@ -2752,9 +2357,10 @@
   unsigned Flags = getDefaultFlagsForBarriers(Kind);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                          getThreadID(CGF, Loc)};
-  llvm::CallInst *Call = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
-  Call->setConvergent();
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_barrier),
+                      Args);
 }
 
 void CGOpenMPRuntimeGPU::emitCriticalRegion(
@@ -2770,8 +2376,8 @@
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
 
   // Get the mask of active threads in the warp.
-  llvm::Value *Mask = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
+  llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+      CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));
   // Fetch team-local id of the thread.
   llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
 
@@ -2813,8 +2419,9 @@
   // counter variable and returns to the loop.
   CGF.EmitBlock(SyncBB);
   // Reconverge active threads in the warp.
-  (void)CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
+  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                                CGM.getModule(), OMPRTL___kmpc_syncwarp),
+                            Mask);
 
   llvm::Value *IncCounterVal =
       CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
@@ -2864,14 +2471,15 @@
   CGBuilderTy &Bld = CGF.Builder;
   CGOpenMPRuntimeGPU &RT =
       *(static_cast<CGOpenMPRuntimeGPU *>(&CGM.getOpenMPRuntime()));
+  llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder();
 
   CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
   assert(Size.getQuantity() <= 8 &&
          "Unsupported bitwidth in shuffle instruction.");
 
-  OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
-                                         ? OMPRTL_NVPTX__kmpc_shuffle_int32
-                                         : OMPRTL_NVPTX__kmpc_shuffle_int64;
+  RuntimeFunction ShuffleFn = Size.getQuantity() <= 4
+                                  ? OMPRTL___kmpc_shuffle_int32
+                                  : OMPRTL___kmpc_shuffle_int64;
 
   // Cast all types to 32- or 64-bit values before calling shuffle routines.
   QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
@@ -2881,7 +2489,8 @@
       Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
 
   llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
-      RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
+      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn),
+      {ElemCast, Offset, WarpSize});
 
   return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
 }
@@ -4391,8 +4000,8 @@
                            InterWarpCopyFn};
 
     Res = CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(
-            OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2),
         Args);
   } else {
     assert(TeamsReduction && "expected teams reduction.");
@@ -4441,8 +4050,8 @@
         BufferToGlobalRedFn};
 
     Res = CGF.EmitRuntimeCall(
-        createNVPTXRuntimeFunction(
-            OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
+        OMPBuilder.getOrCreateRuntimeFunction(
+            CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2),
         Args);
   }
 
@@ -4477,7 +4086,8 @@
   RegionCodeGenTy RCG(CodeGen);
   NVPTXActionTy Action(
       nullptr, llvm::None,
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+      OMPBuilder.getOrCreateRuntimeFunction(
+          CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
       EndArgs);
   RCG.setAction(Action);
   RCG(CGF);
@@ -4488,7 +4098,7 @@
 
 const VarDecl *
 CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD,
-                                         const VarDecl *NativeParam) const {
+                                       const VarDecl *NativeParam) const {
   if (!NativeParam->getType()->isReferenceType())
     return NativeParam;
   QualType ArgType = NativeParam->getType();
@@ -4638,9 +4248,9 @@
       CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
   llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
   llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
-  CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
-      DataSharingArgs);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_get_shared_variables),
+                      DataSharingArgs);
 
   // Retrieve the shared variables from the list of references returned
   // by the runtime. Pass the variables to the outlined function.
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -306,6 +306,9 @@
   CodeGenModule &CGM;
   StringRef FirstSeparator, Separator;
 
+  /// An OpenMP-IR-Builder instance.
+  llvm::OpenMPIRBuilder OMPBuilder;
+
   /// Constructor allowing to redefine the name separator for the variables.
   explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                            StringRef Separator);
@@ -386,8 +389,6 @@
   llvm::Value *getCriticalRegionLock(StringRef CriticalName);
 
 private:
-  /// An OpenMP-IR-Builder instance.
-  llvm::OpenMPIRBuilder OMPBuilder;
 
   /// Map for SourceLocation and OpenMP runtime library debug locations.
   typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to