Author: Joseph Huber Date: 2022-02-21T13:49:10-08:00 New Revision: 9bf8897c84f797c98ec4c2e1475f5f1539f8277b
URL: https://github.com/llvm/llvm-project/commit/9bf8897c84f797c98ec4c2e1475f5f1539f8277b DIFF: https://github.com/llvm/llvm-project/commit/9bf8897c84f797c98ec4c2e1475f5f1539f8277b.diff LOG: [OpenMP] Add RTL function to externalization RAII This patch adds the '__kmpc_get_hardware_num_threads_in_block' OpenMP RTL function to the externalization RAII struct. This was getting optimized out and then being replaced with an undefined value once added back in, causing bugs for complex reductions. Fixes #53909. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D120076 (cherry picked from commit 74cacf212bb31f8ba837b7eb2434258dd79eaccb) Added: Modified: llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 520b6ebf9e74f..5113c0c67acc6 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2119,6 +2119,8 @@ struct OpenMPOpt { OMPRTL___kmpc_barrier_simple_generic); ExternalizationRAII ThreadId(OMPInfoCache, OMPRTL___kmpc_get_hardware_thread_id_in_block); + ExternalizationRAII NumThreads( + OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block); ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size); registerAAs(IsModulePass); diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index b72031a9b68c0..57eaebc7e141c 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -178,7 +178,16 @@ entry: ret void } -declare i32 @__kmpc_get_hardware_num_threads_in_block() +define internal i32 @__kmpc_get_hardware_num_threads_in_block() { +; CHECK-LABEL: define 
{{[^@]+}}@__kmpc_get_hardware_num_threads_in_block +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: [[RET:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block_dummy() +; CHECK-NEXT: ret i32 [[RET]] +; + %ret = call i32 @__kmpc_get_hardware_num_threads_in_block_dummy() + ret i32 %ret +} +declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy() declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1 declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1 declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
