[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG5ab6aedda9d9: [OpenMP] Folding threadLimit and numThreads when single value in kernels (authored by Jose M Monsalve Diaz, committed by tianshilei1992). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D106033/new/ https://reviews.llvm.org/D106033 Files: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll openmp/libomptarget/deviceRTLs/target_interface.h Index: openmp/libomptarget/deviceRTLs/target_interface.h === --- openmp/libomptarget/deviceRTLs/target_interface.h +++ openmp/libomptarget/deviceRTLs/target_interface.h @@ -18,8 +18,8 @@ // Calls to the NVPTX layer (assuming 1D layout) EXTERN int __kmpc_get_hardware_thread_id_in_block(); EXTERN int GetBlockIdInKernel(); -EXTERN int __kmpc_get_hardware_num_blocks(); -EXTERN int __kmpc_get_hardware_num_threads_in_block(); +EXTERN NOINLINE int __kmpc_get_hardware_num_blocks(); +EXTERN NOINLINE int __kmpc_get_hardware_num_threads_in_block(); EXTERN unsigned GetWarpId(); EXTERN unsigned GetWarpSize(); EXTERN unsigned GetLaneId(); Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll === --- /dev/null +++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s +target triple = "nvptx64" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@kernel0_exec_mode = weak constant i8 1 + +@G = external global i32 +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 +;. +define weak void @kernel0() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel0() +; CHECK: #[[ATTR0:[0-9]+]] { +; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT:call void @helper0() +; CHECK-NEXT:call void @helper1() +; CHECK-NEXT:call void @helper2() +; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT:ret void +; + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + call void @helper0() + call void @helper1() + call void @helper2() + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + ret void +} + +@kernel1_exec_mode = weak constant i8 1 + +define weak void @kernel1() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel1() +; CHECK: #[[ATTR0]] { +; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT:call void @helper1() +; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT:ret void +; + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + call void @helper1() + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + ret void +} + +@kernel2_exec_mode = weak constant i8 1 + +define weak void @kernel2() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel2() +; CHECK: #[[ATTR0]] { +; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) +; CHECK-NEXT:call void @helper0() +; CHECK-NEXT:call void @helper1() +; CHECK-NEXT:call void @helper2() +; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT:ret void +; + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) + call void @helper0() + call void @helper1() + call void @helper2() + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + ret void +} + +define internal void @helper0() { +; CHECK-LABEL: define {{[^@]+}}@helper0() {{#[0-9]+}} { +; CHECK-NEXT:store i32 666, i32* @G, align 4 +; CHECK-NEXT:ret void +; + %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() + store i32 %threadLimit, i32* @G + ret void +} + +define internal void @helper1() { +; CHECK-LABEL: define {{[^@]+}}@helper1() {{#[0-9]+}} { +; CHECK-NEXT:br label [[F:%.*]] +; CHECK: t: +; CHECK-NEXT:unreachable +; CHECK: f: +; CHECK-NEXT:ret void +; + %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() + %c = icmp eq i32 %threadLimit, 666 + br i1 %c, label %f, label %t +t: + call void @helper0() + ret void +f: + ret void +} + +define internal void @helper2() { +; CHECK-LABEL: define {{[^@]+}}@helper2() {{#[0-9]+}} { +; CHECK-NEXT:store i32 666, i32* @G +; CHECK-NEXT:
[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels
josemonsalve2 updated this revision to Diff 362248. josemonsalve2 added a comment. Rebasing to main this time for real Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D106033/new/ https://reviews.llvm.org/D106033 Files: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll openmp/libomptarget/deviceRTLs/target_interface.h Index: openmp/libomptarget/deviceRTLs/target_interface.h === --- openmp/libomptarget/deviceRTLs/target_interface.h +++ openmp/libomptarget/deviceRTLs/target_interface.h @@ -18,8 +18,8 @@ // Calls to the NVPTX layer (assuming 1D layout) EXTERN int __kmpc_get_hardware_thread_id_in_block(); EXTERN int GetBlockIdInKernel(); -EXTERN int __kmpc_get_hardware_num_blocks(); -EXTERN int __kmpc_get_hardware_num_threads_in_block(); +EXTERN NOINLINE int __kmpc_get_hardware_num_blocks(); +EXTERN NOINLINE int __kmpc_get_hardware_num_threads_in_block(); EXTERN unsigned GetWarpId(); EXTERN unsigned GetWarpSize(); EXTERN unsigned GetLaneId(); Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll === --- /dev/null +++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s +target triple = "nvptx64" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@kernel0_exec_mode = weak constant i8 1 + +@G = external global i32 +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 +;. +define weak void @kernel0() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel0() +; CHECK: #[[ATTR0:[0-9]+]] { +; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT:call void @helper0() +; CHECK-NEXT:call void @helper1() +; CHECK-NEXT:call void @helper2() +; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT:ret void +; + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + call void @helper0() + call void @helper1() + call void @helper2() + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + ret void +} + +@kernel1_exec_mode = weak constant i8 1 + +define weak void @kernel1() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel1() +; CHECK: #[[ATTR0]] { +; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT:call void @helper1() +; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT:ret void +; + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + call void @helper1() + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + ret void +} + +@kernel2_exec_mode = weak constant i8 1 + +define weak void @kernel2() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel2() +; CHECK: #[[ATTR0]] { +; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) +; CHECK-NEXT:call void @helper0() +; CHECK-NEXT:call void @helper1() +; CHECK-NEXT:call void @helper2() +; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT:ret void +; + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) + call void @helper0() + call void @helper1() + call void @helper2() + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + ret void +} + +define internal void @helper0() { +; CHECK-LABEL: define {{[^@]+}}@helper0() {{#[0-9]+}} { +; CHECK-NEXT:store i32 666, i32* @G, align 4 +; CHECK-NEXT:ret void +; + %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() + store i32 %threadLimit, i32* @G + ret void +} + +define internal void @helper1() { +; CHECK-LABEL: define {{[^@]+}}@helper1() {{#[0-9]+}} { +; CHECK-NEXT:br label [[F:%.*]] +; CHECK: t: +; CHECK-NEXT:unreachable +; CHECK: f: +; CHECK-NEXT:ret void +; + %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() + %c = icmp eq i32 %threadLimit, 666 + br i1 %c, label %f, label %t +t: + call void @helper0() + ret void +f: + ret void +} + +define internal void @helper2() { +; CHECK-LABEL: define {{[^@]+}}@helper2() {{#[0-9]+}} { +; CHECK-NEXT:store i32 666, i32* @G +; CHECK-NEXT:ret void +; + %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block() + store i32 %threadLimit, i32* @G + ret void +} + +declare i32 @__kmpc_get_hardware_num_threads_in_block() +declare i32
[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels
josemonsalve2 updated this revision to Diff 362234. josemonsalve2 added a comment. Resync again Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D106033/new/ https://reviews.llvm.org/D106033 Files: clang/test/OpenMP/declare_target_codegen_globalization.cpp clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp clang/test/OpenMP/nvptx_target_codegen.cpp clang/test/OpenMP/nvptx_target_parallel_codegen.cpp clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp clang/test/OpenMP/target_map_codegen_03.cpp clang/test/OpenMP/target_parallel_codegen.cpp clang/test/OpenMP/target_parallel_for_codegen.cpp clang/test/OpenMP/target_parallel_for_simd_codegen.cpp clang/test/OpenMP/target_parallel_if_codegen.cpp clang/test/OpenMP/target_parallel_num_threads_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp llvm/include/llvm/Frontend/OpenMP/OMPKinds.def llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll openmp/libomptarget/deviceRTLs/target_interface.h ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels
josemonsalve2 updated this revision to Diff 362226. josemonsalve2 added a comment. Herald added a project: clang. Herald added a subscriber: cfe-commits. Sync to main Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D106033/new/ https://reviews.llvm.org/D106033 Files: clang/test/OpenMP/declare_target_codegen_globalization.cpp clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp clang/test/OpenMP/nvptx_target_codegen.cpp clang/test/OpenMP/nvptx_target_parallel_codegen.cpp clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp clang/test/OpenMP/target_map_codegen_03.cpp clang/test/OpenMP/target_parallel_codegen.cpp clang/test/OpenMP/target_parallel_for_codegen.cpp clang/test/OpenMP/target_parallel_for_simd_codegen.cpp clang/test/OpenMP/target_parallel_if_codegen.cpp clang/test/OpenMP/target_parallel_num_threads_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp llvm/include/llvm/Frontend/OpenMP/OMPKinds.def llvm/lib/Transforms/IPO/OpenMPOpt.cpp llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll openmp/libomptarget/deviceRTLs/target_interface.h ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits