[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels

2021-07-27 Thread Shilei Tian via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5ab6aedda9d9: [OpenMP] Folding threadLimit and numThreads
when single value in kernels (authored by Jose M Monsalve Diaz,
committed by tianshilei1992).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106033/new/

https://reviews.llvm.org/D106033

Files:
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
  openmp/libomptarget/deviceRTLs/target_interface.h

Index: openmp/libomptarget/deviceRTLs/target_interface.h
===
--- openmp/libomptarget/deviceRTLs/target_interface.h
+++ openmp/libomptarget/deviceRTLs/target_interface.h
@@ -18,8 +18,8 @@
 // Calls to the NVPTX layer (assuming 1D layout)
 EXTERN int __kmpc_get_hardware_thread_id_in_block();
 EXTERN int GetBlockIdInKernel();
-EXTERN int __kmpc_get_hardware_num_blocks();
-EXTERN int __kmpc_get_hardware_num_threads_in_block();
+EXTERN NOINLINE int __kmpc_get_hardware_num_blocks();
+EXTERN NOINLINE int __kmpc_get_hardware_num_threads_in_block();
 EXTERN unsigned GetWarpId();
 EXTERN unsigned GetWarpSize();
 EXTERN unsigned GetLaneId();
Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
===
--- /dev/null
+++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
+target triple = "nvptx64"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@kernel0_exec_mode = weak constant i8 1
+
+@G = external global i32
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
+;.
+define weak void @kernel0() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel0()
+; CHECK: #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT:call void @helper0()
+; CHECK-NEXT:call void @helper1()
+; CHECK-NEXT:call void @helper2()
+; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT:ret void
+;
+  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+  call void @helper0()
+  call void @helper1()
+  call void @helper2()
+  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+  ret void
+}
+
+@kernel1_exec_mode = weak constant i8 1
+
+define weak void @kernel1() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1()
+; CHECK: #[[ATTR0]] {
+; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT:call void @helper1()
+; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT:ret void
+;
+  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+  call void @helper1()
+  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+  ret void
+}
+
+@kernel2_exec_mode = weak constant i8 1
+
+define weak void @kernel2() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel2()
+; CHECK: #[[ATTR0]] {
+; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+; CHECK-NEXT:call void @helper0()
+; CHECK-NEXT:call void @helper1()
+; CHECK-NEXT:call void @helper2()
+; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT:ret void
+;
+  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+  call void @helper0()
+  call void @helper1()
+  call void @helper2()
+  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+  ret void
+}
+
+define internal void @helper0() {
+; CHECK-LABEL: define {{[^@]+}}@helper0() {{#[0-9]+}} {
+; CHECK-NEXT:store i32 666, i32* @G, align 4
+; CHECK-NEXT:ret void
+;
+  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
+  store i32 %threadLimit, i32* @G
+  ret void
+}
+
+define internal void @helper1() {
+; CHECK-LABEL: define {{[^@]+}}@helper1() {{#[0-9]+}} {
+; CHECK-NEXT:br label [[F:%.*]]
+; CHECK:   t:
+; CHECK-NEXT:unreachable
+; CHECK:   f:
+; CHECK-NEXT:ret void
+;
+  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
+  %c = icmp eq i32 %threadLimit, 666
+  br i1 %c, label %f, label %t
+t:
+  call void @helper0()
+  ret void
+f:
+  ret void
+}
+
+define internal void @helper2() {
+; CHECK-LABEL: define {{[^@]+}}@helper2() {{#[0-9]+}} {
+; CHECK-NEXT:store i32 666, i32* @G
+; CHECK-NEXT:  

[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels

2021-07-27 Thread Jose Manuel Monsalve Diaz via Phabricator via cfe-commits
josemonsalve2 updated this revision to Diff 362248.
josemonsalve2 added a comment.

Rebasing to main this time for real


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106033/new/

https://reviews.llvm.org/D106033

Files:
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
  openmp/libomptarget/deviceRTLs/target_interface.h

Index: openmp/libomptarget/deviceRTLs/target_interface.h
===
--- openmp/libomptarget/deviceRTLs/target_interface.h
+++ openmp/libomptarget/deviceRTLs/target_interface.h
@@ -18,8 +18,8 @@
 // Calls to the NVPTX layer (assuming 1D layout)
 EXTERN int __kmpc_get_hardware_thread_id_in_block();
 EXTERN int GetBlockIdInKernel();
-EXTERN int __kmpc_get_hardware_num_blocks();
-EXTERN int __kmpc_get_hardware_num_threads_in_block();
+EXTERN NOINLINE int __kmpc_get_hardware_num_blocks();
+EXTERN NOINLINE int __kmpc_get_hardware_num_threads_in_block();
 EXTERN unsigned GetWarpId();
 EXTERN unsigned GetWarpSize();
 EXTERN unsigned GetLaneId();
Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
===
--- /dev/null
+++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
+target triple = "nvptx64"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@kernel0_exec_mode = weak constant i8 1
+
+@G = external global i32
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
+;.
+define weak void @kernel0() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel0()
+; CHECK: #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT:call void @helper0()
+; CHECK-NEXT:call void @helper1()
+; CHECK-NEXT:call void @helper2()
+; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT:ret void
+;
+  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+  call void @helper0()
+  call void @helper1()
+  call void @helper2()
+  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+  ret void
+}
+
+@kernel1_exec_mode = weak constant i8 1
+
+define weak void @kernel1() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel1()
+; CHECK: #[[ATTR0]] {
+; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT:call void @helper1()
+; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT:ret void
+;
+  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+  call void @helper1()
+  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+  ret void
+}
+
+@kernel2_exec_mode = weak constant i8 1
+
+define weak void @kernel2() #0 {
+; CHECK-LABEL: define {{[^@]+}}@kernel2()
+; CHECK: #[[ATTR0]] {
+; CHECK-NEXT:[[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+; CHECK-NEXT:call void @helper0()
+; CHECK-NEXT:call void @helper1()
+; CHECK-NEXT:call void @helper2()
+; CHECK-NEXT:call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT:ret void
+;
+  %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+  call void @helper0()
+  call void @helper1()
+  call void @helper2()
+  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+  ret void
+}
+
+define internal void @helper0() {
+; CHECK-LABEL: define {{[^@]+}}@helper0() {{#[0-9]+}} {
+; CHECK-NEXT:store i32 666, i32* @G, align 4
+; CHECK-NEXT:ret void
+;
+  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
+  store i32 %threadLimit, i32* @G
+  ret void
+}
+
+define internal void @helper1() {
+; CHECK-LABEL: define {{[^@]+}}@helper1() {{#[0-9]+}} {
+; CHECK-NEXT:br label [[F:%.*]]
+; CHECK:   t:
+; CHECK-NEXT:unreachable
+; CHECK:   f:
+; CHECK-NEXT:ret void
+;
+  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
+  %c = icmp eq i32 %threadLimit, 666
+  br i1 %c, label %f, label %t
+t:
+  call void @helper0()
+  ret void
+f:
+  ret void
+}
+
+define internal void @helper2() {
+; CHECK-LABEL: define {{[^@]+}}@helper2() {{#[0-9]+}} {
+; CHECK-NEXT:store i32 666, i32* @G
+; CHECK-NEXT:ret void
+;
+  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
+  store i32 %threadLimit, i32* @G
+  ret void
+}
+
+declare i32 @__kmpc_get_hardware_num_threads_in_block()
+declare i32

[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels

2021-07-27 Thread Jose Manuel Monsalve Diaz via Phabricator via cfe-commits
josemonsalve2 updated this revision to Diff 362234.
josemonsalve2 added a comment.

Resync again


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106033/new/

https://reviews.llvm.org/D106033

Files:
  clang/test/OpenMP/declare_target_codegen_globalization.cpp
  clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_target_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/target_map_codegen_03.cpp
  clang/test/OpenMP/target_parallel_codegen.cpp
  clang/test/OpenMP/target_parallel_for_codegen.cpp
  clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/target_parallel_if_codegen.cpp
  clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
  openmp/libomptarget/deviceRTLs/target_interface.h

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106033: [OpenMP] Folding threadLimit and numThreads when single value in kernels

2021-07-27 Thread Jose Manuel Monsalve Diaz via Phabricator via cfe-commits
josemonsalve2 updated this revision to Diff 362226.
josemonsalve2 added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Sync to main


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106033/new/

https://reviews.llvm.org/D106033

Files:
  clang/test/OpenMP/declare_target_codegen_globalization.cpp
  clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_target_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/target_map_codegen_03.cpp
  clang/test/OpenMP/target_parallel_codegen.cpp
  clang/test/OpenMP/target_parallel_for_codegen.cpp
  clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/target_parallel_if_codegen.cpp
  clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp
  clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Transforms/IPO/OpenMPOpt.cpp
  llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
  openmp/libomptarget/deviceRTLs/target_interface.h

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits