================
@@ -0,0 +1,215 @@
+// RUN: fir-opt --split-input-file --omp-stack-to-shared %s | FileCheck %s
+
+module attributes {omp.is_target_device = true} {
+ omp.declare_reduction @add_reduction_i32 : i32 init {
+ ^bb0(%arg0: i32):
+ %c0_i32 = arith.constant 0 : i32
+ omp.yield(%c0_i32 : i32)
+ } combiner {
+ ^bb0(%arg0: i32, %arg1: i32):
+ %0 = arith.addi %arg0, %arg1 : i32
+ omp.yield(%0 : i32)
+ }
+
+ omp.private {type = private} @privatizer_i32 : i32
+ omp.private {type = firstprivate} @firstprivatizer_i32 : i32 copy {
+ ^bb0(%arg0: i32, %arg1: i32):
+ omp.yield(%arg0 : i32)
+ }
+
+ // Verify that target device functions are searched for allocas shared across
+ // threads of a parallel region.
+ //
+ // Also ensure that all fir.alloca information is adequately forwarded to the
+ // new allocation, that uses of the allocation through hlfir.declare are
+ // detected and that only the expected types of uses (parallel reduction and
+ // non-private uses inside of a parallel region) are replaced.
+ // CHECK-LABEL: func.func @standalone_func
+ func.func @standalone_func(%lb: i32, %ub: i32, %step: i32) attributes
{omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause
= (to)>} {
+ // CHECK: %[[ALLOC_0:.*]] = omp.alloc_shared_mem i32 {uniq_name = "x"} :
!fir.ref<i32>
+ %0 = fir.alloca i32 {uniq_name = "x"}
+ %c = arith.constant 1 : index
+ // CHECK: %[[ALLOC_1:.*]] = omp.alloc_shared_mem !fir.char<1,?>(%[[C:.*]]
: index), %[[C]] {bindc_name = "y", uniq_name = "y"} : !fir.ref<!fir.char<1,?>>
+ %1 = fir.alloca !fir.char<1,?>(%c : index), %c {bindc_name = "y",
uniq_name = "y"}
+ // CHECK: %{{.*}}:2 = hlfir.declare %[[ALLOC_1]] typeparams %[[C]]
{uniq_name = "y"} : (!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>,
!fir.ref<!fir.char<1,?>>)
+ %decl:2 = hlfir.declare %1 typeparams %c {uniq_name = "y"} :
(!fir.ref<!fir.char<1,?>>, index) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+ // CHECK: %{{.*}} = fir.alloca i32 {uniq_name = "z"}
+ %2 = fir.alloca i32 {uniq_name = "z"}
+ // CHECK: %[[ALLOC_2:.*]] = omp.alloc_shared_mem i32 {uniq_name = "a"} :
!fir.ref<i32>
+ %3 = fir.alloca i32 {uniq_name = "a"}
+ // CHECK: %{{.*}} = fir.alloca i32 {uniq_name = "b"}
+ %4 = fir.alloca i32 {uniq_name = "b"}
+ omp.parallel reduction(@add_reduction_i32 %0 -> %arg0 : !fir.ref<i32>) {
+ // CHECK: %{{.*}} = fir.alloca i32 {uniq_name = "c"}
+ %5 = fir.alloca i32 {uniq_name = "c"}
+ %6:2 = fir.unboxchar %decl#0 : (!fir.boxchar<1>) ->
(!fir.ref<!fir.char<1,?>>, index)
+ omp.wsloop private(@privatizer_i32 %2 -> %arg1, @firstprivatizer_i32 %3
-> %arg2 : !fir.ref<i32>, !fir.ref<i32>) {
+ omp.loop_nest (%arg3) : i32 = (%lb) to (%ub) inclusive step (%step) {
+ %7 = fir.load %5 : !fir.ref<i32>
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ %5 = fir.load %4 : !fir.ref<i32>
+ // CHECK: omp.free_shared_mem %[[ALLOC_0]] : !fir.ref<i32>
+ // CHECK-NEXT: omp.free_shared_mem %[[ALLOC_1]] : !fir.ref<!fir.char<1,?>>
+ // CHECK-NEXT: omp.free_shared_mem %[[ALLOC_2]] : !fir.ref<i32>
+ // CHECK-NEXT: return
+ return
+ }
+
+ // Verify that generic target regions are searched for allocas shared across
+ // threads of a parallel region.
+ // CHECK-LABEL: func.func @target_generic
+ func.func @target_generic() {
+ // CHECK: omp.target
+ omp.target {
+ %c = arith.constant 0 : i32
+ // CHECK: %[[ALLOC_0:.*]] = omp.alloc_shared_mem i32 {uniq_name = "x"} :
!fir.ref<i32>
+ %0 = fir.alloca i32 {uniq_name = "x"}
----------------
Meinersbur wrote:
`omp.alloc_shared_mem` calls `__kmpc_alloc_shared`, i.e. it allocates team-private memory.
But shouldn't the `fir.alloca` here be accessible from all teams, i.e. global in the
sense of `llvm_omp_target_alloc_device`/`TARGET_ALLOC_DEVICE`?
https://github.com/llvm/llvm-project/pull/161863
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits