https://gcc.gnu.org/g:e361f9db605ec2601de633e302b4a77ff8764948
commit e361f9db605ec2601de633e302b4a77ff8764948 Author: Cesar Philippidis <ce...@codesourcery.com> Date: Thu Apr 17 14:10:11 2025 +0000 Fortran "declare create"/allocate support for OpenACC This patch incorporates these commits from OG14 branch: 65be1389eeda9b3b97f6587721215c3f31bd7f98 9d43e819d88f97c7ade7f8c95c35ea3464ea7771 f2cf2b994c4d8c871fad5502ffb9aaee9ea4f4e0 2770ce41615557e595065ce0c5db71e9f3d82b0a a29e58f4b314862a72730119f85e9125879abf0b ffd990543f805ed448aaa355d190f37103f8f1f0 gcc/ChangeLog * gimplify.cc (omp_group_base): Handle GOMP_MAP_DECLARE_ALLOCATE and GOMP_MAP_DECLARE_DEALLOCATE. (gimplify_adjust_omp_clauses): Likewise. * omp-low.cc (scan_sharing_clauses): Update handling of OpenACC declare create, declare copyin and declare deviceptr to have local lifetimes. (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed to. Add new orig_type argument. Use VIEW_CONVERT also for vectors. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to convert_from_firstprivate_int call. Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. * tree-pretty-print.cc (dump_omp_clause): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. gcc/fortran/ChangeLog * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. (gfc_omp_clauses): Add update_allocatable. * trans-array.cc (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create attribute set. * trans-decl.cc (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to match OpenACC 2.5 semantics. * trans-openmp.cc (gfc_omp_check_optional_argument): Handle non-decl case. (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside acc update directives. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.cc (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_declare_create attribute set. (gfc_trans_deallocate): Likewise. * trans.h (gfc_trans_oacc_declare_allocate): Declare. gcc/testsuite/ChangeLog * gfortran.dg/goacc/declare-allocatable-1.f90: New test. * gfortran.dg/goacc/declare-3.f95: Adjust expected dump output. include/ChangeLog * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. libgomp/ChangeLog * libgomp.h (gomp_acc_declare_allocate): Remove prototype. * oacc-mem.c (gomp_acc_declare_allocate): New function. (find_group_last): Handle GOMP_MAP_DECLARE_ALLOCATE and GOMP_MAP_DECLARE_DEALLOCATE groupings. (goacc_enter_data_internal): Fix kind check for GOMP_MAP_DECLARE_ALLOCATE. Pass new pointer argument to gomp_acc_declare_allocate. Unlock mutex before calling gomp_acc_declare_allocate and relock it afterwards. (goacc_exit_data_internal): Unlock device mutex around gomp_acc_declare_allocate call. Pass new pointer argument. Handle group pointer mapping for deallocate. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90: Adjust. * testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90: Likewise. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90: Adjust. * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90: Likewise. * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1.f90: New test. Co-Authored-By: Julian Brown <jul...@codesourcery.com> Co-Authored-By: Kwok Cheung Yeung <k...@codesourcery.com> Co-Authored-By: Tobias Burnus <tob...@codesourcery.com> Co-Authored-By: Thomas Schwinge <tho...@codesourcery.com> Co-Authored-By: Paul-Antoine Arras <par...@baylibre.com> Diff: --- gcc/fortran/gfortran.h | 6 +- gcc/fortran/trans-array.cc | 4 + gcc/fortran/trans-decl.cc | 4 +- gcc/fortran/trans-openmp.cc | 59 ++- gcc/fortran/trans-stmt.cc | 12 + gcc/fortran/trans.h | 1 + gcc/gimplify.cc | 29 +- gcc/omp-low.cc | 63 +++- gcc/testsuite/gfortran.dg/goacc/declare-3.f95 | 3 +- .../gfortran.dg/goacc/declare-allocatable-1.f90 | 25 ++ gcc/tree-pretty-print.cc | 6 + include/gomp-constants.h | 5 + libgomp/libgomp.h | 2 - libgomp/oacc-mem.c | 78 +++- .../libgomp.oacc-fortran/allocatable-scalar.f90 | 33 ++ .../declare-allocatable-1-directive.f90 | 7 +- .../declare-allocatable-1-runtime.f90 | 7 +- .../libgomp.oacc-fortran/declare-allocatable-1.f90 | 6 +- .../libgomp.oacc-fortran/declare-allocatable-2.f90 | 48 +++ .../libgomp.oacc-fortran/declare-allocatable-3.f90 | 219 +++++++++++ .../libgomp.oacc-fortran/declare-allocatable-4.f90 | 66 ++++ ...re-allocatable-array_descriptor-1-directive.f90 | 41 +-- ...lare-allocatable-array_descriptor-1-runtime.f90 | 107 ++++-- .../declare-allocatable-array_descriptor-1.f90 | 405 +++++++++++++++++++++ 24 files changed, 1131 insertions(+), 105 deletions(-) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 5ef70378b1b5..5b18bcadef8f 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1348,7 +1348,9 @@ enum gfc_omp_map_op OMP_MAP_PRESENT_TOFROM, OMP_MAP_ALWAYS_PRESENT_TO, OMP_MAP_ALWAYS_PRESENT_FROM, - OMP_MAP_ALWAYS_PRESENT_TOFROM + OMP_MAP_ALWAYS_PRESENT_TOFROM, + OMP_MAP_DECLARE_ALLOCATE, + OMP_MAP_DECLARE_DEALLOCATE }; enum gfc_omp_defaultmap @@ -1675,7 +1677,7 @@ typedef struct gfc_omp_clauses unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; unsigned par_auto:1, gang_static:1; unsigned if_present:1, finalize:1; - unsigned nohost:1; + unsigned nohost:1, update_allocatable:1; locus loc; } gfc_omp_clauses; diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 960613167f72..92254fe11240 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -6504,6 +6504,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false, non_ulimate_coarray_ptr_comp; tree omp_cond = NULL_TREE, omp_alt_alloc = NULL_TREE; + bool oacc_declare = false; ref = expr->ref; @@ -6518,6 +6519,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, allocatable = expr->symtree->n.sym->attr.allocatable; dimension = expr->symtree->n.sym->attr.dimension; non_ulimate_coarray_ptr_comp = false; + oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create; } else { @@ -6755,6 +6757,8 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); tmp = fold_convert (gfc_array_index_type, element_size); gfc_conv_descriptor_span_set (&set_descriptor_block, se->expr, tmp); + if (oacc_declare) + gfc_trans_oacc_declare_allocate (&set_descriptor_block, expr, true); } set_descriptor = gfc_finish_block (&set_descriptor_block); diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index 4f2ea7636b77..7ec3a1f5ea6a 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -6941,10 +6941,10 @@ find_module_oacc_declare_clauses (gfc_symbol *sym) gfc_omp_map_op map_op; if (sym->attr.oacc_declare_create) - map_op = OMP_MAP_FORCE_ALLOC; + map_op = OMP_MAP_ALLOC; if (sym->attr.oacc_declare_copyin) - map_op = OMP_MAP_FORCE_TO; + map_op = OMP_MAP_TO; if (sym->attr.oacc_declare_deviceptr) map_op = OMP_MAP_FORCE_DEVICEPTR; diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 9767f2023860..c76c98bf3a4b 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -102,6 +102,10 @@ gfc_omp_check_optional_argument (tree decl, bool for_present_check) if (!for_present_check) return gfc_omp_is_optional_argument (decl) ? decl : NULL_TREE; + if (!DECL_P (decl)) + return fold_build2_loc (input_location, NE_EXPR, boolean_type_node, + decl, null_pointer_node); + if (!DECL_LANG_SPECIFIC (decl)) return NULL_TREE; @@ -4117,6 +4121,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_MAP_FORCE_DEVICEPTR: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR); break; + case OMP_MAP_DECLARE_ALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_ALLOCATE); + break; + case OMP_MAP_DECLARE_DEALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_DEALLOCATE); + break; default: gcc_unreachable (); } @@ -4239,6 +4249,14 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, gmk = GOMP_MAP_DELETE; else if (op == EXEC_OMP_TARGET_EXIT_DATA) gmk = GOMP_MAP_RELEASE; + else if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + && n->sym->attr.oacc_declare_create) + { + if (clauses->update_allocatable) + gmk = GOMP_MAP_ALWAYS_POINTER; + else + gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; + } tree size; if (gmk == GOMP_MAP_RELEASE || gmk == GOMP_MAP_DELETE) size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); @@ -5978,12 +5996,14 @@ gfc_trans_oacc_executable_directive (gfc_code *code) { stmtblock_t block; tree stmt, oacc_clauses; + gfc_omp_clauses *clauses = code->ext.omp_clauses; enum tree_code construct_code; switch (code->op) { case EXEC_OACC_UPDATE: construct_code = OACC_UPDATE; + clauses->update_allocatable = 1; break; case EXEC_OACC_ENTER_DATA: construct_code = OACC_ENTER_DATA; @@ -5999,8 +6019,8 @@ gfc_trans_oacc_executable_directive (gfc_code *code) } gfc_start_block (&block); - oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, - code->loc, false, true, code->op); + oacc_clauses = gfc_trans_omp_clauses (&block, clauses, code->loc, + false, true, code->op); stmt = build1_loc (input_location, construct_code, void_type_node, oacc_clauses); gfc_add_expr_to_block (&block, stmt); @@ -9324,6 +9344,41 @@ gfc_trans_oacc_declare (gfc_code *code) return gfc_finish_block (&block); } +/* Create an OpenACC enter or exit data construct for an OpenACC declared + variable that has been allocated or deallocated. */ + +tree +gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr, + bool allocate) +{ + gfc_omp_clauses *clauses = gfc_get_omp_clauses (); + gfc_omp_namelist *p = gfc_get_omp_namelist (); + tree oacc_clauses, stmt; + enum tree_code construct_code; + + p->sym = expr->symtree->n.sym; + p->where = expr->where; + + if (allocate) + { + p->u.map.op = OMP_MAP_DECLARE_ALLOCATE; + construct_code = OACC_ENTER_DATA; + } + else + { + p->u.map.op = OMP_MAP_DECLARE_DEALLOCATE; + construct_code = OACC_EXIT_DATA; + } + clauses->lists[OMP_LIST_MAP] = p; + + oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where); + stmt = build1_loc (input_location, construct_code, void_type_node, + oacc_clauses); + gfc_add_expr_to_block (block, stmt); + + return stmt; +} + tree gfc_trans_oacc_directive (gfc_code *code) { diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index 37f8acaea3f6..2dfecfff4e8c 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -7330,6 +7330,10 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) else gfc_allocate_using_malloc (&se.pre, se.expr, memsz, stat, omp_cond, omp_alt_alloc, succ_add_expr); + + /* Allocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, true); } else { @@ -7876,6 +7880,10 @@ gfc_trans_deallocate (gfc_code *code) if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) { + if (!is_coarray + && expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + gfc_coarray_deregtype caf_dtype; if (is_coarray) @@ -7929,6 +7937,10 @@ gfc_trans_deallocate (gfc_code *code) } else { + /* Deallocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, label_finish, false, al->expr, al->expr->ts, NULL_TREE, diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index ae7be9f81a8c..f4a562fd7fb7 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -851,6 +851,7 @@ bool gfc_omp_private_debug_clause (tree, bool); bool gfc_omp_private_outer_ref (tree); struct gimplify_omp_ctx; void gfc_omp_firstprivatize_type_sizes (struct gimplify_omp_ctx *, tree); +tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, bool); /* In trans-intrinsic.cc. */ void gfc_conv_intrinsic_mvbits (gfc_se *, gfc_actual_arglist *, diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index bd458161ddae..17e5ca439d13 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -10755,6 +10755,8 @@ omp_group_base (omp_mapping_group *grp, unsigned int *chained, case GOMP_MAP_FIRSTPRIVATE_INT: case GOMP_MAP_USE_DEVICE_PTR: case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: return NULL_TREE; case GOMP_MAP_FIRSTPRIVATE_POINTER: @@ -14737,7 +14739,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, enum tree_code code) { struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp; - tree *orig_list_p = list_p; + tree *prev_list_p = NULL, *orig_list_p = list_p; tree c, decl; bool has_inscan_reductions = false; @@ -15048,9 +15050,15 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: - if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER - || (OMP_CLAUSE_MAP_KIND (c) - == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) + if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER + || (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) + && !(prev_list_p + && OMP_CLAUSE_CODE (*prev_list_p) == OMP_CLAUSE_MAP + && ((OMP_CLAUSE_MAP_KIND (*prev_list_p) + == GOMP_MAP_DECLARE_ALLOCATE) + || (OMP_CLAUSE_MAP_KIND (*prev_list_p) + == GOMP_MAP_DECLARE_DEALLOCATE)))) /* For target {,enter ,exit }data only the array slice is mapped, but not the pointer to it. */ remove = true; @@ -15392,6 +15400,19 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))) move_attach = true; + if (!remove && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ALWAYS_POINTER + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_TO_PSET + && OMP_CLAUSE_CHAIN (c) + && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (c)) == OMP_CLAUSE_MAP + && ((OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c)) + == GOMP_MAP_ALWAYS_POINTER) + || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c)) + == GOMP_MAP_ATTACH_DETACH) + || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c)) + == GOMP_MAP_TO_PSET))) + prev_list_p = list_p; + break; case OMP_CLAUSE_TO: diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 01882516ae23..0d6f947c2910 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -1708,7 +1708,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) && varpool_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target link", - DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl)) + && !is_gimple_omp_oacc (ctx->stmt)) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -12875,7 +12876,7 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) { tree type = TREE_TYPE (var), new_type = NULL_TREE; - if (omp_privatize_by_reference (var)) + if (omp_privatize_by_reference (var) || POINTER_TYPE_P (type)) { type = TREE_TYPE (type); tree tmp = create_tmp_var (type); @@ -12900,7 +12901,8 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) /* Like convert_to_firstprivate_int, but restore the original type. */ static tree -convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref, + gimple_seq *gs) { tree type = TREE_TYPE (var); tree new_type = NULL_TREE; @@ -12909,7 +12911,32 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) gcc_assert (TREE_CODE (var) == MEM_REF); var = TREE_OPERAND (var, 0); - if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + if (is_ref || POINTER_TYPE_P (orig_type)) + { + tree_code code = NOP_EXPR; + + if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE + || VECTOR_TYPE_P (type)) + code = VIEW_CONVERT_EXPR; + + if (code == VIEW_CONVERT_EXPR + && TYPE_SIZE (type) != TYPE_SIZE (orig_type)) + { + tree ptype = build_pointer_type (type); + var = fold_build1 (code, ptype, build_fold_addr_expr (var)); + var = build_simple_mem_ref (var); + } + else + var = fold_build1 (code, type, var); + + tree inst = create_tmp_var (type); + gimplify_assign (inst, var, gs); + var = build_fold_addr_expr (inst); + + return var; + } + + if (INTEGRAL_TYPE_P (var)) return fold_convert (type, var); gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); @@ -12920,16 +12947,8 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) tmp = create_tmp_var (new_type); var = fold_convert (new_type, var); gimplify_assign (tmp, var, gs); - var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); - - if (is_ref) - { - tmp = create_tmp_var (build_pointer_type (type)); - gimplify_assign (tmp, build_fold_addr_expr (var), gs); - var = tmp; - } - return var; + return fold_build1 (VIEW_CONVERT_EXPR, type, tmp); } /* Lower the GIMPLE_OMP_TARGET in the current statement @@ -13087,6 +13106,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_NONCONTIG_ARRAY_ALLOC: case GOMP_MAP_NONCONTIG_ARRAY_FORCE_ALLOC: case GOMP_MAP_NONCONTIG_ARRAY_FORCE_PRESENT: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_LINK: case GOMP_MAP_FORCE_DETACH: gcc_assert (is_gimple_omp_oacc (stmt)); @@ -13191,7 +13212,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && !maybe_lookup_field_in_outer_ctx (var, ctx)) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - x = convert_from_firstprivate_int (x, + x = convert_from_firstprivate_int (x, TREE_TYPE (new_var), omp_privatize_by_reference (var), &fplist); gimplify_assign (new_var, x, &fplist); @@ -13209,13 +13230,20 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_privatize_by_reference (new_var) && (TREE_CODE (var_type) != POINTER_TYPE - || DECL_BY_REFERENCE (var))) + || DECL_BY_REFERENCE (var)) + /* Accelerators may not have alloca, so it's not + possible to privatize local storage for those + objects. */ + && TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (var_type)))) { /* Create a local object to hold the instance value. */ const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); tree inst = create_tmp_var (TREE_TYPE (var_type), id); - gimplify_assign (inst, fold_indirect_ref (x), &fplist); + if (TREE_CODE (var_type) == POINTER_TYPE) + gimplify_assign (inst, x, &fplist); + else + gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } gimplify_assign (new_var, x, &fplist); @@ -13659,9 +13687,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree new_var = lookup_decl (var, ctx); tree type = TREE_TYPE (var); tree inner_type - = omp_privatize_by_reference (var) + = omp_privatize_by_reference (new_var) ? TREE_TYPE (type) : type; if ((FLOAT_TYPE_P (inner_type) || ANY_INTEGRAL_TYPE_P (inner_type)) diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-3.f95 b/gcc/testsuite/gfortran.dg/goacc/declare-3.f95 index 9127cba6600d..c94f515898b6 100644 --- a/gcc/testsuite/gfortran.dg/goacc/declare-3.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/declare-3.f95 @@ -38,8 +38,7 @@ program test use mod_b use mod_d use mod_e - - ! { dg-final { scan-tree-dump {(?n)#pragma acc data map\(force_alloc:d\) map\(force_to:b\) map\(force_alloc:a\)$} original } } + ! { dg-final { scan-tree-dump {(?n)#pragma acc data map\(force_alloc:d\) map\(to:b\) map\(alloc:a\)$} original } } end program test ! { dg-final { scan-tree-dump-times {#pragma acc data} 1 original } } diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 new file mode 100644 index 000000000000..5349e0d5b00e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 @@ -0,0 +1,25 @@ +! Verify that OpenACC declared allocatable arrays have implicit +! OpenACC enter and exit pragmas at the time of allocation and +! deallocation. + +! { dg-additional-options "-fdump-tree-original" } + +program allocate + implicit none + integer, allocatable :: a(:), b + integer, parameter :: n = 100 + integer i + !$acc declare create(a,b) + + allocate (a(n), b) + + !$acc parallel loop copyout(a, b) + do i = 1, n + a(i) = b + end do + + deallocate (a, b) +end program allocate + +! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 2 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 2 "original" } } diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc index 662236f8ef8e..952573323d0d 100644 --- a/gcc/tree-pretty-print.cc +++ b/gcc/tree-pretty-print.cc @@ -1086,6 +1086,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case GOMP_MAP_LINK: pp_string (pp, "link"); break; + case GOMP_MAP_DECLARE_ALLOCATE: + pp_string (pp, "declare_allocate"); + break; + case GOMP_MAP_DECLARE_DEALLOCATE: + pp_string (pp, "declare_deallocate"); + break; case GOMP_MAP_ATTACH: pp_string (pp, "attach"); break; diff --git a/include/gomp-constants.h b/include/gomp-constants.h index 71f827d5346a..217f45747305 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -176,6 +176,11 @@ enum gomp_map_kind /* Decrement usage count and deallocate if zero. */ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_DELETE), + /* Mapping kinds for allocatable arrays. */ + GOMP_MAP_DECLARE_ALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_TO), + GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_FROM), /* The attach/detach mappings below use the OMP_CLAUSE_SIZE field as a bias. This will typically be zero, except when mapping an array slice with a non-zero base. In that case the bias will indicate the diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 34a41e86565e..ac2734bc82c5 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1458,8 +1458,6 @@ enum gomp_map_vars_kind GOMP_MAP_VARS_ENTER_DATA = 8 }; -extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, - unsigned short *); struct gomp_coalesce_buf; extern void gomp_copy_host2dev (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 6062f2061a26..97df6de72d15 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -925,6 +925,35 @@ acc_update_self_async (void *h, size_t s, int async) update_dev_host (0, h, s, async); } +/* Implement "declare allocate" and "declare deallocate" operations. The + device lock must not be held before calling this function. */ + +static void +gomp_acc_declare_allocate (bool allocate, bool pointer, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + gomp_debug (0, " %s: processing\n", __FUNCTION__); + + if (allocate) + { + /* Allocate memory for the array data. */ + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]); + + if (pointer) + { + /* Update the PSET. */ + acc_update_device (hostaddrs[1], sizes[1]); + void *pset = acc_deviceptr (hostaddrs[1]); + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); + } + } + else + /* Deallocate memory for the array data. */ + acc_delete (hostaddrs[0], sizes[0]); + + gomp_debug (0, " %s: end\n", __FUNCTION__); +} + void acc_attach_async (void **hostaddr, int async) { @@ -1056,6 +1085,28 @@ find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds) case GOMP_MAP_ATTACH: break; + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: + { + /* The "declare allocate" and "declare deallocate" mappings can be + used to specify either a scalar allocatable (which just appears as + GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} by itself), or an array + allocatable (which appears as that directive followed by a + GOMP_MAP_TO_PSET and one (or more?) GOMP_MAP_POINTER mappings. */ + if (pos + 1 >= mapnum) + break; + + unsigned char kind1 = kinds[pos + 1] & 0xff; + if (kind1 != GOMP_MAP_TO_PSET) + break; + + pos++; + + while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) + pos++; + } + break; + default: /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other mapping. */ @@ -1121,7 +1172,14 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, n = lookup_host (acc_dev, hostaddrs[i], size); - if (n && struct_p) + if ((kinds[i] & 0xff) == GOMP_MAP_DECLARE_ALLOCATE) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_acc_declare_allocate (true, group_last > i, &hostaddrs[i], + &sizes[i], &kinds[i]); + gomp_mutex_lock (&acc_dev->lock); + } + else if (n && struct_p) { for (size_t j = i + 1; j <= group_last; j++) { @@ -1365,6 +1423,24 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, reference counts ('n->refcount', 'n->dynamic_refcount'). */ break; + case GOMP_MAP_DECLARE_DEALLOCATE: + { + bool deallocate_pointer + = i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_TO_PSET; + gomp_mutex_unlock (&acc_dev->lock); + gomp_acc_declare_allocate (false, deallocate_pointer, + &hostaddrs[i], &sizes[i], &kinds[i]); + gomp_mutex_lock (&acc_dev->lock); + if (deallocate_pointer) + { + i++; + while (i + 1 < mapnum + && (kinds[i + 1] & 0xff) == GOMP_MAP_POINTER) + i++; + } + } + break; + default: gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", kind); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 new file mode 100644 index 000000000000..42b340822888 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 @@ -0,0 +1,33 @@ +! Test non-declared allocatable scalars in OpenACC data clauses. + +! { dg-do run } + +program main + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + + allocate (a) + + a = 50 + + !$acc parallel loop + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) stop 1 + end do + + allocate (c) + + !$acc parallel copyout(c) num_gangs(1) + c = a + !$acc end parallel + + if (c /= a) stop 2 + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 index 759873bad675..bdeabca3eb50 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-directive.f90 @@ -2,11 +2,10 @@ ! { dg-do run } -!TODO-OpenACC-declare-allocate -! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! We've got support for OpenACC "Changes from Version 2.0 to 2.5": ! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". -! Thus, after 'allocate'/before 'deallocate', do -! '!$acc enter data create'/'!$acc exit data delete' manually. +! Yet, after 'allocate'/before 'deallocate', do +! '!$acc enter data create'/'!$acc exit data delete' manually, too. !TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 index e4cb9c378a34..a3e17c82828d 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1-runtime.f90 @@ -2,11 +2,10 @@ ! { dg-do run } -!TODO-OpenACC-declare-allocate -! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! We've got support for OpenACC "Changes from Version 2.0 to 2.5": ! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". -! Thus, after 'allocate'/before 'deallocate', call 'acc_create'/'acc_delete' -! manually. +! Yet, after 'allocate'/before 'deallocate', call 'acc_create'/'acc_delete' +! manually, too. !TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 index 1c8ccd9f61f2..7220661b54aa 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 @@ -1,12 +1,10 @@ ! Test OpenACC 'declare create' with allocatable arrays. ! { dg-do run } +! { dg-additional-options "-Wopenacc-parallelism" } -!TODO-OpenACC-declare-allocate -! Not currently implementing correct '-DACC_MEM_SHARED=0' behavior: -! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! We've got support for OpenACC "Changes from Version 2.0 to 2.5": ! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". -! { dg-xfail-run-if TODO { *-*-* } { -DACC_MEM_SHARED=0 } } !TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 new file mode 100644 index 000000000000..df5ab26b8c26 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 @@ -0,0 +1,48 @@ +! Test declare create with allocatable scalars. + +! { dg-do run } + +program main + use openacc + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + !$acc declare create (c) + + allocate (a) + + a = 50 + + !$acc parallel loop firstprivate(a) + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) stop 1 + end do + + allocate (c) + a = 100 + + if (.not.acc_is_present(c)) stop 2 + + !$acc parallel num_gangs(1) present(c) + c = a + !$acc end parallel + + !$acc update host(c) + if (c /= a) stop 3 + + !$acc parallel loop + do i = 1, n + b(i) = c + end do + + do i = 1, n + if (b(i) /= a) stop 4 + end do + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 new file mode 100644 index 000000000000..c64d4bbe2112 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 @@ -0,0 +1,219 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } +! { dg-additional-options "-Wopenacc-parallelism" } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) stop 1 + if (allocated (b)) stop 2 + + ! Test local usage of an allocated declared array. + + allocate (a) + + if (.not.allocated (a)) stop 3 + if (acc_is_present (a) .neqv. .true.) stop 4 + + allocate (b(n)) + + if (.not.allocated (b)) stop 5 + if (acc_is_present (b) .neqv. .true.) stop 6 + + a = 2.0 + !$acc update device(a) + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) stop 7 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) stop 8 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) stop 9 + if (acc_is_present (b) .neqv. .true.) stop 10 + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) stop 11 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= a+i*2) stop 12 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) stop 13 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) stop 14 + end do + + deallocate (b) + + if (allocated (b)) stop 15 + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) stop 16 + if (acc_is_present (b) .neqv. .true.) stop 17 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) stop 18 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) stop 19 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) stop 20 + if (acc_is_present (b) .neqv. .true.) stop 21 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) stop 22 + + do i = 1, n + if (b(i) /= i*a) stop 23 + end do + + deallocate (a) + deallocate (b) +end program test + +! Set each element in array 'b' at index i to a+i*2. + +subroutine sub1 ! { dg-warning "region is worker partitioned" } + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = a+i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) stop 24 + if (acc_is_present (b) .neqv. .true.) stop 25 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * a; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * a +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 new file mode 100644 index 000000000000..afbe52f07072 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 @@ -0,0 +1,66 @@ +! Test declare create with allocatable arrays and scalars. The unused +! declared array 'b' caused an ICE in the past. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + implicit none + integer :: i + + interface + subroutine sub1 + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) stop 1 + if (allocated (b)) stop 2 + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (a) + allocate (b(n)) + + if (.not.allocated (b)) stop 3 + + call sub1 + + !$acc update self(a) + if (a /= 50) stop 4 + + deallocate (a) + deallocate (b) + +end program test + +! Set 'a' to 50. + +subroutine sub1 + use vars + implicit none + integer i + + a = 50 + !$acc update device(a) +end subroutine sub1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 index 6604f72c5c18..0f4d21a138b1 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 @@ -7,11 +7,10 @@ ! host/device array descriptors. ! { dg-skip-if n/a { *-*-* } { -DACC_MEM_SHARED=1 } } -!TODO-OpenACC-declare-allocate -! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! We've got support for OpenACC "Changes from Version 2.0 to 2.5": ! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". -! Thus, after 'allocate'/before 'deallocate', do -! '!$acc enter data create'/'!$acc exit data delete' manually. +! Yet, after 'allocate'/before 'deallocate', do +! '!$acc enter data create'/'!$acc exit data delete' manually, too. !TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. @@ -101,8 +100,6 @@ program test allocate (b(n1_lb:n1_ub)) call verify_n1_allocated - if (acc_is_present (b)) error stop - !$acc enter data create (b) ! This is now OpenACC "present": if (.not.acc_is_present (b)) error stop ! ..., and got the actual array descriptor installed: @@ -110,15 +107,16 @@ program test call verify_n1_allocated !$acc end serial + !$acc enter data create (b) + if (.not.acc_is_present (b)) error stop + !$acc serial + call verify_n1_allocated + !$acc end serial + do i = n1_lb, n1_ub b(i) = i - 1 end do - ! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify - ! that host-to-device copy doesn't touch the device-side (still initial) - ! array descriptor (but it does copy the array data"). This is here not - ! applicable anymore, as we've already gotten the actual array descriptor - ! installed. Thus now verify that it does copy the array data. call acc_update_device (b) !$acc serial call verify_n1_allocated @@ -143,12 +141,6 @@ program test !TODO 'GOMP_MAP_TO_PSET': ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } - ! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify - ! that device-to-host copy doesn't touch the host-side array descriptor, - ! doesn't copy out the device-side (still initial) array descriptor (but it - ! does copy the array data)". This is here not applicable anymore, as we've - ! already gotten the actual array descriptor installed. Thus now verify that - ! it does copy the array data. call acc_update_self (b) call verify_n1_allocated @@ -223,14 +215,13 @@ program test !$acc exit data delete (b) if (.not.allocated (b)) error stop - if (acc_is_present (b)) error stop - ! The device-side array descriptor doesn't get updated, so 'b' still appears - ! as "allocated": + if (.not.acc_is_present (b)) error stop !$acc serial call verify_n1_allocated !$acc end serial deallocate (b) + !if (acc_is_present (b)) error stop call verify_n1_deallocated (.false.) ! The device-side array descriptor doesn't get updated, so 'b' still appears ! as "allocated": @@ -260,10 +251,13 @@ program test allocate (b(n2_lb:n2_ub)) call verify_n2_allocated - if (acc_is_present (b)) error stop + if (.not.acc_is_present (b)) error stop + !$acc serial + call verify_n2_allocated + !$acc end serial + !$acc enter data create (b) if (.not.acc_is_present (b)) error stop - ! ..., and got the actual array descriptor installed: !$acc serial call verify_n2_allocated !$acc end serial @@ -337,12 +331,13 @@ program test !$acc exit data delete (b) if (.not.allocated (b)) error stop - if (acc_is_present (b)) error stop + if (.not.acc_is_present (b)) error stop !$acc serial call verify_n2_allocated !$acc end serial deallocate (b) + !if (acc_is_present (b)) error stop call verify_n2_deallocated (.false.) !$acc serial call verify_n2_allocated diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 index b27f312631db..0682256dd91b 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-runtime.f90 @@ -7,11 +7,10 @@ ! host/device array descriptors. ! { dg-skip-if n/a { *-*-* } { -DACC_MEM_SHARED=1 } } -!TODO-OpenACC-declare-allocate -! Missing support for OpenACC "Changes from Version 2.0 to 2.5": +! We've got support for OpenACC "Changes from Version 2.0 to 2.5": ! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". -! Thus, after 'allocate'/before 'deallocate', call 'acc_create'/'acc_delete' -! manually. +! Yet, after 'allocate'/before 'deallocate', call 'acc_create'/'acc_delete' +! manually, too. !TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. @@ -101,31 +100,47 @@ program test allocate (b(n1_lb:n1_ub)) call verify_n1_allocated - if (acc_is_present (b)) error stop - call acc_create (b) ! This is now OpenACC "present": if (.not.acc_is_present (b)) error stop - ! This still has the initial array descriptor: + ! ..., and got the actual array descriptor installed: !$acc serial - call verify_initial + call verify_n1_allocated + !$acc end serial + + call acc_create (b) + if (.not.acc_is_present (b)) error stop + !$acc serial + call verify_n1_allocated !$acc end serial do i = n1_lb, n1_ub b(i) = i - 1 end do - ! Verify that host-to-device copy doesn't touch the device-side (still - ! initial) array descriptor (but it does copy the array data). call acc_update_device (b) !$acc serial - call verify_initial + call verify_n1_allocated !$acc end serial b = 40 - ! Verify that device-to-host copy doesn't touch the host-side array - ! descriptor, doesn't copy out the device-side (still initial) array - ! descriptor (but it does copy the array data). + !$acc parallel copyout (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (-1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(from:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyout (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '-1'. + call verify_n1_values (-1) + id1_2 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + call acc_update_self (b) call verify_n1_allocated @@ -142,11 +157,19 @@ program test ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } ! ..., but it's silently skipped in 'GOACC_update'. !$acc serial - call verify_initial + call verify_n1_allocated !$acc end serial b = 41 + !$acc parallel + call verify_n1_values (1) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n1_values (1) + !$acc end parallel + !$acc update self (b) self (id1_2) ! We do have 'GOMP_MAP_TO_PSET' here: ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } } @@ -159,20 +182,9 @@ program test b(i) = b(i) + 2 end do - ! Now install the actual array descriptor, via a data clause for 'b' - ! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in - ! 'gomp_map_vars_internal' is handled as 'declare target', and because of - ! '*(void **) hostaddrs[i] != NULL', we've got 'has_always_ptrset == true', - ! 'always_to_cnt == 1', and therefore 'gomp_map_vars_existing' does update - ! the 'GOMP_MAP_TO_PSET'. - !$acc serial present (b) copyin (id1_1) - call verify_initial - id1_1 = 0 - !$acc end serial - ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1\)$} 1 original } } - !TODO ..., but without an actual use of 'b', the gimplifier removes the - !TODO 'GOMP_MAP_TO_PSET': - ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + ! Now test that (potentially re-)installing the actual array descriptor is a + ! no-op, via a data clause for 'b' (explicit or implicit): must get a + ! 'GOMP_MAP_TO_PSET'. !$acc serial present (b) copyin (id1_2) call verify_n1_allocated !TODO Use of 'b': @@ -203,14 +215,13 @@ program test call acc_delete (b) if (.not.allocated (b)) error stop - if (acc_is_present (b)) error stop - ! The device-side array descriptor doesn't get updated, so 'b' still appears - ! as "allocated": + if (.not.acc_is_present (b)) error stop !$acc serial call verify_n1_allocated !$acc end serial deallocate (b) + !if (acc_is_present (b)) error stop call verify_n1_deallocated (.false.) ! The device-side array descriptor doesn't get updated, so 'b' still appears ! as "allocated": @@ -240,12 +251,15 @@ program test allocate (b(n2_lb:n2_ub)) call verify_n2_allocated - if (acc_is_present (b)) error stop + if (.not.acc_is_present (b)) error stop + !$acc serial + call verify_n2_allocated + !$acc end serial + call acc_create (b) if (.not.acc_is_present (b)) error stop - ! This still has the previous (n1) array descriptor: !$acc serial - call verify_n1_deallocated (.true.) + call verify_n2_allocated !$acc end serial do i = n2_lb, n2_ub @@ -254,11 +268,19 @@ program test call acc_update_device (b) !$acc serial - call verify_n1_deallocated (.true.) + call verify_n2_allocated !$acc end serial b = -40 + !$acc parallel + call verify_n2_values (20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (20) + !$acc end parallel + call acc_update_self (b) call verify_n2_allocated @@ -269,11 +291,19 @@ program test !$acc update device (b) !$acc serial - call verify_n1_deallocated (.true.) + call verify_n2_allocated !$acc end serial b = -41 + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + !$acc update self (b) call verify_n2_allocated @@ -301,12 +331,13 @@ program test call acc_delete (b) if (.not.allocated (b)) error stop - if (acc_is_present (b)) error stop + if (.not.acc_is_present (b)) error stop !$acc serial call verify_n2_allocated !$acc end serial deallocate (b) + !if (acc_is_present (b)) error stop call verify_n2_deallocated (.false.) !$acc serial call verify_n2_allocated diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1.f90 new file mode 100644 index 000000000000..1105a5786189 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1.f90 @@ -0,0 +1,405 @@ +! Test OpenACC 'declare create' with allocatable arrays. + +! { dg-do run } + +! Note that we're not testing OpenACC semantics here, but rather documenting +! current GCC behavior, specifically, behavior concerning updating of +! host/device array descriptors. +! { dg-skip-if n/a { *-*-* } { -DACC_MEM_SHARED=1 } } + +! We've got support for OpenACC "Changes from Version 2.0 to 2.5": +! "The 'declare create' directive with a Fortran 'allocatable' has new behavior". + + +!TODO { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'. + + +!TODO OpenACC 'serial' vs. GCC/nvptx: +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } + + +! { dg-additional-options -fdump-tree-original } +! { dg-additional-options -fdump-tree-gimple } + + +module vars + implicit none + integer, parameter :: n1_lb = -3 + integer, parameter :: n1_ub = 6 + integer, parameter :: n2_lb = -9999 + integer, parameter :: n2_ub = 22222 + + integer, allocatable :: b(:) + !$acc declare create (b) + +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + ! Identifiers for purposes of reliable '-fdump-tree-[...]' scanning. + integer :: id1_1, id1_2 + + interface + + subroutine verify_initial + implicit none + !$acc routine seq + end subroutine verify_initial + + subroutine verify_n1_allocated + implicit none + !$acc routine seq + end subroutine verify_n1_allocated + + subroutine verify_n1_values (addend) + implicit none + !$acc routine gang + integer, value :: addend + end subroutine verify_n1_values + + subroutine verify_n1_deallocated (expect_allocated) + implicit none + !$acc routine seq + logical, value :: expect_allocated + end subroutine verify_n1_deallocated + + subroutine verify_n2_allocated + implicit none + !$acc routine seq + end subroutine verify_n2_allocated + + subroutine verify_n2_values (addend) + implicit none + !$acc routine gang + integer, value :: addend + end subroutine verify_n2_values + + subroutine verify_n2_deallocated (expect_allocated) + implicit none + !$acc routine seq + logical, value :: expect_allocated + end subroutine verify_n2_deallocated + + end interface + + call acc_create (id1_1) + call acc_create (id1_2) + + call verify_initial + ! It is important here (and similarly, following) that there is no data + ! clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + !$acc serial + call verify_initial + !$acc end serial + + allocate (b(n1_lb:n1_ub)) + call verify_n1_allocated + ! This is now OpenACC "present": + if (.not.acc_is_present (b)) error stop + ! ..., and got the actual array descriptor installed: + !$acc serial + call verify_n1_allocated + !$acc end serial + + do i = n1_lb, n1_ub + b(i) = i - 1 + end do + + call acc_update_device (b) + !$acc serial + call verify_n1_allocated + !$acc end serial + + b = 40 + + !$acc parallel copyout (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (-1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(from:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyout (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '-1'. + call verify_n1_values (-1) + id1_2 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + call acc_update_self (b) + call verify_n1_allocated + + do i = n1_lb, n1_ub + if (b(i) /= i - 1) error stop + b(i) = b(i) + 2 + end do + + ! The same using the OpenACC 'update' directive. + + !$acc update device (b) self (id1_1) + ! We do have 'GOMP_MAP_TO_PSET' here: + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_to:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1\);$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + ! ..., but it's silently skipped in 'GOACC_update'. + !$acc serial + call verify_n1_allocated + !$acc end serial + + b = 41 + + !$acc parallel + call verify_n1_values (1) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n1_values (1) + !$acc end parallel + + !$acc update self (b) self (id1_2) + ! We do have 'GOMP_MAP_TO_PSET' here: + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_from:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + ! ..., but it's silently skipped in 'GOACC_update'. + call verify_n1_allocated + + do i = n1_lb, n1_ub + if (b(i) /= i + 1) error stop + b(i) = b(i) + 2 + end do + + ! Now test that (potentially re-)installing the actual array descriptor is a + ! no-op, via a data clause for 'b' (explicit or implicit): must get a + ! 'GOMP_MAP_TO_PSET'. + !$acc serial present (b) copyin (id1_2) + call verify_n1_allocated + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copyin (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(to:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyin (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '1'. + call verify_n1_values (1) + id1_2 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + call verify_n1_allocated + if (.not.acc_is_present (b)) error stop + + deallocate (b) + !if (acc_is_present (b)) error stop + call verify_n1_deallocated (.false.) + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !$acc serial + call verify_n1_allocated + !$acc end serial + + ! Now try to install the actual array descriptor, via a data clause for 'b' + ! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in + ! 'gomp_map_vars_internal' is handled as 'declare target', but because of + ! '*(void **) hostaddrs[i] == NULL', we've got 'has_always_ptrset == false', + ! 'always_to_cnt == 0', and therefore 'gomp_map_vars_existing' doesn't update + ! the 'GOMP_MAP_TO_PSET'. + ! The device-side array descriptor doesn't get updated, so 'b' still appears + ! as "allocated": + !TODO Why does 'present (b)' still work here? + !$acc serial present (b) copyout (id1_2) + call verify_n1_deallocated (.true.) + !TODO Use of 'b'. + id1_2 = ubound (b, 1) + !$acc end serial + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM <integer\(kind=[0-9]+\)\[0:\]> \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + + ! Restart the procedure, with different array dimensions. + + allocate (b(n2_lb:n2_ub)) + call verify_n2_allocated + if (.not.acc_is_present (b)) error stop + !$acc serial + call verify_n2_allocated + !$acc end serial + + do i = n2_lb, n2_ub + b(i) = i + 20 + end do + + call acc_update_device (b) + !$acc serial + call verify_n2_allocated + !$acc end serial + + b = -40 + + !$acc parallel + call verify_n2_values (20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (20) + !$acc end parallel + + call acc_update_self (b) + call verify_n2_allocated + + do i = n2_lb, n2_ub + if (b(i) /= i + 20) error stop + b(i) = b(i) - 40 + end do + + !$acc update device (b) + !$acc serial + call verify_n2_allocated + !$acc end serial + + b = -41 + + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + + !$acc update self (b) + call verify_n2_allocated + + do i = n2_lb, n2_ub + if (b(i) /= i - 20) error stop + b(i) = b(i) + 10 + end do + + !$acc serial present (b) copy (id1_2) + call verify_n2_allocated + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + + call verify_n2_allocated + if (.not.acc_is_present (b)) error stop + + deallocate (b) + !if (acc_is_present (b)) error stop + call verify_n2_deallocated (.false.) + !$acc serial + call verify_n2_allocated + !$acc end serial + + !$acc serial present (b) copy (id1_2) + call verify_n2_deallocated (.true.) + !TODO Use of 'b': + id1_2 = ubound (b, 1) + !$acc end serial + +end program test + + +subroutine verify_initial + use vars + implicit none + !$acc routine seq + + if (allocated (b)) error stop "verify_initial allocated" + if (any (lbound (b) /= [0])) error stop "verify_initial lbound" + if (any (ubound (b) /= [0])) error stop "verify_initial ubound" +end subroutine verify_initial + +subroutine verify_n1_allocated + use vars + implicit none + !$acc routine seq + + if (.not.allocated (b)) error stop "verify_n1_allocated allocated" + if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_allocated lbound" + if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_allocated ubound" +end subroutine verify_n1_allocated + +subroutine verify_n1_values (addend) + use vars + implicit none + !$acc routine gang + integer, value :: addend + integer :: i + + !$acc loop + do i = n1_lb, n1_ub + if (b(i) /= i + addend) error stop + end do +end subroutine verify_n1_values + +subroutine verify_n1_deallocated (expect_allocated) + use vars + implicit none + !$acc routine seq + logical, value :: expect_allocated + + if (allocated(b) .neqv. expect_allocated) error stop "verify_n1_deallocated allocated" + ! Apparently 'deallocate'ing doesn't unset the bounds. + if (any (lbound (b) /= [n1_lb])) error stop "verify_n1_deallocated lbound" + if (any (ubound (b) /= [n1_ub])) error stop "verify_n1_deallocated ubound" +end subroutine verify_n1_deallocated + +subroutine verify_n2_allocated + use vars + implicit none + !$acc routine seq + + if (.not.allocated(b)) error stop "verify_n2_allocated allocated" + if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_allocated lbound" + if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_allocated ubound" +end subroutine verify_n2_allocated + +subroutine verify_n2_values (addend) + use vars + implicit none + !$acc routine gang + integer, value :: addend + integer :: i + + !$acc loop + do i = n2_lb, n2_ub + if (b(i) /= i + addend) error stop + end do +end subroutine verify_n2_values + +subroutine verify_n2_deallocated (expect_allocated) + use vars + implicit none + !$acc routine seq + logical, value :: expect_allocated + + if (allocated(b) .neqv. expect_allocated) error stop "verify_n2_deallocated allocated" + ! Apparently 'deallocate'ing doesn't unset the bounds. + if (any (lbound (b) /= [n2_lb])) error stop "verify_n2_deallocated lbound" + if (any (ubound (b) /= [n2_ub])) error stop "verify_n2_deallocated ubound" +end subroutine verify_n2_deallocated