Hi! This patch finishes the C #pragma omp taskloop support on the gomp 4.1 branch, including library support.
2015-05-21 Jakub Jelinek <ja...@redhat.com> * tree.h (OMP_STANDALONE_CLAUSES): Adjust to cover OMP_TARGET_{ENTER,EXIT}_DATA. (OMP_CLAUSE_SHARED_FIRSTPRIVATE): Define. * gimplify.c (gimplify_scan_omp_clauses): Add lastprivate clause to outer taskloop if needed. (gimplify_omp_for): Fix a typo. Fixup OMP_TASKLOOP gimplification. * omp-low.c (omp_copy_decl_2): If var is TREE_ADDRESSABLE listed in task_shared_vars, clear TREE_ADDRESSABLE on the copy. (build_outer_var_ref): Add lastprivate argument, pass it through recursively. Handle lastprivate on taskloop construct. (install_var_field): Allow multiple fields for a single decl - one for firstprivate, another for shared clauses on task. (scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. (add_taskreg_looptemp_clauses): Add one more _looptemp_ clause for taskloop GIMPLE_OMP_TASK, if it is collapse > 1 with non-constant iteration count and there is lastprivate clause on the inner GIMPLE_OMP_FOR. (finish_taskreg_scan): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. (lower_rec_input_clauses): Likewise. Ignore all OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE clauses on taskloop construct. (lower_lastprivate_clauses): For OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE on taskloop lookup decl in outer context. Pass true to build_outer_var_ref lastprivate argument. (lower_send_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. (lower_send_shared_vars): Ignore fields with NULL or FIELD_DECL abstract origin. (expand_task_call): Use GOMP_TASK_* defines instead of hardcoded integers. (expand_omp_simd): Handle addressable fd->loop.v. (expand_omp_taskloop_for_outer): Initialize the last _looptemp_ with total iteration count if needed. (expand_omp_taskloop_for_inner): Handle bias and broken_loop. (lower_omp_for_lastprivate): Use last _looptemp_ clause on taskloop for comparison. (create_task_copyfn): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE. gcc/c-family/ * c-omp.c (c_finish_omp_for): Clear DECL_INITIAL. gcc/testsuite/ * gcc.dg/gomp/taskloop-1.c: New test. 
include/ * gomp-constants.h (GOMP_TASK_FLAG_UNTIED, GOMP_TASK_FLAG_FINAL, GOMP_TASK_FLAG_MERGEABLE, GOMP_TASK_FLAG_DEPEND, GOMP_TASK_FLAG_UP, GOMP_TASK_FLAG_GRAINSIZE, GOMP_TASK_FLAG_IF, GOMP_TASK_FLAG_NOGROUP): Define. libgomp/ * libgomp.map (GOMP_4.1): Export GOMP_taskloop and GOMP_taskloop_ull. * task.c: Include gomp-constants.h. Include taskloop.c twice with appropriate macros. (GOMP_task): Use GOMP_TASK_FLAG_* defines instead of hardcoded constants. * taskloop.c: New file. * testsuite/libgomp.c/for-4.c: New test. * testsuite/libgomp.c/taskloop-1.c: New test. * testsuite/libgomp.c/taskloop-2.c: New test. * testsuite/libgomp.c/taskloop-3.c: New test. --- gcc/tree.h.jj 2015-05-19 18:56:50.982256719 +0200 +++ gcc/tree.h 2015-05-19 19:04:52.496759752 +0200 @@ -1206,7 +1206,7 @@ extern void protected_set_expr_location /* Generic accessors for OMP nodes that keep clauses as operand 0. */ #define OMP_STANDALONE_CLAUSES(NODE) \ - TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_UPDATE), 0) + TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_EXIT_DATA), 0) #define OACC_PARALLEL_BODY(NODE) \ TREE_OPERAND (OACC_PARALLEL_CHECK (NODE), 0) @@ -1366,6 +1366,12 @@ extern void protected_set_expr_location #define OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ(NODE) \ (OMP_CLAUSE_CHECK (NODE))->omp_clause.gimple_reduction_init +/* True on a SHARED clause if a FIRSTPRIVATE clause for the same + decl is present in the chain (this can happen only for taskloop + with FIRSTPRIVATE/LASTPRIVATE on it originally). 
*/ +#define OMP_CLAUSE_SHARED_FIRSTPRIVATE(NODE) \ + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SHARED)->base.public_flag) + #define OMP_CLAUSE_FINAL_EXPR(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_FINAL), 0) #define OMP_CLAUSE_IF_EXPR(NODE) \ --- gcc/gimplify.c.jj 2015-05-19 19:02:52.230632257 +0200 +++ gcc/gimplify.c 2015-05-20 19:07:01.317440243 +0200 @@ -6167,6 +6167,12 @@ gimplify_scan_omp_clauses (tree *list_p, (splay_tree_key) decl) == NULL) omp_add_variable (outer_ctx, decl, GOVD_SHARED | GOVD_SEEN); else if (outer_ctx + && (outer_ctx->region_type & ORT_TASK) != 0 + && outer_ctx->combined_loop + && splay_tree_lookup (outer_ctx->variables, + (splay_tree_key) decl) == NULL) + omp_add_variable (outer_ctx, decl, GOVD_LASTPRIVATE | GOVD_SEEN); + else if (outer_ctx && outer_ctx->region_type == ORT_WORKSHARE && outer_ctx->combined_loop && splay_tree_lookup (outer_ctx->variables, @@ -6227,6 +6233,10 @@ gimplify_scan_omp_clauses (tree *list_p, else if (omp_check_private (octx, decl, false)) break; } + else if (octx + && (octx->region_type & ORT_TASK) != 0 + && octx->combined_loop) + ; else break; gcc_checking_assert (splay_tree_lookup (octx->variables, @@ -7061,7 +7071,7 @@ gimplify_omp_for (tree *expr_p, gimple_s /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear clause for the IV. 
*/ - if (org == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1) + if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1) { t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), 0); gcc_assert (TREE_CODE (t) == MODIFY_EXPR); @@ -7075,7 +7085,8 @@ gimplify_omp_for (tree *expr_p, gimple_s } } - gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort); + if (TREE_CODE (for_stmt) != OMP_TASKLOOP) + gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort); if (TREE_CODE (for_stmt) == OMP_DISTRIBUTE) gimplify_omp_ctxp->distribute = true; @@ -7113,9 +7124,69 @@ gimplify_omp_for (tree *expr_p, gimple_s for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt), find_combined_omp_for, NULL, NULL); gcc_assert (for_stmt != NULL_TREE); - gimplify_omp_ctxp->combined_loop = true; } + /* For taskloop, need to gimplify the start, end and step before the + taskloop, outside of the taskloop omp context. */ + if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP) + { + for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++) + { + t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i); + if (!is_gimple_constant (TREE_OPERAND (t, 1))) + { + TREE_OPERAND (t, 1) + = get_initialized_tmp_var (TREE_OPERAND (t, 1), + pre_p, NULL); + tree c = build_omp_clause (input_location, + OMP_CLAUSE_FIRSTPRIVATE); + OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1); + OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt); + OMP_FOR_CLAUSES (orig_for_stmt) = c; + } + + /* Handle OMP_FOR_COND. */ + t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i); + if (!is_gimple_constant (TREE_OPERAND (t, 1))) + { + TREE_OPERAND (t, 1) + = get_initialized_tmp_var (TREE_OPERAND (t, 1), + pre_p, NULL); + tree c = build_omp_clause (input_location, + OMP_CLAUSE_FIRSTPRIVATE); + OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1); + OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt); + OMP_FOR_CLAUSES (orig_for_stmt) = c; + } + + /* Handle OMP_FOR_INCR. 
*/ + t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i); + if (TREE_CODE (t) == MODIFY_EXPR) + { + decl = TREE_OPERAND (t, 0); + t = TREE_OPERAND (t, 1); + tree *tp = &TREE_OPERAND (t, 1); + if (TREE_CODE (t) == PLUS_EXPR && *tp == decl) + tp = &TREE_OPERAND (t, 0); + + if (!is_gimple_constant (*tp)) + { + *tp = get_initialized_tmp_var (*tp, pre_p, NULL); + tree c = build_omp_clause (input_location, + OMP_CLAUSE_FIRSTPRIVATE); + OMP_CLAUSE_DECL (c) = *tp; + OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt); + OMP_FOR_CLAUSES (orig_for_stmt) = c; + } + } + } + + gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (orig_for_stmt), pre_p, ort); + } + + if (orig_for_stmt != for_stmt) + gimplify_omp_ctxp->combined_loop = true; + for_body = NULL; gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt))); @@ -7175,6 +7246,11 @@ gimplify_omp_for (tree *expr_p, gimple_s else if (omp_check_private (outer, decl, false)) outer = NULL; } + else if (((outer->region_type & ORT_TASK) != 0) + && outer->combined_loop + && !omp_check_private (gimplify_omp_ctxp, + decl, false)) + ; else if (outer->region_type != ORT_COMBINED_PARALLEL) outer = NULL; if (outer) @@ -7206,6 +7282,11 @@ gimplify_omp_for (tree *expr_p, gimple_s else if (omp_check_private (outer, decl, false)) outer = NULL; } + else if (((outer->region_type & ORT_TASK) != 0) + && outer->combined_loop + && !omp_check_private (gimplify_omp_ctxp, + decl, false)) + ; else if (outer->region_type != ORT_COMBINED_PARALLEL) outer = NULL; if (outer) @@ -7418,14 +7499,39 @@ gimplify_omp_for (tree *expr_p, gimple_s BITMAP_FREE (has_decl_expr); - gimplify_and_add (OMP_FOR_BODY (orig_for_stmt), &for_body); + if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP) + { + push_gimplify_context (); + if (TREE_CODE (OMP_FOR_BODY (orig_for_stmt)) != BIND_EXPR) + { + OMP_FOR_BODY (orig_for_stmt) + = build3 (BIND_EXPR, void_type_node, NULL, + OMP_FOR_BODY (orig_for_stmt), NULL); + TREE_SIDE_EFFECTS (OMP_FOR_BODY 
(orig_for_stmt)) = 1; + } + } + + gimple g = gimplify_and_return_first (OMP_FOR_BODY (orig_for_stmt), + &for_body); + + if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP) + { + if (gimple_code (g) == GIMPLE_BIND) + pop_gimplify_context (g); + else + pop_gimplify_context (NULL); + } if (orig_for_stmt != for_stmt) for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++) { t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i); decl = TREE_OPERAND (t, 0); + struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp; + if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP) + gimplify_omp_ctxp = ctx->outer_context; var = create_tmp_var (TREE_TYPE (decl), get_name (decl)); + gimplify_omp_ctxp = ctx; omp_add_variable (gimplify_omp_ctxp, var, GOVD_PRIVATE | GOVD_SEEN); TREE_OPERAND (t, 0) = var; t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i); @@ -7524,14 +7630,18 @@ gimplify_omp_for (tree *expr_p, gimple_s gforo_clauses_ptr = &OMP_CLAUSE_CHAIN (*gforo_clauses_ptr); break; /* For lastprivate, keep the clause on inner taskloop, and add - a shared clause on task. */ + a shared clause on task. If the same decl is also firstprivate, + add also firstprivate clause on the inner taskloop. 
*/ case OMP_CLAUSE_LASTPRIVATE: *gfor_clauses_ptr = c; gfor_clauses_ptr = &OMP_CLAUSE_CHAIN (c); - *gtask_clauses_ptr = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_SHARED); + *gtask_clauses_ptr + = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_SHARED); OMP_CLAUSE_DECL (*gtask_clauses_ptr) = OMP_CLAUSE_DECL (c); - gtask_clauses_ptr = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr); + if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)) + OMP_CLAUSE_SHARED_FIRSTPRIVATE (*gtask_clauses_ptr) = 1; + gtask_clauses_ptr + = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr); break; default: gcc_unreachable (); @@ -7539,8 +7649,7 @@ gimplify_omp_for (tree *expr_p, gimple_s *gfor_clauses_ptr = NULL_TREE; *gtask_clauses_ptr = NULL_TREE; *gforo_clauses_ptr = NULL_TREE; - gimple g - = gimple_build_bind (NULL_TREE, gfor, NULL_TREE); + g = gimple_build_bind (NULL_TREE, gfor, NULL_TREE); g = gimple_build_omp_task (g, task_clauses, NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE); gimple_omp_task_set_taskloop_p (g, true); --- gcc/omp-low.c.jj 2015-05-19 18:56:55.730182802 +0200 +++ gcc/omp-low.c 2015-05-20 19:20:25.828928071 +0200 @@ -1129,6 +1129,14 @@ omp_copy_decl_2 (tree var, tree name, tr DECL_CONTEXT (copy) = current_function_decl; DECL_CHAIN (copy) = ctx->block_vars; + /* If VAR is listed in task_shared_vars, it means it wasn't + originally addressable and is just because task needs to take + its address. But we don't need to take address of privatizations + from that var. */ + if (TREE_ADDRESSABLE (var) + && task_shared_vars + && bitmap_bit_p (task_shared_vars, DECL_UID (var))) + TREE_ADDRESSABLE (copy) = 0; ctx->block_vars = copy; return copy; @@ -1179,7 +1187,7 @@ build_receiver_ref (tree var, bool by_re this is some variable. 
*/ static tree -build_outer_var_ref (tree var, omp_context *ctx) +build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false) { tree x; @@ -1188,7 +1196,7 @@ build_outer_var_ref (tree var, omp_conte else if (is_variable_sized (var)) { x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0); - x = build_outer_var_ref (x, ctx); + x = build_outer_var_ref (x, ctx, lastprivate); x = build_simple_mem_ref (x); } else if (is_taskreg_ctx (ctx)) @@ -1209,6 +1217,33 @@ build_outer_var_ref (tree var, omp_conte if (x == NULL_TREE) x = var; } + else if (lastprivate && is_taskloop_ctx (ctx)) + { + gcc_assert (ctx->outer); + splay_tree_node n + = splay_tree_lookup (ctx->outer->field_map, + (splay_tree_key) &DECL_UID (var)); + if (n == NULL) + { + if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer))) + x = var; + else + x = lookup_decl (var, ctx->outer); + } + else + { + tree field = (tree) n->value; + /* If the receiver record type was remapped in the child function, + remap the field into the new record type. 
*/ + x = maybe_lookup_field (field, ctx->outer); + if (x != NULL) + field = x; + + x = build_simple_mem_ref (ctx->outer->receiver_decl); + x = omp_build_component_ref (x, field); + x = build_simple_mem_ref (x); + } + } else if (ctx->outer) x = lookup_decl (var, ctx->outer); else if (is_reference (var)) @@ -1239,11 +1274,17 @@ static void install_var_field (tree var, bool by_ref, int mask, omp_context *ctx) { tree field, type, sfield = NULL_TREE; + splay_tree_key key = (splay_tree_key) var; + if ((mask & 8) != 0) + { + key = (splay_tree_key) &DECL_UID (var); + gcc_checking_assert (key != (splay_tree_key) var); + } gcc_assert ((mask & 1) == 0 - || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var)); + || !splay_tree_lookup (ctx->field_map, key)); gcc_assert ((mask & 2) == 0 || !ctx->sfield_map - || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var)); + || !splay_tree_lookup (ctx->sfield_map, key)); gcc_assert ((mask & 3) == 3 || !is_gimple_omp_oacc (ctx->stmt)); @@ -1298,7 +1339,7 @@ install_var_field (tree var, bool by_ref ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t)) { - sfield = build_decl (DECL_SOURCE_LOCATION (var), + sfield = build_decl (DECL_SOURCE_LOCATION (t), FIELD_DECL, DECL_NAME (t), TREE_TYPE (t)); DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t); insert_field_into_struct (ctx->srecord_type, sfield); @@ -1313,11 +1354,9 @@ install_var_field (tree var, bool by_ref } if (mask & 1) - splay_tree_insert (ctx->field_map, (splay_tree_key) var, - (splay_tree_value) field); + splay_tree_insert (ctx->field_map, key, (splay_tree_value) field); if ((mask & 2) && ctx->sfield_map) - splay_tree_insert (ctx->sfield_map, (splay_tree_key) var, - (splay_tree_value) sfield); + splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield); } static tree @@ -1718,6 +1757,11 @@ scan_sharing_clauses (tree clauses, omp_ if (is_global_var 
(maybe_lookup_decl_in_outer_ctx (decl, ctx))) break; by_ref = use_pointer_for_field (decl, ctx); + if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) + { + gcc_assert (by_ref); + break; + } if (! TREE_READONLY (decl) || TREE_ADDRESSABLE (decl) || by_ref @@ -1998,8 +2042,14 @@ scan_sharing_clauses (tree clauses, omp_ if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS) break; decl = OMP_CLAUSE_DECL (c); - if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))) - fixup_remapped_decl (decl, ctx, false); + if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))) + break; + if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) + { + install_var_field (decl, true, 11, ctx); + break; + } + fixup_remapped_decl (decl, ctx, false); break; case OMP_CLAUSE_MAP: @@ -2336,7 +2386,16 @@ add_taskreg_looptemp_clauses (enum gf_ma tree type = fd.iter_type; if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) - count += fd.collapse - 1; + { + count += fd.collapse - 1; + /* For taskloop, if there are lastprivate clauses on the inner + GIMPLE_OMP_FOR, add one more temporary for the total number + of iterations (product of count1 ... countN-1). 
*/ + if (msk == GF_OMP_FOR_KIND_TASKLOOP + && find_omp_clause (gimple_omp_for_clauses (for_stmt), + OMP_CLAUSE_LASTPRIVATE)) + count++; + } for (i = 0; i < count; i++) { tree temp = create_tmp_var (type); @@ -2480,7 +2539,8 @@ finish_taskreg_scan (omp_context *ctx) for (c = gimple_omp_taskreg_clauses (ctx->stmt); c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED + && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) { tree decl = OMP_CLAUSE_DECL (c); @@ -3755,7 +3815,8 @@ lower_rec_input_clauses (tree clauses, g continue; if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL) { - gcc_assert (is_global_var (OMP_CLAUSE_DECL (c))); + gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c) + || is_global_var (OMP_CLAUSE_DECL (c))); continue; } case OMP_CLAUSE_FIRSTPRIVATE: @@ -3775,7 +3836,7 @@ lower_rec_input_clauses (tree clauses, g if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)) { lastprivate_firstprivate = true; - if (pass != 0) + if (pass != 0 || is_taskloop_ctx (ctx)) continue; } /* Even without corresponding firstprivate, if @@ -3936,6 +3997,11 @@ lower_rec_input_clauses (tree clauses, g /* Shared global vars are just accessed directly. */ if (is_global_var (new_var)) break; + /* For taskloop firstprivate/lastprivate, represented + as firstprivate and shared clause on the task, new_var + is the firstprivate var. */ + if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) + break; /* Set up the DECL_VALUE_EXPR for shared variables now. This needs to be delayed until after fixup_child_record_type so that we get the correct type during the dereference. 
*/ @@ -4467,7 +4533,15 @@ lower_lastprivate_clauses (tree clauses, && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))) { var = OMP_CLAUSE_DECL (c); - new_var = lookup_decl (var, ctx); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE + && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c) + && is_taskloop_ctx (ctx)) + { + gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer)); + new_var = lookup_decl (var, ctx->outer); + } + else + new_var = lookup_decl (var, ctx); if (simduid && DECL_HAS_VALUE_EXPR_P (new_var)) { @@ -4511,7 +4585,7 @@ lower_lastprivate_clauses (tree clauses, OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL; } - x = build_outer_var_ref (var, ctx); + x = build_outer_var_ref (var, ctx, true); if (is_reference (var)) new_var = build_simple_mem_ref_loc (clause_loc, new_var); x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var); @@ -4792,6 +4866,10 @@ lower_send_clauses (tree clauses, gimple case OMP_CLAUSE_LASTPRIVATE: case OMP_CLAUSE_REDUCTION: break; + case OMP_CLAUSE_SHARED: + if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) + break; + continue; case OMP_CLAUSE__LOOPTEMP_: if (ignored_looptemp) { @@ -4809,6 +4887,25 @@ lower_send_clauses (tree clauses, gimple if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN && is_global_var (var)) continue; + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED) + { + /* Handle taskloop firstprivate/lastprivate, where the + lastprivate on GIMPLE_OMP_TASK is represented as + OMP_CLAUSE_SHARED_FIRSTPRIVATE. */ + tree f + = (tree) + splay_tree_lookup (ctx->sfield_map + ? 
ctx->sfield_map : ctx->field_map, + (splay_tree_key) &DECL_UID (val))->value; + gcc_assert (use_pointer_for_field (val, ctx)); + x = omp_build_component_ref (ctx->sender_decl, f); + var = build_fold_addr_expr (var); + gimplify_assign (x, var, ilist); + DECL_ABSTRACT_ORIGIN (f) = NULL; + continue; + } + if (is_variable_sized (val)) continue; by_ref = use_pointer_for_field (val, NULL); @@ -4879,6 +4976,9 @@ lower_send_shared_vars (gimple_seq *ilis for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f)) { ovar = DECL_ABSTRACT_ORIGIN (f); + if (!ovar || TREE_CODE (ovar) == FIELD_DECL) + continue; + nvar = maybe_lookup_decl (ovar, ctx); if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar)) continue; @@ -5158,7 +5258,9 @@ expand_task_call (struct omp_region *reg tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL); unsigned int iflags - = (untied ? 1 : 0) | (mergeable ? 4 : 0) | (depend ? 8 : 0); + = (untied ? GOMP_TASK_FLAG_UNTIED : 0) + | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) + | (depend ? 
GOMP_TASK_FLAG_DEPEND : 0); bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; @@ -5178,7 +5280,7 @@ expand_task_call (struct omp_region *reg endvar = OMP_CLAUSE_DECL (endvar); step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); if (fd.loop.cond_code == LT_EXPR) - iflags |= 256; + iflags |= GOMP_TASK_FLAG_UP; tree tclauses = gimple_omp_for_clauses (g); num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS); if (num_tasks) @@ -5188,7 +5290,7 @@ expand_task_call (struct omp_region *reg num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE); if (num_tasks) { - iflags |= 512; + iflags |= GOMP_TASK_FLAG_GRAINSIZE; num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); } else @@ -5196,9 +5298,9 @@ expand_task_call (struct omp_region *reg } num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); if (ifc == NULL_TREE) - iflags |= 1024; + iflags |= GOMP_TASK_FLAG_IF; if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP)) - iflags |= 2048; + iflags |= GOMP_TASK_FLAG_NOGROUP; ull = fd.iter_type == long_long_unsigned_type_node; } @@ -5211,7 +5313,8 @@ expand_task_call (struct omp_region *reg { tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, - build_int_cst (unsigned_type_node, 1024), + build_int_cst (unsigned_type_node, + GOMP_TASK_FLAG_IF), build_int_cst (unsigned_type_node, 0)); flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); @@ -5224,7 +5327,8 @@ expand_task_call (struct omp_region *reg { tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, - build_int_cst (unsigned_type_node, 2), + build_int_cst (unsigned_type_node, + GOMP_TASK_FLAG_FINAL), build_int_cst (unsigned_type_node, 0)); flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); } @@ -7925,7 +8029,11 @@ expand_omp_simd (struct omp_region *regi t = 
fold_convert (type, n2); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - t = build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t); + tree v = fd->loop.v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = build2 (fd->loop.cond_code, boolean_type_node, v, t); cond_stmt = gimple_build_cond_empty (t); gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, @@ -8124,6 +8232,28 @@ expand_omp_taskloop_for_outer (struct om innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); tree endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) + { + gcc_assert (innerc); + for (i = 1; i < fd->collapse; i++) + { + innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (inner taskloop has lastprivate clause), propagate + down the total number of iterations. 
*/ + tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, GSI_CONTINUE_LINKING); @@ -8167,7 +8297,7 @@ expand_omp_taskloop_for_inner (struct om struct omp_for_data *fd, gimple inner_stmt) { - tree e, t, type, itype, vmain, vback; + tree e, t, type, itype, vmain, vback, bias = NULL_TREE; basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; basic_block fin_bb; gimple_stmt_iterator gsi; @@ -8180,6 +8310,29 @@ expand_omp_taskloop_for_inner (struct om if (POINTER_TYPE_P (type)) itype = signed_type_for (type); + /* See if we need to bias by LLONG_MIN. */ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type)) + { + tree n1, n2; + + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + entry_bb = region->entry; cont_bb = region->cont; gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); @@ -8220,6 +8373,11 @@ expand_omp_taskloop_for_inner (struct om innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); n2 = OMP_CLAUSE_DECL (innerc); + if (bias) + { + n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); + n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); + } n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), true, NULL_TREE, true, GSI_SAME_STMT); n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), @@ 
-8310,7 +8468,13 @@ expand_omp_taskloop_for_inner (struct om gsi_remove (&gsi, true); FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; - remove_edge (BRANCH_EDGE (entry_bb)); + if (!broken_loop) + remove_edge (BRANCH_EDGE (entry_bb)); + else + { + remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); + region->outer->cont = NULL; + } /* Connect all the blocks. */ if (!broken_loop) @@ -8334,8 +8498,9 @@ expand_omp_taskloop_for_inner (struct om set_immediate_dominator (CDI_DOMINATORS, body_bb, recompute_dominator (CDI_DOMINATORS, body_bb)); - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); + if (!broken_loop) + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) { @@ -11054,15 +11219,42 @@ lower_omp_for_lastprivate (struct omp_fo tree n2 = fd->loop.n2; if (fd->collapse > 1 && TREE_CODE (n2) != INTEGER_CST - && gimple_omp_for_combined_into_p (fd->for_stmt) - && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR) + && gimple_omp_for_combined_into_p (fd->for_stmt)) { - gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt); - if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR) + struct omp_context *task_ctx = NULL; + if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR) { - struct omp_for_data outer_fd; - extract_omp_for_data (gfor, &outer_fd, NULL); - n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2); + gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt); + if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR) + { + struct omp_for_data outer_fd; + extract_omp_for_data (gfor, &outer_fd, NULL); + n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2); + } + else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP) + task_ctx = ctx->outer->outer; + } + else if (is_task_ctx (ctx->outer)) + task_ctx = ctx->outer; + if (task_ctx) + { + int i; + tree innerc + = find_omp_clause 
(gimple_omp_task_clauses (task_ctx->stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + for (i = 0; i < fd->collapse; i++) + { + innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + n2 = fold_convert (TREE_TYPE (n2), + lookup_decl (OMP_CLAUSE_DECL (innerc), + task_ctx)); } } cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2); @@ -11426,6 +11618,13 @@ create_task_copyfn (gomp_task *task_stmt n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl); if (n == NULL) break; + if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)) + { + decl = (tree) n->value; + n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl); + if (n == NULL) + break; + } f = (tree) n->value; if (tcctx.cb.decl_map) f = *tcctx.cb.decl_map->get (f); --- gcc/c-family/c-omp.c.jj 2015-05-19 18:54:16.202666384 +0200 +++ gcc/c-family/c-omp.c 2015-05-19 19:04:52.500759690 +0200 @@ -491,6 +491,7 @@ c_finish_omp_for (location_t locus, enum init = integer_zero_node; fail = true; } + DECL_INITIAL (decl) = NULL_TREE; init = build_modify_expr (elocus, decl, NULL_TREE, NOP_EXPR, /* FIXME diagnostics: This should --- gcc/testsuite/gcc.dg/gomp/taskloop-1.c.jj 2015-05-19 19:04:52.495759768 +0200 +++ gcc/testsuite/gcc.dg/gomp/taskloop-1.c 2015-05-19 19:04:52.495759768 +0200 @@ -0,0 +1,15 @@ +/* { dg-do compile } */ + +int e; +int bar (int, int); +void baz (int, int, int, int *, int *, int *); + +void +foo (int a, int b, int c, int d, int f, int g, int h, int j, int k, int l) +{ + int i; + #pragma omp taskloop if (a) final (b) untied default(none) mergeable \ + private(c) firstprivate (e) shared (d) num_tasks(f) collapse(1) + for (i = bar (g, h) + j; i < k; i += l) + baz (i, d, e++, &c, &d, &e); +} --- include/gomp-constants.h.jj 2015-05-19 18:54:15.724673826 +0200 +++ include/gomp-constants.h 2015-05-19 19:04:52.494759783 +0200 @@ -113,4 +113,14 @@ enum 
gomp_map_kind #define GOMP_DEVICE_ICV -1 #define GOMP_DEVICE_HOST_FALLBACK -2 +/* GOMP_task/GOMP_taskloop* flags argument. */ +#define GOMP_TASK_FLAG_UNTIED (1 << 0) +#define GOMP_TASK_FLAG_FINAL (1 << 1) +#define GOMP_TASK_FLAG_MERGEABLE (1 << 2) +#define GOMP_TASK_FLAG_DEPEND (1 << 3) +#define GOMP_TASK_FLAG_UP (1 << 8) +#define GOMP_TASK_FLAG_GRAINSIZE (1 << 9) +#define GOMP_TASK_FLAG_IF (1 << 10) +#define GOMP_TASK_FLAG_NOGROUP (1 << 11) + #endif --- libgomp/libgomp.map.jj 2015-05-19 18:54:16.332664361 +0200 +++ libgomp/libgomp.map 2015-05-19 19:04:52.493759799 +0200 @@ -240,6 +240,12 @@ GOMP_4.0.1 { GOMP_offload_unregister; } GOMP_4.0; +GOMP_4.1 { + global: + GOMP_taskloop; + GOMP_taskloop_ull; +} GOMP_4.0.1; + OACC_2.0 { global: acc_get_num_devices; --- libgomp/task.c.jj 2015-05-19 18:54:16.322664516 +0200 +++ libgomp/task.c 2015-05-19 19:04:52.494759783 +0200 @@ -29,6 +29,7 @@ #include "libgomp.h" #include <stdlib.h> #include <string.h> +#include "gomp-constants.h" typedef struct gomp_task_depend_entry *hash_entry_type; @@ -126,8 +127,7 @@ GOMP_task (void (*fn) (void *), void *da might be running on different thread than FN. */ if (cpyfn) if_clause = false; - if (flags & 1) - flags &= ~1; + flags &= ~GOMP_TASK_FLAG_UNTIED; #endif /* If parallel or taskgroup has been cancelled, don't start new tasks. */ @@ -148,12 +148,14 @@ GOMP_task (void (*fn) (void *), void *da depend clauses for non-deferred tasks other than this, because the parent task is suspended until the child task finishes and thus it can't start further child tasks. 
*/ - if ((flags & 8) && thr->task && thr->task->depend_hash) + if ((flags & GOMP_TASK_FLAG_DEPEND) + && thr->task && thr->task->depend_hash) gomp_task_maybe_wait_for_dependencies (depend); gomp_init_task (&task, thr->task, gomp_icv (false)); task.kind = GOMP_TASK_IFFALSE; - task.final_task = (thr->task && thr->task->final_task) || (flags & 2); + task.final_task = (thr->task && thr->task->final_task) + || (flags & GOMP_TASK_FLAG_FINAL); if (thr->task) { task.in_tied_task = thr->task->in_tied_task; @@ -196,7 +198,7 @@ GOMP_task (void (*fn) (void *), void *da bool do_wake; size_t depend_size = 0; - if (flags & 8) + if (flags & GOMP_TASK_FLAG_DEPEND) depend_size = ((uintptr_t) depend[0] * sizeof (struct gomp_task_depend_entry)); task = gomp_malloc (sizeof (*task) + depend_size @@ -219,7 +221,7 @@ GOMP_task (void (*fn) (void *), void *da task->kind = GOMP_TASK_WAITING; task->fn = fn; task->fn_data = arg; - task->final_task = (flags & 2) >> 1; + task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; gomp_mutex_lock (&team->task_lock); /* If parallel or taskgroup has been cancelled, don't start new tasks. */ @@ -412,6 +414,25 @@ GOMP_task (void (*fn) (void *), void *da } } +ialias (GOMP_taskgroup_start) +ialias (GOMP_taskgroup_end) + +#define TYPE long +#define UTYPE unsigned long +#define TYPE_is_long 1 +#include "taskloop.c" +#undef TYPE +#undef UTYPE +#undef TYPE_is_long + +#define TYPE unsigned long long +#define UTYPE TYPE +#define GOMP_taskloop GOMP_taskloop_ull +#include "taskloop.c" +#undef TYPE +#undef UTYPE +#undef GOMP_taskloop + static inline bool gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, struct gomp_taskgroup *taskgroup, struct gomp_team *team) --- libgomp/taskloop.c.jj 2015-05-19 19:04:52.493759799 +0200 +++ libgomp/taskloop.c 2015-05-20 18:37:38.584454280 +0200 @@ -0,0 +1,360 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <ja...@redhat.com>. 
+ + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the taskloop construct. It is included twice, once + for the long and once for unsigned long long variant. */ + +/* Called when encountering an explicit taskloop construct. Split the + iteration space given by START, END and STEP into chunks and create + a task running FN for each chunk, honoring the grainsize or num_tasks + value in NUM_TASKS as directed by FLAGS. */ + +void +GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), + long arg_size, long arg_align, unsigned flags, + unsigned long num_tasks, + TYPE start, TYPE end, TYPE step) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + +#ifdef HAVE_BROKEN_POSIX_SEMAPHORES + /* If pthread_mutex_* is used for omp_*lock*, then each task must be + tied to one thread all the time. This means UNTIED tasks must be + tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN + might be running on different thread than FN.
*/ + if (cpyfn) + flags &= ~GOMP_TASK_FLAG_IF; + flags &= ~GOMP_TASK_FLAG_UNTIED; +#endif + + /* If parallel or taskgroup has been cancelled, don't start new tasks. */ + if (team && gomp_team_barrier_cancelled (&team->barrier)) + return; + +#ifdef TYPE_is_long + TYPE s = step; + if (step > 0) + { + if (start >= end) + return; + s--; + } + else + { + if (start <= end) + return; + s++; + } + UTYPE n = (end - start + s) / step; +#else + UTYPE n; + if (flags & GOMP_TASK_FLAG_UP) + { + if (start >= end) + return; + n = (end - start + step - 1) / step; + } + else + { + if (start <= end) + return; + n = (start - end - step - 1) / -step; + } +#endif + + TYPE task_step = step; + unsigned long nfirst = n; + if (flags & GOMP_TASK_FLAG_GRAINSIZE) + { + unsigned long grainsize = num_tasks; +#ifdef TYPE_is_long + num_tasks = n / grainsize; +#else + UTYPE ndiv = n / grainsize; + num_tasks = ndiv; + if (num_tasks != ndiv) + num_tasks = ~0UL; +#endif + if (num_tasks <= 1) + { + num_tasks = 1; + task_step = end - start; + } + else if (num_tasks >= grainsize +#ifndef TYPE_is_long + && num_tasks != ~0UL +#endif + ) + { + UTYPE mul = num_tasks * grainsize; + task_step = (TYPE) grainsize * step; + if (mul != n) + { + task_step += step; + nfirst = n - mul - 1; + } + } + else + { + UTYPE div = n / num_tasks; + UTYPE mod = n % num_tasks; + task_step = (TYPE) div * step; + if (mod) + { + task_step += step; + nfirst = mod - 1; + } + } + } + else + { + if (num_tasks == 0) + num_tasks = team ? 
team->nthreads : 1; + if (num_tasks >= n) + num_tasks = n; + else + { + UTYPE div = n / num_tasks; + UTYPE mod = n % num_tasks; + task_step = (TYPE) div * step; + if (mod) + { + task_step += step; + nfirst = mod - 1; + } + } + } + + if (flags & GOMP_TASK_FLAG_NOGROUP) + { + if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled) + return; + } + else + ialias_call (GOMP_taskgroup_start) (); + + if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL + || (thr->task && thr->task->final_task) + || team->task_count + num_tasks > 64 * team->nthreads) + { + unsigned long i; + if (__builtin_expect (cpyfn != NULL, 0)) + { + struct gomp_task task[num_tasks]; + struct gomp_task *parent = thr->task; + arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1); + char buf[num_tasks * arg_size + arg_align - 1]; + char *arg = (char *) (((uintptr_t) buf + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + char *orig_arg = arg; + for (i = 0; i < num_tasks; i++) + { + gomp_init_task (&task[i], parent, gomp_icv (false)); + task[i].kind = GOMP_TASK_IFFALSE; + task[i].final_task = (thr->task && thr->task->final_task) + || (flags & GOMP_TASK_FLAG_FINAL); + if (thr->task) + { + task[i].in_tied_task = thr->task->in_tied_task; + task[i].taskgroup = thr->task->taskgroup; + } + thr->task = &task[i]; + cpyfn (arg, data); + arg += arg_size; + } + arg = orig_arg; + for (i = 0; i < num_tasks; i++) + { + thr->task = &task[i]; + ((TYPE *)arg)[0] = start; + start += task_step; + ((TYPE *)arg)[1] = start; + if (i == nfirst) + task_step -= step; + fn (arg); + arg += arg_size; + if (task[i].children != NULL) + { + gomp_mutex_lock (&team->task_lock); + gomp_clear_parent (task[i].children); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); + } + } + else + for (i = 0; i < num_tasks; i++) + { + struct gomp_task task; + + gomp_init_task (&task, thr->task, gomp_icv (false)); + task.kind = GOMP_TASK_IFFALSE; + task.final_task = (thr->task && thr->task->final_task) + || (flags & 
GOMP_TASK_FLAG_FINAL); + if (thr->task) + { + task.in_tied_task = thr->task->in_tied_task; + task.taskgroup = thr->task->taskgroup; + } + thr->task = &task; + ((TYPE *)data)[0] = start; + start += task_step; + ((TYPE *)data)[1] = start; + if (i == nfirst) + task_step -= step; + fn (data); + if (task.children != NULL) + { + gomp_mutex_lock (&team->task_lock); + gomp_clear_parent (task.children); + gomp_mutex_unlock (&team->task_lock); + } + gomp_end_task (); + } + } + else + { + struct gomp_task *tasks[num_tasks]; + struct gomp_task *parent = thr->task; + struct gomp_taskgroup *taskgroup = parent->taskgroup; + char *arg; + int do_wake; + unsigned long i; + + for (i = 0; i < num_tasks; i++) + { + struct gomp_task *task + = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1); + tasks[i] = task; + arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) + & ~(uintptr_t) (arg_align - 1)); + gomp_init_task (task, parent, gomp_icv (false)); + task->kind = GOMP_TASK_IFFALSE; + task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; + thr->task = task; + if (cpyfn) + { + cpyfn (arg, data); + task->copy_ctors_done = true; + } + else + memcpy (arg, data, arg_size); + ((TYPE *)arg)[0] = start; + start += task_step; + ((TYPE *)arg)[1] = start; + if (i == nfirst) + task_step -= step; + thr->task = parent; + task->kind = GOMP_TASK_WAITING; + task->fn = fn; + task->fn_data = arg; + task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; + } + gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new + tasks. 
*/ + if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) + || (taskgroup && taskgroup->cancelled)) + && cpyfn == NULL, 0)) + { + gomp_mutex_unlock (&team->task_lock); + for (i = 0; i < num_tasks; i++) + { + gomp_finish_task (tasks[i]); + free (tasks[i]); + } + if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) + ialias_call (GOMP_taskgroup_end) (); + return; + } + if (taskgroup) + taskgroup->num_children += num_tasks; + for (i = 0; i < num_tasks; i++) + { + struct gomp_task *task = tasks[i]; + if (parent->children) + { + task->next_child = parent->children; + task->prev_child = parent->children->prev_child; + task->next_child->prev_child = task; + task->prev_child->next_child = task; + } + else + { + task->next_child = task; + task->prev_child = task; + } + parent->children = task; + if (taskgroup) + { + if (taskgroup->children) + { + task->next_taskgroup = taskgroup->children; + task->prev_taskgroup = taskgroup->children->prev_taskgroup; + task->next_taskgroup->prev_taskgroup = task; + task->prev_taskgroup->next_taskgroup = task; + } + else + { + task->next_taskgroup = task; + task->prev_taskgroup = task; + } + taskgroup->children = task; + } + if (team->task_queue) + { + task->next_queue = team->task_queue; + task->prev_queue = team->task_queue->prev_queue; + task->next_queue->prev_queue = task; + task->prev_queue->next_queue = task; + } + else + { + task->next_queue = task; + task->prev_queue = task; + team->task_queue = task; + } + ++team->task_count; + ++team->task_queued_count; + } + gomp_team_barrier_set_task_pending (&team->barrier); + if (team->task_running_count + !parent->in_tied_task + < team->nthreads) + { + do_wake = team->nthreads - team->task_running_count + - !parent->in_tied_task; + if ((unsigned long) do_wake > num_tasks) + do_wake = num_tasks; + } + else + do_wake = 0; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) + gomp_team_barrier_wake (&team->barrier, do_wake); + } + if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0) + ialias_call 
(GOMP_taskgroup_end) (); +} --- libgomp/testsuite/libgomp.c/for-4.c.jj 2015-05-19 19:04:52.491759830 +0200 +++ libgomp/testsuite/libgomp.c/for-4.c 2015-05-20 18:37:38.583454296 +0200 @@ -0,0 +1,42 @@ +/* { dg-options "-std=gnu99 -fopenmp" } */ + +extern void abort (void); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F taskloop +#define G taskloop +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F taskloop simd +#define G taskloop_simd +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +int +main () +{ + int err = 0; + #pragma omp parallel reduction(|:err) + #pragma omp single + { + if (test_taskloop_normal () + || test_taskloop_simd_normal ()) + err = 1; + } + if (err) + abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/taskloop-1.c.jj 2015-05-19 19:04:52.492759814 +0200 +++ libgomp/testsuite/libgomp.c/taskloop-1.c 2015-05-20 18:37:38.583454296 +0200 @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp -std=c99" } */ + +int q, r, e; + +__attribute__((noinline, noclone)) void +foo (long a, long b) +{ + #pragma omp taskloop lastprivate (q) nogroup + for (long d = a; d < b; d += 2) + { + q = d; + if (d < 2 || d > 6 || (d & 1)) + #pragma omp atomic + e |= 1; + } +} + +__attribute__((noinline, noclone)) int +bar (int a, int b) +{ + int q = 7; + #pragma omp taskloop lastprivate (q) + for (int d = a; d < b; d++) + { + if (d < 12 || d > 17) + #pragma omp atomic + e |= 1; + q = d; + } + return q; +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + foo (2, 7); + r = bar (12, 18); + } + if (q != 6 || r != 17 || e) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/taskloop-2.c.jj 2015-05-19 19:04:52.492759814 +0200 +++ libgomp/testsuite/libgomp.c/taskloop-2.c 2015-05-20 18:37:38.582454311 +0200 @@ -0,0 +1,147 @@ +/* { dg-do run } */ +/* { dg-options "-O2 
-std=c99" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +int u[1024], v[1024], w[1024], m; + +__attribute__((noinline, noclone)) void +f1 (long a, long b) +{ + #pragma omp taskloop simd default(none) shared(u, v, w) nogroup + for (long d = a; d < b; d++) + u[d] = v[d] + w[d]; +} + +__attribute__((noinline, noclone)) int +f2 (long a, long b, long c) +{ + int d, e; + #pragma omp taskloop simd default(none) shared(u, v, w) linear(d:1) linear(c:5) lastprivate(e) + for (d = a; d < b; d++) + { + u[d] = v[d] + w[d]; + c = c + 5; + e = c + 9; + } + return d + c + e; +} + +__attribute__((noinline, noclone)) int +f3 (long a, long b) +{ + int d; + #pragma omp taskloop simd default(none) shared(u, v, w) + for (d = a; d < b; d++) + { + int *p = &d; + u[d] = v[d] + w[d]; + } + return d; +} + +__attribute__((noinline, noclone)) int +f4 (long a, long b, long c, long d) +{ + int e, f, g; + #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) lastprivate(g) + for (e = a; e < b; e++) + for (f = c; f < d; f++) + { + int *p = &e; + int *q = &f; + int r = 32 * e + f; + u[r] = v[r] + w[r]; + g = r; + } + return e + f + g; +} + +__attribute__((noinline, noclone)) int +f5 (long a, long b, long c, long d) +{ + int e, f; + #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) + for (e = a; e < b; e++) + for (f = c; f < d; f++) + { + int r = 32 * e + f; + u[r] = v[r] + w[r]; + } + return e + f; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + { + v[i] = i; + w[i] = i + 1; + } + #pragma omp parallel + #pragma omp single + f1 (0, 1024); + for (i = 0; i < 1024; i++) + if (u[i] != 2 * i + 1) + __builtin_abort (); + else + { + v[i] = 1024 - i; + w[i] = 512 - i; + } + #pragma omp parallel + #pragma omp single + m = f2 (2, 1022, 17); + for (i = 0; i < 1024; i++) + if ((i < 2 || i >= 1022) ? 
u[i] != 2 * i + 1 : u[i] != 1536 - 2 * i) + __builtin_abort (); + else + { + v[i] = i; + w[i] = i + 1; + } + if (m != 1022 + 2 * (1020 * 5 + 17) + 9) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + m = f3 (0, 1024); + for (i = 0; i < 1024; i++) + if (u[i] != 2 * i + 1) + __builtin_abort (); + else + { + v[i] = 1024 - i; + w[i] = 512 - i; + } + if (m != 1024) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + m = f4 (0, 32, 0, 32); + for (i = 0; i < 1024; i++) + if (u[i] != 1536 - 2 * i) + __builtin_abort (); + else + { + v[i] = i; + w[i] = i + 1; + } + if (m != 32 + 32 + 1023) + __builtin_abort (); + #pragma omp parallel + #pragma omp single + m = f5 (0, 32, 0, 32); + for (i = 0; i < 1024; i++) + if (u[i] != 2 * i + 1) + __builtin_abort (); + else + { + v[i] = 1024 - i; + w[i] = 512 - i; + } + if (m != 32 + 32) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/taskloop-3.c.jj 2015-05-19 19:04:52.492759814 +0200 +++ libgomp/testsuite/libgomp.c/taskloop-3.c 2015-05-20 18:37:38.583454296 +0200 @@ -0,0 +1,84 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fopenmp -std=c99" } */ + +int g; +int a[1024]; + +__attribute__((noinline, noclone)) int +f1 (int x) +{ + #pragma omp taskloop firstprivate (x) lastprivate (x) + for (int i = 0; i < 64; i++) + { + if (x != 74) + __builtin_abort (); + if (i == 63) + x = i + 4; + } + return x; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + #pragma omp taskloop firstprivate (g) lastprivate (g) nogroup + for (int i = 0; i < 64; i++) + { + if (g != 77) + __builtin_abort (); + if (i == 63) + g = i + 9; + } +} + +__attribute__((noinline, noclone)) long long +f3 (long long a, long long b, long long c) +{ + long long i; + int l; + #pragma omp taskloop default (none) lastprivate (i, l) + for (i = a; i < b; i += c) + l = i; + return l * 7 + i; +} + +__attribute__((noinline, noclone)) long long +f4 (long long a, long long b, long long c, long long d, + long long e, long long f, 
int k) +{ + long long i, j; + int l; + #pragma omp taskloop default (none) collapse(2) \ + firstprivate (k) lastprivate (i, j, k, l) + for (i = a; i < b; i += e) + for (j = c; j < d; j += f) + { + if (k != 73) + __builtin_abort (); + if (i == 31 && j == 46) + k = i; + l = j; + } + return i + 5 * j + 11 * k + 17 * l; +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + if (f1 (74) != 63 + 4) + __builtin_abort (); + g = 77; + f2 (); + #pragma omp taskwait + if (g != 63 + 9) + __builtin_abort (); + if (f3 (7, 12, 2) != 11 * 7 + 13) + __builtin_abort (); + if (f4 (0, 32, 16, 48, 1, 2, 73) != 32 + 5 * 48 + 11 * 31 + 17 * 46) + __builtin_abort (); + } + return 0; +} Jakub