Hi! A recent discussion on omp lang reminded me that our firstprivate target implementation doesn't match the final spec - while I've initially thought that the vars are to be copied after waiting for the dependencies (that is where map is mapped), the final spec says they are copied when creating the target task. This means we can use GOMP_MAP_FIRSTPRIVATE_INT optimization regardless of presence of depend clauses (that is the gcc side of the changes), and we have to copy earlier. The actual libgomp situation is that for target without nowait, we were copying too late, after waiting for dependencies (which the patch fixes), and for nowait target, we actually were copying twice, not once (once when creating the target task and then again after waiting for dependencies). This patch kills the latter copying, the earlier one should be enough, the vars are allocated within the target task allocation and so should live until the task is over.
Bootstrapped/regtested on x86_64-linux and i686-linux and tested with HSA offloading by Martin Liska. Committed to trunk. 2016-04-12 Jakub Jelinek <ja...@redhat.com> * omp-low.c (lower_omp_target): Use GOMP_MAP_FIRSTPRIVATE_INT regardless whether there are depend clauses or not. * libgomp.h (struct gomp_target_task): Remove firstprivate_copies field. * target.c (gomp_target_fallback_firstprivate, gomp_target_unshare_firstprivate): Removed. (GOMP_target_ext): Copy firstprivate vars into gomp_allocaed memory before waiting for dependencies. (gomp_target_task_fn): Don't copy firstprivate vars here. * task.c (GOMP_PLUGIN_target_task_completion): Don't free firstprivate_copies here. (gomp_create_target_task): Don't initialize firstprivate_copies field. * testsuite/libgomp.c/target-25.c (main): Use map (to:) instead of explicit/implicit firstprivate. --- gcc/omp-low.c.jj 2016-04-09 13:21:08.000000000 +0200 +++ gcc/omp-low.c 2016-04-11 15:05:58.732557472 +0200 @@ -15730,7 +15730,6 @@ lower_omp_target (gimple_stmt_iterator * location_t loc = gimple_location (stmt); bool offloaded, data_region; unsigned int map_cnt = 0; - bool has_depend = false; offloaded = is_gimple_omp_offloaded (stmt); switch (gimple_omp_target_kind (stmt)) @@ -15765,7 +15764,6 @@ lower_omp_target (gimple_stmt_iterator * dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); lower_depend_clauses (gimple_omp_target_clauses_ptr (stmt), &dep_ilist, &dep_olist); - has_depend = true; } tgt_bind = NULL; @@ -16280,44 +16278,9 @@ lower_omp_target (gimple_stmt_iterator * type = TREE_TYPE (ovar); if (is_reference (ovar)) type = TREE_TYPE (type); - bool use_firstprivate_int, force_addr; - use_firstprivate_int = false; - force_addr = false; if ((INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) <= POINTER_SIZE) || TREE_CODE (type) == POINTER_TYPE) - use_firstprivate_int = true; - if (has_depend) - { - if (is_reference (var)) - use_firstprivate_int = false; - else if (is_gimple_reg (var)) - { - if (DECL_HAS_VALUE_EXPR_P (var)) - { - tree v = get_base_address (var); - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - { - use_firstprivate_int = false; - force_addr = true; - } - else - switch (TREE_CODE (v)) - { - case INDIRECT_REF: - case MEM_REF: - use_firstprivate_int = false; - force_addr = true; - break; - default: - break; - } - } - } - else - use_firstprivate_int = false; - } - if (use_firstprivate_int) { tkind = GOMP_MAP_FIRSTPRIVATE_INT; tree t = var; @@ -16332,7 +16295,7 @@ lower_omp_target (gimple_stmt_iterator * } else if (is_reference (var)) gimplify_assign (x, var, &ilist); - else if (!force_addr && is_gimple_reg (var)) + else if (is_gimple_reg (var)) { tree avar = create_tmp_var (TREE_TYPE (var)); mark_addressable (avar); @@ -16470,40 +16433,9 @@ lower_omp_target (gimple_stmt_iterator * type = TREE_TYPE (var); if (is_reference (var)) type = TREE_TYPE (type); - bool use_firstprivate_int; - use_firstprivate_int = false; if ((INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) <= POINTER_SIZE) || TREE_CODE (type) == POINTER_TYPE) - use_firstprivate_int = true; - if (has_depend) - { - tree v = lookup_decl_in_outer_ctx (var, ctx); - if (is_reference (v)) - use_firstprivate_int = false; - else if (is_gimple_reg (v)) - { - if (DECL_HAS_VALUE_EXPR_P (v)) - { - v = get_base_address (v); - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - use_firstprivate_int = false; - else - switch (TREE_CODE (v)) - { - case INDIRECT_REF: - case MEM_REF: - use_firstprivate_int = false; - break; - default: - break; - } - } - } - else - use_firstprivate_int = false; - } - if (use_firstprivate_int) { x = build_receiver_ref (var, false, ctx); if (TREE_CODE (type) != POINTER_TYPE) --- libgomp/libgomp.h.jj 2016-01-19 13:31:06.000000000 +0100 +++ libgomp/libgomp.h 2016-04-11 17:24:15.363907141 +0200 @@ -496,8 +496,6 @@ struct gomp_target_task struct target_mem_desc *tgt; struct gomp_task *task; struct gomp_team *team; - /* Copies of firstprivate mapped data for shared memory accelerators. */ - void *firstprivate_copies; /* Device-specific target arguments. */ void **args; void *hostaddrs[]; --- libgomp/target.c.jj 2016-01-23 00:13:06.000000000 +0100 +++ libgomp/target.c 2016-04-11 17:36:13.954172003 +0200 @@ -1372,47 +1372,6 @@ copy_firstprivate_data (char *tgt, size_ } } -/* Host fallback with firstprivate map-type handling. */ - -static void -gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum, - void **hostaddrs, size_t *sizes, - unsigned short *kinds) -{ - size_t tgt_align = 0, tgt_size = 0; - calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, - &tgt_size); - if (tgt_align) - { - char *tgt = gomp_alloca (tgt_size + tgt_align - 1); - copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, - tgt_size); - } - gomp_target_fallback (fn, hostaddrs); -} - -/* Handle firstprivate map-type for shared memory devices and the host - fallback. Return the pointer of firstprivate copies which has to be freed - after use. */ - -static void * -gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs, - size_t *sizes, unsigned short *kinds) -{ - size_t tgt_align = 0, tgt_size = 0; - char *tgt = NULL; - - calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, - &tgt_size); - if (tgt_align) - { - tgt = gomp_malloc (tgt_size + tgt_align - 1); - copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, - tgt_size); - } - return tgt; -} - /* Helper function of GOMP_target{,_ext} routines. */ static void * @@ -1504,6 +1463,8 @@ GOMP_target_ext (int device, void (*fn) unsigned int flags, void **depend, void **args) { struct gomp_device_descr *devicep = resolve_device (device); + size_t tgt_align = 0, tgt_size = 0; + bool fpc_done = false; if (flags & GOMP_TARGET_FLAG_NOWAIT) { @@ -1555,7 +1516,19 @@ GOMP_target_ext (int device, void (*fn) { struct gomp_thread *thr = gomp_thread (); if (thr->task && thr->task->depend_hash) - gomp_task_maybe_wait_for_dependencies (depend); + { + /* If we might need to wait, copy firstprivate now. */ + calculate_firstprivate_requirements (mapnum, sizes, kinds, + &tgt_align, &tgt_size); + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, + tgt_align, tgt_size); + } + fpc_done = true; + gomp_task_maybe_wait_for_dependencies (depend); + } } void *fn_addr; @@ -1564,15 +1537,35 @@ GOMP_target_ext (int device, void (*fn) || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { - gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds); + if (!fpc_done) + { + calculate_firstprivate_requirements (mapnum, sizes, kinds, + &tgt_align, &tgt_size); + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, + tgt_align, tgt_size); + } + } + gomp_target_fallback (fn, hostaddrs); return; } struct target_mem_desc *tgt_vars; - void *fpc = NULL; if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds); + if (!fpc_done) + { + calculate_firstprivate_requirements (mapnum, sizes, kinds, + &tgt_align, &tgt_size); + if (tgt_align) + { + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, + tgt_align, tgt_size); + } + } tgt_vars = NULL; } else @@ -1583,8 +1576,6 @@ GOMP_target_ext (int device, void (*fn) args); if (tgt_vars) gomp_unmap_vars (tgt_vars, true); - else - free (fpc); } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -1891,9 +1882,7 @@ gomp_target_task_fn (void *data) || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { ttask->state = GOMP_TARGET_TASK_FALLBACK; - gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum, - ttask->hostaddrs, ttask->sizes, - ttask->kinds); + gomp_target_fallback (ttask->fn, ttask->hostaddrs); return false; } @@ -1908,9 +1897,6 @@ gomp_target_task_fn (void *data) if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { ttask->tgt = NULL; - ttask->firstprivate_copies - = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs, - ttask->sizes, ttask->kinds); actual_arguments = ttask->hostaddrs; } else --- libgomp/task.c.jj 2016-01-21 00:41:49.000000000 +0100 +++ libgomp/task.c 2016-04-11 17:36:19.873091915 +0200 @@ -582,7 +582,6 @@ GOMP_PLUGIN_target_task_completion (void return; } ttask->state = GOMP_TARGET_TASK_FINISHED; - free (ttask->firstprivate_copies); gomp_target_task_completion (team, task); gomp_mutex_unlock (&team->task_lock); } @@ -683,7 +682,6 @@ gomp_create_target_task (struct gomp_dev ttask->state = state; ttask->task = task; ttask->team = team; - ttask->firstprivate_copies = NULL; task->fn = NULL; task->fn_data = ttask; task->final_task = 0; --- libgomp/testsuite/libgomp.c/target-25.c.jj 2015-10-13 20:57:41.000000000 +0200 +++ libgomp/testsuite/libgomp.c/target-25.c 2016-04-11 17:43:42.523102432 +0200 @@ -23,7 +23,7 @@ main () usleep (7000); z = 3; } - #pragma omp target map(tofrom: x) map(from: err) firstprivate (y) depend(inout: x, z) + #pragma omp target map(tofrom: x) map(from: err) map (to: y, z) depend(inout: x, z) err = (x != 1 || y != 2 || z != 3); if (err) abort (); Jakub