Hi,
This patch introduces a new plugin hook in libgomp to register a
callback function to clean up host-side bookkeeping data after an
asynchronous operation has completed (replacing the previous ad-hoc
method used in the NVPTX backend), and adds code to ensure that the same
cleanup is done reliably in the NVPTX backend when the user program
hits a wait directive, or equivalent.
OK for the gomp4 branch?
Thanks,
Julian
ChangeLog
libgomp/
* oacc-host.c (openacc_register_async_cleanup): New.
(host_dispatch): Initialise register_async_cleanup_func entry.
* oacc-int.h (struct ACC_dispatch_t): Add
register_async_cleanup_func hook.
* oacc-parallel.c (GOACC_parallel): Call
register_async_cleanup_func hook after queuing asynchronous
copy-back.
* plugin-nvptx.c (enum PTX_event_type): Add PTX_EVT_ASYNC_CLEANUP.
(struct PTX_event): Remove tgt field.
(event_gc): Don't do async cleanup in PTX_EVT_KNL, do it in
PTX_EVT_ASYNC_CLEANUP instead.
(event_add): Remove tgt argument. Support PTX_EVT_ASYNC_CLEANUP
events.
(PTX_exec, PTX_host2dev, PTX_dev2host, PTX_wait_async)
(PTX_wait_all_async): Update calls to event_add.
(openacc_register_async_cleanup): New.
(PTX_async_test): Call event_gc on success path.
(PTX_async_test_all): Likewise.
* target.c (gomp_load_plugin_for_device): Initialise
register_async_cleanup hook.
commit 78d6b16bf258106282f791f2e7b3010bf75f2a86
Author: Julian Brown jul...@codesourcery.com
Date: Wed Oct 15 02:10:00 2014 -0700
Async fixes/improvements.
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index a47617a..f44ca5e 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -294,6 +294,16 @@ openacc_parallel (void (*fn) (void *), size_t mapnum __attribute__((unused)),
}
STATIC void
+openacc_register_async_cleanup (void *targ_mem_desc)
+{
+#ifdef HOST_NONSHM_PLUGIN
+ /* Asynchronous launches are executed synchronously on the (non-SHM) host,
+ so there's no point in delaying host-side cleanup -- just do it now. */
+ GOMP_PLUGIN_async_unmap_vars (targ_mem_desc);
+#endif
+}
+
+STATIC void
openacc_async_set_async (int async __attribute__((unused)))
{
#ifdef DEBUG
@@ -397,6 +407,8 @@ static struct gomp_device_descr host_dispatch =
.exec_func = openacc_parallel,
+ .register_async_cleanup_func = openacc_register_async_cleanup,
+
.async_set_async_func = openacc_async_set_async,
.async_test_func = openacc_async_test,
.async_test_all_func = openacc_async_test_all,
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index e1d2e32..03529cc 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -64,6 +64,9 @@ typedef struct ACC_dispatch_t
void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
unsigned short *, int, int, int, int, void *);
+ /* async cleanup callback registration */
+ void (*register_async_cleanup_func) (void *);
+
/* asynchronous routines */
int (*async_test_func) (int);
int (*async_test_all_func) (void);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 57ac8de..e3f156c 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -213,7 +213,10 @@ GOACC_parallel (int device, void (*fn) (void *), const void *openmp_target,
if (async < acc_async_noval)
gomp_unmap_vars (tgt, true);
else
-gomp_copy_from_async (tgt);
+{
+ gomp_copy_from_async (tgt);
+ ACC_dev->openacc.register_async_cleanup_func (tgt);
+}
ACC_dev->openacc.async_set_async_func (acc_async_sync);
}
diff --git a/libgomp/plugin-nvptx.c b/libgomp/plugin-nvptx.c
index e163f3a..f193229 100644
--- a/libgomp/plugin-nvptx.c
+++ b/libgomp/plugin-nvptx.c
@@ -317,7 +317,8 @@ enum PTX_event_type
{
PTX_EVT_MEM,
PTX_EVT_KNL,
- PTX_EVT_SYNC
+ PTX_EVT_SYNC,
+ PTX_EVT_ASYNC_CLEANUP
};
struct PTX_event
@@ -325,7 +326,6 @@ struct PTX_event
CUevent *evt;
int type;
void *addr;
- void *tgt;
int ord;
SLIST_ENTRY(PTX_event) next;
};
@@ -946,6 +946,10 @@ event_gc (bool memmap_lockable)
break;
case PTX_EVT_KNL:
+ map_pop (ptx_event->addr);
+ break;
+
+ case PTX_EVT_ASYNC_CLEANUP:
{
/* The function GOMP_PLUGIN_async_unmap_vars needs to claim the
memory-map splay tree lock for the current device, so we
@@ -955,9 +959,7 @@ event_gc (bool memmap_lockable)
if (!memmap_lockable)
goto next_event;
- map_pop (ptx_event->addr);
- if (ptx_event->tgt)
- GOMP_PLUGIN_async_unmap_vars (ptx_event->tgt);
+ GOMP_PLUGIN_async_unmap_vars (ptx_event->addr);
}
break;
}
@@ -978,17 +980,17 @@ event_gc (bool memmap_lockable)
}
static void
-event_add (enum PTX_event_type type, CUevent *e, void *h, void *tgt)
+event_add (enum PTX_event_type type, CUevent *e, void *h)
{
struct PTX_event *ptx_event;
- assert (type == PTX_EVT_MEM || type == PTX_EVT_KNL || type