Re: [gomp4] Asynchronous data unmapping wait fixes for OpenACC

2014-10-20 Thread Thomas Schwinge
Hi Julian!

On Fri, 17 Oct 2014 17:05:40 +0100, Julian Brown <jul...@codesourcery.com>
wrote:
> This patch introduces a new plugin hook in libgomp to register a
> callback function to clean up host-side bookkeeping data after an
> asynchronous operation has completed (replacing the previous ad-hoc
> method used in the NVPTX backend), and adds code to ensure that same
> cleanup is done reliably in the NVPTX backend when the user program
> hits a wait directive, or equivalent.
>
> OK for the gomp4 branch?

Yes, thanks.

> libgomp/
> * oacc-host.c (openacc_register_async_cleanup): New.
> (host_dispatch): Initialise register_async_cleanup_func entry.
> * oacc-int.h (struct ACC_dispatch_t): Add
> register_async_cleanup_func hook.
> * oacc-parallel.c (GOACC_parallel): Call
> register_async_cleanup_func hook after queuing asynchronous
> copy-back.
> * plugin-nvptx.c (enum PTX_event_type): Add PTX_EVT_ASYNC_CLEANUP.
> (struct PTX_event): Remove tgt field.
> (event_gc): Don't do async cleanup in PTX_EVT_KNL, do it in
> PTX_EVT_ASYNC_CLEANUP instead.
> (event_add): Remove tgt argument. Support PTX_EVT_ASYNC_CLEANUP
> events.
> (PTX_exec, PTX_host2dev, PTX_dev2host, PTX_wait_async)
> (PTX_wait_all_async): Update calls to event_add.
> (openacc_register_async_cleanup): New.
> (PTX_async_test): Call event_gc on success path.
> (PTX_async_test_all): Likewise.
> * target.c (gomp_load_plugin_for_device): Initialise
> register_async_cleanup hook.


Grüße,
 Thomas


pgpflhmGFV4Bm.pgp
Description: PGP signature


[gomp4] Asynchronous data unmapping wait fixes for OpenACC

2014-10-17 Thread Julian Brown
Hi,

This patch introduces a new plugin hook in libgomp to register a
callback function to clean up host-side bookkeeping data after an
asynchronous operation has completed (replacing the previous ad-hoc
method used in the NVPTX backend), and adds code to ensure that same
cleanup is done reliably in the NVPTX backend when the user program
hits a wait directive, or equivalent.

OK for the gomp4 branch?

Thanks,

Julian

ChangeLog

libgomp/
* oacc-host.c (openacc_register_async_cleanup): New.
(host_dispatch): Initialise register_async_cleanup_func entry.
* oacc-int.h (struct ACC_dispatch_t): Add
register_async_cleanup_func hook.
* oacc-parallel.c (GOACC_parallel): Call
register_async_cleanup_func hook after queuing asynchronous
copy-back.
* plugin-nvptx.c (enum PTX_event_type): Add PTX_EVT_ASYNC_CLEANUP.
(struct PTX_event): Remove tgt field.
(event_gc): Don't do async cleanup in PTX_EVT_KNL, do it in
PTX_EVT_ASYNC_CLEANUP instead.
(event_add): Remove tgt argument. Support PTX_EVT_ASYNC_CLEANUP
events.
(PTX_exec, PTX_host2dev, PTX_dev2host, PTX_wait_async)
(PTX_wait_all_async): Update calls to event_add.
(openacc_register_async_cleanup): New.
(PTX_async_test): Call event_gc on success path.
(PTX_async_test_all): Likewise.
* target.c (gomp_load_plugin_for_device): Initialise
register_async_cleanup hook.
commit 78d6b16bf258106282f791f2e7b3010bf75f2a86
Author: Julian Brown <jul...@codesourcery.com>
Date:   Wed Oct 15 02:10:00 2014 -0700

Async fixes/improvements.

diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index a47617a..f44ca5e 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -294,6 +294,16 @@ openacc_parallel (void (*fn) (void *), size_t mapnum __attribute__((unused)),
 }
 
 STATIC void
+openacc_register_async_cleanup (void *targ_mem_desc)
+{
+#ifdef HOST_NONSHM_PLUGIN
+  /* Asynchronous launches are executed synchronously on the (non-SHM) host,
+ so there's no point in delaying host-side cleanup -- just do it now.  */
+  GOMP_PLUGIN_async_unmap_vars (targ_mem_desc);
+#endif
+}
+
+STATIC void
 openacc_async_set_async (int async __attribute__((unused)))
 {
 #ifdef DEBUG
@@ -397,6 +407,8 @@ static struct gomp_device_descr host_dispatch =
 
   .exec_func = openacc_parallel,
 
+  .register_async_cleanup_func = openacc_register_async_cleanup,
+
   .async_set_async_func = openacc_async_set_async,
   .async_test_func = openacc_async_test,
   .async_test_all_func = openacc_async_test_all,
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index e1d2e32..03529cc 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -64,6 +64,9 @@ typedef struct ACC_dispatch_t
   void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
 		 unsigned short *, int, int, int, int, void *);
 
+  /* async cleanup callback registration */
+  void (*register_async_cleanup_func) (void *);
+
   /* asynchronous routines  */
   int (*async_test_func) (int);
   int (*async_test_all_func) (void);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 57ac8de..e3f156c 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -213,7 +213,10 @@ GOACC_parallel (int device, void (*fn) (void *), const void *openmp_target,
  if (async < acc_async_noval)
 gomp_unmap_vars (tgt, true);
   else
-gomp_copy_from_async (tgt);
+{
+  gomp_copy_from_async (tgt);
+  ACC_dev->openacc.register_async_cleanup_func (tgt);
+}
 
  ACC_dev->openacc.async_set_async_func (acc_async_sync);
 }
diff --git a/libgomp/plugin-nvptx.c b/libgomp/plugin-nvptx.c
index e163f3a..f193229 100644
--- a/libgomp/plugin-nvptx.c
+++ b/libgomp/plugin-nvptx.c
@@ -317,7 +317,8 @@ enum PTX_event_type
 {
   PTX_EVT_MEM,
   PTX_EVT_KNL,
-  PTX_EVT_SYNC
+  PTX_EVT_SYNC,
+  PTX_EVT_ASYNC_CLEANUP
 };
 
 struct PTX_event
@@ -325,7 +326,6 @@ struct PTX_event
   CUevent *evt;
   int type;
   void *addr;
-  void *tgt;
   int ord;
   SLIST_ENTRY(PTX_event) next;
 };
@@ -946,6 +946,10 @@ event_gc (bool memmap_lockable)
 	  break;
 	
 	case PTX_EVT_KNL:
+  map_pop (ptx_event->addr);
+	  break;
+
+	case PTX_EVT_ASYNC_CLEANUP:
   {
 	/* The function GOMP_PLUGIN_async_unmap_vars needs to claim the
 		   memory-map splay tree lock for the current device, so we
@@ -955,9 +959,7 @@ event_gc (bool memmap_lockable)
 	if (!memmap_lockable)
 		  goto next_event;
 
-	map_pop (ptx_event->addr);
-		if (ptx_event->tgt)
-		  GOMP_PLUGIN_async_unmap_vars (ptx_event->tgt);
+		GOMP_PLUGIN_async_unmap_vars (ptx_event->addr);
   }
 	  break;
 	}
@@ -978,17 +980,17 @@ event_gc (bool memmap_lockable)
 }
 
 static void
-event_add (enum PTX_event_type type, CUevent *e, void *h, void *tgt)
+event_add (enum PTX_event_type type, CUevent *e, void *h)
 {
   struct PTX_event *ptx_event;
 
-  assert (type == PTX_EVT_MEM || type == PTX_EVT_KNL || type