Re: [libgomp, nvptx, committed] Calculate default dims per device

2018-07-30 Thread Cesar Philippidis
On 07/30/2018 03:19 AM, Tom de Vries wrote:
> 
> [libgomp, nvptx] Calculate default dims per device
> 
> The default dimensions are calculated using per-device properties, but
> initialized once and used on all devices.
> 
> This patch fixes this problem by introducing per-device default dimensions.

Neat, thanks!

I wonder if it's worthwhile to optimize the case where a system has more
than one identical GPU.

Cesar


[libgomp, nvptx, committed] Calculate default dims per device

2018-07-30 Thread Tom de Vries
Hi,

Build and reg-tested on x86_64 with nvptx accelerator.

Committed to trunk.

Thanks,
- Tom
[libgomp, nvptx] Calculate default dims per device

The default dimensions are calculated using per-device properties, but
initialized once and used on all devices.

This patch fixes this problem by introducing per-device default dimensions.

2018-07-27  Tom de Vries  

	* plugin/plugin-nvptx.c (struct ptx_device): Add default_dims field.
	(nvptx_open_device): Init default_dims for device.
	(nvptx_exec): Use default_dims from device.

---
 libgomp/plugin/plugin-nvptx.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 3a4077a1315..5c522aaf281 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -417,6 +417,7 @@ struct ptx_device
   int warp_size;
   int max_threads_per_block;
   int max_threads_per_multiprocessor;
+  int default_dims[GOMP_DIM_MAX];
 
   struct ptx_image_data *images;  /* Images loaded on device.  */
   pthread_mutex_t image_lock; /* Lock for above list.  */
@@ -818,6 +819,9 @@ nvptx_open_device (int n)
   if (r != CUDA_SUCCESS)
 async_engines = 1;
 
+  for (int i = 0; i != GOMP_DIM_MAX; i++)
+ptx_dev->default_dims[i] = 0;
+
   ptx_dev->images = NULL;
   pthread_mutex_init (&ptx_dev->image_lock, NULL);
 
@@ -1152,15 +1156,22 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 
   if (seen_zero)
 {
-  /* See if the user provided GOMP_OPENACC_DIM environment
-	 variable to specify runtime defaults. */
-  static int default_dims[GOMP_DIM_MAX];
-
   pthread_mutex_lock (&ptx_dev_lock);
-  if (!default_dims[0])
+
+  static int gomp_openacc_dims[GOMP_DIM_MAX];
+  if (!gomp_openacc_dims[0])
+	{
+	  /* See if the user provided GOMP_OPENACC_DIM environment
+	 variable to specify runtime defaults.  */
+	  for (int i = 0; i < GOMP_DIM_MAX; ++i)
+	gomp_openacc_dims[i] = GOMP_PLUGIN_acc_default_dim (i);
+	}
+
+  if (!nvthd->ptx_dev->default_dims[0])
 	{
+	  int default_dims[GOMP_DIM_MAX];
 	  for (int i = 0; i < GOMP_DIM_MAX; ++i)
-	default_dims[i] = GOMP_PLUGIN_acc_default_dim (i);
+	default_dims[i] = gomp_openacc_dims[i];
 
 	  int gang, worker, vector;
 	  {
@@ -1196,12 +1207,15 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
 			 default_dims[GOMP_DIM_GANG],
 			 default_dims[GOMP_DIM_WORKER],
 			 default_dims[GOMP_DIM_VECTOR]);
+
+	  for (i = 0; i != GOMP_DIM_MAX; i++)
+	nvthd->ptx_dev->default_dims[i] = default_dims[i];
 	}
   pthread_mutex_unlock (&ptx_dev_lock);
 
   for (i = 0; i != GOMP_DIM_MAX; i++)
 	if (!dims[i])
-	  dims[i] = default_dims[i];
+	  dims[i] = nvthd->ptx_dev->default_dims[i];
 }
 
   /* Check if the accelerator has sufficient hardware resources to