Hi!

On 2022-06-08T05:56:02+0200, Tobias Burnus <tobias_bur...@mentor.com> wrote:
> [...] On the libgomp side: The devices which do not fulfill the requirements 
> are
> now filtered out.  [...]

> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c

>  /* Return the number of GCN devices on the system.  */
>  
>  int
> -GOMP_OFFLOAD_get_num_devices (void)
> +GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
>  {
>    if (!init_hsa_context ())
>      return 0;
> +  /* Return -1 if no omp_requires_mask cannot be fulfilled but
> +     devices were present.  */
> +  if (hsa_context.agent_count > 0 && omp_requires_mask != 0)
> +    return -1;
>    return hsa_context.agent_count;
>  }

> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c

>  int
> -GOMP_OFFLOAD_get_num_devices (void)
> +GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
>  {
> -  return nvptx_get_num_devices ();
> +  int num_devices = nvptx_get_num_devices ();
> +  /* Return -1 if no omp_requires_mask cannot be fulfilled but
> +     devices were present.  */
> +  if (num_devices > 0 && omp_requires_mask != 0)
> +    return -1;
> +  return num_devices;
>  }

> --- a/libgomp/target.c
> +++ b/libgomp/target.c

> @@ -4132,8 +4183,19 @@ gomp_target_init (void)
>  
>       if (gomp_load_plugin_for_device (&current_device, plugin_name))
>         {
> -         new_num_devs = current_device.get_num_devices_func ();
> -         if (new_num_devs >= 1)
> +         new_num_devs = current_device.get_num_devices_func (requires_mask);
> +         if (new_num_devs < 0)
> +           {
> +             [...]
> +           }
> +         else if (new_num_devs >= 1)
>             {
>               /* Augment DEVICES and NUM_DEVICES.  */

OK to push the attached "nvptx: 'cuDeviceGetCount' failure is fatal"?


Grüße
 Thomas


>From 8090da93cb00e4aa47a8b21b6548d739b2cebc49 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <tschwi...@baylibre.com>
Date: Thu, 7 Mar 2024 13:18:23 +0100
Subject: [PATCH] nvptx: 'cuDeviceGetCount' failure is fatal

Per commit 683f11843974f0bdf42f79cdcbb0c2b43c7b81b0
"OpenMP: Move omp requires checks to libgomp", we're now using 'return -1'
from 'GOMP_OFFLOAD_get_num_devices' for 'omp_requires_mask' purposes.  This
missed that via 'nvptx_get_num_devices', we could also 'return -1' for
'cuDeviceGetCount' failure.  Before, this meant (in 'gomp_target_init') to
silently ignore the plugin/device -- which also has been doubtful behavior.
Let's instead turn 'cuDeviceGetCount' failure into a fatal error, similar to
other errors during device initialization.

	libgomp/
	* plugin/plugin-nvptx.c (nvptx_get_num_devices):
	'cuDeviceGetCount' failure is fatal.
---
 libgomp/plugin/plugin-nvptx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index ffb1db67d20..81b4a7f499a 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -630,7 +630,7 @@ nvptx_get_num_devices (void)
 	}
     }
 
-  CUDA_CALL_ERET (-1, cuDeviceGetCount, &n);
+  CUDA_CALL_ASSERT (cuDeviceGetCount, &n);
   return n;
 }
 
-- 
2.34.1

Reply via email to