On 26 Jan 14:44, Thomas Schwinge wrote:
> On 17 Jan 02:16, Ilya Verbin wrote:
> > Such things are not covered by the
> > testsuite, that's why you missed this issue.  Here is a simple testcase:
> 
> <http://news.gmane.org/find-root.php?message_id=%3C20150116231632.GB48380%40msticlxl57.ims.intel.com%3E>
> 
> Probably a good motivation for adding such a test case.  ;-)

I thought about it, but I don't know how to compile 2 binaries and run one of
them using dejagnu.

> > So, you don't assume that a device can have multiple images from multiple 
> > libs?
> 
> This probably is "just" a bug that we introduced with our changes?
> (Julian?)
> 
> > Also, could you please explain, why did you divide a device initialization 
> > into
> > two functions -- gomp_init_device and gomp_init_tables?
> 
> As I understand it (again, Julian, please correct me if I got that
> wrong), the reason is that for OpenACC support, we need these as two
> separate (independent) actions.  Is this causing problems for OpenMP
> offloading?

I'm asking since in this patch 
http://gcc.gnu.org/ml/gcc-patches/2014-11/msg01604.html
I tried to change libgomp<->plugin interface to enable offloading from libs,
loaded at any time.
My proposal was to replace GOMP_OFFLOAD_register_image and
GOMP_OFFLOAD_get_table with GOMP_OFFLOAD_[un]load_image.
When target device is initialized, GOMP_OFFLOAD_load_image registers one image
in the plugin and returns corresponding target addresses for the image.
The mapping between host and target addresses happens as previously.
I hope that this approach is suitable for both MIC and PTX.

Here is my current patch, it works for OpenMP->MIC, but obviously will not work
for PTX, since it requires symmetrical changes in the plugin.  Could you please
take a look, whether it is possible to support this new interface in PTX plugin?


diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index d9cbff5..1072ae4 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -51,14 +51,12 @@ enum offload_target_type
   OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
 };
 
-/* Auxiliary struct, used for transferring a host-target address range mapping
-   from plugin to libgomp.  */
-struct mapping_table
+/* Auxiliary struct, used for transferring pairs of addresses from plugin
+   to libgomp.  */
+struct addr_pair
 {
-  uintptr_t host_start;
-  uintptr_t host_end;
-  uintptr_t tgt_start;
-  uintptr_t tgt_end;
+  uintptr_t start;
+  uintptr_t end;
 };
 
 /* Miscellaneous functions.  */
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 3089401..4e021f9 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -773,10 +773,10 @@ struct gomp_device_descr
   unsigned int (*get_caps_func) (void);
   int (*get_type_func) (void);
   int (*get_num_devices_func) (void);
-  void (*register_image_func) (void *, void *);
   void (*init_device_func) (int);
   void (*fini_device_func) (int);
-  int (*get_table_func) (int, struct mapping_table **);
+  int (*load_image_func) (int, void *, struct addr_pair **);
+  void (*unload_image_func) (int, void *);
   void *(*alloc_func) (int, size_t);
   void (*free_func) (int, void *);
   void *(*dev2host_func) (int, void *, const void *, size_t);
@@ -793,9 +793,6 @@ struct gomp_device_descr
   /* Set to true when device is initialized.  */
   bool is_initialized;
 
-  /* True when offload regions have been registered with this device.  */
-  bool offload_regions_registered;
-
   /* OpenACC-specific data and functions.  */
   /* This is mutable because of its mutable data_environ and target_data
      members.  */
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index f44174e..2b2b953 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -231,6 +231,7 @@ GOMP_4.0 {
 GOMP_4.0.1 {
   global:
        GOMP_offload_register;
+       GOMP_offload_unregister;
 } GOMP_4.0;
 
 OACC_2.0 {
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 6aeb1e7..5d67c6c 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -43,10 +43,10 @@ static struct gomp_device_descr host_dispatch =
     .get_caps_func = GOMP_OFFLOAD_get_caps,
     .get_type_func = GOMP_OFFLOAD_get_type,
     .get_num_devices_func = GOMP_OFFLOAD_get_num_devices,
-    .register_image_func = GOMP_OFFLOAD_register_image,
     .init_device_func = GOMP_OFFLOAD_init_device,
     .fini_device_func = GOMP_OFFLOAD_fini_device,
-    .get_table_func = GOMP_OFFLOAD_get_table,
+    .load_image_func = GOMP_OFFLOAD_load_image,
+    .unload_image_func = GOMP_OFFLOAD_unload_image,
     .alloc_func = GOMP_OFFLOAD_alloc,
     .free_func = GOMP_OFFLOAD_free,
     .dev2host_func = GOMP_OFFLOAD_dev2host,
@@ -56,7 +56,6 @@ static struct gomp_device_descr host_dispatch =
     .mem_map.is_initialized = false,
     .mem_map.splay_tree.root = NULL,
     .is_initialized = false,
-    .offload_regions_registered = false,
 
     .openacc = {
       .open_device_func = GOMP_OFFLOAD_openacc_open_device,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 166eb55..19b937a 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -284,12 +284,6 @@ lazy_open (int ord)
     = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);
 
   acc_dev->openacc.async_set_async_func (acc_async_sync);
-
-  struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
-  gomp_mutex_lock (&mem_map->lock);
-  if (!mem_map->is_initialized)
-    gomp_init_tables (acc_dev, mem_map);
-  gomp_mutex_unlock (&mem_map->lock);
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
diff --git a/libgomp/plugin/plugin-host.c b/libgomp/plugin/plugin-host.c
index ebf7f11..bc60f72 100644
--- a/libgomp/plugin/plugin-host.c
+++ b/libgomp/plugin/plugin-host.c
@@ -95,12 +95,6 @@ GOMP_OFFLOAD_get_num_devices (void)
 }
 
 STATIC void
-GOMP_OFFLOAD_register_image (void *host_table __attribute__ ((unused)),
-                            void *target_data __attribute__ ((unused)))
-{
-}
-
-STATIC void
 GOMP_OFFLOAD_init_device (int n __attribute__ ((unused)))
 {
 }
@@ -111,12 +105,19 @@ GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused)))
 }
 
 STATIC int
-GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)),
-                       struct mapping_table **table __attribute__ ((unused)))
+GOMP_OFFLOAD_load_image (int n __attribute__ ((unused)),
+                        void *i __attribute__ ((unused)),
+                        struct addr_pair **r __attribute__ ((unused)))
 {
   return 0;
 }
 
+STATIC void
+GOMP_OFFLOAD_unload_image (int n __attribute__ ((unused)),
+                          void *i __attribute__ ((unused)))
+{
+}
+
 STATIC void *
 GOMP_OFFLOAD_openacc_open_device (int n)
 {
diff --git a/libgomp/target.c b/libgomp/target.c
index ebff55e..ce2017c 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -635,7 +635,84 @@ gomp_update (struct gomp_device_descr *devicep, struct 
gomp_memory_mapping *mm,
   gomp_mutex_unlock (&mm->lock);
 }
 
-/* This function should be called from every offload image.
+
+/* Insert mapping of host -> target address pairs to splay tree.  */
+
+static void
+gomp_splay_tree_insert_mapping (struct gomp_device_descr *devicep,
+                               struct addr_pair *host_addr,
+                               struct addr_pair *tgt_addr)
+{
+  struct gomp_memory_mapping *mm = &devicep->mem_map;
+  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+  tgt->refcount = 1;
+  tgt->array = gomp_malloc (sizeof (*tgt->array));
+  tgt->tgt_start = tgt_addr->start;
+  tgt->tgt_end = tgt_addr->end;
+  tgt->to_free = NULL;
+  tgt->list_count = 0;
+  tgt->device_descr = devicep;
+  splay_tree_node node = tgt->array;
+  splay_tree_key k = &node->key;
+  k->host_start = host_addr->start;
+  k->host_end = host_addr->end;
+  k->tgt_offset = 0;
+  k->refcount = 1;
+  k->copy_from = false;
+  k->tgt = tgt;
+  node->left = NULL;
+  node->right = NULL;
+  splay_tree_insert (&mm->splay_tree, node);
+}
+
+/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
+   And insert to splay tree the mapping between addresses from HOST_TABLE and
+   from loaded target image.  */
+
+static void
+gomp_offload_image_to_device (struct gomp_device_descr *devicep,
+                             void *host_table, void *target_data)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  /* Load image to device and get target addresses for the image.  */
+  struct addr_pair *target_table = NULL;
+  int i, num_target_entries
+    = devicep->load_image_func (devicep->target_id, target_data, 
&target_table);
+
+  if (num_target_entries != num_funcs + num_vars)
+    gomp_fatal ("Can't map target functions or variables");
+
+  /* Insert host-target address mapping into devicep->dev_splay_tree.  */
+  for (i = 0; i < num_funcs; i++)
+    {
+      struct addr_pair host_addr;
+      host_addr.start = (uintptr_t) host_func_table[i];
+      host_addr.end = host_addr.start + 1;
+      gomp_splay_tree_insert_mapping (devicep, &host_addr, &target_table[i]);
+    }
+
+  for (i = 0; i < num_vars; i++)
+    {
+      struct addr_pair host_addr;
+      host_addr.start = (uintptr_t) host_var_table[i*2];
+      host_addr.end = host_addr.start + (uintptr_t) host_var_table[i*2+1];
+      gomp_splay_tree_insert_mapping (devicep, &host_addr,
+                                     &target_table[num_funcs+i]);
+    }
+
+  free (target_table);
+}
+
+/* This function should be called from every offload image while loading.
    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
    the target, and TARGET_DATA needed by target plugin.  */
 
@@ -643,6 +720,17 @@ void
 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
                       void *target_data)
 {
+  int i;
+
+  /* Load image to all initialized devices.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      struct gomp_device_descr *devicep = &devices[i];
+      if (devicep->type == target_type && devicep->is_initialized)
+       gomp_offload_image_to_device (devicep, host_table, target_data);
+    }
+
+  /* Insert image to array of pending images.  */
   offload_images = gomp_realloc (offload_images,
                                 (num_offload_images + 1)
                                 * sizeof (struct offload_image_descr));
@@ -654,54 +742,83 @@ GOMP_offload_register (void *host_table, enum 
offload_target_type target_type,
   num_offload_images++;
 }
 
-/* This function initializes the target device, specified by DEVICEP.  DEVICEP
-   must be locked on entry, and remains locked on return.  */
+/* This function should be called from every offload image while unloading.
+   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
+   the target, and TARGET_DATA needed by target plugin.  */
 
-attribute_hidden void
-gomp_init_device (struct gomp_device_descr *devicep)
+void
+GOMP_offload_unregister (void *host_table, enum offload_target_type 
target_type,
+                        void *target_data)
 {
-  devicep->init_device_func (devicep->target_id);
-  devicep->is_initialized = true;
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+  int i;
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  /* Unload image from all initialized devices.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      int j;
+      struct gomp_device_descr *devicep = &devices[i];
+      struct gomp_memory_mapping *mm = &devicep->mem_map;
+
+      if (devicep->type != target_type || !devicep->is_initialized)
+       continue;
+
+      devicep->unload_image_func (devicep->target_id, target_data);
+
+      /* Remove mapping from splay tree.  */
+      for (j = 0; j < num_funcs; j++)
+       {
+         struct splay_tree_key_s k;
+         k.host_start = (uintptr_t) host_func_table[j];
+         k.host_end = k.host_start + 1;
+         splay_tree_remove (&mm->splay_tree, &k);
+       }
+
+      for (j = 0; j < num_vars; j++)
+       {
+         struct splay_tree_key_s k;
+         k.host_start = (uintptr_t) host_var_table[j*2];
+         k.host_end = k.host_start + (uintptr_t) host_var_table[j*2+1];
+         splay_tree_remove (&mm->splay_tree, &k);
+       }
+    }
+
+  /* Remove image from array of pending images.  */
+  for (i = 0; i < num_offload_images; i++)
+    if (offload_images[i].target_data == target_data)
+      {
+       offload_images[i] = offload_images[--num_offload_images];
+       break;
+      }
 }
 
-/* Initialize address mapping tables.  MM must be locked on entry, and remains
-   locked on return.  */
+/* This function initializes the target device, specified by DEVICEP.  DEVICEP
+   must be locked on entry, and remains locked on return.  */
 
 attribute_hidden void
-gomp_init_tables (struct gomp_device_descr *devicep,
-                 struct gomp_memory_mapping *mm)
+gomp_init_device (struct gomp_device_descr *devicep)
 {
-  /* Get address mapping table for device.  */
-  struct mapping_table *table = NULL;
-  int num_entries = devicep->get_table_func (devicep->target_id, &table);
-
-  /* Insert host-target address mapping into dev_splay_tree.  */
   int i;
-  for (i = 0; i < num_entries; i++)
+  devicep->init_device_func (devicep->target_id);
+
+  /* Load to device all images registered by the moment.  */
+  for (i = 0; i < num_offload_images; i++)
     {
-      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
-      tgt->refcount = 1;
-      tgt->array = gomp_malloc (sizeof (*tgt->array));
-      tgt->tgt_start = table[i].tgt_start;
-      tgt->tgt_end = table[i].tgt_end;
-      tgt->to_free = NULL;
-      tgt->list_count = 0;
-      tgt->device_descr = devicep;
-      splay_tree_node node = tgt->array;
-      splay_tree_key k = &node->key;
-      k->host_start = table[i].host_start;
-      k->host_end = table[i].host_end;
-      k->tgt_offset = 0;
-      k->refcount = 1;
-      k->copy_from = false;
-      k->tgt = tgt;
-      node->left = NULL;
-      node->right = NULL;
-      splay_tree_insert (&mm->splay_tree, node);
+      struct offload_image_descr *image = &offload_images[i];
+      if (image->type == devicep->type)
+       gomp_offload_image_to_device (devicep, image->host_table,
+                                     image->target_data);
     }
 
-  free (table);
-  mm->is_initialized = true;
+  devicep->is_initialized = true;
 }
 
 /* Free address mapping tables.  MM must be locked on entry, and remains locked
@@ -750,6 +867,7 @@ GOMP_target (int device, void (*fn) (void *), const void 
*unused,
             unsigned char *kinds)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
+  struct gomp_memory_mapping *mm = &devicep->mem_map;
 
   if (devicep == NULL
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
@@ -780,21 +898,12 @@ GOMP_target (int device, void (*fn) (void *), const void 
*unused,
     fn_addr = (void *) fn;
   else
     {
-      struct gomp_memory_mapping *mm = &devicep->mem_map;
-      gomp_mutex_lock (&mm->lock);
-
-      if (!mm->is_initialized)
-       gomp_init_tables (devicep, mm);
-
       struct splay_tree_key_s k;
       k.host_start = (uintptr_t) fn;
       k.host_end = k.host_start + 1;
       splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
       if (tgt_fn == NULL)
        gomp_fatal ("Target function wasn't mapped");
-
-      gomp_mutex_unlock (&mm->lock);
-
       fn_addr = (void *) tgt_fn->tgt->tgt_start;
     }
 
@@ -845,12 +954,6 @@ GOMP_target_data (int device, const void *unused, size_t 
mapnum,
     gomp_init_device (devicep);
   gomp_mutex_unlock (&devicep->lock);
 
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
-  gomp_mutex_lock (&mm->lock);
-  if (!mm->is_initialized)
-    gomp_init_tables (devicep, mm);
-  gomp_mutex_unlock (&mm->lock);
-
   struct target_mem_desc *tgt
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     false);
@@ -886,13 +989,7 @@ GOMP_target_update (int device, const void *unused, size_t 
mapnum,
     gomp_init_device (devicep);
   gomp_mutex_unlock (&devicep->lock);
 
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
-  gomp_mutex_lock (&mm->lock);
-  if (!mm->is_initialized)
-    gomp_init_tables (devicep, mm);
-  gomp_mutex_unlock (&mm->lock);
-
-  gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
+  gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds, 
false);
 }
 
 void
@@ -961,10 +1058,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device,
   DLSYM (get_caps);
   DLSYM (get_type);
   DLSYM (get_num_devices);
-  DLSYM (register_image);
   DLSYM (init_device);
   DLSYM (fini_device);
-  DLSYM (get_table);
+  DLSYM (load_image);
+  DLSYM (unload_image);
   DLSYM (alloc);
   DLSYM (free);
   DLSYM (dev2host);
@@ -1027,22 +1124,6 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device,
   return err == NULL;
 }
 
-/* This function adds a compatible offload image IMAGE to an accelerator device
-   DEVICE.  DEVICE must be locked on entry, and remains locked on return.  */
-
-static void
-gomp_register_image_for_device (struct gomp_device_descr *device,
-                               struct offload_image_descr *image)
-{
-  if (!device->offload_regions_registered
-      && (device->type == image->type
-         || device->type == OFFLOAD_TARGET_TYPE_HOST))
-    {
-      device->register_image_func (image->host_table, image->target_data);
-      device->offload_regions_registered = true;
-    }
-}
-
 /* This function initializes the runtime needed for offloading.
    It parses the list of offload targets and tries to load the plugins for
    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
@@ -1104,7 +1185,6 @@ gomp_target_init (void)
                current_device.mem_map.is_initialized = false;
                current_device.mem_map.splay_tree.root = NULL;
                current_device.is_initialized = false;
-               current_device.offload_regions_registered = false;
                current_device.openacc.data_environ = NULL;
                current_device.openacc.target_data = NULL;
                for (i = 0; i < new_num_devices; i++)
@@ -1146,21 +1226,12 @@ gomp_target_init (void)
 
   for (i = 0; i < num_devices; i++)
     {
-      int j;
-
-      for (j = 0; j < num_offload_images; j++)
-       gomp_register_image_for_device (&devices[i], &offload_images[j]);
-
       /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devices[i]);
     }
-
-  free (offload_images);
-  offload_images = NULL;
-  num_offload_images = 0;
 }
 
 #else /* PLUGIN_SUPPORT */


Thanks,
  -- Ilya

Reply via email to