On Sat, Jun 20, 2015 at 00:35:14 +0300, Ilya Verbin wrote:
> Given that a mapped variable in 4.1 can have different kinds across nested 
> data
> regions, we need to store map-type not only for each var, but also for each
> structured mapping.  Here is my WIP patch, is it sane? :)
> Attached testcase works OK on the device with non-shared memory.

A bit updated version with a fix for GOMP_MAP_TO_PSET.
make check-target-libgomp passed.


include/gcc/
        * gomp-constants.h (GOMP_MAP_ALWAYS_TO_P,
        GOMP_MAP_ALWAYS_FROM_P): Define.
libgomp/
        * libgomp.h (struct target_var_desc): New.
        (struct target_mem_desc): Replace array of splay_tree_key with array of
        target_var_desc.
        (struct splay_tree_key_s): Move copy_from to target_var_desc.
        * oacc-mem.c (gomp_acc_remove_pointer): Use copy_from from
        target_var_desc.
        * oacc-parallel.c (GOACC_parallel): Use copy_from from target_var_desc.
        * target.c (gomp_map_vars_existing): Copy data to device if map-type is
        'always to' or 'always tofrom'.
        (gomp_map_vars): Use key from target_var_desc.  Set copy_from and
        always_copy_from.
        (gomp_copy_from_async): Use key and copy_from from target_var_desc.
        (gomp_unmap_vars): Copy data from device if always_copy_from is set.
        (gomp_offload_image_to_device): Do not use copy_from.
        * testsuite/libgomp.c/target-11.c: New test.


diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 1849478..42bec04 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -107,6 +107,12 @@ enum gomp_map_kind
 #define GOMP_MAP_POINTER_P(X) \
   ((X) == GOMP_MAP_POINTER)
 
+#define GOMP_MAP_ALWAYS_TO_P(X) \
+  (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
+
+#define GOMP_MAP_ALWAYS_FROM_P(X) \
+  (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
+
 
 /* Asynchronous behavior.  Keep in sync with
    libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 87d6c40..8e6d4ac 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -636,6 +636,15 @@ typedef struct splay_tree_node_s *splay_tree_node;
 typedef struct splay_tree_s *splay_tree;
 typedef struct splay_tree_key_s *splay_tree_key;
 
+struct target_var_desc {
+  /* Splay key.  */
+  splay_tree_key key;
+  /* True if data should be copied from device to host at the end.  */
+  bool copy_from;
+  /* True if data always should be copied from device to host at the end.  */
+  bool always_copy_from;
+};
+
 struct target_mem_desc {
   /* Reference count.  */
   uintptr_t refcount;
@@ -655,9 +664,9 @@ struct target_mem_desc {
   /* Corresponding target device descriptor.  */
   struct gomp_device_descr *device_descr;
 
-  /* List of splay keys to remove (or decrease refcount)
+  /* List of target items to remove (or decrease refcount)
      at the end of region.  */
-  splay_tree_key list[];
+  struct target_var_desc list[];
 };
 
 struct splay_tree_key_s {
@@ -673,8 +682,6 @@ struct splay_tree_key_s {
   uintptr_t refcount;
   /* Asynchronous reference count.  */
   uintptr_t async_refcount;
-  /* True if data should be copied from device to host at the end.  */
-  bool copy_from;
 };
 
 #include "splay-tree.h"
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 90d43eb..c0fcb07 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -651,7 +651,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int 
async, int mapnum)
     }
 
   if (force_copyfrom)
-    t->list[0]->copy_from = 1;
+    t->list[0].copy_from = 1;
 
   gomp_mutex_unlock (&acc_dev->lock);
 
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index d899946..8ea3dd1 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -135,8 +135,8 @@ GOACC_parallel (int device, void (*fn) (void *),
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
-    devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
-                           + tgt->list[i]->tgt_offset);
+    devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+                           + tgt->list[i].key->tgt_offset);
 
   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, 
kinds,
                              num_gangs, num_workers, vector_length, async,
diff --git a/libgomp/target.c b/libgomp/target.c
index fb8487a..b1640c1 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -161,6 +161,12 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, 
splay_tree_key oldn,
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
     }
+
+  if (GOMP_MAP_ALWAYS_TO_P (kind))
+    devicep->host2dev_func (devicep->target_id,
+                           (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+                           (void *) newn->host_start,
+                           newn->host_end - newn->host_start);
   oldn->refcount++;
 }
 
@@ -260,7 +266,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
       int kind = get_kind (short_mapkind, kinds, i);
       if (hostaddrs[i] == NULL)
        {
-         tgt->list[i] = NULL;
+         tgt->list[i].key = NULL;
          continue;
        }
       cur_node.host_start = (uintptr_t) hostaddrs[i];
@@ -271,12 +277,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
       splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
       if (n)
        {
-         tgt->list[i] = n;
+         tgt->list[i].key = n;
+         tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+         tgt->list[i].always_copy_from
+           = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
          gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
        }
       else
        {
-         tgt->list[i] = NULL;
+         tgt->list[i].key = NULL;
 
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
@@ -297,7 +306,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                  break;
                else
                  {
-                   tgt->list[j] = NULL;
+                   tgt->list[j].key = NULL;
                    i++;
                  }
            }
@@ -345,7 +354,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
       size_t j;
 
       for (i = 0; i < mapnum; i++)
-       if (tgt->list[i] == NULL)
+       if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
@@ -359,18 +368,23 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n)
              {
-               tgt->list[i] = n;
+               tgt->list[i].key = n;
+               tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+               tgt->list[i].always_copy_from
+                 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                gomp_map_vars_existing (devicep, n, k, kind & typemask);
              }
            else
              {
                size_t align = (size_t) 1 << (kind >> rshift);
-               tgt->list[i] = k;
+               tgt->list[i].key = k;
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                k->tgt = tgt;
                k->tgt_offset = tgt_size;
                tgt_size += k->host_end - k->host_start;
-               k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+               tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+               tgt->list[i].always_copy_from
+                 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
@@ -388,6 +402,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
+                 case GOMP_MAP_ALWAYS_TO:
+                 case GOMP_MAP_ALWAYS_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
@@ -420,7 +436,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
                        break;
                      else
                        {
-                         tgt->list[j] = k;
+                         tgt->list[j].key = k;
+                         tgt->list[j].copy_from = false;
+                         tgt->list[j].always_copy_from = false;
                          k->refcount++;
                          gomp_map_pointer (tgt,
                                            (uintptr_t) *(void **) hostaddrs[j],
@@ -472,11 +490,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t 
mapnum,
     {
       for (i = 0; i < mapnum; i++)
        {
-         if (tgt->list[i] == NULL)
+         if (tgt->list[i].key == NULL)
            cur_node.tgt_offset = (uintptr_t) NULL;
          else
-           cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
-                                 + tgt->list[i]->tgt_offset;
+           cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
+                                 + tgt->list[i].key->tgt_offset;
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
@@ -516,17 +534,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt)
   gomp_mutex_lock (&devicep->lock);
 
   for (i = 0; i < tgt->list_count; i++)
-    if (tgt->list[i] == NULL)
+    if (tgt->list[i].key == NULL)
       ;
-    else if (tgt->list[i]->refcount > 1)
+    else if (tgt->list[i].key->refcount > 1)
       {
-       tgt->list[i]->refcount--;
-       tgt->list[i]->async_refcount++;
+       tgt->list[i].key->refcount--;
+       tgt->list[i].key->async_refcount++;
       }
     else
       {
-       splay_tree_key k = tgt->list[i];
-       if (k->copy_from)
+       splay_tree_key k = tgt->list[i].key;
+       if (tgt->list[i].copy_from)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
@@ -554,25 +572,33 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool 
do_copyfrom)
 
   size_t i;
   for (i = 0; i < tgt->list_count; i++)
-    if (tgt->list[i] == NULL)
-      ;
-    else if (tgt->list[i]->refcount > 1)
-      tgt->list[i]->refcount--;
-    else if (tgt->list[i]->async_refcount > 0)
-      tgt->list[i]->async_refcount--;
-    else
-      {
-       splay_tree_key k = tgt->list[i];
-       if (k->copy_from && do_copyfrom)
-         devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
-                                 (void *) (k->tgt->tgt_start + k->tgt_offset),
-                                 k->host_end - k->host_start);
-       splay_tree_remove (&devicep->mem_map, k);
-       if (k->tgt->refcount > 1)
-         k->tgt->refcount--;
-       else
-         gomp_unmap_tgt (k->tgt);
-      }
+    {
+      splay_tree_key k = tgt->list[i].key;
+      if (k == NULL)
+       continue;
+
+      bool do_unmap = false;
+      if (k->refcount > 1)
+       k->refcount--;
+      else if (k->async_refcount > 0)
+       k->async_refcount--;
+      else
+       do_unmap = true;
+
+      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
+         || tgt->list[i].always_copy_from)
+       devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
+                               (void *) (k->tgt->tgt_start + k->tgt_offset),
+                               k->host_end - k->host_start);
+      if (do_unmap)
+       {
+         splay_tree_remove (&devicep->mem_map, k);
+         if (k->tgt->refcount > 1)
+           k->tgt->refcount--;
+         else
+           gomp_unmap_tgt (k->tgt);
+       }
+    }
 
   if (tgt->refcount > 1)
     tgt->refcount--;
@@ -699,7 +725,6 @@ gomp_offload_image_to_device (struct gomp_device_descr 
*devicep,
       k->tgt_offset = target_table[i].start;
       k->refcount = 1;
       k->async_refcount = 0;
-      k->copy_from = false;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -725,7 +750,6 @@ gomp_offload_image_to_device (struct gomp_device_descr 
*devicep,
       k->tgt_offset = target_var->start;
       k->refcount = 1;
       k->async_refcount = 0;
-      k->copy_from = false;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
diff --git a/libgomp/testsuite/libgomp.c/target-11.c 
b/libgomp/testsuite/libgomp.c/target-11.c
new file mode 100644
index 0000000..4562d88
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target offload_device } */
+
+#include <assert.h>
+
+int main ()
+{
+  int aa = 0, bb = 0, cc = 0, dd = 0;
+
+  #pragma omp target data map(tofrom: aa) map(to: bb) map(from: cc, dd)
+    {
+      int ok;
+      aa = bb = cc = 1;
+
+      /* Set dd on target to 0 for the further check.  */
+      #pragma omp target map(always to: dd)
+       { dd; }
+
+      dd = 1;
+      #pragma omp target map(tofrom: aa) map(always to: bb) \
+       map(always from: cc) map(to: dd) map(from: ok)
+       {
+         /* bb is always to, aa and dd are not.  */
+         ok = (aa == 0) && (bb == 1) && (dd == 0);
+         aa = bb = cc = dd = 2;
+       }
+
+      assert (ok);
+      assert (aa == 1);
+      assert (bb == 1);
+      assert (cc == 2); /* cc is always from.  */
+      assert (dd == 1);
+
+      dd = 3;
+      #pragma omp target map(from: cc) map(always to: dd) map(from: ok)
+       {
+         ok = (dd == 3); /* dd is always to.  */
+         cc = dd = 4;
+       }
+
+      assert (ok);
+      assert (cc == 2);
+      assert (dd == 3);
+    }
+
+  assert (aa == 2);
+  assert (bb == 1);
+  assert (cc == 4);
+  assert (dd == 4);
+
+  return 0;
+}


  -- Ilya

Reply via email to