This adds necessary plumbing to spawn multiple teams.

To be reverted on this branch prior to merge.
---
 gcc/builtin-types.def                            |   7 +-
 gcc/fortran/types.def                            |   5 +-
 gcc/omp-builtins.def                             |   2 +-
 gcc/omp-low.c                                    | 149 ++++++++++++++++---
 include/gomp-constants.h                         |  21 +++
 libgomp/libgomp.h                                |  12 +-
 libgomp/libgomp_g.h                              |   3 +-
 libgomp/oacc-host.c                              |   3 +-
 libgomp/target.c                                 | 179 +++++++++++++++++------
 libgomp/task.c                                   |   3 +-
 liboffloadmic/plugin/libgomp-plugin-intelmic.cpp |   4 +-
 11 files changed, 299 insertions(+), 89 deletions(-)

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index c68fb19..33bee1d 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -555,10 +555,9 @@ DEF_FUNCTION_TYPE_9 
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
                     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
                     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
                     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
-
-DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
-                     BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-                     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT)
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
+                    BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+                    BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index a37e856..5838f04 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -220,10 +220,9 @@ DEF_FUNCTION_TYPE_9 
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
                     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
                     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
                     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
-
-DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
                      BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-                     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT)
+                     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
                      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 35f5014..35c2724 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -341,7 +341,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, 
"GOMP_single_copy_start",
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end",
                  BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target_ext",
-                 BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
+                 BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
                  ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_DATA, "GOMP_target_data_ext",
                  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 8996b8d..2e02c6f 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12731,6 +12731,130 @@ mark_loops_in_oacc_kernels_region (basic_block 
region_entry,
     loop->in_oacc_kernels_region = true;
 }
 
+/* Build target argument identifier from the DEVICE identifier, value
+   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
+
+static tree
+get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
+{
+  tree t = build_int_cst (integer_type_node, device);
+  if (subseqent_param)
+    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+                    build_int_cst (integer_type_node,
+                                   GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
+  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+                  build_int_cst (integer_type_node, id));
+  return t;
+}
+
+/* Like above but return it in type that can be directly stored as an element
+   of the argument array.  */
+
+static tree
+get_target_argument_identifier (int device, bool subseqent_param, int id)
+{
+  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
+  return fold_convert (ptr_type_node, t);
+}
+
+/* Return a target argument consisting of DEVICE identifier, value identifier
+   ID, and the actual VALUE.  */
+
+static tree
+get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
+                          tree value)
+{
+  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
+                       fold_convert (integer_type_node, value),
+                       build_int_cst (unsigned_type_node,
+                                      GOMP_TARGET_ARG_VALUE_SHIFT));
+  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+                  get_target_argument_identifier_1 (device, false, id));
+  t = fold_convert (ptr_type_node, t);
+  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
+}
+
+/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
+   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
+   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
+   arguments.  */
+
+static void
+push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
+                                        int id, tree value, vec <tree> *args)
+{
+  if (tree_fits_shwi_p (value)
+      && tree_to_shwi (value) > -(1 << 15)
+      && tree_to_shwi (value) < (1 << 15))
+    args->quick_push (get_target_argument_value (gsi, device, id, value));
+  else
+    {
+      args->quick_push (get_target_argument_identifier (device, true, id));
+      value = fold_convert (ptr_type_node, value);
+      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
+                                       GSI_SAME_STMT);
+      args->quick_push (value);
+    }
+}
+
+/* Create an array of arguments that is then passed to GOMP_target_ext.  */
+
+static tree
+get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
+{
+  auto_vec <tree, 6> args;
+  tree clauses = gimple_omp_target_clauses (tgt_stmt);
+  tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
+  if (c)
+    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
+  else
+    t = integer_minus_one_node;
+  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+                                          GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
+
+  c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
+  if (c)
+    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
+  else
+    t = integer_minus_one_node;
+  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+                                          GOMP_TARGET_ARG_THREAD_LIMIT, t,
+                                          &args);
+
+#if 0
+  /* Add HSA-specific grid sizes, if available.  */
+  if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
+                      OMP_CLAUSE__GRIDDIM_))
+    {
+      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
+                                         
GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
+      args.quick_push (t);
+      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
+    }
+#endif
+
+  /* Produce more, perhaps device specific, arguments here.  */
+
+  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
+                                                         args.length () + 1),
+                                 ".omp_target_args");
+  for (unsigned i = 0; i < args.length (); i++)
+    {
+      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+                        build_int_cst (integer_type_node, i),
+                        NULL_TREE, NULL_TREE);
+      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
+                        GSI_SAME_STMT);
+    }
+  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+                    build_int_cst (integer_type_node, args.length ()),
+                    NULL_TREE, NULL_TREE);
+  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
+                    GSI_SAME_STMT);
+  TREE_ADDRESSABLE (argarray) = 1;
+  return build_fold_addr_expr (argarray);
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -13148,30 +13272,7 @@ expand_omp_target (struct omp_region *region)
        depend = build_int_cst (ptr_type_node, 0);
       args.quick_push (depend);
       if (start_ix == BUILT_IN_GOMP_TARGET)
-       {
-         c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
-         if (c)
-           {
-             t = fold_convert (integer_type_node,
-                               OMP_CLAUSE_NUM_TEAMS_EXPR (c));
-             t = force_gimple_operand_gsi (&gsi, t, true, NULL,
-                                           true, GSI_SAME_STMT);
-           }
-         else
-           t = integer_minus_one_node;
-         args.quick_push (t);
-         c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
-         if (c)
-           {
-             t = fold_convert (integer_type_node,
-                               OMP_CLAUSE_THREAD_LIMIT_EXPR (c));
-             t = force_gimple_operand_gsi (&gsi, t, true, NULL,
-                                           true, GSI_SAME_STMT);
-           }
-         else
-           t = integer_minus_one_node;
-         args.quick_push (t);
-       }
+       args.quick_push (get_target_arguments (&gsi, entry_stmt));
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index dffd631..fef27e4 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -228,4 +228,25 @@ enum gomp_map_kind
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
 #define GOMP_LAUNCH_OP_MAX 0xffff
 
+/* Bitmask to apply in order to find out the intended device of a target
+   argument.  */
+#define GOMP_TARGET_ARG_DEVICE_MASK            ((1 << 7) - 1)
+/* The target argument is significant for all devices.  */
+#define GOMP_TARGET_ARG_DEVICE_ALL             0
+
+/* Flag set when the actual value is stored in the subsequent element of the
+   target argument array.  */
+#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM       (1 << 7)
+
+/* Bitmask to apply to a target argument to find out the value identifier.  */
+#define GOMP_TARGET_ARG_ID_MASK                        (((1 << 8) - 1) << 8)
+/* Target argument index of NUM_TEAMS.  */
+#define GOMP_TARGET_ARG_NUM_TEAMS              (1 << 8)
+/* Target argument index of THREAD_LIMIT.  */
+#define GOMP_TARGET_ARG_THREAD_LIMIT           (2 << 8)
+
+/* If the value is directly embedded in a target argument, it should be at most
+   16 bits wide and shifted by this many bits.  */
+#define GOMP_TARGET_ARG_VALUE_SHIFT            16
+
 #endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 31ffba0..1d137f1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -499,6 +499,10 @@ struct gomp_target_task
   struct target_mem_desc *tgt;
   struct gomp_task *task;
   struct gomp_team *team;
+  /* Copies of firstprivate mapped data for shared memory accelerators.  */
+  void *firstprivate_copies;
+  /* Device-specific target arguments.  */
+  void **args;
   void *hostaddrs[];
 };
 
@@ -765,7 +769,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **);
 extern bool gomp_create_target_task (struct gomp_device_descr *,
                                     void (*) (void *), size_t, void **,
                                     size_t *, unsigned short *, unsigned int,
-                                    void **, enum gomp_target_task_state);
+                                    void **, void **,
+                                    enum gomp_target_task_state);
 
 static void inline
 gomp_finish_task (struct gomp_task *task)
@@ -939,8 +944,9 @@ struct gomp_device_descr
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
   void *(*dev2dev_func) (int, void *, const void *, size_t);
-  void (*run_func) (int, void *, void *);
-  void (*async_run_func) (int, void *, void *, void *);
+  bool (*can_run_func) (void *);
+  void (*run_func) (int, void *, void *, void **);
+  void (*async_run_func) (int, void *, void *, void **, void *);
 
   /* Splay tree containing information about mapped memory regions.  */
   struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index c238e6a..9c90d59 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *);
 extern void GOMP_target (int, void (*) (void *), const void *,
                         size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *,
-                            unsigned short *, unsigned int, void **,
-                            int, int);
+                            unsigned short *, unsigned int, void **, void **);
 extern void GOMP_target_data (int, const void *,
                              size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_data_ext (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 9874804..a769211 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)),
 }
 
 static void
-host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars)
+host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
+         void **args __attribute__((unused)))
 {
   void (*fn)(void *) = (void (*)(void *)) fn_ptr;
 
diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e6..f990a9e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1261,15 +1261,38 @@ gomp_target_fallback (void (*fn) (void *), void 
**hostaddrs)
   *thr = old_thr;
 }
 
-/* Host fallback with firstprivate map-type handling.  */
+/* Calculate alignment and size requirements of a private copy of data shared
+   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
 
-static void
-gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
-                                  void **hostaddrs, size_t *sizes,
-                                  unsigned short *kinds)
+static inline void
+calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
+                                    unsigned short *kinds, size_t *tgt_align,
+                                    size_t *tgt_size)
 {
-  size_t i, tgt_align = 0, tgt_size = 0;
-  char *tgt = NULL;
+  size_t i;
+  for (i = 0; i < mapnum; i++)
+    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+      {
+       size_t align = (size_t) 1 << (kinds[i] >> 8);
+       if (*tgt_align < align)
+         *tgt_align = align;
+       *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
+       *tgt_size += sizes[i];
+      }
+}
+
+/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */
+
+static inline void
+copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
+                       size_t *sizes, unsigned short *kinds, size_t tgt_align,
+                       size_t tgt_size)
+{
+  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+  if (al)
+    tgt += tgt_align - al;
+  tgt_size = 0;
+  size_t i;
   for (i = 0; i < mapnum; i++)
     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
       {
@@ -1277,28 +1300,53 @@ gomp_target_fallback_firstprivate (void (*fn) (void *), 
size_t mapnum,
        if (tgt_align < align)
          tgt_align = align;
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
-       tgt_size += sizes[i];
+       memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+       hostaddrs[i] = tgt + tgt_size;
+       tgt_size = tgt_size + sizes[i];
       }
+}
+
+/* Host fallback with firstprivate map-type handling.  */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+                                  void **hostaddrs, size_t *sizes,
+                                  unsigned short *kinds)
+{
+  size_t tgt_align = 0, tgt_size = 0;
+  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+                                      &tgt_size);
   if (tgt_align)
     {
-      tgt = gomp_alloca (tgt_size + tgt_align - 1);
-      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
-      if (al)
-       tgt += tgt_align - al;
-      tgt_size = 0;
-      for (i = 0; i < mapnum; i++)
-       if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
-         {
-           size_t align = (size_t) 1 << (kinds[i] >> 8);
-           tgt_size = (tgt_size + align - 1) & ~(align - 1);
-           memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
-           hostaddrs[i] = tgt + tgt_size;
-           tgt_size = tgt_size + sizes[i];
-         }
+      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
+      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+                             tgt_size);
     }
   gomp_target_fallback (fn, hostaddrs);
 }
 
+/* Handle firstprivate map-type for shared memory devices and the host
+   fallback.  Return the pointer of firstprivate copies which has to be freed
+   after use.  */
+
+static void *
+gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
+                                 size_t *sizes, unsigned short *kinds)
+{
+  size_t tgt_align = 0, tgt_size = 0;
+  char *tgt = NULL;
+
+  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+                                      &tgt_size);
+  if (tgt_align)
+    {
+      tgt = gomp_malloc (tgt_size + tgt_align - 1);
+      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+                             tgt_size);
+    }
+  return tgt;
+}
+
 /* Helper function of GOMP_target{,_ext} routines.  */
 
 static void *
@@ -1348,7 +1396,8 @@ GOMP_target (int device, void (*fn) (void *), const void 
*unused,
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     GOMP_MAP_VARS_TARGET);
-  devicep->run_func (devicep->target_id, fn_addr, (void *) 
tgt_vars->tgt_start);
+  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
+                    NULL);
   gomp_unmap_vars (tgt_vars, true);
 }
 
@@ -1356,6 +1405,15 @@ GOMP_target (int device, void (*fn) (void *), const void 
*unused,
    and several arguments have been added:
    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
    DEPEND is array of dependencies, see GOMP_task for details.
+
+   ARGS is a pointer to an array consisting of a variable number of both
+   device-independent and device-specific arguments, each of which takes one
+   or two elements.  The first element specifies for which device it is
+   intended, the type and optionally also the value.  If the value is not
+   present in the first element, the whole second element is the actual
+   value.  The last element of the array is a single NULL.  Among the
+   device-independent arguments can be for example NUM_TEAMS and THREAD_LIMIT.
+
    NUM_TEAMS is positive if GOMP_teams will be called in the body with
    that value, or 1 if teams construct is not present, or 0, if
    teams construct does not have num_teams clause and so the choice is
@@ -1369,14 +1427,10 @@ GOMP_target (int device, void (*fn) (void *), const 
void *unused,
 void
 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
-                unsigned int flags, void **depend, int num_teams,
-                int thread_limit)
+                unsigned int flags, void **depend, void **args)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  (void) num_teams;
-  (void) thread_limit;
-
   if (flags & GOMP_TARGET_FLAG_NOWAIT)
     {
       struct gomp_thread *thr = gomp_thread ();
@@ -1413,7 +1467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t 
mapnum,
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
-                                  sizes, kinds, flags, depend,
+                                  sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
@@ -1430,20 +1484,33 @@ GOMP_target_ext (int device, void (*fn) (void *), 
size_t mapnum,
        gomp_task_maybe_wait_for_dependencies (depend);
     }
 
+  void *fn_addr;
   if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
+      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
     {
       gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
       return;
     }
 
-  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
-  struct target_mem_desc *tgt_vars
-    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
-                    GOMP_MAP_VARS_TARGET);
-  devicep->run_func (devicep->target_id, fn_addr, (void *) 
tgt_vars->tgt_start);
-  gomp_unmap_vars (tgt_vars, true);
+  struct target_mem_desc *tgt_vars;
+  void *fpc = NULL;
+  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    {
+      fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds);
+      tgt_vars = NULL;
+    }
+  else
+    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
+                             true, GOMP_MAP_VARS_TARGET);
+  devicep->run_func (devicep->target_id, fn_addr,
+                    tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
+                    args);
+  if (tgt_vars)
+    gomp_unmap_vars (tgt_vars, true);
+  else
+    free (fpc);
 }
 
 /* Host fallback for GOMP_target_data{,_ext} routines.  */
@@ -1552,7 +1619,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void 
**hostaddrs,
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
-                                          depend, GOMP_TARGET_TASK_DATA))
+                                          depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
@@ -1673,7 +1740,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, 
void **hostaddrs,
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
-                                          flags, depend,
+                                          flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
@@ -1729,8 +1796,11 @@ gomp_target_task_fn (void *data)
 
   if (ttask->fn != NULL)
     {
+      void *fn_addr;
       if (devicep == NULL
-         || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+         || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+         || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
+         || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
        {
          ttask->state = GOMP_TARGET_TASK_FALLBACK;
          gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
@@ -1745,19 +1815,31 @@ gomp_target_task_fn (void *data)
          return false;
        }
 
-      void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
-      ttask->tgt
-       = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
-                        ttask->sizes, ttask->kinds, true,
-                        GOMP_MAP_VARS_TARGET);
+      void *actual_arguments;
+      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+       {
+         ttask->tgt = NULL;
+         ttask->firstprivate_copies
+           = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs,
+                                               ttask->sizes, ttask->kinds);
+         actual_arguments = ttask->hostaddrs;
+       }
+      else
+       {
+         ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
+                                     NULL, ttask->sizes, ttask->kinds, true,
+                                     GOMP_MAP_VARS_TARGET);
+         actual_arguments = (void *) ttask->tgt->tgt_start;
+       }
       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
 
-      devicep->async_run_func (devicep->target_id, fn_addr,
-                              (void *) ttask->tgt->tgt_start, (void *) ttask);
+      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
+                              ttask->args, (void *) ttask);
       return true;
     }
   else if (devicep == NULL
-          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+          || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+          || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
     return false;
 
   size_t i;
@@ -2225,6 +2307,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr 
*device,
     {
       DLSYM (run);
       DLSYM (async_run);
+      DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
     }
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
diff --git a/libgomp/task.c b/libgomp/task.c
index 620facd..f3b05e5 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -593,7 +593,7 @@ bool
 gomp_create_target_task (struct gomp_device_descr *devicep,
                         void (*fn) (void *), size_t mapnum, void **hostaddrs,
                         size_t *sizes, unsigned short *kinds,
-                        unsigned int flags, void **depend,
+                        unsigned int flags, void **depend, void **args,
                         enum gomp_target_task_state state)
 {
   struct gomp_thread *thr = gomp_thread ();
@@ -653,6 +653,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
   ttask->devicep = devicep;
   ttask->fn = fn;
   ttask->mapnum = mapnum;
+  ttask->args = args;
   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp 
b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index f8c1725..48599dd 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -539,7 +539,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void 
*src_ptr,
 
 extern "C" void
 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
-                       void *async_data)
+                       void **, void *async_data)
 {
   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
         tgt_fn, tgt_vars, async_data);
@@ -555,7 +555,7 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void 
*tgt_vars,
 }
 
 extern "C" void
-GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
+GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
 {
   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, 
tgt_vars);
 

Reply via email to