Hi!

This patch finishes the C #pragma omp taskloop support on the gomp 4.1
branch, including library support.

2015-05-21  Jakub Jelinek  <ja...@redhat.com>

        * tree.h (OMP_STANDALONE_CLAUSES): Adjust to cover
        OMP_TARGET_{ENTER,EXIT}_DATA.
        (OMP_CLAUSE_SHARED_FIRSTPRIVATE): Define.
        * gimplify.c (gimplify_scan_omp_clauses): Add lastprivate
        clause to outer taskloop if needed.
        (gimplify_omp_for): Fix a typo.  Fixup OMP_TASKLOOP
        gimplification.
        * omp-low.c (omp_copy_decl_2): If var is TREE_ADDRESSABLE and
        listed in task_shared_vars, clear TREE_ADDRESSABLE on the
        copy.
        (build_outer_var_ref): Add lastprivate argument, pass it through
        recursively.  Handle lastprivate on taskloop construct.
        (install_var_field): Allow multiple fields for a single
        decl - one for firstprivate, another for shared clauses
        on task.
        (scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
        (add_taskreg_looptemp_clauses): Add one more _looptemp_ clause
        for taskloop GIMPLE_OMP_TASK, if it is collapse > 1 with
        non-constant iteration count and there is lastprivate clause
        on the inner GIMPLE_OMP_FOR.
        (finish_taskreg_scan): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
        (lower_rec_input_clauses): Likewise.  Ignore all
        OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE clauses on taskloop construct.
        (lower_lastprivate_clauses): For OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE
        on taskloop lookup decl in outer context.  Pass true
        to build_outer_var_ref lastprivate argument.
        (lower_send_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
        (lower_send_shared_vars): Ignore fields with NULL or
        FIELD_DECL abstract origin.
        (expand_task_call): Use GOMP_TASK_* defines instead of
        hardcoded integers.
        (expand_omp_simd): Handle addressable fd->loop.v.
        (expand_omp_taskloop_for_outer): Initialize the last
        _looptemp_ with total iteration count if needed.
        (expand_omp_taskloop_for_inner): Handle bias and broken_loop.
        (lower_omp_for_lastprivate): Use last _looptemp_ clause
        on taskloop for comparison.
        (create_task_copyfn): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE.
gcc/c-family/
        * c-omp.c (c_finish_omp_for): Clear DECL_INITIAL.
gcc/testsuite/
        * gcc.dg/gomp/taskloop-1.c: New test.
include/
        * gomp-constants.h (GOMP_TASK_FLAG_UNTIED, GOMP_TASK_FLAG_FINAL,
        GOMP_TASK_FLAG_MERGEABLE, GOMP_TASK_FLAG_DEPEND, GOMP_TASK_FLAG_UP,
        GOMP_TASK_FLAG_GRAINSIZE, GOMP_TASK_FLAG_IF, GOMP_TASK_FLAG_NOGROUP):
        Define.
libgomp/
        * libgomp.map (GOMP_4.1): Export GOMP_taskloop and GOMP_taskloop_ull.
        * task.c: Include gomp-constants.h.  Include taskloop.c twice
        with appropriate macros.
        (GOMP_task): Use GOMP_TASK_FLAG_* defines instead of hardcoded
        constants.
        * taskloop.c: New file.
        * testsuite/libgomp.c/for-4.c: New test.
        * testsuite/libgomp.c/taskloop-1.c: New test.
        * testsuite/libgomp.c/taskloop-2.c: New test.
        * testsuite/libgomp.c/taskloop-3.c: New test.

--- gcc/tree.h.jj       2015-05-19 18:56:50.982256719 +0200
+++ gcc/tree.h  2015-05-19 19:04:52.496759752 +0200
@@ -1206,7 +1206,7 @@ extern void protected_set_expr_location
 
 /* Generic accessors for OMP nodes that keep clauses as operand 0.  */
 #define OMP_STANDALONE_CLAUSES(NODE) \
-  TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_UPDATE), 0)
+  TREE_OPERAND (TREE_RANGE_CHECK (NODE, OACC_CACHE, OMP_TARGET_EXIT_DATA), 0)
 
 #define OACC_PARALLEL_BODY(NODE) \
   TREE_OPERAND (OACC_PARALLEL_CHECK (NODE), 0)
@@ -1366,6 +1366,12 @@ extern void protected_set_expr_location
 #define OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ(NODE) \
   (OMP_CLAUSE_CHECK (NODE))->omp_clause.gimple_reduction_init
 
+/* True on a SHARED clause if a FIRSTPRIVATE clause for the same
+   decl is present in the chain (this can happen only for taskloop
+   with FIRSTPRIVATE/LASTPRIVATE on it originally).  */
+#define OMP_CLAUSE_SHARED_FIRSTPRIVATE(NODE) \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_SHARED)->base.public_flag)
+
 #define OMP_CLAUSE_FINAL_EXPR(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_FINAL), 0)
 #define OMP_CLAUSE_IF_EXPR(NODE) \
--- gcc/gimplify.c.jj   2015-05-19 19:02:52.230632257 +0200
+++ gcc/gimplify.c      2015-05-20 19:07:01.317440243 +0200
@@ -6167,6 +6167,12 @@ gimplify_scan_omp_clauses (tree *list_p,
                                         (splay_tree_key) decl) == NULL)
            omp_add_variable (outer_ctx, decl, GOVD_SHARED | GOVD_SEEN);
          else if (outer_ctx
+                  && (outer_ctx->region_type & ORT_TASK) != 0
+                  && outer_ctx->combined_loop
+                  && splay_tree_lookup (outer_ctx->variables,
+                                        (splay_tree_key) decl) == NULL)
+           omp_add_variable (outer_ctx, decl, GOVD_LASTPRIVATE | GOVD_SEEN);
+         else if (outer_ctx
                   && outer_ctx->region_type == ORT_WORKSHARE
                   && outer_ctx->combined_loop
                   && splay_tree_lookup (outer_ctx->variables,
@@ -6227,6 +6233,10 @@ gimplify_scan_omp_clauses (tree *list_p,
                      else if (omp_check_private (octx, decl, false))
                        break;
                    }
+                 else if (octx
+                          && (octx->region_type & ORT_TASK) != 0
+                          && octx->combined_loop)
+                   ;
                  else
                    break;
                  gcc_checking_assert (splay_tree_lookup (octx->variables,
@@ -7061,7 +7071,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
 
   /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
      clause for the IV.  */
-  if (org == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
+  if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
     {
       t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), 0);
       gcc_assert (TREE_CODE (t) == MODIFY_EXPR);
@@ -7075,7 +7085,8 @@ gimplify_omp_for (tree *expr_p, gimple_s
          }
     }
 
-  gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort);
+  if (TREE_CODE (for_stmt) != OMP_TASKLOOP)
+    gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, ort);
   if (TREE_CODE (for_stmt) == OMP_DISTRIBUTE)
     gimplify_omp_ctxp->distribute = true;
 
@@ -7113,9 +7124,69 @@ gimplify_omp_for (tree *expr_p, gimple_s
       for_stmt = walk_tree (&OMP_FOR_BODY (for_stmt), find_combined_omp_for,
                            NULL, NULL);
       gcc_assert (for_stmt != NULL_TREE);
-      gimplify_omp_ctxp->combined_loop = true;
     }
 
+  /* For taskloop, need to gimplify the start, end and step before the
+     taskloop, outside of the taskloop omp context.  */
+  if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+    {
+      for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
+       {
+         t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
+         if (!is_gimple_constant (TREE_OPERAND (t, 1)))
+           {
+             TREE_OPERAND (t, 1)
+               = get_initialized_tmp_var (TREE_OPERAND (t, 1),
+                                          pre_p, NULL);
+             tree c = build_omp_clause (input_location,
+                                        OMP_CLAUSE_FIRSTPRIVATE);
+             OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1);
+             OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+             OMP_FOR_CLAUSES (orig_for_stmt) = c;
+           }
+
+         /* Handle OMP_FOR_COND.  */
+         t = TREE_VEC_ELT (OMP_FOR_COND (for_stmt), i);
+         if (!is_gimple_constant (TREE_OPERAND (t, 1)))
+           {
+             TREE_OPERAND (t, 1)
+               = get_initialized_tmp_var (TREE_OPERAND (t, 1),
+                                          pre_p, NULL);
+             tree c = build_omp_clause (input_location,
+                                        OMP_CLAUSE_FIRSTPRIVATE);
+             OMP_CLAUSE_DECL (c) = TREE_OPERAND (t, 1);
+             OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+             OMP_FOR_CLAUSES (orig_for_stmt) = c;
+           }
+
+         /* Handle OMP_FOR_INCR.  */
+         t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
+         if (TREE_CODE (t) == MODIFY_EXPR)
+           {
+             decl = TREE_OPERAND (t, 0);
+             t = TREE_OPERAND (t, 1);
+             tree *tp = &TREE_OPERAND (t, 1);
+             if (TREE_CODE (t) == PLUS_EXPR && *tp == decl)
+               tp = &TREE_OPERAND (t, 0);
+
+             if (!is_gimple_constant (*tp))
+               {
+                 *tp = get_initialized_tmp_var (*tp, pre_p, NULL);
+                 tree c = build_omp_clause (input_location,
+                                            OMP_CLAUSE_FIRSTPRIVATE);
+                 OMP_CLAUSE_DECL (c) = *tp;
+                 OMP_CLAUSE_CHAIN (c) = OMP_FOR_CLAUSES (orig_for_stmt);
+                 OMP_FOR_CLAUSES (orig_for_stmt) = c;
+               }
+           }
+       }
+
+      gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (orig_for_stmt), pre_p, ort);
+    }
+
+  if (orig_for_stmt != for_stmt)
+    gimplify_omp_ctxp->combined_loop = true;
+
   for_body = NULL;
   gcc_assert (TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt))
              == TREE_VEC_LENGTH (OMP_FOR_COND (for_stmt)));
@@ -7175,6 +7246,11 @@ gimplify_omp_for (tree *expr_p, gimple_s
                      else if (omp_check_private (outer, decl, false))
                        outer = NULL;
                    }
+                 else if (((outer->region_type & ORT_TASK) != 0)
+                          && outer->combined_loop
+                          && !omp_check_private (gimplify_omp_ctxp,
+                                                 decl, false))
+                   ;
                  else if (outer->region_type != ORT_COMBINED_PARALLEL)
                    outer = NULL;
                  if (outer)
@@ -7206,6 +7282,11 @@ gimplify_omp_for (tree *expr_p, gimple_s
                      else if (omp_check_private (outer, decl, false))
                        outer = NULL;
                    }
+                 else if (((outer->region_type & ORT_TASK) != 0)
+                          && outer->combined_loop
+                          && !omp_check_private (gimplify_omp_ctxp,
+                                                 decl, false))
+                   ;
                  else if (outer->region_type != ORT_COMBINED_PARALLEL)
                    outer = NULL;
                  if (outer)
@@ -7418,14 +7499,39 @@ gimplify_omp_for (tree *expr_p, gimple_s
 
   BITMAP_FREE (has_decl_expr);
 
-  gimplify_and_add (OMP_FOR_BODY (orig_for_stmt), &for_body);
+  if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+    {
+      push_gimplify_context ();
+      if (TREE_CODE (OMP_FOR_BODY (orig_for_stmt)) != BIND_EXPR)
+       {
+         OMP_FOR_BODY (orig_for_stmt)
+           = build3 (BIND_EXPR, void_type_node, NULL,
+                     OMP_FOR_BODY (orig_for_stmt), NULL);
+         TREE_SIDE_EFFECTS (OMP_FOR_BODY (orig_for_stmt)) = 1;
+       }
+    }
+
+  gimple g = gimplify_and_return_first (OMP_FOR_BODY (orig_for_stmt),
+                                       &for_body);
+
+  if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+    {
+      if (gimple_code (g) == GIMPLE_BIND)
+       pop_gimplify_context (g);
+      else
+       pop_gimplify_context (NULL);
+    }
 
   if (orig_for_stmt != for_stmt)
     for (i = 0; i < TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)); i++)
       {
        t = TREE_VEC_ELT (OMP_FOR_INIT (for_stmt), i);
        decl = TREE_OPERAND (t, 0);
+       struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp;
+       if (TREE_CODE (orig_for_stmt) == OMP_TASKLOOP)
+         gimplify_omp_ctxp = ctx->outer_context;
        var = create_tmp_var (TREE_TYPE (decl), get_name (decl));
+       gimplify_omp_ctxp = ctx;
        omp_add_variable (gimplify_omp_ctxp, var, GOVD_PRIVATE | GOVD_SEEN);
        TREE_OPERAND (t, 0) = var;
        t = TREE_VEC_ELT (OMP_FOR_INCR (for_stmt), i);
@@ -7524,14 +7630,18 @@ gimplify_omp_for (tree *expr_p, gimple_s
            gforo_clauses_ptr = &OMP_CLAUSE_CHAIN (*gforo_clauses_ptr);
            break;
          /* For lastprivate, keep the clause on inner taskloop, and add
-            a shared clause on task.  */
+            a shared clause on task.  If the same decl is also firstprivate,
+            add also firstprivate clause on the inner taskloop.  */
          case OMP_CLAUSE_LASTPRIVATE:
            *gfor_clauses_ptr = c;
            gfor_clauses_ptr = &OMP_CLAUSE_CHAIN (c);
-           *gtask_clauses_ptr = build_omp_clause (OMP_CLAUSE_LOCATION (c),
-                                                  OMP_CLAUSE_SHARED);
+           *gtask_clauses_ptr
+             = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_SHARED);
            OMP_CLAUSE_DECL (*gtask_clauses_ptr) = OMP_CLAUSE_DECL (c);
-           gtask_clauses_ptr = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr);
+           if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
+             OMP_CLAUSE_SHARED_FIRSTPRIVATE (*gtask_clauses_ptr) = 1;
+           gtask_clauses_ptr
+             = &OMP_CLAUSE_CHAIN (*gtask_clauses_ptr);
            break;
          default:
            gcc_unreachable ();
@@ -7539,8 +7649,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
       *gfor_clauses_ptr = NULL_TREE;
       *gtask_clauses_ptr = NULL_TREE;
       *gforo_clauses_ptr = NULL_TREE;
-      gimple g
-       = gimple_build_bind (NULL_TREE, gfor, NULL_TREE);
+      g = gimple_build_bind (NULL_TREE, gfor, NULL_TREE);
       g = gimple_build_omp_task (g, task_clauses, NULL_TREE, NULL_TREE,
                                 NULL_TREE, NULL_TREE, NULL_TREE);
       gimple_omp_task_set_taskloop_p (g, true);
--- gcc/omp-low.c.jj    2015-05-19 18:56:55.730182802 +0200
+++ gcc/omp-low.c       2015-05-20 19:20:25.828928071 +0200
@@ -1129,6 +1129,14 @@ omp_copy_decl_2 (tree var, tree name, tr
 
   DECL_CONTEXT (copy) = current_function_decl;
   DECL_CHAIN (copy) = ctx->block_vars;
+  /* If VAR is listed in task_shared_vars, it means it wasn't
+     originally addressable and is so only because task needs to take
+     its address.  But we don't need to take address of privatizations
+     from that var.  */
+  if (TREE_ADDRESSABLE (var)
+      && task_shared_vars
+      && bitmap_bit_p (task_shared_vars, DECL_UID (var)))
+    TREE_ADDRESSABLE (copy) = 0;
   ctx->block_vars = copy;
 
   return copy;
@@ -1179,7 +1187,7 @@ build_receiver_ref (tree var, bool by_re
    this is some variable.  */
 
 static tree
-build_outer_var_ref (tree var, omp_context *ctx)
+build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false)
 {
   tree x;
 
@@ -1188,7 +1196,7 @@ build_outer_var_ref (tree var, omp_conte
   else if (is_variable_sized (var))
     {
       x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
-      x = build_outer_var_ref (x, ctx);
+      x = build_outer_var_ref (x, ctx, lastprivate);
       x = build_simple_mem_ref (x);
     }
   else if (is_taskreg_ctx (ctx))
@@ -1209,6 +1217,33 @@ build_outer_var_ref (tree var, omp_conte
       if (x == NULL_TREE)
        x = var;
     }
+  else if (lastprivate && is_taskloop_ctx (ctx))
+    {
+      gcc_assert (ctx->outer);
+      splay_tree_node n
+       = splay_tree_lookup (ctx->outer->field_map,
+                            (splay_tree_key) &DECL_UID (var));
+      if (n == NULL)
+       {
+         if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
+           x = var;
+         else
+           x = lookup_decl (var, ctx->outer);
+       }
+      else
+       {
+         tree field = (tree) n->value;
+         /* If the receiver record type was remapped in the child function,
+            remap the field into the new record type.  */
+         x = maybe_lookup_field (field, ctx->outer);
+         if (x != NULL)
+           field = x;
+
+         x = build_simple_mem_ref (ctx->outer->receiver_decl);
+         x = omp_build_component_ref (x, field);
+         x = build_simple_mem_ref (x);
+       }
+    }
   else if (ctx->outer)
     x = lookup_decl (var, ctx->outer);
   else if (is_reference (var))
@@ -1239,11 +1274,17 @@ static void
 install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
 {
   tree field, type, sfield = NULL_TREE;
+  splay_tree_key key = (splay_tree_key) var;
 
+  if ((mask & 8) != 0)
+    {
+      key = (splay_tree_key) &DECL_UID (var);
+      gcc_checking_assert (key != (splay_tree_key) var);
+    }
   gcc_assert ((mask & 1) == 0
-             || !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
+             || !splay_tree_lookup (ctx->field_map, key));
   gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
-             || !splay_tree_lookup (ctx->sfield_map, (splay_tree_key) var));
+             || !splay_tree_lookup (ctx->sfield_map, key));
   gcc_assert ((mask & 3) == 3
              || !is_gimple_omp_oacc (ctx->stmt));
 
@@ -1298,7 +1339,7 @@ install_var_field (tree var, bool by_ref
          ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
          for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
            {
-             sfield = build_decl (DECL_SOURCE_LOCATION (var),
+             sfield = build_decl (DECL_SOURCE_LOCATION (t),
                                   FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
              DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
              insert_field_into_struct (ctx->srecord_type, sfield);
@@ -1313,11 +1354,9 @@ install_var_field (tree var, bool by_ref
     }
 
   if (mask & 1)
-    splay_tree_insert (ctx->field_map, (splay_tree_key) var,
-                      (splay_tree_value) field);
+    splay_tree_insert (ctx->field_map, key, (splay_tree_value) field);
   if ((mask & 2) && ctx->sfield_map)
-    splay_tree_insert (ctx->sfield_map, (splay_tree_key) var,
-                      (splay_tree_value) sfield);
+    splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
 }
 
 static tree
@@ -1718,6 +1757,11 @@ scan_sharing_clauses (tree clauses, omp_
          if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
            break;
          by_ref = use_pointer_for_field (decl, ctx);
+         if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+           {
+             gcc_assert (by_ref);
+             break;
+           }
          if (! TREE_READONLY (decl)
              || TREE_ADDRESSABLE (decl)
              || by_ref
@@ -1998,8 +2042,14 @@ scan_sharing_clauses (tree clauses, omp_
          if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
            break;
          decl = OMP_CLAUSE_DECL (c);
-         if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
-           fixup_remapped_decl (decl, ctx, false);
+         if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
+           break;
+         if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+           {
+             install_var_field (decl, true, 11, ctx);
+             break;
+           }
+         fixup_remapped_decl (decl, ctx, false);
          break;
 
        case OMP_CLAUSE_MAP:
@@ -2336,7 +2386,16 @@ add_taskreg_looptemp_clauses (enum gf_ma
       tree type = fd.iter_type;
       if (fd.collapse > 1
          && TREE_CODE (fd.loop.n2) != INTEGER_CST)
-       count += fd.collapse - 1;
+       {
+         count += fd.collapse - 1;
+         /* For taskloop, if there are lastprivate clauses on the inner
+            GIMPLE_OMP_FOR, add one more temporary for the total number
+            of iterations (product of count1 ... countN-1).  */
+         if (msk == GF_OMP_FOR_KIND_TASKLOOP
+             && find_omp_clause (gimple_omp_for_clauses (for_stmt),
+                                 OMP_CLAUSE_LASTPRIVATE))
+           count++;
+       }
       for (i = 0; i < count; i++)
        {
          tree temp = create_tmp_var (type);
@@ -2480,7 +2539,8 @@ finish_taskreg_scan (omp_context *ctx)
 
       for (c = gimple_omp_taskreg_clauses (ctx->stmt);
           c; c = OMP_CLAUSE_CHAIN (c))
-       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
+           && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
          {
            tree decl = OMP_CLAUSE_DECL (c);
 
@@ -3755,7 +3815,8 @@ lower_rec_input_clauses (tree clauses, g
                continue;
              if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
                {
-                 gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
+                 gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)
+                             || is_global_var (OMP_CLAUSE_DECL (c)));
                  continue;
                }
            case OMP_CLAUSE_FIRSTPRIVATE:
@@ -3775,7 +3836,7 @@ lower_rec_input_clauses (tree clauses, g
              if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
                {
                  lastprivate_firstprivate = true;
-                 if (pass != 0)
+                 if (pass != 0 || is_taskloop_ctx (ctx))
                    continue;
                }
              /* Even without corresponding firstprivate, if
@@ -3936,6 +3997,11 @@ lower_rec_input_clauses (tree clauses, g
              /* Shared global vars are just accessed directly.  */
              if (is_global_var (new_var))
                break;
+             /* For taskloop firstprivate/lastprivate, represented
+                as firstprivate and shared clause on the task, new_var
+                is the firstprivate var.  */
+             if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+               break;
              /* Set up the DECL_VALUE_EXPR for shared variables now.  This
                 needs to be delayed until after fixup_child_record_type so
                 that we get the correct type during the dereference.  */
@@ -4467,7 +4533,15 @@ lower_lastprivate_clauses (tree clauses,
              && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
        {
          var = OMP_CLAUSE_DECL (c);
-         new_var = lookup_decl (var, ctx);
+         if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+             && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)
+             && is_taskloop_ctx (ctx))
+           {
+             gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer));
+             new_var = lookup_decl (var, ctx->outer);
+           }
+         else
+           new_var = lookup_decl (var, ctx);
 
          if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
            {
@@ -4511,7 +4585,7 @@ lower_lastprivate_clauses (tree clauses,
              OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL;
            }
 
-         x = build_outer_var_ref (var, ctx);
+         x = build_outer_var_ref (var, ctx, true);
          if (is_reference (var))
            new_var = build_simple_mem_ref_loc (clause_loc, new_var);
          x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
@@ -4792,6 +4866,10 @@ lower_send_clauses (tree clauses, gimple
        case OMP_CLAUSE_LASTPRIVATE:
        case OMP_CLAUSE_REDUCTION:
          break;
+       case OMP_CLAUSE_SHARED:
+         if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+           break;
+         continue;
        case OMP_CLAUSE__LOOPTEMP_:
          if (ignored_looptemp)
            {
@@ -4809,6 +4887,25 @@ lower_send_clauses (tree clauses, gimple
       if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
          && is_global_var (var))
        continue;
+
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
+       {
+         /* Handle taskloop firstprivate/lastprivate, where the
+            lastprivate on GIMPLE_OMP_TASK is represented as
+            OMP_CLAUSE_SHARED_FIRSTPRIVATE.  */
+         tree f
+           = (tree)
+             splay_tree_lookup (ctx->sfield_map
+                                ? ctx->sfield_map : ctx->field_map,
+                                (splay_tree_key) &DECL_UID (val))->value;
+         gcc_assert (use_pointer_for_field (val, ctx));
+         x = omp_build_component_ref (ctx->sender_decl, f);
+         var = build_fold_addr_expr (var);
+         gimplify_assign (x, var, ilist);
+         DECL_ABSTRACT_ORIGIN (f) = NULL;
+         continue;
+       }
+
       if (is_variable_sized (val))
        continue;
       by_ref = use_pointer_for_field (val, NULL);
@@ -4879,6 +4976,9 @@ lower_send_shared_vars (gimple_seq *ilis
   for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
     {
       ovar = DECL_ABSTRACT_ORIGIN (f);
+      if (!ovar || TREE_CODE (ovar) == FIELD_DECL)
+       continue;
+
       nvar = maybe_lookup_decl (ovar, ctx);
       if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
        continue;
@@ -5158,7 +5258,9 @@ expand_task_call (struct omp_region *reg
   tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
 
   unsigned int iflags
-    = (untied ? 1 : 0) | (mergeable ? 4 : 0) | (depend ? 8 : 0);
+    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
+      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
+      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
 
   bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
   tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
@@ -5178,7 +5280,7 @@ expand_task_call (struct omp_region *reg
       endvar = OMP_CLAUSE_DECL (endvar);
       step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
       if (fd.loop.cond_code == LT_EXPR)
-       iflags |= 256;
+       iflags |= GOMP_TASK_FLAG_UP;
       tree tclauses = gimple_omp_for_clauses (g);
       num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
       if (num_tasks)
@@ -5188,7 +5290,7 @@ expand_task_call (struct omp_region *reg
          num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
          if (num_tasks)
            {
-             iflags |= 512;
+             iflags |= GOMP_TASK_FLAG_GRAINSIZE;
              num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
            }
          else
@@ -5196,9 +5298,9 @@ expand_task_call (struct omp_region *reg
        }
       num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
       if (ifc == NULL_TREE)
-       iflags |= 1024;
+       iflags |= GOMP_TASK_FLAG_IF;
       if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP))
-       iflags |= 2048;
+       iflags |= GOMP_TASK_FLAG_NOGROUP;
       ull = fd.iter_type == long_long_unsigned_type_node;
     }
 
@@ -5211,7 +5313,8 @@ expand_task_call (struct omp_region *reg
        {
          tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
          t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
-                              build_int_cst (unsigned_type_node, 1024),
+                              build_int_cst (unsigned_type_node,
+                                             GOMP_TASK_FLAG_IF),
                               build_int_cst (unsigned_type_node, 0));
          flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
                                   flags, t);
@@ -5224,7 +5327,8 @@ expand_task_call (struct omp_region *reg
     {
       tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
       t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
-                          build_int_cst (unsigned_type_node, 2),
+                          build_int_cst (unsigned_type_node,
+                                         GOMP_TASK_FLAG_FINAL),
                           build_int_cst (unsigned_type_node, 0));
       flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
     }
@@ -7925,7 +8029,11 @@ expand_omp_simd (struct omp_region *regi
   t = fold_convert (type, n2);
   t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                false, GSI_CONTINUE_LINKING);
-  t = build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t);
+  tree v = fd->loop.v;
+  if (DECL_P (v) && TREE_ADDRESSABLE (v))
+    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+                                 false, GSI_CONTINUE_LINKING);
+  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
   cond_stmt = gimple_build_cond_empty (t);
   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
@@ -8124,6 +8232,28 @@ expand_omp_taskloop_for_outer (struct om
   innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
   gcc_assert (innerc);
   tree endvar = OMP_CLAUSE_DECL (innerc);
+  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
+    {
+      gcc_assert (innerc);
+      for (i = 1; i < fd->collapse; i++)
+       {
+         innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                   OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+       }
+      innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      if (innerc)
+       {
+         /* If needed (inner taskloop has lastprivate clause), propagate
+            down the total number of iterations.  */
+         tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
+                                            NULL_TREE, false,
+                                            GSI_CONTINUE_LINKING);
+         assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
+         gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+       }
+    }
 
   t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
                                 GSI_CONTINUE_LINKING);
@@ -8167,7 +8297,7 @@ expand_omp_taskloop_for_inner (struct om
                               struct omp_for_data *fd,
                               gimple inner_stmt)
 {
-  tree e, t, type, itype, vmain, vback;
+  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
   basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
   basic_block fin_bb;
   gimple_stmt_iterator gsi;
@@ -8180,6 +8310,29 @@ expand_omp_taskloop_for_inner (struct om
   if (POINTER_TYPE_P (type))
     itype = signed_type_for (type);
 
+  /* See if we need to bias by LLONG_MIN.  */
+  if (fd->iter_type == long_long_unsigned_type_node
+      && TREE_CODE (type) == INTEGER_TYPE
+      && !TYPE_UNSIGNED (type))
+    {
+      tree n1, n2;
+
+      if (fd->loop.cond_code == LT_EXPR)
+       {
+         n1 = fd->loop.n1;
+         n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
+       }
+      else
+       {
+         n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
+         n2 = fd->loop.n1;
+       }
+      if (TREE_CODE (n1) != INTEGER_CST
+         || TREE_CODE (n2) != INTEGER_CST
+         || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
+       bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
+    }
+
   entry_bb = region->entry;
   cont_bb = region->cont;
   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
@@ -8220,6 +8373,11 @@ expand_omp_taskloop_for_inner (struct om
   innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
   gcc_assert (innerc);
   n2 = OMP_CLAUSE_DECL (innerc);
+  if (bias)
+    {
+      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
+      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
+    }
   n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
                                 true, NULL_TREE, true, GSI_SAME_STMT);
   n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
@@ -8310,7 +8468,13 @@ expand_omp_taskloop_for_inner (struct om
   gsi_remove (&gsi, true);
 
   FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
-  remove_edge (BRANCH_EDGE (entry_bb));
+  if (!broken_loop)
+    remove_edge (BRANCH_EDGE (entry_bb));
+  else
+    {
+      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
+      region->outer->cont = NULL;
+    }
 
   /* Connect all the blocks.  */
   if (!broken_loop)
@@ -8334,8 +8498,9 @@ expand_omp_taskloop_for_inner (struct om
 
   set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
-  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
-                          recompute_dominator (CDI_DOMINATORS, fin_bb));
+  if (!broken_loop)
+    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
+                            recompute_dominator (CDI_DOMINATORS, fin_bb));
 
   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
     {
@@ -11054,15 +11219,42 @@ lower_omp_for_lastprivate (struct omp_fo
   tree n2 = fd->loop.n2;
   if (fd->collapse > 1
       && TREE_CODE (n2) != INTEGER_CST
-      && gimple_omp_for_combined_into_p (fd->for_stmt)
-      && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
+      && gimple_omp_for_combined_into_p (fd->for_stmt))
     {
-      gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
-      if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR)
+      struct omp_context *task_ctx = NULL;
+      if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
        {
-         struct omp_for_data outer_fd;
-         extract_omp_for_data (gfor, &outer_fd, NULL);
-         n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
+         gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
+         if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR)
+           {
+             struct omp_for_data outer_fd;
+             extract_omp_for_data (gfor, &outer_fd, NULL);
+             n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
+           }
+         else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
+           task_ctx = ctx->outer->outer;
+       }
+      else if (is_task_ctx (ctx->outer))
+       task_ctx = ctx->outer;
+      if (task_ctx)
+       {
+         int i;
+         tree innerc
+           = find_omp_clause (gimple_omp_task_clauses (task_ctx->stmt),
+                              OMP_CLAUSE__LOOPTEMP_);
+         gcc_assert (innerc);
+         for (i = 0; i < fd->collapse; i++)
+           {
+             innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                       OMP_CLAUSE__LOOPTEMP_);
+             gcc_assert (innerc);
+           }
+         innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
+                                   OMP_CLAUSE__LOOPTEMP_);
+         if (innerc)
+           n2 = fold_convert (TREE_TYPE (n2),
+                              lookup_decl (OMP_CLAUSE_DECL (innerc),
+                                           task_ctx));
        }
     }
   cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
@@ -11426,6 +11618,13 @@ create_task_copyfn (gomp_task *task_stmt
        n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
        if (n == NULL)
          break;
+       if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
+         {
+           decl = (tree) n->value;
+           n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
+           if (n == NULL)
+             break;
+         }
        f = (tree) n->value;
        if (tcctx.cb.decl_map)
          f = *tcctx.cb.decl_map->get (f);
--- gcc/c-family/c-omp.c.jj     2015-05-19 18:54:16.202666384 +0200
+++ gcc/c-family/c-omp.c        2015-05-19 19:04:52.500759690 +0200
@@ -491,6 +491,7 @@ c_finish_omp_for (location_t locus, enum
              init = integer_zero_node;
              fail = true;
            }
+         DECL_INITIAL (decl) = NULL_TREE;
 
          init = build_modify_expr (elocus, decl, NULL_TREE, NOP_EXPR,
                                    /* FIXME diagnostics: This should
--- gcc/testsuite/gcc.dg/gomp/taskloop-1.c.jj   2015-05-19 19:04:52.495759768 +0200
+++ gcc/testsuite/gcc.dg/gomp/taskloop-1.c      2015-05-19 19:04:52.495759768 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+int e;
+int bar (int, int);
+void baz (int, int, int, int *, int *, int *);
+
+void
+foo (int a, int b, int c, int d, int f, int g, int h, int j, int k, int l)
+{
+  int i;
+  #pragma omp taskloop if (a) final (b) untied default(none) mergeable \
+    private(c) firstprivate (e) shared (d) num_tasks(f) collapse(1)
+  for (i = bar (g, h) + j; i < k; i += l)
+    baz (i, d, e++, &c, &d, &e);
+}
--- include/gomp-constants.h.jj 2015-05-19 18:54:15.724673826 +0200
+++ include/gomp-constants.h    2015-05-19 19:04:52.494759783 +0200
@@ -113,4 +113,14 @@ enum gomp_map_kind
 #define GOMP_DEVICE_ICV                        -1
 #define GOMP_DEVICE_HOST_FALLBACK      -2
 
+/* GOMP_task/GOMP_taskloop* flags argument.  */
+#define GOMP_TASK_FLAG_UNTIED          (1 << 0)
+#define GOMP_TASK_FLAG_FINAL           (1 << 1)
+#define GOMP_TASK_FLAG_MERGEABLE       (1 << 2)
+#define GOMP_TASK_FLAG_DEPEND          (1 << 3)
+#define GOMP_TASK_FLAG_UP              (1 << 8)
+#define GOMP_TASK_FLAG_GRAINSIZE       (1 << 9)
+#define GOMP_TASK_FLAG_IF              (1 << 10)
+#define GOMP_TASK_FLAG_NOGROUP         (1 << 11)
+
 #endif
--- libgomp/libgomp.map.jj      2015-05-19 18:54:16.332664361 +0200
+++ libgomp/libgomp.map 2015-05-19 19:04:52.493759799 +0200
@@ -240,6 +240,12 @@ GOMP_4.0.1 {
        GOMP_offload_unregister;
 } GOMP_4.0;
 
+GOMP_4.1 {
+  global:
+       GOMP_taskloop;
+       GOMP_taskloop_ull;
+} GOMP_4.0.1;
+
 OACC_2.0 {
   global:
        acc_get_num_devices;
--- libgomp/task.c.jj   2015-05-19 18:54:16.322664516 +0200
+++ libgomp/task.c      2015-05-19 19:04:52.494759783 +0200
@@ -29,6 +29,7 @@
 #include "libgomp.h"
 #include <stdlib.h>
 #include <string.h>
+#include "gomp-constants.h"
 
 typedef struct gomp_task_depend_entry *hash_entry_type;
 
@@ -126,8 +127,7 @@ GOMP_task (void (*fn) (void *), void *da
      might be running on different thread than FN.  */
   if (cpyfn)
     if_clause = false;
-  if (flags & 1)
-    flags &= ~1;
+  flags &= ~GOMP_TASK_FLAG_UNTIED;
 #endif
 
   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
@@ -148,12 +148,14 @@ GOMP_task (void (*fn) (void *), void *da
         depend clauses for non-deferred tasks other than this, because
         the parent task is suspended until the child task finishes and thus
         it can't start further child tasks.  */
-      if ((flags & 8) && thr->task && thr->task->depend_hash)
+      if ((flags & GOMP_TASK_FLAG_DEPEND)
+         && thr->task && thr->task->depend_hash)
        gomp_task_maybe_wait_for_dependencies (depend);
 
       gomp_init_task (&task, thr->task, gomp_icv (false));
       task.kind = GOMP_TASK_IFFALSE;
-      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
+      task.final_task = (thr->task && thr->task->final_task)
+                       || (flags & GOMP_TASK_FLAG_FINAL);
       if (thr->task)
        {
          task.in_tied_task = thr->task->in_tied_task;
@@ -196,7 +198,7 @@ GOMP_task (void (*fn) (void *), void *da
       bool do_wake;
       size_t depend_size = 0;
 
-      if (flags & 8)
+      if (flags & GOMP_TASK_FLAG_DEPEND)
        depend_size = ((uintptr_t) depend[0]
                       * sizeof (struct gomp_task_depend_entry));
       task = gomp_malloc (sizeof (*task) + depend_size
@@ -219,7 +221,7 @@ GOMP_task (void (*fn) (void *), void *da
       task->kind = GOMP_TASK_WAITING;
       task->fn = fn;
       task->fn_data = arg;
-      task->final_task = (flags & 2) >> 1;
+      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
       gomp_mutex_lock (&team->task_lock);
       /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  */
@@ -412,6 +414,25 @@ GOMP_task (void (*fn) (void *), void *da
     }
 }
 
+ialias (GOMP_taskgroup_start)
+ialias (GOMP_taskgroup_end)
+
+#define TYPE long
+#define UTYPE unsigned long
+#define TYPE_is_long 1
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef TYPE_is_long
+
+#define TYPE unsigned long long
+#define UTYPE TYPE
+#define GOMP_taskloop GOMP_taskloop_ull
+#include "taskloop.c"
+#undef TYPE
+#undef UTYPE
+#undef GOMP_taskloop
+
 static inline bool
 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
                   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
--- libgomp/taskloop.c.jj       2015-05-19 19:04:52.493759799 +0200
+++ libgomp/taskloop.c  2015-05-20 18:37:38.584454280 +0200
@@ -0,0 +1,360 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <ja...@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file handles the taskloop construct.  It is included twice, once
+   for the long and once for unsigned long long variant.  */
+
+/* Called when encountering a taskloop directive.  Iterations START to END
+   by STEP are split into NUM_TASKS tasks (a grainsize if FLAGS has
+   GOMP_TASK_FLAG_GRAINSIZE); they run undeferred without GOMP_TASK_FLAG_IF.  */
+
+void
+GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
+              long arg_size, long arg_align, unsigned flags,
+              unsigned long num_tasks,
+              TYPE start, TYPE end, TYPE step)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  struct gomp_team *team = thr->ts.team;
+
+#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
+  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
+     tied to one thread all the time.  This means UNTIED tasks must be
+     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
+     might be running on different thread than FN.  */
+  if (cpyfn)
+    flags &= ~GOMP_TASK_FLAG_IF;
+  flags &= ~GOMP_TASK_FLAG_UNTIED;
+#endif
+
+  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
+  if (team && gomp_team_barrier_cancelled (&team->barrier))
+    return;
+
+#ifdef TYPE_is_long
+  TYPE s = step;
+  if (step > 0)
+    {
+      if (start >= end)
+       return;
+      s--;
+    }
+  else
+    {
+      if (start <= end)
+       return;
+      s++;
+    }
+  UTYPE n = (end - start + s) / step;
+#else
+  UTYPE n;
+  if (flags & GOMP_TASK_FLAG_UP)
+    {
+      if (start >= end)
+       return;
+      n = (end - start + step - 1) / step;
+    }
+  else
+    {
+      if (start <= end)
+       return;
+      n = (start - end - step - 1) / -step;
+    }
+#endif
+
+  TYPE task_step = step;
+  unsigned long nfirst = n;
+  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
+    {
+      unsigned long grainsize = num_tasks;
+#ifdef TYPE_is_long
+      num_tasks = n / grainsize;
+#else
+      UTYPE ndiv = n / grainsize;
+      num_tasks = ndiv;
+      if (num_tasks != ndiv)
+       num_tasks = ~0UL;
+#endif
+      if (num_tasks <= 1)
+       {
+         num_tasks = 1;
+         task_step = end - start;
+       }
+      else if (num_tasks >= grainsize
+#ifndef TYPE_is_long
+              && num_tasks != ~0UL
+#endif
+             )
+       {
+         UTYPE mul = num_tasks * grainsize;
+         task_step = (TYPE) grainsize * step;
+         if (mul != n)
+           {
+             task_step += step;
+             nfirst = n - mul - 1;
+           }
+       }
+      else
+       {
+         UTYPE div = n / num_tasks;
+         UTYPE mod = n % num_tasks;
+         task_step = (TYPE) div * step;
+         if (mod)
+           {
+             task_step += step;
+             nfirst = mod - 1;
+           }
+       }
+    }
+  else
+    {
+      if (num_tasks == 0)
+       num_tasks = team ? team->nthreads : 1;
+      if (num_tasks >= n)
+       num_tasks = n;
+      else
+       {
+         UTYPE div = n / num_tasks;
+         UTYPE mod = n % num_tasks;
+         task_step = (TYPE) div * step;
+         if (mod)
+           {
+             task_step += step;
+             nfirst = mod - 1;
+           }
+       }
+    }
+
+  if (flags & GOMP_TASK_FLAG_NOGROUP)
+    {
+      if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
+       return;
+    }
+  else
+    ialias_call (GOMP_taskgroup_start) ();
+
+  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
+      || (thr->task && thr->task->final_task)
+      || team->task_count + num_tasks > 64 * team->nthreads)
+    {
+      unsigned long i;
+      if (__builtin_expect (cpyfn != NULL, 0))
+       {
+         struct gomp_task task[num_tasks];
+         struct gomp_task *parent = thr->task;
+         arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
+         char buf[num_tasks * arg_size + arg_align - 1];
+         char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
+                               & ~(uintptr_t) (arg_align - 1));
+         char *orig_arg = arg;
+         for (i = 0; i < num_tasks; i++)
+           {
+             gomp_init_task (&task[i], parent, gomp_icv (false));
+             task[i].kind = GOMP_TASK_IFFALSE;
+             task[i].final_task = (thr->task && thr->task->final_task)
+                                  || (flags & GOMP_TASK_FLAG_FINAL);
+             if (thr->task)
+               {
+                 task[i].in_tied_task = thr->task->in_tied_task;
+                 task[i].taskgroup = thr->task->taskgroup;
+               }
+             thr->task = &task[i];
+             cpyfn (arg, data);
+             arg += arg_size;
+           }
+         arg = orig_arg;
+         for (i = 0; i < num_tasks; i++)
+           {
+             thr->task = &task[i];
+             ((TYPE *)arg)[0] = start;
+             start += task_step;
+             ((TYPE *)arg)[1] = start;
+             if (i == nfirst)
+               task_step -= step;
+             fn (arg);
+             arg += arg_size;
+             if (task[i].children != NULL)
+               {
+                 gomp_mutex_lock (&team->task_lock);
+                 gomp_clear_parent (task[i].children);
+                 gomp_mutex_unlock (&team->task_lock);
+               }
+             gomp_end_task ();
+           }
+       }
+      else
+       for (i = 0; i < num_tasks; i++)
+         {
+           struct gomp_task task;
+
+           gomp_init_task (&task, thr->task, gomp_icv (false));
+           task.kind = GOMP_TASK_IFFALSE;
+           task.final_task = (thr->task && thr->task->final_task)
+                             || (flags & GOMP_TASK_FLAG_FINAL);
+           if (thr->task)
+             {
+               task.in_tied_task = thr->task->in_tied_task;
+               task.taskgroup = thr->task->taskgroup;
+             }
+           thr->task = &task;
+           ((TYPE *)data)[0] = start;
+           start += task_step;
+           ((TYPE *)data)[1] = start;
+           if (i == nfirst)
+             task_step -= step;
+           fn (data);
+           if (task.children != NULL)
+             {
+               gomp_mutex_lock (&team->task_lock);
+               gomp_clear_parent (task.children);
+               gomp_mutex_unlock (&team->task_lock);
+             }
+           gomp_end_task ();
+         }
+    }
+  else
+    {
+      struct gomp_task *tasks[num_tasks];
+      struct gomp_task *parent = thr->task;
+      struct gomp_taskgroup *taskgroup = parent->taskgroup;
+      char *arg;
+      int do_wake;
+      unsigned long i;
+
+      for (i = 0; i < num_tasks; i++)
+       {
+         struct gomp_task *task
+           = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
+         tasks[i] = task;
+         arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
+                         & ~(uintptr_t) (arg_align - 1));
+         gomp_init_task (task, parent, gomp_icv (false));
+         task->kind = GOMP_TASK_IFFALSE;
+         task->in_tied_task = parent->in_tied_task;
+         task->taskgroup = taskgroup;
+         thr->task = task;
+         if (cpyfn)
+           {
+             cpyfn (arg, data);
+             task->copy_ctors_done = true;
+           }
+         else
+           memcpy (arg, data, arg_size);
+         ((TYPE *)arg)[0] = start;
+         start += task_step;
+         ((TYPE *)arg)[1] = start;
+         if (i == nfirst)
+           task_step -= step;
+         thr->task = parent;
+         task->kind = GOMP_TASK_WAITING;
+         task->fn = fn;
+         task->fn_data = arg;
+         task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
+       }
+      gomp_mutex_lock (&team->task_lock);
+      /* If parallel or taskgroup has been cancelled, don't start new
+        tasks.  */
+      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
+                            || (taskgroup && taskgroup->cancelled))
+                           && cpyfn == NULL, 0))
+       {
+         gomp_mutex_unlock (&team->task_lock);
+         for (i = 0; i < num_tasks; i++)
+           {
+             gomp_finish_task (tasks[i]);
+             free (tasks[i]);
+           }
+         if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+           ialias_call (GOMP_taskgroup_end) ();
+         return;
+       }
+      if (taskgroup)
+       taskgroup->num_children += num_tasks;
+      for (i = 0; i < num_tasks; i++)
+       {
+         struct gomp_task *task = tasks[i];
+         if (parent->children)
+           {
+             task->next_child = parent->children;
+             task->prev_child = parent->children->prev_child;
+             task->next_child->prev_child = task;
+             task->prev_child->next_child = task;
+           }
+         else
+           {
+             task->next_child = task;
+             task->prev_child = task;
+           }
+         parent->children = task;
+         if (taskgroup)
+           {
+             if (taskgroup->children)
+               {
+                 task->next_taskgroup = taskgroup->children;
+                 task->prev_taskgroup = taskgroup->children->prev_taskgroup;
+                 task->next_taskgroup->prev_taskgroup = task;
+                 task->prev_taskgroup->next_taskgroup = task;
+               }
+             else
+               {
+                 task->next_taskgroup = task;
+                 task->prev_taskgroup = task;
+               }
+             taskgroup->children = task;
+           }
+         if (team->task_queue)
+           {
+             task->next_queue = team->task_queue;
+             task->prev_queue = team->task_queue->prev_queue;
+             task->next_queue->prev_queue = task;
+             task->prev_queue->next_queue = task;
+           }
+         else
+           {
+             task->next_queue = task;
+             task->prev_queue = task;
+             team->task_queue = task;
+           }
+         ++team->task_count;
+         ++team->task_queued_count;
+       }
+      gomp_team_barrier_set_task_pending (&team->barrier);
+      if (team->task_running_count + !parent->in_tied_task
+         < team->nthreads)
+       {
+         do_wake = team->nthreads - team->task_running_count
+                   - !parent->in_tied_task;
+         if ((unsigned long) do_wake > num_tasks)
+           do_wake = num_tasks;
+       }
+      else
+       do_wake = 0;
+      gomp_mutex_unlock (&team->task_lock);
+      if (do_wake)
+       gomp_team_barrier_wake (&team->barrier, do_wake);
+    }
+  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
+    ialias_call (GOMP_taskgroup_end) ();
+}
--- libgomp/testsuite/libgomp.c/for-4.c.jj      2015-05-19 19:04:52.491759830 +0200
+++ libgomp/testsuite/libgomp.c/for-4.c 2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,42 @@
+/* { dg-options "-std=gnu99 -fopenmp" } */
+
+extern void abort (void);
+
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F taskloop
+#define G taskloop
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+#define F taskloop simd
+#define G taskloop_simd
+#define S
+#define N(x) M(x, G, normal)
+#include "for-2.h"
+#undef S
+#undef N
+#undef F
+#undef G
+
+int
+main ()
+{
+  int err = 0;
+  #pragma omp parallel reduction(|:err)
+    #pragma omp single
+      {
+       if (test_taskloop_normal ()
+           || test_taskloop_simd_normal ())
+         err = 1;
+      }
+  if (err)
+    abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-1.c.jj 2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-1.c    2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+int q, r, e;
+
+__attribute__((noinline, noclone)) void
+foo (long a, long b)
+{
+  #pragma omp taskloop lastprivate (q) nogroup
+    for (long d = a; d < b; d += 2)
+      {
+       q = d;
+       if (d < 2 || d > 6 || (d & 1))
+         #pragma omp atomic
+           e |= 1;
+      }
+}
+
+__attribute__((noinline, noclone)) int
+bar (int a, int b)
+{
+  int q = 7;
+  #pragma omp taskloop lastprivate (q)
+    for (int d = a; d < b; d++)
+      {
+       if (d < 12 || d > 17)
+         #pragma omp atomic
+           e |= 1;
+       q = d;
+      }
+  return q;
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+    #pragma omp single
+      {
+       foo (2, 7);
+       r = bar (12, 18);
+      }
+  if (q != 6 || r != 17 || e)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-2.c.jj 2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-2.c    2015-05-20 18:37:38.582454311 +0200
@@ -0,0 +1,147 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -std=c99" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+int u[1024], v[1024], w[1024], m;
+
+__attribute__((noinline, noclone)) void
+f1 (long a, long b)
+{
+  #pragma omp taskloop simd default(none) shared(u, v, w) nogroup
+  for (long d = a; d < b; d++)
+    u[d] = v[d] + w[d];
+}
+
+__attribute__((noinline, noclone)) int
+f2 (long a, long b, long c)
+{
+  int d, e;
+  #pragma omp taskloop simd default(none) shared(u, v, w) linear(d:1) linear(c:5) lastprivate(e)
+  for (d = a; d < b; d++)
+    {
+      u[d] = v[d] + w[d];
+      c = c + 5;
+      e = c + 9;
+    }
+  return d + c + e;
+}
+
+__attribute__((noinline, noclone)) int
+f3 (long a, long b)
+{
+  int d;
+  #pragma omp taskloop simd default(none) shared(u, v, w)
+  for (d = a; d < b; d++)
+    {
+      int *p = &d;
+      u[d] = v[d] + w[d];
+    }
+  return d;
+}
+
+__attribute__((noinline, noclone)) int
+f4 (long a, long b, long c, long d)
+{
+  int e, f, g;
+  #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2) lastprivate(g)
+  for (e = a; e < b; e++)
+    for (f = c; f < d; f++)
+      {
+       int *p = &e;
+       int *q = &f;
+       int r = 32 * e + f;
+       u[r] = v[r] + w[r];
+       g = r;
+      }
+  return e + f + g;
+}
+
+__attribute__((noinline, noclone)) int
+f5 (long a, long b, long c, long d)
+{
+  int e, f;
+  #pragma omp taskloop simd default(none) shared(u, v, w) collapse(2)
+  for (e = a; e < b; e++)
+    for (f = c; f < d; f++)
+      {
+       int r = 32 * e + f;
+       u[r] = v[r] + w[r];
+      }
+  return e + f;
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1024; i++)
+    {
+      v[i] = i;
+      w[i] = i + 1;
+    }
+  #pragma omp parallel
+    #pragma omp single
+      f1 (0, 1024);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+       v[i] = 1024 - i;
+       w[i] = 512 - i;
+      }
+  #pragma omp parallel
+    #pragma omp single
+      m = f2 (2, 1022, 17);
+  for (i = 0; i < 1024; i++)
+    if ((i < 2 || i >= 1022) ? u[i] != 2 * i + 1 : u[i] != 1536 - 2 * i)
+      __builtin_abort ();
+    else
+      {
+       v[i] = i;
+       w[i] = i + 1;
+      }
+  if (m != 1022 + 2 * (1020 * 5 + 17) + 9)
+    __builtin_abort ();
+  #pragma omp parallel
+    #pragma omp single
+      m = f3 (0, 1024);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+       v[i] = 1024 - i;
+       w[i] = 512 - i;
+      }
+  if (m != 1024)
+    __builtin_abort ();
+  #pragma omp parallel
+    #pragma omp single
+      m = f4 (0, 32, 0, 32);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 1536 - 2 * i)
+      __builtin_abort ();
+    else
+      {
+       v[i] = i;
+       w[i] = i + 1;
+      }
+  if (m != 32 + 32 + 1023)
+    __builtin_abort ();
+  #pragma omp parallel
+    #pragma omp single
+      m = f5 (0, 32, 0, 32);
+  for (i = 0; i < 1024; i++)
+    if (u[i] != 2 * i + 1)
+      __builtin_abort ();
+    else
+      {
+       v[i] = 1024 - i;
+       w[i] = 512 - i;
+      }
+  if (m != 32 + 32)
+    __builtin_abort ();
+  return 0;
+}
--- libgomp/testsuite/libgomp.c/taskloop-3.c.jj 2015-05-19 19:04:52.492759814 +0200
+++ libgomp/testsuite/libgomp.c/taskloop-3.c    2015-05-20 18:37:38.583454296 +0200
@@ -0,0 +1,84 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp -std=c99" } */
+
+int g;
+int a[1024];
+
+__attribute__((noinline, noclone)) int
+f1 (int x)
+{
+  #pragma omp taskloop firstprivate (x) lastprivate (x)
+  for (int i = 0; i < 64; i++)
+    {
+      if (x != 74)
+       __builtin_abort ();
+      if (i == 63)
+       x = i + 4;
+    }
+  return x;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  #pragma omp taskloop firstprivate (g) lastprivate (g) nogroup
+  for (int i = 0; i < 64; i++)
+    {
+      if (g != 77)
+       __builtin_abort ();
+      if (i == 63)
+       g = i + 9;
+    }
+}
+
+__attribute__((noinline, noclone)) long long
+f3 (long long a, long long b, long long c)
+{
+  long long i;
+  int l;
+  #pragma omp taskloop default (none) lastprivate (i, l)
+  for (i = a; i < b; i += c)
+    l = i;
+  return l * 7 + i;
+}
+
+__attribute__((noinline, noclone)) long long
+f4 (long long a, long long b, long long c, long long d,
+    long long e, long long f, int k)
+{
+  long long i, j;
+  int l;
+  #pragma omp taskloop default (none) collapse(2) \
+             firstprivate (k) lastprivate (i, j, k, l)
+  for (i = a; i < b; i += e)
+    for (j = c; j < d; j += f)
+      {
+       if (k != 73)
+         __builtin_abort ();
+       if (i == 31 && j == 46)
+         k = i;
+       l = j;
+      }
+  return i + 5 * j + 11 * k + 17 * l;
+}
+
+int
+main ()
+{
+  #pragma omp parallel
+    #pragma omp single
+      {
+       if (f1 (74) != 63 + 4)
+         __builtin_abort ();
+       g = 77;
+       f2 ();
+       #pragma omp taskwait
+       if (g != 63 + 9)
+         __builtin_abort ();
+       if (f3 (7, 12, 2) != 11 * 7 + 13)
+         __builtin_abort ();
+       if (f4 (0, 32, 16, 48, 1, 2, 73) != 32 + 5 * 48 + 11 * 31 + 17 * 46)
+         __builtin_abort ();
+      }
+  return 0;
+}

        Jakub

Reply via email to