Hi!

I've committed following patch to parse collapse+ordered-1 loops
if ordered(n) clause is present, and adjust ompexp, so that we actually
expand it as a collapsed loop with normal ordered-1 loops inside of it.

Example testcase (for now can be parsed and expanded only with the
ordered constructs commented out, Aldy is working on that).

void bar (int, int, int);

void
foo (int n, int m, int o)
{
  int i, j, k;
  #pragma omp for collapse(2) ordered(2)
  for (i = 0; i < m; i++)
    {
      for (j = 0; j < n; j++)
        for (k = 0; k < o; k++)
          {
#pragma omp ordered depend(sink: i-1,j,k) depend(sink: i,j-1,k-1) depend(sink: 
i-1,j-1,k+1)
            bar (i, j, k);
#pragma omp ordered depend(source)
          }
    }
}

int
baz ()
{
  int i, j;
#pragma omp parallel for ordered(2)
  for (i=0; i < 100; ++i)
    for (j=0; j < 100; ++j)
      {
#pragma omp ordered depend(sink:i-1,j-3)
        bar (i, j, 0);
#pragma omp ordered depend(source)
      }
}

2015-07-02  Jakub Jelinek  <ja...@redhat.com>

        * omp-low.c (struct omp_for_data): Add ordered field.
        (extract_omp_for_data): Handle loops with ordered(n) clause.
        (expand_omp_for_ordered_loops): New function.
        (expand_omp_for_generic): Call it.
c/
        * c-parser.c (c_parser_omp_for_loop): Parse collapse + ordered - 1
        nested loops if ordered(n) clause is present.
cp/
        * parser.c (cp_parser_omp_for_loop): Parse collapse + ordered - 1
        nested loops if ordered(n) clause is present.

--- gcc/omp-low.c.jj    2015-07-01 12:50:49.000000000 +0200
+++ gcc/omp-low.c       2015-07-02 09:27:03.546405031 +0200
@@ -236,6 +236,7 @@ struct omp_for_data
   gomp_for *for_stmt;
   tree pre, iter_type;
   int collapse;
+  int ordered;
   bool have_nowait, have_ordered, simd_schedule;
   enum omp_clause_schedule_kind sched_kind;
   struct omp_for_data_loop *loops;
@@ -489,14 +490,15 @@ extract_omp_for_data (gomp_for *for_stmt
 
   fd->for_stmt = for_stmt;
   fd->pre = NULL;
-  fd->collapse = gimple_omp_for_collapse (for_stmt);
-  if (fd->collapse > 1)
+  if (gimple_omp_for_collapse (for_stmt) > 1)
     fd->loops = loops;
   else
     fd->loops = &fd->loop;
 
   fd->have_nowait = distribute || simd;
   fd->have_ordered = false;
+  fd->collapse = 1;
+  fd->ordered = 0;
   fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
   fd->chunk_size = NULL_TREE;
   fd->simd_schedule = false;
@@ -513,6 +515,8 @@ extract_omp_for_data (gomp_for *for_stmt
        break;
       case OMP_CLAUSE_ORDERED:
        fd->have_ordered = true;
+       if (OMP_CLAUSE_ORDERED_EXPR (t))
+         fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
        break;
       case OMP_CLAUSE_SCHEDULE:
        gcc_assert (!distribute && !taskloop);
@@ -525,6 +529,7 @@ extract_omp_for_data (gomp_for *for_stmt
        fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
        break;
       case OMP_CLAUSE_COLLAPSE:
+       fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
        if (fd->collapse > 1)
          {
            collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
@@ -559,9 +564,10 @@ extract_omp_for_data (gomp_for *for_stmt
                         ? integer_zero_node : integer_one_node;
     }
 
-  for (i = 0; i < fd->collapse; i++)
+  int cnt = fd->collapse + (fd->ordered > 0 ? fd->ordered - 1 : 0);
+  for (i = 0; i < cnt; i++)
     {
-      if (fd->collapse == 1)
+      if (i == 0 && fd->collapse == 1)
        loop = &fd->loop;
       else if (loops != NULL)
        loop = loops + i;
@@ -589,6 +595,8 @@ extract_omp_for_data (gomp_for *for_stmt
                          == GF_OMP_FOR_KIND_CILKFOR));
          break;
        case LE_EXPR:
+         if (i >= fd->collapse)
+           break;
          if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
            loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1);
          else
@@ -598,6 +606,8 @@ extract_omp_for_data (gomp_for *for_stmt
          loop->cond_code = LT_EXPR;
          break;
        case GE_EXPR:
+         if (i >= fd->collapse)
+           break;
          if (POINTER_TYPE_P (TREE_TYPE (loop->n2)))
            loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, -1);
          else
@@ -690,6 +700,9 @@ extract_omp_for_data (gomp_for *for_stmt
            }
        }
 
+      if (i >= fd->collapse)
+       continue;
+
       if (collapse_count && *collapse_count == NULL)
        {
          t = fold_binary (loop->cond_code, boolean_type_node,
@@ -770,6 +783,8 @@ extract_omp_for_data (gomp_for *for_stmt
       fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
       fd->loop.cond_code = LT_EXPR;
     }
+  else if (loops)
+    loops[0] = fd->loop;
 
   /* For OpenACC loops, force a chunk size of one, as this avoids the default
     scheduling where several subsequent iterations are being executed by the
@@ -6827,6 +6842,81 @@ extract_omp_for_update_vars (struct omp_
 }
 
 
+/* Wrap the body into fd->ordered - 1 loops that aren't collapsed.  */
+
+static basic_block
+expand_omp_for_ordered_loops (struct omp_for_data *fd, basic_block cont_bb,
+                             basic_block body_bb)
+{
+  if (fd->ordered <= 1)
+    return cont_bb;
+
+  if (!cont_bb)
+    {
+      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
+      for (int i = fd->collapse; i < fd->collapse + fd->ordered - 1; i++)
+       {
+         tree type = TREE_TYPE (fd->loops[i].v);
+         tree n1 = fold_convert (type, fd->loops[i].n1);
+         expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
+       }
+      return NULL;
+    }
+
+  for (int i = fd->collapse + fd->ordered - 2; i >= fd->collapse; i--)
+    {
+      tree t, type = TREE_TYPE (fd->loops[i].v);
+      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
+      expand_omp_build_assign (&gsi, fd->loops[i].v,
+                              fold_convert (type, fd->loops[i].n1));
+      if (!gsi_end_p (gsi))
+       gsi_prev (&gsi);
+      else
+       gsi_last_bb (body_bb);
+      edge e1 = split_block (body_bb, gsi_stmt (gsi));
+      basic_block new_body = e1->dest;
+      if (body_bb == cont_bb)
+       cont_bb = new_body;
+      gsi = gsi_last_bb (cont_bb);
+      if (POINTER_TYPE_P (type))
+       t = fold_build_pointer_plus (fd->loops[i].v,
+                                    fold_convert (sizetype, fd->loop.step));
+      else
+       t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
+                        fold_convert (type, fd->loop.step));
+      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+      gsi_prev (&gsi);
+      edge e2 = split_block (cont_bb, gsi_stmt (gsi));
+      basic_block new_header = e2->dest;
+      gsi = gsi_after_labels (new_header);
+      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
+                                        true, GSI_SAME_STMT);
+      tree n2
+       = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
+                                   true, NULL_TREE, true, GSI_SAME_STMT);
+      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
+      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
+      edge e3 = split_block (new_header, gsi_stmt (gsi));
+      cont_bb = e3->dest;
+      remove_edge (e1);
+      make_edge (body_bb, new_header, EDGE_FALLTHRU);
+      e3->flags = EDGE_FALSE_VALUE;
+      e3->probability = REG_BR_PROB_BASE / 8;
+      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
+      e1->probability = REG_BR_PROB_BASE - e3->probability;
+
+      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
+      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
+
+      struct loop *loop = alloc_loop ();
+      loop->header = new_header;
+      loop->latch = e2->src;
+      add_loop (loop, body_bb->loop_father);
+    }
+  return cont_bb;
+}
+
+
 /* A subroutine of expand_omp_for.  Generate code for a parallel
    loop with any schedule.  Given parameters:
 
@@ -7226,6 +7316,8 @@ expand_omp_for_generic (struct omp_regio
   if (fd->collapse > 1)
     expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
 
+  cont_bb = expand_omp_for_ordered_loops (fd, cont_bb, l1_bb);
+
   if (!broken_loop)
     {
       /* Code to control the increment and predicate for the sequential
--- gcc/c/c-parser.c.jj 2015-07-01 12:50:49.000000000 +0200
+++ gcc/c/c-parser.c    2015-07-01 13:01:23.665521635 +0200
@@ -13279,20 +13279,24 @@ c_parser_omp_for_loop (location_t loc, c
   tree decl, cond, incr, save_break, save_cont, body, init, stmt, cl;
   tree declv, condv, incrv, initv, ret = NULL;
   bool fail = false, open_brace_parsed = false;
-  int i, collapse = 1, nbraces = 0;
+  int i, collapse = 1, ordered = 0, count, nbraces = 0;
   location_t for_loc;
   vec<tree, va_gc> *for_block = make_tree_vector ();
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
       collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
-
-  gcc_assert (collapse >= 1);
-
-  declv = make_tree_vec (collapse);
-  initv = make_tree_vec (collapse);
-  condv = make_tree_vec (collapse);
-  incrv = make_tree_vec (collapse);
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_ORDERED
+            && OMP_CLAUSE_ORDERED_EXPR (cl))
+      ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (cl));
+
+  gcc_assert (collapse >= 1 && ordered >= 0);
+  count = collapse + (ordered > 0 ? ordered - 1 : 0);
+
+  declv = make_tree_vec (count);
+  initv = make_tree_vec (count);
+  condv = make_tree_vec (count);
+  incrv = make_tree_vec (count);
 
   if (code != CILK_FOR
       && !c_parser_next_token_is_keyword (parser, RID_FOR))
@@ -13309,7 +13313,7 @@ c_parser_omp_for_loop (location_t loc, c
   for_loc = c_parser_peek_token (parser)->location;
   c_parser_consume_token (parser);
 
-  for (i = 0; i < collapse; i++)
+  for (i = 0; i < count; i++)
     {
       int bracecount = 0;
 
@@ -13418,7 +13422,7 @@ c_parser_omp_for_loop (location_t loc, c
        }
 
     parse_next:
-      if (i == collapse - 1)
+      if (i == count - 1)
        break;
 
       /* FIXME: OpenMP 3.0 draft isn't very clear on what exactly is allowed
@@ -13449,7 +13453,7 @@ c_parser_omp_for_loop (location_t loc, c
                  bracecount--;
                }
              fail = true;
-             collapse = 0;
+             count = 0;
              break;
            }
        }
@@ -13530,10 +13534,10 @@ c_parser_omp_for_loop (location_t loc, c
                  c = &OMP_CLAUSE_CHAIN (*c);
                else
                  {
-                   for (i = 0; i < collapse; i++)
+                   for (i = 0; i < count; i++)
                      if (TREE_VEC_ELT (declv, i) == OMP_CLAUSE_DECL (*c))
                        break;
-                   if (i == collapse)
+                   if (i == count)
                      c = &OMP_CLAUSE_CHAIN (*c);
                    else if (OMP_CLAUSE_CODE (*c) == OMP_CLAUSE_FIRSTPRIVATE)
                      {
--- gcc/cp/parser.c.jj  2015-07-01 12:50:50.000000000 +0200
+++ gcc/cp/parser.c     2015-07-02 09:29:23.766300769 +0200
@@ -30881,23 +30881,27 @@ cp_parser_omp_for_loop (cp_parser *parse
   tree this_pre_body, cl;
   location_t loc_first;
   bool collapse_err = false;
-  int i, collapse = 1, nbraces = 0;
+  int i, collapse = 1, ordered = 0, count, nbraces = 0;
   vec<tree, va_gc> *for_block = make_tree_vector ();
 
   for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
     if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
       collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
-
-  gcc_assert (collapse >= 1);
-
-  declv = make_tree_vec (collapse);
-  initv = make_tree_vec (collapse);
-  condv = make_tree_vec (collapse);
-  incrv = make_tree_vec (collapse);
+    else if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_ORDERED
+            && OMP_CLAUSE_ORDERED_EXPR (cl))
+      ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (cl));
+
+  gcc_assert (collapse >= 1 && ordered >= 0);
+  count = collapse + (ordered > 0 ? ordered - 1 : 0);
+
+  declv = make_tree_vec (count);
+  initv = make_tree_vec (count);
+  condv = make_tree_vec (count);
+  incrv = make_tree_vec (count);
 
   loc_first = cp_lexer_peek_token (parser->lexer)->location;
 
-  for (i = 0; i < collapse; i++)
+  for (i = 0; i < count; i++)
     {
       int bracecount = 0;
       tree add_private_clause = NULL_TREE;
@@ -31059,7 +31063,7 @@ cp_parser_omp_for_loop (cp_parser *parse
       TREE_VEC_ELT (condv, i) = cond;
       TREE_VEC_ELT (incrv, i) = incr;
 
-      if (i == collapse - 1)
+      if (i == count - 1)
        break;
 
       /* FIXME: OpenMP 3.0 draft isn't very clear on what exactly is allowed

        Jakub

Reply via email to