Re: #pragma GCC unroll support

Mike Stump Fri, 30 Jan 2015 08:29:56 -0800

On Jan 30, 2015, at 7:49 AM, Joseph Myers <[email protected]> wrote:
> Use error_at, and %u directly in the format.


Done.

Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c   (revision 220084)
+++ ada/gcc-interface/trans.c   (working copy)
@@ -7870,17 +7870,20 @@ gnat_gimplify_stmt (tree *stmt_p)
          {
            /* Deal with the optimization hints.  */
            if (LOOP_STMT_IVDEP (stmt))
-             gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+             gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
                                 build_int_cst (integer_type_node,
-                                               annot_expr_ivdep_kind));
+                                               annot_expr_ivdep_kind),
+                                NULL_TREE);
            if (LOOP_STMT_NO_VECTOR (stmt))
-             gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+             gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
                                 build_int_cst (integer_type_node,
-                                               annot_expr_no_vector_kind));
+                                               annot_expr_no_vector_kind),
+                                NULL_TREE);
            if (LOOP_STMT_VECTOR (stmt))
-             gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+             gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
                                 build_int_cst (integer_type_node,
-                                               annot_expr_vector_kind));
+                                               annot_expr_vector_kind),
+                                NULL_TREE);
 
            gnu_cond
              = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: c/c-parser.c
===================================================================
--- c/c-parser.c        (revision 220084)
+++ c/c-parser.c        (working copy)
@@ -1217,9 +1217,9 @@ static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4972,13 +4972,13 @@ c_parser_statement_after_labels (c_parse
          c_parser_switch_statement (parser);
          break;
        case RID_WHILE:
-         c_parser_while_statement (parser, false);
+         c_parser_while_statement (parser, false, 0);
          break;
        case RID_DO:
-         c_parser_do_statement (parser, false);
+         c_parser_do_statement (parser, false, 0);
          break;
        case RID_FOR:
-         c_parser_for_statement (parser, false);
+         c_parser_for_statement (parser, false, 0);
          break;
        case RID_CILK_FOR:
          if (!flag_cilkplus)
@@ -5340,7 +5340,7 @@ c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5354,9 +5354,15 @@ c_parser_while_statement (c_parser *pars
         "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
                   build_int_cst (integer_type_node,
-                  annot_expr_ivdep_kind));
+                                 annot_expr_ivdep_kind),
+                  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+                  build_int_cst (integer_type_node,
+                                 annot_expr_unroll_kind),
+                  build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5375,7 +5381,7 @@ c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5403,9 +5409,16 @@ c_parser_do_statement (c_parser *parser,
         "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+                  build_int_cst (integer_type_node,
+                                 annot_expr_ivdep_kind),
+                  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
                   build_int_cst (integer_type_node,
-                  annot_expr_ivdep_kind));
+                                 annot_expr_unroll_kind),
+                  build_int_cst (integer_type_node,
+                                 unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5469,7 +5482,7 @@ c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser
                                  "%<GCC ivdep%> pragma");
                  cond = error_mark_node;
                }
+             else if (unroll)
+               {
+                 c_parser_error (parser, "missing loop condition in loop with "
+                                 "%<GCC unroll%> pragma");
+                 cond = error_mark_node;
+               }
              else
                {
                  c_parser_consume_token (parser);
@@ -5604,9 +5623,15 @@ c_parser_for_statement (c_parser *parser
                                         "expected %<;%>");
            }
          if (ivdep && cond != error_mark_node)
-           cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+           cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+                          build_int_cst (integer_type_node,
+                                         annot_expr_ivdep_kind),
+                          NULL_TREE);
+         if (unroll && cond != error_mark_node)
+           cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
                           build_int_cst (integer_type_node,
-                          annot_expr_ivdep_kind));
+                                         annot_expr_unroll_kind),
+                          build_int_cst (integer_type_node, unroll));
        }
       /* Parse the increment expression (the third expression in a
         for-statement).  In the case of a foreach-statement, this is
@@ -9592,6 +9617,45 @@ c_parser_objc_at_dynamic_declaration (c_
 }
 
 
+static bool
+c_parse_pragma_ivdep (c_parser *parser)
+{
+  c_parser_consume_pragma (parser);
+  c_parser_skip_to_pragma_eol (parser);
+  return true;
+}
+
+static unsigned short
+c_parser_pragma_unroll (c_parser *parser)
+{
+  unsigned short unroll;
+  c_parser_consume_pragma (parser);
+  location_t location = c_parser_peek_token (parser)->location;
+  tree expr = c_parser_expr_no_commas (parser, NULL).value;
+  mark_exp_read (expr);
+  expr = c_fully_fold (expr, false, NULL);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+               " assignment-expression that evaluates to a non-negative"
+               " integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+       unroll = 1;
+    }
+
+  c_parser_skip_to_pragma_eol (parser);
+  return unroll;
+}
+
 /* Handle pragmas.  Some OpenMP pragmas are associated with, and therefore
    should be considered, statements.  ALLOW_STMT is true if we're within
    the context of a function and such pragmas are to be allowed.  Returns
@@ -9714,21 +9778,46 @@ c_parser_pragma (c_parser *parser, enum
       c_parser_omp_declare (parser, context);
       return false;
     case PRAGMA_IVDEP:
-      c_parser_consume_pragma (parser);
-      c_parser_skip_to_pragma_eol (parser);
-      if (!c_parser_next_token_is_keyword (parser, RID_FOR)
-         && !c_parser_next_token_is_keyword (parser, RID_WHILE)
-         && !c_parser_next_token_is_keyword (parser, RID_DO))
-       {
-         c_parser_error (parser, "for, while or do statement expected");
-         return false;
-       }
-      if (c_parser_next_token_is_keyword (parser, RID_FOR))
-       c_parser_for_statement (parser, true);
-      else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-       c_parser_while_statement (parser, true);
-      else
-       c_parser_do_statement (parser, true);
+      {
+       bool ivdep = c_parse_pragma_ivdep (parser);
+       unsigned short unroll = 0;
+       if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL)
+         unroll = c_parser_pragma_unroll (parser);
+       if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+           && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+           && !c_parser_next_token_is_keyword (parser, RID_DO))
+         {
+           c_parser_error (parser, "for, while or do statement expected");
+           return false;
+         }
+       if (c_parser_next_token_is_keyword (parser, RID_FOR))
+         c_parser_for_statement (parser, ivdep, unroll);
+       else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+         c_parser_while_statement (parser, ivdep, unroll);
+       else
+         c_parser_do_statement (parser, ivdep, unroll);
+      }
+      return false;
+    case PRAGMA_UNROLL:
+      {
+       unsigned short unroll = c_parser_pragma_unroll (parser);
+       bool ivdep = false;
+       if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP)
+         ivdep = c_parse_pragma_ivdep (parser);
+       if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+           && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+           && !c_parser_next_token_is_keyword (parser, RID_DO))
+         {
+           c_parser_error (parser, "for, while or do statement expected");
+           return false;
+         }
+       if (c_parser_next_token_is_keyword (parser, RID_FOR))
+         c_parser_for_statement (parser, ivdep, unroll);
+       else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+         c_parser_while_statement (parser, ivdep, unroll);
+       else
+         c_parser_do_statement (parser, ivdep, unroll);
+      }
       return false;
 
     case PRAGMA_GCC_PCH_PREPROCESS:
Index: c-family/c-pragma.c
===================================================================
--- c-family/c-pragma.c (revision 220084)
+++ c-family/c-pragma.c (working copy)
@@ -1456,6 +1456,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
                                  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+                                 false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
                                  PRAGMA_CILK_GRAINSIZE, true, false);
Index: c-family/c-pragma.h
===================================================================
--- c-family/c-pragma.h (revision 220084)
+++ c-family/c-pragma.h (working copy)
@@ -69,6 +69,7 @@ typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: cfgloop.h
===================================================================
--- cfgloop.h   (revision 220084)
+++ cfgloop.h   (working copy)
@@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+  /* The number of times to unroll the loop.  A value of 0 means no
+     information was given, so do what we always do.  A value of 1
+     means do not unroll the loop.  */
+  unsigned short unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: cfgloopmanip.c
===================================================================
--- cfgloopmanip.c      (revision 220084)
+++ cfgloopmanip.c      (working copy)
@@ -1038,6 +1038,7 @@ copy_loop_info (struct loop *loop, struc
   target->estimate_state = loop->estimate_state;
   target->warned_aggressive_loop_optimizations
     |= loop->warned_aggressive_loop_optimizations;
+  target->unroll = loop->unroll;
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
Index: cp/cp-array-notation.c
===================================================================
--- cp/cp-array-notation.c      (revision 220084)
+++ cp/cp-array-notation.c      (working copy)
@@ -81,7 +81,7 @@ create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: cp/cp-tree.h
===================================================================
--- cp/cp-tree.h        (revision 220084)
+++ cp/cp-tree.h        (working copy)
@@ -5644,7 +5644,7 @@ extern tree implicitly_declare_fn
 extern bool maybe_clone_body                   (tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5880,16 +5880,16 @@ extern void begin_else_clause                   (tree);
 extern void finish_else_clause                 (tree);
 extern void finish_if_stmt                     (tree);
 extern tree begin_while_stmt                   (void);
-extern void finish_while_stmt_cond             (tree, tree, bool);
+extern void finish_while_stmt_cond             (tree, tree, bool, unsigned short);
 extern void finish_while_stmt                  (tree);
 extern tree begin_do_stmt                      (void);
 extern void finish_do_body                     (tree);
-extern void finish_do_stmt                     (tree, tree, bool);
+extern void finish_do_stmt                     (tree, tree, bool, unsigned short);
 extern tree finish_return_stmt                 (tree);
 extern tree begin_for_scope                    (tree *);
 extern tree begin_for_stmt                     (tree, tree);
 extern void finish_for_init_stmt               (tree);
-extern void finish_for_cond                    (tree, tree, bool);
+extern void finish_for_cond                    (tree, tree, bool, unsigned short);
 extern void finish_for_expr                    (tree, tree);
 extern void finish_for_stmt                    (tree);
 extern tree begin_range_for_stmt               (tree, tree);
Index: cp/init.c
===================================================================
--- cp/init.c   (revision 220084)
+++ cp/init.c   (working copy)
@@ -3693,7 +3693,7 @@ build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
                               build_int_cst (TREE_TYPE (iterator), -1)),
-                      for_stmt, false);
+                      for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
                                    complain);
       if (elt_init == error_mark_node)
Index: cp/parser.c
===================================================================
--- cp/parser.c (revision 220084)
+++ cp/parser.c (working copy)
@@ -2044,15 +2044,15 @@ static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, unsigned short);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, unsigned short);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9698,7 +9698,7 @@ cp_parser_statement (cp_parser* parser,
        case RID_WHILE:
        case RID_DO:
        case RID_FOR:
-         statement = cp_parser_iteration_statement (parser, false);
+         statement = cp_parser_iteration_statement (parser, false, 0);
          break;
 
        case RID_CILK_FOR:
@@ -10390,7 +10390,7 @@ cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10402,13 +10402,14 @@ cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep,
+                unsigned short unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10429,7 +10430,13 @@ cp_parser_c_for (cp_parser *parser, tree
                       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+                      "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10453,7 +10460,7 @@ cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-                    bool ivdep)
+                    bool ivdep, unsigned short unroll)
 {
   tree stmt, range_expr;
 
@@ -10474,6 +10481,8 @@ cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
        RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+       /* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
          /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10483,7 +10492,7 @@ cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10575,7 +10584,7 @@ do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-                     bool ivdep)
+                     bool ivdep, unsigned short unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10632,7 +10641,7 @@ cp_convert_range_for (tree statement, tr
                                 begin, ERROR_MARK,
                                 end, ERROR_MARK,
                                 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10793,7 +10802,8 @@ cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep,
+                              unsigned short unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10823,7 +10833,7 @@ cp_parser_iteration_statement (cp_parser
        cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
        /* Parse the condition.  */
        condition = cp_parser_condition (parser);
-       finish_while_stmt_cond (condition, statement, ivdep);
+       finish_while_stmt_cond (condition, statement, ivdep, unroll);
        /* Look for the `)'.  */
        cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
        /* Parse the dependent statement.  */
@@ -10853,7 +10863,7 @@ cp_parser_iteration_statement (cp_parser
        /* Parse the expression.  */
        expression = cp_parser_expression (parser);
        /* We're done with the do-statement.  */
-       finish_do_stmt (expression, statement, ivdep);
+       finish_do_stmt (expression, statement, ivdep, unroll);
        /* Look for the `)'.  */
        cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
        /* Look for the `;'.  */
@@ -10866,7 +10876,7 @@ cp_parser_iteration_statement (cp_parser
        /* Look for the `('.  */
        cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-       statement = cp_parser_for (parser, ivdep);
+       statement = cp_parser_for (parser, ivdep, unroll);
 
        /* Look for the `)'.  */
        cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32901,6 +32911,41 @@ cp_parser_cilk_grainsize (cp_parser *par
   cp_parser_skip_to_pragma_eol (parser, pragma_tok);
 }
 
+static bool
+cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok)
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return true;
+}
+
+static unsigned short
+cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok)
+{
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+  tree expr = cp_parser_constant_expression (parser);
+  unsigned short unroll;
+  expr = maybe_constant_value (expr);
+  cp_parser_require_pragma_eol (parser, pragma_tok);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+               " assignment-expression that evaluates to a non-negative"
+               " integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+       unroll = 1;
+    }
+  return unroll;
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -33068,9 +33113,39 @@ cp_parser_pragma (cp_parser *parser, enu
 
     case PRAGMA_IVDEP:
       {
-       cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+       bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok);
+       unsigned short unroll = 0;
        cp_token *tok;
        tok = cp_lexer_peek_token (the_parser->lexer);
+       if (tok->type == CPP_PRAGMA &&
+           tok->pragma_kind == PRAGMA_UNROLL)
+         {
+           unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+           tok = cp_lexer_peek_token (the_parser->lexer);
+         }
+       if (tok->type != CPP_KEYWORD
+           || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+               && tok->keyword != RID_DO))
+         {
+           cp_parser_error (parser, "for, while or do statement expected");
+           return false;
+         }
+       cp_parser_iteration_statement (parser, ivdep, unroll);
+       return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+       unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+       bool ivdep = false;
+       cp_token *tok;
+       tok = cp_lexer_peek_token (the_parser->lexer);
+       if (tok->type == CPP_PRAGMA &&
+           tok->pragma_kind == PRAGMA_IVDEP)
+         {
+           ivdep = cp_parser_pragma_ivdep (parser, tok);
+           tok = cp_lexer_peek_token (the_parser->lexer);
+         }
        if (tok->type != CPP_KEYWORD
            || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
                && tok->keyword != RID_DO))
@@ -33078,7 +33153,7 @@ cp_parser_pragma (cp_parser *parser, enu
            cp_parser_error (parser, "for, while or do statement expected");
            return false;
          }
-       cp_parser_iteration_statement (parser, true);
+       cp_parser_iteration_statement (parser, ivdep, unroll);
        return true;
       }
 
Index: cp/pt.c
===================================================================
--- cp/pt.c     (revision 220084)
+++ cp/pt.c     (working copy)
@@ -13886,7 +13886,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13901,7 +13901,7 @@ tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13920,7 +13920,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14358,8 +14358,10 @@ tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-                         TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+                         TREE_TYPE (tmp), tmp,
+                         RECUR (TREE_OPERAND (t, 1)),
+                         RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: cp/semantics.c
===================================================================
--- cp/semantics.c      (revision 220084)
+++ cp/semantics.c      (working copy)
@@ -802,7 +802,8 @@ begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep,
+                       unsigned short unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -812,11 +813,19 @@ finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
                                      TREE_TYPE (WHILE_COND (while_stmt)),
                                      WHILE_COND (while_stmt),
                                      build_int_cst (integer_type_node,
-                                                    annot_expr_ivdep_kind));
+                                                    annot_expr_ivdep_kind),
+                                     NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+                                     TREE_TYPE (WHILE_COND (while_stmt)),
+                                     WHILE_COND (while_stmt),
+                                     build_int_cst (integer_type_node,
+                                                    annot_expr_unroll_kind),
+                                     build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -861,7 +870,7 @@ finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -870,8 +879,13 @@ finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-                  build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+                  build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+                  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+                  build_int_cst (integer_type_node, annot_expr_unroll_kind),
+                  build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -974,7 +988,7 @@ finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
         "Cilk array notation cannot be used in a condition for a for-loop",
@@ -984,11 +998,20 @@ finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
                                  TREE_TYPE (FOR_COND (for_stmt)),
                                  FOR_COND (for_stmt),
                                  build_int_cst (integer_type_node,
-                                                annot_expr_ivdep_kind));
+                                                annot_expr_ivdep_kind),
+                                 NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+                                 TREE_TYPE (FOR_COND (for_stmt)),
+                                 FOR_COND (for_stmt),
+                                 build_int_cst (integer_type_node,
+                                                annot_expr_unroll_kind),
+                                 build_int_cst (integer_type_node,
+                                                unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: doc/extend.texi
===================================================================
--- doc/extend.texi     (revision 220084)
+++ doc/extend.texi     (working copy)
@@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int
 @}
 @end smallexample
 
+@table @code
+@item #pragma GCC unroll @var{n}
+@cindex pragma GCC unroll @var{n}
+
+With this pragma, the programmer tells the optimizer how many times
+a loop should be unrolled.  A value of 0 or 1 tells the compiler not to
+perform any loop unrolling.  The pragma must appear immediately before
+@samp{#pragma GCC ivdep} or a @code{for}, @code{while} or @code{do} loop
+and applies only to the loop that follows.  @var{n} is an
+assignment-expression that evaluates to a non-negative integer constant.
+
+@end table
 
 @node Unnamed Fields
 @section Unnamed struct/union fields within structs/unions
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c        (revision 220084)
+++ fortran/trans-stmt.c        (working copy)
@@ -2876,9 +2876,10 @@ gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
                              count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-       cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+       cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
                       build_int_cst (integer_type_node,
-                                     annot_expr_ivdep_kind));
+                                     annot_expr_ivdep_kind),
+                      NULL_TREE);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: function.h
===================================================================
--- function.h  (revision 220084)
+++ function.h  (working copy)
@@ -670,6 +670,10 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when #pragma GCC unroll has been used in the body.  Used by RTL
+     unrolling to know when to perform unrolling in the function.  */
+  unsigned int has_unroll : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
Index: gimple-low.c
===================================================================
--- gimple-low.c        (revision 220084)
+++ gimple-low.c        (working copy)
@@ -347,7 +347,7 @@ lower_stmt (gimple_stmt_iterator *gsi, s
        for (i = 0; i < gimple_call_num_args (stmt); i++)
          {
            tree arg = gimple_call_arg (stmt, i);
-           if (EXPR_P (arg))
+           if (arg && EXPR_P (arg))
              TREE_SET_BLOCK (arg, data->block);
          }
 
Index: gimple-walk.c
===================================================================
--- gimple-walk.c       (revision 220084)
+++ gimple-walk.c       (working copy)
@@ -261,7 +261,7 @@ walk_gimple_op (gimple stmt, walk_tree_f
 
       for (i = 0; i < gimple_call_num_args (stmt); i++)
        {
-         if (wi)
+         if (wi && gimple_call_arg (stmt, i))
            wi->val_only
              = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i)));
          ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi,
Index: gimplify.c
===================================================================
--- gimplify.c  (revision 220084)
+++ gimplify.c  (working copy)
@@ -2908,6 +2908,9 @@ gimple_boolify (tree expr)
     case ANNOTATE_EXPR:
       switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
        {
+       case annot_expr_unroll_kind:
+         cfun->has_unroll = 1;
+         /* fall-through */
        case annot_expr_ivdep_kind:
        case annot_expr_no_vector_kind:
        case annot_expr_vector_kind:
@@ -7947,6 +7950,7 @@ gimplify_expr (tree *expr_p, gimple_seq
          {
            tree cond = TREE_OPERAND (*expr_p, 0);
            tree kind = TREE_OPERAND (*expr_p, 1);
+           tree data = TREE_OPERAND (*expr_p, 2);
            tree type = TREE_TYPE (cond);
            if (!INTEGRAL_TYPE_P (type))
              {
@@ -7957,7 +7961,7 @@ gimplify_expr (tree *expr_p, gimple_seq
            tree tmp = create_tmp_var (type);
            gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
            gcall *call
-             = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+             = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
            gimple_call_set_lhs (call, tmp);
            gimplify_seq_add_stmt (pre_p, call);
            *expr_p = tmp;
Index: loop-init.c
===================================================================
--- loop-init.c (revision 220084)
+++ loop-init.c (working copy)
@@ -375,6 +375,7 @@ pass_loop2::gate (function *fun)
       && (flag_move_loop_invariants
          || flag_unswitch_loops
          || flag_unroll_loops
+         || cfun->has_unroll
 #ifdef HAVE_doloop_end
          || (flag_branch_on_count_reg && HAVE_doloop_end)
 #endif
@@ -576,7 +577,8 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
+      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops
+             || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
Index: loop-unroll.c
===================================================================
--- loop-unroll.c       (revision 220084)
+++ loop-unroll.c       (working copy)
@@ -243,16 +243,26 @@ report_unroll (struct loop *loop, locati
 
 /* Decide whether unroll loops and how much.  */
 static void
-decide_unrolling (int flags)
+decide_unrolling (int base_flags)
 {
   struct loop *loop;
 
   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
+      int flags = base_flags;
+      if (loop->unroll > 1)
+       flags = UAP_UNROLL | UAP_UNROLL_ALL;
       loop->lpt_decision.decision = LPT_NONE;
       location_t locus = get_loop_location (loop);
 
+      if (loop->unroll == 1)
+       {
+         dump_printf_loc (TDF_RTL, locus,
+                          "not unrolling loop, user didn't want it unrolled\n");
+         continue;
+       }
+
       if (dump_enabled_p ())
        dump_printf_loc (TDF_RTL, locus,
                          ";; *** Considering loop %d at BB %d for "
@@ -422,6 +432,19 @@ decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2)
+       {
+         /* They won't do this for us.  */
+         loop->lpt_decision.decision = LPT_NONE;
+         loop->lpt_decision.times = desc->niter - 2;
+       }
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -443,7 +466,7 @@ decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2)
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -695,6 +718,9 @@ decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -733,8 +759,9 @@ decide_unroll_runtime_iterations (struct
       return;
     }
 
-  /* Success; now force nunroll to be power of 2, as we are unable to
-     cope with overflows in computation of number of iterations.  */
+  /* Success; now force nunroll to be a power of 2, as code generation
+     requires it: we are unable to cope with overflows in the
+     computation of the number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
     continue;
 
@@ -843,9 +870,10 @@ compare_and_jump_seq (rtx op0, rtx op1,
   return seq;
 }
 
-/* Unroll LOOP for which we are able to count number of iterations in runtime
-   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
-   extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
+   power of two.  The transformation does this (with some extra care
+   for case n < 0):
 
    for (i = 0; i < n; i++)
      body;
@@ -1142,6 +1170,9 @@ decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c   (revision 220084)
+++ lto-streamer-in.c   (working copy)
@@ -751,6 +751,7 @@ input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c  (revision 220084)
+++ lto-streamer-out.c  (working copy)
@@ -1884,6 +1884,7 @@ output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: testsuite/c-c++-common/unroll-1.c
===================================================================
--- testsuite/c-c++-common/unroll-1.c   (revision 0)
+++ testsuite/c-c++-common/unroll-1.c   (working copy)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[31\]:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+
+  #pragma GCC unroll 4+4
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-2.c
===================================================================
--- testsuite/c-c++-common/unroll-2.c   (revision 0)
+++ testsuite/c-c++-common/unroll-2.c   (working copy)
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "15:\[0-9\]*: note: loop turned into non-loop; it never loops"  "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "\(19|21\):\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[13\]:\[0-9\]*: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+void test2 () {
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "\[424\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 7; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "4\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 9
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "52:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 4
+  for (unsigned long i = 1; i <= 15; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "5\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 709
+  for (unsigned long i = 1; i <= 709; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "6\[24\]:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+}
+
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-3.c
===================================================================
--- testsuite/c-c++-common/unroll-3.c   (revision 0)
+++ testsuite/c-c++-common/unroll-3.c   (working copy)
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  /* { dg-final { scan-rtl-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= m; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-4.c
===================================================================
--- testsuite/c-c++-common/unroll-4.c   (revision 0)
+++ testsuite/c-c++-common/unroll-4.c   (working copy)
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll" } */
+
+void bar(int);
+
+int j;
+
+void test1() {
+  unsigned long m = j;
+  unsigned long i;
+
+  #pragma GCC unroll 20000000000       /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll i /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 16 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll n /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "declared" "" { target *-*-* } 21 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll 1+i       /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 26 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4,4              /* { dg-error "expected end of line before" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4.2      /* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
Index: tree-cfg.c
===================================================================
--- tree-cfg.c  (revision 220084)
+++ tree-cfg.c  (working copy)
@@ -316,6 +316,10 @@ replace_loop_annotate_in_block (basic_bl
          loop->force_vectorize = true;
          cfun->has_force_vectorize_loops = true;
          break;
+       case annot_expr_unroll_kind:
+         loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt,
+                                                                       2));
+         break;
        default:
          gcc_unreachable ();
        }
@@ -365,6 +369,7 @@ replace_loop_annotate (void)
            case annot_expr_ivdep_kind:
            case annot_expr_no_vector_kind:
            case annot_expr_vector_kind:
+           case annot_expr_unroll_kind:
              break;
            default:
              gcc_unreachable ();
@@ -3385,6 +3390,8 @@ verify_gimple_call (gcall *stmt)
   for (i = 0; i < gimple_call_num_args (stmt); ++i)
     {
       tree arg = gimple_call_arg (stmt, i);
+      if (! arg)
+       continue;
       if ((is_gimple_reg_type (TREE_TYPE (arg))
           && !is_gimple_val (arg))
          || (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -7512,6 +7519,8 @@ print_loop (FILE *file, struct loop *loo
       fprintf (file, ", estimate = ");
       print_decu (loop->nb_iterations_estimate, file);
     }
+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);
   fprintf (file, ")\n");
 
   /* Print loop's body.  */
Index: tree-core.h
===================================================================
--- tree-core.h (revision 220084)
+++ tree-core.h (working copy)
@@ -725,6 +725,7 @@ enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: tree-pretty-print.c
===================================================================
--- tree-pretty-print.c (revision 220084)
+++ tree-pretty-print.c (working copy)
@@ -2313,6 +2313,10 @@ dump_generic_node (pretty_printer *pp, t
        case annot_expr_vector_kind:
          pp_string (pp, ", vector");
          break;
+       case annot_expr_unroll_kind:
+         pp_printf (pp, ", unroll %d",
+                    (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+         break;
        default:
          gcc_unreachable ();
        }
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c     (revision 220084)
+++ tree-ssa-loop-ivcanon.c     (working copy)
@@ -686,8 +686,7 @@ try_unroll_loop_completely (struct loop
                            HOST_WIDE_INT maxiter,
                            location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
@@ -731,7 +730,8 @@ try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll == 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Not unrolling loop %d "
@@ -753,107 +753,130 @@ try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
        return false;
 
-      large = tree_estimate_loop_size
-                (loop, exit, edge_to_cancel, &size,
-                 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
        {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-                    loop->num);
-         return false;
+         /* If complete unrolling needs more copies than the user
+            asked for, don't unroll it completely.  */
+         if (n_unroll > (unsigned)loop->unroll)
+           {
+             dump_printf_loc (report_flags, locus,
+               "not unrolling loop, "
+               "user didn't want it unrolled completely.\n");
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file,
+                 "Not unrolling loop %d: "
+                 "user didn't want it unrolled completely.\n",
+                        loop->num);
+             return false;
+           }
        }
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
        {
-         fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-         fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-                  (int) unr_insns);
-       }
+         struct loop_size size;
+         large = tree_estimate_loop_size
+                   (loop, exit, edge_to_cancel, &size,
+                    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+         unsigned HOST_WIDE_INT ninsns = size.overall;
+         if (large)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+                        loop->num);
+             return false;
+           }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-        on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-         /* If there is IV variable that will become constant, we save
-            one instruction in the loop prologue we do not account
-            otherwise.  */
-         <= ninsns + (size.constant_iv != false))
-       ;
-      /* We unroll only inner loops, because we do not consider it profitable
-        otheriwse.  We still can cancel loopback edge of not rolling loop;
-        this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-       {
+         unsigned HOST_WIDE_INT unr_insns
+           = estimated_unrolled_size (&size, n_unroll);
          if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* Outer loops tend to be less interesting candidates for complete
-        unrolling unless we can do a lot of propagation into the inner loop
-        body.  For now we disable outer loop unrolling when the code would
-        grow.  */
-      else if (loop->inner)
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "it is not innermost and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* If there is call on a hot path through the loop, then
-        there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "contains call and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* If there is pure/const call in the function, then we
-        can still optimize the unrolled loop body if it contains
-        some other interesting code than the calls and code
-        storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-              /* One IV increment, one test, one ivtmp store
-                 and one useful stmt.  That is about minimal loop
-                 doing pure call.  */
-              && (size.non_call_stmts_on_hot_path
-                  <= 3 + size.num_pure_calls_on_hot_path))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "contains just pure calls and code would grow.\n",
-                    loop->num);
-         return false;
-       }
-      /* Complette unrolling is major win when control flow is removed and
-        one big basic block is created.  If the loop contains control flow
-        the optimization may still be a win because of eliminating the loop
-        overhead but it also may blow the branch predictor tables.
-        Limit number of branches on the hot path through the peeled
-        sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-              > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    " number of branches on hot path in the unrolled sequence"
-                    " reach --param max-peel-branches limit.\n",
-                    loop->num);
-         return false;
-       }
-      else if (unr_insns
-              > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-       {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           fprintf (dump_file, "Not unrolling loop %d: "
-                    "(--param max-completely-peeled-insns limit reached).\n",
-                    loop->num);
-         return false;
+           {
+             fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+             fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+                      (int) unr_insns);
+           }
+
+         /* If the code is going to shrink, we don't need to be extra
+            cautious on guessing if the unrolling is going to be
+            profitable.  */
+         if (unr_insns
+             /* If there is IV variable that will become constant, we
+                save one instruction in the loop prologue we do not
+                account otherwise.  */
+             <= ninsns + (size.constant_iv != false))
+           ;
+         /* We unroll only inner loops, because we do not consider it
+            profitable otherwise.  We still can cancel loopback edge
+            of not rolling loop; this is always a good idea.  */
+         else if (ul == UL_NO_GROWTH)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* Outer loops tend to be less interesting candidates for
+            complete unrolling unless we can do a lot of propagation
+            into the inner loop body.  For now we disable outer loop
+            unrolling when the code would grow.  */
+         else if (loop->inner)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "it is not innermost and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* If there is call on a hot path through the loop, then
+            there is most probably not much to optimize.  */
+         else if (size.num_non_pure_calls_on_hot_path)
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "contains call and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* If there is pure/const call in the function, then we can
+            still optimize the unrolled loop body if it contains some
+            other interesting code than the calls and code storing or
+            cumulating the return value.  */
+         else if (size.num_pure_calls_on_hot_path
+                  /* One IV increment, one test, one ivtmp store and
+                     one useful stmt.  That is about minimal loop
+                     doing pure call.  */
+                  && (size.non_call_stmts_on_hot_path
+                      <= 3 + size.num_pure_calls_on_hot_path))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "contains just pure calls and code would grow.\n",
+                        loop->num);
+             return false;
+           }
+         /* Complete unrolling is major win when control flow is
+            removed and one big basic block is created.  If the loop
+            contains control flow the optimization may still be a win
+            because of eliminating the loop overhead but it also may
+            blow the branch predictor tables.  Limit number of
+            branches on the hot path through the peeled sequence.  */
+         else if (size.num_branches_on_hot_path * (int)n_unroll
+                  > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        " number of branches on hot path in the unrolled sequence"
+                        " reach --param max-peel-branches limit.\n",
+                        loop->num);
+             return false;
+           }
+         else if (unr_insns
+                  > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+           {
+             if (dump_file && (dump_flags & TDF_DETAILS))
+               fprintf (dump_file, "Not unrolling loop %d: "
+                        "(--param max-completely-peeled-insns limit reached).\n",
+                        loop->num);
+             return false;
+           }
        }
       dump_printf_loc (report_flags, locus,
                        "loop turned into non-loop; it never loops.\n");
@@ -897,8 +920,9 @@ try_unroll_loop_completely (struct loop
       else
        gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-        and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path.  Doing so may remove outer loop and
+        confuse bookkeeping code in
+        tree_unroll_loops_completely.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -974,23 +998,33 @@ try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.  */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+       fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+       fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -998,10 +1032,11 @@ try_peel_loop (struct loop *loop,
                 "estimated\n");
       return false;
     }
+
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+       fprintf (dump_file, "Not peeling: upper bound is known so can "
                 "unroll completely\n");
       return false;
     }
@@ -1012,7 +1047,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+       fprintf (dump_file, "Not peeling: rolls too much "
                 "(%i + 1 > --param max-peel-times)\n", npeel);
       return false;
     }
@@ -1025,7 +1060,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+       fprintf (dump_file, "Not peeling: peeled sequence size is too large "
                 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1302,7 +1337,9 @@ tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
         not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1539,7 +1576,9 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize >= 2; }
+  virtual bool gate (function *) {
+    return optimize >= 2 || cfun->has_unroll;
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_complete_unrolli
Index: tree.def
===================================================================
--- tree.def    (revision 220084)
+++ tree.def    (working copy)
@@ -1365,8 +1365,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */

Re: #pragma GCC unroll support

Reply via email to