I have two patches that also record the maximum number of loop iterations
for the epilogue loop the vectorizer creates.  The first one reverts
an old patch that made us re-use that epilogue loop also for the
unvectorized case (when versioning for alignment or aliasing).  Thus
it creates one more loop copy (though it then properly separates the
epilogue with very few iterations from the unvectorized loops with
the original number of iterations).  The second patch simply only
records the maximum number of iterations when we do not re-use
that loop body for the unvectorized version.

Numbers on the single testcase I tried this on show the second version
has slightly better code-size behavior.  I was testing this on
the single-file leslie3d SPEC CPU 2006 testcase whose file-size regressed
the most when enabling array-prefetching by default on certain AMD models.
The numbers are as follows, base flags -Ofast -funroll-loops -fpeel-loops 
-march=barcelona.

                          unpatched  patch #1  patch #2
                          564423     391623    386151
-fno-prefetch-loop-arrays 366247     308711    303783
-fwhole-program           471481     326944    322925

The difference is barely noticeable but the testcase is surely special.

Both patches were bootstrapped and tested on x86_64-unknown-linux-gnu
(the first one needs some assembler scan adjustments in the x86 testsuite
still).

I'm leaning towards using the first patch, as only that will enable
us to more easily disentangle the various loops generated by the
vectorizer and eventually optimize them more (like unroll the
epilogue, re-order and re-structure the cost model, alignment and
alias checks, etc.).

But for wider testing coverage (and also to get some runtime numbers,
which should show just noise for both patches ...) I'm going to
apply patch two first and later revert to patch one.

Thanks,
Richard.

2012-04-24  Richard Guenther  <rguent...@suse.de>

        * tree-vectorizer.h (vect_loop_versioning): Adjust prototype.
        * tree-vect-loop.c (vect_transform_loop): Adjust.
        * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): Record
        the maximum number of iterations for the epilogue loop.
        (vect_loop_versioning): Remove case re-using the peeled
        epilogue loop.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h       (revision 186709)
--- gcc/tree-vectorizer.h       (working copy)
*************** extern LOC vect_loop_location;
*** 807,813 ****
     in tree-vect-loop-manip.c.  */
  extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
  extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
! extern void vect_loop_versioning (loop_vec_info, bool, tree *, gimple_seq *);
  extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
                                              tree, gimple_seq);
  extern void vect_do_peeling_for_alignment (loop_vec_info);
--- 807,813 ----
     in tree-vect-loop-manip.c.  */
  extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
  extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
! extern void vect_loop_versioning (loop_vec_info);
  extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *,
                                              tree, gimple_seq);
  extern void vect_do_peeling_for_alignment (loop_vec_info);
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c        (revision 186709)
--- gcc/tree-vect-loop.c        (working copy)
*************** vect_transform_loop (loop_vec_info loop_
*** 5229,5235 ****
    unsigned int nunits;
    tree cond_expr = NULL_TREE;
    gimple_seq cond_expr_stmt_list = NULL;
-   bool do_peeling_for_loop_bound;
    gimple stmt, pattern_stmt;
    gimple_seq pattern_def_seq = NULL;
    gimple_stmt_iterator pattern_def_si = gsi_start (NULL);
--- 5229,5234 ----
*************** vect_transform_loop (loop_vec_info loop_
*** 5244,5260 ****
    if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
      vect_do_peeling_for_alignment (loop_vinfo);
  
-   do_peeling_for_loop_bound
-     = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
-        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
- 
    if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
        || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
!     vect_loop_versioning (loop_vinfo,
!                         !do_peeling_for_loop_bound,
!                         &cond_expr, &cond_expr_stmt_list);
  
    /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
       compile time constant), or it is a constant that doesn't divide by the
--- 5243,5251 ----
    if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
      vect_do_peeling_for_alignment (loop_vinfo);
  
    if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
        || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
!     vect_loop_versioning (loop_vinfo);
  
    /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
       compile time constant), or it is a constant that doesn't divide by the
*************** vect_transform_loop (loop_vec_info loop_
*** 5264,5270 ****
       will remain scalar and will compute the remaining (n%VF) iterations.
       (VF is the vectorization factor).  */
  
!   if (do_peeling_for_loop_bound)
      vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
                                    cond_expr, cond_expr_stmt_list);
    else
--- 5255,5264 ----
       will remain scalar and will compute the remaining (n%VF) iterations.
       (VF is the vectorization factor).  */
  
!   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
!        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
      vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
                                    cond_expr, cond_expr_stmt_list);
    else
Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c  (revision 186710)
--- gcc/tree-vect-loop-manip.c  (working copy)
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1907,1912 ****
--- 1907,1913 ----
    bool check_profitability = false;
    unsigned int th = 0;
    int min_profitable_iters;
+   int max_iter;
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1966,1971 ****
--- 1967,1978 ----
       by ratio_mult_vf_name steps.  */
    vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
  
+   max_iter = MAX (LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1, (int) th);
+   record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true);
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     fprintf (dump_file, "Setting upper bound of nb iterations for epilogue "
+            "loop to %d\n", max_iter);
+ 
    /* After peeling we have to reset scalar evolution analyzer.  */
    scev_reset ();
  
*************** vect_create_cond_for_alias_checks (loop_
*** 2537,2548 ****
     cost model initially.
  
     The versioning precondition(s) are placed in *COND_EXPR and
!    *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
!    also performed, otherwise only the conditions are generated.  */
  
  void
! vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
!                     tree *cond_expr, gimple_seq *cond_expr_stmt_list)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    basic_block condition_bb;
--- 2544,2553 ----
     cost model initially.
  
     The versioning precondition(s) are placed in *COND_EXPR and
!    *COND_EXPR_STMT_LIST.  */
  
  void
! vect_loop_versioning (loop_vec_info loop_vinfo)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    basic_block condition_bb;
*************** vect_loop_versioning (loop_vec_info loop
*** 2551,2556 ****
--- 2556,2563 ----
    basic_block new_exit_bb;
    edge new_exit_e, e;
    gimple orig_phi, new_phi;
+   tree cond_expr;
+   gimple_seq cond_expr_stmt_list = NULL;
    tree arg;
    unsigned prob = 4 * REG_BR_PROB_BASE / 5;
    gimple_seq gimplify_stmt_list = NULL;
*************** vect_loop_versioning (loop_vec_info loop
*** 2564,2593 ****
    th = conservative_cost_threshold (loop_vinfo,
                                    min_profitable_iters);
  
!   *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
!                           build_int_cst (TREE_TYPE (scalar_loop_iters), th));
!   *cond_expr = force_gimple_operand_1 (*cond_expr, cond_expr_stmt_list,
!                                      is_gimple_condexpr, NULL_TREE);
  
    if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
!     vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
!                                      cond_expr_stmt_list);
  
    if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
!     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
!                                      cond_expr_stmt_list);
  
!   *cond_expr = force_gimple_operand_1 (*cond_expr, &gimplify_stmt_list,
!                                      is_gimple_condexpr, NULL_TREE);
!   gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
! 
!   /* If we only needed the extra conditions and a new loop copy
!      bail out here.  */
!   if (!do_versioning)
!     return;
  
    initialize_original_copy_tables ();
!   loop_version (loop, *cond_expr, &condition_bb,
                prob, prob, REG_BR_PROB_BASE - prob, true);
    free_original_copy_tables();
  
--- 2571,2595 ----
    th = conservative_cost_threshold (loop_vinfo,
                                    min_profitable_iters);
  
!   cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
!                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
!   cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
!                                     is_gimple_condexpr, NULL_TREE);
  
    if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
!     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
!                                      &cond_expr_stmt_list);
  
    if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
!     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
!                                      &cond_expr_stmt_list);
  
!   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
!                                     is_gimple_condexpr, NULL_TREE);
!   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  
    initialize_original_copy_tables ();
!   loop_version (loop, cond_expr, &condition_bb,
                prob, prob, REG_BR_PROB_BASE - prob, true);
    free_original_copy_tables();
  
*************** vect_loop_versioning (loop_vec_info loop
*** 2619,2631 ****
    /* End loop-exit-fixes after versioning.  */
  
    update_ssa (TODO_update_ssa);
!   if (*cond_expr_stmt_list)
      {
        cond_exp_gsi = gsi_last_bb (condition_bb);
!       gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
                             GSI_SAME_STMT);
-       *cond_expr_stmt_list = NULL;
      }
-   *cond_expr = NULL_TREE;
  }
  
--- 2621,2631 ----
    /* End loop-exit-fixes after versioning.  */
  
    update_ssa (TODO_update_ssa);
!   if (cond_expr_stmt_list)
      {
        cond_exp_gsi = gsi_last_bb (condition_bb);
!       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
                             GSI_SAME_STMT);
      }
  }
  

2012-04-24  Richard Guenther  <rguent...@suse.de>

        * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound): If
        the epilogue loop is not re-used as unvectorized version
        record its maximum number of iterations.

Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c  (revision 186757)
--- gcc/tree-vect-loop-manip.c  (working copy)
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 1966,1971 ****
--- 1966,1981 ----
       by ratio_mult_vf_name steps.  */
    vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
  
+   if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+       && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+     {
+       int max_iter = MAX (LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1, (int) th);
+       record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true);
+       if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, "Setting upper bound of nb iterations for epilogue "
+                "loop to %d\n", max_iter);
+     }
+ 
    /* After peeling we have to reset scalar evolution analyzer.  */
    scev_reset ();
  

Reply via email to