[PATCH][3/n] Reduction vectorization improvements

2015-05-26 Thread Richard Biener

This refactors the code so that, for loop SLP, we pass the SLP
node and instance to the vectorizable_* functions during the analysis
phase as well (not only during the transform phase).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-26  Richard Biener  rguent...@suse.de

* tree-vect-loop.c (vect_update_vf_for_slp): Split out from ...
(vect_analyze_loop_operations): ... here.  Remove slp parameter,
detect whether we apply SLP.  Remove call to
vect_update_slp_costs_according_to_vf.
(vect_analyze_loop_2): Call vect_update_vf_for_slp and
vect_update_slp_costs_according_to_vf from here.  Dispatch
to vect_slp_analyze_operations to analyze SLP stmts.
* tree-vect-slp.c (vect_slp_analyze_node_operations): Drop
unused bb_vec_info parameter, adjust assert.
(vect_slp_analyze_operations): Pass in the slp instance tree
instead of bb_vec_info.
(vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations.
* tree-vectorizer.h (vect_slp_analyze_operations): Declare.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 223574)
+++ gcc/tree-vect-loop.c(working copy)
@@ -1355,25 +1355,85 @@ vect_analyze_loop_form (struct loop *loo
   return loop_vinfo;
 }
 
+/* Scan the loop stmts and dependent on whether there are any (non-)SLP
+   statements update the vectorization factor.  */
+
+static void
+vect_update_vf_for_slp (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  int nbbs = loop->num_nodes;
+  unsigned int vectorization_factor;
+  int i;
+
+  if (dump_enabled_p ())
+dump_printf_loc (MSG_NOTE, vect_location,
+"=== vect_update_vf_for_slp ===\n");
+
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  gcc_assert (vectorization_factor != 0);
+
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ vectorization factor of the loop is the unrolling factor required by
+ the SLP instances.  If that unrolling factor is 1, we say, that we
+ perform pure SLP on loop - cross iteration parallelism is not
+ exploited.  */
+  bool only_slp_in_loop = true;
+  for (i = 0; i < nbbs; i++)
+{
+  basic_block bb = bbs[i];
+  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
+  gsi_next (&si))
+   {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+  && STMT_VINFO_RELATED_STMT (stmt_info))
+   {
+ stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ stmt_info = vinfo_for_stmt (stmt);
+   }
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+  && !PURE_SLP_STMT (stmt_info))
+   /* STMT needs both SLP and loop-based vectorization.  */
+   only_slp_in_loop = false;
+   }
+}
+
+  if (only_slp_in_loop)
+vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+vectorization_factor
+  = least_common_multiple (vectorization_factor,
+  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+  if (dump_enabled_p ())
+dump_printf_loc (MSG_NOTE, vect_location,
+"Updating vectorization factor to %d\n",
+vectorization_factor);
+}
 
 /* Function vect_analyze_loop_operations.
 
Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   int nbbs = loop->num_nodes;
-  unsigned int vectorization_factor = 0;
+  unsigned int vectorization_factor;
   int i;
   stmt_vec_info stmt_info;
   bool need_to_vectorize = false;
   int min_profitable_iters;
   int min_scalar_loop_bound;
   unsigned int th;
-  bool only_slp_in_loop = true, ok;
+  bool ok;
   HOST_WIDE_INT max_niter;
   HOST_WIDE_INT estimated_niter;
   int min_profitable_estimate;
@@ -1382,50 +1442,6 @@ vect_analyze_loop_operations (loop_vec_i
 dump_printf_loc (MSG_NOTE, vect_location,
 "=== vect_analyze_loop_operations ===\n");
 
-  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
-  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  if (slp)
-{
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-vectorization factor of the loop is the unrolling factor required by
-the SLP instances.  If that unrolling factor is 1, we say, that we
-perform 

[PATCH][3/n] Reduction vectorization improvements

2015-05-22 Thread Richard Biener

This does some more cleanup and refactoring, with two fixes: the
pure SLP computation in vect_analyze_loop_operations was failing
to look at pattern stmts, and the vect_is_slp_reduction hunk removes
a pattern-stmt check that made reduction detection fail because the
pattern state changes in between reduction detection and
vectorization (which re-calls the early code).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2015-05-22  Richard Biener  rguent...@suse.de

* tree-vectorizer.h (struct _slp_oprnd_info): Add second_pattern
member.
* tree-vect-loop.c (vect_analyze_loop_operations): Look at
patterns when determining whether SLP is pure.
(vect_is_slp_reduction): Remove check for pattern stmts.
(vect_is_simple_reduction_1): Remove dead code.
* tree-vect-slp.c (vect_create_oprnd_info): Initialize second_pattern.
(vect_get_and_check_slp_defs): Pass in the stmt number.
Allow the first def in a reduction to be not a pattern stmt when
the rest of the stmts def are patterns.
(vect_build_slp_tree_1): Allow tcc_expression codes like
SAD_EXPR and DOT_PROD_EXPR.
(vect_build_slp_tree): Adjust.
(vect_analyze_slp): Refactor and move BB vect error message ...
(vect_slp_analyze_bb_1): ... here.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 223529)
+++ gcc/tree-vect-loop.c(working copy)
@@ -1399,7 +1399,12 @@ vect_analyze_loop_operations (loop_vec_i
{
  gimple stmt = gsi_stmt (si);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- gcc_assert (stmt_info);
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+  && STMT_VINFO_RELATED_STMT (stmt_info))
+   {
+ stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+ stmt_info = vinfo_for_stmt (stmt);
+   }
  if ((STMT_VINFO_RELEVANT_P (stmt_info)
   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
   && !PURE_SLP_STMT (stmt_info))
@@ -2031,12 +2036,8 @@ vect_is_slp_reduction (loop_vec_info loo
 
   if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
 {
-  if (vinfo_for_stmt (use_stmt)
-   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
-{
-  loop_use_stmt = use_stmt;
-  nloop_uses++;
-}
+ loop_use_stmt = use_stmt;
+ nloop_uses++;
 }
else
  n_out_of_loop_uses++;
@@ -2265,9 +2266,7 @@ vect_is_simple_reduction_1 (loop_vec_inf
   return NULL;
 }
 
-  if (vinfo_for_stmt (use_stmt)
-  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
-nloop_uses++;
+  nloop_uses++;
   if (nloop_uses > 1)
 {
   if (dump_enabled_p ())
@@ -2325,9 +2324,7 @@ vect_is_simple_reduction_1 (loop_vec_inf
   gimple use_stmt = USE_STMT (use_p);
   if (is_gimple_debug (use_stmt))
continue;
-  if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
-  && vinfo_for_stmt (use_stmt)
-  && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+  if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
nloop_uses++;
   if (nloop_uses > 1)
{
Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c (revision 223529)
+++ gcc/tree-vect-slp.c (working copy)
@@ -183,6 +183,7 @@ vect_create_oprnd_info (int nops, int gr
   oprnd_info->first_dt = vect_uninitialized_def;
   oprnd_info->first_op_type = NULL_TREE;
   oprnd_info->first_pattern = false;
+  oprnd_info->second_pattern = false;
   oprnds_info.quick_push (oprnd_info);
 }
 
@@ -242,7 +243,7 @@ vect_get_place_in_interleaving_chain (gi
 
 static int 
 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
- gimple stmt, bool first,
+ gimple stmt, unsigned stmt_num,
  vec<slp_oprnd_info> *oprnds_info)
 {
   tree oprnd;
@@ -256,6 +257,8 @@ vect_get_and_check_slp_defs (loop_vec_in
   int first_op_idx = 1;
   bool commutative = false;
   bool first_op_cond = false;
+  bool first = stmt_num == 0;
+  bool second = stmt_num == 1;
 
   if (loop_vinfo)
 loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -326,7 +329,11 @@ again:
   && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
 {
   pattern = true;
-  if (!first && !oprnd_info->first_pattern)
+  if (!first && !oprnd_info->first_pattern
+ /* Allow different pattern state for the defs of the
+first stmt in reduction chains.  */
+  && (oprnd_info->first_dt != vect_reduction_def
+ || (!second && !oprnd_info->second_pattern)))