The following fixes a missed optimization in basic-block vectorization.
Currently we require the SLP chain to end up in a sequence of loads
we support.  But of course we can in theory end the SLP chain at
any point and simply construct the vector operand of the uses by
pieces.  This is what the patch does to handle the case where
"external" defs are not really external.  As the patch is somewhat
more generic it also handles more cases and relies on the cost model
to reject the outright non-profitable ones (like the bb-slp-14.c
case which is run with -fno-vect-cost-model though).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-04-28  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/62283
        * tree-vect-slp.c (vect_build_slp_tree): When the SLP build
        fails fatally and we are vectorizing a basic-block simply
        cause the child to be constructed piecewise.
        (vect_analyze_slp_cost_1): Adjust.
        (vect_detect_hybrid_slp_stmts): Likewise.
        (vect_bb_slp_scalar_cost): Likewise.
        (vect_get_constant_vectors): For piecewise constructed
        constants place them after the last def.
        (vect_get_slp_defs): Adjust.
        * tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
        externals for basic-block vectorization.

        * gfortran.dg/vect/pr62283-2.f: New testcase.
        * gcc.dg/vect/bb-slp-14.c: Adjust.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig    2015-04-27 14:42:11.396582142 +0200
--- gcc/tree-vect-slp.c 2015-04-27 14:48:25.406851724 +0200
*************** vect_build_slp_tree (loop_vec_info loop_
*** 1017,1022 ****
--- 1017,1045 ----
          continue;
        }
  
+       /* If the SLP build failed fatally and we analyze a basic-block
+          simply treat nodes we fail to build as externally defined
+        (and thus build vectors from the scalar defs).
+        The cost model will reject outright expensive cases.
+        ???  This doesn't treat cases where permutation ultimatively
+        fails (or we don't try permutation below).  Ideally we'd
+        even compute a permutation that will end up with the maximum
+        SLP tree size...  */
+       if (bb_vinfo
+         && !matches[0]
+         /* ???  Rejecting patterns this way doesn't work.  We'd have to
+            do extra work to cancel the pattern so the uses see the
+            scalar version.  */
+         && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
+       {
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "Building vector operands from scalars\n");
+         oprnd_info->def_stmts = vNULL;
+         vect_free_slp_tree (child);
+         SLP_TREE_CHILDREN (*node).quick_push (NULL);
+         continue;
+       }
+ 
        /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
*************** vect_analyze_slp_cost_1 (loop_vec_info l
*** 1417,1425 ****
  
    /* Recurse down the SLP tree.  */
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
!                            instance, child, prologue_cost_vec,
!                            ncopies_for_cost);
  
    /* Look at the first scalar stmt to determine the cost.  */
    stmt = SLP_TREE_SCALAR_STMTS (node)[0];
--- 1440,1449 ----
  
    /* Recurse down the SLP tree.  */
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (child)
!       vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
!                              instance, child, prologue_cost_vec,
!                              ncopies_for_cost);
  
    /* Look at the first scalar stmt to determine the cost.  */
    stmt = SLP_TREE_SCALAR_STMTS (node)[0];
*************** vect_detect_hybrid_slp_stmts (slp_tree n
*** 1885,1891 ****
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
!     vect_detect_hybrid_slp_stmts (child, i, stype);
  }
  
  /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
--- 1909,1916 ----
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
!     if (child)
!       vect_detect_hybrid_slp_stmts (child, i, stype);
  }
  
  /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
*************** vect_bb_slp_scalar_cost (basic_block bb,
*** 2162,2168 ****
      }
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
  
    return scalar_cost;
  }
--- 2187,2194 ----
      }
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (child)
!       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
  
    return scalar_cost;
  }
*************** vect_get_constant_vectors (tree op, slp_
*** 2612,2617 ****
--- 2638,2644 ----
  
    number_of_places_left_in_vector = nunits;
    elts = XALLOCAVEC (tree, nunits);
+   bool place_after_defs = false;
    for (j = 0; j < number_of_copies; j++)
      {
        for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
*************** vect_get_constant_vectors (tree op, slp_
*** 2682,2687 ****
--- 2709,2715 ----
  
            /* Create 'vect_ = {op0,op1,...,opn}'.  */
            number_of_places_left_in_vector--;
+         tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
*************** vect_get_constant_vectors (tree op, slp_
*** 2704,2709 ****
--- 2732,2743 ----
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
+         if (TREE_CODE (orig_op) == SSA_NAME
+             && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
+             && STMT_VINFO_BB_VINFO (stmt_vinfo)
+             && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
+                 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
+           place_after_defs = true;
  
            if (number_of_places_left_in_vector == 0)
              {
*************** vect_get_constant_vectors (tree op, slp_
*** 2720,2735 ****
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
!               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
!                                                   vector_type, NULL));
              if (ctor_seq != NULL)
                {
!                 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
!                 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              }
          }
      }
--- 2754,2778 ----
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
!             tree init;
!             gimple_stmt_iterator gsi;
!             if (place_after_defs)
!               {
!                 gsi = gsi_for_stmt
!                         (vect_find_last_scalar_stmt_in_slp (slp_node));
!                 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
!               }
!             else
!               init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
!                 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
+             voprnds.quick_push (init);
+             place_after_defs = false;
              }
          }
      }
*************** vect_get_slp_defs (vec<tree> ops, slp_tr
*** 2825,2844 ****
            child = SLP_TREE_CHILDREN (slp_node)[child_index];
  
          /* We have to check both pattern and original def, if available.  */
!         gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
!         gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
! 
!         if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
!             || (related
!                 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
            {
!             /* The number of vector defs is determined by the number of
!                vector statements in the node from which we get those
!                statements.  */
!             number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
!             vectorized_defs = true;
!             child_index++;
            }
          }
  
        if (!vectorized_defs)
--- 2868,2893 ----
            child = SLP_TREE_CHILDREN (slp_node)[child_index];
  
          /* We have to check both pattern and original def, if available.  */
!         if (child)
            {
!             gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
!             gimple related
!               = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
! 
!             if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
!                 || (related
!                     && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
!               {
!                 /* The number of vector defs is determined by the number of
!                    vector statements in the node from which we get those
!                    statements.  */
!                 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
!                 vectorized_defs = true;
!                 child_index++;
!               }
            }
+         else
+           child_index++;
          }
  
        if (!vectorized_defs)
Index: gcc/testsuite/gfortran.dg/vect/pr62283-2.f
===================================================================
*** /dev/null   1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gfortran.dg/vect/pr62283-2.f  2015-04-27 14:48:25.454852144 
+0200
***************
*** 0 ****
--- 1,13 ----
+ ! { dg-do compile }
+ ! { dg-require-effective-target vect_float }
+ ! { dg-additional-options "-fdump-tree-slp2-details" }
+       subroutine saxpy(alpha,x,y)
+       real x(4),y(4),alpha
+       y(1)=y(1)+alpha*x(1)
+       y(2)=y(2)+alpha*x(2)
+       y(3)=y(3)+alpha*x(3)
+       y(4)=y(4)+alpha*x(4)
+       end
+ ! { dg-final { scan-tree-dump "basic block vectorized" "slp2" } }
+ ! { dg-final { cleanup-tree-dump "slp2" } }
+ ! { dg-final { cleanup-tree-dump "vect" } }
Index: gcc/testsuite/gcc.dg/vect/bb-slp-14.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/bb-slp-14.c.orig  2014-06-23 18:48:59.686799283 
+0200
--- gcc/testsuite/gcc.dg/vect/bb-slp-14.c       2015-04-27 15:25:48.277119878 
+0200
*************** main1 (unsigned int x, unsigned int y)
*** 14,20 ****
    int i;
    unsigned int a0, a1, a2, a3;
  
!   /* Not consecutive load with permutation - not supported.  */
    a0 = in[0] + 23;
    a1 = in[1] + 142;
    a2 = in[1] + 2;
--- 14,21 ----
    int i;
    unsigned int a0, a1, a2, a3;
  
!   /* Not consecutive load with permutation - supported with building up
!      the vector from scalars.  */
    a0 = in[0] + 23;
    a1 = in[1] + 142;
    a2 = in[1] + 2;
*************** int main (void)
*** 47,52 ****
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2"  } } */
  /* { dg-final { cleanup-tree-dump "slp2" } } */
    
--- 48,53 ----
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2"  } } */
  /* { dg-final { cleanup-tree-dump "slp2" } } */
    
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig  2015-04-23 10:44:27.960869519 +0200
--- gcc/tree-vect-stmts.c       2015-04-27 15:21:36.008809210 +0200
*************** vect_is_simple_use (tree operand, gimple
*** 7752,7758 ****
    else
      {
        stmt_vinfo = vinfo_for_stmt (*def_stmt);
!       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      }
  
    if (dump_enabled_p ())
--- 7752,7761 ----
    else
      {
        stmt_vinfo = vinfo_for_stmt (*def_stmt);
!       if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
!       *dt = vect_external_def;
!       else
!       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      }
  
    if (dump_enabled_p ())

Reply via email to