This redoes the fix for PR53185 in a less intrusive way, allowing
both strided load vectorization and peeling for alignment.  Testing
on my local machine shows that this fix yields a 3% speedup on
rnflow.
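
For reference, the loop this is about (fn1 from the new testcase
below) is a non-grouped load with a loop-invariant stride:

  void __attribute__((noinline,noclone))
  fn1 (int * __restrict f, int * __restrict d, unsigned short a, int c)
  {
    unsigned short e;
    for (e = 0; e < a; ++e)
      f[e] = d[e * c];
  }

Before this patch any STMT_VINFO_STRIDE_LOAD_P stmt forced do_peeling
to false in vect_enhance_data_refs_alignment, so we could vectorize
the strided load from d[] only at the cost of not peeling the store
to f[] for alignment.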

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2013-02-05  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/53342
        PR tree-optimization/53185
        * tree-vectorizer.h (vect_check_strided_load): Remove.
        * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do
        not disallow peeling for vectorized strided loads.
        (vect_check_strided_load): Make static and simplify.
        (vect_analyze_data_refs): Adjust.
        * tree-vect-stmts.c (vectorizable_load): Handle peeled loops
        correctly when vectorizing strided loads.

        * gcc.dg/vect/pr53185-2.c: New testcase.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h       (revision 195751)
--- gcc/tree-vectorizer.h       (working copy)
*************** extern bool vect_analyze_data_ref_access
*** 923,929 ****
  extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
  extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
                               int *);
- extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *);
  extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
  extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
                                      tree *, gimple_stmt_iterator *,
--- 923,928 ----
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c   (revision 195751)
--- gcc/tree-vect-data-refs.c   (working copy)
*************** vect_enhance_data_refs_alignment (loop_v
*** 1615,1632 ****
            && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          continue;
  
-       /* FORNOW: Any strided load prevents peeling.  The induction
-          variable analysis will fail when the prologue loop is generated,
-        and so we can't generate the new base for the pointer.  */
-       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                              "strided load prevents peeling");
-         do_peeling = false;
-         break;
-       }
- 
        /* For invariant accesses there is nothing to enhance.  */
        if (integer_zerop (DR_STEP (dr)))
        continue;
--- 1615,1620 ----
*************** vect_check_gather (gimple stmt, loop_vec
*** 2890,2898 ****
     This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
     base pointer) only.  */
  
! bool
! vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
!                        tree *stepp)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
--- 2878,2885 ----
     This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
     base pointer) only.  */
  
! static bool
! vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo)
  {
    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
*************** vect_check_strided_load (gimple stmt, lo
*** 2925,2934 ****
        || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
      return false;
  
-   if (basep)
-     *basep = iv.base;
-   if (stepp)
-     *stepp = iv.step;
    return true;
  }
  
--- 2912,2917 ----
*************** vect_analyze_data_refs (loop_vec_info lo
*** 3473,3480 ****
        {
          bool strided_load = false;
          if (!nested_in_vect_loop_p (loop, stmt))
!           strided_load
!             = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
          if (!strided_load)
            {
              if (dump_enabled_p ())
--- 3456,3462 ----
        {
          bool strided_load = false;
          if (!nested_in_vect_loop_p (loop, stmt))
!           strided_load = vect_check_strided_load (stmt, loop_vinfo);
          if (!strided_load)
            {
              if (dump_enabled_p ())
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c       (revision 195751)
--- gcc/tree-vect-stmts.c       (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 4353,4359 ****
    tree aggr_type;
    tree gather_base = NULL_TREE, gather_off = NULL_TREE;
    tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
-   tree stride_base, stride_step;
    int gather_scale = 1;
    enum vect_def_type gather_dt = vect_unknown_def_type;
  
--- 4353,4358 ----
*************** vectorizable_load (gimple stmt, gimple_s
*** 4462,4472 ****
        }
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     {
!       if (!vect_check_strided_load (stmt, loop_vinfo,
!                                   &stride_base, &stride_step))
!       return false;
!     }
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
--- 4461,4467 ----
        }
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     ;
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
*************** vectorizable_load (gimple stmt, gimple_s
*** 4674,4686 ****
        bool insert_after;
        gimple incr;
        tree offvar;
-       tree ref = DR_REF (dr);
        tree ivstep;
        tree running_off;
        vec<constructor_elt, va_gc> *v = NULL;
        gimple_seq stmts = NULL;
  
!       gcc_assert (stride_base && stride_step);
  
        /* For a load with loop-invariant (but other than power-of-2)
           stride (i.e. not a grouped access) like so:
--- 4669,4689 ----
        bool insert_after;
        gimple incr;
        tree offvar;
        tree ivstep;
        tree running_off;
        vec<constructor_elt, va_gc> *v = NULL;
        gimple_seq stmts = NULL;
+       tree stride_base, stride_step, alias_off;
+ 
+       gcc_assert (!nested_in_vect_loop);
  
!       stride_base
!       = fold_build_pointer_plus
!           (unshare_expr (DR_BASE_ADDRESS (dr)),
!            size_binop (PLUS_EXPR,
!                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
!                        convert_to_ptrofftype (DR_INIT (dr))));
!       stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
  
        /* For a load with loop-invariant (but other than power-of-2)
           stride (i.e. not a grouped access) like so:
*************** vectorizable_load (gimple stmt, gimple_s
*** 4716,4721 ****
--- 4719,4725 ----
  
        prev_stmt_info = NULL;
        running_off = offvar;
+       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
        for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;
*************** vectorizable_load (gimple stmt, gimple_s
*** 4725,4757 ****
            {
              tree newref, newoff;
              gimple incr;
!             if (TREE_CODE (ref) == ARRAY_REF)
!               {
!                 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
!                                  unshare_expr (TREE_OPERAND (ref, 0)),
!                                  running_off,
!                                  NULL_TREE, NULL_TREE);
!                 if (!useless_type_conversion_p (TREE_TYPE (vectype),
!                                                 TREE_TYPE (newref)))
!                   newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
!                                    newref);
!               }
!             else
!               newref = build2 (MEM_REF, TREE_TYPE (vectype),
!                                running_off,
!                                TREE_OPERAND (ref, 1));
  
              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
!             if (POINTER_TYPE_P (TREE_TYPE (newoff)))
!               incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
!                                                    running_off, stride_step);
!             else
!               incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
!                                                    running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
  
              running_off = newoff;
--- 4729,4744 ----
            {
              tree newref, newoff;
              gimple incr;
!             newref = build2 (MEM_REF, TREE_TYPE (vectype),
!                              running_off, alias_off);
  
              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
!             incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
!                                                  running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
  
              running_off = newoff;
Index: gcc/testsuite/gcc.dg/vect/pr53185-2.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr53185-2.c       (revision 0)
--- gcc/testsuite/gcc.dg/vect/pr53185-2.c       (working copy)
***************
*** 0 ****
--- 1,27 ----
+ void __attribute__((noinline,noclone))
+ fn1 (int * __restrict f, int * __restrict d, unsigned short a, int c)
+ {
+   unsigned short e;
+   for (e = 0; e < a; ++e)
+     f[e] = d[e * c];
+ }
+ 
+ extern void abort (void);
+ 
+ int main ()
+ {
+   int a[32], b[3 * 32];
+   int i, off;
+   for (i = 0; i < 3 * 32; ++i)
+     b[i] = i;
+   for (off = 0; off < 8; ++off)
+     {
+       fn1 (&a[off], &b[off], 32 - off, 3);
+       for (i = 0; i < 32 - off; ++i)
+       if (a[off+i] != b[off+i*3])
+         abort ();
+     }
+   return 0;
+ }
+ 
+ /* { dg-final { cleanup-tree-dump "vect" } } */
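
For the record, the strided path in vectorizable_load now rebuilds
stride_base at transform time from the data reference itself
(DR_BASE_ADDRESS plus DR_OFFSET and DR_INIT, which the peeling
machinery keeps up to date) instead of relying on simple_iv, which
fails once the prologue loop exists.  Per vector copy it then emits
one scalar MEM_REF load per lane off a running pointer and assembles
the lanes with a CONSTRUCTOR; roughly, in GNU C (illustrative names,
not the actual GIMPLE):

  typedef int v4si __attribute__ ((vector_size (16)));

  static v4si
  load_strided_v4si (const char *running_off, long stride_step)
  {
    /* One scalar load per lane; the pointer is bumped by stride_step
       bytes (POINTER_PLUS_EXPR in the patch) after each load.  */
    int e0 = *(const int *) running_off;  running_off += stride_step;
    int e1 = *(const int *) running_off;  running_off += stride_step;
    int e2 = *(const int *) running_off;  running_off += stride_step;
    int e3 = *(const int *) running_off;
    return (v4si) { e0, e1, e2, e3 };
  }

Using a plain MEM_REF with alias_off carrying the original
reference's alias pointer type also removes the old ARRAY_REF
special-casing and the PLUS vs. POINTER_PLUS distinction on the
increment.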
