Re-indent elided loop bodies

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Will squash, repost and push if all OK.

Richard.

        * tree-vect-stmts.cc (vectorizable_load): Re-indent.
---
 gcc/tree-vect-stmts.cc | 1686 ++++++++++++++++++++--------------------
 1 file changed, 831 insertions(+), 855 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 717d4694b88..db1b539b6c7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11007,345 +11007,327 @@ vectorizable_load (vec_info *vinfo,
       gcc_assert (!grouped_load && !slp_perm);
 
       unsigned int inside_cost = 0, prologue_cost = 0;
+
+      /* 1. Create the vector or array pointer update chain.  */
+      if (!costing_p)
        {
-         /* 1. Create the vector or array pointer update chain.  */
+         if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+           vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+                                        slp_node, &gs_info, &dataref_ptr,
+                                        &vec_offsets);
+         else
+           dataref_ptr
+             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                         at_loop, offset, &dummy, gsi,
+                                         &ptr_incr, false, bump);
+       }
+
+      gimple *new_stmt = NULL;
+      for (i = 0; i < vec_num; i++)
+       {
+         tree final_mask = NULL_TREE;
+         tree final_len = NULL_TREE;
+         tree bias = NULL_TREE;
          if (!costing_p)
            {
-             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-               vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
-                                            slp_node, &gs_info, &dataref_ptr,
-                                            &vec_offsets);
-             else
-               dataref_ptr
-                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-                                             at_loop, offset, &dummy, gsi,
-                                             &ptr_incr, false, bump);
+             if (mask)
+               vec_mask = vec_masks[i];
+             if (loop_masks)
+               final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+                                                vec_num, vectype, i);
+             if (vec_mask)
+               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+                                              final_mask, vec_mask, gsi);
+
+             if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                              gsi, stmt_info, bump);
            }
 
-         gimple *new_stmt = NULL;
-         for (i = 0; i < vec_num; i++)
+         /* 2. Create the vector-load in the loop.  */
+         unsigned HOST_WIDE_INT align;
+         if (gs_info.ifn != IFN_LAST)
            {
-             tree final_mask = NULL_TREE;
-             tree final_len = NULL_TREE;
-             tree bias = NULL_TREE;
-             if (!costing_p)
+             if (costing_p)
                {
-                 if (mask)
-                   vec_mask = vec_masks[i];
-                 if (loop_masks)
-                   final_mask
-                     = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                           vec_num, vectype, i);
-                 if (vec_mask)
-                   final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
-                                                  final_mask, vec_mask, gsi);
-
-                 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-                   dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
-                                                  gsi, stmt_info, bump);
+                 unsigned int cnunits = vect_nunits_for_cost (vectype);
+                 inside_cost
+                   = record_stmt_cost (cost_vec, cnunits, scalar_load,
+                                       slp_node, 0, vect_body);
+                 continue;
                }
+             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               vec_offset = vec_offsets[i];
+             tree zero = build_zero_cst (vectype);
+             tree scale = size_int (gs_info.scale);
 
-             /* 2. Create the vector-load in the loop.  */
-             unsigned HOST_WIDE_INT align;
-             if (gs_info.ifn != IFN_LAST)
+             if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
                {
-                 if (costing_p)
-                   {
-                     unsigned int cnunits = vect_nunits_for_cost (vectype);
-                     inside_cost
-                       = record_stmt_cost (cost_vec, cnunits, scalar_load,
-                                           slp_node, 0, vect_body);
-                     continue;
-                   }
-                 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-                   vec_offset = vec_offsets[i];
-                 tree zero = build_zero_cst (vectype);
-                 tree scale = size_int (gs_info.scale);
-
-                 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
+                 if (loop_lens)
+                   final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                                  vec_num, vectype, i, 1);
+                 else
+                   final_len = build_int_cst (sizetype,
+                                              TYPE_VECTOR_SUBPARTS (vectype));
+                 signed char biasval
+                   = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+                 bias = build_int_cst (intQI_type_node, biasval);
+                 if (!final_mask)
                    {
-                     if (loop_lens)
-                       final_len
-                         = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              vec_num, vectype, i, 1);
-                     else
-                       final_len
-                         = build_int_cst (sizetype,
-                                          TYPE_VECTOR_SUBPARTS (vectype));
-                     signed char biasval
-                       = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-                     bias = build_int_cst (intQI_type_node, biasval);
-                     if (!final_mask)
-                       {
-                         mask_vectype = truth_type_for (vectype);
-                         final_mask = build_minus_one_cst (mask_vectype);
-                       }
+                     mask_vectype = truth_type_for (vectype);
+                     final_mask = build_minus_one_cst (mask_vectype);
                    }
+               }
 
-                 if (final_mask)
-                   {
-                     vec_els = vect_get_mask_load_else
-                       (maskload_elsval, vectype);
-                     if (type_mode_padding_p
-                         && maskload_elsval != MASK_LOAD_ELSE_ZERO)
-                       need_zeroing = true;
-                   }
+             if (final_mask)
+               {
+                 vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
+                 if (type_mode_padding_p
+                     && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+                   need_zeroing = true;
+               }
 
-                 gcall *call;
-                 if (final_len && final_mask)
-                   {
-                     if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
-                       call = gimple_build_call_internal (
-                         IFN_MASK_LEN_GATHER_LOAD, 8, dataref_ptr, vec_offset,
-                         scale, zero, final_mask, vec_els, final_len, bias);
-                     else
-                       /* Non-vector offset indicates that prefer to take
-                          MASK_LEN_STRIDED_LOAD instead of the
-                          MASK_LEN_GATHER_LOAD with direct stride arg.  */
-                       call = gimple_build_call_internal (
-                         IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr, vec_offset,
-                         zero, final_mask, vec_els, final_len, bias);
-                   }
-                 else if (final_mask)
-                   call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
-                                                      6, dataref_ptr,
-                                                      vec_offset, scale,
-                                                      zero, final_mask,
-                                                      vec_els);
+             gcall *call;
+             if (final_len && final_mask)
+               {
+                 if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
+                   call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
+                                                      8, dataref_ptr,
+                                                      vec_offset, scale, zero,
+                                                      final_mask, vec_els,
+                                                      final_len, bias);
                  else
-                   call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
-                                                      dataref_ptr, vec_offset,
-                                                      scale, zero);
-                 gimple_call_set_nothrow (call, true);
-                 new_stmt = call;
+                   /* Non-vector offset indicates that prefer to take
+                      MASK_LEN_STRIDED_LOAD instead of the
+                      MASK_LEN_GATHER_LOAD with direct stride arg.  */
+                   call = gimple_build_call_internal
+                            (IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr,
+                             vec_offset, zero, final_mask, vec_els, final_len,
+                             bias);
+               }
+             else if (final_mask)
+               call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
+                                                  6, dataref_ptr,
+                                                  vec_offset, scale,
+                                                  zero, final_mask, vec_els);
+             else
+               call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
+                                                  dataref_ptr, vec_offset,
+                                                  scale, zero);
+             gimple_call_set_nothrow (call, true);
+             new_stmt = call;
+             data_ref = NULL_TREE;
+           }
+         else if (gs_info.decl)
+           {
+             /* The builtin decls path for gather is legacy, x86 only.  */
+             gcc_assert (!final_len && nunits.is_constant ());
+             if (costing_p)
+               {
+                 unsigned int cnunits = vect_nunits_for_cost (vectype);
+                 inside_cost
+                   = record_stmt_cost (cost_vec, cnunits, scalar_load,
+                                       slp_node, 0, vect_body);
+                 continue;
+               }
+             poly_uint64 offset_nunits
+                 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
+             if (known_eq (nunits, offset_nunits))
+               {
+                 new_stmt = vect_build_one_gather_load_call
+                              (vinfo, stmt_info, gsi, &gs_info,
+                               dataref_ptr, vec_offsets[i], final_mask);
                  data_ref = NULL_TREE;
                }
-             else if (gs_info.decl)
+             else if (known_eq (nunits, offset_nunits * 2))
                {
-                 /* The builtin decls path for gather is legacy, x86 only.  */
-                 gcc_assert (!final_len && nunits.is_constant ());
-                 if (costing_p)
-                   {
-                     unsigned int cnunits = vect_nunits_for_cost (vectype);
-                     inside_cost
-                       = record_stmt_cost (cost_vec, cnunits, scalar_load,
-                                           slp_node, 0, vect_body);
-                     continue;
-                   }
-                 poly_uint64 offset_nunits
-                   = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
-                 if (known_eq (nunits, offset_nunits))
+                 /* We have a offset vector with half the number of
+                    lanes but the builtins will produce full vectype
+                    data with just the lower lanes filled.  */
+                 new_stmt = vect_build_one_gather_load_call
+                              (vinfo, stmt_info, gsi, &gs_info,
+                               dataref_ptr, vec_offsets[2 * i], final_mask);
+                 tree low = make_ssa_name (vectype);
+                 gimple_set_lhs (new_stmt, low);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
+                 /* now put upper half of final_mask in final_mask low. */
+                 if (final_mask
+                     && !SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (final_mask))))
                    {
-                     new_stmt = vect_build_one_gather_load_call
-                                  (vinfo, stmt_info, gsi, &gs_info,
-                                   dataref_ptr, vec_offsets[i],
-                                   final_mask);
-                     data_ref = NULL_TREE;
+                     int count = nunits.to_constant ();
+                     vec_perm_builder sel (count, count, 1);
+                     sel.quick_grow (count);
+                     for (int i = 0; i < count; ++i)
+                       sel[i] = i | (count / 2);
+                     vec_perm_indices indices (sel, 2, count);
+                     tree perm_mask = vect_gen_perm_mask_checked
+                                        (TREE_TYPE (final_mask), indices);
+                     new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
+                                                     final_mask, final_mask,
+                                                     perm_mask);
+                     final_mask = make_ssa_name (TREE_TYPE (final_mask));
+                     gimple_set_lhs (new_stmt, final_mask);
+                     vect_finish_stmt_generation (vinfo, stmt_info,
+                                                  new_stmt, gsi);
                    }
-                 else if (known_eq (nunits, offset_nunits * 2))
+                 else if (final_mask)
                    {
-                     /* We have a offset vector with half the number of
-                        lanes but the builtins will produce full vectype
-                        data with just the lower lanes filled.  */
-                     new_stmt = vect_build_one_gather_load_call
-                         (vinfo, stmt_info, gsi, &gs_info,
-                          dataref_ptr, vec_offsets[2 * i],
-                          final_mask);
-                     tree low = make_ssa_name (vectype);
-                     gimple_set_lhs (new_stmt, low);
+                     new_stmt = gimple_build_assign (NULL_TREE,
+                                                     VEC_UNPACK_HI_EXPR,
+                                                     final_mask);
+                     final_mask = make_ssa_name
+                                   (truth_type_for (gs_info.offset_vectype));
+                     gimple_set_lhs (new_stmt, final_mask);
                      vect_finish_stmt_generation (vinfo, stmt_info,
                                                   new_stmt, gsi);
+                   }
 
-                     /* now put upper half of final_mask in final_mask low. */
-                     if (final_mask
-                         && !SCALAR_INT_MODE_P
-                               (TYPE_MODE (TREE_TYPE (final_mask))))
-                       {
-                         int count = nunits.to_constant ();
-                         vec_perm_builder sel (count, count, 1);
-                         sel.quick_grow (count);
-                         for (int i = 0; i < count; ++i)
-                           sel[i] = i | (count / 2);
-                         vec_perm_indices indices (sel, 2, count);
-                         tree perm_mask = vect_gen_perm_mask_checked
-                                            (TREE_TYPE (final_mask), indices);
-                         new_stmt = gimple_build_assign (NULL_TREE,
-                                                         VEC_PERM_EXPR,
-                                                         final_mask,
-                                                         final_mask,
-                                                         perm_mask);
-                         final_mask = make_ssa_name (TREE_TYPE (final_mask));
-                         gimple_set_lhs (new_stmt, final_mask);
-                         vect_finish_stmt_generation (vinfo, stmt_info,
-                                                      new_stmt, gsi);
-                       }
-                     else if (final_mask)
-                       {
-                         new_stmt = gimple_build_assign (NULL_TREE,
-                                                         VEC_UNPACK_HI_EXPR,
-                                                         final_mask);
-                         final_mask = make_ssa_name
-                           (truth_type_for (gs_info.offset_vectype));
-                         gimple_set_lhs (new_stmt, final_mask);
-                         vect_finish_stmt_generation (vinfo, stmt_info,
-                                                      new_stmt, gsi);
-                       }
-
-                     new_stmt = vect_build_one_gather_load_call
-                                  (vinfo, stmt_info, gsi, &gs_info,
-                                   dataref_ptr,
-                                   vec_offsets[2 * i + 1],
-                                   final_mask);
-                     tree high = make_ssa_name (vectype);
-                     gimple_set_lhs (new_stmt, high);
-                     vect_finish_stmt_generation (vinfo, stmt_info,
-                                                  new_stmt, gsi);
+                 new_stmt = vect_build_one_gather_load_call
+                              (vinfo, stmt_info, gsi, &gs_info, dataref_ptr,
+                               vec_offsets[2 * i + 1], final_mask);
+                 tree high = make_ssa_name (vectype);
+                 gimple_set_lhs (new_stmt, high);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-                     /* compose low + high.  */
-                     int count = nunits.to_constant ();
+                 /* compose low + high.  */
+                 int count = nunits.to_constant ();
+                 vec_perm_builder sel (count, count, 1);
+                 sel.quick_grow (count);
+                 for (int i = 0; i < count; ++i)
+                   sel[i] = i < count / 2 ? i : i + count / 2;
+                 vec_perm_indices indices (sel, 2, count);
+                 tree perm_mask
+                   = vect_gen_perm_mask_checked (vectype, indices);
+                 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
+                                                 low, high, perm_mask);
+                 data_ref = NULL_TREE;
+               }
+             else if (known_eq (nunits * 2, offset_nunits))
+               {
+                 /* We have a offset vector with double the number of
+                    lanes.  Select the low/high part accordingly.  */
+                 vec_offset = vec_offsets[i / 2];
+                 if (i & 1)
+                   {
+                     int count = offset_nunits.to_constant ();
                      vec_perm_builder sel (count, count, 1);
                      sel.quick_grow (count);
                      for (int i = 0; i < count; ++i)
-                       sel[i] = i < count / 2 ? i : i + count / 2;
+                       sel[i] = i | (count / 2);
                      vec_perm_indices indices (sel, 2, count);
-                     tree perm_mask
-                       = vect_gen_perm_mask_checked (vectype, indices);
-                     new_stmt = gimple_build_assign (NULL_TREE,
-                                                     VEC_PERM_EXPR,
-                                                     low, high, perm_mask);
-                     data_ref = NULL_TREE;
-                   }
-                 else if (known_eq (nunits * 2, offset_nunits))
-                   {
-                     /* We have a offset vector with double the number of
-                        lanes.  Select the low/high part accordingly.  */
-                     vec_offset = vec_offsets[i / 2];
-                     if (i & 1)
-                       {
-                         int count = offset_nunits.to_constant ();
-                         vec_perm_builder sel (count, count, 1);
-                         sel.quick_grow (count);
-                         for (int i = 0; i < count; ++i)
-                           sel[i] = i | (count / 2);
-                         vec_perm_indices indices (sel, 2, count);
-                         tree perm_mask = vect_gen_perm_mask_checked
-                                            (TREE_TYPE (vec_offset), indices);
-                         new_stmt = gimple_build_assign (NULL_TREE,
-                                                         VEC_PERM_EXPR,
-                                                         vec_offset,
-                                                         vec_offset,
-                                                         perm_mask);
-                         vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
-                         gimple_set_lhs (new_stmt, vec_offset);
-                         vect_finish_stmt_generation (vinfo, stmt_info,
-                                                      new_stmt, gsi);
-                       }
-                     new_stmt = vect_build_one_gather_load_call
-                                  (vinfo, stmt_info, gsi, &gs_info,
-                                   dataref_ptr, vec_offset, final_mask);
-                     data_ref = NULL_TREE;
+                     tree perm_mask = vect_gen_perm_mask_checked
+                                        (TREE_TYPE (vec_offset), indices);
+                     new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
+                                                     vec_offset, vec_offset,
+                                                     perm_mask);
+                     vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
+                     gimple_set_lhs (new_stmt, vec_offset);
+                     vect_finish_stmt_generation (vinfo, stmt_info,
+                                                  new_stmt, gsi);
                    }
-                 else
-                   gcc_unreachable ();
+                 new_stmt = vect_build_one_gather_load_call
+                              (vinfo, stmt_info, gsi, &gs_info,
+                               dataref_ptr, vec_offset, final_mask);
+                 data_ref = NULL_TREE;
                }
              else
+               gcc_unreachable ();
+           }
+         else
+           {
+             /* Emulated gather-scatter.  */
+             gcc_assert (!final_mask);
+             unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
+             if (costing_p)
                {
-                 /* Emulated gather-scatter.  */
-                 gcc_assert (!final_mask);
-                 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
-                 if (costing_p)
-                   {
-                     /* For emulated gathers N offset vector element
-                        offset add is consumed by the load).  */
-                     inside_cost = record_stmt_cost (cost_vec, const_nunits,
-                                                     vec_to_scalar,
-                                                     slp_node, 0, vect_body);
-                     /* N scalar loads plus gathering them into a
-                        vector.  */
-                     inside_cost
-                       = record_stmt_cost (cost_vec, const_nunits, scalar_load,
-                                           slp_node, 0, vect_body);
-                     inside_cost
-                       = record_stmt_cost (cost_vec, 1, vec_construct,
-                                           slp_node, 0, vect_body);
-                     continue;
-                   }
-                 unsigned HOST_WIDE_INT const_offset_nunits
-                   = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
-                       .to_constant ();
-                 vec<constructor_elt, va_gc> *ctor_elts;
-                 vec_alloc (ctor_elts, const_nunits);
-                 gimple_seq stmts = NULL;
-                 /* We support offset vectors with more elements
-                    than the data vector for now.  */
-                 unsigned HOST_WIDE_INT factor
-                   = const_offset_nunits / const_nunits;
-                 vec_offset = vec_offsets[i / factor];
-                 unsigned elt_offset = (i % factor) * const_nunits;
-                 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
-                 tree scale = size_int (gs_info.scale);
-                 align = get_object_alignment (DR_REF (first_dr_info->dr));
-                 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
-                 for (unsigned k = 0; k < const_nunits; ++k)
-                   {
-                     tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
-                                             bitsize_int (k + elt_offset));
-                     tree idx
-                       = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
-                                       vec_offset, TYPE_SIZE (idx_type), boff);
-                     idx = gimple_convert (&stmts, sizetype, idx);
-                     idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
-                                         scale);
-                     tree ptr = gimple_build (&stmts, PLUS_EXPR,
-                                              TREE_TYPE (dataref_ptr),
-                                              dataref_ptr, idx);
-                     ptr = gimple_convert (&stmts, ptr_type_node, ptr);
-                     tree elt = make_ssa_name (TREE_TYPE (vectype));
-                     tree ref = build2 (MEM_REF, ltype, ptr,
-                                        build_int_cst (ref_type, 0));
-                     new_stmt = gimple_build_assign (elt, ref);
-                     gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
-                     gimple_seq_add_stmt (&stmts, new_stmt);
-                     CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
-                   }
-                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
-                 new_stmt = gimple_build_assign (
-                   NULL_TREE, build_constructor (vectype, ctor_elts));
-                 data_ref = NULL_TREE;
+                 /* For emulated gathers N offset vector element
+                    offset add is consumed by the load).  */
+                 inside_cost = record_stmt_cost (cost_vec, const_nunits,
+                                                 vec_to_scalar,
+                                                 slp_node, 0, vect_body);
+                 /* N scalar loads plus gathering them into a
+                    vector.  */
+                 inside_cost
+                   = record_stmt_cost (cost_vec, const_nunits, scalar_load,
+                                       slp_node, 0, vect_body);
+                 inside_cost
+                   = record_stmt_cost (cost_vec, 1, vec_construct,
+                                       slp_node, 0, vect_body);
+                 continue;
                }
-
-             vec_dest = vect_create_destination_var (scalar_dest, vectype);
-             /* DATA_REF is null if we've already built the statement.  */
-             if (data_ref)
+             unsigned HOST_WIDE_INT const_offset_nunits
+               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
+             vec<constructor_elt, va_gc> *ctor_elts;
+             vec_alloc (ctor_elts, const_nunits);
+             gimple_seq stmts = NULL;
+             /* We support offset vectors with more elements
+                than the data vector for now.  */
+             unsigned HOST_WIDE_INT factor
+               = const_offset_nunits / const_nunits;
+             vec_offset = vec_offsets[i / factor];
+             unsigned elt_offset = (i % factor) * const_nunits;
+             tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
+             tree scale = size_int (gs_info.scale);
+             align = get_object_alignment (DR_REF (first_dr_info->dr));
+             tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
+             for (unsigned k = 0; k < const_nunits; ++k)
                {
-                 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-                 new_stmt = gimple_build_assign (vec_dest, data_ref);
+                 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
+                                         bitsize_int (k + elt_offset));
+                 tree idx = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
+                                          vec_offset, TYPE_SIZE (idx_type),
+                                          boff);
+                 idx = gimple_convert (&stmts, sizetype, idx);
+                 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
+                 tree ptr = gimple_build (&stmts, PLUS_EXPR,
+                                          TREE_TYPE (dataref_ptr),
+                                          dataref_ptr, idx);
+                 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
+                 tree elt = make_ssa_name (TREE_TYPE (vectype));
+                 tree ref = build2 (MEM_REF, ltype, ptr,
+                                    build_int_cst (ref_type, 0));
+                 new_stmt = gimple_build_assign (elt, ref);
+                 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
+                 gimple_seq_add_stmt (&stmts, new_stmt);
+                 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
                }
-             new_temp = need_zeroing
-               ? make_ssa_name (vectype)
-               : make_ssa_name (vec_dest, new_stmt);
-             gimple_set_lhs (new_stmt, new_temp);
-             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+             new_stmt = gimple_build_assign (NULL_TREE,
+                                             build_constructor (vectype,
+                                                                ctor_elts));
+             data_ref = NULL_TREE;
+           }
 
-             /* If we need to explicitly zero inactive elements emit a
-                VEC_COND_EXPR that does so.  */
-             if (need_zeroing)
-               {
-                 vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
-                                                    vectype);
+         vec_dest = vect_create_destination_var (scalar_dest, vectype);
+         /* DATA_REF is null if we've already built the statement.  */
+         if (data_ref)
+           {
+             vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+             new_stmt = gimple_build_assign (vec_dest, data_ref);
+           }
+         new_temp = (need_zeroing
+                     ? make_ssa_name (vectype)
+                     : make_ssa_name (vec_dest, new_stmt));
+         gimple_set_lhs (new_stmt, new_temp);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-                 tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
-                 new_stmt
-                   = gimple_build_assign (new_temp2, VEC_COND_EXPR,
-                                          final_mask, new_temp, vec_els);
-                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
-                                              gsi);
-                 new_temp = new_temp2;
-               }
+         /* If we need to explicitly zero inactive elements emit a
+            VEC_COND_EXPR that does so.  */
+         if (need_zeroing)
+           {
+             vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+                                                vectype);
 
-             /* Store vector loads in the corresponding SLP_NODE.  */
-             slp_node->push_vec_def (new_stmt);
+             tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
+             new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
+                                             final_mask, new_temp, vec_els);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+             new_temp = new_temp2;
            }
+
+         /* Store vector loads in the corresponding SLP_NODE.  */
+         slp_node->push_vec_def (new_stmt);
        }
 
       if (costing_p && dump_enabled_p ())
@@ -11361,270 +11343,293 @@ vectorizable_load (vec_info *vinfo,
   /* For costing some adjacent vector loads, we'd like to cost with
      the total number of them once instead of cost each one by one. */
   unsigned int n_adjacent_loads = 0;
+
+  /* 1. Create the vector or array pointer update chain.  */
+  if (!costing_p)
     {
-      /* 1. Create the vector or array pointer update chain.  */
-      if (!costing_p)
-       {
-         bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
-         if (simd_lane_access_p
-             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
-             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
-             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
-             && integer_zerop (DR_INIT (first_dr_info->dr))
-             && alias_sets_conflict_p (get_alias_set (aggr_type),
-                                       get_alias_set (TREE_TYPE (ref_type)))
-             && (alignment_support_scheme == dr_aligned
-                 || alignment_support_scheme == dr_unaligned_supported))
-           {
-             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
-             dataref_offset = build_int_cst (ref_type, 0);
-           }
-         else if (diff_first_stmt_info)
-           {
-             dataref_ptr
-               = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
-                                           aggr_type, at_loop, offset, &dummy,
-                                           gsi, &ptr_incr, simd_lane_access_p,
-                                           bump);
-             /* Adjust the pointer by the difference to first_stmt.  */
-             data_reference_p ptrdr
-               = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
-             tree diff
-               = fold_convert (sizetype,
-                               size_binop (MINUS_EXPR,
-                                           DR_INIT (first_dr_info->dr),
-                                           DR_INIT (ptrdr)));
-             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
-                                            stmt_info, diff);
-             if (alignment_support_scheme == dr_explicit_realign)
-               {
-                 msq = vect_setup_realignment (vinfo,
-                                               first_stmt_info_for_drptr, gsi,
-                                               &realignment_token,
-                                               alignment_support_scheme,
-                                               dataref_ptr, &at_loop);
-                 gcc_assert (!compute_in_loop);
-               }
+      bool simd_lane_access_p
+         = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
+      if (simd_lane_access_p
+         && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
+         && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
+         && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
+         && integer_zerop (DR_INIT (first_dr_info->dr))
+         && alias_sets_conflict_p (get_alias_set (aggr_type),
+                                   get_alias_set (TREE_TYPE (ref_type)))
+         && (alignment_support_scheme == dr_aligned
+             || alignment_support_scheme == dr_unaligned_supported))
+       {
+         dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+         dataref_offset = build_int_cst (ref_type, 0);
+       }
+      else if (diff_first_stmt_info)
+       {
+         dataref_ptr
+           = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
+                                       aggr_type, at_loop, offset, &dummy,
+                                       gsi, &ptr_incr, simd_lane_access_p,
+                                       bump);
+         /* Adjust the pointer by the difference to first_stmt.  */
+         data_reference_p ptrdr
+           = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
+         tree diff = fold_convert (sizetype,
+                                   size_binop (MINUS_EXPR,
+                                               DR_INIT (first_dr_info->dr),
+                                               DR_INIT (ptrdr)));
+         dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                        stmt_info, diff);
+         if (alignment_support_scheme == dr_explicit_realign)
+           {
+             msq = vect_setup_realignment (vinfo,
+                                           first_stmt_info_for_drptr, gsi,
+                                           &realignment_token,
+                                           alignment_support_scheme,
+                                           dataref_ptr, &at_loop);
+             gcc_assert (!compute_in_loop);
            }
-         else
-           dataref_ptr
-             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-                                         at_loop,
-                                         offset, &dummy, gsi, &ptr_incr,
-                                         simd_lane_access_p, bump);
-       }
-      else if (!costing_p)
-       {
-         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-         if (dataref_offset)
-           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
-                                             bump);
-         else
-           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
-                                          stmt_info, bump);
        }
+      else
+       dataref_ptr
+         = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                     at_loop,
+                                     offset, &dummy, gsi, &ptr_incr,
+                                     simd_lane_access_p, bump);
+    }
+  else if (!costing_p)
+    {
+      gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+      if (dataref_offset)
+       dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+      else
+       dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                      stmt_info, bump);
+    }
 
-      if (grouped_load || slp_perm)
-       dr_chain.create (vec_num);
+  if (grouped_load || slp_perm)
+    dr_chain.create (vec_num);
 
-      gimple *new_stmt = NULL;
-      for (i = 0; i < vec_num; i++)
+  gimple *new_stmt = NULL;
+  for (i = 0; i < vec_num; i++)
+    {
+      tree final_mask = NULL_TREE;
+      tree final_len = NULL_TREE;
+      tree bias = NULL_TREE;
+
+      if (!costing_p)
        {
-         tree final_mask = NULL_TREE;
-         tree final_len = NULL_TREE;
-         tree bias = NULL_TREE;
+         if (mask)
+           vec_mask = vec_masks[i];
+         if (loop_masks)
+           final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+                                            vec_num, vectype, i);
+         if (vec_mask)
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+                                          final_mask, vec_mask, gsi);
 
-         if (!costing_p)
-           {
-             if (mask)
-               vec_mask = vec_masks[i];
-             if (loop_masks)
-               final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                                vec_num, vectype, i);
-             if (vec_mask)
-               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
-                                              final_mask, vec_mask, gsi);
+         if (i > 0)
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
+                                          gsi, stmt_info, bump);
+       }
 
-             if (i > 0)
-               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
-                                              gsi, stmt_info, bump);
-           }
+      /* 2. Create the vector-load in the loop.  */
+      switch (alignment_support_scheme)
+       {
+       case dr_aligned:
+       case dr_unaligned_supported:
+         {
+           if (costing_p)
+             break;
 
-         /* 2. Create the vector-load in the loop.  */
-         switch (alignment_support_scheme)
-           {
-           case dr_aligned:
-           case dr_unaligned_supported:
+           unsigned int misalign;
+           unsigned HOST_WIDE_INT align;
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+           if (alignment_support_scheme == dr_aligned)
+             misalign = 0;
+           else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
              {
-               if (costing_p)
-                 break;
-
-               unsigned int misalign;
-               unsigned HOST_WIDE_INT align;
-               align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-               if (alignment_support_scheme == dr_aligned)
-                 misalign = 0;
-               else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
-                 {
-                   align
-                     = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
-                   misalign = 0;
-                 }
-               else
-                 misalign = misalignment;
-               if (dataref_offset == NULL_TREE
-                   && TREE_CODE (dataref_ptr) == SSA_NAME)
-                 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
-                                         misalign);
-               align = least_bit_hwi (misalign | align);
-
-               /* Compute IFN when LOOP_LENS or final_mask valid.  */
-               machine_mode vmode = TYPE_MODE (vectype);
-               machine_mode new_vmode = vmode;
-               internal_fn partial_ifn = IFN_LAST;
-               if (loop_lens)
-                 {
-                   opt_machine_mode new_ovmode
-                     = get_len_load_store_mode (vmode, true, &partial_ifn);
-                   new_vmode = new_ovmode.require ();
-                   unsigned factor
-                     = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
-                   final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num, vectype, i, factor);
-                 }
-               else if (final_mask)
-                 {
-                   if (!can_vec_mask_load_store_p (
-                         vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
-                         &partial_ifn))
-                     gcc_unreachable ();
-                 }
+               align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+               misalign = 0;
+             }
+           else
+             misalign = misalignment;
+           if (dataref_offset == NULL_TREE
+               && TREE_CODE (dataref_ptr) == SSA_NAME)
+             set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
+                                     misalign);
+           align = least_bit_hwi (misalign | align);
+
+           /* Compute IFN when LOOP_LENS or final_mask valid.  */
+           machine_mode vmode = TYPE_MODE (vectype);
+           machine_mode new_vmode = vmode;
+           internal_fn partial_ifn = IFN_LAST;
+           if (loop_lens)
+             {
+               opt_machine_mode new_ovmode
+                 = get_len_load_store_mode (vmode, true, &partial_ifn);
+               new_vmode = new_ovmode.require ();
+               unsigned factor
+                 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
+               final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                              vec_num, vectype, i, factor);
+             }
+           else if (final_mask)
+             {
+               if (!can_vec_mask_load_store_p (vmode,
+                                               TYPE_MODE
+                                                 (TREE_TYPE (final_mask)),
+                                               true, &partial_ifn))
+                 gcc_unreachable ();
+             }
 
-               if (partial_ifn == IFN_MASK_LEN_LOAD)
+           if (partial_ifn == IFN_MASK_LEN_LOAD)
+             {
+               if (!final_len)
                  {
-                   if (!final_len)
-                     {
-                       /* Pass VF value to 'len' argument of
-                          MASK_LEN_LOAD if LOOP_LENS is invalid.  */
-                       final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
-                     }
-                   if (!final_mask)
-                     {
-                       /* Pass all ones value to 'mask' argument of
-                          MASK_LEN_LOAD if final_mask is invalid.  */
-                       mask_vectype = truth_type_for (vectype);
-                       final_mask = build_minus_one_cst (mask_vectype);
-                     }
+                   /* Pass VF value to 'len' argument of
+                      MASK_LEN_LOAD if LOOP_LENS is invalid.  */
+                   final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
                  }
-               if (final_len)
+               if (!final_mask)
                  {
-                   signed char biasval
-                     = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-
-                   bias = build_int_cst (intQI_type_node, biasval);
+                   /* Pass all ones value to 'mask' argument of
+                      MASK_LEN_LOAD if final_mask is invalid.  */
+                   mask_vectype = truth_type_for (vectype);
+                   final_mask = build_minus_one_cst (mask_vectype);
                  }
+             }
+           if (final_len)
+             {
+               signed char biasval
+                 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+               bias = build_int_cst (intQI_type_node, biasval);
+             }
 
-               tree vec_els;
+           tree vec_els;
 
-               if (final_len)
-                 {
-                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
-                   gcall *call;
-                   if (partial_ifn == IFN_MASK_LEN_LOAD)
-                     {
-                       vec_els = vect_get_mask_load_else
-                         (maskload_elsval, vectype);
-                       if (type_mode_padding_p
-                           && maskload_elsval != MASK_LOAD_ELSE_ZERO)
-                         need_zeroing = true;
-                       call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
-                                                          6, dataref_ptr, ptr,
-                                                          final_mask, vec_els,
-                                                          final_len, bias);
-                     }
-                   else
-                     call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
-                                                        dataref_ptr, ptr,
-                                                        final_len, bias);
-                   gimple_call_set_nothrow (call, true);
-                   new_stmt = call;
-                   data_ref = NULL_TREE;
-
-                   /* Need conversion if it's wrapped with VnQI.  */
-                   if (vmode != new_vmode)
-                     {
-                       tree new_vtype = build_vector_type_for_mode (
-                         unsigned_intQI_type_node, new_vmode);
-                       tree var
-                         = vect_get_new_ssa_name (new_vtype, vect_simple_var);
-                       gimple_set_lhs (call, var);
-                       vect_finish_stmt_generation (vinfo, stmt_info, call,
-                                                    gsi);
-                       tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
-                       new_stmt = gimple_build_assign (vec_dest,
-                                                       VIEW_CONVERT_EXPR, op);
-                     }
-                 }
-               else if (final_mask)
+           if (final_len)
+             {
+               tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+               gcall *call;
+               if (partial_ifn == IFN_MASK_LEN_LOAD)
                  {
-                   tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
-                   vec_els = vect_get_mask_load_else
-                     (maskload_elsval, vectype);
+                   vec_els = vect_get_mask_load_else (maskload_elsval,
+                                                      vectype);
                    if (type_mode_padding_p
                        && maskload_elsval != MASK_LOAD_ELSE_ZERO)
                      need_zeroing = true;
-                   gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
-                                                             dataref_ptr, ptr,
-                                                             final_mask,
-                                                             vec_els);
-                   gimple_call_set_nothrow (call, true);
-                   new_stmt = call;
-                   data_ref = NULL_TREE;
+                   call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
+                                                      6, dataref_ptr, ptr,
+                                                      final_mask, vec_els,
+                                                      final_len, bias);
                  }
                else
+                 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
+                                                    dataref_ptr, ptr,
+                                                    final_len, bias);
+               gimple_call_set_nothrow (call, true);
+               new_stmt = call;
+               data_ref = NULL_TREE;
+
+               /* Need conversion if it's wrapped with VnQI.  */
+               if (vmode != new_vmode)
                  {
-                   tree ltype = vectype;
-                   tree new_vtype = NULL_TREE;
-                   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
-                   unsigned HOST_WIDE_INT dr_size
-                     = vect_get_scalar_dr_size (first_dr_info);
-                   poly_int64 off = 0;
-                   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
-                     off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
-                   unsigned int vect_align
-                     = vect_known_alignment_in_bytes (first_dr_info, vectype,
-                                                      off);
-                   /* Try to use a single smaller load when we are about
-                      to load excess elements compared to the unrolled
-                      scalar loop.  */
-                   if (known_gt ((i + 1) * nunits,
-                                      (group_size * vf - gap)))
+                   tree new_vtype
+                     = build_vector_type_for_mode (unsigned_intQI_type_node,
+                                                   new_vmode);
+                   tree var = vect_get_new_ssa_name (new_vtype,
+                                                     vect_simple_var);
+                   gimple_set_lhs (call, var);
+                   vect_finish_stmt_generation (vinfo, stmt_info, call,
+                                                gsi);
+                   tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
+                   new_stmt = gimple_build_assign (vec_dest,
+                                                   VIEW_CONVERT_EXPR, op);
+                 }
+             }
+           else if (final_mask)
+             {
+               tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+               vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
+               if (type_mode_padding_p
+                   && maskload_elsval != MASK_LOAD_ELSE_ZERO)
+                 need_zeroing = true;
+               gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
+                                                         dataref_ptr, ptr,
+                                                         final_mask,
+                                                         vec_els);
+               gimple_call_set_nothrow (call, true);
+               new_stmt = call;
+               data_ref = NULL_TREE;
+             }
+           else
+             {
+               tree ltype = vectype;
+               tree new_vtype = NULL_TREE;
+               unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+               unsigned HOST_WIDE_INT dr_size
+                 = vect_get_scalar_dr_size (first_dr_info);
+               poly_int64 off = 0;
+               if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                 off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+               unsigned int vect_align
+                 = vect_known_alignment_in_bytes (first_dr_info, vectype,
+                                                  off);
+               /* Try to use a single smaller load when we are about
+                  to load excess elements compared to the unrolled
+                  scalar loop.  */
+               if (known_gt ((i + 1) * nunits,
+                             (group_size * vf - gap)))
+                 {
+                   poly_uint64 remain = ((group_size * vf - gap) - i * nunits);
+                   if (known_ge ((i + 1) * nunits - (group_size * vf - gap),
+                                 nunits))
+                     /* DR will be unused.  */
+                     ltype = NULL_TREE;
+                   else if (known_ge (vect_align,
+                                      tree_to_poly_uint64
+                                        (TYPE_SIZE_UNIT (vectype))))
+                     /* Aligned access to excess elements is OK if
+                        at least one element is accessed in the
+                        scalar loop.  */
+                     ;
+                   else if (known_gt (vect_align,
+                                      ((nunits - remain) * dr_size)))
+                     /* Aligned access to the gap area when there's
+                        at least one element in it is OK.  */
+                     ;
+                   else
                      {
-                       poly_uint64 remain = ((group_size * vf - gap)
-                                             - i * nunits);
-                       if (known_ge ((i + 1) * nunits
-                                     - (group_size * vf - gap), nunits))
-                         /* DR will be unused.  */
-                         ltype = NULL_TREE;
-                       else if (known_ge (vect_align,
-                                          tree_to_poly_uint64
-                                            (TYPE_SIZE_UNIT (vectype))))
-                         /* Aligned access to excess elements is OK if
-                            at least one element is accessed in the
-                            scalar loop.  */
-                         ;
-                       else if (known_gt (vect_align,
-                                          ((nunits - remain) * dr_size)))
-                         /* Aligned access to the gap area when there's
-                            at least one element in it is OK.  */
-                         ;
-                       else
+                       /* remain should now be > 0 and < nunits.  */
+                       unsigned num;
+                       if (known_ne (remain, 0u)
+                           && constant_multiple_p (nunits, remain, &num))
+                         {
+                           tree ptype;
+                           new_vtype
+                             = vector_vector_composition_type (vectype, num,
+                                                               &ptype);
+                           if (new_vtype)
+                             ltype = ptype;
+                         }
+                       /* Else use multiple loads or a masked load?  */
+                       /* For loop vectorization we now should have
+                          an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
+                          set.  */
+                       if (loop_vinfo)
+                         gcc_assert (new_vtype
+                                     || LOOP_VINFO_PEELING_FOR_GAPS
+                                          (loop_vinfo));
+                       /* But still reduce the access size to the next
+                          required power-of-two so peeling a single
+                          scalar iteration is sufficient.  */
+                       unsigned HOST_WIDE_INT cremain;
+                       if (remain.is_constant (&cremain))
                          {
-                           /* remain should now be > 0 and < nunits.  */
-                           unsigned num;
-                           if (known_ne (remain, 0u)
-                               && constant_multiple_p (nunits, remain, &num))
+                           unsigned HOST_WIDE_INT cpart_size
+                             = 1 << ceil_log2 (cremain);
+                           if (known_gt (nunits, cpart_size)
+                               && constant_multiple_p (nunits, cpart_size,
+                                                       &num))
                              {
                                tree ptype;
                                new_vtype
@@ -11634,333 +11639,289 @@ vectorizable_load (vec_info *vinfo,
                                if (new_vtype)
                                  ltype = ptype;
                              }
-                           /* Else use multiple loads or a masked load?  */
-                           /* For loop vectorization we now should have
-                              an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
-                              set.  */
-                           if (loop_vinfo)
-                             gcc_assert (new_vtype
-                                         || LOOP_VINFO_PEELING_FOR_GAPS
-                                              (loop_vinfo));
-                           /* But still reduce the access size to the next
-                              required power-of-two so peeling a single
-                              scalar iteration is sufficient.  */
-                           unsigned HOST_WIDE_INT cremain;
-                           if (remain.is_constant (&cremain))
-                             {
-                               unsigned HOST_WIDE_INT cpart_size
-                                 = 1 << ceil_log2 (cremain);
-                               if (known_gt (nunits, cpart_size)
-                                   && constant_multiple_p (nunits, cpart_size,
-                                                           &num))
-                                 {
-                                   tree ptype;
-                                   new_vtype
-                                     = vector_vector_composition_type (vectype,
-                                                                       num,
-                                                                       &ptype);
-                                   if (new_vtype)
-                                     ltype = ptype;
-                                 }
-                             }
                          }
                      }
-                   tree offset
-                     = (dataref_offset ? dataref_offset
-                                       : build_int_cst (ref_type, 0));
-                   if (!ltype)
+                 }
+               tree offset = (dataref_offset ? dataref_offset
+                              : build_int_cst (ref_type, 0));
+               if (!ltype)
+                 ;
+               else if (ltype != vectype
+                        && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                 {
+                   poly_uint64 gap_offset
+                     = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
+                        - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
+                   tree gapcst = build_int_cstu (ref_type, gap_offset);
+                   offset = size_binop (PLUS_EXPR, offset, gapcst);
+                 }
+               if (ltype)
+                 {
+                   data_ref = fold_build2 (MEM_REF, ltype,
+                                           dataref_ptr, offset);
+                   if (alignment_support_scheme == dr_aligned)
                      ;
-                   else if (ltype != vectype
-                            && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                   else
+                     TREE_TYPE (data_ref)
+                       = build_aligned_type (TREE_TYPE (data_ref),
+                                             align * BITS_PER_UNIT);
+                 }
+               if (!ltype)
+                 data_ref = build_constructor (vectype, NULL);
+               else if (ltype != vectype)
+                 {
+                   vect_copy_ref_info (data_ref,
+                                       DR_REF (first_dr_info->dr));
+                   tree tem = make_ssa_name (ltype);
+                   new_stmt = gimple_build_assign (tem, data_ref);
+                   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+                                                gsi);
+                   data_ref = NULL;
+                   vec<constructor_elt, va_gc> *v;
+                   /* We've computed 'num' above to statically two
+                      or via constant_multiple_p.  */
+                   unsigned num
+                     = (exact_div (tree_to_poly_uint64
+                                     (TYPE_SIZE_UNIT (vectype)),
+                                   tree_to_poly_uint64
+                                     (TYPE_SIZE_UNIT (ltype)))
+                        .to_constant ());
+                   vec_alloc (v, num);
+                   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
                      {
-                       poly_uint64 gap_offset
-                         = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
-                            - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
-                       tree gapcst = build_int_cstu (ref_type, gap_offset);
-                       offset = size_binop (PLUS_EXPR, offset, gapcst);
+                       while (--num)
+                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                 build_zero_cst (ltype));
+                       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
                      }
-                   if (ltype)
+                   else
                      {
-                       data_ref
-                         = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
-                       if (alignment_support_scheme == dr_aligned)
-                         ;
-                       else
-                         TREE_TYPE (data_ref)
-                           = build_aligned_type (TREE_TYPE (data_ref),
-                                                 align * BITS_PER_UNIT);
+                       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+                       while (--num)
+                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+                                                 build_zero_cst (ltype));
                      }
-                   if (!ltype)
-                     data_ref = build_constructor (vectype, NULL);
-                   else if (ltype != vectype)
+                   gcc_assert (new_vtype != NULL_TREE);
+                   if (new_vtype == vectype)
+                     new_stmt
+                       = gimple_build_assign (vec_dest,
+                                              build_constructor (vectype, v));
+                   else
                      {
-                       vect_copy_ref_info (data_ref,
-                                           DR_REF (first_dr_info->dr));
-                       tree tem = make_ssa_name (ltype);
-                       new_stmt = gimple_build_assign (tem, data_ref);
-                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
-                                                    gsi);
-                       data_ref = NULL;
-                       vec<constructor_elt, va_gc> *v;
-                       /* We've computed 'num' above to statically two
-                          or via constant_multiple_p.  */
-                       unsigned num
-                         = (exact_div (tree_to_poly_uint64
-                                         (TYPE_SIZE_UNIT (vectype)),
-                                       tree_to_poly_uint64
-                                         (TYPE_SIZE_UNIT (ltype)))
-                            .to_constant ());
-                       vec_alloc (v, num);
-                       if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
-                         {
-                           while (--num)
-                             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-                                                     build_zero_cst (ltype));
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-                         }
-                       else
-                         {
-                           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-                           while (--num)
-                             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-                                                     build_zero_cst (ltype));
-                         }
-                       gcc_assert (new_vtype != NULL_TREE);
-                       if (new_vtype == vectype)
-                         new_stmt = gimple_build_assign (
-                           vec_dest, build_constructor (vectype, v));
-                       else
-                         {
-                           tree new_vname = make_ssa_name (new_vtype);
-                           new_stmt = gimple_build_assign (
-                             new_vname, build_constructor (new_vtype, v));
-                           vect_finish_stmt_generation (vinfo, stmt_info,
-                                                        new_stmt, gsi);
-                           new_stmt = gimple_build_assign (
-                             vec_dest,
-                             build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
-                         }
+                       tree new_vname = make_ssa_name (new_vtype);
+                       new_stmt
+                         = gimple_build_assign (new_vname,
+                                                build_constructor (new_vtype,
+                                                                   v));
+                       vect_finish_stmt_generation (vinfo, stmt_info,
+                                                    new_stmt, gsi);
+                       new_stmt
+                         = gimple_build_assign (vec_dest,
+                                                build1 (VIEW_CONVERT_EXPR,
+                                                        vectype, new_vname));
                      }
                  }
-               break;
              }
-           case dr_explicit_realign:
-             {
-               if (costing_p)
-                 break;
-               tree ptr, bump;
-
-               tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
-
-               if (compute_in_loop)
-                 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
-                                               &realignment_token,
-                                               dr_explicit_realign,
-                                               dataref_ptr, NULL);
+           break;
+         }
+       case dr_explicit_realign:
+         {
+           if (costing_p)
+             break;
+           tree ptr, bump;
+
+           tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+
+           if (compute_in_loop)
+             msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
+                                           &realignment_token,
+                                           dr_explicit_realign,
+                                           dataref_ptr, NULL);
+
+           if (TREE_CODE (dataref_ptr) == SSA_NAME)
+             ptr = copy_ssa_name (dataref_ptr);
+           else
+             ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
+           // For explicit realign the target alignment should be
+           // known at compile time.
+           unsigned HOST_WIDE_INT align
+             = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
+           new_stmt = gimple_build_assign (ptr, BIT_AND_EXPR, dataref_ptr,
+                                           build_int_cst
+                                             (TREE_TYPE (dataref_ptr),
+                                              -(HOST_WIDE_INT) align));
+           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           data_ref = build2 (MEM_REF, vectype,
+                              ptr, build_int_cst (ref_type, 0));
+           vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+           vec_dest = vect_create_destination_var (scalar_dest, vectype);
+           new_stmt = gimple_build_assign (vec_dest, data_ref);
+           new_temp = make_ssa_name (vec_dest, new_stmt);
+           gimple_assign_set_lhs (new_stmt, new_temp);
+           gimple_move_vops (new_stmt, stmt_info->stmt);
+           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           msq = new_temp;
+
+           bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
+           bump = size_binop (MINUS_EXPR, bump, size_one_node);
+           ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
+                                  bump);
+           new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, ptr,
+                                           build_int_cst (TREE_TYPE (ptr),
+                                                          -(HOST_WIDE_INT) align));
+           if (TREE_CODE (ptr) == SSA_NAME)
+             ptr = copy_ssa_name (ptr, new_stmt);
+           else
+             ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
+           gimple_assign_set_lhs (new_stmt, ptr);
+           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           data_ref = build2 (MEM_REF, vectype,
+                              ptr, build_int_cst (ref_type, 0));
+           break;
+         }
+       case dr_explicit_realign_optimized:
+         {
+           if (costing_p)
+             break;
+           if (TREE_CODE (dataref_ptr) == SSA_NAME)
+             new_temp = copy_ssa_name (dataref_ptr);
+           else
+             new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
+           // We should only be doing this if we know the target
+           // alignment at compile time.
+           unsigned HOST_WIDE_INT align
+             = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
+           new_stmt = gimple_build_assign (new_temp, BIT_AND_EXPR, dataref_ptr,
+                                           build_int_cst (TREE_TYPE (dataref_ptr),
+                                                          -(HOST_WIDE_INT) align));
+           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+           data_ref = build2 (MEM_REF, vectype, new_temp,
+                              build_int_cst (ref_type, 0));
+           break;
+         }
+       default:
+         gcc_unreachable ();
+       }
 
-               if (TREE_CODE (dataref_ptr) == SSA_NAME)
-                 ptr = copy_ssa_name (dataref_ptr);
-               else
-                 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
-               // For explicit realign the target alignment should be
-               // known at compile time.
-               unsigned HOST_WIDE_INT align
-                 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
-               new_stmt = gimple_build_assign (
-                 ptr, BIT_AND_EXPR, dataref_ptr,
-                 build_int_cst (TREE_TYPE (dataref_ptr),
-                                -(HOST_WIDE_INT) align));
-               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-               data_ref
-                 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
-               vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-               vec_dest = vect_create_destination_var (scalar_dest, vectype);
-               new_stmt = gimple_build_assign (vec_dest, data_ref);
-               new_temp = make_ssa_name (vec_dest, new_stmt);
-               gimple_assign_set_lhs (new_stmt, new_temp);
-               gimple_move_vops (new_stmt, stmt_info->stmt);
-               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-               msq = new_temp;
-
-               bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
-               bump = size_binop (MINUS_EXPR, bump, size_one_node);
-               ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
-                                      bump);
-               new_stmt = gimple_build_assign (
-                 NULL_TREE, BIT_AND_EXPR, ptr,
-                 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
-               if (TREE_CODE (ptr) == SSA_NAME)
-                 ptr = copy_ssa_name (ptr, new_stmt);
-               else
-                 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
-               gimple_assign_set_lhs (new_stmt, ptr);
-               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-               data_ref
-                 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
-               break;
-             }
-           case dr_explicit_realign_optimized:
-             {
-               if (costing_p)
-                 break;
-               if (TREE_CODE (dataref_ptr) == SSA_NAME)
-                 new_temp = copy_ssa_name (dataref_ptr);
-               else
-                 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
-               // We should only be doing this if we know the target
-               // alignment at compile time.
-               unsigned HOST_WIDE_INT align
-                 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
-               new_stmt = gimple_build_assign (
-                 new_temp, BIT_AND_EXPR, dataref_ptr,
-                 build_int_cst (TREE_TYPE (dataref_ptr),
-                                -(HOST_WIDE_INT) align));
-               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
-               data_ref = build2 (MEM_REF, vectype, new_temp,
-                                  build_int_cst (ref_type, 0));
-               break;
-             }
-           default:
-             gcc_unreachable ();
+      /* One common place to cost the above vect load for different
+        alignment support schemes.  */
+      if (costing_p)
+       {
+         /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
+            only need to take care of the first stmt, whose
+            stmt_info is first_stmt_info, vec_num iterating on it
+            will cover the cost for the remaining, it's consistent
+            with transforming.  For the prologue cost for realign,
+            we only need to count it once for the whole group.  */
+         bool first_stmt_info_p = first_stmt_info == stmt_info;
+         bool add_realign_cost = first_stmt_info_p && i == 0;
+         if (memory_access_type == VMAT_CONTIGUOUS
+             || memory_access_type == VMAT_CONTIGUOUS_REVERSE
+             || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
+                 && (!grouped_load || first_stmt_info_p)))
+           {
+             /* Leave realign cases alone to keep them simple.  */
+             if (alignment_support_scheme == dr_explicit_realign_optimized
+                 || alignment_support_scheme == dr_explicit_realign)
+               vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
+                                   alignment_support_scheme, misalignment,
+                                   add_realign_cost, &inside_cost,
+                                   &prologue_cost, cost_vec, cost_vec,
+                                   true);
+             else
+               n_adjacent_loads++;
            }
-
-         /* One common place to cost the above vect load for different
-            alignment support schemes.  */
-         if (costing_p)
+       }
+      else
+       {
+         vec_dest = vect_create_destination_var (scalar_dest, vectype);
+         /* DATA_REF is null if we've already built the statement.  */
+         if (data_ref)
            {
-             /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
-                only need to take care of the first stmt, whose
-                stmt_info is first_stmt_info, vec_num iterating on it
-                will cover the cost for the remaining, it's consistent
-                with transforming.  For the prologue cost for realign,
-                we only need to count it once for the whole group.  */
-             bool first_stmt_info_p = first_stmt_info == stmt_info;
-             bool add_realign_cost = first_stmt_info_p && i == 0;
-             if (memory_access_type == VMAT_CONTIGUOUS
-                 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
-                 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
-                     && (!grouped_load || first_stmt_info_p)))
-               {
-                 /* Leave realign cases alone to keep them simple.  */
-                 if (alignment_support_scheme == dr_explicit_realign_optimized
-                     || alignment_support_scheme == dr_explicit_realign)
-                   vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
-                                       alignment_support_scheme, misalignment,
-                                       add_realign_cost, &inside_cost,
-                                       &prologue_cost, cost_vec, cost_vec,
-                                       true);
-                 else
-                   n_adjacent_loads++;
-               }
+             vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+             new_stmt = gimple_build_assign (vec_dest, data_ref);
            }
-         else
-           {
-             vec_dest = vect_create_destination_var (scalar_dest, vectype);
-             /* DATA_REF is null if we've already built the statement.  */
-             if (data_ref)
-               {
-                 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-                 new_stmt = gimple_build_assign (vec_dest, data_ref);
-               }
 
-             new_temp = need_zeroing
-               ? make_ssa_name (vectype)
-               : make_ssa_name (vec_dest, new_stmt);
-             gimple_set_lhs (new_stmt, new_temp);
-             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+         new_temp = (need_zeroing
+                     ? make_ssa_name (vectype)
+                     : make_ssa_name (vec_dest, new_stmt));
+         gimple_set_lhs (new_stmt, new_temp);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-             /* If we need to explicitly zero inactive elements emit a
-                VEC_COND_EXPR that does so.  */
-             if (need_zeroing)
-               {
-                 vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
-                                                    vectype);
+         /* If we need to explicitly zero inactive elements emit a
+            VEC_COND_EXPR that does so.  */
+         if (need_zeroing)
+           {
+             vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+                                                vectype);
 
-                 tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
-                 new_stmt
-                   = gimple_build_assign (new_temp2, VEC_COND_EXPR,
-                                          final_mask, new_temp, vec_els);
-                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
-                                              gsi);
-                 new_temp = new_temp2;
-               }
+             tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
+             new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
+                                             final_mask, new_temp, vec_els);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+                                          gsi);
+             new_temp = new_temp2;
            }
+       }
 
-         /* 3. Handle explicit realignment if necessary/supported.
-            Create in loop:
-              vec_dest = realign_load (msq, lsq, realignment_token)  */
-         if (!costing_p
-             && (alignment_support_scheme == dr_explicit_realign_optimized
-                 || alignment_support_scheme == dr_explicit_realign))
-           {
-             lsq = gimple_assign_lhs (new_stmt);
-             if (!realignment_token)
-               realignment_token = dataref_ptr;
-             vec_dest = vect_create_destination_var (scalar_dest, vectype);
-             new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
-                                             lsq, realignment_token);
-             new_temp = make_ssa_name (vec_dest, new_stmt);
-             gimple_assign_set_lhs (new_stmt, new_temp);
-             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+      /* 3. Handle explicit realignment if necessary/supported.
+        Create in loop:
+        vec_dest = realign_load (msq, lsq, realignment_token)  */
+      if (!costing_p
+         && (alignment_support_scheme == dr_explicit_realign_optimized
+             || alignment_support_scheme == dr_explicit_realign))
+       {
+         lsq = gimple_assign_lhs (new_stmt);
+         if (!realignment_token)
+           realignment_token = dataref_ptr;
+         vec_dest = vect_create_destination_var (scalar_dest, vectype);
+         new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
+                                         lsq, realignment_token);
+         new_temp = make_ssa_name (vec_dest, new_stmt);
+         gimple_assign_set_lhs (new_stmt, new_temp);
+         vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 
-             if (alignment_support_scheme == dr_explicit_realign_optimized)
-               {
-                 gcc_assert (phi);
-                 if (i == vec_num - 1)
-                   add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
-                                UNKNOWN_LOCATION);
-                 msq = lsq;
-               }
+         if (alignment_support_scheme == dr_explicit_realign_optimized)
+           {
+             gcc_assert (phi);
+             if (i == vec_num - 1)
+               add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
+                            UNKNOWN_LOCATION);
+             msq = lsq;
            }
+       }
 
-         if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+       {
+         if (costing_p)
+           inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
+                                           slp_node, 0, vect_body);
+         else
            {
-             if (costing_p)
-               inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
-                                               slp_node, 0, vect_body);
-             else
-               {
-                 tree perm_mask = perm_mask_for_reverse (vectype);
-                 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
-                                                  perm_mask, stmt_info, gsi);
-                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
-               }
+             tree perm_mask = perm_mask_for_reverse (vectype);
+             new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
+                                              perm_mask, stmt_info, gsi);
+             new_stmt = SSA_NAME_DEF_STMT (new_temp);
            }
+       }
 
-         /* Collect vector loads and later create their permutation in
-            vect_transform_grouped_load ().  */
-         if (!costing_p && (grouped_load || slp_perm))
-           dr_chain.quick_push (new_temp);
+      /* Collect vector loads and later create their permutation in
+        vect_transform_grouped_load ().  */
+      if (!costing_p && (grouped_load || slp_perm))
+       dr_chain.quick_push (new_temp);
 
-         /* Store vector loads in the corresponding SLP_NODE.  */
-         if (!costing_p && !slp_perm)
-           slp_node->push_vec_def (new_stmt);
+      /* Store vector loads in the corresponding SLP_NODE.  */
+      if (!costing_p && !slp_perm)
+       slp_node->push_vec_def (new_stmt);
 
-         /* With SLP permutation we load the gaps as well, without
-            we need to skip the gaps after we manage to fully load
-            all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
-         group_elt += nunits;
-         if (!costing_p
-             && maybe_ne (group_gap_adj, 0U)
-             && !slp_perm
-             && known_eq (group_elt, group_size - group_gap_adj))
-           {
-             poly_wide_int bump_val
-               = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
-             if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
-                 == -1)
-               bump_val = -bump_val;
-             tree bump = wide_int_to_tree (sizetype, bump_val);
-             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
-                                            stmt_info, bump);
-             group_elt = 0;
-           }
-       }
-      /* Bump the vector pointer to account for a gap or for excess
-        elements loaded for a permuted SLP load.  */
+      /* With SLP permutation we load the gaps as well, without
+        we need to skip the gaps after we manage to fully load
+        all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
+      group_elt += nunits;
       if (!costing_p
          && maybe_ne (group_gap_adj, 0U)
-         && slp_perm)
+         && !slp_perm
+         && known_eq (group_elt, group_size - group_gap_adj))
        {
          poly_wide_int bump_val
            = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
@@ -11969,31 +11930,46 @@ vectorizable_load (vec_info *vinfo,
          tree bump = wide_int_to_tree (sizetype, bump_val);
          dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
                                         stmt_info, bump);
+         group_elt = 0;
        }
+    }
+  /* Bump the vector pointer to account for a gap or for excess
+     elements loaded for a permuted SLP load.  */
+  if (!costing_p
+      && maybe_ne (group_gap_adj, 0U)
+      && slp_perm)
+    {
+      poly_wide_int bump_val
+       = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
+      if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
+       bump_val = -bump_val;
+      tree bump = wide_int_to_tree (sizetype, bump_val);
+      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                    stmt_info, bump);
+    }
 
-      if (slp_perm)
+  if (slp_perm)
+    {
+      unsigned n_perms;
+      /* For SLP we know we've seen all possible uses of dr_chain so
+        direct vect_transform_slp_perm_load to DCE the unused parts.
+        ???  This is a hack to prevent compile-time issues as seen
+        in PR101120 and friends.  */
+      if (costing_p)
        {
-         unsigned n_perms;
-         /* For SLP we know we've seen all possible uses of dr_chain so
-            direct vect_transform_slp_perm_load to DCE the unused parts.
-            ???  This is a hack to prevent compile-time issues as seen
-            in PR101120 and friends.  */
-         if (costing_p)
-           {
-             vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
-                                           true, &n_perms, nullptr);
-             inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
-                                             slp_node, 0, vect_body);
-           }
-         else
-           {
-             bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
-                                                     gsi, vf, false, &n_perms,
-                                                     nullptr, true);
-             gcc_assert (ok);
-           }
-         dr_chain.release ();
+         vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
+                                       true, &n_perms, nullptr);
+         inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
+                                         slp_node, 0, vect_body);
+       }
+      else
+       {
+         bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
+                                                 gsi, vf, false, &n_perms,
+                                                 nullptr, true);
+         gcc_assert (ok);
        }
+      dr_chain.release ();
     }
 
   if (costing_p)
-- 
2.43.0
