Hi,

Like commit r14-3214, which moved the handling of memory access type
VMAT_LOAD_STORE_LANES in vectorizable_load out of the final loop nest,
this patch does the same for the function vectorizable_store.
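
A rough sketch of the structural change, with most details elided
(just to illustrate the shape of the patch, not the literal code):

  /* Before: a single loop over ncopies, with the lanes case nested
     inside the final loop nest.  */
  for (j = 0; j < ncopies; j++)
    {
      /* ... set up dataref_ptr, dr_chain, vec_mask, ...  */
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
        /* ... emit the STORE_LANES call ...  */;
      else
        /* ... handle the other access types ...  */;
    }

  /* After: the lanes case gets its own dedicated loop and returns
     early, so the remaining loop nest only deals with the other
     access types.  */
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      for (j = 0; j < ncopies; j++)
        /* ... emit the STORE_LANES call ...  */;
      return true;
    }
  for (j = 0; j < ncopies; j++)
    /* ... handle the other access types ...  */;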

Bootstrapped and regtested on x86_64-redhat-linux,
aarch64-linux-gnu and powerpc64{,le}-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-----

gcc/ChangeLog:

        * tree-vect-stmts.cc (vectorizable_store): Move the handling of
        VMAT_LOAD_STORE_LANES in the final loop nest to its own loop,
        and update the final nest accordingly.
---
 gcc/tree-vect-stmts.cc | 732 ++++++++++++++++++++++-------------------
 1 file changed, 387 insertions(+), 345 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index fcaa4127e52..18f5ebcc09c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8779,42 +8779,29 @@ vectorizable_store (vec_info *vinfo,
   */

   auto_vec<tree> dr_chain (group_size);
-  auto_vec<tree> result_chain (group_size);
   auto_vec<tree> vec_masks;
   tree vec_mask = NULL;
-  auto_vec<tree> vec_offsets;
   auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
   for (i = 0; i < group_size; i++)
     gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
-  auto_vec<tree, 1> vec_oprnds;
-  for (j = 0; j < ncopies; j++)
+
+  if (memory_access_type == VMAT_LOAD_STORE_LANES)
     {
-      gimple *new_stmt;
-      if (j == 0)
+      gcc_assert (!slp && grouped_store);
+      for (j = 0; j < ncopies; j++)
        {
-          if (slp)
-            {
-             /* Get vectorized arguments for SLP_NODE.  */
-             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
-                                op, &vec_oprnds);
-              vec_oprnd = vec_oprnds[0];
-            }
-          else
-            {
-             /* For interleaved stores we collect vectorized defs for all the
-                stores in the group in DR_CHAIN. DR_CHAIN is then used as an
-                input to vect_permute_store_chain().
-
-                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
-                is of size 1.  */
+         gimple *new_stmt;
+         if (j == 0)
+           {
+             /* For interleaved stores we collect vectorized defs for all
+                the stores in the group in DR_CHAIN. DR_CHAIN is then used
+                as an input to vect_permute_store_chain().  */
              stmt_vec_info next_stmt_info = first_stmt_info;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
-                    DR_GROUP_SIZE is the exact number of stmts in the chain.
-                    Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
-                    that there is no interleaving, DR_GROUP_SIZE is 1,
-                    and only one iteration of the loop will be executed.  */
+                    DR_GROUP_SIZE is the exact number of stmts in the
+                    chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE.  */
                  op = vect_get_store_rhs (next_stmt_info);
                  vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
                                                 op, gvec_oprnds[i]);
@@ -8825,66 +8812,37 @@ vectorizable_store (vec_info *vinfo,
              if (mask)
                {
                  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
-                                                mask, &vec_masks, mask_vectype);
+                                                mask, &vec_masks,
+                                                mask_vectype);
                  vec_mask = vec_masks[0];
                }
-           }

-         /* We should have catched mismatched types earlier.  */
-         gcc_assert (useless_type_conversion_p (vectype,
-                                                TREE_TYPE (vec_oprnd)));
-         bool simd_lane_access_p
-           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
-         if (simd_lane_access_p
-             && !loop_masks
-             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
-             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
-             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
-             && integer_zerop (DR_INIT (first_dr_info->dr))
-             && alias_sets_conflict_p (get_alias_set (aggr_type),
-                                       get_alias_set (TREE_TYPE (ref_type))))
-           {
-             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
-             dataref_offset = build_int_cst (ref_type, 0);
+             /* We should have catched mismatched types earlier.  */
+             gcc_assert (
+               useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
+             dataref_ptr
+               = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                           NULL, offset, &dummy, gsi,
+                                           &ptr_incr, false, bump);
            }
-         else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
-                                        slp_node, &gs_info, &dataref_ptr,
-                                        &vec_offsets);
          else
-           dataref_ptr
-             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-                                         simd_lane_access_p ? loop : NULL,
-                                         offset, &dummy, gsi, &ptr_incr,
-                                         simd_lane_access_p, bump);
-       }
-      else
-       {
-         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-         /* DR_CHAIN is then used as an input to vect_permute_store_chain().
-            If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
-            of size 1.  */
-         for (i = 0; i < group_size; i++)
            {
-             vec_oprnd = (*gvec_oprnds[i])[j];
-             dr_chain[i] = vec_oprnd;
+             gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+             /* DR_CHAIN is then used as an input to
+                vect_permute_store_chain().  */
+             for (i = 0; i < group_size; i++)
+               {
+                 vec_oprnd = (*gvec_oprnds[i])[j];
+                 dr_chain[i] = vec_oprnd;
+               }
+             if (mask)
+               vec_mask = vec_masks[j];
+             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                            stmt_info, bump);
            }
-         if (mask)
-           vec_mask = vec_masks[j];
-         if (dataref_offset)
-           dataref_offset
-             = int_const_binop (PLUS_EXPR, dataref_offset, bump);
-         else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
-                                          stmt_info, bump);
-       }
-
-      if (memory_access_type == VMAT_LOAD_STORE_LANES)
-       {
-         tree vec_array;

          /* Get an array into which we can store the individual vectors.  */
-         vec_array = create_vector_array (vectype, vec_num);
+         tree vec_array = create_vector_array (vectype, vec_num);

          /* Invalidate the current contents of VEC_ARRAY.  This should
             become an RTL clobber too, which prevents the vector registers
@@ -8895,8 +8853,8 @@ vectorizable_store (vec_info *vinfo,
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
-             write_vector_array (vinfo, stmt_info,
-                                 gsi, vec_oprnd, vec_array, i);
+             write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
+                                 i);
            }

          tree final_mask = NULL;
@@ -8906,8 +8864,8 @@ vectorizable_store (vec_info *vinfo,
            final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
                                             ncopies, vectype, j);
          if (vec_mask)
-           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
-                                          final_mask, vec_mask, gsi);
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+                                          vec_mask, gsi);

          if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
            {
@@ -8955,8 +8913,7 @@ vectorizable_store (vec_info *vinfo,
              /* Emit:
                   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
              data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
-             call = gimple_build_call_internal (IFN_STORE_LANES, 1,
-                                                vec_array);
+             call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
              gimple_call_set_lhs (call, data_ref);
            }
          gimple_call_set_nothrow (call, true);
@@ -8965,301 +8922,386 @@ vectorizable_store (vec_info *vinfo,

          /* Record that VEC_ARRAY is now dead.  */
          vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
+         if (j == 0)
+           *vec_stmt = new_stmt;
+         STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
        }
-      else
-       {
-         new_stmt = NULL;
-         if (grouped_store)
-           /* Permute.  */
-           vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
-                                     gsi, &result_chain);

-         stmt_vec_info next_stmt_info = first_stmt_info;
-         for (i = 0; i < vec_num; i++)
-           {
-             unsigned misalign;
-             unsigned HOST_WIDE_INT align;
+      return true;
+    }

-             tree final_mask = NULL_TREE;
-             tree final_len = NULL_TREE;
-             tree bias = NULL_TREE;
-             if (loop_masks)
-               final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                                vec_num * ncopies,
-                                                vectype, vec_num * j + i);
-             if (vec_mask)
-               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
-                                              final_mask, vec_mask, gsi);
+  auto_vec<tree> result_chain (group_size);
+  auto_vec<tree> vec_offsets;
+  auto_vec<tree, 1> vec_oprnds;
+  for (j = 0; j < ncopies; j++)
+    {
+      gimple *new_stmt;
+      if (j == 0)
+       {
+         if (slp)
+           {
+             /* Get vectorized arguments for SLP_NODE.  */
+             vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
+                                &vec_oprnds);
+             vec_oprnd = vec_oprnds[0];
+           }
+         else
+           {
+             /* For interleaved stores we collect vectorized defs for all the
+                stores in the group in DR_CHAIN. DR_CHAIN is then used as an
+                input to vect_permute_store_chain().

-             if (memory_access_type == VMAT_GATHER_SCATTER
-                 && gs_info.ifn != IFN_LAST)
+                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
+                is of size 1.  */
+             stmt_vec_info next_stmt_info = first_stmt_info;
+             for (i = 0; i < group_size; i++)
                {
-                 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-                   vec_offset = vec_offsets[vec_num * j + i];
-                 tree scale = size_int (gs_info.scale);
-
-                 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
-                   {
-                     if (loop_lens)
-                       final_len
-                         = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              vec_num * ncopies, vectype,
-                                              vec_num * j + i, 1);
-                     else
-                       final_len
-                         = build_int_cst (sizetype,
-                                          TYPE_VECTOR_SUBPARTS (vectype));
-                     signed char biasval
-                       = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-                     bias = build_int_cst (intQI_type_node, biasval);
-                     if (!final_mask)
-                       {
-                         mask_vectype = truth_type_for (vectype);
-                         final_mask = build_minus_one_cst (mask_vectype);
-                       }
-                   }
-
-                 gcall *call;
-                 if (final_len && final_mask)
-                   call
-                     = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
-                                                   7, dataref_ptr, vec_offset,
-                                                   scale, vec_oprnd, final_mask,
-                                                   final_len, bias);
-                 else if (final_mask)
-                   call = gimple_build_call_internal
-                     (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
-                      scale, vec_oprnd, final_mask);
-                 else
-                   call = gimple_build_call_internal
-                     (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
-                      scale, vec_oprnd);
-                 gimple_call_set_nothrow (call, true);
-                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
-                 new_stmt = call;
-                 break;
+                 /* Since gaps are not supported for interleaved stores,
+                    DR_GROUP_SIZE is the exact number of stmts in the chain.
+                    Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
+                    that there is no interleaving, DR_GROUP_SIZE is 1,
+                    and only one iteration of the loop will be executed.  */
+                 op = vect_get_store_rhs (next_stmt_info);
+                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
+                                                op, gvec_oprnds[i]);
+                 vec_oprnd = (*gvec_oprnds[i])[0];
+                 dr_chain.quick_push (vec_oprnd);
+                 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
                }
-             else if (memory_access_type == VMAT_GATHER_SCATTER)
+             if (mask)
                {
-                 /* Emulated scatter.  */
-                 gcc_assert (!final_mask);
-                 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
-                 unsigned HOST_WIDE_INT const_offset_nunits
-                   = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
-                       .to_constant ();
-                 vec<constructor_elt, va_gc> *ctor_elts;
-                 vec_alloc (ctor_elts, const_nunits);
-                 gimple_seq stmts = NULL;
-                 tree elt_type = TREE_TYPE (vectype);
-                 unsigned HOST_WIDE_INT elt_size
-                   = tree_to_uhwi (TYPE_SIZE (elt_type));
-                 /* We support offset vectors with more elements
-                    than the data vector for now.  */
-                 unsigned HOST_WIDE_INT factor
-                   = const_offset_nunits / const_nunits;
-                 vec_offset = vec_offsets[j / factor];
-                 unsigned elt_offset = (j % factor) * const_nunits;
-                 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
-                 tree scale = size_int (gs_info.scale);
-                 align = get_object_alignment (DR_REF (first_dr_info->dr));
-                 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
-                 for (unsigned k = 0; k < const_nunits; ++k)
-                   {
-                     /* Compute the offsetted pointer.  */
-                     tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
-                                             bitsize_int (k + elt_offset));
-                     tree idx = gimple_build (&stmts, BIT_FIELD_REF,
-                                              idx_type, vec_offset,
-                                              TYPE_SIZE (idx_type), boff);
-                     idx = gimple_convert (&stmts, sizetype, idx);
-                     idx = gimple_build (&stmts, MULT_EXPR,
-                                         sizetype, idx, scale);
-                     tree ptr = gimple_build (&stmts, PLUS_EXPR,
-                                              TREE_TYPE (dataref_ptr),
-                                              dataref_ptr, idx);
-                     ptr = gimple_convert (&stmts, ptr_type_node, ptr);
-                     /* Extract the element to be stored.  */
-                     tree elt = gimple_build (&stmts, BIT_FIELD_REF,
-                                              TREE_TYPE (vectype), vec_oprnd,
-                                              TYPE_SIZE (elt_type),
-                                              bitsize_int (k * elt_size));
-                     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
-                     stmts = NULL;
-                     tree ref = build2 (MEM_REF, ltype, ptr,
-                                        build_int_cst (ref_type, 0));
-                     new_stmt = gimple_build_assign (ref, elt);
-                     vect_finish_stmt_generation (vinfo, stmt_info,
-                                                  new_stmt, gsi);
-                   }
-                 break;
+                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+                                                mask, &vec_masks,
+                                                mask_vectype);
+                 vec_mask = vec_masks[0];
                }
+           }

-             if (i > 0)
-               /* Bump the vector pointer.  */
-               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
-                                              gsi, stmt_info, bump);
+         /* We should have catched mismatched types earlier.  */
+         gcc_assert (useless_type_conversion_p (vectype,
+                                                TREE_TYPE (vec_oprnd)));
+         bool simd_lane_access_p
+           = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
+         if (simd_lane_access_p
+             && !loop_masks
+             && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
+             && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
+             && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
+             && integer_zerop (DR_INIT (first_dr_info->dr))
+             && alias_sets_conflict_p (get_alias_set (aggr_type),
+                                       get_alias_set (TREE_TYPE (ref_type))))
+           {
+             dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+             dataref_offset = build_int_cst (ref_type, 0);
+           }
+         else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+           vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, slp_node,
+                                        &gs_info, &dataref_ptr, &vec_offsets);
+         else
+           dataref_ptr
+             = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
+                                         simd_lane_access_p ? loop : NULL,
+                                         offset, &dummy, gsi, &ptr_incr,
+                                         simd_lane_access_p, bump);
+       }
+      else
+       {
+         gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
+         /* DR_CHAIN is then used as an input to vect_permute_store_chain().
+            If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
+            of size 1.  */
+         for (i = 0; i < group_size; i++)
+           {
+             vec_oprnd = (*gvec_oprnds[i])[j];
+             dr_chain[i] = vec_oprnd;
+           }
+         if (mask)
+           vec_mask = vec_masks[j];
+         if (dataref_offset)
+           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
+         else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                          stmt_info, bump);
+       }

-             if (slp)
-               vec_oprnd = vec_oprnds[i];
-             else if (grouped_store)
-               /* For grouped stores vectorized defs are interleaved in
-                  vect_permute_store_chain().  */
-               vec_oprnd = result_chain[i];
+      new_stmt = NULL;
+      if (grouped_store)
+       /* Permute.  */
+       vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, gsi,
+                                 &result_chain);

-             align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-             if (alignment_support_scheme == dr_aligned)
-               misalign = 0;
-             else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
-               {
-                 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
-                 misalign = 0;
-               }
-             else
-               misalign = misalignment;
-             if (dataref_offset == NULL_TREE
-                 && TREE_CODE (dataref_ptr) == SSA_NAME)
-               set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
-                                       misalign);
-             align = least_bit_hwi (misalign | align);
-
-             if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
-               {
-                 tree perm_mask = perm_mask_for_reverse (vectype);
-                 tree perm_dest = vect_create_destination_var
-                   (vect_get_store_rhs (stmt_info), vectype);
-                 tree new_temp = make_ssa_name (perm_dest);
-
-                 /* Generate the permute statement.  */
-                 gimple *perm_stmt
-                   = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
-                                          vec_oprnd, perm_mask);
-                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
-                 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
-                 vec_oprnd = new_temp;
-               }
+      stmt_vec_info next_stmt_info = first_stmt_info;
+      for (i = 0; i < vec_num; i++)
+       {
+         unsigned misalign;
+         unsigned HOST_WIDE_INT align;

-             /* Compute IFN when LOOP_LENS or final_mask valid.  */
-             machine_mode vmode = TYPE_MODE (vectype);
-             machine_mode new_vmode = vmode;
-             internal_fn partial_ifn = IFN_LAST;
-             if (loop_lens)
-               {
-                 opt_machine_mode new_ovmode
-                   = get_len_load_store_mode (vmode, false, &partial_ifn);
-                 new_vmode = new_ovmode.require ();
-                 unsigned factor
-                   = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
-                 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                vec_num * ncopies, vectype,
-                                                vec_num * j + i, factor);
-               }
-             else if (final_mask)
-               {
-                 if (!can_vec_mask_load_store_p (vmode,
-                                                 TYPE_MODE (TREE_TYPE (final_mask)),
-                                                 false, &partial_ifn))
-                   gcc_unreachable ();
-               }
+         tree final_mask = NULL_TREE;
+         tree final_len = NULL_TREE;
+         tree bias = NULL_TREE;
+         if (loop_masks)
+           final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+                                            vec_num * ncopies, vectype,
+                                            vec_num * j + i);
+         if (vec_mask)
+           final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+                                          vec_mask, gsi);

-             if (partial_ifn == IFN_MASK_LEN_STORE)
+         if (memory_access_type == VMAT_GATHER_SCATTER
+             && gs_info.ifn != IFN_LAST)
+           {
+             if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+               vec_offset = vec_offsets[vec_num * j + i];
+             tree scale = size_int (gs_info.scale);
+
+             if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
                {
-                 if (!final_len)
-                   {
-                     /* Pass VF value to 'len' argument of
-                        MASK_LEN_STORE if LOOP_LENS is invalid.  */
-                     final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
-                   }
+                 if (loop_lens)
+                   final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                                  vec_num * ncopies, vectype,
+                                                  vec_num * j + i, 1);
+                 else
+                   final_len = build_int_cst (sizetype,
+                                              TYPE_VECTOR_SUBPARTS (vectype));
+                 signed char biasval
+                   = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+                 bias = build_int_cst (intQI_type_node, biasval);
                  if (!final_mask)
                    {
-                     /* Pass all ones value to 'mask' argument of
-                        MASK_LEN_STORE if final_mask is invalid.  */
                      mask_vectype = truth_type_for (vectype);
                      final_mask = build_minus_one_cst (mask_vectype);
                    }
                }
-             if (final_len)
-               {
-                 signed char biasval
-                   = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);

-                 bias = build_int_cst (intQI_type_node, biasval);
+             gcall *call;
+             if (final_len && final_mask)
+               call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
+                                                  7, dataref_ptr, vec_offset,
+                                                  scale, vec_oprnd, final_mask,
+                                                  final_len, bias);
+             else if (final_mask)
+               call
+                 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
+                                               dataref_ptr, vec_offset, scale,
+                                               vec_oprnd, final_mask);
+             else
+               call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
+                                                  dataref_ptr, vec_offset,
+                                                  scale, vec_oprnd);
+             gimple_call_set_nothrow (call, true);
+             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+             new_stmt = call;
+             break;
+           }
+         else if (memory_access_type == VMAT_GATHER_SCATTER)
+           {
+             /* Emulated scatter.  */
+             gcc_assert (!final_mask);
+             unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
+             unsigned HOST_WIDE_INT const_offset_nunits
+               = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
+             vec<constructor_elt, va_gc> *ctor_elts;
+             vec_alloc (ctor_elts, const_nunits);
+             gimple_seq stmts = NULL;
+             tree elt_type = TREE_TYPE (vectype);
+             unsigned HOST_WIDE_INT elt_size
+               = tree_to_uhwi (TYPE_SIZE (elt_type));
+             /* We support offset vectors with more elements
+                than the data vector for now.  */
+             unsigned HOST_WIDE_INT factor
+               = const_offset_nunits / const_nunits;
+             vec_offset = vec_offsets[j / factor];
+             unsigned elt_offset = (j % factor) * const_nunits;
+             tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
+             tree scale = size_int (gs_info.scale);
+             align = get_object_alignment (DR_REF (first_dr_info->dr));
+             tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
+             for (unsigned k = 0; k < const_nunits; ++k)
+               {
+                 /* Compute the offsetted pointer.  */
+                 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
+                                         bitsize_int (k + elt_offset));
+                 tree idx
+                   = gimple_build (&stmts, BIT_FIELD_REF, idx_type, vec_offset,
+                                   TYPE_SIZE (idx_type), boff);
+                 idx = gimple_convert (&stmts, sizetype, idx);
+                 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
+                 tree ptr
+                   = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (dataref_ptr),
+                                   dataref_ptr, idx);
+                 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
+                 /* Extract the element to be stored.  */
+                 tree elt
+                   = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype),
+                                   vec_oprnd, TYPE_SIZE (elt_type),
+                                   bitsize_int (k * elt_size));
+                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+                 stmts = NULL;
+                 tree ref
+                   = build2 (MEM_REF, ltype, ptr, build_int_cst (ref_type, 0));
+                 new_stmt = gimple_build_assign (ref, elt);
+                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
                }
+             break;
+           }

-             /* Arguments are ready.  Create the new vector stmt.  */
-             if (final_len)
-               {
-                 gcall *call;
-                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
-                 /* Need conversion if it's wrapped with VnQI.  */
-                 if (vmode != new_vmode)
-                   {
-                     tree new_vtype
-                       = build_vector_type_for_mode (unsigned_intQI_type_node,
-                                                     new_vmode);
-                     tree var
-                       = vect_get_new_ssa_name (new_vtype, vect_simple_var);
-                     vec_oprnd
-                       = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
-                     gassign *new_stmt
-                       = gimple_build_assign (var, VIEW_CONVERT_EXPR,
-                                              vec_oprnd);
-                     vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
-                                                  gsi);
-                     vec_oprnd = var;
-                   }
+         if (i > 0)
+           /* Bump the vector pointer.  */
+           dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
+                                          stmt_info, bump);

-                 if (partial_ifn == IFN_MASK_LEN_STORE)
-                   call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
-                                                      dataref_ptr, ptr,
-                                                      final_mask, final_len,
-                                                      bias, vec_oprnd);
-                 else
-                   call
-                     = gimple_build_call_internal (IFN_LEN_STORE, 5,
-                                                   dataref_ptr, ptr,
-                                                   final_len, bias,
-                                                   vec_oprnd);
-                 gimple_call_set_nothrow (call, true);
-                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
-                 new_stmt = call;
+         if (slp)
+           vec_oprnd = vec_oprnds[i];
+         else if (grouped_store)
+           /* For grouped stores vectorized defs are interleaved in
+              vect_permute_store_chain().  */
+           vec_oprnd = result_chain[i];
+
+         align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           misalign = 0;
+         else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
+           {
+             align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+             misalign = 0;
+           }
+         else
+           misalign = misalignment;
+         if (dataref_offset == NULL_TREE
+             && TREE_CODE (dataref_ptr) == SSA_NAME)
+           set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
+                                   misalign);
+         align = least_bit_hwi (misalign | align);
+
+         if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+           {
+             tree perm_mask = perm_mask_for_reverse (vectype);
+             tree perm_dest
+               = vect_create_destination_var (vect_get_store_rhs (stmt_info),
+                                              vectype);
+             tree new_temp = make_ssa_name (perm_dest);
+
+             /* Generate the permute statement.  */
+             gimple *perm_stmt
+               = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
+                                      vec_oprnd, perm_mask);
+             vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
+
+             perm_stmt = SSA_NAME_DEF_STMT (new_temp);
+             vec_oprnd = new_temp;
+           }
+
+         /* Compute IFN when LOOP_LENS or final_mask valid.  */
+         machine_mode vmode = TYPE_MODE (vectype);
+         machine_mode new_vmode = vmode;
+         internal_fn partial_ifn = IFN_LAST;
+         if (loop_lens)
+           {
+             opt_machine_mode new_ovmode
+               = get_len_load_store_mode (vmode, false, &partial_ifn);
+             new_vmode = new_ovmode.require ();
+             unsigned factor
+               = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
+             final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                            vec_num * ncopies, vectype,
+                                            vec_num * j + i, factor);
+           }
+         else if (final_mask)
+           {
+             if (!can_vec_mask_load_store_p (
+                   vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
+                   &partial_ifn))
+               gcc_unreachable ();
+           }
+
+         if (partial_ifn == IFN_MASK_LEN_STORE)
+           {
+             if (!final_len)
+               {
+                 /* Pass VF value to 'len' argument of
+                    MASK_LEN_STORE if LOOP_LENS is invalid.  */
+                 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
                }
-             else if (final_mask)
+             if (!final_mask)
                {
-                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
-                 gcall *call
-                   = gimple_build_call_internal (IFN_MASK_STORE, 4,
-                                                 dataref_ptr, ptr,
-                                                 final_mask, vec_oprnd);
-                 gimple_call_set_nothrow (call, true);
-                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
-                 new_stmt = call;
+                 /* Pass all ones value to 'mask' argument of
+                    MASK_LEN_STORE if final_mask is invalid.  */
+                 mask_vectype = truth_type_for (vectype);
+                 final_mask = build_minus_one_cst (mask_vectype);
                }
-             else
+           }
+         if (final_len)
+           {
+             signed char biasval
+               = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+
+             bias = build_int_cst (intQI_type_node, biasval);
+           }
+
+         /* Arguments are ready.  Create the new vector stmt.  */
+         if (final_len)
+           {
+             gcall *call;
+             tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+             /* Need conversion if it's wrapped with VnQI.  */
+             if (vmode != new_vmode)
                {
-                 data_ref = fold_build2 (MEM_REF, vectype,
-                                         dataref_ptr,
-                                         dataref_offset
-                                         ? dataref_offset
-                                         : build_int_cst (ref_type, 0));
-                 if (alignment_support_scheme == dr_aligned)
-                   ;
-                 else
-                   TREE_TYPE (data_ref)
-                     = build_aligned_type (TREE_TYPE (data_ref),
-                                           align * BITS_PER_UNIT);
-                 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-                 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+                 tree new_vtype
+                   = build_vector_type_for_mode (unsigned_intQI_type_node,
+                                                 new_vmode);
+                 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
+                 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
+                 gassign *new_stmt
+                   = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
                  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+                 vec_oprnd = var;
                }

-             if (slp)
-               continue;
-
-             next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
-             if (!next_stmt_info)
-               break;
+             if (partial_ifn == IFN_MASK_LEN_STORE)
+               call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
+                                                  dataref_ptr, ptr, final_mask,
+                                                  final_len, bias, vec_oprnd);
+             else
+               call = gimple_build_call_internal (IFN_LEN_STORE, 5,
+                                                  dataref_ptr, ptr, final_len,
+                                                  bias, vec_oprnd);
+             gimple_call_set_nothrow (call, true);
+             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+             new_stmt = call;
+           }
+         else if (final_mask)
+           {
+             tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+             gcall *call
+               = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
+                                             ptr, final_mask, vec_oprnd);
+             gimple_call_set_nothrow (call, true);
+             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+             new_stmt = call;
+           }
+         else
+           {
+             data_ref
+               = fold_build2 (MEM_REF, vectype, dataref_ptr,
+                              dataref_offset ? dataref_offset
+                                             : build_int_cst (ref_type, 0));
+             if (alignment_support_scheme == dr_aligned)
+               ;
+             else
+               TREE_TYPE (data_ref)
+                 = build_aligned_type (TREE_TYPE (data_ref),
+                                       align * BITS_PER_UNIT);
+             vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+             new_stmt = gimple_build_assign (data_ref, vec_oprnd);
+             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
            }
+
+         if (slp)
+           continue;
+
+         next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
+         if (!next_stmt_info)
+           break;
        }
       if (!slp)
        {
--
2.31.1
