https://gcc.gnu.org/g:95aecacc8d9c40100f01433ba214103e756dc984
commit r16-2709-g95aecacc8d9c40100f01433ba214103e756dc984
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Aug 1 13:06:51 2025 +0200

    Avoid representing SLP mask by scalar op
    
    The following removes the scalar mask output from vect_check_scalar_mask
    and deals with the fallout, eliminating uses of it.  That's mostly
    replacing checks on 'mask' by checks on 'mask_node' but also realizing
    PR121349 and fixing that up a bit in check_load_store_for_partial_vectors.
    
    	PR tree-optimization/121349
    	* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
    	Get full SLP mask, reduce to uniform scalar_mask for further
    	processing if possible.
    	(vect_check_scalar_mask): Remove scalar mask output, remove
    	code conditional on slp_mask.
    	(vectorizable_call): Adjust.
    	(check_scan_store): Get and check SLP mask.
    	(vectorizable_store): Eliminate scalar mask variable.
    	(vectorizable_load): Likewise.

Diff:
---
 gcc/tree-vect-stmts.cc | 117 ++++++++++++++++++++++++++-----------------------
 1 file changed, 62 insertions(+), 55 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4b3ba3983cf4..7f922c277996 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1423,13 +1423,30 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
 				      vect_memory_access_type
 				      memory_access_type,
 				      const gather_scatter_info *gs_info,
-				      tree scalar_mask,
+				      slp_tree mask_node,
 				      vec<int> *elsvals = nullptr)
 {
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
     return;
 
+  /* Figure whether the mask is uniform.  scalar_mask is used to
+     populate the scalar_cond_masked_set.  */
+  tree scalar_mask = NULL_TREE;
+  if (mask_node)
+    for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i)
+      {
+	tree def = vect_get_slp_scalar_def (mask_node, i);
+	if (!def
+	    || (scalar_mask && def != scalar_mask))
+	  {
+	    scalar_mask = NULL;
+	    break;
+	  }
+	else
+	  scalar_mask = def;
+      }
+
   unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node, vectype);
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
@@ -2484,21 +2501,21 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 
 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
    conditional operation STMT_INFO.  When returning true, store the mask
-   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
-   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
-   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
+   in *MASK_NODE, the type of its definition in *MASK_DT_OUT and the type of
+   the vectorized mask in *MASK_VECTYPE_OUT.  */
 
 static bool
 vect_check_scalar_mask (vec_info *vinfo,
 			slp_tree slp_node, unsigned mask_index,
-			tree *mask, slp_tree *mask_node,
+			slp_tree *mask_node,
 			vect_def_type *mask_dt_out,
 			tree *mask_vectype_out)
 {
   enum vect_def_type mask_dt;
   tree mask_vectype;
   slp_tree mask_node_1;
+  tree mask_;
   if (!vect_is_simple_use (vinfo, slp_node, mask_index,
-			   mask, &mask_node_1, &mask_dt, &mask_vectype))
+			   &mask_, &mask_node_1, &mask_dt, &mask_vectype))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2507,7 +2524,7 @@ vect_check_scalar_mask (vec_info *vinfo,
     }
 
   if ((mask_dt == vect_constant_def || mask_dt == vect_external_def)
-      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
+      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_)))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2515,17 +2532,6 @@ vect_check_scalar_mask (vec_info *vinfo,
       return false;
     }
 
-  /* If the caller is not prepared for adjusting an external/constant
-     SLP mask vector type fail.  */
-  if (!mask_node
-      && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "SLP mask argument is not vectorized.\n");
-      return false;
-    }
-
   tree vectype = SLP_TREE_VECTYPE (slp_node);
   if (!mask_vectype)
     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
@@ -2553,11 +2559,11 @@ vect_check_scalar_mask (vec_info *vinfo,
 
   *mask_dt_out = mask_dt;
   *mask_vectype_out = mask_vectype;
-  if (mask_node)
-    *mask_node = mask_node_1;
+  *mask_node = mask_node_1;
   return true;
 }
 
+
 /* Return true if stored value is suitable for vectorizing store
    statement STMT_INFO.  When returning true, store the scalar stored
    in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
@@ -3265,7 +3271,7 @@ vectorizable_call (vec_info *vinfo,
       if ((int) i == mask_opno)
 	{
 	  if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno,
-				       &op, &slp_op[i], &dt[i], &vectypes[i]))
+				       &slp_op[i], &dt[i], &vectypes[i]))
 	    return false;
 	  continue;
 	}
@@ -7029,7 +7035,8 @@ scan_store_can_perm_p (tree vectype, tree init,
 
 static bool
 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
-		  enum vect_def_type rhs_dt, slp_tree slp_node, tree mask,
+		  enum vect_def_type rhs_dt, slp_tree slp_node,
+		  slp_tree mask_node,
 		  vect_memory_access_type memory_access_type)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -7038,7 +7045,7 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
 
   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
   if (SLP_TREE_LANES (slp_node) > 1
-      || mask
+      || mask_node
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
@@ -7735,7 +7742,7 @@ vectorizable_store (vec_info *vinfo,
 
   /* Is vectorizable store? */
 
-  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
+  tree mask_vectype = NULL_TREE;
   slp_tree mask_node = NULL;
   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
     {
@@ -7768,7 +7775,7 @@ vectorizable_store (vec_info *vinfo,
 	(call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
       if (mask_index >= 0
 	  && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
-				      &mask, &mask_node, &mask_dt,
+				      &mask_node, &mask_dt,
 				      &mask_vectype))
 	return false;
     }
@@ -7815,8 +7822,8 @@ vectorizable_store (vec_info *vinfo,
   int misalignment;
   poly_int64 poffset;
   internal_fn lanes_ifn;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
-			    &memory_access_type, &poffset,
+  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+			    vls_type, &memory_access_type, &poffset,
 			    &alignment_support_scheme, &misalignment, &gs_info,
 			    &lanes_ifn))
     return false;
@@ -7830,7 +7837,7 @@ vectorizable_store (vec_info *vinfo,
       return false;
     }
 
-  if (mask)
+  if (mask_node)
     {
       if (memory_access_type == VMAT_CONTIGUOUS)
 	{
@@ -7884,8 +7891,8 @@ vectorizable_store (vec_info *vinfo,
 
   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
     {
-      if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask,
-			     memory_access_type))
+      if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node,
+			     mask_node, memory_access_type))
 	return false;
     }
 
@@ -7899,10 +7906,10 @@ vectorizable_store (vec_info *vinfo,
 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
 					      vls_type, group_size,
 					      memory_access_type, &gs_info,
-					      mask);
+					      mask_node);
 
       if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
-	  || (mask
+	  || (mask_node
 	      && !vect_maybe_update_slp_op_vectype (mask_node,
 						    mask_vectype)))
 	{
@@ -8266,7 +8273,7 @@ vectorizable_store (vec_info *vinfo,
 	 realignment.  vect_supportable_dr_alignment always returns either
 	 dr_aligned or dr_unaligned_supported for masked operations.  */
       gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
-		   && !mask
+		   && !mask_node
 		   && !loop_masks)
 		  || alignment_support_scheme == dr_aligned
 		  || alignment_support_scheme == dr_unaligned_supported);
@@ -8301,7 +8308,7 @@ vectorizable_store (vec_info *vinfo,
 					 memory_access_type, loop_lens);
     }
 
-  if (mask && !costing_p)
+  if (mask_node && !costing_p)
     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
 
   /* In case the vectorization factor (VF) is bigger than the number
@@ -8341,7 +8348,7 @@ vectorizable_store (vec_info *vinfo,
 	{
 	  if (!costing_p)
 	    {
-	      if (mask)
+	      if (mask_node)
 		{
 		  vect_get_slp_defs (mask_node, &vec_masks);
 		  vec_mask = vec_masks[0];
@@ -8355,7 +8362,7 @@ vectorizable_store (vec_info *vinfo,
       else if (!costing_p)
 	{
 	  gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-	  if (mask)
+	  if (mask_node)
 	    vec_mask = vec_masks[j];
 	  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
 					 stmt_info, bump);
@@ -8490,7 +8497,7 @@ vectorizable_store (vec_info *vinfo,
 	     DR_CHAIN is of size 1.  */
 	  gcc_assert (group_size == 1);
 	  vect_get_slp_defs (op_node, gvec_oprnds[0]);
-	  if (mask)
+	  if (mask_node)
 	    vect_get_slp_defs (mask_node, &vec_masks);
 
 	  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -8516,7 +8523,7 @@ vectorizable_store (vec_info *vinfo,
 	  if (!costing_p)
 	    {
 	      vec_oprnd = (*gvec_oprnds[0])[j];
-	      if (mask)
+	      if (mask_node)
 		vec_mask = vec_masks[j];
 	      /* We should have catched mismatched types earlier.  */
 	      gcc_assert (useless_type_conversion_p (vectype,
@@ -8818,7 +8825,7 @@ vectorizable_store (vec_info *vinfo,
       /* Get vectorized arguments for SLP_NODE.  */
       vect_get_slp_defs (op_node, &vec_oprnds);
       vec_oprnd = vec_oprnds[0];
-      if (mask)
+      if (mask_node)
 	{
 	  vect_get_slp_defs (mask_node, &vec_masks);
 	  vec_mask = vec_masks[0];
@@ -9250,12 +9257,12 @@ vectorizable_load (vec_info *vinfo,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
+  tree mask_vectype = NULL_TREE;
   tree els = NULL_TREE;
   tree els_vectype = NULL_TREE;
   int mask_index = -1;
   int els_index = -1;
-  slp_tree slp_op = NULL;
+  slp_tree mask_node = NULL;
   slp_tree els_op = NULL;
   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
     {
@@ -9294,7 +9301,7 @@ vectorizable_load (vec_info *vinfo,
 	(call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
       if (mask_index >= 0
 	  && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
-				      &mask, &slp_op, &mask_dt, &mask_vectype))
+				      &mask_node, &mask_dt, &mask_vectype))
 	return false;
 
       els_index = internal_fn_else_index (ifn);
@@ -9377,8 +9384,8 @@ vectorizable_load (vec_info *vinfo,
   auto_vec<int> elsvals;
   int maskload_elsval = 0;
   bool need_zeroing = false;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
-			    &memory_access_type, &poffset,
+  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+			    VLS_LOAD, &memory_access_type, &poffset,
 			    &alignment_support_scheme, &misalignment, &gs_info,
 			    &lanes_ifn, &elsvals))
     return false;
@@ -9445,7 +9452,7 @@ vectorizable_load (vec_info *vinfo,
       return false;
     }
 
-  if (mask)
+  if (mask_node)
     {
       if (memory_access_type == VMAT_CONTIGUOUS)
 	{
@@ -9486,8 +9493,8 @@ vectorizable_load (vec_info *vinfo,
 
   if (costing_p) /* transformation not required.  */
     {
-      if (mask
-	  && !vect_maybe_update_slp_op_vectype (slp_op,
+      if (mask_node
+	  && !vect_maybe_update_slp_op_vectype (mask_node,
 						mask_vectype))
 	{
 	  if (dump_enabled_p ())
@@ -9503,7 +9510,7 @@ vectorizable_load (vec_info *vinfo,
 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
 					      VLS_LOAD, group_size,
 					      memory_access_type, &gs_info,
-					      mask, &elsvals);
+					      mask_node, &elsvals);
 
       if (dump_enabled_p ()
 	  && memory_access_type != VMAT_ELEMENTWISE
@@ -9527,7 +9534,7 @@ vectorizable_load (vec_info *vinfo,
 	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
 					      VLS_LOAD, group_size,
 					      memory_access_type, &gs_info,
-					      mask, &elsvals);
+					      mask_node, &elsvals);
     }
 
   /* If the type needs padding we must zero inactive elements.
@@ -9562,7 +9569,7 @@ vectorizable_load (vec_info *vinfo,
 
   if (memory_access_type == VMAT_INVARIANT)
     {
-      gcc_assert (!grouped_load && !mask && !bb_vinfo);
+      gcc_assert (!grouped_load && !mask_node && !bb_vinfo);
       /* If we have versioned for aliasing or the loop doesn't
 	 have any data dependencies that would preclude this,
 	 then we are sure this is a loop invariant load and
@@ -10037,7 +10044,7 @@ vectorizable_load (vec_info *vinfo,
 	 dr_aligned or dr_unaligned_supported for (non-length) masked
 	 operations.  */
       gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
-		   && !mask
+		   && !mask_node
 		   && !loop_masks)
 		  || memory_access_type == VMAT_GATHER_SCATTER
 		  || alignment_support_scheme == dr_aligned
@@ -10159,7 +10166,7 @@ vectorizable_load (vec_info *vinfo,
 
   auto_vec<tree> vec_offsets;
   auto_vec<tree> vec_masks;
-  if (mask && !costing_p)
+  if (mask_node && !costing_p)
     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
 		       &vec_masks);
 
@@ -10227,7 +10234,7 @@ vectorizable_load (vec_info *vinfo,
 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
 					   stmt_info, bump);
 	}
-      if (mask)
+      if (mask_node)
 	vec_mask = vec_masks[j];
 
       tree vec_array = create_vector_array (vectype, group_size);
@@ -10370,7 +10377,7 @@ vectorizable_load (vec_info *vinfo,
       tree bias = NULL_TREE;
       if (!costing_p)
 	{
-	  if (mask)
+	  if (mask_node)
 	    vec_mask = vec_masks[i];
 	  if (loop_masks)
 	    final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
@@ -10766,7 +10773,7 @@ vectorizable_load (vec_info *vinfo,
 
       if (!costing_p)
 	{
-	  if (mask)
+	  if (mask_node)
 	    vec_mask = vec_masks[i];
 	  if (loop_masks)
 	    final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
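[Editor's illustration, not part of the commit]  The interesting new logic
above is the lane walk in check_load_store_for_partial_vectors: instead of
receiving a single scalar mask, the function now takes the SLP mask node and
collapses its lanes to one scalar mask only when every lane has the same
scalar definition; otherwise scalar_mask stays NULL and the
scalar_cond_masked_set is not populated.  Below is a minimal standalone C++
sketch of just that reduction.  GCC's slp_tree lanes and 'tree' defs are
modeled as a vector of opaque pointers; the names (scalar_def,
uniform_scalar_mask, mask_lanes) are illustrative stand-ins, not GCC API.

    #include <cassert>
    #include <vector>

    typedef const void *scalar_def;	/* stand-in for GCC's 'tree' */

    /* Return the scalar def shared by all lanes, or nullptr when some
       lane has no scalar def or the lanes disagree -- the non-uniform
       case the patch now leaves without a scalar mask.  */

    static scalar_def
    uniform_scalar_mask (const std::vector<scalar_def> &mask_lanes)
    {
      scalar_def scalar_mask = nullptr;
      for (scalar_def def : mask_lanes)
        {
          if (!def || (scalar_mask && def != scalar_mask))
            return nullptr;	/* like scalar_mask = NULL; break; */
          scalar_mask = def;
        }
      return scalar_mask;
    }

    int
    main ()
    {
      int a, b;
      assert (uniform_scalar_mask ({&a, &a, &a}) == &a);	/* uniform */
      assert (uniform_scalar_mask ({&a, &b, &a}) == nullptr);	/* mixed lanes */
      assert (uniform_scalar_mask ({&a, nullptr}) == nullptr);	/* missing def */
      return 0;
    }

The effect of the reduction is that the existing scalar_cond_masked_set
machinery keeps working for the common uniform-mask case, while the rest of
the checks consume the SLP node directly and no longer misrepresent a
multi-lane mask by a single scalar operand.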