---
Hi All, This is a prototype patch for the phase 1 of the implementation of the RFC (https://gcc.gnu.org/pipermail/gcc/2026-May/248167.html) for supporting vpcompress instruction in gcc. Original PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91198 Regards, Raghesh gcc/tree-vect-loop.cc | 400 ++++++++++++++++++++++++++++++++++++++++- gcc/tree-vect-stmts.cc | 26 +++ gcc/tree-vectorizer.h | 5 + 3 files changed, 430 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index ac7e08cf205..cc1d5b0b8ac 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -165,6 +165,135 @@ static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, gphi **); +/* Function pred_phi_strip_conversion. + + This function check if CUR is an assigment statement and if it is an SSA + whose defining statement is NOP/CONVERT/VIEW_CONVERT/NON_LVALUE to SSA. + Stores that rhs SSA into CUR + + t_1 = (unsigned int) n; + + Stores n into CUR. */ +static bool +pred_phi_strip_conversion (tree *cur) +{ + if (TREE_CODE (*cur) != SSA_NAME) + return false; + + gimple *def = SSA_NAME_DEF_STMT (*cur); + gassign *assign = dyn_cast <gassign *> (def); + if (!assign) + return false; + + tree_code rhs_code = gimple_assign_rhs_code (assign); + if (rhs_code != NOP_EXPR + && rhs_code != CONVERT_EXPR + && rhs_code != VIEW_CONVERT_EXPR + && rhs_code != NON_LVALUE_EXPR) + return false; + + tree rhs1 = gimple_assign_rhs1 (assign); + if (TREE_CODE (rhs1) != SSA_NAME) + return false; + + *cur = rhs1; + return true; +} + +/* Function predicated_index_plus_one. + + This function checks if one of the arms of the COND_EXPR, + PRED_PHI_BACK_EDGE_ASGN_STMT is PRED_PHI itself and the other is + PRED_PHI + 1. Note that the increment can be passed through cast + instructions as in the below example. So we need to traverse through them. + + # n = PHI <n_1, ..> + ... + addr = base + n; + .MASK_STORE (addr, mask, ..) + t_1 = (unsigned int) n; + t_2 = t1_1 + 1; + n_1 = int (t_2); + n_2 = mask ? n_1 : n; */ +static bool +predicated_index_plus_one (tree &pred_phi, + gassign *pred_phi_back_edge_asgn_stmt) +{ + /* Check that one of the arms of COND_EXPR is pred-phi (n) and the other is + an increment value (n_1 = n + 1). */ + tree true_val = gimple_assign_rhs2 (pred_phi_back_edge_asgn_stmt); + tree false_val = gimple_assign_rhs3 (pred_phi_back_edge_asgn_stmt); + tree pred_phi_inc_val = NULL_TREE; + if (true_val == pred_phi) + pred_phi_inc_val = false_val; + else if (false_val == pred_phi) + pred_phi_inc_val = true_val; + else + return false; + if (TREE_CODE (pred_phi_inc_val) != SSA_NAME) + return false; + + /* Check if n1 = n + 1?. We may need to strip the cast instructions to reach + the PLUS_EXPR. TODO: Change depth logic - traverse until CUR go out of + the current basic block. */ + gimple *cur = SSA_NAME_DEF_STMT (pred_phi_inc_val); + for (unsigned depth = 0; depth < 8; depth++) + { + /* CUR should be an assignment statement. */ + gassign *cur_asgn = dyn_cast <gassign *> (cur); + if (!cur_asgn) + return false; + /* Stop traversing further if we reach a PLUS_EXPR. */ + if (gimple_assign_rhs_code (cur_asgn) == PLUS_EXPR) + { + /* Check for +1 increment. It can be either t_1 + 1 or 1 + t_1. */ + tree rhs1 = gimple_assign_rhs1 (cur_asgn); + tree rhs2 = gimple_assign_rhs2 (cur_asgn); + tree plus_base = NULL_TREE; + if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST + && integer_onep (rhs2)) + plus_base = rhs1; + else if (TREE_CODE (rhs2) == SSA_NAME && + TREE_CODE (rhs1) == INTEGER_CST && integer_onep (rhs1)) + plus_base = rhs2; + /* If plus_base is not set we dont have a +1 pattern. */ + if (!plus_base) + return false; + + /* Check if plus_base is pred_phi itself. We may still need to strip + some casts to reacy the actual base. TODO: Change depth logic - + traverse untill plus_base go out of current basic block. */ + for (unsigned depth_1 = 0; depth_1 < 8; depth_1++) + { + if (!pred_phi_strip_conversion (&plus_base)) + break; + } + /* The stripped base should be pred_phi. */ + if (plus_base == pred_phi) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "predicated_index_plus_one match: found\n"); + return true; + } + /* We don't have +1 pattern. */ + return false; + } + /* If we reach an non-PLUS_EXPR check if we can continue walk through + casts */ + tree cur_ssa = gimple_assign_lhs (cur_asgn); + if (pred_phi_strip_conversion(&cur_ssa)) + { + cur = SSA_NAME_DEF_STMT (cur_ssa); + continue; + } + /* We cannot reach a PLUS_EXPR. */ + return false; + } + + return false; +} + /* Function vect_is_simple_iv_evolution. FORNOW: A simple evolution of an induction variables in the loop is @@ -333,6 +462,265 @@ vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop, return true; } +/* Function pred_phi_in_mask_store_addr_computation. + + Starting from the store pointer in MASK_STORE_CALL, this function tries to + check if the base + offset computation is done and offset finaly reaches + PRED_PHI. + + # n = PHI <n_1, ..> + ... + a_1 = (long unsigned int) n; + a_2 = a_1 * 4; + a_3 = base + a_2; + addr = (int *) a_3; + .MASK_STORE (addr, mask, ..) + n_1 = n + 1; + ... + n_2 = mask ? n_1 : n; */ +static bool +pred_phi_in_mask_store_addr_computation (class loop *loop, + gcall *mask_store_call, tree pred_phi) +{ + /* Get the pointer operand of MASK_SLP_CALL. 'addr' in the example. */ + tree ptr = gimple_call_arg (mask_store_call, 0); + if (TREE_CODE (ptr) != SSA_NAME) + return false; + + /* Strip off the cast and see if we can reach to reach a PLUS_EXPR. + a_3 = base + a_2; + addr = (int *) a_3; */ + gassign *add_stmt = NULL; + tree cur = ptr; + /* TODO: traverse until CUR go out of the current bb. */ + /* TODO: Can we club it with pred_phi_strip_conversion */ + for (unsigned depth = 0; depth < 8; depth++) + { + gimple *def = SSA_NAME_DEF_STMT (cur); + gassign *a = dyn_cast <gassign *> (def); + if (!a) + return false; + + tree_code code = gimple_assign_rhs_code (a); + if (code == PLUS_EXPR || code == POINTER_PLUS_EXPR) + { + add_stmt = a; + break; + } + if (code != NOP_EXPR && code != CONVERT_EXPR + && code != VIEW_CONVERT_EXPR && code != NON_LVALUE_EXPR) + return false; + + tree rhs1 = gimple_assign_rhs1 (a); + if (TREE_CODE (rhs1) != SSA_NAME) + return false; + cur = rhs1; + } + + if (!add_stmt) + return false; + + /* Check if add_stmt is base + offset format. Note that it can be either + base + offset or offset + base. We take the loop invariant pointer as + the base. Example - a_3 = base + a_2; */ + tree add_op0 = gimple_assign_rhs1 (add_stmt); + tree add_op1 = gimple_assign_rhs2 (add_stmt); + /* Initilize base and offset as follows. We will correct if that is not the + case or bail out if both are not invariants. */ + tree base = add_op0; + tree offset = add_op1; + if (expr_invariant_in_loop_p (loop, add_op1)) + { + base = add_op1; + offset = add_op0; + } + else if (!expr_invariant_in_loop_p (loop, add_op0)) + return false; + + /* Now offset can be a multiplication with the element size as in + a_2 = a_1 * 4; */ + if (TREE_CODE (offset) != SSA_NAME) + return false; + gimple *def = SSA_NAME_DEF_STMT (offset); + gassign *mul = dyn_cast <gassign *> (def); + if (!mul || gimple_assign_rhs_code (mul) != MULT_EXPR) + return false; + tree mul_op0 = gimple_assign_rhs1 (mul); + tree mul_op1 = gimple_assign_rhs2 (mul); + tree index_op = mul_op0; + tree size_op = mul_op1; + if (TREE_CODE (mul_op0) == INTEGER_CST) + { + index_op = mul_op1; + size_op = mul_op0; + } + else if (TREE_CODE (mul_op1) != INTEGER_CST) + return false; + /* The index_op should be an SSA_NAM. */ + if (TREE_CODE (index_op) != SSA_NAME) + return false; + /* The size_op should be the element size pointed by ptr. */ + tree eltype = TREE_TYPE (TREE_TYPE (ptr)); + tree elem_size_unit = TYPE_SIZE_UNIT (eltype); + if (!elem_size_unit || TREE_CODE (elem_size_unit) != INTEGER_CST) + return false; + if (!operand_equal_p (size_op, elem_size_unit, OEP_ONLY_CONST)) + return false; + + /* No the index_op can be a casted one. So strip off that cast as in + a_1 = (long unsigned int) n; */ + pred_phi_strip_conversion (&index_op); + + /* Finally index_op should be PRED_PHI. */ + return index_op == pred_phi; +} + +/* Function vect_find_predicated_index_mask_store. + + This function establishes there relationship between the PRED_PHI detected + so far with an IFN_MASK_STORE. The store address for IFN_MASK_STORE should + be computed using PRED_PHI as index and PRED_PHI_MASK should be used as its + mask. Note that the address computation is done as (base + n * size) and + can pass through multiiple cast instructions as in the below example. + So we need to traverse through them. + + + # n = PHI <n_1, ..> + ... + a_1 = (long unsigned int) n; + a_2 = a_1 * 4; + a_3 = base + a_2; + addr = (int *) a_3; + .MASK_STORE (addr, mask, ..) + n_1 = n + 1; + ... + n_2 = mask ? n_1 : n; */ +static gcall * +vect_find_predicated_index_mask_store (class loop *loop, gphi *phi, + tree pred_phi, tree pred_phi_mask) +{ + /* Search only the basic block that holds PHI; TODO: to scan other blocks + when generalizing. */ + basic_block pred_bb = gimple_bb (phi); + /* Iterate over GIMPLE statements to find a matching IFN_MASK_STORE. */ + for (gimple_stmt_iterator gsi = gsi_after_labels (pred_bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *st = gsi_stmt (gsi); + + /* If not an IFN_MASK_STORE call continue. */ + gcall *mask_store_call = dyn_cast <gcall *> (st); + if (!mask_store_call || !gimple_call_internal_p (mask_store_call) + || gimple_call_internal_fn (mask_store_call) != IFN_MASK_STORE) + continue; + + /* Check if the mask of call is same as PRED_PHI_MASK. */ + if (gimple_call_arg (mask_store_call, 2) != pred_phi_mask) + continue; + + /* Check if PRED_PHI is used to compute mask_store_call address. */ + /* TODO: Can use any match operator (as in instcombine)?? */ + if (!pred_phi_in_mask_store_addr_computation(loop, mask_store_call, + pred_phi)) + continue; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_find_predicated_index_mask_store: matched " + "IFN_MASK_STORE %G\n", + (gimple *) mask_store_call); + + return mask_store_call; + } + + if (TREE_CODE (pred_phi_mask) != SSA_NAME) + return NULL; + return NULL; +} + +/* Part of vpcompress pattern detection. Checks if PHI matches the predicated- + index phi shape, which expects its incoming value from the backedge to be + incremented under the same mask used by an IFN_MASK_STORE, whose address is + indexed by the PHI itself. An outline of the pattern is given below. + + # n = PHI <n_1, ..> + ... + addr = base + n; + .MASK_STORE (addr, mask, ..) + n_1 = n + 1; + ... + n_2 = mask ? n_1 : n; */ +static bool +vect_analyze_predicated_index_phi (loop_vec_info loop_vinfo, class loop *loop, + gphi *phi) +{ + /* Require a loop-header PHI. */ + if (gimple_bb (phi) != loop->header) + return false; + + tree pred_phi = PHI_RESULT (phi); + /* PHI should be an integer. */ + if (TREE_CODE (pred_phi) != SSA_NAME + || TREE_CODE (TREE_TYPE (pred_phi)) != INTEGER_TYPE) + return false; + + /* Restricting to zero initializatin from preheader for now. TODO: Can be + generalized to arbitrary integer starts. */ + tree pre_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); + if (TREE_CODE (pre_init) != INTEGER_CST || !integer_zerop (pre_init)) + return false; + + /* The incoming value for pred-phi must be in the if-converted form. + n_2 = mask ? n_1 : n; TODO: This may be generalized later for more + patterns. */ + /* Get the incoming value (n_2) of pred-phi through backedge. */ + tree pred_phi_back_edge_val = + PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + if (TREE_CODE (pred_phi_back_edge_val) != SSA_NAME) + return false; + /* Check if pred_phi_back_edge_val is a COND_EXPR. */ + gimple *pred_phi_back_edge_stmt + = SSA_NAME_DEF_STMT (pred_phi_back_edge_val); + if (!pred_phi_back_edge_stmt) + return false; + gassign *pred_phi_back_edge_asgn_stmt + = dyn_cast <gassign *> (pred_phi_back_edge_stmt); + if (!pred_phi_back_edge_asgn_stmt + || gimple_assign_rhs_code (pred_phi_back_edge_asgn_stmt) != COND_EXPR) + return false; + + /* Check that one of the arms of COND_EXPR is pred-phi (n) and the other is + an increment value (n_1 = n + 1). */ + if (!predicated_index_plus_one (pred_phi, pred_phi_back_edge_asgn_stmt)) + return false; + + /* Check if there is a IFN_MASK_STORE, which is connected to the same + * phi that we detected (pred-phi) so far. The store address should be + * computed based on pred-phi and it should use the same mask as that of + * COND_EXPR. */ + tree mask = gimple_assign_rhs1 (pred_phi_back_edge_asgn_stmt); + gcall *mask_store = + vect_find_predicated_index_mask_store (loop, phi, pred_phi, mask); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "predicated-index PHI: matched predicated phi %G\n", + (gimple *) phi); + + if (!mask_store) + return false; + + /* Keep at most one predicated-index PHI per loop_vinfo. */ + if (loop_vinfo->predicated_index_phi != NULL + && loop_vinfo->predicated_index_phi != phi) + return false; + + loop_vinfo->predicated_index_phi = phi; + loop_vinfo->predicated_index_mask_store = mask_store; + + /* All requirements satisfiled. */ + return true; +} /* Function vect_analyze_scalar_cycles_1. Examine the cross iteration def-use cycles of scalar variables @@ -479,6 +867,14 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop) } else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi)) STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence; + else if (loop == LOOP_VINFO_LOOP (loop_vinfo) + && vect_analyze_predicated_index_phi (loop_vinfo, loop, phi)) + { + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_predicated_index_def; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Detected predicated-index PHI.\n"); + } else if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -775,7 +1171,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) drs_advanced_by (NULL_TREE), vec_loop_main_exit (NULL), vec_epilogue_loop_main_exit (NULL), - scalar_loop_main_exit (NULL) + scalar_loop_main_exit (NULL), + predicated_index_phi (NULL), + predicated_index_mask_store (NULL) { /* CHECKME: We want to visit all BBs before their successors (except for latch blocks, for which this assertion wouldn't hold). In the simple diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 32691f47eb7..596dce6df57 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -704,6 +704,12 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p)) { + /* TODO: Till predicated-index PHI is handled, exit + gracefully. */ + if (STMT_VINFO_DEF_TYPE (phi_info) == vect_predicated_index_def) + return opt_result::failure_at + (*si, "not vectorized: predicated-index PHI " + "not supported yet: %G", *si); if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type) return opt_result::failure_at (*si, "not vectorized: unhandled relevant PHI: %G", *si); @@ -13264,6 +13270,14 @@ vect_analyze_stmt (vec_info *vinfo, case vect_constant_def: case vect_external_def: case vect_unknown_def_type: + gcc_unreachable (); + /* TODO: Till predicated-index PHI is handled, exit gracefully. */ + case vect_predicated_index_def: + return opt_result::failure_at + (stmt_info->stmt, + "not vectorized: predicated-index PHI not supported yet: %G\n", + stmt_info->stmt); + default: gcc_unreachable (); } @@ -13899,12 +13913,24 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, case vect_condition_def: dump_printf (MSG_NOTE, "control flow\n"); break; + case vect_predicated_index_def: + dump_printf (MSG_NOTE, "predicated index\n"); + break; case vect_unknown_def_type: dump_printf (MSG_NOTE, "unknown\n"); break; } } + /* TODO: Till predicated-index PHI is handled, exit gracefully. */ + if (*dt == vect_predicated_index_def) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported: predicated-index PHI.\n"); + return false; + } + if (*dt == vect_unknown_def_type) { if (dump_enabled_p ()) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index de50ed3277c..c8108ea75d0 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -76,6 +76,7 @@ enum vect_def_type { vect_nested_cycle, vect_first_order_recurrence, vect_condition_def, + vect_predicated_index_def, vect_unknown_def_type }; @@ -1266,6 +1267,10 @@ public: /* Record statements that are needed to be live for early break vectorization but may not have an LC PHI node materialized yet in the exits. */ auto_vec<stmt_vec_info> early_break_live_ivs; + + /* Predicated-index PHI and matching IFN_MASK_STORE. */ + gphi *predicated_index_phi; + gcall *predicated_index_mask_store; } *loop_vec_info; /* Access Functions. */ -- 2.34.1
