---

Hi All,

This is a prototype patch for the phase 1 of the implementation of the RFC 
(https://gcc.gnu.org/pipermail/gcc/2026-May/248167.html) for supporting 
vpcompress instruction in gcc.

Original PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91198

Regards,
Raghesh

 gcc/tree-vect-loop.cc  | 400 ++++++++++++++++++++++++++++++++++++++++-
 gcc/tree-vect-stmts.cc |  26 +++
 gcc/tree-vectorizer.h  |   5 +
 3 files changed, 430 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ac7e08cf205..cc1d5b0b8ac 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -165,6 +165,135 @@ static stmt_vec_info vect_is_simple_reduction 
(loop_vec_info, stmt_vec_info,
                                               gphi **);


+/* Function pred_phi_strip_conversion.
+
+   This function check if CUR is an assigment statement and if it is an SSA
+   whose defining statement is NOP/CONVERT/VIEW_CONVERT/NON_LVALUE to SSA.
+   Stores that rhs SSA into CUR
+
+     t_1 = (unsigned int) n;
+
+     Stores n into CUR.  */
+static bool
+pred_phi_strip_conversion (tree *cur)
+{
+  if (TREE_CODE (*cur) != SSA_NAME)
+    return false;
+
+  gimple *def = SSA_NAME_DEF_STMT (*cur);
+  gassign *assign = dyn_cast <gassign *> (def);
+  if (!assign)
+    return false;
+
+  tree_code rhs_code = gimple_assign_rhs_code (assign);
+  if (rhs_code != NOP_EXPR
+      && rhs_code != CONVERT_EXPR
+      && rhs_code != VIEW_CONVERT_EXPR
+      && rhs_code != NON_LVALUE_EXPR)
+    return false;
+
+  tree rhs1 = gimple_assign_rhs1 (assign);
+  if (TREE_CODE (rhs1) != SSA_NAME)
+    return false;
+
+  *cur = rhs1;
+  return true;
+}
+
+/* Function predicated_index_plus_one.
+
+   This function checks if one of the arms of the COND_EXPR,
+   PRED_PHI_BACK_EDGE_ASGN_STMT is PRED_PHI itself and the other is
+   PRED_PHI + 1.  Note that the increment can be passed through cast
+   instructions as in the below example. So we need to traverse through them.
+
+     # n = PHI <n_1, ..>
+     ...
+     addr = base + n;
+     .MASK_STORE (addr, mask, ..)
+     t_1 = (unsigned int) n;
+     t_2 = t1_1 + 1;
+     n_1 = int (t_2);
+     n_2 = mask ? n_1 : n;  */
+static bool
+predicated_index_plus_one (tree &pred_phi,
+                          gassign *pred_phi_back_edge_asgn_stmt)
+{
+  /* Check that one of the arms of COND_EXPR is pred-phi (n) and the other is
+     an increment value (n_1 = n + 1).  */
+  tree true_val = gimple_assign_rhs2 (pred_phi_back_edge_asgn_stmt);
+  tree false_val = gimple_assign_rhs3 (pred_phi_back_edge_asgn_stmt);
+  tree pred_phi_inc_val = NULL_TREE;
+  if (true_val == pred_phi)
+    pred_phi_inc_val = false_val;
+  else if (false_val == pred_phi)
+    pred_phi_inc_val = true_val;
+  else
+    return false;
+  if (TREE_CODE (pred_phi_inc_val) != SSA_NAME)
+    return false;
+
+  /* Check if n1 = n + 1?. We may need to strip the cast instructions to reach
+     the PLUS_EXPR. TODO: Change depth logic - traverse until CUR go out of
+     the current basic block.  */
+  gimple *cur = SSA_NAME_DEF_STMT (pred_phi_inc_val);
+  for (unsigned depth = 0; depth < 8; depth++)
+    {
+      /* CUR should be an assignment statement.  */
+      gassign *cur_asgn = dyn_cast <gassign *> (cur);
+      if (!cur_asgn)
+       return false;
+      /* Stop traversing further if we reach a PLUS_EXPR.  */
+      if (gimple_assign_rhs_code (cur_asgn) == PLUS_EXPR)
+       {
+         /* Check for +1 increment. It can be either t_1 + 1 or 1 + t_1.  */
+         tree rhs1 = gimple_assign_rhs1 (cur_asgn);
+         tree rhs2 = gimple_assign_rhs2 (cur_asgn);
+         tree plus_base = NULL_TREE;
+         if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST
+             && integer_onep (rhs2))
+           plus_base = rhs1;
+         else if (TREE_CODE (rhs2) == SSA_NAME &&
+                  TREE_CODE (rhs1) == INTEGER_CST && integer_onep (rhs1))
+           plus_base = rhs2;
+         /* If plus_base is not set we dont have a +1 pattern.  */
+         if (!plus_base)
+           return false;
+
+         /* Check if plus_base is pred_phi itself. We may still need to strip
+            some casts to reacy the actual base. TODO: Change depth logic -
+            traverse untill plus_base go out of current basic block.  */
+         for (unsigned depth_1 = 0; depth_1 < 8; depth_1++)
+           {
+             if (!pred_phi_strip_conversion (&plus_base))
+               break;
+           }
+         /* The stripped base should be pred_phi.  */
+         if (plus_base == pred_phi)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "predicated_index_plus_one match: found\n");
+             return true;
+           }
+         /* We don't have +1 pattern.  */
+         return false;
+       }
+      /* If we reach an non-PLUS_EXPR check if we can continue walk through
+        casts */
+      tree cur_ssa = gimple_assign_lhs (cur_asgn);
+      if (pred_phi_strip_conversion(&cur_ssa))
+       {
+         cur = SSA_NAME_DEF_STMT (cur_ssa);
+         continue;
+       }
+      /* We cannot reach a PLUS_EXPR.  */
+      return false;
+    }
+
+  return false;
+}
+
 /* Function vect_is_simple_iv_evolution.
 
    FORNOW: A simple evolution of an induction variables in the loop is
@@ -333,6 +462,265 @@ vect_phi_first_order_recurrence_p (loop_vec_info 
loop_vinfo, class loop *loop,
   return true;
 }
 
+/* Function pred_phi_in_mask_store_addr_computation.
+
+   Starting from the store pointer in MASK_STORE_CALL, this function tries to
+   check if the base + offset computation is done and offset finaly reaches
+   PRED_PHI.
+
+     # n = PHI <n_1, ..>
+     ...
+     a_1 = (long unsigned int) n;
+     a_2 = a_1 * 4;
+     a_3 = base + a_2;
+     addr =  (int *) a_3;
+     .MASK_STORE (addr, mask, ..)
+     n_1 = n + 1;
+     ...
+     n_2 = mask ? n_1 : n;  */
+static bool
+pred_phi_in_mask_store_addr_computation (class loop *loop,
+                                        gcall *mask_store_call, tree pred_phi)
+{
+  /* Get the pointer operand of MASK_SLP_CALL. 'addr' in the example.  */
+  tree ptr = gimple_call_arg (mask_store_call, 0);
+  if (TREE_CODE (ptr) != SSA_NAME)
+    return false;
+
+  /* Strip off the cast and see if we can reach to reach a PLUS_EXPR.
+     a_3 = base + a_2;
+     addr =  (int *) a_3;  */
+  gassign *add_stmt = NULL;
+  tree cur = ptr;
+  /* TODO: traverse until CUR go out of the current bb.  */
+  /* TODO: Can we club it with pred_phi_strip_conversion  */
+  for (unsigned depth = 0; depth < 8; depth++)
+    {
+      gimple *def = SSA_NAME_DEF_STMT (cur);
+      gassign *a = dyn_cast <gassign *> (def);
+      if (!a)
+       return false;
+
+      tree_code code = gimple_assign_rhs_code (a);
+      if (code == PLUS_EXPR || code == POINTER_PLUS_EXPR)
+       {
+         add_stmt = a;
+         break;
+       }
+      if (code != NOP_EXPR && code != CONVERT_EXPR
+         && code != VIEW_CONVERT_EXPR && code != NON_LVALUE_EXPR)
+       return false;
+
+      tree rhs1 = gimple_assign_rhs1 (a);
+      if (TREE_CODE (rhs1) != SSA_NAME)
+       return false;
+      cur = rhs1;
+    }
+
+  if (!add_stmt)
+    return false;
+
+  /* Check if add_stmt is base + offset format. Note that it can be either
+     base + offset or offset + base.  We take the loop invariant pointer as
+     the base.  Example -  a_3 = base + a_2;  */
+  tree add_op0 = gimple_assign_rhs1 (add_stmt);
+  tree add_op1 = gimple_assign_rhs2 (add_stmt);
+  /* Initilize base and offset as follows. We will correct if that is not the
+     case or bail out if both are not invariants.  */
+  tree base = add_op0;
+  tree offset = add_op1;
+  if (expr_invariant_in_loop_p (loop, add_op1))
+    {
+      base = add_op1;
+      offset = add_op0;
+    }
+  else if (!expr_invariant_in_loop_p (loop, add_op0))
+    return false;
+
+  /* Now offset can be a multiplication with the element size as in
+     a_2 = a_1 * 4;  */
+  if (TREE_CODE (offset) != SSA_NAME)
+    return false;
+  gimple *def = SSA_NAME_DEF_STMT (offset);
+  gassign *mul = dyn_cast <gassign *> (def);
+  if (!mul || gimple_assign_rhs_code (mul) != MULT_EXPR)
+    return false;
+  tree mul_op0 = gimple_assign_rhs1 (mul);
+  tree mul_op1 = gimple_assign_rhs2 (mul);
+  tree index_op =  mul_op0;
+  tree size_op = mul_op1;
+  if (TREE_CODE (mul_op0) == INTEGER_CST)
+    {
+      index_op = mul_op1;
+      size_op = mul_op0;
+    }
+  else if (TREE_CODE (mul_op1) != INTEGER_CST)
+    return false;
+  /* The index_op should be an SSA_NAM. */
+  if (TREE_CODE (index_op) != SSA_NAME)
+      return false;
+  /* The size_op should be the element size pointed by ptr.  */
+  tree eltype = TREE_TYPE (TREE_TYPE (ptr));
+  tree elem_size_unit = TYPE_SIZE_UNIT (eltype);
+  if (!elem_size_unit || TREE_CODE (elem_size_unit) != INTEGER_CST)
+    return false;
+  if (!operand_equal_p (size_op, elem_size_unit, OEP_ONLY_CONST))
+    return false;
+
+  /* No the index_op can be a casted one. So strip off that cast as in
+     a_1 = (long unsigned int) n;  */
+  pred_phi_strip_conversion (&index_op);
+
+  /* Finally index_op should be PRED_PHI.  */
+  return index_op == pred_phi;
+}
+
+/* Function vect_find_predicated_index_mask_store.
+
+   This function establishes there relationship between the PRED_PHI detected
+   so far with an IFN_MASK_STORE. The store address for IFN_MASK_STORE should
+   be computed using PRED_PHI as index and PRED_PHI_MASK should be used as its
+   mask.  Note that the address computation is done as (base + n * size) and
+   can pass through multiiple cast instructions as in the below example.
+   So we need to traverse through them.
+
+
+     # n = PHI <n_1, ..>
+     ...
+     a_1 = (long unsigned int) n;
+     a_2 = a_1 * 4;
+     a_3 = base + a_2;
+     addr =  (int *) a_3;
+     .MASK_STORE (addr, mask, ..)
+     n_1 = n + 1;
+     ...
+     n_2 = mask ? n_1 : n;  */
+static gcall *
+vect_find_predicated_index_mask_store (class loop *loop, gphi *phi,
+                                      tree pred_phi, tree pred_phi_mask)
+{
+  /* Search only the basic block that holds PHI; TODO: to scan other blocks
+     when generalizing.  */
+  basic_block pred_bb = gimple_bb (phi);
+  /* Iterate over GIMPLE statements to find a matching IFN_MASK_STORE.  */
+  for (gimple_stmt_iterator gsi = gsi_after_labels (pred_bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      gimple *st = gsi_stmt (gsi);
+
+      /* If not an IFN_MASK_STORE call continue.  */
+      gcall *mask_store_call = dyn_cast <gcall *> (st);
+      if (!mask_store_call || !gimple_call_internal_p (mask_store_call)
+         || gimple_call_internal_fn (mask_store_call) != IFN_MASK_STORE)
+       continue;
+
+      /* Check if the mask of call is same as PRED_PHI_MASK.  */
+      if (gimple_call_arg (mask_store_call, 2) != pred_phi_mask)
+       continue;
+
+      /* Check if PRED_PHI is used to compute mask_store_call address.  */
+      /* TODO: Can use any match operator (as in instcombine)??  */
+      if (!pred_phi_in_mask_store_addr_computation(loop, mask_store_call,
+                                                  pred_phi))
+       continue;
+
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_NOTE, vect_location,
+                        "vect_find_predicated_index_mask_store: matched "
+                        "IFN_MASK_STORE %G\n",
+                        (gimple *) mask_store_call);
+
+      return mask_store_call;
+    }
+
+  if (TREE_CODE (pred_phi_mask) != SSA_NAME)
+    return NULL;
+  return NULL;
+}
+
+/* Part of vpcompress pattern detection. Checks if PHI matches the predicated-
+   index phi shape, which expects its incoming value from the backedge to be
+   incremented under the same mask used by an IFN_MASK_STORE, whose address is
+   indexed by the PHI itself. An outline of the pattern is given below.
+
+     # n = PHI <n_1, ..>
+     ...
+     addr = base + n;
+     .MASK_STORE (addr, mask, ..)
+     n_1 = n + 1;
+     ...
+     n_2 = mask ? n_1 : n;  */
+static bool
+vect_analyze_predicated_index_phi (loop_vec_info loop_vinfo, class loop *loop,
+                                  gphi *phi)
+{
+  /* Require a loop-header PHI.  */
+  if (gimple_bb (phi) != loop->header)
+    return false;
+
+  tree pred_phi = PHI_RESULT (phi);
+  /* PHI should be an integer.  */
+  if (TREE_CODE (pred_phi) != SSA_NAME
+      || TREE_CODE (TREE_TYPE (pred_phi)) != INTEGER_TYPE)
+    return false;
+
+  /* Restricting to zero initializatin from preheader for now. TODO: Can be
+     generalized to arbitrary integer starts.  */
+  tree pre_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
+  if (TREE_CODE (pre_init) != INTEGER_CST || !integer_zerop (pre_init))
+    return false;
+
+  /* The incoming value for pred-phi must be in the if-converted form.
+     n_2 = mask ? n_1 : n; TODO: This may be generalized later for more
+     patterns. */
+  /* Get the incoming value (n_2) of pred-phi through backedge.  */
+  tree pred_phi_back_edge_val =
+    PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+  if (TREE_CODE (pred_phi_back_edge_val) != SSA_NAME)
+    return false;
+  /* Check if pred_phi_back_edge_val is a COND_EXPR.  */
+  gimple *pred_phi_back_edge_stmt
+    = SSA_NAME_DEF_STMT (pred_phi_back_edge_val);
+  if (!pred_phi_back_edge_stmt)
+    return false;
+  gassign *pred_phi_back_edge_asgn_stmt
+    = dyn_cast <gassign *> (pred_phi_back_edge_stmt);
+  if (!pred_phi_back_edge_asgn_stmt
+      || gimple_assign_rhs_code (pred_phi_back_edge_asgn_stmt) != COND_EXPR)
+    return false;
+
+  /* Check that one of the arms of COND_EXPR is pred-phi (n) and the other is
+     an increment value (n_1 = n + 1).  */
+  if (!predicated_index_plus_one (pred_phi, pred_phi_back_edge_asgn_stmt))
+    return false;
+
+  /* Check if there is a IFN_MASK_STORE, which is connected to the same
+   * phi that we detected (pred-phi) so far. The store address should be
+   * computed based on pred-phi and it should use the same mask as that of
+   * COND_EXPR.  */
+  tree mask = gimple_assign_rhs1 (pred_phi_back_edge_asgn_stmt);
+  gcall *mask_store =
+    vect_find_predicated_index_mask_store (loop, phi, pred_phi, mask);
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "predicated-index PHI: matched predicated phi %G\n",
+                    (gimple *) phi);
+
+  if (!mask_store)
+    return false;
+
+  /* Keep at most one predicated-index PHI per loop_vinfo.  */
+  if (loop_vinfo->predicated_index_phi != NULL
+      && loop_vinfo->predicated_index_phi != phi)
+    return false;
+
+  loop_vinfo->predicated_index_phi = phi;
+  loop_vinfo->predicated_index_mask_store = mask_store;
+
+  /* All requirements satisfiled.  */
+  return true;
+}
 /* Function vect_analyze_scalar_cycles_1.
 
    Examine the cross iteration def-use cycles of scalar variables
@@ -479,6 +867,14 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, 
class loop *loop)
        }
       else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
        STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence;
+      else if (loop == LOOP_VINFO_LOOP (loop_vinfo)
+              && vect_analyze_predicated_index_phi (loop_vinfo, loop, phi))
+       {
+         STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_predicated_index_def;
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Detected predicated-index PHI.\n");
+       }
       else
         if (dump_enabled_p ())
           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -775,7 +1171,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
     drs_advanced_by (NULL_TREE),
     vec_loop_main_exit (NULL),
     vec_epilogue_loop_main_exit (NULL),
-    scalar_loop_main_exit (NULL)
+    scalar_loop_main_exit (NULL),
+    predicated_index_phi (NULL),
+    predicated_index_mask_store (NULL)
 {
   /* CHECKME: We want to visit all BBs before their successors (except for
      latch blocks, for which this assertion wouldn't hold).  In the simple
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 32691f47eb7..596dce6df57 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -704,6 +704,12 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info 
loop_vinfo, bool *fatal)
 
          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            {
+             /* TODO: Till predicated-index PHI is handled, exit
+                gracefully.  */
+             if (STMT_VINFO_DEF_TYPE (phi_info) == vect_predicated_index_def)
+               return opt_result::failure_at
+                 (*si, "not vectorized: predicated-index PHI "
+                 "not supported yet: %G", *si);
              if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type)
                return opt_result::failure_at
                  (*si, "not vectorized: unhandled relevant PHI: %G", *si);
@@ -13264,6 +13270,14 @@ vect_analyze_stmt (vec_info *vinfo,
       case vect_constant_def:
       case vect_external_def:
       case vect_unknown_def_type:
+       gcc_unreachable ();
+      /* TODO: Till predicated-index PHI is handled, exit gracefully.  */
+      case vect_predicated_index_def:
+       return opt_result::failure_at
+         (stmt_info->stmt,
+          "not vectorized: predicated-index PHI not supported yet: %G\n",
+          stmt_info->stmt);
+
       default:
         gcc_unreachable ();
     }
@@ -13899,12 +13913,24 @@ vect_is_simple_use (tree operand, vec_info *vinfo, 
enum vect_def_type *dt,
        case vect_condition_def:
          dump_printf (MSG_NOTE, "control flow\n");
          break;
+       case vect_predicated_index_def:
+         dump_printf (MSG_NOTE, "predicated index\n");
+         break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
     }
 
+  /* TODO: Till predicated-index PHI is handled, exit gracefully.  */
+  if (*dt == vect_predicated_index_def)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "unsupported: predicated-index PHI.\n");
+      return false;
+    }
+
   if (*dt == vect_unknown_def_type)
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index de50ed3277c..c8108ea75d0 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -76,6 +76,7 @@ enum vect_def_type {
   vect_nested_cycle,
   vect_first_order_recurrence,
   vect_condition_def,
+  vect_predicated_index_def,
   vect_unknown_def_type
 };
 
@@ -1266,6 +1267,10 @@ public:
   /* Record statements that are needed to be live for early break vectorization
      but may not have an LC PHI node materialized yet in the exits.  */
   auto_vec<stmt_vec_info> early_break_live_ivs;
+
+  /* Predicated-index PHI and matching IFN_MASK_STORE.  */
+  gphi *predicated_index_phi;
+  gcall *predicated_index_mask_store;
 } *loop_vec_info;
 
 /* Access Functions.  */
-- 
2.34.1

Reply via email to