[Public]

Hi,

This patch enables BB SLP vectorization with reduction for  "sum of diffs" kind 
of patterns. The original behaviour is retained with a first attempt and if it 
fails a second attempt is made by prohibiting expansion of the reduction chain 
into statements with MINUX_EXPR.

Bootstrapped and tested on x86.

Thank You,
Raghesh

gcc/ChangeLog:

        * tree-vect-slp.cc : (vect_slp_linearize_chain): Optional parameter 
allow_alt_code added (default true), check added not to follow MINUS_EXPR, when 
false
        (vect_slp_check_reduction_root): New function
        (vect_slp_check_for_roots): Calls to vect_slp_check_reduction_root

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/bb-slp-sum-of-diffs.c: New test.

>From 2082e7df1a6d577edbe22bff8597e3c65115254b Mon Sep 17 00:00:00 2001
From: Raghesh Aloor [email protected]<mailto:[email protected]>
Date: Wed, 11 Feb 2026 09:29:55 +0530
Subject: [PATCH] BB SLP: Finding reduction roots with two attempts

Add an optional parameter allow_alt_code to vect_slp_linearize_chain
(default true).  When false, do not follow into MINUS_EXPR when building
a PLUS reduction chain; treat MINUS results as leaves.

In vect_slp_check_for_roots, try building the chain with allow_alt_code
enabled first.  If that yields an invalid chain (e.g. mixed ops), retry
with allow_alt_code disabled so that "sum of diffs" (d_i = a_i - b_i,
sum = d0 + d1 + ... + d7) is recognized: the second attempt keeps d_i
as chain operands instead of expanding into the MINUS.

Pure PLUS chains are unchanged (first attempt succeeds).  Other callers
of vect_slp_linearize_chain keep the default and are unaffected.

Test case added: vect/bb-slp-sum-of-diffs.c
---
.../gcc.dg/vect/bb-slp-sum-of-diffs.c         |  25 +++
gcc/tree-vect-slp.cc                          | 156 +++++++++++-------
2 files changed, 117 insertions(+), 64 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-sum-of-diffs.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-sum-of-diffs.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-sum-of-diffs.c
new file mode 100644
index 00000000000..a5f066a2692
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-sum-of-diffs.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3 -fdump-tree-slp2" } */
+
+/* Test that SLP recognizes "sum of diffs" (d_i = a[i]-b[i], sum = d0+...+d7)
+   as a reduction chain and vectorizes the basic block.  */
+
+int __attribute__((noipa))
+sum_of_diffs (int *a, int *b)
+{
+  int d0 = a[0] - b[0];
+  int d1 = a[1] - b[1];
+  int d2 = a[2] - b[2];
+  int d3 = a[3] - b[3];
+  int d4 = a[4] - b[4];
+  int d5 = a[5] - b[5];
+  int d6 = a[6] - b[6];
+  int d7 = a[7] - b[7];
+
+  int sum = d0 + d1 + d2 + d3 + d4 + d5 + d6 + d7;
+  return sum;
+}
+
+/* Check that the reduction was SLP-vectorized (REDUC_PLUS appears in the 
tree).  */
+/* { dg-final { scan-tree-dump "\.REDUC_PLUS" "slp2" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c932e8d5cba..5ce692fc05b 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1755,8 +1755,9 @@ dt_sort_cmp (const void *op1_, const void *op2_, void *)
    associatable operation CODE (where PLUS_EXPR also allows MINUS_EXPR),
    filling CHAIN with the result and using WORKLIST as intermediate storage.
    CODE_STMT and ALT_CODE_STMT are filled with the first stmt using CODE
-   or MINUS_EXPR.  *CHAIN_STMTS if not NULL is filled with all computation
-   stmts, starting with START.  */
+   or MINUS_EXPR.     *CHAIN_STMTS if not NULL is filled with all computation
+   stmts, starting with START.  When ALLOW_ALT_CODE is false, do not
+   follow into MINUS_EXPR when building a PLUS chain (treat MINUS as leaf).  */

static void
vect_slp_linearize_chain (vec_info *vinfo,
@@ -1764,7 +1765,8 @@ vect_slp_linearize_chain (vec_info *vinfo,
                                             vec<chain_op_t> &chain,
                                             enum tree_code code, gimple *start,
                                             gimple *&code_stmt, gimple 
*&alt_code_stmt,
-                                            vec<gimple *> *chain_stmts)
+                                           vec<gimple *> *chain_stmts,
+                                           bool allow_alt_code = true)
{
   /* For each lane linearize the addition/subtraction (or other
      uniform associatable operation) expression tree.  */
@@ -1800,7 +1802,8 @@ vect_slp_linearize_chain (vec_info *vinfo,
                    && single_imm_use (op, &use_p, &use_stmt)
                    && is_gimple_assign (def_stmt_info->stmt)
                    && (gimple_assign_rhs_code (def_stmt_info->stmt) == code
-                             || (code == PLUS_EXPR
+                            || (allow_alt_code
+                                && code == PLUS_EXPR
                                   && (gimple_assign_rhs_code 
(def_stmt_info->stmt)
                                             == MINUS_EXPR))))
                  {
@@ -9874,6 +9877,75 @@ vect_slp_is_lane_insert (gimple *use_stmt, tree vec, 
unsigned *this_lane)
   return true;
}

+/* Check if ASSIGN is the end of a valid reduction chain and, if so, register
+   it as a reduction root in BB_VINFO->roots.  Linearize with ALLOW_ALT_CODE
+   (when false, do not follow into MINUS when building a PLUS chain).  Return
+   true if a root was registered, false otherwise.  */
+
+static bool
+vect_slp_check_reduction_root (bb_vec_info bb_vinfo,
+                                                vec<std::pair<tree_code, 
gimple *> > &worklist,
+                                                vec<chain_op_t> &chain,
+                                                enum tree_code code, gimple 
*assign,
+                                                gimple *&code_stmt, gimple 
*&alt_code_stmt,
+                                                vec<gimple *> *chain_stmts,
+                                                bool allow_alt_code)
+{
+  vect_slp_linearize_chain (bb_vinfo, worklist, chain, code, assign,
+                                             code_stmt, alt_code_stmt, 
chain_stmts,
+                                             allow_alt_code);
+  if (chain.length () <= 1)
+    return false;
+  chain.sort (dt_sort_cmp, bb_vinfo);
+  /* ???  Now we'd want to strip externals and constants
+     but record those to be handled in the epilogue.  */
+  /* ???  For now do not allow mixing ops or externs/constants.  */
+  bool invalid = false;
+  unsigned remain_cnt = 0;
+  unsigned last_idx = 0;
+  for (unsigned i = 0; i < chain.length (); ++i)
+    {
+      if (chain[i].code != code)
+            {
+              invalid = true;
+              break;
+            }
+      if (chain[i].dt != vect_internal_def
+              || (gimple_get_lhs (bb_vinfo->lookup_def (chain[i].op)->stmt)
+                  != chain[i].op))
+            remain_cnt++;
+      else
+            last_idx = i;
+    }
+  if ((chain.length () - remain_cnt) & 1)
+    remain_cnt++;
+  if (invalid || chain.length () - remain_cnt <= 1)
+    return false;
+  vec<stmt_vec_info> stmts;
+  vec<tree> remain = vNULL;
+  stmts.create (chain.length ());
+  if (remain_cnt > 0)
+    remain.create (remain_cnt);
+  for (unsigned i = 0; i < chain.length (); ++i)
+    {
+      stmt_vec_info stmt_info;
+      if (chain[i].dt == vect_internal_def
+              && ((stmt_info = bb_vinfo->lookup_def (chain[i].op)),
+                  gimple_get_lhs (stmt_info->stmt) == chain[i].op)
+              && (i != last_idx || (stmts.length () & 1)))
+            stmts.quick_push (stmt_info);
+      else
+            remain.quick_push (chain[i].op);
+    }
+  vec<stmt_vec_info> roots;
+  roots.create (chain_stmts->length ());
+  for (unsigned i = 0; i < chain_stmts->length (); ++i)
+    roots.quick_push (bb_vinfo->lookup_stmt ((*chain_stmts)[i]));
+  bb_vinfo->roots.safe_push (slp_root (slp_inst_kind_bb_reduc,
+                                                              stmts, roots, 
remain));
+  return true;
+}
+
/* Find any vectorizable constructors and add them to the grouped_store
    array.  */

@@ -10046,66 +10118,22 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo)
                if (!reduction_fn_for_scalar_code (code, &reduc_fn)
                    || reduc_fn == IFN_LAST)
                  continue;
-               vect_slp_linearize_chain (bb_vinfo, worklist, chain, code, 
assign,
-                                                            /* ??? */
-                                                            code_stmt, 
alt_code_stmt, &chain_stmts);
-               if (chain.length () > 1)
-                 {
-                   /* Sort the chain according to def_type and operation.  */
-                   chain.sort (dt_sort_cmp, bb_vinfo);
-                   /* ???  Now we'd want to strip externals and constants
-                           but record those to be handled in the epilogue.  */
-                   /* ???  For now do not allow mixing ops or 
externs/constants.  */
-                   bool invalid = false;
-                   unsigned remain_cnt = 0;
-                   unsigned last_idx = 0;
-                   for (unsigned i = 0; i < chain.length (); ++i)
-                           {
-                             if (chain[i].code != code)
-                               {
-                                 invalid = true;
-                                 break;
-                               }
-                             if (chain[i].dt != vect_internal_def
-                                 /* Avoid stmts where the def is not the LHS, 
like
-                                          ASMs.  */
-                                 || (gimple_get_lhs (bb_vinfo->lookup_def
-                                                                               
            (chain[i].op)->stmt)
-                                            != chain[i].op))
-                               remain_cnt++;
-                             else
-                               last_idx = i;
-                           }
-                   /* Make sure to have an even number of lanes as we later do
-                           all-or-nothing discovery, not trying to split 
further.  */
-                   if ((chain.length () - remain_cnt) & 1)
-                           remain_cnt++;
-                   if (!invalid && chain.length () - remain_cnt > 1)
-                           {
-                             vec<stmt_vec_info> stmts;
-                             vec<tree> remain = vNULL;
-                             stmts.create (chain.length ());
-                             if (remain_cnt > 0)
-                               remain.create (remain_cnt);
-                             for (unsigned i = 0; i < chain.length (); ++i)
-                               {
-                                 stmt_vec_info stmt_info;
-                                 if (chain[i].dt == vect_internal_def
-                                            && ((stmt_info = 
bb_vinfo->lookup_def (chain[i].op)),
-                                                gimple_get_lhs 
(stmt_info->stmt) == chain[i].op)
-                                            && (i != last_idx
-                                                || (stmts.length () & 1)))
-                                          stmts.quick_push (stmt_info);
-                                 else
-                                          remain.quick_push (chain[i].op);
-                               }
-                             vec<stmt_vec_info> roots;
-                             roots.create (chain_stmts.length ());
-                             for (unsigned i = 0; i < chain_stmts.length (); 
++i)
-                               roots.quick_push (bb_vinfo->lookup_stmt 
(chain_stmts[i]));
-                             bb_vinfo->roots.safe_push (slp_root 
(slp_inst_kind_bb_reduc,
-                                                                               
             stmts, roots, remain));
-                           }
+              /* First attempt: build chain with following into MINUS_EXPR 
enabled.
+                 If that yields a valid uniform chain, use it.  Otherwise retry
+                 with following MINUS disabled (e.g. sum of diffs d0+d1+... 
where
+                 d_i = a_i - b_i).  */
+              if (!vect_slp_check_reduction_root (bb_vinfo, worklist, chain, 
code,
+                                                                           
assign, code_stmt, alt_code_stmt,
+                                                                           
&chain_stmts, true))
+                {
+                  worklist.truncate (0);
+                  chain.truncate (0);
+                  chain_stmts.truncate (0);
+                  code_stmt = NULL;
+                  alt_code_stmt = NULL;
+                  vect_slp_check_reduction_root (bb_vinfo, worklist, chain, 
code,
+                                                                          
assign, code_stmt, alt_code_stmt,
+                                                                          
&chain_stmts, false);
                  }
              }
     }
--
2.34.1

Reply via email to