Hi,

The following is version 2 of the patch, proposed for master, aiming to fix
PR104116.  It has been bootstrapped and regtested on powerpc64le with no
regression failures.
Kindly review.

Just had one question.
If I implement the same for {CEIL,ROUND}_{MOD,DIV}, should I send it as a
separate patch, or as a follow-up series together with this one
([PATCH v2 1/3] and so on)?

Thanks and regards,
Avinash Jayakar

Changes from v1:
        - Added new tests checking vectorization of FLOOR_{DIV,MOD}
          along all code-generation paths.
        - Incorporated review comments: use proper vector mask types and
          check whether the target supports the generated operations.

vect: Add vectorization logic for FLOOR_{MOD,DIV} [PR104116]

Add logic in the vectorizer for FLOOR_MOD_EXPR and FLOOR_DIV_EXPR.  As
mentioned in PR104116, the scalar expansions are:
FLOOR_MOD_EXPR:
        r = x %[fl] y; is
        r = x % y; if (r && (x ^ y) < 0) r += y;
FLOOR_DIV_EXPR:
        d = x /[fl] y; is
        r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
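
For reference, here is a small stand-alone C program (not part of the patch,
only an illustration) that checks these identities against the defining
property of floor division, i.e. x == d * y + r with r having the sign of y:

        #include <assert.h>

        static void
        check (int x, int y)
        {
          /* FLOOR_MOD_EXPR expansion.  */
          int r = x % y;
          if (r && (x ^ y) < 0)
            r += y;

          /* FLOOR_DIV_EXPR expansion.  */
          int tr = x % y, d = x / y;
          if (tr && (x ^ y) < 0)
            --d;

          assert (x == d * y + r);
          assert (y > 0 ? r >= 0 && r < y : r <= 0 && r > y);
        }

        int
        main (void)
        {
          for (int x = -20; x <= 20; x++)
            for (int y = -7; y <= 7; y++)
              if (y != 0)
                check (x, y);
          return 0;
        }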

Added a new helper function add_code_for_floor_divmod in
tree-vect-patterns.cc that emits the compensation code for floor mod and
floor div.  The function checks whether the target supports all operations
needed to implement FLOOR_{DIV,MOD} and then generates the vectorized
pattern statements.  Pseudocode of the generated code:
        v0 = x^y
        v1 = -r
        v2 = r | -r (if r!=0, then v2 < 0)
        v3 = v0 & v2
        v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
        if floor_mod
          v5 = v4 ? y : 0
          v6 = r + v5  (final result)
        else if floor_div
          v5 = v4 ? 1 : 0
          v6 = d - v5  (final result)
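
For illustration only (not part of the patch), the same branchless sequence
written as scalar C; the helper names are hypothetical and the trick relies
on r | -r having its sign bit set exactly when r != 0:

        static int
        floor_mod_branchless (int x, int y)
        {
          int r = x % y;
          int v0 = x ^ y;                 /* sign bit set iff signs differ  */
          int v2 = r | -r;                /* sign bit set iff r != 0        */
          int v4 = (v0 & v2) < 0;         /* r != 0 && (x ^ y) < 0          */
          return r + (v4 ? y : 0);        /* v6 for floor_mod               */
        }

        static int
        floor_div_branchless (int x, int y)
        {
          int d = x / y, r = x % y;
          int v4 = ((x ^ y) & (r | -r)) < 0;
          return d - (v4 ? 1 : 0);        /* v6 for floor_div               */
        }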

Added tests to check vectorization along all three code-generation paths:
1. operand1 == 2
2. operand1 a power of 2 other than 2
3. operand1 not a power of 2

2025-09-18  Avinash Jayakar  <[email protected]>

gcc/ChangeLog:
        PR vect/104116
        * tree-vect-patterns.cc (add_code_for_floor_divmod): New helper
        function to generate the floor div/mod compensation code.
        (vect_recog_divmod_pattern): Handle FLOOR_DIV_EXPR and
        FLOOR_MOD_EXPR.

gcc/testsuite/ChangeLog:
        PR vect/104116
        * gcc.dg/vect/pr104116-floor-divmod.c: New test.
---
 .../gcc.dg/vect/pr104116-floor-divmod.c       |  58 +++++++
 gcc/tree-vect-patterns.cc                     | 148 ++++++++++++++++--
 2 files changed, 193 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
new file mode 100644
index 00000000000..507c1d1e33a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
@@ -0,0 +1,58 @@
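+/* PR 104116: FLOOR_{DIV,MOD} by a constant should be vectorized just like
+   TRUNC_MOD; cover divisor 2, another power of 2, and a non-power of 2.  */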
+/* { dg-do compile } */
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-additional-options "-mcpu=power8" { target powerpc*-*-* } } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+#define TEST_FN(OP, CONST, NAME) void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (int * a) \
+{ \
+  int i; \
+  long unsigned int _1; \
+  long unsigned int _2; \
+  int * _3; \
+  int _4; \
+  int _5; \
+  unsigned int _12; \
+  unsigned int _13; \
+ \
+  __BB(2,guessed_local(10737416)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(3,loop_header(1),guessed_local(1063004408)): \
+  i_14 = __PHI (__BB5: i_11, __BB2: 0); \
+  _13 = __PHI (__BB5: _12, __BB2: 512u); \
+  _1 = (long unsigned int) i_14; \
+  _2 = _1 * 4ul; \
+  _3 = a_9(D) + _2; \
+  _4 = __MEM <int> (_3); \
+  _5 = _4 OP CONST; \
+  __MEM <int> (_3) = _5; \
+  i_11 = i_14 + 2; \
+  _12 = _13 - 1u; \
+  if (_12 != 0u) \
+    goto __BB5(guessed(132861994)); \
+  else \
+    goto __BB4(guessed(1355734)); \
+ \
+  __BB(5,guessed_local(1052266995)): \
+  goto __BB3(precise(134217728)); \
+ \
+  __BB(4,guessed_local(10737416)): \
+  return; \
+} \
+
+TEST_FN(%, 2, trunc_mod_2)
+TEST_FN(__FLOOR_MOD, 2, floor_mod_2)
+TEST_FN(__FLOOR_DIV, 2, floor_div_2)
+
+TEST_FN(%, 4, trunc_mod_pow2)
+TEST_FN(__FLOOR_MOD, 4, floor_mod_pow2)
+TEST_FN(__FLOOR_DIV, 4, floor_div_pow2)
+
+TEST_FN(%, 5, trunc_mod)
+TEST_FN(__FLOOR_MOD, 5, floor_mod)
+TEST_FN(__FLOOR_DIV, 5, floor_div)
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 9 "vect" } } */
+
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 70bf768d339..72b8b2768ef 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4833,6 +4833,88 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
   return NULL;
 }
 
+/* Helper for vect_recog_divmod_pattern.  Q and R are the truncating
+   division result and remainder of OPRND0 and OPRND1 (all of type ITYPE).
+   Append pattern statements that adjust R (for FLOOR_MOD_EXPR) or Q (for
+   FLOOR_DIV_EXPR), as selected by RHS_CODE, to the floor result.  Return
+   the final pattern statement, or NULL if the target does not support the
+   required vector operations on VECTYPE.  */
+
+static gimple *
+add_code_for_floor_divmod (tree vectype, vec_info *vinfo,
+                           stmt_vec_info stmt_vinfo, enum tree_code rhs_code,
+                           tree q, tree r, tree oprnd0, tree oprnd1,
+                           tree itype)
+{
+  gimple *def_stmt;
+  tree mask_vectype = truth_type_for (vectype);
+  if (!mask_vectype)
+    return NULL;
+  if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_AND_EXPR, vectype)
+      || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+      || !target_has_vecop_for_code (MINUS_EXPR, vectype)
+      || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+      || !expand_vec_cond_expr_p (vectype, mask_vectype))
+    return NULL;
+
+  /* r = x %[fl] y;
+     is
+     r = x % y; if (r && (x ^ y) < 0) r += y;
+     and d = x /[fl] y; decrements d = x / y under the same condition.
+     Produce the following branchless sequence:
+       v0 = x ^ y
+       v1 = -r
+       v2 = r | -r
+       v3 = v0 & v2
+       v4 = v3 < 0              (equivalent to (r && (x ^ y) < 0))
+       v5 = v4 ? y : 0          (or v4 ? 1 : 0 for floor div)
+       v6 = r + v5              (or d - v5 for floor div)  */
+
+  /* x ^ y.  */
+  tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* -r.  */
+  tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* r | -r; the sign bit is set iff r != 0.  */
+  tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* (x ^ y) & (r | -r).  */
+  tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR, r_or_negr,
+                                  cond_reg);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* ((x ^ y) & (r | -r)) < 0, i.e. (x ^ y) < 0 && r != 0.  */
+  tree bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+  def_stmt = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
+                                  build_int_cst (itype, 0));
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype, itype);
+
+  if (rhs_code == FLOOR_MOD_EXPR)
+    {
+      /* ((x ^ y) < 0 && r) ? y : 0.  */
+      tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+      def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond, oprnd1,
+                                      build_int_cst (itype, 0));
+      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+      /* r += ((x ^ y) < 0 && r) ? y : 0.  */
+      tree floor_mod_r = vect_recog_temp_ssa_var (itype, NULL);
+      return gimple_build_assign (floor_mod_r, PLUS_EXPR, r, extr_cond);
+    }
+  else if (rhs_code == FLOOR_DIV_EXPR)
+    {
+      /* ((x ^ y) < 0 && r) ? 1 : 0.  */
+      tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+      def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
+                                      build_int_cst (itype, 1),
+                                      build_int_cst (itype, 0));
+      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+      /* q -= ((x ^ y) < 0 && r) ? 1 : 0.  */
+      tree floor_div_q = vect_recog_temp_ssa_var (itype, NULL);
+      return gimple_build_assign (floor_div_q, MINUS_EXPR, q, extr_cond);
+    }
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
@@ -4894,6 +4976,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
     case TRUNC_DIV_EXPR:
     case EXACT_DIV_EXPR:
     case TRUNC_MOD_EXPR:
+    case FLOOR_MOD_EXPR:
+    case FLOOR_DIV_EXPR:
       break;
     default:
       return NULL;
@@ -4945,17 +5029,27 @@ vect_recog_divmod_pattern (vec_info *vinfo,
          gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
          gimple_call_set_lhs (div_stmt, var_div);
 
-         if (rhs_code == TRUNC_MOD_EXPR)
+         if (rhs_code == TRUNC_MOD_EXPR || rhs_code == FLOOR_MOD_EXPR
+             || rhs_code == FLOOR_DIV_EXPR)
            {
              append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
+             tree t1 = vect_recog_temp_ssa_var (itype, NULL);
              def_stmt
-               = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
-                                      LSHIFT_EXPR, var_div, shift);
+               = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
              append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
              pattern_stmt
                = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
-                                      MINUS_EXPR, oprnd0,
-                                      gimple_assign_lhs (def_stmt));
+                                      MINUS_EXPR, oprnd0, t1);
+             if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+               {
+                 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+                 tree r = gimple_assign_lhs (pattern_stmt);
+                 pattern_stmt
+                   = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+                                                rhs_code, var_div, r, oprnd0,
+                                                oprnd1, itype);
+                 if (pattern_stmt == NULL)
+                   return NULL;
+               }
            }
          else
            pattern_stmt = div_stmt;
@@ -4969,8 +5063,10 @@ vect_recog_divmod_pattern (vec_info *vinfo,
                                      build_int_cst (itype, 0));
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
                              truth_type_for (vectype), itype);
+      tree div_result = NULL_TREE;
       if (rhs_code == TRUNC_DIV_EXPR
-         || rhs_code == EXACT_DIV_EXPR)
+         || rhs_code == EXACT_DIV_EXPR
+         || rhs_code == FLOOR_DIV_EXPR)
        {
          tree var = vect_recog_temp_ssa_var (itype, NULL);
          tree shift;
@@ -4987,12 +5083,18 @@ vect_recog_divmod_pattern (vec_info *vinfo,
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
          shift = build_int_cst (itype, tree_log2 (oprnd1));
+          div_result = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
-           = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+           = gimple_build_assign (div_result,
                                   RSHIFT_EXPR, var, shift);
        }
-      else
+      if (rhs_code == TRUNC_MOD_EXPR
+          || rhs_code == FLOOR_MOD_EXPR
+          || rhs_code == FLOOR_DIV_EXPR)
        {
+          if (rhs_code == FLOOR_DIV_EXPR)
+            append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+
          tree signmask;
          if (compare_tree_int (oprnd1, 2) == 0)
            {
@@ -5037,10 +5139,19 @@ vect_recog_divmod_pattern (vec_info *vinfo,
                                                build_int_cst (itype, 1)));
          append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
+          tree r = vect_recog_temp_ssa_var (itype, NULL);
          pattern_stmt
-           = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
-                                  MINUS_EXPR, gimple_assign_lhs (def_stmt),
+           = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
                                   signmask);
+          if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+            {
+              append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+              pattern_stmt
+                = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+                                             rhs_code, div_result, r, oprnd0,
+                                             oprnd1, itype);
+              if (pattern_stmt == NULL)
+                return NULL;
+            }
        }
 
       return pattern_stmt;
@@ -5347,13 +5458,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
        }
     }
 
-  if (rhs_code == TRUNC_MOD_EXPR)
+  if (rhs_code == TRUNC_MOD_EXPR
+      || rhs_code == FLOOR_MOD_EXPR
+      || rhs_code == FLOOR_DIV_EXPR)
     {
       tree r, t1;
 
       /* We divided.  Now finish by:
         t1 = q * oprnd1;
         r = oprnd0 - t1;  */
       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
 
       t1 = vect_recog_temp_ssa_var (itype, NULL);
@@ -5362,6 +5475,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
 
       r = vect_recog_temp_ssa_var (itype, NULL);
       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+
+      if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+        {
+          append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+          pattern_stmt
+            = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo, rhs_code,
+                                         q, r, oprnd0, oprnd1, itype);
+          if (pattern_stmt == NULL)
+            return NULL;
+        }
     }
 
   /* Pattern detected.  */
-- 
2.47.3
