Hi,
Following is version 2 of the patch proposed for master, aiming to fix
PR104116. It has been bootstrapped and regtested on powerpc64le with no
regression failures.
Kindly review.
One question: if I implement the same thing for {CEIL,ROUND}_{MOD,DIV},
should I send it as a separate patch, or as a series together with this
one, i.e. [PATCH v2 1/3] and so on?
Thanks and regards,
Avinash Jayakar
Changes from v1:
- Added new tests checking vectorization of FLOOR_{DIV,MOD} along
multiple paths.
- Incorporated review comments: use proper vector masks and check
that the target supports the generated operations.
vect: Add vectorization logic for FLOOR_{MOD,DIV} [PR104116]
Add logic in the tree vectorizer for FLOOR_MOD_EXPR and FLOOR_DIV_EXPR.
As mentioned in PR104116, the scalar equivalences are:
FLOOR_MOD_EXPR:
r = x %[fl] y; is
r = x % y; if (r && (x ^ y) < 0) r += y;
FLOOR_DIV_EXPR:
d = x /[fl] y; is
r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
Added a new helper function "add_code_for_floor_divmod" in
tree-vect-patterns.cc that emits the compensating code for floor mod and
floor div. The function first checks that the target supports all
operations needed to implement floor_{div,mod}, then generates the
vectorized code for the respective operation. Pseudocode for the
generated sequence is given below:
v0 = x^y
v1 = -r
v2 = r | -r (if r!=0, then v2 < 0)
v3 = v0 & v2
v4 = v3 < 0 (equivalent to (r && (x ^ y) < 0))
if floor_mod
v5 = v4 ? y : 0
v6 = r + v5 (final result)
else if floor_div
v5 = v4 ? 1 : 0
v6 = d - v5 (final result)
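The sequence above as a branchless scalar sketch (again illustration
only; the patch itself builds the vector statements via
gimple_build_assign):

    /* (r | -r) has the sign bit set iff r != 0, so v3 < 0 is
       equivalent to (r != 0 && (x ^ y) < 0).  */
    static int floor_mod_branchless (int x, int y)
    {
      int r = x % y;
      int v3 = (x ^ y) & (r | -r);
      return r + (v3 < 0 ? y : 0);
    }

    static int floor_div_branchless (int x, int y)
    {
      int r = x % y;
      int d = x / y;
      int v3 = (x ^ y) & (r | -r);
      return d - (v3 < 0 ? 1 : 0);
    }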
Added tests to check vectorization along all three paths:
1. operand1 == 2
2. operand1 a power of 2
3. operand1 not a power of 2
2025-09-18 Avinash Jayakar <[email protected]>
gcc/ChangeLog:
PR tree-optimization/104116
* tree-vect-patterns.cc (add_code_for_floor_divmod): New function
to emit the compensating code for floor div/mod.
(vect_recog_divmod_pattern): Handle FLOOR_DIV_EXPR and
FLOOR_MOD_EXPR.
gcc/testsuite/ChangeLog:
PR tree-optimization/104116
* gcc.dg/vect/pr104116-floor-divmod.c: New test.
---
.../gcc.dg/vect/pr104116-floor-divmod.c | 58 +++++++
gcc/tree-vect-patterns.cc | 148 ++++++++++++++++--
2 files changed, 193 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
new file mode 100644
index 00000000000..507c1d1e33a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104116-floor-divmod.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-additional-options "-mcpu=power8" { target powerpc*-*-* } } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-require-effective-target vect_shift } */
+
+#define TEST_FN(OP, CONST, NAME) void __GIMPLE (ssa,guessed_local(10737416)) \
+NAME (int * a) \
+{ \
+ int i; \
+ long unsigned int _1; \
+ long unsigned int _2; \
+ int * _3; \
+ int _4; \
+ int _5; \
+ unsigned int _12; \
+ unsigned int _13; \
+ \
+ __BB(2,guessed_local(10737416)): \
+ goto __BB3(precise(134217728)); \
+ \
+ __BB(3,loop_header(1),guessed_local(1063004408)): \
+ i_14 = __PHI (__BB5: i_11, __BB2: 0); \
+ _13 = __PHI (__BB5: _12, __BB2: 512u); \
+ _1 = (long unsigned int) i_14; \
+ _2 = _1 * 4ul; \
+ _3 = a_9(D) + _2; \
+ _4 = __MEM <int> (_3); \
+ _5 = _4 OP CONST; \
+ __MEM <int> (_3) = _5; \
+ i_11 = i_14 + 2; \
+ _12 = _13 - 1u; \
+ if (_12 != 0u) \
+ goto __BB5(guessed(132861994)); \
+ else \
+ goto __BB4(guessed(1355734)); \
+ \
+ __BB(5,guessed_local(1052266995)): \
+ goto __BB3(precise(134217728)); \
+ \
+ __BB(4,guessed_local(10737416)): \
+ return; \
+} \
+
+TEST_FN(%, 2, trunc_mod_2)
+TEST_FN(__FLOOR_MOD, 2, floor_mod_2)
+TEST_FN(__FLOOR_DIV, 2, floor_div_2)
+
+TEST_FN(%, 4, trunc_mod_pow2)
+TEST_FN(__FLOOR_MOD, 4, floor_mod_pow2)
+TEST_FN(__FLOOR_DIV, 4, floor_div_pow2)
+
+TEST_FN(%, 5, trunc_mod)
+TEST_FN(__FLOOR_MOD, 5, floor_mod)
+TEST_FN(__FLOOR_DIV, 5, floor_div)
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 9 "vect" } } */
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 70bf768d339..72b8b2768ef 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4833,6 +4833,88 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
return NULL;
}
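+
+/* Helper for vect_recog_divmod_pattern.  Given the truncating quotient Q
+   and remainder R of OPRND0 and OPRND1 (all of type ITYPE), emit pattern
+   statements that compensate the result for RHS_CODE, which must be
+   FLOOR_DIV_EXPR or FLOOR_MOD_EXPR.  Return the final pattern statement,
+   or NULL if the target does not support the required vector operations.  */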
+static gimple *
+add_code_for_floor_divmod (tree vectype, vec_info *vinfo,
+                           stmt_vec_info stmt_vinfo, enum tree_code rhs_code,
+                           tree q, tree r, tree oprnd0, tree oprnd1,
+                           tree itype)
+{
+  gimple *def_stmt;
+  tree mask_vectype = truth_type_for (vectype);
+  if (!mask_vectype)
+    return NULL;
+  if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
+      || !target_has_vecop_for_code (BIT_AND_EXPR, vectype)
+      || !target_has_vecop_for_code (PLUS_EXPR, vectype)
+      || !target_has_vecop_for_code (MINUS_EXPR, vectype)
+      || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
+      || !expand_vec_cond_expr_p (vectype, mask_vectype))
+    return NULL;
+
+  /* r = x %[fl] y;
+     is
+     r = x % y; if (r && (x ^ y) < 0) r += y;
+     Produce the following sequence:
+     v0 = x ^ y
+     v1 = -r
+     v2 = r | -r
+     v3 = v0 & v2
+     v4 = v3 < 0  (equivalent to (r && (x ^ y) < 0))
+     v5 = v4 ? y : 0
+     v6 = r + v5  (final result)  */
+  tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* -r.  */
+  tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* r | -r; the sign bit is set iff r != 0.  */
+  tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* (x ^ y) & (r | -r).  */
+  tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
+  def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR, r_or_negr,
+                                  cond_reg);
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+  /* ((x ^ y) & (r | -r)) < 0, equivalent to ((x ^ y) < 0 && r != 0).  */
+  tree bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+  def_stmt = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
+                                  build_int_cst (itype, 0));
+  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
+                          mask_vectype, itype);
+
+  if (rhs_code == FLOOR_MOD_EXPR)
+    {
+      /* ((x ^ y) < 0 && r) ? y : 0.  */
+      tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+      def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond, oprnd1,
+                                      build_int_cst (itype, 0));
+      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+      /* r += ((x ^ y) < 0 && r) ? y : 0.  */
+      tree floor_mod_r = vect_recog_temp_ssa_var (itype, NULL);
+      return gimple_build_assign (floor_mod_r, PLUS_EXPR, r, extr_cond);
+    }
+  else if (rhs_code == FLOOR_DIV_EXPR)
+    {
+      /* ((x ^ y) < 0 && r) ? 1 : 0.  */
+      tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
+      def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
+                                      build_int_cst (itype, 1),
+                                      build_int_cst (itype, 0));
+      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+
+      /* q -= ((x ^ y) < 0 && r) ? 1 : 0.  */
+      tree floor_div_q = vect_recog_temp_ssa_var (itype, NULL);
+      return gimple_build_assign (floor_div_q, MINUS_EXPR, q, extr_cond);
+    }
+  else
+    return NULL;
+}
+
/* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
@@ -4894,6 +4976,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
case TRUNC_DIV_EXPR:
case EXACT_DIV_EXPR:
case TRUNC_MOD_EXPR:
+ case FLOOR_MOD_EXPR:
+ case FLOOR_DIV_EXPR:
break;
default:
return NULL;
@@ -4945,17 +5029,27 @@ vect_recog_divmod_pattern (vec_info *vinfo,
gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
gimple_call_set_lhs (div_stmt, var_div);
- if (rhs_code == TRUNC_MOD_EXPR)
+  if (rhs_code == TRUNC_MOD_EXPR
+      || rhs_code == FLOOR_MOD_EXPR
+      || rhs_code == FLOOR_DIV_EXPR)
{
append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
+ tree t1 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
- LSHIFT_EXPR, var_div, shift);
+ = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
- MINUS_EXPR, oprnd0,
- gimple_assign_lhs (def_stmt));
+ MINUS_EXPR, oprnd0, t1);
+      if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+	{
+	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+	  tree r = gimple_assign_lhs (pattern_stmt);
+	  pattern_stmt = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+						    rhs_code, var_div, r,
+						    oprnd0, oprnd1, itype);
+	  if (pattern_stmt == NULL)
+	    return NULL;
+	}
}
else
pattern_stmt = div_stmt;
@@ -4969,8 +5063,10 @@ vect_recog_divmod_pattern (vec_info *vinfo,
build_int_cst (itype, 0));
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
truth_type_for (vectype), itype);
+ tree div_result = NULL_TREE;
if (rhs_code == TRUNC_DIV_EXPR
- || rhs_code == EXACT_DIV_EXPR)
+ || rhs_code == EXACT_DIV_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
{
tree var = vect_recog_temp_ssa_var (itype, NULL);
tree shift;
@@ -4987,12 +5083,18 @@ vect_recog_divmod_pattern (vec_info *vinfo,
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
shift = build_int_cst (itype, tree_log2 (oprnd1));
+ div_result = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+ = gimple_build_assign (div_result,
RSHIFT_EXPR, var, shift);
}
- else
+ if (rhs_code == TRUNC_MOD_EXPR
+ || rhs_code == FLOOR_MOD_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
{
+ if (rhs_code == FLOOR_DIV_EXPR)
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+
tree signmask;
if (compare_tree_int (oprnd1, 2) == 0)
{
@@ -5037,10 +5139,19 @@ vect_recog_divmod_pattern (vec_info *vinfo,
build_int_cst (itype, 1)));
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
+ tree r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
- MINUS_EXPR, gimple_assign_lhs (def_stmt),
+ = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
signmask);
+ if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+ {
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+	  pattern_stmt = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+						    rhs_code, div_result, r,
+						    oprnd0, oprnd1, itype);
+ if (pattern_stmt == NULL)
+ return NULL;
+ }
}
return pattern_stmt;
@@ -5347,13 +5458,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
}
}
- if (rhs_code == TRUNC_MOD_EXPR)
+ if (rhs_code == TRUNC_MOD_EXPR
+ || rhs_code == FLOOR_MOD_EXPR
+ || rhs_code == FLOOR_DIV_EXPR)
{
tree r, t1;
/* We divided. Now finish by:
- t1 = q * oprnd1;
- r = oprnd0 - t1; */
+ t1 = q * oprnd1;
+ r = oprnd0 - t1; */
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
t1 = vect_recog_temp_ssa_var (itype, NULL);
@@ -5362,6 +5475,15 @@ vect_recog_divmod_pattern (vec_info *vinfo,
r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
+
+ if (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR)
+ {
+ append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+	  pattern_stmt = add_code_for_floor_divmod (vectype, vinfo, stmt_vinfo,
+						    rhs_code, q, r, oprnd0,
+						    oprnd1, itype);
+ if (pattern_stmt == NULL)
+ return NULL;
+ }
}
/* Pattern detected. */
--
2.47.3