Hi,
When first introducing the cbranch_any/_all patterns, I messed up the "all"
pattern. After giving it more thought, I removed the patterns entirely.
Our current early-break handling via autovec-opt.md is not ideal, but it is
similar to what the patterns would give us, so there is no need to add
confusion. The situation will improve anyway once the no-scalar-epilogue
early-break patches land.
While at it, I tried to unify the integer and floating-point comparison
emitter functions. The latter now also accepts mask and else arguments.
Regression-tested on rv64gcv_zvl512b. Waiting for the CI results.
Regards
Robin
gcc/ChangeLog:
* config/riscv/autovec.md (<cbranch_optab><mode>): Remove.
* config/riscv/riscv-protos.h (expand_vec_cmp_float): Add mask
and else arguments.
* config/riscv/riscv-v.cc (expand_vec_cmp): Rename maskoff argument
to els.
(expand_vec_cmp_float): Add mask and els arguments.
* config/riscv/vector-iterators.md (UNSPEC_COND_LEN_CMP_ALL)
(UNSPEC_COND_LEN_CMP_ANY, COND_LEN_CBRANCH_CMP, cbranch_op)
(cbranch_optab): Remove.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/early-break-5.c: Remove redundant
comments.
---
gcc/config/riscv/autovec.md | 77 -------------------
gcc/config/riscv/riscv-protos.h | 3 +-
gcc/config/riscv/riscv-v.cc | 42 ++++++----
gcc/config/riscv/vector-iterators.md | 18 -----
.../riscv/rvv/autovec/early-break-5.c | 4 -
5 files changed, 31 insertions(+), 113 deletions(-)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index fc5a31c9396..2c9dc00f763 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -3017,83 +3017,6 @@ (define_expand "cbranch<mode>4"
}
)
-;; Implement cond_len_vec_cbranch_any and cond_len_vec_cbranch_all
-;; Vector comparison with length and mask, then branch for integer types.
-(define_expand "<cbranch_optab><mode>"
- [(set (pc)
- (unspec:V_VLSI
- [(if_then_else
- (match_operator 0 "riscv_cbranch_comparison_operator"
- [(match_operand:<VM> 1 "register_operand")
- (match_operand:V_VLSI 2 "register_operand")
- (match_operand:V_VLSI 3 "nonmemory_operand")
- (match_operand 4 "autovec_length_operand")
- (match_operand 5 "const_0_operand")])
- (label_ref (match_operand 6 ""))
- (pc))]
- COND_LEN_CBRANCH_CMP))]
- "TARGET_VECTOR"
-{
- rtx_code code = GET_CODE (operands[0]);
- rtx mask = gen_reg_rtx (<VM>mode);
-
- /* Generate the masked comparison. */
- rtx maskoff = CONST0_RTX (<VM>mode);
- riscv_vector::expand_vec_cmp (mask, code, operands[2], operands[3],
- operands[1], maskoff);
-
- /* Use vcpop to count the number of active elements. */
- rtx count = gen_reg_rtx (Pmode);
- rtx cpop_ops[] = {count, mask};
- riscv_vector::emit_vlmax_insn (code_for_pred_popcount (<VM>mode, Pmode),
- riscv_vector::CPOP_OP, cpop_ops);
-
- /* Branch based on whether count is zero or non-zero. */
- riscv_expand_conditional_branch (operands[6], <cbranch_op>, count,
- const0_rtx);
- DONE;
-})
-
-;; Floating-point version with length and mask
-(define_expand "<cbranch_optab><mode>"
- [(set (pc)
- (unspec:V_VLSF
- [(if_then_else
- (match_operator 0 "riscv_cbranch_comparison_operator"
- [(match_operand:<VM> 1 "register_operand")
- (match_operand:V_VLSF 2 "register_operand")
- (match_operand:V_VLSF 3 "register_operand")
- (match_operand 4 "autovec_length_operand")
- (match_operand 5 "const_0_operand")])
- (label_ref (match_operand 6 ""))
- (pc))]
- COND_LEN_CBRANCH_CMP))]
- "TARGET_VECTOR"
-{
- rtx_code code = GET_CODE (operands[0]);
- rtx mask = gen_reg_rtx (<VM>mode);
-
- rtx tmp = gen_reg_rtx (<VM>mode);
- riscv_vector::expand_vec_cmp_float (tmp, code, operands[2], operands[3],
- false);
-
- /* Combine with the incoming mask using AND. */
- rtx ops[] = {mask, operands[1], tmp};
- riscv_vector::emit_vlmax_insn (code_for_pred (AND, <VM>mode),
- riscv_vector::BINARY_MASK_OP, ops);
-
- /* Use vcpop to count the number of active elements. */
- rtx count = gen_reg_rtx (Pmode);
- rtx cpop_ops[] = {count, mask};
- riscv_vector::emit_vlmax_insn (code_for_pred_popcount (<VM>mode, Pmode),
- riscv_vector::CPOP_OP, cpop_ops);
-
- /* Branch based on whether count is zero or non-zero. */
- riscv_expand_conditional_branch (operands[6], <cbranch_op>, count,
- const0_rtx);
- DONE;
-})
-
;; -------------------------------------------------------------------------
;; - vrol.vv vror.vv
;; -------------------------------------------------------------------------
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 8b362e323d9..40e9564e8a9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -661,7 +661,8 @@ bool neg_simm5_p (rtx);
#ifdef RTX_CODE
bool has_vi_variant_p (rtx_code, rtx);
void expand_vec_cmp (rtx, rtx_code, rtx, rtx, rtx = nullptr, rtx = nullptr);
-bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
+bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool, rtx = nullptr,
+ rtx = nullptr);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 291850bfb11..88bff235f06 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3111,7 +3111,7 @@ vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
void
expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
- rtx maskoff)
+ rtx els)
{
machine_mode mask_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
@@ -3121,8 +3121,8 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
{
rtx lt = gen_reg_rtx (mask_mode);
rtx gt = gen_reg_rtx (mask_mode);
- expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
- expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
+ expand_vec_cmp (lt, LT, op0, op1, mask, els);
+ expand_vec_cmp (gt, GT, op0, op1, mask, els);
icode = code_for_pred (IOR, mask_mode);
rtx ops[] = {target, lt, gt};
emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
@@ -3130,14 +3130,14 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
}
rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
- if (!mask && !maskoff)
+ if (!mask && !els)
{
rtx ops[] = {target, cmp, op0, op1};
emit_vlmax_insn (icode, COMPARE_OP, ops);
}
else
{
- rtx ops[] = {target, mask, maskoff, cmp, op0, op1};
+ rtx ops[] = {target, mask, els, cmp, op0, op1};
emit_vlmax_insn (icode, COMPARE_OP_MU, ops);
}
}
@@ -3145,14 +3145,18 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
/* Expand an RVV floating-point comparison:
If CAN_INVERT_P is true, the caller can also handle inverted results;
- return true if the result is in fact inverted. */
+ return true if the result is in fact inverted.
+
+ If MASK is non-null, inactive lanes get the respective element from
+ ELS. */
bool
expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
- bool can_invert_p)
+ bool can_invert_p, rtx mask, rtx els)
{
machine_mode mask_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
+ gcc_assert (!mask || !can_invert_p);
/* If can_invert_p = true:
It suffices to implement a u>= b as !(a < b) but with the NaNs masked off:
@@ -3192,7 +3196,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
case GE:
case LTGT:
/* There is native support for the comparison. */
- expand_vec_cmp (target, code, op0, op1);
+ expand_vec_cmp (target, code, op0, op1, mask, els);
return false;
case UNEQ:
case ORDERED:
@@ -3226,15 +3230,19 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
if (code == ORDERED)
{
- emit_move_insn (target, eq0);
+ if (mask)
+ {
+ rtx ops[] = {target, eq0, mask};
+ emit_vlmax_insn (code_for_pred (AND, mask_mode), BINARY_MASK_OP, ops);
+ }
+ else
+ emit_move_insn (target, eq0);
return false;
}
/* There is native support for the inverse comparison. */
code = reverse_condition_maybe_unordered (code);
- if (code == ORDERED)
- emit_move_insn (target, eq0);
- else
+ if (code != ORDERED)
expand_vec_cmp (eq0, code, op0, op1, eq0, eq0);
if (can_invert_p)
@@ -3245,7 +3253,15 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
/* We use one_cmpl<mode>2 to make Combine PASS to combine mask instructions
into: vmand.mm/vmnor.mm/vmnand.mm/vmxnor.mm. */
- emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mask_mode, eq0)));
+ rtx not_eq0 = gen_reg_rtx (mask_mode);
+ emit_insn (gen_rtx_SET (not_eq0, gen_rtx_NOT (mask_mode, eq0)));
+ if (mask)
+ {
+ rtx ops[] = {target, not_eq0, mask};
+ emit_vlmax_insn (code_for_pred (AND, mask_mode), BINARY_MASK_OP, ops);
+ }
+ else
+ emit_move_insn (target, not_eq0);
return false;
}
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index b2383de8549..902f1648675 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -122,10 +122,6 @@ (define_c_enum "unspec" [
UNSPEC_SF_VFNRCLIP
UNSPEC_SF_VFNRCLIPU
UNSPEC_SF_CV
-
- ;; Vector conditional branch optabs
- UNSPEC_COND_LEN_CMP_ALL
- UNSPEC_COND_LEN_CMP_ANY
])
(define_c_enum "unspecv" [
@@ -6524,17 +6520,3 @@ (define_mode_attr NDS_QUAD_FIX [
(V32DI "V32HI") (V64DI "V64HI") (V128DI "V128HI") (V256DI "V256HI")
(V512DI "V512HI")
])
-
-;; Vector conditional branch iterators
-(define_int_iterator COND_LEN_CBRANCH_CMP
- [UNSPEC_COND_LEN_CMP_ALL UNSPEC_COND_LEN_CMP_ANY])
-
-(define_int_attr cbranch_op [
- (UNSPEC_COND_LEN_CMP_ALL "EQ")
- (UNSPEC_COND_LEN_CMP_ANY "NE")
-])
-
-(define_int_attr cbranch_optab [
- (UNSPEC_COND_LEN_CMP_ALL "cond_len_vec_cbranch_all")
- (UNSPEC_COND_LEN_CMP_ANY "cond_len_vec_cbranch_any")
-])
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-5.c
index 1547914d8b9..e7199fb0a93 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/early-break-5.c
@@ -31,7 +31,6 @@ DEFINE_TEST_FUNC(f4, !=)
DEFINE_TEST_FUNC(f5, <)
DEFINE_TEST_FUNC(f6, <=)
-/* Array setup macro. */
#define RESET_ARRAYS(_aval, _idx, _force, _bval) \
do { \
_Pragma("GCC novector") \
@@ -43,7 +42,6 @@ DEFINE_TEST_FUNC(f6, <=)
a[_idx] = _force; \
} while (0)
-/* Value check macros. */
#define CHECK_EQ(_i, _val) \
do { \
if (b[_i] != _val) \
@@ -68,7 +66,6 @@ DEFINE_TEST_FUNC(f6, <=)
} while (0)
int main(void) {
- /* Break on random intervals. */
TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10));
TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10));
TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0));
@@ -76,7 +73,6 @@ int main(void) {
TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5));
TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7));
- /* Break on last iteration. */
TEST_FUNC (f1, 0, N-1, 1, 1,
CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2));
--
2.53.0