From: Pan Li <[email protected]>
This patch combines the vec_duplicate + vmsltu.vv into the vmsltu.vx,
as in the example code below. The related pattern depends on the cost
of the vec_duplicate from GR2VR: late-combine will perform the
combination if the GR2VR cost is zero, and reject it if the GR2VR cost
is greater than zero.
Assume we have the asm code below, with a GR2VR cost of 0.
Before this patch:
11 beq a3,zero,.L8
12 vsetvli a5,zero,e32,m1,ta,ma
13 vmv.v.x v2,a2
...
16 .L3:
17 vsetvli a5,a3,e32,m1,ta,ma
...
22 vmsltu.vv v1,v2,v3
...
25 bne a3,zero,.L3
After this patch:
11 beq a3,zero,.L8
...
14 .L3:
15 vsetvli a5,a3,e32,m1,ta,ma
...
20 vmsltu.vx v1,a2,v3
...
23 bne a3,zero,.L3
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*pred_cmp_swapped<mode>_scalar):
Add new pattern to match vec_dup > vec for vmsltu.
* config/riscv/predicates.md (comparison_swappable_operator):
Add new predicate for the above pattern.
* config/riscv/riscv-protos.h (expand_vx_cmp_vec_dup_vec): Add
new func decl.
* config/riscv/riscv-v.cc (get_swapped_cmp_rtx_code): Add new
func to convert cmp code to swapped, like gtu to ltu.
(expand_vx_cmp_vec_dup_vec): Add new func to emit vmsltu.vx.
Signed-off-by: Pan Li <[email protected]>
---
gcc/config/riscv/autovec-opt.md | 30 ++++++++++++++++++++++++++++++
gcc/config/riscv/predicates.md | 3 +++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 32 ++++++++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 52ab79c555a..40627fac91c 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1976,6 +1976,36 @@ (define_insn_and_split "*widen_mul_plus_vx_<mode>"
}
[(set_attr "type" "viwmuladd")])
+(define_insn_and_split "*pred_cmp_swapped<mode>_scalar"
+ [(set (match_operand:<VM> 0 "register_operand")
+ (if_then_else:<VM>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operator:<VM> 3 "comparison_swappable_operator" ;; vec_dup (x) CMP v
+ [(vec_duplicate:V_VLSI
+ (match_operand:<VEL> 4 "register_operand")) ;; scalar x is the FIRST operand
+ (match_operand:V_VLSI 5 "register_operand")]) ;; vector v is the second
+ (unspec:<VM>
+ [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::expand_vx_cmp_vec_dup_vec (operands[0], operands[4],
+ operands[5],
+ GET_CODE (operands[3]),
+ <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vicmp")])
+
;; =============================================================================
;; Combine vec_duplicate + op.vv to op.vf
;; Include
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f811a4e40ca..3cc954e10cc 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -611,6 +611,9 @@ (define_predicate "comparison_except_ltge_operator"
(define_predicate "comparison_except_ge_operator"
(match_code "eq,ne,le,leu,gt,gtu,lt,ltu"))
+(define_predicate "comparison_swappable_operator"
+ (match_code "gtu")) ;; matches unsigned greater-than (gtu) only
+
(define_predicate "ge_operator"
(match_code "ge,geu"))
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 570acb14f58..a57e39d11b9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -690,6 +690,7 @@ void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code,
machine_mode);
void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode);
void expand_vx_binary_vxrm_vec_vec_dup (rtx, rtx, rtx, int, int, machine_mode);
void expand_vx_binary_vxrm_vec_dup_vec (rtx, rtx, rtx, int, int, machine_mode);
+void expand_vx_cmp_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 658c6327cc5..850a82ad276 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5861,6 +5861,38 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2,
emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
}
+static rtx_code
+get_swapped_cmp_rtx_code (rtx_code code)
+{
+ switch (code)
+ {
+ case GTU: /* a >u b with the operands swapped is b <u a. */
+ return LTU;
+ default: /* Only GTU is handled; any other code is a caller bug. */
+ gcc_unreachable ();
+ }
+}
+
+/* Expand the vx compare combine of the form v2 = vec_dup (x) > v1, i.e. the
+ first operand (op_1) comes from the vec_duplicate and the second operand
+ (op_2) is the vector reg. The RVV vms* insns only provide the form
+ v2 = v1 < vec_dup (x), so swap op_1 and op_2 and emit the insn with the
+ swapped comparison code (from gtu to ltu) instead. */
+
+void
+expand_vx_cmp_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, rtx_code code,
+ machine_mode mode)
+{
+ machine_mode mask_mode = get_mask_mode (mode);
+ rtx_code swapped_code = get_swapped_cmp_rtx_code (code);
+
+ insn_code icode = code_for_pred_cmp_scalar (mode);
+ rtx cmp = gen_rtx_fmt_ee (swapped_code, mask_mode, op_2, op_1);
+ rtx ops[] = {op_0, cmp, op_2, op_1}; /* dest, compare, vector, scalar. */
+
+ emit_vlmax_insn (icode, COMPARE_OP, ops);
+}
+
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
--
2.43.0