Re: [PATCH] RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move

2024-01-05 Thread Robin Dapp
> +/* Return true it is whole register-register move.  */
> +bool
> +whole_reg_to_reg_move_p (rtx *ops, machine_mode mode)
> +{
> +  if (register_operand (ops[0], mode)
> +  && register_operand (ops[3], mode)
> +  && satisfies_constraint_vu (ops[2])
> +  && satisfies_constraint_Wc1 (ops[1]))
> +{
> +  int vlmax_index = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ? 5 : 7;
> +  if (INTVAL (ops[vlmax_index]) == VLMAX)
> + return true;

Is that indent correct?  Looks odd on my screen but I didn't verify.

> +  /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
> +  into NON-VLMAX with LEN = NUNITS.  */
> +  else if (CONST_INT_P (ops[4])
> +&& known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
> + return true;
> +}
> +  return false;
> +}

I would prefer having the vlmax_index as a parameter.  Even though
it's clear that a mask set operation has two fewer operands, I don't
find it particularly intuitive to check that in the function.

Also explain both cases in the function-level comment and mention
the preconditions for calling the function.  Something like:
 "An operation is a whole-register move if either
   (1) Its vlmax operand equals VLMAX
   (2) Its vl operand equals the number of units of its mode."

Maybe some more asserts or checks wouldn't hurt either so the function
can't accidentally be called on other operations than vlde/vste/vimov.

Regards
 Robin



[PATCH] RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move

2024-01-04 Thread Juzhe-Zhong
While working on fixing a bug, I noticed that the following code has a redundant move:

#include "riscv_vector.h"
void
f (float x, float y, void *out)
{
  float f[4] = { x, x, x, y };
  vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
  __riscv_vse32_v_f32m1 (out, v, 4);
}

Before this patch:

f:
vsetivli zero,4,e32,m1,ta,ma
addi    sp,sp,-16
vfmv.v.f v1,fa0
vfslide1down.vf v1,v1,fa1
vmv.v.v v1,v1   ----> redundant move.
vse32.v v1,0(a0)
addi    sp,sp,16
jr  ra

The root cause is that the complicated vmv.v.v pattern isn't simplified
into a simple (set (reg) (reg)) reg-to-reg move pattern.

Currently, we support such simplification for VLMAX.

However, the case I found is non-VLMAX but with LEN = NUNITS which should be
considered as equivalent to VLMAX.

Add a simple fix for this situation.

Tested on both RV32 and RV64 with no regressions. OK for trunk?

gcc/ChangeLog:

* config/riscv/riscv-protos.h (whole_reg_to_reg_move_p): New function.
* config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
* config/riscv/vector.md: Allow non-vlmax with len = NUNITS 
simplification.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/vf_avl-4.c: New test.

---
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv-v.cc   | 21 +++
 gcc/config/riscv/vector.md|  9 ++--
 .../gcc.target/riscv/rvv/base/vf_avl-4.c  | 13 
 4 files changed, 37 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0f0337cfb38..064e8f443f3 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -687,6 +687,7 @@ bool imm_avl_p (machine_mode);
 bool can_be_broadcasted_p (rtx);
 bool gather_scatter_valid_offset_p (machine_mode);
 HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
+bool whole_reg_to_reg_move_p (rtx *, machine_mode);
 }
 
 /* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index b7727b2b3e6..e5ba28d9078 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -5122,4 +5122,25 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
   return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN;
 }
 
+/* Return true it is whole register-register move.  */
+bool
+whole_reg_to_reg_move_p (rtx *ops, machine_mode mode)
+{
+  if (register_operand (ops[0], mode)
+  && register_operand (ops[3], mode)
+  && satisfies_constraint_vu (ops[2])
+  && satisfies_constraint_Wc1 (ops[1]))
+{
+  int vlmax_index = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ? 5 : 7;
+  if (INTVAL (ops[vlmax_index]) == VLMAX)
+   return true;
+  /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+into NON-VLMAX with LEN = NUNITS.  */
+  else if (CONST_INT_P (ops[4])
+  && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
+   return true;
+}
+  return false;
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3d2c1c3ce8f..abd293f310c 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1724,10 +1724,7 @@
vse.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.v\t%0,%3"
-  "&& register_operand (operands[0], mode)
-   && register_operand (operands[3], mode)
-   && satisfies_constraint_vu (operands[2])
-   && INTVAL (operands[7]) == riscv_vector::VLMAX"
+  "&& riscv_vector::whole_reg_to_reg_move_p (operands, mode)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
@@ -1776,9 +1773,7 @@
vmmv.m\t%0,%3
vmclr.m\t%0
vmset.m\t%0"
-  "&& register_operand (operands[0], mode)
-   && register_operand (operands[3], mode)
-   && INTVAL (operands[5]) == riscv_vector::VLMAX"
+  "&& riscv_vector::whole_reg_to_reg_move_p (operands, mode)"
   [(set (match_dup 0) (match_dup 3))]
   ""
   [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
new file mode 100644
index 00000000000..1b4bfd96481
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "riscv_vector.h"
+void
+f (float x, float y, void *out)
+{
+  float f[4] = { x, x, x, y };
+  vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
+  __riscv_vse32_v_f32m1 (out, v, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
-- 
2.36.3