From: Zhongyao Chen <[email protected]>
The vectorizer's compute_estimated_lmul function previously returned a
bad value (regno_alignment) when the estimated LMUL was larger than RVV_M8.
It now returns RVV_M8 in that case, which prevents a register spill.
The patch includes a new regression test for PR target/121910, based
on the x264 mc_chroma function. The test uses scan-tree-dump to
confirm that the compiler chooses the expected vector mode (RVVM1QI)
at -O3, verifying the fix.
gcc/ChangeLog:
* config/riscv/riscv-vector-costs.cc (compute_estimated_lmul):
Return RVV_M8 when estimated lmul is too large.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr121910.c: New file.
Signed-off-by: Zhongyao Chen <[email protected]>
---
gcc/config/riscv/riscv-vector-costs.cc | 2 +-
.../gcc.target/riscv/rvv/autovec/pr121910.c | 34 +++++++++++++++++++
2 files changed, 35 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121910.c
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 5e6cb6714..27ced61e8 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -632,7 +632,7 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
/ TARGET_MIN_VLEN;
if (estimated_lmul > RVV_M8)
- return regno_alignment;
+ return RVV_M8;
else
return estimated_lmul;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121910.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121910.c
new file mode 100644
index 000000000..5faeeefa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121910.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -ftree-vectorize -mabi=lp64d -march=rv64gcv -mrvv-max-lmul=dynamic -fdump-tree-vect-all" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O2" "-Os" "-Og" "-Oz" } } */
+
+#include <stdint.h>
+
+/* Full chroma motion compensation (mc), i.e. down to 1/8-pixel precision. */
+void mc_chroma(uint8_t* dst, int i_dst_stride, uint8_t* src, int i_src_stride,
+ int mvx, int mvy, int i_width, int i_height) {
+ uint8_t* srcp;
+
+ int d8x = mvx & 0x07; /* low three bits of mvx: the 1/8 fraction in x */
+ int d8y = mvy & 0x07; /* low three bits of mvy: the 1/8 fraction in y */
+ int cA = (8 - d8x) * (8 - d8y); /* four weights; cA + cB + cC + cD == 64 */
+ int cB = d8x * (8 - d8y);
+ int cC = (8 - d8x) * d8y;
+ int cD = d8x * d8y;
+
+ src += (mvy >> 3) * i_src_stride + (mvx >> 3); /* whole-pixel offset */
+ srcp = &src[i_src_stride]; /* the row one stride below src */
+
+ for (int y = 0; y < i_height; y++) {
+ for (int x = 0; x < i_width; x++)
+ dst[x] = (cA * src[x] + cB * src[x + 1] + cC * srcp[x] +
+ cD * srcp[x + 1] + 32) >>
+ 6; /* + 32 then >> 6: divide the weighted sum by 64 with rounding */
+ dst += i_dst_stride;
+ src = srcp;
+ srcp += i_src_stride;
+ }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "Choosing vector mode RVVM1QI" "vect" } } */
\ No newline at end of file
--
2.43.0