https://gcc.gnu.org/g:49a556bfe79b0427e56fe2205f56b53fee5e80ec
commit 49a556bfe79b0427e56fe2205f56b53fee5e80ec Author: Pan Li <[email protected]> Date: Wed Oct 15 22:16:11 2025 +0800 RISC-V: Combine vsext.vf2 and vsll.vi to vwsll.vi on ZVBB The vwsll.vi of the zvbb extension zero-extends its source before the ashift. But we can still do the combine based on sign extend if and only if the shift amount is an immediate and the sign-extended bits are all shifted out. For example, as below: vsetvli zero, zero, e32, m1, ta, ma vsext.vf2 v1, v2 vsll.vi v1, v1, 16 If the ashift amount is greater than or equal to the truncated bitsize (aka 16 for e32), the sign- or zero-extended bits will all be shifted out and never pollute the final result. Then we have: vsetvli zero, zero, e32, m1, ta, ma vwsll.vi v1, v2, 16 PR target/121959 The below test suites are passed for this patch series. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/autovec-opt.md (*vwsll_sign_extend_<mode>): Add pattern to combine vsext.vf2 and vsll.vi to vwsll.vi. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr121959-1.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-2.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-3.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-4.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-5.c: New test. * gcc.target/riscv/rvv/autovec/pr121959-run-1.c: New test. * gcc.target/riscv/rvv/autovec/pr121959.h: New test. 
Signed-off-by: Pan Li <[email protected]> (cherry picked from commit dd305514bbca46a39d020018e1bef0cfa15c99c8) Diff: --- gcc/config/riscv/autovec-opt.md | 41 ++++++++++++++ .../gcc.target/riscv/rvv/autovec/pr121959-1.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-2.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-3.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-4.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-5.c | 9 +++ .../gcc.target/riscv/rvv/autovec/pr121959-run-1.c | 65 ++++++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/pr121959.h | 24 ++++++++ 8 files changed, 175 insertions(+) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 063c9a0122b6..52ab79c555a6 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -2424,3 +2424,44 @@ } [(set_attr "type" "vfalu")] ) + +;; Combine vsext.vf2 + vsll.vi to vwsll.vi, which depends on ZVBB. +;; The vwsll.vi zero-extends its source, thus the combine is valid only +;; when the ashift bits are equal to or greater than the double truncated bits. +;; Appears in the satd function of x264. 
+(define_insn_and_split "*vwsll_sign_extend_<mode>" + [(set (match_operand:VWEXTI 0 "register_operand") + (ashift:VWEXTI + (sign_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) + (match_operand 2 "const_int_operand")))] + "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + int imm = INTVAL (operands[2]); /* Constant shift amount taken from operand 2. */ + int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode)); /* Element precision of the narrow source mode. */ + + if (imm >= trunc_prec) /* Shift is at least the narrow element width: emit the vwsll form directly. */ + { + insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode); + emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); + } + else /* Smaller shift: emit an explicit sign extend into a fresh pseudo, then the shift. */ + { + insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode); + rtx extend = gen_reg_rtx (<MODE>mode); + rtx unary_ops[] = {extend, operands[1]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, + unary_ops); + + icode = code_for_pred_scalar (ASHIFT, <MODE>mode); + rtx binary_ops[] = {operands[0], extend, operands[2]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, + binary_ops); + } + + DONE; + } +) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c new file mode 100644 index 000000000000..a42d7c4de609 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16) + +/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c new file mode 100644 index 000000000000..2a3ef8d26179 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "pr121959.h" + 
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16) + +/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vwsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c new file mode 100644 index 000000000000..59a930a1efa9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17) + +/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c new file mode 100644 index 000000000000..59a6d365af41 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17) + +/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vwsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c new file mode 100644 index 000000000000..a9319a3a9599 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */ + +#include "pr121959.h" + +DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15) + +/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */ +/* { dg-final { scan-assembler-not {vwsll.vi} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c new file mode 100644 index 000000000000..77fd95b8ebb1 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-require-effective-target "riscv_zvbb_ok" } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvbb" } */ +/* { dg-additional-options "-std=c99" } */ + +#include "pr121959.h" + +#define WT int32_t +#define NT uint8_t +#define IMM 16 +#define N 16 + +DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) + +NT g_data[][2][N] = { + { + /* a */ + { + 2, 2, 2, 2, + 255, 255, 255, 255, + 128, 128, 128, 128, + 127, 127, 127, 127, + }, + /* b */ + { + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + 7, 7, 7, 7, + }, + }, +}; + +WT g_expect[][N] = { + /* 0: (a[i] - b[i]) << IMM for g_data[0] */ + { + 65536, 65536, 65536, 65536, + 16711680, 16711680, 16711680, 16711680, + 8257536, 8257536, 8257536, 8257536, + 7864320, 7864320, 7864320, 7864320, + }, +}; + +int +main () +{ + unsigned i, k; + WT out[N]; + + for (i = 0; i < sizeof (g_data) / sizeof (g_data[0]); i++) + { + NT *a = g_data[i][0]; + NT *b = g_data[i][1]; + WT *expect = g_expect[i]; + + RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, out, a, b, N); + + for (k = 0; k < N; k++) + if (out[k] != expect[k]) + __builtin_abort (); + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h new file mode 100644 index 000000000000..10b1b6239797 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h @@ -0,0 +1,24 @@ +#ifndef HAVE_DEFINED_PR121959_H +#define HAVE_DEFINED_PR121959_H + +#include <stdint.h> + +#define DEF_VWSLL_FUNC_0(WT, NT, IMM) \ +void \ +test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res, \ + NT * restrict a, \ + NT * restrict b, \ + int n) \ +{ \ + for (int i = 0; i < n; i++) \ + { \ + res[i] = (a[i] - b[i]) << IMM; \ + } \ +} +#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM) +#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) \ + test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n) +#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, 
a, b, n) \ + RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) + +#endif
