https://gcc.gnu.org/g:dd305514bbca46a39d020018e1bef0cfa15c99c8

commit r16-5078-gdd305514bbca46a39d020018e1bef0cfa15c99c8
Author: Pan Li <[email protected]>
Date:   Wed Oct 15 22:16:11 2025 +0800

    RISC-V: Combine vsext.vf2 and vsll.vi to vwsll.vi on ZVBB
    
    The vwsll.vi of the zvbb extension zero-extends its source before the
    shift.  But we can still combine a sign extend with the shift if and
    only if the shift amount is an immediate and all the sign-extended
    bits are shifted out.  For example:
    
      vsetvli   zero, zero, e32, m1, ta, ma
      vsext.vf2 v1, v2
      vsll.vi   v1, v1, 16
    
    If the shift amount is greater than or equal to the truncated bitsize
    (aka 16 for e32), the sign- or zero-extended bits are all shifted out
    and never pollute the final result.  Then we have
    
      vsetvli   zero, zero, e32, m1, ta, ma
      vwsll.vi  v1, v2, 16
    
            PR target/121959
    
    The test suites below passed for this patch series.
     * The rv64gcv full regression test.
    
    gcc/ChangeLog:
    
            * config/riscv/autovec-opt.md (*vwsll_sign_extend_<mode>): Add
            pattern to combine vsext.vf2 and vsll.vi to vwsll.vi.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/pr121959-1.c: New test.
            * gcc.target/riscv/rvv/autovec/pr121959-2.c: New test.
            * gcc.target/riscv/rvv/autovec/pr121959-3.c: New test.
            * gcc.target/riscv/rvv/autovec/pr121959-4.c: New test.
            * gcc.target/riscv/rvv/autovec/pr121959-5.c: New test.
            * gcc.target/riscv/rvv/autovec/pr121959-run-1.c: New test.
            * gcc.target/riscv/rvv/autovec/pr121959.h: New test.
    
    Signed-off-by: Pan Li <[email protected]>

Diff:
---
 gcc/config/riscv/autovec-opt.md                    | 41 ++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr121959-1.c      |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-2.c      |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-3.c      |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-4.c      |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-5.c      |  9 +++
 .../gcc.target/riscv/rvv/autovec/pr121959-run-1.c  | 65 ++++++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr121959.h        | 24 ++++++++
 8 files changed, 175 insertions(+)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 063c9a0122b6..52ab79c555a6 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -2424,3 +2424,44 @@
   }
   [(set_attr "type" "vfalu")]
 )
+
+;; Combine vsext.vf2 + vsll.vi into vwsll.vi, which requires ZVBB.
+;; vwsll.vi zero-extends, so this is only valid when the shift amount is
+;; >= the narrow element precision; else re-emit vsext.vf2 + vsll.vi.
+;; Appears in the satd function of x264.
+(define_insn_and_split "*vwsll_sign_extend_<mode>"
+  [(set (match_operand:VWEXTI               0 "register_operand")
+       (ashift:VWEXTI
+         (sign_extend:VWEXTI
+           (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+         (match_operand                    2 "const_int_operand")))]
+  "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    int imm = INTVAL (operands[2]);
+    int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode));
+
+    if (imm >= trunc_prec)
+      {
+       insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode);
+       emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+      }
+    else
+      {
+       insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode);
+       rtx extend = gen_reg_rtx (<MODE>mode);
+       rtx unary_ops[] = {extend, operands[1]};
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP,
+                                      unary_ops);
+
+       icode = code_for_pred_scalar (ASHIFT, <MODE>mode);
+       rtx binary_ops[] = {operands[0], extend, operands[2]};
+       riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP,
+                                      binary_ops);
+      }
+
+    DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
new file mode 100644
index 000000000000..a42d7c4de609
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+/* ZVBB on, IMM 16 >= 16-bit truncated precision: expect one vwsll.vi.  */
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
new file mode 100644
index 000000000000..2a3ef8d26179
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 16)
+/* No ZVBB in -march: the combine pattern is disabled, expect vsll.vi.  */
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
new file mode 100644
index 000000000000..59a930a1efa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+/* ZVBB on, IMM 17 > 16-bit truncated precision: expect one vwsll.vi.  */
+/* { dg-final { scan-assembler-times {vwsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
new file mode 100644
index 000000000000..59a6d365af41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 17)
+/* No ZVBB in -march: the combine pattern is disabled, expect vsll.vi.  */
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
new file mode 100644
index 000000000000..a9319a3a9599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-5.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvbb -mabi=lp64d -O3" } */
+
+#include "pr121959.h"
+
+DEF_VWSLL_FUNC_0(int32_t, uint8_t, 15)
+/* ZVBB on, IMM 15 < 16-bit truncated precision: expect vsll.vi only.  */
+/* { dg-final { scan-assembler-times {vsll.vi} 1 } } */
+/* { dg-final { scan-assembler-not {vwsll.vi} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
new file mode 100644
index 000000000000..77fd95b8ebb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959-run-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "pr121959.h"
+
+#define WT int32_t
+#define NT uint8_t
+#define IMM 16
+#define N 16
+
+DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM)
+
+NT g_data[][2][N] = {
+  {
+    /* a: minuend of (a[k] - b[k]) << IMM.  */
+    {
+        2,   2,   2,   2,
+      255, 255, 255, 255,
+      128, 128, 128, 128,
+      127, 127, 127, 127,
+    },
+    /* b: subtrahend.  */
+    {
+      1, 1, 1, 1,
+      0, 0, 0, 0,
+      2, 2, 2, 2,
+      7, 7, 7, 7,
+    },
+  },
+};
+
+WT g_expect[][N] = {
+  /* 0: (a[k] - b[k]) << 16 for g_data[0].  */
+  {
+       65536,    65536,    65536,    65536,
+    16711680, 16711680, 16711680, 16711680,
+     8257536,  8257536,  8257536,  8257536,
+     7864320,  7864320,  7864320,  7864320,
+  },
+};
+
+int
+main ()
+{
+  unsigned i, k;
+  WT out[N];
+
+  for (i = 0; i < sizeof (g_data) / sizeof (g_data[0]); i++)
+    {
+      NT *a = g_data[i][0];
+      NT *b = g_data[i][1];
+      WT *expect = g_expect[i];
+
+      RUN_VWSLL_FUNC_0_WRAP (WT, NT, IMM, out, a, b, N);
+
+      for (k = 0; k < N; k++)
+       if (out[k] != expect[k])
+         __builtin_abort ();
+    }
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
new file mode 100644
index 000000000000..10b1b6239797
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121959.h
@@ -0,0 +1,24 @@
+#ifndef HAVE_DEFINED_PR121959_H
+#define HAVE_DEFINED_PR121959_H
+
+#include <stdint.h>
+/* Define test_from_NT_to_WT_IMM_0: res[i] = (a[i] - b[i]) << IMM, i < n.  */
+#define DEF_VWSLL_FUNC_0(WT, NT, IMM)                   \
+void                                                    \
+test_from_##NT##_to_##WT##_##IMM##_0(WT * restrict res, \
+                                    NT * restrict a,   \
+                                    NT * restrict b,   \
+                                    int n)             \
+{                                                       \
+  for (int i = 0; i < n; i++)                           \
+    {                                                   \
+      res[i] = (a[i] - b[i]) << IMM;                    \
+    }                                                   \
+}
+#define DEF_VWSLL_FUNC_0_WRAP(WT, NT, IMM) DEF_VWSLL_FUNC_0(WT, NT, IMM)
+#define RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n) \
+  test_from_##NT##_to_##WT##_##IMM##_0(res, a, b, n)
+#define RUN_VWSLL_FUNC_0_WRAP(WT, NT, IMM, res, a, b, n) \
+  RUN_VWSLL_FUNC_0(WT, NT, IMM, res, a, b, n)
+/* The _WRAP forms let macro arguments expand before the ## pasting.  */
+#endif

Reply via email to