Changes since v1:
- Fixed permutations with two pivots and repeated elements.
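As an illustration for reviewers (this note sits above the scissors line
and is not part of the commit), here is a minimal sketch of the first new
case.  The typedef and function name are made up for the example, and the
expected lowering is inferred from the new code below rather than taken
from verified compiler output:

  typedef int v4si __attribute__ ((vector_size (16)));

  v4si
  slideup_middle (v4si a, v4si b)
  {
    /* {0, 4, 5, 3} selects a[0], b[0], b[1], a[3], i.e. OP1's low part
       slid into the middle of OP0.  With this patch this should lower
       to a single vslideup with offset 1 (the first pivot) and an
       explicit vector length of 3 (the second pivot), tail undisturbed,
       instead of a vrgather sequence.  */
    return __builtin_shufflevector (a, b, 0, 4, 5, 3);
  }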
-- >8 --

Improve shuffle_slide_patterns to better recognize permutations that can
be constructed by a slideup or slidedown, covering more cases:

- Slideup one vector into the middle of the other, like {0, 4, 5, 3}.
- Slidedown one vector not ending in the last element, like {5, 6, 2, 3}.
- Slidedown one vector from the beginning, like {4, 5, 2, 3}.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (shuffle_slide_patterns): Cover more
	permutations.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/shuffle-slide-run.h: New test.
	* gcc.target/riscv/rvv/autovec/shuffle-slidedown-run.c: Likewise.
	* gcc.target/riscv/rvv/autovec/shuffle-slideup-run.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.h: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-1.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-2.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-perm.h: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-1.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-2.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-perm.h: Likewise.
---
 gcc/config/riscv/riscv-v.cc                   |  56 ++--
 .../riscv/rvv/autovec/shuffle-slide-run.h     |  95 ++++++
 .../riscv/rvv/autovec/shuffle-slidedown-run.c |   7 +
 .../riscv/rvv/autovec/shuffle-slideup-run.c   |   7 +
 .../rvv/autovec/vls-vlmax/shuffle-slide.h     | 239 ++++++++++++++++++
 .../autovec/vls-vlmax/shuffle-slidedown-1.c   |  41 +++
 .../autovec/vls-vlmax/shuffle-slidedown-2.c   |  41 +++
 .../vls-vlmax/shuffle-slidedown-perm.h        | 107 ++++++++
 .../rvv/autovec/vls-vlmax/shuffle-slideup-1.c |  37 +++
 .../rvv/autovec/vls-vlmax/shuffle-slideup-2.c |  37 +++
 .../autovec/vls-vlmax/shuffle-slideup-perm.h  |  93 +++++++
 11 files changed, 739 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slide-run.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slidedown-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slideup-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-perm.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-perm.h

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 012ca5918cb..8021bc14e7c 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3742,8 +3742,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
   return true;
 }
 
-/* Recognize patterns like [4 5 6 7 12 13 14 15] where either the lower
-   or the higher parts of both vectors are combined into one.  */
+/* Recognize patterns like [4 5 6 7 12 13 14 15] where a contiguous part of
+   one vector is combined with the other.  */
 
 static bool
 shuffle_slide_patterns (struct expand_vec_perm_d *d)
@@ -3755,6 +3755,7 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d)
     return false;
 
   int vlen = vec_len.to_constant ();
+  int len = 0;
   if (vlen < 4)
     return false;
 
@@ -3763,8 +3764,7 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d)
 
   /* For a slideup OP0 can stay, for a slidedown OP1 can.
      The former requires that the first element of the permutation
-     is the first element of OP0, the latter that the last permutation
-     element is the last element of OP1.  */
+     is the first element of OP0.  */
   bool slideup = false;
   bool slidedown = false;
 
@@ -3776,13 +3776,10 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d)
   if (known_eq (d->perm[vlen - 1], 2 * vlen - 1))
     slidedown = true;
 
-  if (slideup && slidedown)
-    return false;
-
   if (!slideup && !slidedown)
     return false;
 
-  /* Check for a monotonic sequence with one pivot.  */
+  /* Check for a monotonic sequence with one or two pivots.  */
   int pivot = -1;
   for (int i = 0; i < vlen; i++)
     {
@@ -3790,21 +3787,37 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d)
 	pivot = i;
       if (i > 0 && i != pivot
 	  && maybe_ne (d->perm[i], d->perm[i - 1] + 1))
-	return false;
+	{
+	  if (pivot == -1 || len != 0)
+	    return false;
+	  /* A second pivot determines the vector length of the slide.  */
+	  len = i;
+	}
     }
 
   if (pivot == -1)
     return false;
 
+  /* In case the permutation both starts at OP0's first element and
+     ends at OP1's last element we may have a slidedown from the
+     beginning.  */
+  if (slideup && slidedown)
+    {
+      /* The first pivot must be OP1's element in the PIVOT position.  */
+      if (maybe_ne (d->perm[pivot], vlen + pivot))
+	return false;
+
+      slideup = false;
+    }
+
   /* For a slideup OP1's part (to be slid up) must be a low part,
      i.e. starting with its first element.  */
   if (slideup && maybe_ne (d->perm[pivot], vlen))
     return false;
 
-  /* For a slidedown OP0's part (to be slid down) must be a high part,
-     i.e. ending with its last element.  */
-  if (slidedown && maybe_ne (d->perm[pivot - 1], vlen - 1))
-    return false;
+  /* The second pivot in a slideup must continue OP0's elements in place.  */
+  if (slideup && len && maybe_ne (d->perm[len], len))
+    return false;
 
   /* Success!  */
   if (d->testing_p)
@@ -3813,21 +3826,22 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d)
   /* PIVOT is the start of the lower/higher part of OP1 or OP2.
      For a slideup it indicates how many elements of OP1 to
      skip/slide over.  For a slidedown it indicates how long
-     OP1's high part is, while VLEN - PIVOT is the amount to slide.  */
-  int slide_cnt = slideup ? pivot : vlen - pivot;
+     OP1's high part is, while the first element gives the slide amount.  */
   insn_code icode;
+  int slide_cnt = slideup ? pivot : d->perm[0].to_constant ();
   if (slideup)
     {
-      /* No need for a vector length because we slide up until the
-	 end of OP1 anyway.  */
       rtx ops[] = {d->target, d->op0, d->op1, gen_int_mode (slide_cnt, Pmode)};
       icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+      /* Without a set vector length we slide up until the end of OP1.  */
+      if (len)
+	emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops,
+			    gen_int_mode (len, Pmode));
+      else
+	emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops);
     }
   else
     {
-      /* Here we need a length because we slide to the beginning of OP1
-	 leaving the remaining elements undisturbed.  */
-      int len = pivot;
+      len = pivot;
       rtx ops[] = {d->target, d->op1, d->op0, gen_int_mode (slide_cnt, Pmode)};
       icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, vmode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slide-run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slide-run.h
new file mode 100644
index 00000000000..e14ebeb974e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slide-run.h
@@ -0,0 +1,95 @@
+#define comp(a, b, n) \
+  for (unsigned i = 0; i < n; ++i) \
+    if ((a)[i] != (b)[i]) \
+      __builtin_abort ();
+
+#define CHECK4(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void check4_##A##_##B##_##C##_##TYPE () \
+  { \
+    TYPE v0_##TYPE = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1_##TYPE = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref_##TYPE = (TYPE){MASK4_##NUNITS (0, NUNITS, A, B, C)}; \
+    TYPE res_##TYPE; \
+    permute4_##A##_##B##_##C##_##TYPE (v0_##TYPE, v1_##TYPE, &res_##TYPE); \
+    comp (res_##TYPE, ref_##TYPE, NUNITS); \
+  }
+
+#define CHECK8(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void check8_##A##_##B##_##C##_##TYPE () \
+  { \
+    TYPE v0_##TYPE = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1_##TYPE = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref_##TYPE = (TYPE){MASK8_##NUNITS (0, NUNITS, A, B, C)}; \
+    TYPE res_##TYPE; \
+    permute8_##A##_##B##_##C##_##TYPE (v0_##TYPE, v1_##TYPE, &res_##TYPE); \
+    comp (res_##TYPE, ref_##TYPE, NUNITS); \
+  }
+
+#define CHECK16(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void check16_##A##_##B##_##C##_##TYPE () \
+  { \
+    TYPE v0_##TYPE = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1_##TYPE = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref_##TYPE = (TYPE){MASK16_##NUNITS (0, NUNITS, A, B, C)}; \
+    TYPE res_##TYPE; \
+    permute16_##A##_##B##_##C##_##TYPE (v0_##TYPE, v1_##TYPE, &res_##TYPE); \
+    comp (res_##TYPE, ref_##TYPE, NUNITS); \
+  }
+
+#define CHECK32(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void check32_##A##_##B##_##C##_##TYPE () \
+  { \
+    TYPE v0_##TYPE = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1_##TYPE = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref_##TYPE = (TYPE){MASK32_##NUNITS (0, NUNITS, A, B, C)}; \
+    TYPE res_##TYPE; \
+    permute32_##A##_##B##_##C##_##TYPE (v0_##TYPE, v1_##TYPE, &res_##TYPE); \
+    comp (res_##TYPE, ref_##TYPE, NUNITS); \
+  }
+
+#define CHECK64(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void check64_##A##_##B##_##C##_##TYPE () \
+  { \
+    TYPE v0_##TYPE = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1_##TYPE = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref_##TYPE = (TYPE){MASK64_##NUNITS (0, NUNITS, A, B, C)}; \
+    TYPE res_##TYPE; \
+    permute64_##A##_##B##_##C##_##TYPE (v0_##TYPE, v1_##TYPE, &res_##TYPE); \
+    comp (res_##TYPE, ref_##TYPE, NUNITS); \
+  }
+
+#define CHECK128(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void check128_##A##_##B##_##C##_##TYPE () \
+  { \
+    TYPE v0_##TYPE = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \
+    TYPE v1_##TYPE = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \
+    TYPE ref_##TYPE = (TYPE){MASK128_##NUNITS (0, NUNITS, A, B, C)}; \
+    TYPE res_##TYPE; \
+    permute128_##A##_##B##_##C##_##TYPE (v0_##TYPE, v1_##TYPE, &res_##TYPE); \
+    comp (res_##TYPE, ref_##TYPE, NUNITS); \
+  }
+
+DO_ALL_TEST4(CHECK4)
+DO_ALL_TEST8(CHECK8)
+DO_ALL_TEST16(CHECK16)
+DO_ALL_TEST32(CHECK32)
+DO_ALL_TEST64(CHECK64)
+DO_ALL_TEST128(CHECK128)
+
+#define CALL_CHECK4(TYPE, NUNITS, A, B, C) check4_##A##_##B##_##C##_##TYPE ();
+#define CALL_CHECK8(TYPE, NUNITS, A, B, C) check8_##A##_##B##_##C##_##TYPE ();
+#define CALL_CHECK16(TYPE, NUNITS, A, B, C) check16_##A##_##B##_##C##_##TYPE ();
+#define CALL_CHECK32(TYPE, NUNITS, A, B, C) check32_##A##_##B##_##C##_##TYPE ();
+#define CALL_CHECK64(TYPE, NUNITS, A, B, C) check64_##A##_##B##_##C##_##TYPE ();
+#define CALL_CHECK128(TYPE, NUNITS, A, B, C) check128_##A##_##B##_##C##_##TYPE ();
+
+int
+main ()
+{
+  DO_ALL_TEST4(CALL_CHECK4)
+  DO_ALL_TEST8(CALL_CHECK8)
+  DO_ALL_TEST16(CALL_CHECK16)
+  DO_ALL_TEST32(CALL_CHECK32)
+  DO_ALL_TEST64(CALL_CHECK64)
+  DO_ALL_TEST128(CALL_CHECK128)
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slidedown-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slidedown-run.c
new file mode 100644
index 00000000000..1c7203d26a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slidedown-run.c
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 -std=gnu99 -mrvv-max-lmul=m8 -Wno-overflow" } */
+
+#include "vls-vlmax/shuffle-slidedown-1.c"
+#include "shuffle-slide-run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slideup-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slideup-run.c
new file mode 100644
index 00000000000..201b36f65cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/shuffle-slideup-run.c
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 -std=gnu99 -mrvv-max-lmul=m8 -Wno-overflow" } */
+
+#include "vls-vlmax/shuffle-slideup-1.c"
+#include "shuffle-slide-run.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.h
new file mode 100644
index 00000000000..d426433d6ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.h
@@ -0,0 +1,239 @@
+#include "perm.h"
+
+#define SERIES_1(x, y) (x)
+#define SERIES_2(x, y) (x), (x + 1)
+#define SERIES_3(x, y) SERIES_1 (x, y), SERIES_2 (x + 1, y)
+#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y)
+#define SERIES_5(x, y) SERIES_2 (x, y), SERIES_3 (x + 2, y)
+#define SERIES_6(x, y) SERIES_3 (x, y), SERIES_3 (x + 3, y)
+#define SERIES_7(x, y) SERIES_3 (x, y), SERIES_4 (x + 3, y)
+#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y)
+#define SERIES_9(x, y) SERIES_4 (x, y), SERIES_5 (x + 4, y)
+#define SERIES_10(x, y) SERIES_5 (x, y), SERIES_5 (x + 5, y)
+#define SERIES_11(x, y) SERIES_5 (x, y), SERIES_6 (x + 5, y)
+#define SERIES_12(x, y) SERIES_6 (x, y), SERIES_6 (x + 6, y)
+#define SERIES_13(x, y) SERIES_6 (x, y), SERIES_7 (x + 6, y)
+#define SERIES_14(x, y) SERIES_7 (x, y), SERIES_7 (x + 7, y)
+#define SERIES_15(x, y) SERIES_7 (x, y), SERIES_8 (x + 7, y)
+#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y)
+#define SERIES_17(x, y) SERIES_8 (x, y), SERIES_9 (x + 8, y)
+#define SERIES_18(x, y) SERIES_9 (x, y), SERIES_9 (x + 9, y)
+#define SERIES_19(x, y) SERIES_9 (x, y), SERIES_10 (x + 9, y)
+#define SERIES_20(x, y) SERIES_10 (x, y), SERIES_10 (x + 10, y)
+#define SERIES_21(x, y) SERIES_10 (x, y), SERIES_11 (x + 10, y)
+#define SERIES_22(x, y) SERIES_11 (x, y), SERIES_11 (x + 11, y)
+#define SERIES_23(x, y) SERIES_11 (x, y), SERIES_12 (x + 11, y)
+#define SERIES_24(x, y) SERIES_12 (x, y), SERIES_12 (x + 12, y)
+#define SERIES_25(x, y) SERIES_12 (x, y), SERIES_13 (x + 12, y)
+#define SERIES_26(x, y) SERIES_13 (x, y), SERIES_13 (x + 13, y)
+#define SERIES_27(x, y) SERIES_13 (x, y), SERIES_14 (x + 13, y)
+#define SERIES_28(x, y) SERIES_14 (x, y), SERIES_14 (x + 14, y)
+#define SERIES_29(x, y) SERIES_14 (x, y), SERIES_15 (x + 14, y)
+#define SERIES_30(x, y) SERIES_15 (x, y), SERIES_15 (x + 15, y)
+#define SERIES_31(x, y) SERIES_15 (x, y), SERIES_16 (x + 15, y)
+#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y)
+#define SERIES_33(x, y) SERIES_16 (x, y), SERIES_17 (x + 16, y)
+#define SERIES_34(x, y) SERIES_17 (x, y), SERIES_17 (x + 17, y)
+#define SERIES_35(x, y) SERIES_17 (x, y), SERIES_18 (x + 17, y)
+#define SERIES_36(x, y) SERIES_18 (x, y), SERIES_18 (x + 18, y)
+#define SERIES_37(x, y) SERIES_18 (x, y), SERIES_19 (x + 18, y)
+#define SERIES_38(x, y) SERIES_19 (x, y), SERIES_19 (x + 19, y)
+#define SERIES_39(x, y) SERIES_19 (x, y), SERIES_20 (x + 19, y)
+#define SERIES_40(x, y) SERIES_20 (x, y), SERIES_20 (x + 20, y)
+#define SERIES_41(x, y) SERIES_20 (x, y), SERIES_21 (x + 20, y)
+#define SERIES_42(x, y) SERIES_21 (x, y), SERIES_21 (x + 21, y)
+#define SERIES_43(x, y) SERIES_21 (x, y), SERIES_22 (x + 21, y)
+#define SERIES_44(x, y) SERIES_22 (x, y), SERIES_22 (x + 22, y)
+#define SERIES_45(x, y) SERIES_22 (x, y), SERIES_23 (x + 22, y)
+#define SERIES_46(x, y) SERIES_23 (x, y), SERIES_23 (x + 23, y)
+#define SERIES_47(x, y) SERIES_23 (x, y), SERIES_24 (x + 23, y)
+#define SERIES_48(x, y) SERIES_24 (x, y), SERIES_24 (x + 24, y)
+#define SERIES_49(x, y) SERIES_24 (x, y), SERIES_25 (x + 24, y)
+#define SERIES_50(x, y) SERIES_25 (x, y), SERIES_25 (x + 25, y)
+#define SERIES_51(x, y) SERIES_25 (x, y), SERIES_26 (x + 25, y)
+#define SERIES_52(x, y) SERIES_26 (x, y), SERIES_26 (x + 26, y)
+#define SERIES_53(x, y) SERIES_26 (x, y), SERIES_27 (x + 26, y)
+#define SERIES_54(x, y) SERIES_27 (x, y), SERIES_27 (x + 27, y)
+#define SERIES_55(x, y) SERIES_27 (x, y), SERIES_28 (x + 27, y)
+#define SERIES_56(x, y) SERIES_28 (x, y), SERIES_28 (x + 28, y)
+#define SERIES_57(x, y) SERIES_28 (x, y), SERIES_29 (x + 28, y)
+#define SERIES_58(x, y) SERIES_29 (x, y), SERIES_29 (x + 29, y)
+#define SERIES_59(x, y) SERIES_29 (x, y), SERIES_30 (x + 29, y)
+#define SERIES_60(x, y) SERIES_30 (x, y), SERIES_30 (x + 30, y)
+#define SERIES_61(x, y) SERIES_30 (x, y), SERIES_31 (x + 30, y)
+#define SERIES_62(x, y) SERIES_31 (x, y), SERIES_31 (x + 31, y)
+#define SERIES_63(x, y) SERIES_31 (x, y), SERIES_32 (x + 31, y)
+#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y)
+#define SERIES_65(x, y) SERIES_32 (x, y), SERIES_33 (x + 32, y)
+#define SERIES_66(x, y) SERIES_33 (x, y), SERIES_33 (x + 33, y)
+#define SERIES_67(x, y) SERIES_33 (x, y), SERIES_34 (x + 33, y)
+#define SERIES_68(x, y) SERIES_34 (x, y), SERIES_34 (x + 34, y)
+#define SERIES_69(x, y) SERIES_34 (x, y), SERIES_35 (x + 34, y)
+#define SERIES_70(x, y) SERIES_35 (x, y), SERIES_35 (x + 35, y)
+#define SERIES_71(x, y) SERIES_35 (x, y), SERIES_36 (x + 35, y)
+#define SERIES_72(x, y) SERIES_36 (x, y), SERIES_36 (x + 36, y)
+#define SERIES_73(x, y) SERIES_36 (x, y), SERIES_37 (x + 36, y)
+#define SERIES_74(x, y) SERIES_37 (x, y), SERIES_37 (x + 37, y)
+#define SERIES_75(x, y) SERIES_37 (x, y), SERIES_38 (x + 37, y)
+#define SERIES_76(x, y) SERIES_38 (x, y), SERIES_38 (x + 38, y)
+#define SERIES_77(x, y) SERIES_38 (x, y), SERIES_39 (x + 38, y)
+#define SERIES_78(x, y) SERIES_39 (x, y), SERIES_39 (x + 39, y)
+#define SERIES_79(x, y) SERIES_39 (x, y), SERIES_40 (x + 39, y)
+#define SERIES_80(x, y) SERIES_40 (x, y), SERIES_40 (x + 40, y)
+#define SERIES_81(x, y) SERIES_40 (x, y), SERIES_41 (x + 40, y)
+#define SERIES_82(x, y) SERIES_41 (x, y), SERIES_41 (x + 41, y)
+#define SERIES_83(x, y) SERIES_41 (x, y), SERIES_42 (x + 41, y)
+#define SERIES_84(x, y) SERIES_42 (x, y), SERIES_42 (x + 42, y)
+#define SERIES_85(x, y) SERIES_42 (x, y), SERIES_43 (x + 42, y)
+#define SERIES_86(x, y) SERIES_43 (x, y), SERIES_43 (x + 43, y)
+#define SERIES_87(x, y) SERIES_43 (x, y), SERIES_44 (x + 43, y)
+#define SERIES_88(x, y) SERIES_44 (x, y), SERIES_44 (x + 44, y)
+#define SERIES_89(x, y) SERIES_44 (x, y), SERIES_45 (x + 44, y)
+#define SERIES_90(x, y) SERIES_45 (x, y), SERIES_45 (x + 45, y)
+#define SERIES_91(x, y) SERIES_45 (x, y), SERIES_46 (x + 45, y)
+#define SERIES_92(x, y) SERIES_46 (x, y), SERIES_46 (x + 46, y)
+#define SERIES_93(x, y) SERIES_46 (x, y), SERIES_47 (x + 46, y)
+#define SERIES_94(x, y) SERIES_47 (x, y), SERIES_47 (x + 47, y)
+#define SERIES_95(x, y) SERIES_47 (x, y), SERIES_48 (x + 47, y)
+#define SERIES_96(x, y) SERIES_48 (x, y), SERIES_48 (x + 48, y)
+#define SERIES_97(x, y) SERIES_48 (x, y), SERIES_49 (x + 48, y)
+#define SERIES_98(x, y) SERIES_49 (x, y), SERIES_49 (x + 49, y)
+#define SERIES_99(x, y) SERIES_49 (x, y), SERIES_50 (x + 49, y)
+#define SERIES_100(x, y) SERIES_50 (x, y), SERIES_50 (x + 50, y)
+#define SERIES_101(x, y) SERIES_50 (x, y), SERIES_51 (x + 50, y)
+#define SERIES_102(x, y) SERIES_51 (x, y), SERIES_51 (x + 51, y)
+#define SERIES_103(x, y) SERIES_51 (x, y), SERIES_52 (x + 51, y)
+#define SERIES_104(x, y) SERIES_52 (x, y), SERIES_52 (x + 52, y)
+#define SERIES_105(x, y) SERIES_52 (x, y), SERIES_53 (x + 52, y)
+#define SERIES_106(x, y) SERIES_53 (x, y), SERIES_53 (x + 53, y)
+#define SERIES_107(x, y) SERIES_53 (x, y), SERIES_54 (x + 53, y)
+#define SERIES_108(x, y) SERIES_54 (x, y), SERIES_54 (x + 54, y)
+#define SERIES_109(x, y) SERIES_54 (x, y), SERIES_55 (x + 54, y)
+#define SERIES_110(x, y) SERIES_55 (x, y), SERIES_55 (x + 55, y)
+#define SERIES_111(x, y) SERIES_55 (x, y), SERIES_56 (x + 55, y)
+#define SERIES_112(x, y) SERIES_56 (x, y), SERIES_56 (x + 56, y)
+#define SERIES_113(x, y) SERIES_56 (x, y), SERIES_57 (x + 56, y)
+#define SERIES_114(x, y) SERIES_57 (x, y), SERIES_57 (x + 57, y)
+#define SERIES_115(x, y) SERIES_57 (x, y), SERIES_58 (x + 57, y)
+#define SERIES_116(x, y) SERIES_58 (x, y), SERIES_58 (x + 58, y)
+#define SERIES_117(x, y) SERIES_58 (x, y), SERIES_59 (x + 58, y)
+#define SERIES_118(x, y) SERIES_59 (x, y), SERIES_59 (x + 59, y)
+#define SERIES_119(x, y) SERIES_59 (x, y), SERIES_60 (x + 59, y)
+#define SERIES_120(x, y) SERIES_60 (x, y), SERIES_60 (x + 60, y)
+#define SERIES_121(x, y) SERIES_60 (x, y), SERIES_61 (x + 60, y)
+#define SERIES_122(x, y) SERIES_61 (x, y), SERIES_61 (x + 61, y)
+#define SERIES_123(x, y) SERIES_61 (x, y), SERIES_62 (x + 61, y)
+#define SERIES_124(x, y) SERIES_62 (x, y), SERIES_62 (x + 62, y)
+#define SERIES_125(x, y) SERIES_62 (x, y), SERIES_63 (x + 62, y)
+#define SERIES_126(x, y) SERIES_63 (x, y), SERIES_63 (x + 63, y)
+#define SERIES_127(x, y) SERIES_63 (x, y), SERIES_64 (x + 63, y)
+#define SERIES_128(x, y) SERIES_64 (x, y), SERIES_64 (x + 64, y)
+#define SERIES_129(x, y) SERIES_64 (x, y), SERIES_65 (x + 64, y)
+
+#define PERMUTE4(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void permute4_##A##_##B##_##C##_##TYPE \
+  (TYPE values1, \
+   TYPE values2, \
+   TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASK4_##NUNITS (0, NUNITS, A, B, C)); \
+    *(TYPE *) out = v; \
+  }
+
+#define PERMUTE8(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void permute8_##A##_##B##_##C##_##TYPE \
+  (TYPE values1, \
+   TYPE values2, \
+   TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASK8_##NUNITS (0, NUNITS, A, B, C)); \
+    *(TYPE *) out = v; \
+  }
+
+#define PERMUTE16(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void permute16_##A##_##B##_##C##_##TYPE \
+  (TYPE values1, \
+   TYPE values2, \
+   TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASK16_##NUNITS (0, NUNITS, A, B, C)); \
+    *(TYPE *) out = v; \
+  }
+
+#define PERMUTE32(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void permute32_##A##_##B##_##C##_##TYPE \
+  (TYPE values1, \
+   TYPE values2, \
+   TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASK32_##NUNITS (0, NUNITS, A, B, C)); \
+    *(TYPE *) out = v; \
+  }
+
+#define PERMUTE64(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void permute64_##A##_##B##_##C##_##TYPE \
+  (TYPE values1, \
+   TYPE values2, \
+   TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASK64_##NUNITS (0, NUNITS, A, B, C)); \
+    *(TYPE *) out = v; \
+  }
+
+#define PERMUTE128(TYPE, NUNITS, A, B, C) \
+  __attribute__ ((noipa)) void permute128_##A##_##B##_##C##_##TYPE \
+  (TYPE values1, \
+   TYPE values2, \
+   TYPE *out) \
+  { \
+    TYPE v = __builtin_shufflevector (values1, values2, \
+				      MASK128_##NUNITS (0, NUNITS, A, B, C)); \
+    *(TYPE *) out = v; \
+  }
+
+#define TEST_128(FUNC, T) \
+  T (vnx128qi, 128, FUNC)
+
+#define TEST_64(FUNC, T) \
+  T (vnx64qi, 64, FUNC) \
+  T (vnx64hi, 64, FUNC) \
+  TEST_128(FUNC, T)
+
+#define TEST_32(FUNC, T) \
+  T (vnx32hi, 32, FUNC) \
+  T (vnx32si, 32, FUNC) \
+  T (vnx32sf, 32, FUNC) \
+  T (vnx32qi, 32, FUNC) \
+  TEST_64(FUNC, T)
+
+#define TEST_16(FUNC, T) \
+  T (vnx16qi, 16, FUNC) \
+  T (vnx16hi, 16, FUNC) \
+  T (vnx16si, 16, FUNC) \
+  T (vnx16di, 16, FUNC) \
+  T (vnx16sf, 16, FUNC) \
+  T (vnx16df, 16, FUNC) \
+  TEST_32(FUNC, T)
+
+#define TEST_8(FUNC, T) \
+  T (vnx8qi, 8, FUNC) \
+  T (vnx8hi, 8, FUNC) \
+  T (vnx8si, 8, FUNC) \
+  T (vnx8di, 8, FUNC) \
+  T (vnx8sf, 8, FUNC) \
+  T (vnx8df, 8, FUNC) \
+  TEST_16(FUNC, T)
+
+#define TEST_4(FUNC, T) \
+  T (vnx4qi, 4, FUNC) \
+  T (vnx4hi, 4, FUNC) \
+  T (vnx4si, 4, FUNC) \
+  T (vnx4di, 4, FUNC) \
+  T (vnx4sf, 4, FUNC) \
+  T (vnx4df, 4, FUNC) \
+  TEST_8(FUNC, T)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-1.c
new file mode 100644
index 00000000000..c91e1bc3735
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile { target { ! riscv_abi_e } } } */
+/* { dg-options "-O3 -march=rv64gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv64 } } } */
+/* { dg-options "-O3 -march=rv32gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv32 } } } */
+
+#define MASK(X, Y, A, B, C) SERIES_##A (X + C, Y), SERIES_##B (X + Y + A, Y)
+
+#define MASK4_4(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK4_8(X, Y, A, B, C) MASK4_4(X, Y, A, B, C), SERIES_4 (X + Y + 4, Y)
+#define MASK4_16(X, Y, A, B, C) MASK4_8(X, Y, A, B, C), SERIES_8 (X + Y + 8, Y)
+#define MASK4_32(X, Y, A, B, C) MASK4_16(X, Y, A, B, C), SERIES_16 (X + Y + 16, Y)
+#define MASK4_64(X, Y, A, B, C) MASK4_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK4_128(X, Y, A, B, C) MASK4_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK8_8(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK8_16(X, Y, A, B, C) MASK8_8(X, Y, A, B, C), SERIES_8 (X + Y + 8, Y)
+#define MASK8_32(X, Y, A, B, C) MASK8_16(X, Y, A, B, C), SERIES_16 (X + Y + 16, Y)
+#define MASK8_64(X, Y, A, B, C) MASK8_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK8_128(X, Y, A, B, C) MASK8_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK16_16(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK16_32(X, Y, A, B, C) MASK16_16(X, Y, A, B, C), SERIES_16 (X + Y + 16, Y)
+#define MASK16_64(X, Y, A, B, C) MASK16_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK16_128(X, Y, A, B, C) MASK16_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK32_32(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK32_64(X, Y, A, B, C) MASK32_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK32_128(X, Y, A, B, C) MASK32_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK64_64(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK64_128(X, Y, A, B, C) MASK64_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK128_128(X, Y, A, B, C) MASK(X, Y, A, B, C)
+
+#include "shuffle-slidedown-perm.h"
+
+/* All cases are covered by shuffle_slide_patterns but shuffle_merge_patterns
+   is called first, which is why some vmerge instructions appear here.  */
+/* { dg-final { scan-assembler-times "vslidedown" 477 } } */
+/* { dg-final { scan-assembler-times "vmerge" 164 } } */
+/* { dg-final { scan-assembler-not "vslideup" } } */
+/* { dg-final { scan-assembler-not "vrgather" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-2.c
new file mode 100644
index 00000000000..5fa7848f04c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-2.c
@@ -0,0 +1,41 @@
+/* { dg-do compile { target { ! riscv_abi_e } } } */
+/* { dg-options "-O3 -march=rv64gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv64 } } } */
+/* { dg-options "-O3 -march=rv32gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv32 } } } */
+
+#define MASK(X, Y, A, B, C) SERIES_##A (X + Y + C, Y), SERIES_##B (X + A, Y)
+
+#define MASK4_4(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK4_8(X, Y, A, B, C) MASK4_4(X, Y, A, B, C), SERIES_4 (X + 4, Y)
+#define MASK4_16(X, Y, A, B, C) MASK4_8(X, Y, A, B, C), SERIES_8 (X + 8, Y)
+#define MASK4_32(X, Y, A, B, C) MASK4_16(X, Y, A, B, C), SERIES_16 (X + 16, Y)
+#define MASK4_64(X, Y, A, B, C) MASK4_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK4_128(X, Y, A, B, C) MASK4_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK8_8(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK8_16(X, Y, A, B, C) MASK8_8(X, Y, A, B, C), SERIES_8 (X + 8, Y)
+#define MASK8_32(X, Y, A, B, C) MASK8_16(X, Y, A, B, C), SERIES_16 (X + 16, Y)
+#define MASK8_64(X, Y, A, B, C) MASK8_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK8_128(X, Y, A, B, C) MASK8_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK16_16(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK16_32(X, Y, A, B, C) MASK16_16(X, Y, A, B, C), SERIES_16 (X + 16, Y)
+#define MASK16_64(X, Y, A, B, C) MASK16_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK16_128(X, Y, A, B, C) MASK16_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK32_32(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK32_64(X, Y, A, B, C) MASK32_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK32_128(X, Y, A, B, C) MASK32_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK64_64(X, Y, A, B, C) MASK(X, Y, A, B, C)
+#define MASK64_128(X, Y, A, B, C) MASK64_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK128_128(X, Y, A, B, C) MASK(X, Y, A, B, C)
+
+#include "shuffle-slidedown-perm.h"
+
+/* All cases are covered by shuffle_slide_patterns but shuffle_merge_patterns
+   is called first, which is why some vmerge instructions appear here.  */
+/* { dg-final { scan-assembler-times "vslidedown" 477 } } */
+/* { dg-final { scan-assembler-times "vmerge" 164 } } */
+/* { dg-final { scan-assembler-not "vslideup" } } */
+/* { dg-final { scan-assembler-not "vrgather" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-perm.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-perm.h
new file mode 100644
index 00000000000..f031de4173c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slidedown-perm.h
@@ -0,0 +1,107 @@
+#include "shuffle-slide.h"
+
+/* All permutations with 4 and 8 elements.  */
+#define PERM4_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 3, 0)
+#define PERM4_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 3, 1)
+#define PERM4_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 3, 2)
+#define PERM4_4(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 2, 0)
+#define PERM4_5(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 2, 1)
+#define PERM4_6(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 1, 0)
+#define PERM8_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 7, 3)
+#define PERM8_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 7, 4)
+#define PERM8_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 7, 5)
+#define PERM8_4(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 7, 6)
+#define PERM8_5(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 6, 2)
+#define PERM8_6(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 6, 3)
+#define PERM8_7(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 6, 4)
+#define PERM8_8(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 6, 5)
+#define PERM8_9(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 5, 1)
+#define PERM8_10(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 5, 2)
+#define PERM8_11(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 5, 3)
+#define PERM8_12(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 5, 4)
+#define PERM8_13(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 4, 0)
+#define PERM8_14(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 4, 1)
+#define PERM8_15(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 4, 2)
+#define PERM8_16(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 4, 3)
+#define PERM8_17(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 5, 3, 0)
+#define PERM8_18(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 5, 3, 1)
+#define PERM8_19(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 5, 3, 2)
+#define PERM8_20(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 6, 2, 0)
+#define PERM8_21(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 6, 2, 1)
+#define PERM8_22(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 7, 1, 0)
+
+/* We don't test all possible permutations with higher numbers of elements
+   to avoid timing out.  */
+#define PERM16_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 12, 6)
+#define PERM16_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 7, 9, 4)
+#define PERM16_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 14, 2, 0)
+#define PERM32_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 30, 17)
+#define PERM32_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 29, 20)
+#define PERM32_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 13, 19, 18)
+#define PERM64_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 63, 31)
+#define PERM64_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 25, 39, 14)
+#define PERM64_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 59, 5, 3)
+#define PERM128_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 124, 73)
+#define PERM128_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 10, 118, 117)
+#define PERM128_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 22, 106, 50)
+#define PERM128_4(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 35, 93, 42)
+
+#define DO_ALL_TEST4(FUNC) \
+  TEST_4 (FUNC, PERM4_1) \
+  TEST_4 (FUNC, PERM4_2) \
+  TEST_4 (FUNC, PERM4_3) \
+  TEST_4 (FUNC, PERM4_4) \
+  TEST_4 (FUNC, PERM4_5) \
+  TEST_4 (FUNC, PERM4_6)
+
+#define DO_ALL_TEST8(FUNC) \
+  TEST_8 (FUNC, PERM8_1) \
+  TEST_8 (FUNC, PERM8_2) \
+  TEST_8 (FUNC, PERM8_3) \
+  TEST_8 (FUNC, PERM8_4) \
+  TEST_8 (FUNC, PERM8_5) \
+  TEST_8 (FUNC, PERM8_6) \
+  TEST_8 (FUNC, PERM8_7) \
+  TEST_8 (FUNC, PERM8_8) \
+  TEST_8 (FUNC, PERM8_9) \
+  TEST_8 (FUNC, PERM8_10) \
+  TEST_8 (FUNC, PERM8_11) \
+  TEST_8 (FUNC, PERM8_12) \
+  TEST_8 (FUNC, PERM8_13) \
+  TEST_8 (FUNC, PERM8_14) \
+  TEST_8 (FUNC, PERM8_15) \
+  TEST_8 (FUNC, PERM8_16) \
+  TEST_8 (FUNC, PERM8_17) \
+  TEST_8 (FUNC, PERM8_18) \
+  TEST_8 (FUNC, PERM8_19) \
+  TEST_8 (FUNC, PERM8_20) \
+  TEST_8 (FUNC, PERM8_21) \
+  TEST_8 (FUNC, PERM8_22)
+
+#define DO_ALL_TEST16(FUNC) \
+  TEST_16 (FUNC, PERM16_1) \
+  TEST_16 (FUNC, PERM16_2) \
+  TEST_16 (FUNC, PERM16_3)
+
+#define DO_ALL_TEST32(FUNC) \
+  TEST_32 (FUNC, PERM32_1) \
+  TEST_32 (FUNC, PERM32_2) \
+  TEST_32 (FUNC, PERM32_3)
+
+#define DO_ALL_TEST64(FUNC) \
+  TEST_64 (FUNC, PERM64_1) \
+  TEST_64 (FUNC, PERM64_2) \
+  TEST_64 (FUNC, PERM64_3)
+
+#define DO_ALL_TEST128(FUNC) \
+  TEST_128 (FUNC, PERM128_1) \
+  TEST_128 (FUNC, PERM128_2) \
+  TEST_128 (FUNC, PERM128_3) \
+  TEST_128 (FUNC, PERM128_4)
+
+DO_ALL_TEST4(PERMUTE4)
+DO_ALL_TEST8(PERMUTE8)
+DO_ALL_TEST16(PERMUTE16)
+DO_ALL_TEST32(PERMUTE32)
+DO_ALL_TEST64(PERMUTE64)
+DO_ALL_TEST128(PERMUTE128)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-1.c
new file mode 100644
index 00000000000..47a5f8692ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target { ! riscv_abi_e } } } */
+/* { dg-options "-O3 -march=rv64gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv64 } } } */
+/* { dg-options "-O3 -march=rv32gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv32 } } } */
+
+#define MASK4_4(X, Y, A, B, C) SERIES_##A (X, Y), SERIES_##B (X + Y, Y), SERIES_##C (X + 4 - C, Y)
+#define MASK4_8(X, Y, A, B, C) MASK4_4(X, Y, A, B, C), SERIES_4 (X + 4, Y)
+#define MASK4_16(X, Y, A, B, C) MASK4_8(X, Y, A, B, C), SERIES_8 (X + 8, Y)
+#define MASK4_32(X, Y, A, B, C) MASK4_16(X, Y, A, B, C), SERIES_16 (X + 16, Y)
+#define MASK4_64(X, Y, A, B, C) MASK4_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK4_128(X, Y, A, B, C) MASK4_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK8_8(X, Y, A, B, C) SERIES_##A (X, Y), SERIES_##B (X + Y, Y), SERIES_##C (X + 8 - C, Y)
+#define MASK8_16(X, Y, A, B, C) MASK8_8(X, Y, A, B, C), SERIES_8 (X + 8, Y)
+#define MASK8_32(X, Y, A, B, C) MASK8_16(X, Y, A, B, C), SERIES_16 (X + 16, Y)
+#define MASK8_64(X, Y, A, B, C) MASK8_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK8_128(X, Y, A, B, C) MASK8_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK16_16(X, Y, A, B, C) SERIES_##A (X, Y), SERIES_##B (X + Y, Y), SERIES_##C (X + 16 - C, Y)
+#define MASK16_32(X, Y, A, B, C) MASK16_16(X, Y, A, B, C), SERIES_16 (X + 16, Y)
+#define MASK16_64(X, Y, A, B, C) MASK16_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK16_128(X, Y, A, B, C) MASK16_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK32_32(X, Y, A, B, C) SERIES_##A (X, Y), SERIES_##B (X + Y, Y), SERIES_##C (X + 32 - C, Y)
+#define MASK32_64(X, Y, A, B, C) MASK32_32(X, Y, A, B, C), SERIES_32 (X + 32, Y)
+#define MASK32_128(X, Y, A, B, C) MASK32_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK64_64(X, Y, A, B, C) SERIES_##A (X, Y), SERIES_##B (X + Y, Y), SERIES_##C (X + 64 - C, Y)
+#define MASK64_128(X, Y, A, B, C) MASK64_64(X, Y, A, B, C), SERIES_64 (X + 64, Y)
+
+#define MASK128_128(X, Y, A, B, C) SERIES_##A (X, Y), SERIES_##B (X + Y, Y), SERIES_##C (X + 128 - C, Y)
+
+#include "shuffle-slideup-perm.h"
+
+/* { dg-final { scan-assembler-times "vslideup" 490 } } */
+/* { dg-final { scan-assembler-not "vslidedown" } } */
+/* { dg-final { scan-assembler-not "vrgather" } } */
+/* { dg-final { scan-assembler-not "vmerge" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-2.c
new file mode 100644
index 00000000000..cc82dd185ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile { target { ! riscv_abi_e } } } */
+/* { dg-options "-O3 -march=rv64gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv64 } } } */
+/* { dg-options "-O3 -march=rv32gcv -mrvv-max-lmul=m8 -Wno-overflow" { target { rv32 } } } */
+
+#define MASK4_4(X, Y, A, B, C) SERIES_##A (X + Y, Y), SERIES_##B (X, Y), SERIES_##C (X + Y + 4 - C, Y)
+#define MASK4_8(X, Y, A, B, C) MASK4_4(X, Y, A, B, C), SERIES_4 (X + Y + 4, Y)
+#define MASK4_16(X, Y, A, B, C) MASK4_8(X, Y, A, B, C), SERIES_8 (X + Y + 8, Y)
+#define MASK4_32(X, Y, A, B, C) MASK4_16(X, Y, A, B, C), SERIES_16 (X + Y + 16, Y)
+#define MASK4_64(X, Y, A, B, C) MASK4_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK4_128(X, Y, A, B, C) MASK4_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK8_8(X, Y, A, B, C) SERIES_##A (X + Y, Y), SERIES_##B (X, Y), SERIES_##C (X + Y + 8 - C, Y)
+#define MASK8_16(X, Y, A, B, C) MASK8_8(X, Y, A, B, C), SERIES_8 (X + Y + 8, Y)
+#define MASK8_32(X, Y, A, B, C) MASK8_16(X, Y, A, B, C), SERIES_16 (X + Y + 16, Y)
+#define MASK8_64(X, Y, A, B, C) MASK8_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK8_128(X, Y, A, B, C) MASK8_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK16_16(X, Y, A, B, C) SERIES_##A (X + Y, Y), SERIES_##B (X, Y), SERIES_##C (X + Y + 16 - C, Y)
+#define MASK16_32(X, Y, A, B, C) MASK16_16(X, Y, A, B, C), SERIES_16 (X + Y + 16, Y)
+#define MASK16_64(X, Y, A, B, C) MASK16_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK16_128(X, Y, A, B, C) MASK16_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK32_32(X, Y, A, B, C) SERIES_##A (X + Y, Y), SERIES_##B (X, Y), SERIES_##C (X + Y + 32 - C, Y)
+#define MASK32_64(X, Y, A, B, C) MASK32_32(X, Y, A, B, C), SERIES_32 (X + Y + 32, Y)
+#define MASK32_128(X, Y, A, B, C) MASK32_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK64_64(X, Y, A, B, C) SERIES_##A (X + Y, Y), SERIES_##B (X, Y), SERIES_##C (X + Y + 64 - C, Y)
+#define MASK64_128(X, Y, A, B, C) MASK64_64(X, Y, A, B, C), SERIES_64 (X + Y + 64, Y)
+
+#define MASK128_128(X, Y, A, B, C) SERIES_##A (X + Y, Y), SERIES_##B (X, Y), SERIES_##C (X + Y + 128 - C, Y)
+
+#include "shuffle-slideup-perm.h"
+
+/* { dg-final { scan-assembler-times "vslideup" 490 } } */
+/* { dg-final { scan-assembler-not "vslidedown" } } */
+/* { dg-final { scan-assembler-not "vrgather" } } */
+/* { dg-final { scan-assembler-not "vmerge" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-perm.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-perm.h
new file mode 100644
index 00000000000..907793f5e1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slideup-perm.h
@@ -0,0 +1,93 @@
+#include "shuffle-slide.h"
+
+/* All permutations with 4 and 8 elements.  */
+#define PERM4_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 1, 2)
+#define PERM4_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 2, 1)
+#define PERM4_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 1, 1)
+#define PERM8_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 3, 4)
+#define PERM8_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 4, 3)
+#define PERM8_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 5, 2)
+#define PERM8_4(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 6, 1)
+#define PERM8_5(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 2, 4)
+#define PERM8_6(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 3, 3)
+#define PERM8_7(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 4, 2)
+#define PERM8_8(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 5, 1)
+#define PERM8_9(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 1, 4)
+#define PERM8_10(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 2, 3)
+#define PERM8_11(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 3, 2)
+#define PERM8_12(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 3, 4, 1)
+#define PERM8_13(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 1, 3)
+#define PERM8_14(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 2, 2)
+#define PERM8_15(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 3, 1)
+#define PERM8_16(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 5, 1, 2)
+#define PERM8_17(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 5, 2, 1)
+#define PERM8_18(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 6, 1, 1)
+
+/* We don't test all possible permutations with higher numbers of elements
+   to avoid timing out.  */
+#define PERM16_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 1, 13, 2)
+#define PERM16_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 9, 3)
+#define PERM16_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 11, 4, 1)
+#define PERM32_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 4, 27, 1)
+#define PERM32_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 6, 19, 7)
+#define PERM32_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 20, 4, 8)
+#define PERM64_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 37, 25)
+#define PERM64_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 6, 29, 29)
+#define PERM64_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 34, 10, 20)
+#define PERM128_1(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 2, 68, 58)
+#define PERM128_2(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 32, 45, 51)
+#define PERM128_3(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 60, 63, 5)
+#define PERM128_4(TYPE, NUNITS, FUNC) FUNC(TYPE, NUNITS, 81, 7, 40)
+
+#define DO_ALL_TEST4(FUNC) \
+  TEST_4 (FUNC, PERM4_1) \
+  TEST_4 (FUNC, PERM4_2) \
+  TEST_4 (FUNC, PERM4_3)
+
+#define DO_ALL_TEST8(FUNC) \
+  TEST_8 (FUNC, PERM8_1) \
+  TEST_8 (FUNC, PERM8_2) \
+  TEST_8 (FUNC, PERM8_3) \
+  TEST_8 (FUNC, PERM8_4) \
+  TEST_8 (FUNC, PERM8_5) \
+  TEST_8 (FUNC, PERM8_6) \
+  TEST_8 (FUNC, PERM8_7) \
+  TEST_8 (FUNC, PERM8_8) \
+  TEST_8 (FUNC, PERM8_9) \
+  TEST_8 (FUNC, PERM8_10) \
+  TEST_8 (FUNC, PERM8_11) \
+  TEST_8 (FUNC, PERM8_12) \
+  TEST_8 (FUNC, PERM8_13) \
+  TEST_8 (FUNC, PERM8_14) \
+  TEST_8 (FUNC, PERM8_15) \
+  TEST_8 (FUNC, PERM8_16) \
+  TEST_8 (FUNC, PERM8_17) \
+  TEST_8 (FUNC, PERM8_18)
+
+#define DO_ALL_TEST16(FUNC) \
+  TEST_16 (FUNC, PERM16_1) \
+  TEST_16 (FUNC, PERM16_2) \
+  TEST_16 (FUNC, PERM16_3)
+
+#define DO_ALL_TEST32(FUNC) \
+  TEST_32 (FUNC, PERM32_1) \
+  TEST_32 (FUNC, PERM32_2) \
+  TEST_32 (FUNC, PERM32_3)
+
+#define DO_ALL_TEST64(FUNC) \
+  TEST_64 (FUNC, PERM64_1) \
+  TEST_64 (FUNC, PERM64_2) \
+  TEST_64 (FUNC, PERM64_3)
+
+#define DO_ALL_TEST128(FUNC) \
+  TEST_128 (FUNC, PERM128_1) \
+  TEST_128 (FUNC, PERM128_2) \
+  TEST_128 (FUNC, PERM128_3) \
+  TEST_128 (FUNC, PERM128_4)
+
+DO_ALL_TEST4(PERMUTE4)
+DO_ALL_TEST8(PERMUTE8)
+DO_ALL_TEST16(PERMUTE16)
+DO_ALL_TEST32(PERMUTE32)
+DO_ALL_TEST64(PERMUTE64)
+DO_ALL_TEST128(PERMUTE128)
-- 
2.51.0