https://gcc.gnu.org/g:62631c39a788161ff2f686adf355d10443e0d899

commit r16-4271-g62631c39a788161ff2f686adf355d10443e0d899
Author: Robin Dapp <[email protected]>
Date:   Tue Oct 7 07:18:27 2025 -0600

    [PATCH] RISC-V: Detect wrap in shuffle_series_pattern [PR121845].
    
    Hi,
    
    In shuffle_series_patterns we use series_p to determine if the permute
    mask is a simple series.  This didn't take into account that series_p
    also returns true for e.g. {0, 3, 2, 1} where the step is 3 and the
    indices form a series modulo 4.
    
    We emit
     vid + vmul
    in order to synthesize a series.  In order to be always correct we would
    need a vrem afterwards still which does not seem worth it.
    
    This patch adds the modulo for VLA permutes and punts if we wrap around
    for VLS permutes.  I'm not really certain whether we'll really see a
    wrapping VLA series (certainly we haven't so far in the test suite) but
    as we observed a VLS one here now it appears conservatively correct to
    modulo the indices.
    
    Regtested on rv64gcv_zvl512b.
    
    Regards
     Robin
    
            PR target/121845
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-v.cc (shuffle_series_patterns):
            Modulo indices for VLA and punt when wrapping for VLS.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/pr121845.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc                        | 38 +++++++++++++++++++++-
 .../gcc.target/riscv/rvv/autovec/pr121845.c        | 37 +++++++++++++++++++++
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ec713eea263b..70f02fd01537 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4230,6 +4230,9 @@ shuffle_series_patterns (struct expand_vec_perm_d *d)
   bool need_insert = false;
   bool have_series = false;
 
+  poly_int64 len = d->perm.length ();
+  bool need_modulo = !len.is_constant ();
+
   /* Check for a full series.  */
   if (known_ne (step1, 0) && d->perm.series_p (0, 1, el1, step1))
     have_series = true;
@@ -4241,7 +4244,33 @@ shuffle_series_patterns (struct expand_vec_perm_d *d)
       need_insert = true;
     }
 
-  if (!have_series)
+  /* A permute like {0, 3, 2, 1} is recognized as series because series_p also
+     allows wrapping/modulo of the permute index.  The step would be 3 and the
+     indices are correct modulo 4.  As noted in expand_vec_perm vrgather does
+     not handle wrapping but rather zeros out-of-bounds indices.
+     This means we would need to emit an explicit modulo operation here which
+     does not seem worth it.  We rather defer to the generic handling instead.
+     Even in the non-wrapping case it is doubtful whether
+      vid
+      vmul
+      vrgather
+     is preferable over
+      vle
+      vrgather.
+     If the permute mask can be reused there shouldn't be any difference and
+     otherwise it becomes a question of load bandwidth.  */
+  if (have_series && len.is_constant ())
+    {
+      int64_t step = need_insert ? step2.to_constant () : step1.to_constant ();
+      int prec = GET_MODE_PRECISION (GET_MODE_INNER (d->vmode));
+      wide_int wlen = wide_int::from (len.to_constant (), prec * 2, SIGNED);
+      wide_int wstep = wide_int::from (step, prec * 2, SIGNED);
+      wide_int result = wi::mul (wlen, wstep);
+      if (wi::gt_p (result, wlen, SIGNED))
+       need_modulo = true;
+    }
+
+  if (!have_series || (len.is_constant () && need_modulo))
     return false;
 
   /* Disable shuffle if we can't find an appropriate integer index mode for
@@ -4260,6 +4289,13 @@ shuffle_series_patterns (struct expand_vec_perm_d *d)
   expand_vec_series (series, gen_int_mode (need_insert ? el2 : el1, eltmode),
                     gen_int_mode (need_insert ? step2 : step1, eltmode));
 
+  if (need_modulo)
+    {
+      rtx mod = gen_const_vector_dup (sel_mode, len - 1);
+      series = expand_simple_binop (sel_mode, AND, series, mod, NULL,
+                                   0, OPTAB_DIRECT);
+    }
+
   /* Insert the remaining element if necessary.  */
   if (need_insert)
     {
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121845.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121845.c
new file mode 100644
index 000000000000..84aca3cd8e74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121845.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O0" } */
+
+#include <stdint.h>
+typedef uint32_t a;
+typedef uint64_t uint64;
+
+uint64 b;
+__attribute__ ((__vector_size__ (4 * sizeof (a)))) a f = {504339, 7, 3};
+uint64 *g = &b;
+
+int32_t *
+c (uint8_t, int32_t *, uint32_t, uint32_t, int64_t);
+int8_t
+d ()
+{
+  int32_t e;
+  c (0, &e, 0, 0, 1);
+  return 0;
+}
+
+int32_t *
+c (uint8_t, int32_t *j, uint32_t, uint32_t, int64_t)
+{
+  f = __builtin_shufflevector (f, f, 0, 3, 2, 1);
+  *g = f[2];
+  return j;
+}
+
+int
+main ()
+{
+  d ();
+  if (b != 3)
+    __builtin_abort ();
+}

Reply via email to