[gcc r16-6297] RISC-V: Generic vec_extract via subreg.

Robin Dapp via Gcc-cvs Fri, 19 Dec 2025 10:44:15 -0800

https://gcc.gnu.org/g:92edf176fff176381646db7abe8bfd2dfbb83cb6


commit r16-6297-g92edf176fff176381646db7abe8bfd2dfbb83cb6
Author: Robin Dapp <[email protected]>
Date:   Thu Nov 13 09:23:40 2025 +0100

    RISC-V: Generic vec_extract via subreg.
    
    We are missing several vec_extract chances because the current autovec
    patterns are not comprehensive.  In particular we don't extract from
    pseudo-VLA modes that are actually VLS modes (just VLA modes in name).
    
    Rather than add even more mode combinations to vec_extract, this patch
    uses a dynamic approach in legitimize_move.  At that point we can just check
    if the mode sizes make sense and then emit the same code as before.
    
    This is not the ideal solution as the middle-end and the vectorizer in
    particular query the vec_extract optab for support and won't emit
    certain code sequences if it's not present (e.g. in VMAT_STRIDED_SLP
    or when trying intermediate-sized vectors in a chain).
    For simple BIT_FIELD_REFs it works, though.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-v.cc (expand_vector_subreg_extract): New
            function that checks for and performs "vector extracts".
            (legitimize_move): Call new function.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/subreg-extract.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc                        | 88 ++++++++++++++++++++++
 .../gcc.target/riscv/rvv/autovec/subreg-extract.c  | 19 +++++
 2 files changed, 107 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e519211d6919..321b5172783d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1880,6 +1880,76 @@ get_frm_mode (rtx operand)
   gcc_unreachable ();
 }
 
+/* Expand a move DEST = SRC where SRC is a vector SUBREG of a vector REG,
+   e.g. (subreg:V4DI (reg:V8DI) 32).  The low part is taken directly, any
+   other part via a vlmax slidedown followed by a lowpart.  Return true if
+   the move was emitted, false if the caller has to handle it.  */
+static bool
+expand_vector_subreg_extract (rtx dest, rtx src)
+{
+  gcc_assert (SUBREG_P (src) && REG_P (SUBREG_REG (src)));
+
+  machine_mode mode = GET_MODE (dest);
+  machine_mode inner_mode = GET_MODE (SUBREG_REG (src));
+
+  gcc_assert (VECTOR_MODE_P (mode));
+  gcc_assert (VECTOR_MODE_P (inner_mode));
+
+  poly_uint16 outer_size = GET_MODE_BITSIZE (mode);
+  poly_uint16 inner_size = GET_MODE_BITSIZE (inner_mode);
+
+  poly_uint16 factor;  /* INNER_SIZE / OUTER_SIZE, set by multiple_p.  */
+  if (riscv_tuple_mode_p (inner_mode)
+      || !multiple_p (inner_size, outer_size, &factor)
+      || !factor.is_constant ()
+      || !pow2p_hwi (factor.to_constant ())
+      || factor.to_constant () <= 1)  /* Same size: nothing to extract.  */
+    return false;
+
+  enum vlmul_type lmul = get_vlmul (mode);
+  enum vlmul_type inner_lmul = get_vlmul (inner_mode);
+
+  /* LMUL >= 2 source with LMUL >= 1 result: these are just "renames".  */
+  if ((inner_lmul == LMUL_2 || inner_lmul == LMUL_4 || inner_lmul == LMUL_8)
+      && (lmul == LMUL_1 || lmul == LMUL_2 || lmul == LMUL_4))
+    return false;
+
+  poly_uint64 outer_nunits = GET_MODE_NUNITS (mode);
+  poly_uint64 subreg_byte = SUBREG_BYTE (src);
+
+  /* Index of the MODE-sized part we're extracting (0 is the low part):
+     the subreg offset in bits divided by OUTER_SIZE.  */
+  uint64_t part;
+  if (!exact_div (subreg_byte * BITS_PER_UNIT, outer_size).is_constant (&part))
+    return false;
+
+  rtx inner_reg = SUBREG_REG (src);
+  rtx tmp_out = gen_reg_rtx (mode);
+
+  if (part == 0)
+    {
+      /* Emit a direct reg-reg set here instead of emit_move_insn as that
+        would trigger another legitimize_move.  */
+      emit_insn (gen_rtx_SET (tmp_out, gen_lowpart (mode, inner_reg)));
+    }
+  else
+    {
+      /* A non-zero part needs a slidedown by PART times MODE's nunits.  */
+      poly_uint64 slide_count = part * outer_nunits;
+
+      rtx tmp = gen_reg_rtx (inner_mode);
+      rtx ops[] = {tmp, inner_reg, gen_int_mode (slide_count, Pmode)};
+      insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, inner_mode);
+      emit_vlmax_insn (icode, BINARY_OP, ops);
+
+      /* The wanted part is now the low part of TMP; extract it.  */
+      emit_insn (gen_rtx_SET (tmp_out, gen_lowpart (mode, tmp)));
+    }
+
+  emit_move_insn (dest, tmp_out);
+  return true;
+}
+
 /* Expand a pre-RA RVV data move from SRC to DEST.
    It expands move for RVV fractional vector modes.
    Return true if the move as already been emitted.  */
@@ -1894,6 +1964,24 @@ legitimize_move (rtx dest, rtx *srcp)
       return true;
     }
 
+  /* The canonical way of extracting vectors from vectors is the vec_extract
+     optab with appropriate source and dest modes.  This is rather a VLS style
+     approach, though as we would need to enumerate all dest modes that are
+     half, quarter, etc. the size of the source.  It becomes particularly
+     cumbersome if we have a mix of VLA and VLS, i.e. extracting a smaller
+     VLS vector from a "VLA" vector.  Therefore we recognize patterns like
+       (set reg:V4DI
+         (subreg:V4DI (reg:V8DI) offset))
+     and transform them into vector slidedowns.  */
+  if (SUBREG_P (src) && REG_P (SUBREG_REG (src))
+      && VECTOR_MODE_P (GET_MODE (SUBREG_REG (src)))
+      && VECTOR_MODE_P (mode)
+      && !lra_in_progress)
+    {
+      if (expand_vector_subreg_extract (dest, src))
+       return true;
+    }
+
   if (riscv_vls_mode_p (mode))
     {
       if (GET_MODE_NUNITS (mode).to_constant () <= 31)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/subreg-extract.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/subreg-extract.c
new file mode 100644
index 000000000000..a2b568a0ee74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/subreg-extract.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl -fno-vect-cost-model" } */
+
+int a[35] = { 1, 1, 3 };
+
+void
+foo ()
+{
+  for (int b = 4; b >= 0; b--)  /* Groups of 5 ints, highest group first.  */
+    {
+      int tem = a[b * 5 + 3 + 1];  /* Last element of the group...  */
+      a[b * 5 + 3] = tem;  /* ...copied to the four elements below it.  */
+      a[b * 5 + 2] = tem;
+      a[b * 5 + 1] = tem;
+      a[b * 5 + 0] = tem;
+    }
+}
+
+/* { dg-final { scan-assembler-times "vslidedown" 2 } } */

[gcc r16-6297] RISC-V: Generic vec_extract via subreg.

Reply via email to