https://gcc.gnu.org/g:92edf176fff176381646db7abe8bfd2dfbb83cb6
commit r16-6297-g92edf176fff176381646db7abe8bfd2dfbb83cb6 Author: Robin Dapp <[email protected]> Date: Thu Nov 13 09:23:40 2025 +0100 RISC-V: Generic vec_extract via subreg. We are missing several vec_extract chances because the current autovec patterns are not comprehensive. In particular, we don't extract from pseudo-VLA modes that are actually VLS modes (just VLA modes in name). Rather than add even more mode combinations to vec_extract, this patch uses a dynamic approach in legitimize_move. At that point we can just check if the mode sizes make sense and then emit the same code as before. This is not the ideal solution, as the middle-end, and the vectorizer in particular, query the vec_extract optab for support and won't emit certain code sequences if it's not present (e.g. in VMAT_STRIDED_SLP or when trying intermediate-sized vectors in a chain). For simple BIT_FIELD_REFs it works, though. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vector_subreg_extract): New function that checks for and performs "vector extracts". (legitimize_move): Call new function. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/subreg-extract.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc | 88 ++++++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/subreg-extract.c | 19 +++++ 2 files changed, 107 insertions(+) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index e519211d6919..321b5172783d 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1880,6 +1880,76 @@ get_frm_mode (rtx operand) gcc_unreachable (); } +/* Expand vector extraction from a SUBREG source using slidedown. + Handles patterns like (subreg:V4DI (reg:V8DI) 32) by emitting + a slidedown instruction when extracting non-low parts. + Return true if the move was handled and emitted. 
*/ +static bool +expand_vector_subreg_extract (rtx dest, rtx src) +{ + gcc_assert (SUBREG_P (src) && REG_P (SUBREG_REG (src))); + + machine_mode mode = GET_MODE (dest); + machine_mode inner_mode = GET_MODE (SUBREG_REG (src)); + + gcc_assert (VECTOR_MODE_P (mode)); + gcc_assert (VECTOR_MODE_P (inner_mode)); + + poly_uint16 outer_size = GET_MODE_BITSIZE (mode); + poly_uint16 inner_size = GET_MODE_BITSIZE (inner_mode); + + poly_uint16 factor; + if (riscv_tuple_mode_p (inner_mode) + || !multiple_p (inner_size, outer_size, &factor) + || !factor.is_constant () + || !pow2p_hwi (factor.to_constant ()) + || factor.to_constant () <= 1) + return false; + + enum vlmul_type lmul = get_vlmul (mode); + enum vlmul_type inner_lmul = get_vlmul (inner_mode); + + /* These are just "renames". */ + if ((inner_lmul == LMUL_2 || inner_lmul == LMUL_4 || inner_lmul == LMUL_8) + && (lmul == LMUL_1 || lmul == LMUL_2 || lmul == LMUL_4)) + return false; + + poly_uint64 outer_nunits = GET_MODE_NUNITS (mode); + poly_uint64 subreg_byte = SUBREG_BYTE (src); + + /* Calculate which part we're extracting (0 for low half, 1 for + higher half/quarter, etc.) */ + uint64_t part; + if (!exact_div (subreg_byte * BITS_PER_UNIT, outer_size).is_constant (&part)) + return false; + + rtx inner_reg = SUBREG_REG (src); + rtx tmp_out = gen_reg_rtx (mode); + + if (part == 0) + { + /* Emit a direct reg-reg set here instead of emit_move_insn as that + would trigger another legitimize_move. */ + emit_insn (gen_rtx_SET (tmp_out, gen_lowpart (mode, inner_reg))); + } + else + { + /* Extracting a non-zero part means we need to slide down. */ + poly_uint64 slide_count = part * outer_nunits; + + rtx tmp = gen_reg_rtx (inner_mode); + rtx ops[] = {tmp, inner_reg, gen_int_mode (slide_count, Pmode)}; + insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, inner_mode); + emit_vlmax_insn (icode, BINARY_OP, ops); + + /* Extract the low part after sliding. 
*/ + emit_insn (gen_rtx_SET (tmp_out, gen_lowpart (mode, tmp))); + } + + emit_move_insn (dest, tmp_out); + return true; +} + /* Expand a pre-RA RVV data move from SRC to DEST. It expands move for RVV fractional vector modes. Return true if the move as already been emitted. */ @@ -1894,6 +1964,24 @@ legitimize_move (rtx dest, rtx *srcp) return true; } + /* The canonical way of extracting vectors from vectors is the vec_extract + optab with appropriate source and dest modes. This is rather a VLS style + approach, though as we would need to enumerate all dest modes that are + half, quarter, etc. the size of the source. It becomes particularly + cumbersome if we have a mix of VLA and VLS, i.e. extracting a smaller + VLS vector from a "VLA" vector. Therefore we recognize patterns like + (set reg:V4DI + (subreg:V4DI (reg:V8DI) offset)) + and transform them into vector slidedowns. */ + if (SUBREG_P (src) && REG_P (SUBREG_REG (src)) + && VECTOR_MODE_P (GET_MODE (SUBREG_REG (src))) + && VECTOR_MODE_P (mode) + && !lra_in_progress) + { + if (expand_vector_subreg_extract (dest, src)) + return true; + } + if (riscv_vls_mode_p (mode)) { if (GET_MODE_NUNITS (mode).to_constant () <= 31) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/subreg-extract.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/subreg-extract.c new file mode 100644 index 000000000000..a2b568a0ee74 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/subreg-extract.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl -fno-vect-cost-model" } */ + +int a[35] = { 1, 1, 3 }; + +void +foo () +{ + for (int b = 4; b >= 0; b--) + { + int tem = a[b * 5 + 3 + 1]; + a[b * 5 + 3] = tem; + a[b * 5 + 2] = tem; + a[b * 5 + 1] = tem; + a[b * 5 + 0] = tem; + } +} + +/* { dg-final { scan-assembler-times "vslidedown" 2 } } */
