On Tue, Dec 2, 2025 at 10:32 PM Robin Dapp <[email protected]> wrote:
>
> Similar to vec_extract this implements vec_sets that are present as
> subreg inserts. Similar to a single element we can just slide up
> a vector with the TU policy.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-v.cc (expand_vector_subreg_insert):
> New function.
> (legitimize_move): Use new function.
> ---
> gcc/config/riscv/riscv-v.cc | 87 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 87 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 1b22f9e948e..835947892e9 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -1944,6 +1944,80 @@ expand_vector_subreg_extract (rtx dest, rtx src)
> return true;
> }
>
> +/* Expand vector insertion into a SUBREG destination using slideup.
> + Handles patterns like (subreg:V4DI (reg:V8DI) 32) by emitting
> + a slideup instruction when inserting into non-low parts.
> + Return true if the move was handled and emitted. */
> +static bool
> +expand_vector_subreg_insert (rtx dest, rtx src)
> +{
> + gcc_assert (SUBREG_P (dest) && REG_P (SUBREG_REG (dest)) && REG_P (src));
> +
> + machine_mode mode = GET_MODE (src);
> + machine_mode inner_mode = GET_MODE (SUBREG_REG (dest));
> +
> + gcc_assert (VECTOR_MODE_P (mode));
> + gcc_assert (VECTOR_MODE_P (inner_mode));
> +
> + poly_uint16 outer_size = GET_MODE_BITSIZE (mode);
> + poly_uint16 inner_size = GET_MODE_BITSIZE (inner_mode);
> +
> + poly_uint16 factor;
> + if (riscv_tuple_mode_p (inner_mode)
> + || !multiple_p (inner_size, outer_size, &factor)
> + || !factor.is_constant ()
> + || !pow2p_hwi (factor.to_constant ())
> + || factor.to_constant () <= 1)
> + return false;
> +
> + enum vlmul_type lmul = get_vlmul (mode);
> + enum vlmul_type inner_lmul = get_vlmul (inner_mode);
> +
> + /* These are just "renames". */
> + if ((inner_lmul == LMUL_2 || inner_lmul == LMUL_4 || inner_lmul == LMUL_8)
> + && (lmul == LMUL_1 || lmul == LMUL_2 || lmul == LMUL_4))
Should we also check SUBREG_BYTE (dest) == 0 here?
> + {
> + /* Inserting into a non-zero part means we need to slide up. */
> + poly_uint64 slide_count = part * outer_nunits;
> +
> + /* First, broadcast the source value into a temporary vector. */
> + rtx tmp = gen_reg_rtx (inner_mode);
> +
> + /* Create a vector with src in the low part. */
> + rtx low_tmp = gen_lowpart (mode, tmp);
> + emit_insn (gen_rtx_SET (low_tmp, src));
> +
> + /* Slide it up to the correct position in inner_reg.
> + Use TUMA (tail-undisturbed, mask-undisturbed) to preserve
Typo here: s/mask-undisturbed/mask-agnostic/ (the "MA" in TUMA stands for mask-agnostic).