On Wed, 1 Jul 2026, Tamar Christina wrote:
> Currently we do not support vec_init for VLA vectors and instead fall back to
> storing piecewise through memory.
>
> However, there are no defined semantics for this. This patch defines them:
> for VLA constructors the vector has to be cleared to zero before being
> constructed piecewise from scalar elements. This means unspecified
> elements are initialized to zero.
>
> Without this patch
>
> #include <arm_sve.h>
>
> svint32_t __attribute__ ((noipa))
> func_init4 (int32_t a, int32_t b, int32_t c)
> {
> svint32_t temp = {a, b, c};
> return temp;
> }
>
> compiles to:
>
> func_init4:
> addvl sp, sp, #-3
> movi d30, #0
> str z30, [sp, #2, mul vl]
> addvl x3, sp, #2
> str w0, [x3]
> addvl x0, sp, #1
> add x0, x0, 4
> ldr z31, [sp, #2, mul vl]
> str z31, [sp, #1, mul vl]
> str w1, [x0]
> ldr z31, [sp, #1, mul vl]
> str z31, [sp]
> str w2, [sp, 8]
> ldr z0, [sp]
> addvl sp, sp, #3
> ret
>
> and with the patch
>
> func_init4:
> fmov s0, w2
> fmov s0, s0
> insr z0.s, w1
> insr z0.s, w0
> ret
>
> note that this is still not optimal as the
>
> fmov s0, s0
>
> that's doing the zero-ing of the vector is not actually needed since the
> transfer instruction
>
> fmov s0, w2
>
> already zeroes the destination SVE register. This is an AArch64 deficiency
> that will be dealt with in the backend.
>
> the optimal codegen here is:
>
> func_init4:
> orr x1, x1, x2, lsl 32
> fmov d0, x1
> insr z0.s, w0
> ret
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf and x86_64-pc-linux-gnu (-m32, -m64) with no issues.
>
> Ok for master?
OK.
Thanks,
Richard.
> Thanks,
> Tamar
>
> Co-Authored-By: Chris Bazley <[email protected]>
>
> gcc/ChangeLog:
>
> * expr.cc (store_constructor): Support vec_init for VLA vectors and
> fall through to the generic piecewise copy otherwise.
> * doc/md.texi (vec_init): Document semantics for scalable vector modes.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/copsi.c: New test.
>
> ---
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe1928b555a11fd67af0d331b 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7552,7 +7552,9 @@ Initialize the vector to given values. Operand 0 is the vector to initialize
> and operand 1 is parallel containing values for individual fields. The
> @var{n} mode is the mode of the elements, should be either element mode of
> the vector mode @var{m}, or a vector mode with the same element mode and
> -smaller number of elements.
> +smaller number of elements. If @var{m} is a scalable vector mode, then
> +operand 1 specifies only the minimum number of elements implied by @var{m};
> +any elements beyond those are zero-initialized.
>
> @mdindex vec_duplicate@var{m}
> @item @samp{vec_duplicate@var{m}}
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index de73215ccc6623fa90f4a90212fd8dc7c50991a9..31edad218e4afd6481535034a039eaebcb7de6b1 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -7498,11 +7498,14 @@ fields_length (const_tree type)
> return count;
> }
>
> -
> /* Store the value of constructor EXP into the rtx TARGET.
> TARGET is either a REG or a MEM; we know it cannot conflict, since
> safe_from_p has been called.
> CLEARED is true if TARGET is known to have been zero'd.
> + If the constructor EXP has a vector type then elements of TARGET for which
> + there is no corresponding element in EXP are zero'd. For a variable-length
> + vector type, only elements up to the minimum number of subparts of the type
> + are explicitly zero'd; any elements beyond that are implicitly zero.
> SIZE is the number of bytes of TARGET we are allowed to modify: this
> may not be the same as the size of EXP if we are assigning to a field
> which has been packed to exclude padding bits.
> @@ -8075,14 +8078,22 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
> similarly non-const type vectors. */
> icode = convert_optab_handler (vec_init_optab, mode, eltmode);
> }
> + else
> + {
> + /* Handle variable-length vector types. */
> + icode = convert_optab_handler (vec_init_optab, mode, eltmode);
> + const_n_elts = constant_lower_bound (n_elts);
> + }
>
> - if (const_n_elts && icode != CODE_FOR_nothing)
> - {
> - vector = rtvec_alloc (const_n_elts);
> - for (unsigned int k = 0; k < const_n_elts; k++)
> - RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
> - }
> + if (const_n_elts && icode != CODE_FOR_nothing)
> + {
> + vector = rtvec_alloc (const_n_elts);
> + for (unsigned int k = 0; k < const_n_elts; k++)
> + RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
> + }
> }
> + else
> + gcc_assert (n_elts.is_constant ());
>
> /* Compute the size of the elements in the CTOR. It differs
> from the size of the vector type elements only when the
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..d85403640b9ab894b378e741013eb27b76a7e19a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** func_init4:
> +** mov z0\.d, x1
> +** insr z0\.d, x0
> +** ret
> +*/
> +svint64_t __attribute__ ((noipa))
> +func_init4 (int64_t a, int64_t b)
> +{
> + svint64_t temp = { a, b };
> + return temp;
> +}
> +
> +/*
> +** func_init3:
> +** fmov s0, w2
> +** fmov s0, s0
> +** insr z0\.s, w1
> +** insr z0\.s, w0
> +** ret
> +*/
> +svint32_t __attribute__ ((noipa))
> +func_init3 (int32_t a, int32_t b, int32_t c)
> +{
> + svint32_t temp = { a, b, c };
> + return temp;
> +}
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald; (HRB 36809, AG Nuernberg)