On Wed, 1 Jul 2026, Tamar Christina wrote:

> Currently we do not support vec_init for VLA vectors and instead fall back to
> storing piecewise through memory.
> 
> However there's no defined semantics for this.  This patch adds the semantics
> that for VLA constructors the vector has to be cleared with zero before
> piecewise being constructed from scalar elements.  This means unspecified
> elements are initialized to zero.
> 
> Without this patch
> 
> #include <arm_sve.h>
> 
> svint32_t __attribute__ ((noipa))
> func_init4 (int32_t a, int32_t b, int32_t c)
> {
>   svint32_t temp = {a, b, c};
>   return temp;
> }
> 
> compiles to:
> 
> func_init4:
>         addvl   sp, sp, #-3
>         movi    d30, #0
>         str     z30, [sp, #2, mul vl]
>         addvl   x3, sp, #2
>         str     w0, [x3]
>         addvl   x0, sp, #1
>         add     x0, x0, 4
>         ldr     z31, [sp, #2, mul vl]
>         str     z31, [sp, #1, mul vl]
>         str     w1, [x0]
>         ldr     z31, [sp, #1, mul vl]
>         str     z31, [sp]
>         str     w2, [sp, 8]
>         ldr     z0, [sp]
>         addvl   sp, sp, #3
>         ret
> 
> and with the patch
> 
> func_init4:
>         fmov    s0, w2
>         fmov    s0, s0
>         insr    z0.s, w1
>         insr    z0.s, w0
>         ret
> 
> note that this is still not optimal as the
> 
>         fmov    s0, s0
> 
> that's doing the zero-ing of the vector is not actually needed since the
> transfer instruction
> 
>         fmov    s0, w2
> 
> already zeros the destination SVE register.  But this is an AArch64 deficiency
> that will be dealt with in the backend.
> 
> the optimal codegen here is:
> 
>  func_init4:
>         orr     x1, x1, x2, lsl 32
>         fmov    d0, x1
>         insr    z0.s, w0
>         ret
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf and x86_64-pc-linux-gnu (-m32, -m64)
> with no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> Co-Authored-By: Chris Bazley <[email protected]>
> 
> gcc/ChangeLog:
> 
>       * expr.cc (store_constructor): Handle VLA vec_init support and generic
>       fall through piecewise copy.
>       * doc/md.texi: Document change.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/sve/copsi.c: New test.
> 
> ---
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe1928b555a11fd67af0d331b 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -7552,7 +7552,9 @@ Initialize the vector to given values.  Operand 0 is the vector to initialize
>  and operand 1 is parallel containing values for individual fields.  The
>  @var{n} mode is the mode of the elements, should be either element mode of
>  the vector mode @var{m}, or a vector mode with the same element mode and
> -smaller number of elements.
> +smaller number of elements.  If @var{m} specifies a scalable vector mode,
> +then operand 1 only specifies the minimum number of elements implied
> +by @var{m} and elements beyond are zero initialized.
>  
>  @mdindex vec_duplicate@var{m}
>  @item @samp{vec_duplicate@var{m}}
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index de73215ccc6623fa90f4a90212fd8dc7c50991a9..31edad218e4afd6481535034a039eaebcb7de6b1 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -7498,11 +7498,14 @@ fields_length (const_tree type)
>    return count;
>  }
>  
> -
>  /* Store the value of constructor EXP into the rtx TARGET.
>     TARGET is either a REG or a MEM; we know it cannot conflict, since
>     safe_from_p has been called.
>     CLEARED is true if TARGET is known to have been zero'd.
> +   If the constructor EXP has a vector type then elements of TARGET for which
> +   there is no corresponding element in EXP are zero'd.  For a variable-length
> +   vector type, only elements up to the minimum number of subparts of the type
> +   are explicitly zero'd; any elements beyond that are implicitly zero.
>     SIZE is the number of bytes of TARGET we are allowed to modify: this
>     may not be the same as the size of EXP if we are assigning to a field
>     which has been packed to exclude padding bits.
> @@ -8075,14 +8078,22 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
>                  similarly non-const type vectors. */
>               icode = convert_optab_handler (vec_init_optab, mode, eltmode);
>             }
> +         else
> +           {
> +             /* Handle variable-length vector types.  */
> +             icode = convert_optab_handler (vec_init_optab, mode, eltmode);
> +             const_n_elts = constant_lower_bound (n_elts);
> +           }
>  
> -       if (const_n_elts && icode != CODE_FOR_nothing)
> -         {
> -           vector = rtvec_alloc (const_n_elts);
> -           for (unsigned int k = 0; k < const_n_elts; k++)
> -             RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
> -         }
> +         if (const_n_elts && icode != CODE_FOR_nothing)
> +           {
> +             vector = rtvec_alloc (const_n_elts);
> +             for (unsigned int k = 0; k < const_n_elts; k++)
> +               RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
> +           }
>         }
> +     else
> +       gcc_assert (n_elts.is_constant ());
>  
>       /* Compute the size of the elements in the CTOR.  It differs
>          from the size of the vector type elements only when the
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..d85403640b9ab894b378e741013eb27b76a7e19a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include <arm_sve.h>
> +
> +/*
> +** func_init4:
> +**   mov     z0\.d, x1
> +**   insr    z0\.d, x0
> +**   ret
> +*/
> +svint64_t __attribute__ ((noipa))
> +func_init4 (int64_t a, int64_t b)
> +{
> +  svint64_t temp = { a, b };
> +  return temp;
> +}
> +
> +/*
> +** func_init3:
> +**   fmov    s0, w2
> +**   fmov    s0, s0
> +**   insr    z0\.s, w1
> +**   insr    z0\.s, w0
> +**   ret
> +*/
> +svint32_t __attribute__ ((noipa))
> +func_init3 (int32_t a, int32_t b, int32_t c)
> +{
> +  svint32_t temp = { a, b, c };
> +  return temp;
> +}
> 
> 
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald; (HRB 36809, AG Nuernberg)

Reply via email to