On Tue, 30 Jun 2026, Tamar Christina wrote:

> > -----Original Message-----
> > From: Richard Biener <[email protected]>
> > Sent: 30 June 2026 10:21
> > To: Tamar Christina <[email protected]>
> > Cc: [email protected]; nd <[email protected]>
> > Subject: Re: [patch]middle-end: Handle variable-length vector types in
> > store_constructor
> > 
> > On Fri, 26 Jun 2026, Tamar Christina wrote:
> > 
> > > Currently vec_init does not support VLA vec_init and we instead fall back 
> > > to
> > > storing piecewise through memory.
> > >
> > > However there's no defined semantics for this.  This patch adds the
> > semantics
> > > that for VLA constructors the vector has to be cleared with zero before
> > > piecewise being constructed from scalar elements.  This means unspecified
> > > elements are initialized to zero.
> > >
> > > Without this patch
> > >
> > > #include <arm_sve.h>
> > >
> > > svint32_t __attribute__ ((noipa))
> > > func_init4 (int32_t a, int32_t b, int32_t c)
> > > {
> > >   svint32_t temp = {a, b, c};
> > >   return temp;
> > > }
> > >
> > > compiles to:
> > >
> > > func_init4:
> > >         addvl   sp, sp, #-3
> > >         movi    d30, #0
> > >         str     z30, [sp, #2, mul vl]
> > >         addvl   x3, sp, #2
> > >         str     w0, [x3]
> > >         addvl   x0, sp, #1
> > >         add     x0, x0, 4
> > >         ldr     z31, [sp, #2, mul vl]
> > >         str     z31, [sp, #1, mul vl]
> > >         str     w1, [x0]
> > >         ldr     z31, [sp, #1, mul vl]
> > >         str     z31, [sp]
> > >         str     w2, [sp, 8]
> > >         ldr     z0, [sp]
> > >         addvl   sp, sp, #3
> > >         ret
> > >
> > > and with the patch
> > >
> > > func_init4:
> > >         fmov    s0, w2
> > >         fmov    s0, s0
> > >         insr    z0.s, w1
> > >         insr    z0.s, w0
> > >         ret
> > >
> > > note that this is still not optimal as the
> > >
> > >         fmov    s0, s0
> > >
> > > that's doing the zero-ing of the vector is not actually needed since the
> > > transfer instruction
> > >
> > >         fmov    s0, w2
> > >
> > > already zeros the destination SVE register.  But this is an AArch64 
> > > deficiency
> > > that will be dealt with in the backend.
> > >
> > > the optimal codegen here is:
> > >
> > >  func_init4:
> > >         orr     x1, x1, x2, lsl 32
> > >         fmov    d0, x1
> > >         insr    z0.s, w0
> > >         ret
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu,
> > > arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> > > -m32, -m64 and no issues.
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > Co-Authored-By: Chris Bazley <[email protected]>
> > >
> > > gcc/ChangeLog:
> > >
> > >   * expr.cc (store_constructor): Handle VLA vec_init support and generic
> > >   fall through piecewise copy.
> > >   * doc/md.texi: Document change
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * gcc.target/aarch64/sve/copsi.c: New test.
> > >
> > > ---
> > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> > > index
> > 1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe192
> > 8b555a11fd67af0d331b 100644
> > > --- a/gcc/doc/md.texi
> > > +++ b/gcc/doc/md.texi
> > > @@ -7552,7 +7552,9 @@ Initialize the vector to given values.  Operand 0 is
> > the vector to initialize
> > >  and operand 1 is parallel containing values for individual fields.  The
> > >  @var{n} mode is the mode of the elements, should be either element mode
> > of
> > >  the vector mode @var{m}, or a vector mode with the same element mode
> > and
> > > -smaller number of elements.
> > > +smaller number of elements.  If @var{m} specifies a scalable vector mode,
> > > +then operand 1 only specifies the minimum number of elements implied
> > > +by @var{m} and elements beyond are zero initialized.
> > >
> > >  @mdindex vec_duplicate@var{m}
> > >  @item @samp{vec_duplicate@var{m}}
> > > diff --git a/gcc/expr.cc b/gcc/expr.cc
> > > index
> > de73215ccc6623fa90f4a90212fd8dc7c50991a9..373bec1322e7c554cbb31
> > 4aed923abd7c3267ad8 100644
> > > --- a/gcc/expr.cc
> > > +++ b/gcc/expr.cc
> > > @@ -7498,11 +7498,14 @@ fields_length (const_tree type)
> > >    return count;
> > >  }
> > >
> > > -
> > >  /* Store the value of constructor EXP into the rtx TARGET.
> > >     TARGET is either a REG or a MEM; we know it cannot conflict, since
> > >     safe_from_p has been called.
> > >     CLEARED is true if TARGET is known to have been zero'd.
> > > +   If the constructor EXP has a vector type then elements of TARGET for
> > which
> > > +   there is no corresponding element in EXP are zero'd.  For a 
> > > variable-length
> > > +   vector type, only elements up to the minimum number of subparts of the
> > type
> > > +   are explicitly zero'd; any elements beyond that are implicitly zero.
> > >     SIZE is the number of bytes of TARGET we are allowed to modify: this
> > >     may not be the same as the size of EXP if we are assigning to a field
> > >     which has been packed to exclude padding bits.
> > > @@ -8075,13 +8078,20 @@ store_constructor (tree exp, rtx target, int
> > cleared, poly_int64 size,
> > >              similarly non-const type vectors. */
> > >           icode = convert_optab_handler (vec_init_optab, mode,
> > eltmode);
> > >         }
> > > +     else
> > > +       {
> > > +         /* Handle variable-length vector types.  */
> > > +         icode = convert_optab_handler (vec_init_optab, mode,
> > eltmode);
> > > +         const_n_elts = constant_lower_bound (n_elts);
> > > +         cleared = 0;
> > > +       }
> > 
> > Why set 'cleared' to 0?  If the caller cleared TARGET we shouldn't
> > need to do this again?
> > 
> > > -   if (const_n_elts && icode != CODE_FOR_nothing)
> > > -     {
> > > -       vector = rtvec_alloc (const_n_elts);
> > > -       for (unsigned int k = 0; k < const_n_elts; k++)
> > > -         RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
> > > -     }
> > > +     if (const_n_elts && icode != CODE_FOR_nothing)
> > > +       {
> > > +         vector = rtvec_alloc (const_n_elts);
> > > +         for (unsigned int k = 0; k < const_n_elts; k++)
> > > +           RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
> > > +       }
> > >     }
> > >
> > >   /* Compute the size of the elements in the CTOR.  It differs
> > > @@ -8121,7 +8131,8 @@ store_constructor (tree exp, rtx target, int
> > cleared, poly_int64 size,
> > >                        || maybe_gt (4 * zero_count, 3 * count));
> > 
> > and this maybe_lt (count, n_elts) should ensure need_to_clear is set
> > for VLA vectors?  So I don't understand why you need to change the
> > condition below?  'vector' means we go the vec_init expander way,
> > so we expect that to perform the clearing (possibly redundant with
> > the passed in cleared).
> 
> Ah, damn. I misread this but. Yeah the ` maybe_lt (count, n_elts)` already
> ensures it's set to need_to_clear.
> 
> > 
> > What's more interesting is if we ever get to store_constructor
> > with a variable-length vector mode but !REG_P (target)?
> > 
> 
> Yes, clear_storage requires statically knowing the amount of memory to clear
> by pieces.  With size a poly I don't think there's a way for this to work?
> 
> Though I suppose since poly vector stores would work, a fall back could be
> storing a vector of 0.  
> 
> But then the question is why doesn't store_constructor use emit_move_insn
> for all cases where the target supports move of that type? Since surely a 
> clear of
> a MEM_P with a zero store is cheaper than piece wise if supported?
> 
> So should the condition there instead of REG_P (target) be optab_handler 
> (mov_optab, mode)?

I don't want to change the condition ;)  I'm merely wanting re-assurance
of some way that store_constructor behaves according to specs we set
for VLA vectors and not miscompile but at most ICE.  So ... maybe
place an assert in the else part of if (REG_P && VECTOR_MODE_P)
that gcc_assert (n_elts.constant_p ())?

> Thanks,
> Tamar
> 
> > >     }
> > >
> > > - if (need_to_clear && maybe_gt (size, 0) && !vector)
> > > + if (need_to_clear
> > > +     && (maybe_gt (size, 0) || REG_P (target)))
> > >     {
> > >       if (REG_P (target))
> > >         emit_move_insn (target, CONST0_RTX (mode));
> > > @@ -8138,6 +8149,9 @@ store_constructor (tree exp, rtx target, int
> > cleared, poly_int64 size,
> > >       cleared = 1;
> > >     }
> > >
> > > + /* Ensure that something has cleared the register.  */
> > > + gcc_assert ((need_to_clear && cleared) || !need_to_clear);
> > > +
> > >          if (MEM_P (target))
> > >     alias = MEM_ALIAS_SET (target);
> > >   else
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> > b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> > > new file mode 100644
> > > index
> > 0000000000000000000000000000000000000000..d85403640b9ab894b
> > 378e741013eb27b76a7e19a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
> > > @@ -0,0 +1,33 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-O2" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" } } */
> > > +
> > > +#include <arm_sve.h>
> > > +
> > > +/*
> > > +** func_init4:
> > > +**       mov     z0\.d, x1
> > > +**       insr    z0\.d, x0
> > > +**       ret
> > > +*/
> > > +svint64_t __attribute__ ((noipa))
> > > +func_init4 (int64_t a, int64_t b)
> > > +{
> > > +  svint64_t temp = { a, b };
> > > +  return temp;
> > > +}
> > > +
> > > +/*
> > > +** func_init3:
> > > +**       fmov    s0, w2
> > > +**       fmov    s0, s0
> > > +**       insr    z0\.s, w1
> > > +**       insr    z0\.s, w0
> > > +**       ret
> > > +*/
> > > +svint32_t __attribute__ ((noipa))
> > > +func_init3 (int32_t a, int32_t b, int32_t c)
> > > +{
> > > +  svint32_t temp = { a, b, c };
> > > +  return temp;
> > > +}
> > >
> > >
> > >
> > 
> > --
> > Richard Biener <[email protected]>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > Nuernberg)
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to