When given a constructor of variable-length vector type, the
store_constructor function now builds an rtvec whose length is the
constant lower bound of the number of subparts in the vector type, and
uses it to emit the body of the vec_init insn selected by the
convert_optab_handler function.
Previously, this function used a fallback path of calling
store_constructor_field upon discovering that the number of subparts
in the vector type was not a constant multiple of the number of
subparts in the element type.
For example, this allows GCC to generate the following AArch64 assembly
language output for the tail of a reduction in the slp_6 test:
uaddv d31, p6, z31.b
uaddv d27, p6, z27.b
uaddv d26, p6, z26.b
movi d30, #0
insr z30.b, b26
insr z30.b, b27
insr z30.b, b31
add z25.b, z25.b, z30.b
instead of the following output (with predicated tails for basic block
SLP vectorization but without this change):
addvl x0, sp, #2
movi d0, #0
st1b z0.b, p6, [sp, #2, mul vl]
uaddv d27, p6, z27.b
uaddv d26, p6, z26.b
uaddv d25, p6, z25.b
str b27, [x0]
addvl x0, sp, #1
add x0, x0, 1
ptrue p7.b, vl3
ld1b z0.b, p6/z, [sp, #2, mul vl]
st1b z0.b, p6, [sp, #1, mul vl]
str b26, [x0]
ld1b z0.b, p6/z, [sp, #1, mul vl]
st1b z0.b, p6, [sp]
str b25, [sp, 2]
ld1b z0.b, p6/z, [sp]
add z28.b, z28.b, z0.b
st1b z28.b, p7, [x1]
addvl sp, sp, #3
or the original assembly language output (with neither predicated tails
for basic block SLP vectorization nor this change):
uaddv d31, p6, z31.b
fmov x0, d31
uaddv d31, p6, z26.b
add w6, w6, w0
fmov x0, d31
uaddv d31, p6, z27.b
add w5, w5, w0
fmov x0, d31
add w4, w4, w0
gcc/ChangeLog:
* expr.cc (store_constructor): Add an else block to handle
the case where TREE_CODE (TREE_TYPE (exp)) == VECTOR_TYPE but
exact_div (n_elts, GET_MODE_NUNITS (eltmode)).is_constant
(&const_n_elts) is false.  Handle it like the existing "element
type is not a vector type" case, except that const_n_elts is
taken from the constant lower bound of the number of subparts of
the vector type.
---
gcc/expr.cc | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/gcc/expr.cc b/gcc/expr.cc
index de73215ccc6..f163ea16fe2 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7498,11 +7498,14 @@ fields_length (const_tree type)
return count;
}
-
/* Store the value of constructor EXP into the rtx TARGET.
TARGET is either a REG or a MEM; we know it cannot conflict, since
safe_from_p has been called.
CLEARED is true if TARGET is known to have been zero'd.
+ If the constructor EXP has a vector type then elements of TARGET for which
+ there is no corresponding element in EXP are zero'd. For a variable-length
+ vector type, elements beyond the minimum number of subparts of the type are
+ not zero'd.
SIZE is the number of bytes of TARGET we are allowed to modify: this
may not be the same as the size of EXP if we are assigning to a field
which has been packed to exclude padding bits.
@@ -8075,7 +8078,12 @@ store_constructor (tree exp, rtx target, int cleared,
poly_int64 size,
similarly non-const type vectors. */
icode = convert_optab_handler (vec_init_optab, mode, eltmode);
}
-
+ else
+ {
+ /* Handle variable-length vector types. */
+ icode = convert_optab_handler (vec_init_optab, mode, eltmode);
+ const_n_elts = constant_lower_bound (n_elts);
+ }
if (const_n_elts && icode != CODE_FOR_nothing)
{
vector = rtvec_alloc (const_n_elts);
--
2.43.0