Currently vec_init is not supported for VLA (variable-length) vectors, so we
instead fall back to storing piecewise through memory.
However, there are no defined semantics for this. This patch adds the
semantics that, for VLA constructors, the vector has to be cleared to zero
before being constructed piecewise from scalar elements. This means that
unspecified elements are initialized to zero.
Without this patch
#include <arm_sve.h>
svint32_t __attribute__ ((noipa))
func_init4 (int32_t a, int32_t b, int32_t c)
{
svint32_t temp = {a, b, c};
return temp;
}
compiles to:
func_init4:
addvl sp, sp, #-3
movi d30, #0
str z30, [sp, #2, mul vl]
addvl x3, sp, #2
str w0, [x3]
addvl x0, sp, #1
add x0, x0, 4
ldr z31, [sp, #2, mul vl]
str z31, [sp, #1, mul vl]
str w1, [x0]
ldr z31, [sp, #1, mul vl]
str z31, [sp]
str w2, [sp, 8]
ldr z0, [sp]
addvl sp, sp, #3
ret
and with the patch
func_init4:
fmov s0, w2
fmov s0, s0
insr z0.s, w1
insr z0.s, w0
ret
Note that this is still not optimal, as the
fmov s0, s0
that zeroes the vector is not actually needed, since the
transfer instruction
fmov s0, w2
already zeroes the destination SVE register. However, this is an AArch64
deficiency that will be dealt with in the backend.
The optimal codegen here is:
func_init4:
orr x1, x1, x2, lsl 32
fmov d0, x1
insr z0.s, w0
ret
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu (-m32 and -m64)
with no issues.
Ok for master?
Thanks,
Tamar
Co-Authored-By: Chris Bazley <[email protected]>
gcc/ChangeLog:
* expr.cc (store_constructor): Handle VLA vec_init support and generic
fall through piecewise copy.
* doc/md.texi: Document change.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/copsi.c: New test.
---
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index
1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe1928b555a11fd67af0d331b
100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7552,7 +7552,9 @@ Initialize the vector to given values. Operand 0 is the
vector to initialize
and operand 1 is parallel containing values for individual fields. The
@var{n} mode is the mode of the elements, should be either element mode of
the vector mode @var{m}, or a vector mode with the same element mode and
-smaller number of elements.
+smaller number of elements. If @var{m} specifies a scalable vector mode,
+then operand 1 only specifies the minimum number of elements implied
+by @var{m} and elements beyond are zero initialized.
@mdindex vec_duplicate@var{m}
@item @samp{vec_duplicate@var{m}}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index
de73215ccc6623fa90f4a90212fd8dc7c50991a9..373bec1322e7c554cbb314aed923abd7c3267ad8
100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7498,11 +7498,14 @@ fields_length (const_tree type)
return count;
}
-
/* Store the value of constructor EXP into the rtx TARGET.
TARGET is either a REG or a MEM; we know it cannot conflict, since
safe_from_p has been called.
CLEARED is true if TARGET is known to have been zero'd.
+ If the constructor EXP has a vector type then elements of TARGET for which
+ there is no corresponding element in EXP are zero'd. For a variable-length
+ vector type, only elements up to the minimum number of subparts of the type
+ are explicitly zero'd; any elements beyond that are implicitly zero.
SIZE is the number of bytes of TARGET we are allowed to modify: this
may not be the same as the size of EXP if we are assigning to a field
which has been packed to exclude padding bits.
@@ -8075,13 +8078,20 @@ store_constructor (tree exp, rtx target, int cleared,
poly_int64 size,
similarly non-const type vectors. */
icode = convert_optab_handler (vec_init_optab, mode, eltmode);
}
+ else
+ {
+ /* Handle variable-length vector types. */
+ icode = convert_optab_handler (vec_init_optab, mode, eltmode);
+ const_n_elts = constant_lower_bound (n_elts);
+ cleared = 0;
+ }
- if (const_n_elts && icode != CODE_FOR_nothing)
- {
- vector = rtvec_alloc (const_n_elts);
- for (unsigned int k = 0; k < const_n_elts; k++)
- RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
- }
+ if (const_n_elts && icode != CODE_FOR_nothing)
+ {
+ vector = rtvec_alloc (const_n_elts);
+ for (unsigned int k = 0; k < const_n_elts; k++)
+ RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
+ }
}
/* Compute the size of the elements in the CTOR. It differs
@@ -8121,7 +8131,8 @@ store_constructor (tree exp, rtx target, int cleared,
poly_int64 size,
|| maybe_gt (4 * zero_count, 3 * count));
}
- if (need_to_clear && maybe_gt (size, 0) && !vector)
+ if (need_to_clear
+ && (maybe_gt (size, 0) || REG_P (target)))
{
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (mode));
@@ -8138,6 +8149,9 @@ store_constructor (tree exp, rtx target, int cleared,
poly_int64 size,
cleared = 1;
}
+ /* Ensure that something has cleared the register. */
+ gcc_assert ((need_to_clear && cleared) || !need_to_clear);
+
if (MEM_P (target))
alias = MEM_ALIAS_SET (target);
else
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
new file mode 100644
index
0000000000000000000000000000000000000000..d85403640b9ab894b378e741013eb27b76a7e19a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** func_init4:
+** mov z0\.d, x1
+** insr z0\.d, x0
+** ret
+*/
+svint64_t __attribute__ ((noipa))
+func_init4 (int64_t a, int64_t b)
+{
+ svint64_t temp = { a, b };
+ return temp;
+}
+
+/*
+** func_init3:
+** fmov s0, w2
+** fmov s0, s0
+** insr z0\.s, w1
+** insr z0\.s, w0
+** ret
+*/
+svint32_t __attribute__ ((noipa))
+func_init3 (int32_t a, int32_t b, int32_t c)
+{
+ svint32_t temp = { a, b, c };
+ return temp;
+}
--
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe1928b555a11fd67af0d331b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7552,7 +7552,9 @@ Initialize the vector to given values. Operand 0 is the vector to initialize
and operand 1 is parallel containing values for individual fields. The
@var{n} mode is the mode of the elements, should be either element mode of
the vector mode @var{m}, or a vector mode with the same element mode and
-smaller number of elements.
+smaller number of elements. If @var{m} specifies a scalable vector mode,
+then operand 1 only specifies the minimum number of elements implied
+by @var{m} and elements beyond are zero initialized.
@mdindex vec_duplicate@var{m}
@item @samp{vec_duplicate@var{m}}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index de73215ccc6623fa90f4a90212fd8dc7c50991a9..373bec1322e7c554cbb314aed923abd7c3267ad8 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7498,11 +7498,14 @@ fields_length (const_tree type)
return count;
}
-
/* Store the value of constructor EXP into the rtx TARGET.
TARGET is either a REG or a MEM; we know it cannot conflict, since
safe_from_p has been called.
CLEARED is true if TARGET is known to have been zero'd.
+ If the constructor EXP has a vector type then elements of TARGET for which
+ there is no corresponding element in EXP are zero'd. For a variable-length
+ vector type, only elements up to the minimum number of subparts of the type
+ are explicitly zero'd; any elements beyond that are implicitly zero.
SIZE is the number of bytes of TARGET we are allowed to modify: this
may not be the same as the size of EXP if we are assigning to a field
which has been packed to exclude padding bits.
@@ -8075,13 +8078,20 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
similarly non-const type vectors. */
icode = convert_optab_handler (vec_init_optab, mode, eltmode);
}
+ else
+ {
+ /* Handle variable-length vector types. */
+ icode = convert_optab_handler (vec_init_optab, mode, eltmode);
+ const_n_elts = constant_lower_bound (n_elts);
+ cleared = 0;
+ }
- if (const_n_elts && icode != CODE_FOR_nothing)
- {
- vector = rtvec_alloc (const_n_elts);
- for (unsigned int k = 0; k < const_n_elts; k++)
- RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
- }
+ if (const_n_elts && icode != CODE_FOR_nothing)
+ {
+ vector = rtvec_alloc (const_n_elts);
+ for (unsigned int k = 0; k < const_n_elts; k++)
+ RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
+ }
}
/* Compute the size of the elements in the CTOR. It differs
@@ -8121,7 +8131,8 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
|| maybe_gt (4 * zero_count, 3 * count));
}
- if (need_to_clear && maybe_gt (size, 0) && !vector)
+ if (need_to_clear
+ && (maybe_gt (size, 0) || REG_P (target)))
{
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (mode));
@@ -8138,6 +8149,9 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
cleared = 1;
}
+ /* Ensure that something has cleared the register. */
+ gcc_assert ((need_to_clear && cleared) || !need_to_clear);
+
if (MEM_P (target))
alias = MEM_ALIAS_SET (target);
else
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
new file mode 100644
index 0000000000000000000000000000000000000000..d85403640b9ab894b378e741013eb27b76a7e19a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** func_init4:
+** mov z0\.d, x1
+** insr z0\.d, x0
+** ret
+*/
+svint64_t __attribute__ ((noipa))
+func_init4 (int64_t a, int64_t b)
+{
+ svint64_t temp = { a, b };
+ return temp;
+}
+
+/*
+** func_init3:
+** fmov s0, w2
+** fmov s0, s0
+** insr z0\.s, w1
+** insr z0\.s, w0
+** ret
+*/
+svint32_t __attribute__ ((noipa))
+func_init3 (int32_t a, int32_t b, int32_t c)
+{
+ svint32_t temp = { a, b, c };
+ return temp;
+}