Extend AdvSIMD constant materialization to recognize vectors where only
the low element is a representable floating-point constant and all other
elements are zero.
Bootstrapped and tested on aarch64-linux-gnu.
PR target/113856
gcc/ChangeLog:
* config/aarch64/aarch64-protos.h
(aarch64_output_simd_mov_imm_low): New.
(aarch64_const_vec_fmov_p): New.
* config/aarch64/aarch64-simd.md (mov<mode>): Do not expand constant
vectors handled by aarch64_const_vec_fmov_p into VDUP.
(*aarch64_simd_mov<VDMOV:mode>): Add Dc alternatives for FMOV based
SIMD constant moves.
(*aarch64_simd_mov<VQMOV:mode>): Likewise.
* config/aarch64/aarch64.cc (aarch64_const_vec_fmov_p): New function.
(aarch64_output_simd_mov_imm_low): New function.
* config/aarch64/constraints.md (Dc): New constraint.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/pr113856.c: New test.
Signed-off-by: Naveen SiddeGowda <[email protected]>
---
gcc/config/aarch64/aarch64-protos.h | 2 +
gcc/config/aarch64/aarch64-simd.md | 5 +-
gcc/config/aarch64/aarch64.cc | 77 +++++++++++++++++++++
gcc/config/aarch64/constraints.md | 7 ++
gcc/testsuite/gcc.target/aarch64/pr113856.c | 67 ++++++++++++++++++
5 files changed, 157 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113856.c
diff --git a/gcc/config/aarch64/aarch64-protos.h
b/gcc/config/aarch64/aarch64-protos.h
index 3f359b0069d..b172263a9ae 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -952,8 +952,10 @@ char *aarch64_output_simd_and_imm (rtx, unsigned);
char *aarch64_output_simd_xor_imm (rtx, unsigned);
char *aarch64_output_fmov (rtx);
+char *aarch64_output_simd_mov_imm_low (rtx *);
char *aarch64_output_sve_mov_immediate (rtx);
char *aarch64_output_sve_ptrues (rtx);
+bool aarch64_const_vec_fmov_p (rtx);
bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
bool aarch64_regno_ok_for_index_p (int, bool);
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index c314e85927d..2e142b1e1ee 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -79,7 +79,8 @@
}
else if (!aarch64_simd_imm_zero (operands[1], <MODE>mode)
&& !aarch64_simd_special_constant_p (operands[1], <MODE>mode)
- && !aarch64_simd_valid_mov_imm (operands[1]))
+ && !aarch64_simd_valid_mov_imm (operands[1])
+ && !aarch64_const_vec_fmov_p (operands[1]))
{
rtx x;
/* Expand into VDUP. */
@@ -183,6 +184,7 @@
[?r, w ; neon_to_gp<q> , * , *] fmov\t%x0, %d1
[?w, r ; f_mcr , * , *] fmov\t%d0, %1
[?r, r ; mov_reg , * , *] mov\t%0, %1
+ [w , Dc; fmov , * , *] <<
aarch64_output_simd_mov_imm_low (operands);
[w , Dn; neon_move<q> , simd , *] <<
aarch64_output_simd_mov_imm (operands[1], 64);
[w , Dz; f_mcr , * , *] fmov\t%d0, xzr
[w , Dx; neon_move , simd , 8] #
@@ -212,6 +214,7 @@
[?r , w ; multiple , * , 8] #
[?w , r ; multiple , * , 8] #
[?r , r ; multiple , * , 8] #
+ [w , Dc; fmov , * , 4] <<
aarch64_output_simd_mov_imm_low (operands);
[w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_imm
(operands[1], 128);
[w , Dz; fmov , * , 4] fmov\t%d0, xzr
[w , Dx; neon_move , simd, 8] #
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 197d4f27269..b6c6c20eb2f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24630,6 +24630,83 @@ aarch64_simd_valid_mov_imm (rtx op)
return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_MOV);
}
+
+/* Return true if OP is a constant HF/SF/DF vector whose low element is a
+   representable floating-point constant and all other elements are zero,
+   e.g. V2SF { c, 0 } or V4SF { c, 0, 0, 0 }.  */
+bool
+aarch64_const_vec_fmov_p (rtx op)
+{
+ if (!CONST_VECTOR_P (op))
+ return false;
+
+ machine_mode mode = GET_MODE (op);
+ scalar_mode inner_mode = GET_MODE_INNER (mode);
+
+ if (inner_mode != E_HFmode
+ && inner_mode != E_SFmode
+ && inner_mode != E_DFmode)
+ return false;
+
+ unsigned int nunits = GET_MODE_NUNITS (mode).to_constant ();
+ unsigned int const_idx = BYTES_BIG_ENDIAN ? nunits - 1 : 0;
+
+ rtx elt = CONST_VECTOR_ELT (op, const_idx);
+ if (!CONST_DOUBLE_P (elt))
+ return false;
+
+ REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
+ if (!aarch64_real_float_const_representable_p (r))
+ return false;
+
+ for (unsigned int i = 0; i < nunits; ++i)
+ {
+ if (i == const_idx)
+ continue;
+
+ rtx x = CONST_VECTOR_ELT (op, i);
+ if (!rtx_equal_p (x, CONST0_RTX (inner_mode))
+ && !(CONST_INT_P (x) && INTVAL (x) == 0))
+ return false;
+ }
+
+ return true;
+}
+
+/* Output an FMOV materializing a constant vector whose low element is a
+   representable floating-point constant and all other elements are zero,
+   e.g. V2SF { c, 0 } or V4SF { c, 0, 0, 0 }.  */
+char *
+aarch64_output_simd_mov_imm_low (rtx *operands)
+{
+ machine_mode mode = GET_MODE (operands[1]);
+ scalar_mode inner_mode = GET_MODE_INNER (mode);
+ unsigned int nunits = GET_MODE_NUNITS (mode).to_constant ();
+ unsigned int const_idx = BYTES_BIG_ENDIAN ? nunits - 1 : 0;
+ rtx elt = CONST_VECTOR_ELT (operands[1], const_idx);
+ rtx xop[2];
+
+ xop[0] = lowpart_subreg (inner_mode, operands[0], mode);
+ xop[1] = elt;
+
+ switch (inner_mode)
+ {
+ case E_HFmode:
+ output_asm_insn ("fmov\t%h0, %1", xop);
+ break;
+
+ case E_SFmode:
+ output_asm_insn ("fmov\t%s0, %1", xop);
+ break;
+
+ case E_DFmode:
+ output_asm_insn ("fmov\t%d0, %1", xop);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return "";
+}
+
/* Return true if OP is a valid SIMD orr immediate for SVE or AdvSIMD. */
bool
aarch64_simd_valid_orr_imm (rtx op)
diff --git a/gcc/config/aarch64/constraints.md
b/gcc/config/aarch64/constraints.md
index 3d166fe3a17..34a30274dbf 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -503,6 +503,13 @@
(and (match_code "const_vector")
(match_test "aarch64_simd_valid_xor_imm (op)")))
+(define_constraint "Dc"
+ "@internal
+ A constraint that matches a constant HF/SF/DF vector whose low element
+ is a representable floating-point constant and all other elements are zero."
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_fmov_p (op)")))
+
(define_constraint "Dn"
"@internal
A constraint that matches vector of immediates."
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113856.c
b/gcc/testsuite/gcc.target/aarch64/pr113856.c
new file mode 100644
index 00000000000..8bc632a5bc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113856.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 " } */
+/* { dg-additional-options "-march=armv8-a+fp16" } */
+
+/* Check that V2SF { 1.0f, 0.0f } and V4SF { 1.0f, 0.0f, 0.0f, 0.0f } are
+   materialized with FMOV rather than a literal pool load. */
+
+typedef float vect64_float __attribute__((vector_size(8)));
+typedef float vect128_float __attribute__((vector_size(16)));
+typedef _Float16 vect64_half __attribute__((vector_size(8)));
+typedef _Float16 vect128_half __attribute__((vector_size(16)));
+typedef double vect128_double __attribute__((vector_size(16)));
+
+vect64_float
+f1 (float a)
+{
+ return (vect64_float) { 1.0f, 0.0f };
+}
+
+vect64_float
+f2 (float a)
+{
+ return (vect64_float) { 1.0f, 1.0f };
+}
+
+vect128_float
+f3 (void)
+{
+ return (vect128_float) { 1.0f, 0.0f, 0.0f, 0.0f };
+}
+
+vect64_half
+f4 (void)
+{
+ return (vect64_half) { (_Float16) 1.0, (_Float16) 0.0, (_Float16) 0.0,
+ (_Float16) 0.0 };
+}
+
+vect128_half
+f5 (void)
+{
+ return (vect128_half) { (_Float16) 1.0, (_Float16) 0.0, (_Float16) 0.0,
+ (_Float16) 0.0, (_Float16) 0.0, (_Float16) 0.0,
+ (_Float16) 0.0, (_Float16) 0.0 };
+}
+
+vect128_double
+f6 (void)
+{
+ return (vect128_double) { 1.0, 0.0 };
+}
+
+/* f1: New case, should use scalar-FMOV based SIMD materialization. */
+/* f3: V4SF with only the low element nonzero, materialized via scalar FMOV. */
+/* { dg-final { scan-assembler-times {\tfmov\ts0, 1\.0} 2 } } */
+
+/* f2: Existing case, should use scalar-FMOV based SIMD materialization. */
+/* { dg-final { scan-assembler-times {\tfmov\tv[0-9]+\.2s, 1\.0} 1 } } */
+
+/* f4, f5: V4HF, V8HF initialized from FMOV form. */
+/* { dg-final { scan-assembler-times {\tfmov\th0, 1\.0} 2 } } */
+
+/* f6: V2DF initialized from FMOV form. */
+/* { dg-final { scan-assembler-times {\tfmov\td0, 1\.0} 1 } } */
+
+/* None of them should need a literal pool load. */
+/* { dg-final { scan-assembler-not {\tldr\tq[0-9]+,} } } */
--
2.34.1