This enables construction of V4SF CST like `{1.0f, 1.0f, 0.0f, 0.0f}` (and other fp enabled CSTs) by using `fmov v0.2s, 1.0`, as the instruction is designed to zero out the other bits. This is a small extension on top of the code that creates fmov for the case where all but the first element are zero.
Built and tested for aarch64-linux-gnu with no regressions. PR target/113856 gcc/ChangeLog: * config/aarch64/aarch64.cc (simd_immediate_info): Add bool to the float mode constructor. Document modifier field for FMOV_SDH. (aarch64_simd_valid_immediate): Recognize where the first half of the const float vect is the same. (aarch64_output_simd_mov_immediate): Handle the case where insn is FMOV_SDH and modifier is MSL. gcc/testsuite/ChangeLog: * gcc.target/aarch64/fmov-zero-cst-3.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- gcc/config/aarch64/aarch64.cc | 34 ++++++++++++++++--- .../gcc.target/aarch64/fmov-zero-cst-3.c | 28 +++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index c4386591a9b..89bd0c5e5a6 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -130,7 +130,7 @@ struct simd_immediate_info enum modifier_type { LSL, MSL }; simd_immediate_info () {} - simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV); + simd_immediate_info (scalar_float_mode, rtx, insn_type = MOV, bool = false); simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT, insn_type = MOV, modifier_type = LSL, unsigned int = 0); @@ -153,6 +153,8 @@ struct simd_immediate_info /* The kind of shift modifier to use, and the number of bits to shift. This is (LSL, 0) if no shift is needed. */ + /* For FMOV_SDH, LSL says it is a single while MSL + says if it is either .4h/.2s fmov. */ modifier_type modifier; unsigned int shift; } mov; @@ -173,12 +175,12 @@ struct simd_immediate_info /* Construct a floating-point immediate in which each element has mode ELT_MODE_IN and value VALUE_IN. 
*/ inline simd_immediate_info -::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in) +::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in, insn_type insn_in, bool firsthalfsame) : elt_mode (elt_mode_in), insn (insn_in) { gcc_assert (insn_in == MOV || insn_in == FMOV_SDH); u.mov.value = value_in; - u.mov.modifier = LSL; + u.mov.modifier = firsthalfsame ? MSL : LSL; u.mov.shift = 0; } @@ -22944,10 +22946,23 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, || aarch64_float_const_representable_p (elt)) { bool valid = true; + bool firsthalfsame = false; for (unsigned int i = 1; i < n_elts; i++) { rtx elt1 = CONST_VECTOR_ENCODED_ELT (op, i); if (!aarch64_float_const_zero_rtx_p (elt1)) + { + if (i == 1) + firsthalfsame = true; + if (!firsthalfsame + || i >= n_elts/2 + || !rtx_equal_p (elt, elt1)) + { + valid = false; + break; + } + } + else if (firsthalfsame && i < n_elts/2) { valid = false; break; @@ -22957,7 +22972,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, { if (info) *info = simd_immediate_info (elt_float_mode, elt, - simd_immediate_info::FMOV_SDH); + simd_immediate_info::FMOV_SDH, + firsthalfsame); return true; } } @@ -25165,8 +25181,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, real_to_decimal_for_mode (float_buf, CONST_DOUBLE_REAL_VALUE (info.u.mov.value), buf_size, buf_size, 1, info.elt_mode); - if (info.insn == simd_immediate_info::FMOV_SDH) + if (info.insn == simd_immediate_info::FMOV_SDH + && info.u.mov.modifier == simd_immediate_info::LSL) snprintf (templ, sizeof (templ), "fmov\t%%%c0, %s", element_char, float_buf); + else if (info.insn == simd_immediate_info::FMOV_SDH + && info.u.mov.modifier == simd_immediate_info::MSL) + { + gcc_assert (element_char != 'd'); + gcc_assert (lane_count > 2); + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", lane_count/2, element_char, float_buf); + } else if (lane_count == 1) snprintf (templ, sizeof 
(templ), "fmov\t%%d0, %s", float_buf); else diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c b/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c new file mode 100644 index 00000000000..7a78b6d3caf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-zero-cst-3.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcmodel=tiny" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ +/* PR target/113856 */ + +#define vect64 __attribute__((vector_size(8) )) +#define vect128 __attribute__((vector_size(16) )) + +/* +** f2: +** fmov v0.2s, 1.0e\+0 +** ret +*/ +vect128 float f2() +{ + return (vect128 float){1.0f, 1.0f, 0, 0}; +} + +/* +** f3: +** ldr q0, \.LC[0-9]+ +** ret +*/ +vect128 float f3() +{ + return (vect128 float){1.0f, 1.0f, 1.0f, 0.0}; +} + -- 2.43.0