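This adds a vec_packs_float_v2di expander so that the vectorizer can
vectorize conversions from long to float on AArch64.  Each V2DI input is
converted to V2DF (scvtf) and narrowed to V2SF (fcvtn), and the two halves
are then combined into a V4SF with zip1.  Because the expansion goes through
an intermediate V2DF value, the pattern is only enabled with
-funsafe-math-optimizations.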
Bootstrapped and tested on aarch64-linux-gnu.
PR target/123748
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (vec_packs_float_v2di): New pattern.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/pr123748.c: New test.
Signed-off-by: Pengxuan Zheng <[email protected]>
---
gcc/config/aarch64/aarch64-simd.md | 26 +++++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/pr123748.c | 13 +++++++++++
2 files changed, 39 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr123748.c
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index 2e142b1e1ee..21f13564280 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3206,6 +3206,32 @@ (define_insn
"<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
[(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
+;; Convert the signed 64-bit integer elements of two V2DI vectors
+;; (operands 1 and 2) to single precision and pack the four results into
+;; the V4SF operand 0.  Each input is first converted to V2DF and then
+;; narrowed to V2SF; the expander is only available when
+;; flag_unsafe_math_optimizations is set (presumably because of that
+;; intermediate V2DF step).
+;; No constraints are given on the operands: this is a define_expand, so
+;; only the predicates are used.
+(define_expand "vec_packs_float_v2di"
+  [(set (match_operand:V4SF 0 "register_operand")
+        (vec_concat:V4SF
+          (float:V2SF (match_operand:V2DI 1 "register_operand"))
+          (float:V2SF (match_operand:V2DI 2 "register_operand"))))]
+  "TARGET_SIMD && flag_unsafe_math_optimizations"
+  {
+    /* Step 1: integer -> double, one V2DF per input vector.  */
+    rtx df_first = gen_reg_rtx (V2DFmode);
+    rtx df_second = gen_reg_rtx (V2DFmode);
+    emit_insn (gen_floatv2div2df2 (df_first, operands[1]));
+    emit_insn (gen_floatv2div2df2 (df_second, operands[2]));
+
+    /* Step 2: double -> float, one V2SF per input vector.  */
+    rtx sf_first = gen_reg_rtx (V2SFmode);
+    rtx sf_second = gen_reg_rtx (V2SFmode);
+    emit_insn (gen_truncv2dfv2sf2 (sf_first, df_first));
+    emit_insn (gen_truncv2dfv2sf2 (sf_second, df_second));
+
+    /* The two halves are combined in the opposite order for big-endian.  */
+    if (BYTES_BIG_ENDIAN)
+      std::swap (sf_first, sf_second);
+
+    /* Step 3: treat each V2SF as a single 64-bit element, zip the two
+       elements into a V2DI and reinterpret that register as the V4SF
+       result.  */
+    rtx packed = gen_reg_rtx (V2DImode);
+    emit_insn (gen_aarch64_zip1v2di_low (packed,
+                                         gen_lowpart (DImode, sf_first),
+                                         gen_lowpart (DImode, sf_second)));
+    emit_move_insn (operands[0], gen_lowpart (V4SFmode, packed));
+    DONE;
+  }
+)
+
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr123748.c
b/gcc/testsuite/gcc.target/aarch64/pr123748.c
new file mode 100644
index 00000000000..8ba290cf12d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr123748.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+/* Store each of the first 128 elements of L, converted from long to float,
+   into the corresponding element of F.  This loop is what the
+   scan-assembler directives in this file check: they count the scvtf,
+   fcvtn and zip1 instructions in the generated assembly.  */
+void
+f (float *__restrict f, long *__restrict l)
+{
+ for (int i = 0; i < 128; i++)
+ f[i] = l[i];
+}
+
+/* { dg-final { scan-assembler-times {scvtf\t} 2 } } */
+/* { dg-final { scan-assembler-times {fcvtn\t} 2 } } */
+/* { dg-final { scan-assembler-times {zip1\t} 1 } } */
--
2.34.1