Hi all,

This patch aims to fix the cvtps2pd insn, which should also work on
memory operand but currently does not. After this fix, when loop == 2,
it will eliminate movq instruction.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

        PR target/43618
        * config/i386/sse.md (extendv2sfv2df2): New define_expand.
        (sse2_cvtps2pd_load<mask_name>): Rename extendvsdfv2df2.

gcc/testsuite/ChangeLog:

        PR target/43618
        * gcc.target/i386/pr43618-1.c: New test.
---
 gcc/config/i386/sse.md                    | 24 ++++++++++++++++++-----
 gcc/testsuite/gcc.target/i386/pr43618-1.c | 13 ++++++++++++
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr43618-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b2602bfa79..f96bb3dc6c3 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9175,11 +9175,25 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "extendv2sfv2df2"
+  [(set (match_operand:V2DF 0 "register_operand")
+        (float_extend:V2DF
+          (match_operand:V2SF 1 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V4SFmode, operands[1], V2SFmode);
+      emit_insn (gen_sse2_cvtps2pd (operands[0], operands[1]));
+      DONE;
+    }
+})
+
 (define_insn "sse2_cvtps2pd<mask_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
        (float_extend:V2DF
          (vec_select:V2SF
-           (match_operand:V4SF 1 "vector_operand" "vm")
+           (match_operand:V4SF 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)]))))]
   "TARGET_SSE2 && <mask_avx512vl_condition>"
   "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
@@ -9191,12 +9205,12 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
-(define_insn "extendv2sfv2df2"
+(define_insn "sse2_cvtps2pd_load<mask_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
        (float_extend:V2DF
-         (match_operand:V2SF 1 "register_operand" "v")))]
-  "TARGET_MMX_WITH_SSE"
-  "%vcvtps2pd\t{%1, %0|%0, %1}"
+         (match_operand:V2SF 1 "memory_operand" "m")))]
+  "TARGET_MMX_WITH_SSE && <mask_avx512vl_condition>"
+  "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
   [(set_attr "type" "ssecvt")
    (set_attr "amdfam10_decode" "direct")
    (set_attr "athlon_decode" "double")
diff --git a/gcc/testsuite/gcc.target/i386/pr43618-1.c 
b/gcc/testsuite/gcc.target/i386/pr43618-1.c
new file mode 100644
index 00000000000..3c84ea444aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr43618-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "movq" } } */
+/* { dg-final { scan-assembler "cvtps2pd" } } */
+
+void
+foo (float a[2], double b[2])
+{
+    int i;
+    for (i = 0; i < 2; i++)
+      b[i] = a[i];
+}
+
-- 
2.18.1

Reply via email to