Add experimental support for native complex operation handling in
the x86 backend. For now it only supports add, sub, mul, conj, neg, mov
in SCmode (complex float). Performance gains are still marginal on this
target because there are no dedicated instructions to speed up complex
operations, except for some SIMD tricks.

gcc/ChangeLog:

        * config/i386/i386.cc (classify_argument): Align complex
        element to the whole size, not the size of the parts.
        (ix86_return_in_memory): Handle complex modes like a scalar
        with the same size.
        (ix86_class_max_nregs): Likewise
        (ix86_hard_regno_nregs): Likewise
        (function_value_ms_64): Add case for SCmode
        (ix86_build_const_vector): Likewise
        (ix86_build_signbit_mask): Likewise
        (x86_gen_rtx_complex): New: Implement the gen_rtx_complex
        hook, use registers of complex modes to represent complex
        elements in rtl
        (x86_read_complex_part): New: Implement the read_complex_part
        hook, handle registers of complex modes
        (x86_write_complex_part): New: Implement the write_complex_part
        hook, handle registers of complex modes
        * config/i386/i386.h: Add SCmode in several predicates.
        * config/i386/sse.md: Add patterns for some complex operations in
        SCmode. This includes movsc, addsc3, subsc3, negsc2, mulsc3,
        and conjsc2.
---
 gcc/config/i386/i386.cc | 296 +++++++++++++++++++++++++++++++++++++++-
 gcc/config/i386/i386.h  |  11 +-
 gcc/config/i386/sse.md  | 144 +++++++++++++++++++
 3 files changed, 440 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f0d6167e667..a65ac92a4a9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2339,8 +2339,8 @@ classify_argument (machine_mode mode, const_tree type,
        mode_alignment = 128;
       else if (mode == XCmode)
        mode_alignment = 256;
-      if (COMPLEX_MODE_P (mode))
-       mode_alignment /= 2;
+      /*if (COMPLEX_MODE_P (mode))
+       mode_alignment /= 2;*/
       /* Misaligned fields are always returned in memory.  */
       if (bit_offset % mode_alignment)
        return 0;
@@ -3007,6 +3007,7 @@ pass_in_reg:
     case E_V4BFmode:
     case E_V2SImode:
     case E_V2SFmode:
+    case E_SCmode:
     case E_V1TImode:
     case E_V1DImode:
       if (!type || !AGGREGATE_TYPE_P (type))
@@ -3257,6 +3258,7 @@ pass_in_reg:
     case E_V4BFmode:
     case E_V2SImode:
     case E_V2SFmode:
+    case E_SCmode:
     case E_V1TImode:
     case E_V1DImode:
       if (!type || !AGGREGATE_TYPE_P (type))
@@ -4158,8 +4160,8 @@ function_value_ms_64 (machine_mode orig_mode, 
machine_mode mode,
              && !INTEGRAL_TYPE_P (valtype)
              && !VECTOR_FLOAT_TYPE_P (valtype))
            break;
-         if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
-             && !COMPLEX_MODE_P (mode))
+         if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)))
+            // && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          break;
        case 8:
@@ -4266,7 +4268,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype 
ATTRIBUTE_UNUSED)
               || INTEGRAL_TYPE_P (type)
               || VECTOR_FLOAT_TYPE_P (type))
              && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
-             && !COMPLEX_MODE_P (mode)
+             //&& !COMPLEX_MODE_P (mode)
              && (GET_MODE_SIZE (mode) == 16 || size == 16))
            return false;
 
@@ -15722,6 +15724,7 @@ ix86_build_const_vector (machine_mode mode, bool vect, 
rtx value)
     case E_V8SFmode:
     case E_V4SFmode:
     case E_V2SFmode:
+    case E_SCmode:
     case E_V8DFmode:
     case E_V4DFmode:
     case E_V2DFmode:
@@ -15770,6 +15773,7 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, 
bool invert)
     case E_V8SFmode:
     case E_V4SFmode:
     case E_V2SFmode:
+    case E_SCmode:
     case E_V2SImode:
       vec_mode = mode;
       imode = SImode;
@@ -19821,7 +19825,8 @@ ix86_class_max_nregs (reg_class_t rclass, machine_mode 
mode)
   else
     {
       if (COMPLEX_MODE_P (mode))
-       return 2;
+       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
+       //return 2;
       else
        return 1;
     }
@@ -20157,7 +20162,8 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode 
mode)
       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
     }
   if (COMPLEX_MODE_P (mode))
-    return 2;
+    return 1;
+    //return 2;
   /* Register pair for mask registers.  */
   if (mode == P2QImode || mode == P2HImode)
     return 2;
@@ -23613,6 +23619,273 @@ ix86_preferred_simd_mode (scalar_mode mode)
     }
 }
 
+static rtx
+x86_gen_rtx_complex (machine_mode mode, rtx real_part, rtx imag_part)
+{
+  machine_mode imode = GET_MODE_INNER (mode);
+
+  if ((real_part == imag_part) && (real_part == CONST0_RTX (imode)))
+    {
+      if (CONST_DOUBLE_P (real_part))
+       return const_double_from_real_value (dconst0, mode);
+      else if (CONST_INT_P (real_part))
+       return GEN_INT (0);
+      else
+       gcc_unreachable ();
+    }
+
+  bool saved_generating_concat_p = generating_concat_p;
+  generating_concat_p = false;
+  rtx complex_reg = gen_reg_rtx (mode);
+  generating_concat_p = saved_generating_concat_p;
+
+  if (real_part)
+    {
+      gcc_assert (imode == GET_MODE (real_part));
+      write_complex_part (complex_reg, real_part, REAL_P, false);
+    }
+
+  if (imag_part)
+    {
+      gcc_assert (imode == GET_MODE (imag_part));
+      write_complex_part (complex_reg, imag_part, IMAG_P, false);
+    }
+
+  return complex_reg;
+}
+
+static rtx
+x86_read_complex_part (rtx cplx, complex_part_t part)
+{
+  machine_mode cmode;
+  scalar_mode imode;
+  unsigned ibitsize;
+
+  if (GET_CODE (cplx) == CONCAT)
+    return XEXP (cplx, part);
+
+  cmode = GET_MODE (cplx);
+  imode = GET_MODE_INNER (cmode);
+  ibitsize = GET_MODE_BITSIZE (imode);
+
+  if (COMPLEX_MODE_P (cmode) && (part == BOTH_P))
+    return cplx;
+
+  /* For constants under 32 bits, vector constants are folded during expand,
+   * so we need to compensate for it, as cplx is an integer constant.
+   * In this case cmode and imode are equal.  */
+  if (cmode == imode)
+    ibitsize /= 2;
+
+  if (cmode == E_VOIDmode)
+    return cplx;               /* FIXME: case used when initializing a mock
+                                  value in a complex register.  */
+
+  if ((cmode == E_DCmode) && (GET_CODE (cplx) == CONST_DOUBLE))
+    /* FIXME: stop generation of DC const_double; there are no patterns
+       for it yet.  */
+    return CONST0_RTX (E_DFmode);
+  /* FIXME: verify SC const_double as well.  */
+
+  /* Special case reads from complex constants that got spilled to memory.  */
+  if (MEM_P (cplx) && GET_CODE (XEXP (cplx, 0)) == SYMBOL_REF)
+    {
+      tree decl = SYMBOL_REF_DECL (XEXP (cplx, 0));
+      if (decl && TREE_CODE (decl) == COMPLEX_CST)
+       {
+         tree cplx_part = (part == IMAG_P) ? TREE_IMAGPART (decl)
+                         : (part == REAL_P) ? TREE_REALPART (decl)
+                         : TREE_COMPLEX_BOTH_PARTS (decl);
+       if (CONSTANT_CLASS_P (cplx_part))
+         return expand_expr (cplx_part, NULL_RTX, imode, EXPAND_NORMAL);
+       }
+    }
+
+  /* For MEMs simplify_gen_subreg may generate an invalid new address
+     because, e.g., the original address is considered mode-dependent
+     by the target, which restricts simplify_subreg from invoking
+     adjust_address_nv.  Instead of preparing fallback support for an
+     invalid address, we call adjust_address_nv directly.  */
+  if (MEM_P (cplx))
+    {
+      if (part == BOTH_P)
+       return adjust_address_nv (cplx, cmode, 0);
+      else
+       return adjust_address_nv (cplx, imode, (part == IMAG_P)
+                                ? GET_MODE_SIZE (imode) : 0);
+    }
+
+  /* If the sub-object is at least word sized, then we know that subregging
+     will work.  This special case is important, since extract_bit_field
+     wants to operate on integer modes, and there's rarely an OImode to
+     correspond to TCmode.  */
+  if (ibitsize >= BITS_PER_WORD
+      /* For hard regs we have exact predicates.  Assume we can split
+        the original object if it spans an even number of hard regs.
+        This special case is important for SCmode on 64-bit platforms
+        where the natural size of floating-point regs is 32-bit.  */
+      || (REG_P (cplx)
+         && REGNO (cplx) < FIRST_PSEUDO_REGISTER
+         && REG_NREGS (cplx) % 2 == 0))
+    {
+      rtx ret = simplify_gen_subreg (imode, cplx, cmode, (part == IMAG_P)
+                                    ? GET_MODE_SIZE (imode) : 0);
+      if (ret)
+       return ret;
+      else
+       /* simplify_gen_subreg may fail for sub-word MEMs.  */
+       gcc_assert (MEM_P (cplx) && ibitsize < BITS_PER_WORD);
+    }
+
+  if (part == BOTH_P)
+    return extract_bit_field (cplx, 2 * ibitsize, 0, true, NULL_RTX, cmode,
+                             cmode, false, NULL);
+  else
+    return extract_bit_field (cplx, ibitsize, (part == IMAG_P) ? ibitsize : 0,
+                             true, NULL_RTX, imode, imode, false, NULL);
+}
+
+static void
+x86_write_complex_part (rtx cplx, rtx val, complex_part_t part, bool 
undefined_p)
+{
+  machine_mode cmode;
+  scalar_mode imode;
+  unsigned ibitsize;
+
+  cmode = GET_MODE (cplx);
+  imode = GET_MODE_INNER (cmode);
+  ibitsize = GET_MODE_BITSIZE (imode);
+
+  /* special case for constants */
+  if (GET_CODE (val) == CONST_VECTOR)
+    {
+      if (part == BOTH_P)
+       {
+         machine_mode temp_mode = E_BLKmode;;
+         switch (cmode)
+           {
+           case E_CQImode:
+             temp_mode = E_HImode;
+             break;
+           case E_CHImode:
+             temp_mode = E_SImode;
+             break;
+           case E_CSImode:
+             temp_mode = E_DImode;
+             break;
+           case E_SCmode:
+             temp_mode = E_DFmode;
+             break;
+           case E_CDImode:
+             temp_mode = E_TImode;
+             break;
+           case E_DCmode:
+           default:
+             break;
+           }
+
+         if (temp_mode != E_BLKmode)
+           {
+             rtx temp_reg = gen_reg_rtx (temp_mode);
+             store_bit_field (temp_reg, GET_MODE_BITSIZE (temp_mode), 0, 0,
+                              0, GET_MODE (val), val, false, undefined_p);
+             emit_move_insn (cplx,
+                             simplify_gen_subreg (cmode, temp_reg, temp_mode,
+                                                  0));
+           }
+         else
+           {
+             /* Write real part and imag part separately.  */
+             gcc_assert (GET_CODE (val) == CONST_VECTOR);
+             write_complex_part (cplx, const_vector_elt (val, 0), REAL_P, 
false);
+             write_complex_part (cplx, const_vector_elt (val, 1), IMAG_P, 
false);
+           }
+       }
+      else
+       write_complex_part (cplx,
+                           const_vector_elt (val,
+                           ((part == REAL_P) ? 0 : 1)),
+                           part, false);
+      return;
+    }
+
+  if ((part == BOTH_P) && !MEM_P (cplx)
+      /*&& (optab_handler (mov_optab, cmode) != CODE_FOR_nothing)*/)
+    {
+      write_complex_part (cplx, read_complex_part(cplx, REAL_P), REAL_P, 
undefined_p);
+      write_complex_part (cplx, read_complex_part(cplx, IMAG_P), IMAG_P, 
undefined_p);
+      //emit_move_insn (cplx, val);
+      return;
+    }
+
+  if ((GET_CODE (val) == CONST_DOUBLE) || (GET_CODE (val) == CONST_INT))
+    {
+      if (part == REAL_P)
+       {
+         emit_move_insn (gen_lowpart (imode, cplx), val);
+         return;
+       }
+      else if (part == IMAG_P)
+       {
+         /* cannot set highpart of a pseudo register */
+         if (REGNO (cplx) < FIRST_PSEUDO_REGISTER)
+           {
+             emit_move_insn (gen_highpart (imode, cplx), val);
+             return;
+           }
+       }
+      else
+       gcc_unreachable ();
+    }
+
+  if (GET_CODE (cplx) == CONCAT)
+    {
+      emit_move_insn (XEXP (cplx, part), val);
+      return;
+    }
+
+  /* For MEMs simplify_gen_subreg may generate an invalid new address
+     because, e.g., the original address is considered mode-dependent
+     by the target, which restricts simplify_subreg from invoking
+     adjust_address_nv.  Instead of preparing fallback support for an
+     invalid address, we call adjust_address_nv directly.  */
+  if (MEM_P (cplx))
+    {
+      if (part == BOTH_P)
+       emit_move_insn (adjust_address_nv (cplx, cmode, 0), val);
+      else
+       emit_move_insn (adjust_address_nv (cplx, imode, (part == IMAG_P)
+                                         ? GET_MODE_SIZE (imode) : 0), val);
+      return;
+    }
+
+  /* If the sub-object is at least word sized, then we know that subregging
+     will work.  This special case is important, since store_bit_field
+     wants to operate on integer modes, and there's rarely an OImode to
+     correspond to TCmode.  */
+  if (ibitsize >= BITS_PER_WORD
+      /* For hard regs we have exact predicates.  Assume we can split
+        the original object if it spans an even number of hard regs.
+        This special case is important for SCmode on 64-bit platforms
+        where the natural size of floating-point regs is 32-bit.  */
+      || (REG_P (cplx)
+         && REGNO (cplx) < FIRST_PSEUDO_REGISTER
+         && REG_NREGS (cplx) % 2 == 0))
+    {
+      rtx cplx_part = simplify_gen_subreg (imode, cplx, cmode,
+                                          (part == IMAG_P)
+                                          ? GET_MODE_SIZE (imode) : 0);
+      if (cplx_part)
+       {
+         emit_move_insn (cplx_part, val);
+         return;
+       }
+      else
+       /* simplify_gen_subreg may fail for sub-word MEMs.  */
+       gcc_assert (MEM_P (cplx) && ibitsize < BITS_PER_WORD);
+    }
+
+  store_bit_field (cplx, ibitsize, (part == IMAG_P) ? ibitsize : 0, 0, 0,
+                  imode, val, false, undefined_p);
+}
+
 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
    vectors.  If AVX512F is enabled then try vectorizing with 512bit,
    256bit and 128bit vectors.  */
@@ -25621,6 +25894,15 @@ ix86_libgcc_floating_mode_supported_p
 #undef TARGET_IFUNC_REF_LOCAL_OK
 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
 
+#undef TARGET_GEN_RTX_COMPLEX
+#define TARGET_GEN_RTX_COMPLEX x86_gen_rtx_complex
+
+#undef TARGET_READ_COMPLEX_PART
+#define TARGET_READ_COMPLEX_PART x86_read_complex_part
+
+#undef TARGET_WRITE_COMPLEX_PART
+#define TARGET_WRITE_COMPLEX_PART x86_write_complex_part
+
 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
 # undef TARGET_ASM_RELOC_RW_MASK
 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index aea3209d5a3..86157b97b25 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1054,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const 
char **argv);
    || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode   \
    || (MODE) == V2DImode || (MODE) == V2QImode                         \
    || (MODE) == DFmode || (MODE) == DImode                             \
-   || (MODE) == HFmode || (MODE) == BFmode)
+   || (MODE) == HFmode || (MODE) == BFmode                             \
+   || (MODE) == SCmode)
 
 #define VALID_SSE_REG_MODE(MODE)                                       \
   ((MODE) == V1TImode || (MODE) == TImode                              \
@@ -1063,7 +1064,7 @@ extern const char *host_detect_local_cpu (int argc, const 
char **argv);
    || (MODE) == TFmode || (MODE) == TDmode)
 
 #define VALID_MMX_REG_MODE_3DNOW(MODE) \
-  ((MODE) == V2SFmode || (MODE) == SFmode)
+  ((MODE) == V2SFmode || (MODE) == SFmode || (MODE) == SCmode)
 
 /* To match ia32 psABI, V4HFmode should be added here.  */
 #define VALID_MMX_REG_MODE(MODE)                                       \
@@ -1106,13 +1107,15 @@ extern const char *host_detect_local_cpu (int argc, 
const char **argv);
    || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
    || (MODE) == V16SFmode \
    || (MODE) == V32HFmode || (MODE) == V16HFmode || (MODE) == V8HFmode  \
-   || (MODE) == V32BFmode || (MODE) == V16BFmode || (MODE) == V8BFmode)
+   || (MODE) == V32BFmode || (MODE) == V16BFmode || (MODE) == V8BFmode \
+   || (MODE) == SCmode)
 
 #define X87_FLOAT_MODE_P(MODE) \
   (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
 
 #define SSE_FLOAT_MODE_P(MODE) \
-  ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
+  ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode) \
+   || (TARGET_SSE2 && (MODE) == SCmode))
 
 #define SSE_FLOAT_MODE_SSEMATH_OR_HF_P(MODE)                           \
   ((SSE_FLOAT_MODE_P (MODE) && TARGET_SSE_MATH)                                
\
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6bf9c99a2c1..b2b354c439e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30209,3 +30209,147 @@
   "vcvtneo<bf16_ph>2ps\t{%1, %0|%0, %1}"
   [(set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "movsc"
+  [(match_operand:SC 0 "nonimmediate_operand" "")
+   (match_operand:SC 1 "nonimmediate_operand" "")]
+  ""
+  {
+    emit_insn (gen_movv2sf (simplify_gen_subreg (V2SFmode, operands[0], 
SCmode, 0),
+                           simplify_gen_subreg (V2SFmode, operands[1], SCmode, 
0)));
+    DONE;
+  }
+)
+
+(define_expand "addsc3"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")
+   (match_operand:SC 2 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_addv2sf3 (simplify_gen_subreg (V2SFmode, operands[0], 
SCmode, 0),
+                            simplify_gen_subreg (V2SFmode, operands[1], 
SCmode, 0),
+                            simplify_gen_subreg (V2SFmode, operands[2], 
SCmode, 0)));
+    DONE;
+  }
+)
+
+(define_expand "subsc3"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")
+   (match_operand:SC 2 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_subv2sf3 (simplify_gen_subreg (V2SFmode, operands[0], 
SCmode, 0),
+                            simplify_gen_subreg (V2SFmode, operands[1], 
SCmode, 0),
+                            simplify_gen_subreg (V2SFmode, operands[2], 
SCmode, 0)));
+    DONE;
+  }
+)
+
+(define_expand "negsc2"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_negv2sf2 (simplify_gen_subreg (V2SFmode, operands[0], 
SCmode, 0),
+                             simplify_gen_subreg (V2SFmode, operands[1], 
SCmode, 0)));
+    DONE;
+  }
+)
+
+(define_expand "sse_shufsc"
+  [(match_operand:V4SF 0 "register_operand")
+   (match_operand:SC 1 "register_operand")
+   (match_operand:SC 2 "vector_operand")
+   (match_operand:SI 3 "const_int_operand")]
+  "TARGET_SSE"
+{
+  int mask = INTVAL (operands[3]);
+  emit_insn (gen_sse_shufsc_sc (operands[0],
+                                                    operands[1],
+                                                    operands[2],
+                                                    GEN_INT ((mask >> 0) & 3),
+                                                    GEN_INT ((mask >> 2) & 3),
+                                                    GEN_INT (((mask >> 4) & 3) 
+ 4),
+                                                    GEN_INT (((mask >> 6) & 3) 
+ 4)));
+  DONE;
+})
+
+(define_insn "sse_shufsc_sc"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
+       (vec_select:V4SF
+         (vec_concat:V4SF
+           (match_operand:V2SF 1 "register_operand" "0,v")
+           (match_operand:V2SF 2 "vector_operand" "xBm,vm"))
+         (parallel [(match_operand 3 "const_0_to_3_operand")
+                    (match_operand 4 "const_0_to_3_operand")
+                    (match_operand 5 "const_4_to_7_operand")
+                    (match_operand 6 "const_4_to_7_operand")])))]
+  "TARGET_SSE"
+{
+  int mask = 0;
+  mask |= INTVAL (operands[3]) << 0;
+  mask |= INTVAL (operands[4]) << 2;
+  mask |= (INTVAL (operands[5]) - 4) << 4;
+  mask |= (INTVAL (operands[6]) - 4) << 6;
+  operands[3] = GEN_INT (mask);
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "shufps\t{%3, %2, %0|%0, %2, %3}";
+    case 1:
+      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseshuf")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,maybe_evex")
+   (set_attr "mode" "V4SF")])
+
+(define_expand "mulsc3"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")
+   (match_operand:SC 2 "register_operand" "r")]
+  "TARGET_SSE3"
+  {
+    rtx a = gen_reg_rtx (V4SFmode);
+    rtx b = gen_reg_rtx (V4SFmode);
+    emit_insn (gen_sse_shufsc (a,
+                                    simplify_gen_subreg (V2SFmode, 
operands[1], SCmode, 0),
+                                    simplify_gen_subreg (V2SFmode, 
operands[1], SCmode, 0),
+                                    GEN_INT (0b01000100)));
+    emit_insn (gen_sse_shufsc (b,
+                                    simplify_gen_subreg (V2SFmode, 
operands[2], SCmode, 0),
+                                    simplify_gen_subreg (V2SFmode, 
operands[2], SCmode, 0),
+                                    GEN_INT (0b00010100)));
+    emit_insn (gen_mulv4sf3 (a, a, b));
+    emit_insn (gen_sse_shufps (b,
+                                    a,
+                                    a,
+                                    GEN_INT (0b00001101)));
+    emit_insn (gen_sse_shufps (a,
+                                    a,
+                                    a,
+                                    GEN_INT (0b00001000)));
+    emit_insn (gen_vec_addsubv2sf3 (simplify_gen_subreg (V2SFmode, 
operands[0], SCmode, 0),
+                                   simplify_gen_subreg (V2SFmode, a, V4SFmode, 
0),
+                                   simplify_gen_subreg (V2SFmode, b, V4SFmode, 
0)));
+    DONE;
+  }
+)
+
+(define_expand "conjsc2"
+  [(match_operand:SC 0 "register_operand" "=r")
+   (match_operand:SC 1 "register_operand" "r")]
+  ""
+  {
+    emit_insn (gen_negdf2 (simplify_gen_subreg (DFmode, operands[0], SCmode, 
0),
+                          simplify_gen_subreg (DFmode, operands[1], SCmode, 
0)));
+    DONE;
+  }
+)
-- 
2.17.1





Reply via email to