Author: mberg
Date: 2011-06-21 14:55:15 -0400 (Tue, 21 Jun 2011)
New Revision: 3660

Modified:
   trunk/osprey/be/cg/oputil.cxx
   trunk/osprey/be/cg/whirl2ops.cxx
   trunk/osprey/be/cg/x8664/expand.cxx
Log:
Add handling of complex arithmetic for FMA instructions, and update the
legacy-SSE-to-VEX mappings for shift right (psrlq) and movddup.

CR by Jian-Xin.



Modified: trunk/osprey/be/cg/oputil.cxx
===================================================================
--- trunk/osprey/be/cg/oputil.cxx       2011-06-21 04:40:48 UTC (rev 3659)
+++ trunk/osprey/be/cg/oputil.cxx       2011-06-21 18:55:15 UTC (rev 3660)
@@ -2172,7 +2172,7 @@
     {TOP_movhlps,           TOP_vmovhlps},
     {TOP_movlhps,           TOP_vmovlhps},
     {TOP_psrldq,            TOP_vpsrldq},
-    {TOP_psrlq128v64,       TOP_vpsrlq},
+    {TOP_psrlq128v64,       TOP_vpsrlqi},
     {TOP_pslldq,            TOP_vpslldq},
     {TOP_psllw,             TOP_vpsllw},
     {TOP_pslld,             TOP_vpslld},
@@ -2495,6 +2495,18 @@
     {TOP_phaddsx128v16,      TOP_vphaddsx128v16},
     {TOP_phaddsxx128v16,     TOP_vphaddsxx128v16},
     {TOP_phaddsxxx128v16,    TOP_vphaddsxxx128v16},
+    {TOP_fmovddup,           TOP_vmovddup},
+    {TOP_fmovddupx,          TOP_vmovddupx},
+    {TOP_fmovddupxx,         TOP_vmovddupxx},
+    {TOP_fmovddupxxx,        TOP_vmovddupxxx},
+    {TOP_fmovshdup,          TOP_vmovshdup},
+    {TOP_fmovshdupx,         TOP_vmovshdupx},
+    {TOP_fmovshdupxx,        TOP_vmovshdupxx},
+    {TOP_fmovshdupxxx,       TOP_vmovshdupxxx},
+    {TOP_fmovsldup,          TOP_vmovsldup},
+    {TOP_fmovsldupx,         TOP_vmovsldupx},
+    {TOP_fmovsldupxx,        TOP_vmovsldupxx},
+    {TOP_fmovsldupxxx,       TOP_vmovsldupxxx},
 };
 
 void Init_LegacySSE_To_Vex_Group(void)

Modified: trunk/osprey/be/cg/whirl2ops.cxx
===================================================================
--- trunk/osprey/be/cg/whirl2ops.cxx    2011-06-21 04:40:48 UTC (rev 3659)
+++ trunk/osprey/be/cg/whirl2ops.cxx    2011-06-21 18:55:15 UTC (rev 3660)
@@ -4693,7 +4693,7 @@
   opnd2 = Expand_Expr(add_wn, expr,  NULL); 
   opnd1 = Expand_Expr(WN_kid1(mul_wn), mul_wn, NULL);
   opnd0 = Expand_Expr(WN_kid0(mul_wn), mul_wn, NULL);
- 
+
   if(result == NULL) 
     result = Allocate_Result_TN(expr, NULL); 
 
@@ -5348,12 +5348,22 @@
   case OPR_ADD:
     if ((CG_opt_level > 1) && Is_Target_Orochi() && 
         Is_Target_AVX() && Is_Target_FMA4()) {
+      BOOL expr_is_complex = FALSE;
       TYPE_ID rtype = OPCODE_rtype(opcode);
       WN *mul_wn = NULL;
+      if ((rtype == MTYPE_V16C4) ||
+          (rtype == MTYPE_V16C8)) {
+        expr_is_complex = TRUE;
+      }
+      
       // Looking for a fm{a/s} candidate via FMA4 insns
-      if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
+      if ( (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) &&
+           (expr_is_complex == FALSE) ) {
         if ((WN_operator(mul_wn = WN_kid(expr, 0)) == OPR_MPY) &&
-            (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY) ) {
+            (WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+            (WN_opcode(mul_wn) != OPC_V16C4MPY) &&
+            (WN_opcode(mul_wn) != OPC_FQMPY) && 
+            (WN_opcode(mul_wn) != OPC_F10MPY) ) {
           rtype = OPCODE_rtype(WN_opcode (mul_wn));
           if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
             if (WN_operator(expr) == OPR_ADD) {
@@ -5363,7 +5373,10 @@
             }
           }
         } else if ((WN_operator(mul_wn = WN_kid(expr, 1)) == OPR_MPY) &&
-                   (WN_opcode(mul_wn) != OPC_FQMPY) && (WN_opcode(mul_wn) != OPC_F10MPY)) {
+                   (WN_opcode(mul_wn) != OPC_V16C8MPY) &&
+                   (WN_opcode(mul_wn) != OPC_V16C4MPY) &&
+                   (WN_opcode(mul_wn) != OPC_FQMPY) && 
+                   (WN_opcode(mul_wn) != OPC_F10MPY)) {
           rtype = OPCODE_rtype(WN_opcode (mul_wn));
           if (MTYPE_is_float(rtype) || MTYPE_is_vector(rtype)) {
             if (WN_operator(expr) == OPR_ADD) {

Modified: trunk/osprey/be/cg/x8664/expand.cxx
===================================================================
--- trunk/osprey/be/cg/x8664/expand.cxx 2011-06-21 04:40:48 UTC (rev 3659)
+++ trunk/osprey/be/cg/x8664/expand.cxx 2011-06-21 18:55:15 UTC (rev 3660)
@@ -5630,12 +5630,19 @@
     TN* tmp5 = Build_TN_Like(src1);
     
     Build_OP(TOP_fmovsldup, tmp1, src2, ops);
-    Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops);
-    Build_OP(TOP_fmovshdup,tmp3, src2, ops);
-    Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
-    Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
-    Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops);
-
+    if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+        Is_Target_AVX() && Is_Target_FMA4()) {
+      Build_OP(TOP_fmovshdup,tmp3, src2, ops);
+      Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
+      Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
+      Build_OP(TOP_vfmaddsubps, result, tmp1, src1, tmp5, ops);  
+    } else {
+      Build_OP(TOP_fmul128v32, tmp2, tmp1, src1, ops);
+      Build_OP(TOP_fmovshdup,tmp3, src2, ops);
+      Build_OP(TOP_shufps, tmp4, src1, src1, Gen_Literal_TN(177, 1), ops);
+      Build_OP(TOP_fmul128v32, tmp5, tmp3, tmp4, ops);
+      Build_OP(TOP_faddsub128v32, result, tmp2, tmp5, ops);
+    }
   } else if (TN_size(src1) != TN_size(src2)){
     TN* src1_t;
     TN* src2_t;
@@ -5669,12 +5676,21 @@
     TN* tmp6 = Build_TN_Like(src1);
     
     Build_OP(TOP_fmovddup, tmp1, src2_t, ops);
-    Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops);
-    Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
-    Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
-    Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
-    Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
-    Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops);
+    if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+        Is_Target_AVX() && Is_Target_FMA4()) {
+      Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
+      Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
+      Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
+      Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
+      Build_OP(TOP_vfmaddsubpd, result, src1_t, tmp1, tmp6, ops);  
+    } else {
+      Build_OP(TOP_fmul128v64, tmp2, src1_t, tmp1, ops);
+      Build_OP(TOP_shufpd, tmp3, src1_t, src1_t, Gen_Literal_TN(1, 1), ops);
+      Build_OP(TOP_shufpd, tmp4, src2_t, src2_t, Gen_Literal_TN(1, 1), ops);
+      Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
+      Build_OP(TOP_fmul128v64, tmp6, tmp3, tmp5, ops);
+      Build_OP(TOP_faddsub128v64, result, tmp2, tmp6, ops);
+    }
   }
   return;
 }
@@ -5720,12 +5736,21 @@
     Build_OP(TOP_addsd, tmp4, tmp3, tmp1, ops);
     Build_OP(TOP_fmovddup, tmp5, tmp4, ops);
     Build_OP(TOP_fmovddup, tmp6, src2, ops);
-    Build_OP(TOP_fmul128v64, tmp8, tmp6, src1, ops);
-    Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops);
-    Expand_Neg(tmp12, tmp2, MTYPE_F8, ops);
-    Build_OP(TOP_fmovddup, tmp13, tmp12, ops);
-    Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops);
-    Build_OP(TOP_faddsub128v64, tmp11, tmp8, tmp10, ops);
+    if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+        Is_Target_AVX() && Is_Target_FMA4()) {
+      Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops);
+      Expand_Neg(tmp12, tmp2, MTYPE_F8, ops);
+      Build_OP(TOP_fmovddup, tmp13, tmp12, ops);
+      Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops);
+      Build_OP(TOP_vfmaddsubpd, tmp11, tmp6, src1, tmp10, ops);  
+    } else {
+      Build_OP(TOP_fmul128v64, tmp8, tmp6, src1, ops);
+      Build_OP(TOP_shufpd, tmp9, src1, src1, Gen_Literal_TN(1, 1), ops);
+      Expand_Neg(tmp12, tmp2, MTYPE_F8, ops);
+      Build_OP(TOP_fmovddup, tmp13, tmp12, ops);
+      Build_OP(TOP_fmul128v64, tmp10, tmp9, tmp13, ops);
+      Build_OP(TOP_faddsub128v64, tmp11, tmp8, tmp10, ops);
+    }
     Build_OP(TOP_fdiv128v64, result, tmp11, tmp5, ops);
 
   } else if (opcode == OPC_V16C8DIV) {
@@ -5792,11 +5817,19 @@
     Build_OP(TOP_unpckhpd, tmp5, tmp2, tmp2, ops);
     Build_OP(TOP_fmul128v64, tmp6, tmp5, tmp1, ops);
     Build_OP(TOP_shufpd, tmp7, tmp1, tmp1, Gen_Literal_TN(1, 1), ops);
-    Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops);
-    Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
-    Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
-    Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
-    Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops);
+    if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+        Is_Target_AVX() && Is_Target_FMA4()) {
+      Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
+      Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
+      Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
+      Build_OP(TOP_vfmaddsubpd, tmp12, tmp7, tmp4, tmp6, ops);
+    } else {
+      Build_OP(TOP_fmul128v64, tmp8, tmp7, tmp4, ops);
+      Build_OP(TOP_fhadd128v64, tmp9, tmp3, tmp3, ops);
+      Build_OP(TOP_shufps, tmp10, src1, src1, Gen_Literal_TN(238, 1), ops);
+      Build_OP(TOP_cvtps2pd, tmp11, tmp10, ops);
+      Build_OP(TOP_faddsub128v64, tmp12, tmp8, tmp6, ops);
+    }
     Build_OP(TOP_shufpd, tmp13, tmp12, tmp12, Gen_Literal_TN(1, 1), ops);
     Build_OP(TOP_fdiv128v64, tmp14, tmp13, tmp9, ops);
     Build_OP(TOP_cvtpd2ps, tmp15, tmp14, ops);
@@ -5807,9 +5840,15 @@
     Build_OP(TOP_unpckhpd, tmp20, tmp17, tmp17, ops);
     Build_OP(TOP_fmul128v64, tmp21, tmp20, tmp11, ops);
     Build_OP(TOP_shufpd, tmp22, tmp11, tmp11, Gen_Literal_TN(1, 1), ops);
-    Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops);
-    Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
-    Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops);
+    if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+        Is_Target_AVX() && Is_Target_FMA4()) {
+      Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
+      Build_OP(TOP_vfmaddsubpd, tmp25, tmp22, tmp19, tmp21, ops);
+    } else {
+      Build_OP(TOP_fmul128v64, tmp23, tmp22, tmp19, ops);
+      Build_OP(TOP_fhadd128v64, tmp24, tmp18, tmp18, ops);
+      Build_OP(TOP_faddsub128v64, tmp25, tmp23, tmp21, ops);
+    }
     Build_OP(TOP_shufpd, tmp26, tmp25, tmp25, Gen_Literal_TN(1, 1), ops);
     Build_OP(TOP_fdiv128v64, tmp27, tmp26, tmp24, ops);
     Build_OP(TOP_cvtpd2ps, tmp28, tmp27, ops);
@@ -7245,9 +7284,15 @@
       Build_OP(TOP_fmovddup, tmp1, op2, ops);
       Build_OP(TOP_shufpd, tmp2, op2, op2, Gen_Literal_TN(1, 1), ops);
       Build_OP(TOP_fmovddup, tmp3, tmp2, ops);
-      Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops);
-      Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
-      Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops);
+      if ((CG_opt_level > 1) && Is_Target_Orochi() &&
+          Is_Target_AVX() && Is_Target_FMA4()) {
+        Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
+        Build_OP(TOP_vfmaddsubpd, result, op0, tmp1, tmp5, ops);
+      } else {
+        Build_OP(TOP_fmul128v64, tmp4, op0, tmp1, ops);
+        Build_OP(TOP_fmul128v64, tmp5, op1, tmp3, ops);
+        Build_OP(TOP_faddsub128v64, result, tmp4, tmp5, ops);
+      }
       break;
     }
   case INTRN_V16C8CONJG:


------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to