(Attachment added this time.)

On Mon, May 16, 2011 at 5:01 PM, David Coakley <dcoak...@gmail.com> wrote:
> Could a gatekeeper review the attached patch?  It fixes some AMD64 ABI
> compatibility problems and also eliminates some unnecessary struct
> copies.  The second part should apply to all targets.
>
> Here is the proposed log message:
>
>
> Improve AMD64 ABI compliance and avoid unnecessary struct copies.
>
> For the following code, the compiler generated unnecessary struct copies
> for a return value that has a size too big to be passed in registers.
>
>  typedef struct { char big[1024]; } C;
>  C gc;
>  extern C bar9 (void);
>  extern C check9 (void) { return bar9 (); }
>
> In this case there were two copies and one temporary variable generated.
> After this change the compiler does not generate any extra copies.
>
> According to the AMD64 ABI, the caller provides space for the return
> value in a hidden first argument and this address is returned in %rax.
> Previously the compiler was not following this rule.
>
> The change handles the similar cases of initialization by function
> call ("C c = bar9();") and assignment through a pointer ("*cp = bar9();").
>
> Also, a complex long double field is now returned in the register pair
> (%st0, %st1) as specified by the AMD64 ABI.
>
>
>
> I am still looking for a review for the patch I posted last week (May
> 10) as well.  Thanks,
>
> -David Coakley / AMD Open Source Compiler Engineering
>
Index: osprey/be/cg/reg_live.cxx
===================================================================
--- osprey/be/cg/reg_live.cxx	(revision 3553)
+++ osprey/be/cg/reg_live.cxx	(working copy)
@@ -268,6 +268,17 @@
 	retpreg[i] = RETURN_INFO_preg (return_info, i);
 	Add_PREG_To_REGSET (retpreg[i], return_regs);
     }
+#ifdef TARG_X8664
+    // AMD64 ABI
+    // on return %rax will contain the address that has been
+    // passed in by the caller in %rdi
+    // for 32 bit, the address of the return value is in %eax
+    if (RETURN_INFO_return_via_first_arg(return_info)) {
+       FmtAssert (RETURN_INFO_count(return_info) == 0,
+             ("Compute_Return_Regs:  more return registers than can handle"));
+       Add_PREG_To_REGSET (First_Int_Preg_Return_Offset, return_regs);
+    }
+#endif
 #if defined(TARG_SL)
     if (MTYPE_byte_size(TY_mtype(TY_ret_type(call_ty))) == 8) { //I8/U8/F8
       FmtAssert (RETURN_INFO_count(return_info) <= 1, 
Index: osprey/be/com/wn_lower.cxx
===================================================================
--- osprey/be/com/wn_lower.cxx	(revision 3553)
+++ osprey/be/com/wn_lower.cxx	(working copy)
@@ -7331,7 +7331,8 @@
   ST *preg_st;
   WN *n_rhs;
   WN *wn = NULL;	// init to prevent upward-exposed use
-  RETURN_INFO return_info = Get_Return_Info(WN_ty(tree), Complex_Not_Simulated
+  RETURN_INFO return_info = Get_Return_Info(WN_ty(WN_kid0(tree)), 
+                                            Complex_Not_Simulated
 #ifdef TARG_X8664
 					    , last_call_ff2c_abi
 #endif
@@ -7353,7 +7354,7 @@
     if (WN_store_offset(tree) != 0) { // generate an ADD node for the offset
       WN *iwn = WN_CreateIntconst(OPR_INTCONST, Pointer_Mtype, MTYPE_V, 
 				  WN_store_offset(tree));
-      awn = WN_CreateExp2(OPR_ADD, Pointer_Mtype, Pointer_Mtype, awn, iwn);
+      awn = WN_CreateExp2(OPR_ADD, Pointer_Mtype, MTYPE_V, awn, iwn);
     }
     awn = lower_expr(block, awn, actions);
     WN *n_call = add_fake_parm(call, awn, WN_ty(tree));
@@ -12132,25 +12133,94 @@
       WN *n_rhs;
 
       // fix rhs
-      if (WN_operator(o_rhs) == OPR_LDID)
-        n_rhs = lower_mldid(block, o_rhs, LOWER_MLDID_MSTID);
-      else if (WN_operator(o_rhs) == OPR_ILOAD) 
-        n_rhs = lower_miload(block, o_rhs, LOWER_MLDID_MSTID);
-      else n_rhs = o_rhs; 		// MLOAD
-
-      // create an mstore
+ 
       WN *first_formal = WN_formal(current_function, 0);
       TY_IDX tidx = ST_type(WN_st(first_formal));
-      WN *awn = WN_CreateLdid(OPR_LDID, 
-			      TY_mtype(Ty_Table[tidx]), 
-			      TY_mtype(Ty_Table[tidx]),
-			      WN_idname_offset(first_formal), 
-			      WN_st(first_formal), 
-			      tidx);
-      WN *swn = WN_CopyNode(WN_kid1(n_rhs));
-      wn  = WN_CreateMstore (0, tidx, n_rhs, awn, swn);
-      WN_Set_Linenum(wn, current_srcpos);  // Bug 1268
-      WN_INSERT_BlockLast (block, wn);
+      if (WN_operator(o_rhs) == OPR_LDID && 
+            WN_st(o_rhs) == Return_Val_Preg) {
+        // the Return_Val_Preg must be returned by previous call
+        // so we need to get the previous MCALL statement and 
+        // fake first parm
+        //
+        // MCALL 126 <1,51,bar9>
+        //   MMLDID -1 <1,49,.preg_return_val> T<53,.anonymous.1,1>
+        // MRETURN_VAL 
+        //
+        //  ==>
+        //
+        //     U8LDID 0 <2,3,_temp_.return...>
+        //   U8PARM 33 T<55,anon_ptr.,8>
+        // VCALL 126 <1,51,bar9>
+        // 
+        WN *call = WN_last(block);
+        if ((WN_operator(call) == OPR_CALL || WN_operator(call) == OPR_ICALL ||
+	        WN_operator(call) == OPR_PICCALL) && WN_rtype(call) == MTYPE_M) {
+          TY_IDX prototype;
+          if (WN_operator(call) == OPR_ICALL) 
+            prototype = WN_ty(call);
+          else {
+            ST_IDX func_stidx = WN_st_idx(call);
+            PU_IDX puidx = ST_pu(St_Table[func_stidx]);
+            prototype = PU_prototype(Pu_Table[puidx]);
+          }
+          WN *awn = WN_CreateLdid(OPR_LDID, 
+			        TY_mtype(Ty_Table[tidx]), 
+			        TY_mtype(Ty_Table[tidx]),
+			        WN_idname_offset(first_formal), 
+			        WN_st(first_formal), 
+			        tidx);
+          awn = lower_expr(block, awn, actions);
+          WN *n_call = add_fake_parm(call, awn, WN_ty(awn));
+          WN_DELETE_FromBlock(block, call);
+          WN_INSERT_BlockLast(block, n_call); 
+        }
+      }
+      else {
+        if (WN_operator(o_rhs) == OPR_LDID) {
+          n_rhs = lower_mldid(block, o_rhs, LOWER_MLDID_MSTID);
+        }
+        else if (WN_operator(o_rhs) == OPR_ILOAD) 
+          n_rhs = lower_miload(block, o_rhs, LOWER_MLDID_MSTID);
+        else n_rhs = o_rhs; 		// MLOAD
+  
+        // create an mstore
+        WN *awn = WN_CreateLdid(OPR_LDID, 
+			        TY_mtype(Ty_Table[tidx]), 
+			        TY_mtype(Ty_Table[tidx]),
+			        WN_idname_offset(first_formal), 
+			        WN_st(first_formal), 
+			        tidx);
+        WN *swn = WN_CopyNode(WN_kid1(n_rhs));
+        wn  = WN_CreateMstore (0, tidx, n_rhs, awn, swn);
+        WN_Set_Linenum(wn, current_srcpos);  // Bug 1268
+        WN_INSERT_BlockLast (block, wn);
+      }
+#ifdef TARG_X8664
+      // AMD64 ABI
+      // on return %rax will contain the address that has been 
+      // passed in by the caller in %rdi
+      // for 32 bit, the address of the return value is in %eax
+      //
+      //   U8U8LDID 0 <2,3,_temp_.return...>
+      // U8STID 1 <1,5,.preg_I8>  T<5,.predef_I8,8> # $r1
+      // 
+ 
+      mtype = Is_Target_64bit() ? MTYPE_U8 : MTYPE_U4;
+      WN *ld = WN_CreateLdid(OPR_LDID, 
+                          TY_mtype(Ty_Table[tidx]), 
+                          TY_mtype(Ty_Table[tidx]),
+                          WN_idname_offset(first_formal), 
+                          WN_st(first_formal), 
+                          tidx);
+      WN *stid = WN_Stid( mtype, First_Int_Preg_Return_Offset,
+        Int_Preg, ST_type(Int_Preg), ld );
+      WN_Set_Linenum(stid, current_srcpos);
+      WN_INSERT_BlockLast( block, stid );
+      if (traceMload) {
+        fprintf(TFile, "Return_val lower [Return_Val_Preg]\n");
+        fdump_tree(TFile, block);
+      }
+#endif
     }
     else { // return in return registers
       INT32 i;
@@ -12261,6 +12331,10 @@
     }
   }
 
+  // lastly make a normal return statement
+  //
+  // RETURN 
+  //  
   WN *wn_return = WN_CreateReturn ();
   WN_Set_Linenum(wn_return, current_srcpos);  // Bug 1268
   if ( Cur_PU_Feedback )
Index: osprey/common/com/x8664/targ_sim.h
===================================================================
--- osprey/common/com/x8664/targ_sim.h	(revision 3553)
+++ osprey/common/com/x8664/targ_sim.h	(working copy)
@@ -110,6 +110,7 @@
     X86_64_X87_CLASS,
     X86_64_X87UP_CLASS,
     X86_64_SSEUP_CLASS,
+    X86_64_COMPLEX_X87_CLASS
 };
 
 #define MAX_CLASSES 4
Index: osprey/common/com/x8664/targ_sim.cxx
===================================================================
--- osprey/common/com/x8664/targ_sim.cxx	(revision 3553)
+++ osprey/common/com/x8664/targ_sim.cxx	(working copy)
@@ -178,9 +178,12 @@
   if (class1 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGER_CLASS)
     return X86_64_INTEGER_CLASS;
 
-  /* rule 5: if one of the classes is X87 or X87UP, result is MEMORY */
+  /* rule 5: if one of the classes is X87, X87UP, or COMPLEX_X87 class,
+     result is MEMORY */
   if (class1 == X86_64_X87_CLASS || class2 == X86_64_X87_CLASS ||
-      class1 == X86_64_X87UP_CLASS || class2 == X86_64_X87UP_CLASS)
+      class1 == X86_64_X87UP_CLASS || class2 == X86_64_X87UP_CLASS ||
+      class1 == X86_64_COMPLEX_X87_CLASS ||
+      class2 == X86_64_COMPLEX_X87_CLASS)
     return X86_64_MEMORY_CLASS;
   
   /* rule 6: otherwise, SSE class */
@@ -258,10 +261,13 @@
     case MTYPE_F8:
       classes[0] = X86_64_SSE_CLASS;
       return 1;
-    case MTYPE_C10:
     case MTYPE_F10:
       classes[0] = X86_64_X87_CLASS;
-      return 0;
+      classes[1] = X86_64_X87UP_CLASS;
+      return 2;
+    case MTYPE_C10:
+      classes[0] = X86_64_COMPLEX_X87_CLASS;
+      return 1;
     case MTYPE_C4:
       classes[0] = X86_64_SSE_CLASS;
       return 1;
@@ -577,9 +583,7 @@
         info.mtype [0] = mtype;
         info.preg  [0] = PR_first_reg(SIM_INFO.flt_results);
       }
-
       else {
-
         info.count     = 2;
         info.mtype [0] = Mtype_complex_to_real(mtype);
         info.mtype [1] = Mtype_complex_to_real(mtype);
@@ -600,9 +604,7 @@
         info.mtype [0] = mtype;
         info.preg  [0] = First_X87_Preg_Return_Offset;
       }
-
       else {
-
         info.count     = 2;
         info.mtype [0] = Mtype_complex_to_real(mtype);
         info.mtype [1] = Mtype_complex_to_real(mtype);
@@ -645,30 +647,56 @@
               info.return_via_first_arg = FALSE;
               info.count = n;
 
-	      if (classes[0] == X86_64_SSE_CLASS) {
-		info.mtype[0] = SIM_INFO.dbl_type;
-		info.preg[0] = PR_first_reg(SIM_INFO.dbl_results);
-		next_float_return_num = PR_last_reg(SIM_INFO.dbl_results);
-		next_int_return_num = PR_first_reg(SIM_INFO.int_results);
+	      if (classes[0] == X86_64_X87_CLASS) {
+                  info.count = 1;
+                  info.mtype[0] = MTYPE_F10;
+                  info.preg[0]  = First_X87_Preg_Return_Offset;
+              }
+              else if (classes[0] == X86_64_COMPLEX_X87_CLASS) {
+                  if (Is_Target_32bit()) {
+                    info.count = 0;
+                    info.return_via_first_arg = TRUE;
+                  }
+                  else if (level == Use_Simulated) {
+            
+                    info.count     = 1;
+                    info.mtype [0] = MTYPE_C10;
+                    info.preg  [0] = First_X87_Preg_Return_Offset;
+                  }
+                  else {
+                    info.count     = 2;
+                    info.mtype [0] = Mtype_complex_to_real(MTYPE_C10);
+                    info.mtype [1] = Mtype_complex_to_real(MTYPE_C10);
+                    info.preg  [0] = First_X87_Preg_Return_Offset;
+                    info.preg  [1] = Last_X87_Preg_Return_Offset;
+                  }
+              } 
+              else {
+                if (classes[0] == X86_64_SSE_CLASS) {
+		  info.mtype[0] = SIM_INFO.dbl_type;
+		  info.preg[0] = PR_first_reg(SIM_INFO.dbl_results);
+		  next_float_return_num = PR_last_reg(SIM_INFO.dbl_results);
+		  next_int_return_num = PR_first_reg(SIM_INFO.int_results);
+	        }
+	        else {
+		  info.mtype[0] = SIM_INFO.int_type;
+		  info.preg[0] = PR_first_reg(SIM_INFO.int_results);
+		  next_float_return_num = PR_first_reg(SIM_INFO.dbl_results);
+		  next_int_return_num = PR_last_reg(SIM_INFO.int_results);
+	        }
+  
+	        if (n > 1) {
+	          if (classes[1] == X86_64_SSE_CLASS) {
+		    info.mtype[1] = SIM_INFO.dbl_type;
+		    info.preg[1] = next_float_return_num;
+		  }
+		  else {
+		    info.mtype[1] = SIM_INFO.int_type;
+		    info.preg[1] = next_int_return_num;
+		  }
+	        }
 	      }
-	      else {
-		info.mtype[0] = SIM_INFO.int_type;
-		info.preg[0] = PR_first_reg(SIM_INFO.int_results);
-		next_float_return_num = PR_first_reg(SIM_INFO.dbl_results);
-		next_int_return_num = PR_last_reg(SIM_INFO.int_results);
-	      }
-
-	      if (n > 1) {
-	        if (classes[1] == X86_64_SSE_CLASS) {
-		  info.mtype[1] = SIM_INFO.dbl_type;
-		  info.preg[1] = next_float_return_num;
-		}
-		else {
-		  info.mtype[1] = SIM_INFO.int_type;
-		  info.preg[1] = next_int_return_num;
-		}
-	      }
-            }
+           }
         }
       }
       break;
@@ -934,6 +962,13 @@
 	  INT Save_Current_Float_Param_Num = Current_Float_Param_Num;
           ploc.size = TY_size (ty);
 	  INT n = Classify_Aggregate(ty, classes);
+          // handle X87 X87UP and COMPLEX_X87 cases
+          if (n != 0 && (classes[0] == X86_64_X87_CLASS ||
+                         classes[0] == X86_64_X87UP_CLASS ||
+                         classes[0] == X86_64_COMPLEX_X87_CLASS)) {
+             // x87, x87up and complex_x87 are passed in memory
+             n = 0;
+          }
 	  if (n > 0) { // passed in registers
 	    if (classes[0] == X86_64_SSE_CLASS) {
 	      ++Current_Float_Param_Num;
Index: osprey/wgen/wgen_spin_symbol.cxx
===================================================================
--- osprey/wgen/wgen_spin_symbol.cxx	(revision 3553)
+++ osprey/wgen/wgen_spin_symbol.cxx	(working copy)
@@ -852,6 +852,7 @@
 			Set_TY_is_union(idx);
 		}
 #ifdef KEY
+                // gs_aggregate_value_p is only set for c++
 		if (gs_aggregate_value_p(type_tree)) {
 			Set_TY_return_in_mem(idx);
 		}
Index: osprey/wgen/wgen_expr.cxx
===================================================================
--- osprey/wgen/wgen_expr.cxx	(revision 3553)
+++ osprey/wgen/wgen_expr.cxx	(working copy)
@@ -1354,6 +1354,26 @@
 } 
 #endif
 
+// Estimate whether a value of type ty is returned in memory, i.e. whether
+// the caller must pass the result address as a hidden parameter.  The ABI
+// classification is not available in wgen, so this is only a rough
+// approximation based on the compile mode and the type size.  Returns
+// FALSE for any type whose mtype is not MTYPE_M.
+BOOL Need_Hidden_Parameter(TY_IDX ty)
+{
+   if (TY_mtype(ty) != MTYPE_M)  // only MTYPE_M types are candidates
+      return FALSE;
+
+#if defined(TARG_X8664)
+   if (Is_Target_32bit() || TY_size(ty) > 16)
+      return TRUE;
+#elif defined(TARG_IA64)
+   if (TY_size(ty) > 32)
+      return TRUE;
+#endif
+   return FALSE;  // other targets, or below the size threshold above
+}
+
 /* rhs_wn is the WN representing the rhs of a MODIFY_EXPR node; this
  * routine processes the lhs of the node and generate the appropriate
  * form of store.
@@ -1645,9 +1665,52 @@
 	if (volt) 
 	  Set_TY_is_volatile(hi_ty_idx);
 #endif
-        wn = WN_Stid (desc, ST_ofst(st) + component_offset + lhs_preg_num, st,
-		      hi_ty_idx, rhs_wn, field_id);
-        WGEN_Stmt_Append(wn, Get_Srcpos());
+        // if return type is struct type, and the result is used in lhs,
+        // then the Mreturn temp variable is not needed, the address of lhs 
+        // should be passed as hidden parameter
+
+        // the same pattern match is used in several places to handle
+        // returning a struct value, initializing a struct value, and
+        // assigning a struct value from a function call; if the pattern
+        // ever changes, all of these places must be updated together --
+        // just look for return_val_transformed.
+        bool return_val_transformed =  false;
+        if (WN_operator(rhs_wn) == OPR_COMMA &&
+              WN_rtype(rhs_wn) == MTYPE_M) {
+           WN *block = WN_kid0(rhs_wn);
+           WN *ldidTemp = WN_kid1(rhs_wn);
+           if (WN_operator(ldidTemp) == OPR_LDID &&
+                 WN_operator(block) == OPR_BLOCK) {
+
+              // replace MSTID _temp_.Mreturn.1
+              // with    MSTID lhs
+              WN *stidTemp = WN_last(block);
+              if (WN_operator(stidTemp) == OPR_STID &&
+                     Need_Hidden_Parameter(WN_ty(ldidTemp))) {
+                 // remove block from the rhs_wn
+                 WN_kid0(rhs_wn) = 0;
+                 WN_DELETE_Tree(rhs_wn);
+                 rhs_wn = block;
+
+                 WN *kid = WN_kid0(stidTemp);
+                 WN_kid0(stidTemp) = 0;
+                 WN_DELETE_FromBlock(block, stidTemp);
+
+                 WN *stid = WN_Stid (desc, 
+                       ST_ofst(st) + component_offset + lhs_preg_num,
+                       st, hi_ty_idx, kid, field_id);
+                 WN_INSERT_BlockLast(block, stid);
+                 WGEN_Stmt_Append(block, Get_Srcpos());
+                 wn = block;
+                 return_val_transformed = true;
+              }
+           }
+        }
+        if (!return_val_transformed) {
+           wn = WN_Stid (desc, ST_ofst(st) + component_offset + lhs_preg_num, st,
+		         hi_ty_idx, rhs_wn, field_id);
+           WGEN_Stmt_Append(wn, Get_Srcpos());
+        }
 #if defined(TARG_SL)
         if (need_append) {
           WN *ldid_wn;
@@ -1869,7 +1932,6 @@
         wn = NULL;
       }
       else {
-#ifdef KEY
 	// The store target could be an INDIRECT_REF that kg++fe added to make
 	// the store write to the area pointed to by the fake first param.  If
 	// so, check that copying the object does not involve a copy
@@ -1889,31 +1951,67 @@
         if (Current_Entry_WN() != NULL) {
             first_formal = WN_formal(Current_Entry_WN(), 0);
         }
-        if (TY_return_in_mem(hi_ty_idx) &&
-	    field_id == 0 &&
-	    // See if it is an indirect ref of the fake first parm.
-	    // bug fix for OSP_314
-	    //
-	    first_formal != NULL && (WN_operator(first_formal) != OPR_BLOCK) &&
-	    gs_tree_code(addr) == GS_VAR_DECL &&
-	    DECL_ST(addr) == WN_st(first_formal)) {
-	  FmtAssert(TY_mtype(hi_ty_idx) == MTYPE_M,
-		    ("WGEN_Lhs_Of_Modify_Expr: return_in_mem type not MTYPE_M"));
-	  gs_t ptr_type = gs_tree_type(gs_tree_operand(lhs, 0));
-	  gs_t type = gs_tree_type(ptr_type);
-	  FmtAssert(gs_tree_code(ptr_type) == GS_POINTER_TYPE,
-	    ("WGEN_Lhs_Of_Modify_Expr: INDIRECT_REF opnd0 is not POINTER_TYPE"));
-	  FmtAssert(component_offset == 0,
-		    ("WGEN_Lhs_Of_Modify_Expr: component_offset nonzero"));
-	  TY_IDX tidx = Get_TY(ptr_type);
-	  // Check object has no copy constructor.
-	  FmtAssert(!WGEN_has_copy_constructor(type),
-	      ("WGEN_Lhs_Of_Modify_Expr: object needs copy constructor"));
+        // if return type is struct type, and the result is used in lhs,
+        // then the Mreturn temp variable is not needed, the address of lhs 
+        // should be passed as hidden parameter
+        bool return_val_transformed = false;
+        if (WN_operator(rhs_wn) == OPR_COMMA &&
+              WN_rtype(rhs_wn) == MTYPE_M) {
+           WN *block = WN_kid0(rhs_wn);
+           WN *ldidTemp = WN_kid1(rhs_wn);
+           if (WN_operator(ldidTemp) == OPR_LDID &&
+                 WN_operator(block) == OPR_BLOCK) {
+
+              // replace MSTID _temp_.Mreturn.1
+              // with    MISTORE lhs
+              WN *stidTemp = WN_last(block);
+              if (WN_operator(stidTemp) == OPR_STID &&
+                     Need_Hidden_Parameter(WN_ty(ldidTemp))) {
+                 // remove block from the rhs_wn
+                 WN_kid0(rhs_wn) = 0;
+                 WN_DELETE_Tree(rhs_wn);
+                 rhs_wn = block;
+
+                 WN *kid = WN_kid0(stidTemp);
+                 WN_kid0(stidTemp) = 0;
+                 WN_DELETE_FromBlock(block, stidTemp);
+
+                 WN *istore = 
+                    WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset, 
+			       Make_Pointer_Type (hi_ty_idx, FALSE),
+			       kid, addr_wn, field_id);
+                 WN_INSERT_BlockLast(block, istore);
+                 wn = block;
+                 return_val_transformed = true;
+              }
+           }
         }
-#endif
-        wn = WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset, 
-			     Make_Pointer_Type (hi_ty_idx, FALSE),
-			     rhs_wn, addr_wn, field_id);
+        if (!return_val_transformed) {
+          if (TY_return_in_mem(hi_ty_idx) &&
+	      field_id == 0 &&
+	      // See if it is an indirect ref of the fake first parm.
+	      // bug fix for OSP_314
+	      //
+	      first_formal != NULL && (WN_operator(first_formal) != OPR_BLOCK) &&
+	      gs_tree_code(addr) == GS_VAR_DECL &&
+	      DECL_ST(addr) == WN_st(first_formal)) {
+	    FmtAssert(TY_mtype(hi_ty_idx) == MTYPE_M,
+		      ("WGEN_Lhs_Of_Modify_Expr: return_in_mem type not MTYPE_M"));
+	    gs_t ptr_type = gs_tree_type(gs_tree_operand(lhs, 0));
+	    gs_t type = gs_tree_type(ptr_type);
+	    FmtAssert(gs_tree_code(ptr_type) == GS_POINTER_TYPE,
+	      ("WGEN_Lhs_Of_Modify_Expr: INDIRECT_REF opnd0 is not POINTER_TYPE"));
+	    FmtAssert(component_offset == 0,
+		      ("WGEN_Lhs_Of_Modify_Expr: component_offset nonzero"));
+	    TY_IDX tidx = Get_TY(ptr_type);
+	    // Check object has no copy constructor.
+	    FmtAssert(!WGEN_has_copy_constructor(type),
+	        ("WGEN_Lhs_Of_Modify_Expr: object needs copy constructor"));
+          }
+          wn = WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset, 
+			       Make_Pointer_Type (hi_ty_idx, FALSE),
+			       rhs_wn, addr_wn, field_id);
+        }
 #ifdef TARG_SL
         /* so far I only handle *p++=... cases, change this case to 
          *   *p = ... ;
@@ -10126,6 +10224,13 @@
 	  else {
 	    enum X86_64_PARM_CLASS classes[MAX_CLASSES];
 	    INT n = Classify_Aggregate(ty_idx, classes);
+            // handle X87 X87UP and COMPLEX_X87 cases
+            if (n != 0 && (classes[0] == X86_64_X87_CLASS ||
+                           classes[0] == X86_64_X87UP_CLASS ||
+                           classes[0] == X86_64_COMPLEX_X87_CLASS)) {
+               // x87, x87up and complex_x87 are passed in memory
+               n = 0;
+            }
 	    if (n == 0) { /* can only pass in memory */
 	      /* increment overflow_arg_area pointer by 8 */
 	      INT delta = ((TY_size(ty_idx) + 7) / 8) * 8;
Index: osprey/wgen/wgen_expr.h
===================================================================
--- osprey/wgen/wgen_expr.h	(revision 3553)
+++ osprey/wgen/wgen_expr.h	(working copy)
@@ -127,6 +127,7 @@
 extern void WGEN_Expand_Start_Stmt_Expr (gs_t);
 extern void WGEN_Expand_End_Stmt_Expr (gs_t);
 extern gs_t first_in_compound_expr(gs_t);
+extern BOOL Need_Hidden_Parameter(TY_IDX ty);
 
 #ifdef __cplusplus
 }
Index: osprey/wgen/wgen_decl.cxx
===================================================================
--- osprey/wgen/wgen_decl.cxx	(revision 3553)
+++ osprey/wgen/wgen_decl.cxx	(working copy)
@@ -2817,14 +2817,62 @@
         Is_True( (WN_operator(target) == OPR_LDID ||
                   WN_operator(target) == OPR_LDA),
                  ("Invalid operator for target"));
-        if( WN_operator(target) == OPR_LDID ) {
+        // handle struct init by a return value of a call,
+        // remove redundant temp variable copy
+        
+        // if return type is struct type, and the result is used in lhs,
+        // then the Mreturn temp variable is not needed, the address of lhs 
+        // should be passed as hidden parameter
+        bool return_val_transformed = false;
+        WN *block = NULL;
+        if (WN_operator(init_wn) == OPR_COMMA &&
+              WN_rtype(init_wn) == MTYPE_M) {
+           WN *block = WN_kid0(init_wn);
+           WN *ldidTemp = WN_kid1(init_wn);
+           if (WN_operator(ldidTemp) == OPR_LDID &&
+                 WN_operator(block) == OPR_BLOCK) {
+
+              // replace MSTID _temp_.Mreturn.1
+              // with    MSTID lhs
+              WN *stidTemp = WN_last(block);
+              if (WN_operator(stidTemp) == OPR_STID &&
+                    Need_Hidden_Parameter(WN_ty(ldidTemp))) {
+                 // remove block from the init_wn
+                 WN_kid0(init_wn) = 0;
+                 WN_DELETE_Tree(init_wn);
+                 init_wn = block;
+                 
+                 WN *kid = WN_kid0(stidTemp);
+                 WN_kid0(stidTemp) = 0;
+                 WN_DELETE_FromBlock(block, stidTemp);
+
+                 if( WN_operator(target) == OPR_LDID ) {
+                   TY_IDX ptr_ty = Make_Pointer_Type(ty);
+                   wn = WN_Istore(mtype, offset, ptr_ty, target, kid, field_id);
+                 }
+                 else { // OPR_LDA
+                   ST *st = WN_st(target);
+                   wn = WN_Stid (mtype, WN_lda_offset(target) + offset, st,
+                                 ty, kid, field_id); 
+                 }
+                 WN_INSERT_BlockLast(block, wn);
+                 wn = block;
+                 return_val_transformed = true;
+              }
+           }
+           else
+              block = NULL; // not a block
+        }
+        if (!return_val_transformed) {
+          if( WN_operator(target) == OPR_LDID ) {
             TY_IDX ptr_ty = Make_Pointer_Type(ty);
             wn = WN_Istore(mtype, offset, ptr_ty, target, init_wn, field_id);
-        }
-        else { // OPR_LDA
+          }
+          else { // OPR_LDA
             ST *st = WN_st(target);
             wn = WN_Stid (mtype, WN_lda_offset(target) + offset, st,
                           ty, init_wn, field_id); 
+          }
         }
 #else
 	WN *wn = WN_Stid (mtype, ST_ofst(st) + offset, st,
------------------------------------------------------------------------------
Achieve unprecedented app performance and reliability
What every C/C++ and Fortran developer should know.
Learn how Intel has extended the reach of its next-generation tools
to help boost performance applications - including clusters.
http://p.sf.net/sfu/intel-dev2devmay
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel

Reply via email to