(Attachment added this time.)
On Mon, May 16, 2011 at 5:01 PM, David Coakley <dcoak...@gmail.com> wrote:
> Could a gatekeeper review the attached patch? It fixes some AMD64 ABI
> compatibility problems and also eliminates some unnecessary struct
> copies. The second part should apply to all targets.
>
> Here is the proposed log message:
>
>
> Improve AMD64 ABI compliance and avoid unnecessary struct copies.
>
> For the following code, the compiler generated unnecessary struct copies
> for a return value that has a size too big to be passed in registers.
>
> typedef struct { char big[1024]; } C;
> C gc;
> extern C bar9 (void);
> extern C check9 (void) { return bar9 (); }
>
> In this case there were two copies and one temporary variable generated.
> After this change the compiler does not generate any extra copies.
>
> According to the AMD64 ABI, the caller provides space for the return
> value and passes its address as a hidden first argument (in %rdi); the
> callee returns this same address in %rax. Previously the compiler was
> not following this rule.
>
> The change handles the similar cases of initialization by function
> call ("C c = bar9();") and assignment to a pointer ("*cp = bar9()").
>
> Also, a complex long double field is now returned in the register pair
> (%st0, %st1) as specified by the AMD64 ABI.
>
>
>
> I am still looking for a review for the patch I posted last week (May
> 10) as well. Thanks,
>
> -David Coakley / AMD Open Source Compiler Engineering
>
Index: osprey/be/cg/reg_live.cxx
===================================================================
--- osprey/be/cg/reg_live.cxx (revision 3553)
+++ osprey/be/cg/reg_live.cxx (working copy)
@@ -268,6 +268,17 @@
retpreg[i] = RETURN_INFO_preg (return_info, i);
Add_PREG_To_REGSET (retpreg[i], return_regs);
}
+#ifdef TARG_X8664
+ // AMD64 ABI
+ // on return %rax will contain the address that has been
+ // passed in by the caller in %rdi
+ // for 32 bit, the address of the return value is in %eax
+ if (RETURN_INFO_return_via_first_arg(return_info)) {
+ FmtAssert (RETURN_INFO_count(return_info) == 0,
+ ("Compute_Return_Regs: more return registers than can handle"));
+ Add_PREG_To_REGSET (First_Int_Preg_Return_Offset, return_regs);
+ }
+#endif
#if defined(TARG_SL)
if (MTYPE_byte_size(TY_mtype(TY_ret_type(call_ty))) == 8) { //I8/U8/F8
FmtAssert (RETURN_INFO_count(return_info) <= 1,
Index: osprey/be/com/wn_lower.cxx
===================================================================
--- osprey/be/com/wn_lower.cxx (revision 3553)
+++ osprey/be/com/wn_lower.cxx (working copy)
@@ -7331,7 +7331,8 @@
ST *preg_st;
WN *n_rhs;
WN *wn = NULL; // init to prevent upward-exposed use
- RETURN_INFO return_info = Get_Return_Info(WN_ty(tree), Complex_Not_Simulated
+ RETURN_INFO return_info = Get_Return_Info(WN_ty(WN_kid0(tree)),
+ Complex_Not_Simulated
#ifdef TARG_X8664
, last_call_ff2c_abi
#endif
@@ -7353,7 +7354,7 @@
if (WN_store_offset(tree) != 0) { // generate an ADD node for the offset
WN *iwn = WN_CreateIntconst(OPR_INTCONST, Pointer_Mtype, MTYPE_V,
WN_store_offset(tree));
- awn = WN_CreateExp2(OPR_ADD, Pointer_Mtype, Pointer_Mtype, awn, iwn);
+ awn = WN_CreateExp2(OPR_ADD, Pointer_Mtype, MTYPE_V, awn, iwn);
}
awn = lower_expr(block, awn, actions);
WN *n_call = add_fake_parm(call, awn, WN_ty(tree));
@@ -12132,25 +12133,94 @@
WN *n_rhs;
// fix rhs
- if (WN_operator(o_rhs) == OPR_LDID)
- n_rhs = lower_mldid(block, o_rhs, LOWER_MLDID_MSTID);
- else if (WN_operator(o_rhs) == OPR_ILOAD)
- n_rhs = lower_miload(block, o_rhs, LOWER_MLDID_MSTID);
- else n_rhs = o_rhs; // MLOAD
-
- // create an mstore
+
WN *first_formal = WN_formal(current_function, 0);
TY_IDX tidx = ST_type(WN_st(first_formal));
- WN *awn = WN_CreateLdid(OPR_LDID,
- TY_mtype(Ty_Table[tidx]),
- TY_mtype(Ty_Table[tidx]),
- WN_idname_offset(first_formal),
- WN_st(first_formal),
- tidx);
- WN *swn = WN_CopyNode(WN_kid1(n_rhs));
- wn = WN_CreateMstore (0, tidx, n_rhs, awn, swn);
- WN_Set_Linenum(wn, current_srcpos); // Bug 1268
- WN_INSERT_BlockLast (block, wn);
+ if (WN_operator(o_rhs) == OPR_LDID &&
+ WN_st(o_rhs) == Return_Val_Preg) {
+ // the Return_Val_Preg must be returned by previous call
+ // so we need to get the previous MCALL statement and
+ // fake first parm
+ //
+ // MCALL 126 <1,51,bar9>
+ // MMLDID -1 <1,49,.preg_return_val> T<53,.anonymous.1,1>
+ // MRETURN_VAL
+ //
+ // ==>
+ //
+ // U8LDID 0 <2,3,_temp_.return...>
+ // U8PARM 33 T<55,anon_ptr.,8>
+ // VCALL 126 <1,51,bar9>
+ //
+ WN *call = WN_last(block);
+ if ((WN_operator(call) == OPR_CALL || WN_operator(call) == OPR_ICALL ||
+ WN_operator(call) == OPR_PICCALL) && WN_rtype(call) == MTYPE_M) {
+ TY_IDX prototype;
+ if (WN_operator(call) == OPR_ICALL)
+ prototype = WN_ty(call);
+ else {
+ ST_IDX func_stidx = WN_st_idx(call);
+ PU_IDX puidx = ST_pu(St_Table[func_stidx]);
+ prototype = PU_prototype(Pu_Table[puidx]);
+ }
+ WN *awn = WN_CreateLdid(OPR_LDID,
+ TY_mtype(Ty_Table[tidx]),
+ TY_mtype(Ty_Table[tidx]),
+ WN_idname_offset(first_formal),
+ WN_st(first_formal),
+ tidx);
+ awn = lower_expr(block, awn, actions);
+ WN *n_call = add_fake_parm(call, awn, WN_ty(awn));
+ WN_DELETE_FromBlock(block, call);
+ WN_INSERT_BlockLast(block, n_call);
+ }
+ }
+ else {
+ if (WN_operator(o_rhs) == OPR_LDID) {
+ n_rhs = lower_mldid(block, o_rhs, LOWER_MLDID_MSTID);
+ }
+ else if (WN_operator(o_rhs) == OPR_ILOAD)
+ n_rhs = lower_miload(block, o_rhs, LOWER_MLDID_MSTID);
+ else n_rhs = o_rhs; // MLOAD
+
+ // create an mstore
+ WN *awn = WN_CreateLdid(OPR_LDID,
+ TY_mtype(Ty_Table[tidx]),
+ TY_mtype(Ty_Table[tidx]),
+ WN_idname_offset(first_formal),
+ WN_st(first_formal),
+ tidx);
+ WN *swn = WN_CopyNode(WN_kid1(n_rhs));
+ wn = WN_CreateMstore (0, tidx, n_rhs, awn, swn);
+ WN_Set_Linenum(wn, current_srcpos); // Bug 1268
+ WN_INSERT_BlockLast (block, wn);
+ }
+#ifdef TARG_X8664
+ // AMD64 ABI
+ // on return %rax will contain the address that has been
+ // passed in by the caller in %rdi
+ // for 32 bit, the address of the return value is in %eax
+ //
+ // U8U8LDID 0 <2,3,_temp_.return...>
+ // U8STID 1 <1,5,.preg_I8> T<5,.predef_I8,8> # $r1
+ //
+
+ mtype = Is_Target_64bit() ? MTYPE_U8 : MTYPE_U4;
+ WN *ld = WN_CreateLdid(OPR_LDID,
+ TY_mtype(Ty_Table[tidx]),
+ TY_mtype(Ty_Table[tidx]),
+ WN_idname_offset(first_formal),
+ WN_st(first_formal),
+ tidx);
+ WN *stid = WN_Stid( mtype, First_Int_Preg_Return_Offset,
+ Int_Preg, ST_type(Int_Preg), ld );
+ WN_Set_Linenum(stid, current_srcpos);
+ WN_INSERT_BlockLast( block, stid );
+ if (traceMload) {
+ fprintf(TFile, "Return_val lower [Return_Val_Preg]\n");
+ fdump_tree(TFile, block);
+ }
+#endif
}
else { // return in return registers
INT32 i;
@@ -12261,6 +12331,10 @@
}
}
+ // lastly make a normal return statement
+ //
+ // RETURN
+ //
WN *wn_return = WN_CreateReturn ();
WN_Set_Linenum(wn_return, current_srcpos); // Bug 1268
if ( Cur_PU_Feedback )
Index: osprey/common/com/x8664/targ_sim.h
===================================================================
--- osprey/common/com/x8664/targ_sim.h (revision 3553)
+++ osprey/common/com/x8664/targ_sim.h (working copy)
@@ -110,6 +110,7 @@
X86_64_X87_CLASS,
X86_64_X87UP_CLASS,
X86_64_SSEUP_CLASS,
+ X86_64_COMPLEX_X87_CLASS
};
#define MAX_CLASSES 4
Index: osprey/common/com/x8664/targ_sim.cxx
===================================================================
--- osprey/common/com/x8664/targ_sim.cxx (revision 3553)
+++ osprey/common/com/x8664/targ_sim.cxx (working copy)
@@ -178,9 +178,12 @@
if (class1 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGER_CLASS)
return X86_64_INTEGER_CLASS;
- /* rule 5: if one of the classes is X87 or X87UP, result is MEMORY */
+ /* rule 5: if one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ result is MEMORY */
if (class1 == X86_64_X87_CLASS || class2 == X86_64_X87_CLASS ||
- class1 == X86_64_X87UP_CLASS || class2 == X86_64_X87UP_CLASS)
+ class1 == X86_64_X87UP_CLASS || class2 == X86_64_X87UP_CLASS ||
+ class1 == X86_64_COMPLEX_X87_CLASS ||
+ class2 == X86_64_COMPLEX_X87_CLASS)
return X86_64_MEMORY_CLASS;
/* rule 6: otherwise, SSE class */
@@ -258,10 +261,13 @@
case MTYPE_F8:
classes[0] = X86_64_SSE_CLASS;
return 1;
- case MTYPE_C10:
case MTYPE_F10:
classes[0] = X86_64_X87_CLASS;
- return 0;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case MTYPE_C10:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
case MTYPE_C4:
classes[0] = X86_64_SSE_CLASS;
return 1;
@@ -577,9 +583,7 @@
info.mtype [0] = mtype;
info.preg [0] = PR_first_reg(SIM_INFO.flt_results);
}
-
else {
-
info.count = 2;
info.mtype [0] = Mtype_complex_to_real(mtype);
info.mtype [1] = Mtype_complex_to_real(mtype);
@@ -600,9 +604,7 @@
info.mtype [0] = mtype;
info.preg [0] = First_X87_Preg_Return_Offset;
}
-
else {
-
info.count = 2;
info.mtype [0] = Mtype_complex_to_real(mtype);
info.mtype [1] = Mtype_complex_to_real(mtype);
@@ -645,30 +647,56 @@
info.return_via_first_arg = FALSE;
info.count = n;
- if (classes[0] == X86_64_SSE_CLASS) {
- info.mtype[0] = SIM_INFO.dbl_type;
- info.preg[0] = PR_first_reg(SIM_INFO.dbl_results);
- next_float_return_num = PR_last_reg(SIM_INFO.dbl_results);
- next_int_return_num = PR_first_reg(SIM_INFO.int_results);
+ if (classes[0] == X86_64_X87_CLASS) {
+ info.count = 1;
+ info.mtype[0] = MTYPE_F10;
+ info.preg[0] = First_X87_Preg_Return_Offset;
+ }
+ else if (classes[0] == X86_64_COMPLEX_X87_CLASS) {
+ if (Is_Target_32bit()) {
+ info.count = 0;
+ info.return_via_first_arg = TRUE;
+ }
+ else if (level == Use_Simulated) {
+
+ info.count = 1;
+ info.mtype [0] = MTYPE_C10;
+ info.preg [0] = First_X87_Preg_Return_Offset;
+ }
+ else {
+ info.count = 2;
+ info.mtype [0] = Mtype_complex_to_real(MTYPE_C10);
+ info.mtype [1] = Mtype_complex_to_real(MTYPE_C10);
+ info.preg [0] = First_X87_Preg_Return_Offset;
+ info.preg [1] = Last_X87_Preg_Return_Offset;
+ }
+ }
+ else {
+ if (classes[0] == X86_64_SSE_CLASS) {
+ info.mtype[0] = SIM_INFO.dbl_type;
+ info.preg[0] = PR_first_reg(SIM_INFO.dbl_results);
+ next_float_return_num = PR_last_reg(SIM_INFO.dbl_results);
+ next_int_return_num = PR_first_reg(SIM_INFO.int_results);
+ }
+ else {
+ info.mtype[0] = SIM_INFO.int_type;
+ info.preg[0] = PR_first_reg(SIM_INFO.int_results);
+ next_float_return_num = PR_first_reg(SIM_INFO.dbl_results);
+ next_int_return_num = PR_last_reg(SIM_INFO.int_results);
+ }
+
+ if (n > 1) {
+ if (classes[1] == X86_64_SSE_CLASS) {
+ info.mtype[1] = SIM_INFO.dbl_type;
+ info.preg[1] = next_float_return_num;
+ }
+ else {
+ info.mtype[1] = SIM_INFO.int_type;
+ info.preg[1] = next_int_return_num;
+ }
+ }
}
- else {
- info.mtype[0] = SIM_INFO.int_type;
- info.preg[0] = PR_first_reg(SIM_INFO.int_results);
- next_float_return_num = PR_first_reg(SIM_INFO.dbl_results);
- next_int_return_num = PR_last_reg(SIM_INFO.int_results);
- }
-
- if (n > 1) {
- if (classes[1] == X86_64_SSE_CLASS) {
- info.mtype[1] = SIM_INFO.dbl_type;
- info.preg[1] = next_float_return_num;
- }
- else {
- info.mtype[1] = SIM_INFO.int_type;
- info.preg[1] = next_int_return_num;
- }
- }
- }
+ }
}
}
break;
@@ -934,6 +962,13 @@
INT Save_Current_Float_Param_Num = Current_Float_Param_Num;
ploc.size = TY_size (ty);
INT n = Classify_Aggregate(ty, classes);
+ // handle X87 X87UP and COMPLEX_X87 cases
+ if (n != 0 && (classes[0] == X86_64_X87_CLASS ||
+ classes[0] == X86_64_X87UP_CLASS ||
+ classes[0] == X86_64_COMPLEX_X87_CLASS)) {
+ // x87, x87up and complex_x87 are passed in memory
+ n = 0;
+ }
if (n > 0) { // passed in registers
if (classes[0] == X86_64_SSE_CLASS) {
++Current_Float_Param_Num;
Index: osprey/wgen/wgen_spin_symbol.cxx
===================================================================
--- osprey/wgen/wgen_spin_symbol.cxx (revision 3553)
+++ osprey/wgen/wgen_spin_symbol.cxx (working copy)
@@ -852,6 +852,7 @@
Set_TY_is_union(idx);
}
#ifdef KEY
+ // gs_aggregate_value_p is only set for c++
if (gs_aggregate_value_p(type_tree)) {
Set_TY_return_in_mem(idx);
}
Index: osprey/wgen/wgen_expr.cxx
===================================================================
--- osprey/wgen/wgen_expr.cxx (revision 3553)
+++ osprey/wgen/wgen_expr.cxx (working copy)
@@ -1354,6 +1354,26 @@
}
#endif
+// Ideally we would consult the ABI to decide whether a struct
+// type is returned in memory (which requires a hidden
+// parameter carrying the address), but the ABI is not
+// available in wgen, so we make a rough estimate
+// based on the compile mode and the type size.
+BOOL Need_Hidden_Parameter(TY_IDX ty)
+{
+ if (TY_mtype(ty) != MTYPE_M)
+ return FALSE;
+
+#if defined(TARG_X8664)
+ if (Is_Target_32bit() || TY_size(ty) > 16)
+ return TRUE;
+#elif defined(TARG_IA64)
+ if (TY_size(ty) > 32)
+ return TRUE;
+#endif
+ return FALSE;
+}
+
/* rhs_wn is the WN representing the rhs of a MODIFY_EXPR node; this
* routine processes the lhs of the node and generate the appropriate
* form of store.
@@ -1645,9 +1665,52 @@
if (volt)
Set_TY_is_volatile(hi_ty_idx);
#endif
- wn = WN_Stid (desc, ST_ofst(st) + component_offset + lhs_preg_num, st,
- hi_ty_idx, rhs_wn, field_id);
- WGEN_Stmt_Append(wn, Get_Srcpos());
+ // if return type is struct type, and the result is used in lhs,
+ // then the Mreturn temp variable is not needed, the address of lhs
+ // should be passed as hidden parameter
+
+ // The same pattern match is used in several places to handle
+ // returning a struct value, initializing a struct value, and
+ // assigning a struct value from a function call. If the pattern
+ // ever changes, all of these places must be updated together;
+ // search for return_val_transformed to find them.
+ bool return_val_transformed = false;
+ if (WN_operator(rhs_wn) == OPR_COMMA &&
+ WN_rtype(rhs_wn) == MTYPE_M) {
+ WN *block = WN_kid0(rhs_wn);
+ WN *ldidTemp = WN_kid1(rhs_wn);
+ if (WN_operator(ldidTemp) == OPR_LDID &&
+ WN_operator(block) == OPR_BLOCK) {
+
+ // replace MSTID _temp_.Mreturn.1
+ // with MSTID lhs
+ WN *stidTemp = WN_last(block);
+ if (WN_operator(stidTemp) == OPR_STID &&
+ Need_Hidden_Parameter(WN_ty(ldidTemp))) {
+ // remove block from the rhs_wn
+ WN_kid0(rhs_wn) = 0;
+ WN_DELETE_Tree(rhs_wn);
+ rhs_wn = block;
+
+ WN *kid = WN_kid0(stidTemp);
+ WN_kid0(stidTemp) = 0;
+ WN_DELETE_FromBlock(block, stidTemp);
+
+ WN *stid = WN_Stid (desc,
+ ST_ofst(st) + component_offset + lhs_preg_num,
+ st, hi_ty_idx, kid, field_id);
+ WN_INSERT_BlockLast(block, stid);
+ WGEN_Stmt_Append(block, Get_Srcpos());
+ wn = block;
+ return_val_transformed = true;
+ }
+ }
+ }
+ if (!return_val_transformed) {
+ wn = WN_Stid (desc, ST_ofst(st) + component_offset + lhs_preg_num, st,
+ hi_ty_idx, rhs_wn, field_id);
+ WGEN_Stmt_Append(wn, Get_Srcpos());
+ }
#if defined(TARG_SL)
if (need_append) {
WN *ldid_wn;
@@ -1869,7 +1932,6 @@
wn = NULL;
}
else {
-#ifdef KEY
// The store target could be an INDIRECT_REF that kg++fe added to make
// the store write to the area pointed to by the fake first param. If
// so, check that copying the object does not involve a copy
@@ -1889,31 +1951,67 @@
if (Current_Entry_WN() != NULL) {
first_formal = WN_formal(Current_Entry_WN(), 0);
}
- if (TY_return_in_mem(hi_ty_idx) &&
- field_id == 0 &&
- // See if it is an indirect ref of the fake first parm.
- // bug fix for OSP_314
- //
- first_formal != NULL && (WN_operator(first_formal) != OPR_BLOCK) &&
- gs_tree_code(addr) == GS_VAR_DECL &&
- DECL_ST(addr) == WN_st(first_formal)) {
- FmtAssert(TY_mtype(hi_ty_idx) == MTYPE_M,
- ("WGEN_Lhs_Of_Modify_Expr: return_in_mem type not MTYPE_M"));
- gs_t ptr_type = gs_tree_type(gs_tree_operand(lhs, 0));
- gs_t type = gs_tree_type(ptr_type);
- FmtAssert(gs_tree_code(ptr_type) == GS_POINTER_TYPE,
- ("WGEN_Lhs_Of_Modify_Expr: INDIRECT_REF opnd0 is not POINTER_TYPE"));
- FmtAssert(component_offset == 0,
- ("WGEN_Lhs_Of_Modify_Expr: component_offset nonzero"));
- TY_IDX tidx = Get_TY(ptr_type);
- // Check object has no copy constructor.
- FmtAssert(!WGEN_has_copy_constructor(type),
- ("WGEN_Lhs_Of_Modify_Expr: object needs copy constructor"));
+ // if return type is struct type, and the result is used in lhs,
+ // then the Mreturn temp variable is not needed, the address of lhs
+ // should be passed as hidden parameter
+ bool return_val_transformed = false;
+ if (WN_operator(rhs_wn) == OPR_COMMA &&
+ WN_rtype(rhs_wn) == MTYPE_M) {
+ WN *block = WN_kid0(rhs_wn);
+ WN *ldidTemp = WN_kid1(rhs_wn);
+ if (WN_operator(ldidTemp) == OPR_LDID &&
+ WN_operator(block) == OPR_BLOCK) {
+
+ // replace MSTID _temp_.Mreturn.1
+ // with MISTORE lhs
+ WN *stidTemp = WN_last(block);
+ if (WN_operator(stidTemp) == OPR_STID &&
+ Need_Hidden_Parameter(WN_ty(ldidTemp))) {
+ // remove block from the rhs_wn
+ WN_kid0(rhs_wn) = 0;
+ WN_DELETE_Tree(rhs_wn);
+ rhs_wn = block;
+
+ WN *kid = WN_kid0(stidTemp);
+ WN_kid0(stidTemp) = 0;
+ WN_DELETE_FromBlock(block, stidTemp);
+
+ WN *istore =
+ WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset,
+ Make_Pointer_Type (hi_ty_idx, FALSE),
+ kid, addr_wn, field_id);
+ WN_INSERT_BlockLast(block, istore);
+ wn = block;
+ return_val_transformed = true;
+ }
+ }
}
-#endif
- wn = WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset,
- Make_Pointer_Type (hi_ty_idx, FALSE),
- rhs_wn, addr_wn, field_id);
+ if (!return_val_transformed) {
+ if (TY_return_in_mem(hi_ty_idx) &&
+ field_id == 0 &&
+ // See if it is an indirect ref of the fake first parm.
+ // bug fix for OSP_314
+ //
+ first_formal != NULL && (WN_operator(first_formal) != OPR_BLOCK) &&
+ gs_tree_code(addr) == GS_VAR_DECL &&
+ DECL_ST(addr) == WN_st(first_formal)) {
+ FmtAssert(TY_mtype(hi_ty_idx) == MTYPE_M,
+ ("WGEN_Lhs_Of_Modify_Expr: return_in_mem type not MTYPE_M"));
+ gs_t ptr_type = gs_tree_type(gs_tree_operand(lhs, 0));
+ gs_t type = gs_tree_type(ptr_type);
+ FmtAssert(gs_tree_code(ptr_type) == GS_POINTER_TYPE,
+ ("WGEN_Lhs_Of_Modify_Expr: INDIRECT_REF opnd0 is not POINTER_TYPE"));
+ FmtAssert(component_offset == 0,
+ ("WGEN_Lhs_Of_Modify_Expr: component_offset nonzero"));
+ TY_IDX tidx = Get_TY(ptr_type);
+ // Check object has no copy constructor.
+ FmtAssert(!WGEN_has_copy_constructor(type),
+ ("WGEN_Lhs_Of_Modify_Expr: object needs copy constructor"));
+ }
+ wn = WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset,
+ Make_Pointer_Type (hi_ty_idx, FALSE),
+ rhs_wn, addr_wn, field_id);
+ }
#ifdef TARG_SL
/* so far I only handle *p++=... cases, change this case to
* *p = ... ;
@@ -10126,6 +10224,13 @@
else {
enum X86_64_PARM_CLASS classes[MAX_CLASSES];
INT n = Classify_Aggregate(ty_idx, classes);
+ // handle X87 X87UP and COMPLEX_X87 cases
+ if (n != 0 && (classes[0] == X86_64_X87_CLASS ||
+ classes[0] == X86_64_X87UP_CLASS ||
+ classes[0] == X86_64_COMPLEX_X87_CLASS)) {
+ // x87, x87up and complex_x87 are passed in memory
+ n = 0;
+ }
if (n == 0) { /* can only pass in memory */
/* increment overflow_arg_area pointer by 8 */
INT delta = ((TY_size(ty_idx) + 7) / 8) * 8;
Index: osprey/wgen/wgen_expr.h
===================================================================
--- osprey/wgen/wgen_expr.h (revision 3553)
+++ osprey/wgen/wgen_expr.h (working copy)
@@ -127,6 +127,7 @@
extern void WGEN_Expand_Start_Stmt_Expr (gs_t);
extern void WGEN_Expand_End_Stmt_Expr (gs_t);
extern gs_t first_in_compound_expr(gs_t);
+extern BOOL Need_Hidden_Parameter(TY_IDX ty);
#ifdef __cplusplus
}
Index: osprey/wgen/wgen_decl.cxx
===================================================================
--- osprey/wgen/wgen_decl.cxx (revision 3553)
+++ osprey/wgen/wgen_decl.cxx (working copy)
@@ -2817,14 +2817,62 @@
Is_True( (WN_operator(target) == OPR_LDID ||
WN_operator(target) == OPR_LDA),
("Invalid operator for target"));
- if( WN_operator(target) == OPR_LDID ) {
+ // handle struct init by a return value of a call,
+ // remove redundant temp variable copy
+
+ // if return type is struct type, and the result is used in lhs,
+ // then the Mreturn temp variable is not needed, the address of lhs
+ // should be passed as hidden parameter
+ bool return_val_transformed = false;
+ WN *block = NULL;
+ if (WN_operator(init_wn) == OPR_COMMA &&
+ WN_rtype(init_wn) == MTYPE_M) {
+ WN *block = WN_kid0(init_wn);
+ WN *ldidTemp = WN_kid1(init_wn);
+ if (WN_operator(ldidTemp) == OPR_LDID &&
+ WN_operator(block) == OPR_BLOCK) {
+
+ // replace MSTID _temp_.Mreturn.1
+ // with MSTID lhs
+ WN *stidTemp = WN_last(block);
+ if (WN_operator(stidTemp) == OPR_STID &&
+ Need_Hidden_Parameter(WN_ty(ldidTemp))) {
+ // remove block from the init_wn
+ WN_kid0(init_wn) = 0;
+ WN_DELETE_Tree(init_wn);
+ init_wn = block;
+
+ WN *kid = WN_kid0(stidTemp);
+ WN_kid0(stidTemp) = 0;
+ WN_DELETE_FromBlock(block, stidTemp);
+
+ if( WN_operator(target) == OPR_LDID ) {
+ TY_IDX ptr_ty = Make_Pointer_Type(ty);
+ wn = WN_Istore(mtype, offset, ptr_ty, target, kid, field_id);
+ }
+ else { // OPR_LDA
+ ST *st = WN_st(target);
+ wn = WN_Stid (mtype, WN_lda_offset(target) + offset, st,
+ ty, kid, field_id);
+ }
+ WN_INSERT_BlockLast(block, wn);
+ wn = block;
+ return_val_transformed = true;
+ }
+ }
+ else
+ block = NULL; // not a block
+ }
+ if (!return_val_transformed) {
+ if( WN_operator(target) == OPR_LDID ) {
TY_IDX ptr_ty = Make_Pointer_Type(ty);
wn = WN_Istore(mtype, offset, ptr_ty, target, init_wn, field_id);
- }
- else { // OPR_LDA
+ }
+ else { // OPR_LDA
ST *st = WN_st(target);
wn = WN_Stid (mtype, WN_lda_offset(target) + offset, st,
ty, init_wn, field_id);
+ }
}
#else
WN *wn = WN_Stid (mtype, ST_ofst(st) + offset, st,
------------------------------------------------------------------------------
Achieve unprecedented app performance and reliability
What every C/C++ and Fortran developer should know.
Learn how Intel has extended the reach of its next-generation tools
to help boost performance applications - including clusters.
http://p.sf.net/sfu/intel-dev2devmay
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel