The attached patch is a revision of a patch I had posted a few weeks
ago ("patch for better AMD64 ABI compatibility"). Most of the code
related to ABI compatibility has been committed in r3652. The
motivation for the patch is to reduce unnecessary copying of structs,
especially when used as return values in C++.
We have generalized the patch to remove ABI-dependent code from wgen
and retested; Need_Hidden_Parameter() is no longer needed.
Here is the proposed commit message:
Avoid unnecessary struct copies.
For the following code, the compiler generated unnecessary struct copies
for a return value that has a size too big to be passed in registers.
typedef struct { char big[1024]; } C;
C gc;
extern C bar9 (void);
extern C check9 (void) { return bar9 (); }
In this case there were two copies and one temporary variable generated.
After this change the compiler does not generate any extra copies.
The change handles the similar cases of initialization by function
call ("C c = bar9();") and assignment to a pointer ("*cp = bar9()").
For struct return values, the patch also includes a fix to follow the
AMD64 ABI: the caller provides space for the return value in a hidden
first argument and this address is returned in %rax or %eax for a
32-bit target. Previously the compiler was not following this rule.
Could a gatekeeper please review the changes? Yin, please let us know
if the revision addresses your concerns.
-David Coakley / AMD Open Source Compiler Engineering
Index: osprey/be/cg/reg_live.cxx
===================================================================
--- osprey/be/cg/reg_live.cxx (revision 3652)
+++ osprey/be/cg/reg_live.cxx (working copy)
@@ -268,6 +268,17 @@
retpreg[i] = RETURN_INFO_preg (return_info, i);
Add_PREG_To_REGSET (retpreg[i], return_regs);
}
+#ifdef TARG_X8664
+ // AMD64 ABI
+ // on return %rax will contain the address that has been
+ // passed in by the caller in %rdi
+ // for 32 bit, the return address is in %eax
+ if (RETURN_INFO_return_via_first_arg(return_info)) {
+ FmtAssert (RETURN_INFO_count(return_info) == 0,
+ ("Compute_Return_Regs: more return registers than can handle"));
+ Add_PREG_To_REGSET (First_Int_Preg_Return_Offset, return_regs);
+ }
+#endif
#if defined(TARG_SL)
if (MTYPE_byte_size(TY_mtype(TY_ret_type(call_ty))) == 8) { //I8/U8/F8
FmtAssert (RETURN_INFO_count(return_info) <= 1,
Index: osprey/be/com/wn_lower.cxx
===================================================================
--- osprey/be/com/wn_lower.cxx (revision 3652)
+++ osprey/be/com/wn_lower.cxx (working copy)
@@ -7331,7 +7331,8 @@
ST *preg_st;
WN *n_rhs;
WN *wn = NULL; // init to prevent upward-exposed use
- RETURN_INFO return_info = Get_Return_Info(WN_ty(tree), Complex_Not_Simulated
+ RETURN_INFO return_info = Get_Return_Info(WN_ty(WN_kid0(tree)),
+ Complex_Not_Simulated
#ifdef TARG_X8664
, last_call_ff2c_abi
#endif
@@ -7353,7 +7354,7 @@
if (WN_store_offset(tree) != 0) { // generate an ADD node for the offset
WN *iwn = WN_CreateIntconst(OPR_INTCONST, Pointer_Mtype, MTYPE_V,
WN_store_offset(tree));
- awn = WN_CreateExp2(OPR_ADD, Pointer_Mtype, Pointer_Mtype, awn, iwn);
+ awn = WN_CreateExp2(OPR_ADD, Pointer_Mtype, MTYPE_V, awn, iwn);
}
awn = lower_expr(block, awn, actions);
WN *n_call = add_fake_parm(call, awn, WN_ty(tree));
@@ -12132,25 +12133,94 @@
WN *n_rhs;
// fix rhs
- if (WN_operator(o_rhs) == OPR_LDID)
- n_rhs = lower_mldid(block, o_rhs, LOWER_MLDID_MSTID);
- else if (WN_operator(o_rhs) == OPR_ILOAD)
- n_rhs = lower_miload(block, o_rhs, LOWER_MLDID_MSTID);
- else n_rhs = o_rhs; // MLOAD
-
- // create an mstore
+
WN *first_formal = WN_formal(current_function, 0);
TY_IDX tidx = ST_type(WN_st(first_formal));
- WN *awn = WN_CreateLdid(OPR_LDID,
- TY_mtype(Ty_Table[tidx]),
- TY_mtype(Ty_Table[tidx]),
- WN_idname_offset(first_formal),
- WN_st(first_formal),
- tidx);
- WN *swn = WN_CopyNode(WN_kid1(n_rhs));
- wn = WN_CreateMstore (0, tidx, n_rhs, awn, swn);
- WN_Set_Linenum(wn, current_srcpos); // Bug 1268
- WN_INSERT_BlockLast (block, wn);
+ if (WN_operator(o_rhs) == OPR_LDID &&
+ WN_st(o_rhs) == Return_Val_Preg) {
+ // the Return_Val_Preg must be returned by previous call
+ // so we need to get the previous MCALL statement and
+ // fake first parm
+ //
+ // MCALL 126 <1,51,bar9>
+ // MMLDID -1 <1,49,.preg_return_val> T<53,.anonymous.1,1>
+ // MRETURN_VAL
+ //
+ // ==>
+ //
+ // U8LDID 0 <2,3,_temp_.return...>
+ // U8PARM 33 T<55,anon_ptr.,8>
+ // VCALL 126 <1,51,bar9>
+ //
+ WN *call = WN_last(block);
+ if ((WN_operator(call) == OPR_CALL || WN_operator(call) == OPR_ICALL ||
+ WN_operator(call) == OPR_PICCALL) && WN_rtype(call) == MTYPE_M) {
+ TY_IDX prototype;
+ if (WN_operator(call) == OPR_ICALL)
+ prototype = WN_ty(call);
+ else {
+ ST_IDX func_stidx = WN_st_idx(call);
+ PU_IDX puidx = ST_pu(St_Table[func_stidx]);
+ prototype = PU_prototype(Pu_Table[puidx]);
+ }
+ WN *awn = WN_CreateLdid(OPR_LDID,
+ TY_mtype(Ty_Table[tidx]),
+ TY_mtype(Ty_Table[tidx]),
+ WN_idname_offset(first_formal),
+ WN_st(first_formal),
+ tidx);
+ awn = lower_expr(block, awn, actions);
+ WN *n_call = add_fake_parm(call, awn, WN_ty(awn));
+ WN_DELETE_FromBlock(block, call);
+ WN_INSERT_BlockLast(block, n_call);
+ }
+ }
+ else {
+ if (WN_operator(o_rhs) == OPR_LDID) {
+ n_rhs = lower_mldid(block, o_rhs, LOWER_MLDID_MSTID);
+ }
+ else if (WN_operator(o_rhs) == OPR_ILOAD)
+ n_rhs = lower_miload(block, o_rhs, LOWER_MLDID_MSTID);
+ else n_rhs = o_rhs; // MLOAD
+
+ // create an mstore
+ WN *awn = WN_CreateLdid(OPR_LDID,
+ TY_mtype(Ty_Table[tidx]),
+ TY_mtype(Ty_Table[tidx]),
+ WN_idname_offset(first_formal),
+ WN_st(first_formal),
+ tidx);
+ WN *swn = WN_CopyNode(WN_kid1(n_rhs));
+ wn = WN_CreateMstore (0, tidx, n_rhs, awn, swn);
+ WN_Set_Linenum(wn, current_srcpos); // Bug 1268
+ WN_INSERT_BlockLast (block, wn);
+ }
+#ifdef TARG_X8664
+ // AMD64 ABI
+ // on return %rax will contain the address that has been
+ // passed in by the caller in %rdi
+ // for 32 bit, the return address is in %eax
+ //
+ // U8U8LDID 0 <2,3,_temp_.return...>
+ // U8STID 1 <1,5,.preg_I8> T<5,.predef_I8,8> # $r1
+ //
+
+ mtype = Is_Target_64bit() ? MTYPE_U8 : MTYPE_U4;
+ WN *ld = WN_CreateLdid(OPR_LDID,
+ TY_mtype(Ty_Table[tidx]),
+ TY_mtype(Ty_Table[tidx]),
+ WN_idname_offset(first_formal),
+ WN_st(first_formal),
+ tidx);
+ WN *stid = WN_Stid( mtype, First_Int_Preg_Return_Offset,
+ Int_Preg, ST_type(Int_Preg), ld );
+ WN_Set_Linenum(stid, current_srcpos);
+ WN_INSERT_BlockLast( block, stid );
+ if (traceMload) {
+ fprintf(TFile, "Return_val lower [Return_Val_Preg]\n");
+ fdump_tree(TFile, block);
+ }
+#endif
}
else { // return in return registers
INT32 i;
@@ -12261,6 +12331,10 @@
}
}
+ // lastly make a normal return statement
+ //
+ // RETURN
+ //
WN *wn_return = WN_CreateReturn ();
WN_Set_Linenum(wn_return, current_srcpos); // Bug 1268
if ( Cur_PU_Feedback )
Index: osprey/wgen/wgen_spin_symbol.cxx
===================================================================
--- osprey/wgen/wgen_spin_symbol.cxx (revision 3652)
+++ osprey/wgen/wgen_spin_symbol.cxx (working copy)
@@ -852,6 +852,7 @@
Set_TY_is_union(idx);
}
#ifdef KEY
+ // gs_aggregate_value_p is only set for c++
if (gs_aggregate_value_p(type_tree)) {
Set_TY_return_in_mem(idx);
}
Index: osprey/wgen/wgen_expr.cxx
===================================================================
--- osprey/wgen/wgen_expr.cxx (revision 3652)
+++ osprey/wgen/wgen_expr.cxx (working copy)
@@ -1645,9 +1645,51 @@
if (volt)
Set_TY_is_volatile(hi_ty_idx);
#endif
- wn = WN_Stid (desc, ST_ofst(st) + component_offset + lhs_preg_num, st,
- hi_ty_idx, rhs_wn, field_id);
- WGEN_Stmt_Append(wn, Get_Srcpos());
+ // if return type is struct type, and the result is used in lhs,
+ // then the Mreturn temp variable is not needed, the address of lhs
+ // should be passed as hidden parameter
+
+ // the same pattern match is used in different places
+ // to handle return struct value, init struct value and
+ // assign struct value from a function call, if anything
+ // changed in the pattern in future, we need to change all
+ // these places, just look for return_val_transformed.
+ bool return_val_transformed = false;
+ if (WN_operator(rhs_wn) == OPR_COMMA &&
+ WN_rtype(rhs_wn) == MTYPE_M) {
+ WN *block = WN_kid0(rhs_wn);
+ WN *ldidTemp = WN_kid1(rhs_wn);
+ if (WN_operator(ldidTemp) == OPR_LDID &&
+ WN_operator(block) == OPR_BLOCK) {
+
+ // replace MSTID _temp_.Mreturn.1
+ // with MSTID lhs
+ WN *stidTemp = WN_last(block);
+ if (WN_operator(stidTemp) == OPR_STID ) {
+ // remove block from the rhs_wn
+ WN_kid0(rhs_wn) = 0;
+ WN_DELETE_Tree(rhs_wn);
+ rhs_wn = block;
+
+ WN *kid = WN_kid0(stidTemp);
+ WN_kid0(stidTemp) = 0;
+ WN_DELETE_FromBlock(block, stidTemp);
+
+ WN *stid = WN_Stid (desc,
+ ST_ofst(st) + component_offset + lhs_preg_num,
+ st, hi_ty_idx, kid, field_id);
+ WN_INSERT_BlockLast(block, stid);
+ WGEN_Stmt_Append(block, Get_Srcpos());
+ wn = block;
+ return_val_transformed = true;
+ }
+ }
+ }
+ if (!return_val_transformed) {
+ wn = WN_Stid (desc, ST_ofst(st) + component_offset + lhs_preg_num, st,
+ hi_ty_idx, rhs_wn, field_id);
+ WGEN_Stmt_Append(wn, Get_Srcpos());
+ }
#if defined(TARG_SL)
if (need_append) {
WN *ldid_wn;
@@ -1869,7 +1911,6 @@
wn = NULL;
}
else {
-#ifdef KEY
// The store target could be an INDIRECT_REF that kg++fe added to make
// the store write to the area pointed to by the fake first param. If
// so, check that copying the object does not involve a copy
@@ -1889,31 +1930,66 @@
if (Current_Entry_WN() != NULL) {
first_formal = WN_formal(Current_Entry_WN(), 0);
}
- if (TY_return_in_mem(hi_ty_idx) &&
- field_id == 0 &&
- // See if it is an indirect ref of the fake first parm.
- // bug fix for OSP_314
- //
- first_formal != NULL && (WN_operator(first_formal) != OPR_BLOCK) &&
- gs_tree_code(addr) == GS_VAR_DECL &&
- DECL_ST(addr) == WN_st(first_formal)) {
- FmtAssert(TY_mtype(hi_ty_idx) == MTYPE_M,
- ("WGEN_Lhs_Of_Modify_Expr: return_in_mem type not MTYPE_M"));
- gs_t ptr_type = gs_tree_type(gs_tree_operand(lhs, 0));
- gs_t type = gs_tree_type(ptr_type);
- FmtAssert(gs_tree_code(ptr_type) == GS_POINTER_TYPE,
- ("WGEN_Lhs_Of_Modify_Expr: INDIRECT_REF opnd0 is not POINTER_TYPE"));
- FmtAssert(component_offset == 0,
- ("WGEN_Lhs_Of_Modify_Expr: component_offset nonzero"));
- TY_IDX tidx = Get_TY(ptr_type);
- // Check object has no copy constructor.
- FmtAssert(!WGEN_has_copy_constructor(type),
- ("WGEN_Lhs_Of_Modify_Expr: object needs copy constructor"));
+ // if return type is struct type, and the result is used in lhs,
+ // then the Mreturn temp variable is not needed, the address of lhs
+ // should be passed as hidden parameter
+ bool return_val_transformed = false;
+ if (WN_operator(rhs_wn) == OPR_COMMA &&
+ WN_rtype(rhs_wn) == MTYPE_M) {
+ WN *block = WN_kid0(rhs_wn);
+ WN *ldidTemp = WN_kid1(rhs_wn);
+ if (WN_operator(ldidTemp) == OPR_LDID &&
+ WN_operator(block) == OPR_BLOCK) {
+
+ // replace MSTID _temp_.Mreturn.1
+ // with MISTORE lhs
+ WN *stidTemp = WN_last(block);
+ if (WN_operator(stidTemp) == OPR_STID) {
+ // remove block from the rhs_wn
+ WN_kid0(rhs_wn) = 0;
+ WN_DELETE_Tree(rhs_wn);
+ rhs_wn = block;
+
+ WN *kid = WN_kid0(stidTemp);
+ WN_kid0(stidTemp) = 0;
+ WN_DELETE_FromBlock(block, stidTemp);
+
+ WN *istore =
+ WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset,
+ Make_Pointer_Type (hi_ty_idx, FALSE),
+ kid, addr_wn, field_id);
+ WN_INSERT_BlockLast(block, istore);
+ wn = block;
+ return_val_transformed = true;
+ }
+ }
}
-#endif
- wn = WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset,
- Make_Pointer_Type (hi_ty_idx, FALSE),
- rhs_wn, addr_wn, field_id);
+ if (!return_val_transformed) {
+ if (TY_return_in_mem(hi_ty_idx) &&
+ field_id == 0 &&
+ // See if it is an indirect ref of the fake first parm.
+ // bug fix for OSP_314
+ //
+ first_formal != NULL && (WN_operator(first_formal) != OPR_BLOCK) &&
+ gs_tree_code(addr) == GS_VAR_DECL &&
+ DECL_ST(addr) == WN_st(first_formal)) {
+ FmtAssert(TY_mtype(hi_ty_idx) == MTYPE_M,
+ ("WGEN_Lhs_Of_Modify_Expr: return_in_mem type not MTYPE_M"));
+ gs_t ptr_type = gs_tree_type(gs_tree_operand(lhs, 0));
+ gs_t type = gs_tree_type(ptr_type);
+ FmtAssert(gs_tree_code(ptr_type) == GS_POINTER_TYPE,
+ ("WGEN_Lhs_Of_Modify_Expr: INDIRECT_REF opnd0 is not POINTER_TYPE"));
+ FmtAssert(component_offset == 0,
+ ("WGEN_Lhs_Of_Modify_Expr: component_offset nonzero"));
+ TY_IDX tidx = Get_TY(ptr_type);
+ // Check object has no copy constructor.
+ FmtAssert(!WGEN_has_copy_constructor(type),
+ ("WGEN_Lhs_Of_Modify_Expr: object needs copy constructor"));
+ }
+ wn = WN_CreateIstore(OPR_ISTORE, MTYPE_V, desc, component_offset,
+ Make_Pointer_Type (hi_ty_idx, FALSE),
+ rhs_wn, addr_wn, field_id);
+ }
#ifdef TARG_SL
/* so far I only handle *p++=... cases, change this case to
* *p = ... ;
Index: osprey/wgen/wgen_decl.cxx
===================================================================
--- osprey/wgen/wgen_decl.cxx (revision 3652)
+++ osprey/wgen/wgen_decl.cxx (working copy)
@@ -2825,14 +2825,61 @@
Is_True( (WN_operator(target) == OPR_LDID ||
WN_operator(target) == OPR_LDA),
("Invalid operator for target"));
- if( WN_operator(target) == OPR_LDID ) {
+ // handle struct init by a return value of a call,
+ // remove redundant temp variable copy
+
+ // if return type is struct type, and the result is used in lhs,
+ // then the Mreturn temp variable is not needed, the address of lhs
+ // should be passed as hidden parameter
+ bool return_val_transformed = false;
+ WN *block = NULL;
+ if (WN_operator(init_wn) == OPR_COMMA &&
+ WN_rtype(init_wn) == MTYPE_M) {
+ WN *block = WN_kid0(init_wn);
+ WN *ldidTemp = WN_kid1(init_wn);
+ if (WN_operator(ldidTemp) == OPR_LDID &&
+ WN_operator(block) == OPR_BLOCK) {
+
+ // replace MSTID _temp_.Mreturn.1
+ // with MSTID lhs
+ WN *stidTemp = WN_last(block);
+ if (WN_operator(stidTemp) == OPR_STID) {
+ // remove block from the init_wn
+ WN_kid0(init_wn) = 0;
+ WN_DELETE_Tree(init_wn);
+ init_wn = block;
+
+ WN *kid = WN_kid0(stidTemp);
+ WN_kid0(stidTemp) = 0;
+ WN_DELETE_FromBlock(block, stidTemp);
+
+ if( WN_operator(target) == OPR_LDID ) {
+ TY_IDX ptr_ty = Make_Pointer_Type(ty);
+ wn = WN_Istore(mtype, offset, ptr_ty, target, kid, field_id);
+ }
+ else { // OPR_LDA
+ ST *st = WN_st(target);
+ wn = WN_Stid (mtype, WN_lda_offset(target) + offset, st,
+ ty, kid, field_id);
+ }
+ WN_INSERT_BlockLast(block, wn);
+ wn = block;
+ return_val_transformed = true;
+ }
+ }
+ else
+ block = NULL; // not a block
+ }
+ if (!return_val_transformed) {
+ if( WN_operator(target) == OPR_LDID ) {
TY_IDX ptr_ty = Make_Pointer_Type(ty);
wn = WN_Istore(mtype, offset, ptr_ty, target, init_wn, field_id);
- }
- else { // OPR_LDA
+ }
+ else { // OPR_LDA
ST *st = WN_st(target);
wn = WN_Stid (mtype, WN_lda_offset(target) + offset, st,
ty, init_wn, field_id);
+ }
}
#else
WN *wn = WN_Stid (mtype, ST_ofst(st) + offset, st,
------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Open64-devel mailing list
Open64-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/open64-devel