Hi, When assigning a parameter to a variable, or assigning a variable to return value with struct type, "block move" are used to expand the assignment. It would be better to use the register mode according to the target/ABI to move the blocks. And then this would raise more opportunities for other optimization passes(cse/dse/xprop).
As the example code (like code in PR65421): typedef struct SA {double a[3];} A; A ret_arg_pt (A *a){return *a;} // on ppc64le, only 3 lfd(s) A ret_arg (A a) {return a;} // just empty fun body void st_arg (A a, A *p) {*p = a;} //only 3 stfd(s) This patch is based on the previous version which supports assignments from parameter: https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605709.html This patch also supports returns. I also tried to update gimplify/nrv to replace "return D.xxx;" with "return <retval>;". While there is one issue: "<retval>" with PARALLEL code can not be accessed through address/component_ref. This issue blocks a few passes (e.g. sra, expand). On ppc64, some dead stores are not eliminated. e.g. for ret_arg: .cfi_startproc std 4,56(1)//reductant std 5,64(1)//reductant std 6,72(1)//reductant std 4,0(3) std 5,8(3) std 6,16(3) blr Bootstraped and regtested on ppc64le and x86_64. I'm wondering if this patch could be committed first. Thanks for the comments and suggestions. BR, Jeff (Jiufu) PR target/65421 gcc/ChangeLog: * cfgexpand.cc (expand_used_vars): Add collecting return VARs. (expand_gimple_stmt_1): Call expand_special_struct_assignment. (pass_expand::execute): Free collections of return VARs. * expr.cc (expand_special_struct_assignment): New function. * expr.h (expand_special_struct_assignment): Declare. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr65421-1.c: New test. * gcc.target/powerpc/pr65421.c: New test. --- gcc/cfgexpand.cc | 37 +++++++++++++++++ gcc/expr.cc | 43 ++++++++++++++++++++ gcc/expr.h | 3 ++ gcc/testsuite/gcc.target/powerpc/pr65421-1.c | 21 ++++++++++ gcc/testsuite/gcc.target/powerpc/pr65421.c | 19 +++++++++ 5 files changed, 123 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index dd29ffffc03..f185de39341 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -341,6 +341,9 @@ static hash_map<tree, size_t> *decl_to_stack_part; all of them in one big sweep. */ static bitmap_obstack stack_var_bitmap_obstack; +/* Those VARs on returns. */ +static bitmap return_vars; + /* An array of indices such that stack_vars[stack_vars_sorted[i]].size is non-decreasing. */ static size_t *stack_vars_sorted; @@ -2158,6 +2161,24 @@ expand_used_vars (bitmap forced_stack_vars) frame_phase = off ? align - off : 0; } + /* Collect VARs on returns. */ + return_vars = NULL; + if (DECL_RESULT (current_function_decl) + && TYPE_MODE (TREE_TYPE (DECL_RESULT (current_function_decl))) == BLKmode) + { + return_vars = BITMAP_ALLOC (NULL); + + edge_iterator ei; + edge e; + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + if (greturn *ret = safe_dyn_cast<greturn *> (last_stmt (e->src))) + { + tree val = gimple_return_retval (ret); + if (val && VAR_P (val)) + bitmap_set_bit (return_vars, DECL_UID (val)); + } + } + /* Set TREE_USED on all variables in the local_decls. */ FOR_EACH_LOCAL_DECL (cfun, i, var) TREE_USED (var) = 1; @@ -3942,6 +3963,17 @@ expand_gimple_stmt_1 (gimple *stmt) /* This is a clobber to mark the going out of scope for this LHS. */ expand_clobber (lhs); + else if ((TREE_CODE (rhs) == PARM_DECL && DECL_INCOMING_RTL (rhs) + && TYPE_MODE (TREE_TYPE (rhs)) == BLKmode + && (GET_CODE (DECL_INCOMING_RTL (rhs)) == PARALLEL + || REG_P (DECL_INCOMING_RTL (rhs)))) + || (VAR_P (lhs) && return_vars + && DECL_RTL_SET_P (DECL_RESULT (current_function_decl)) + && GET_CODE ( + DECL_RTL (DECL_RESULT (current_function_decl))) + == PARALLEL + && bitmap_bit_p (return_vars, DECL_UID (lhs)))) + expand_special_struct_assignment (lhs, rhs); else expand_assignment (lhs, rhs, gimple_assign_nontemporal_move_p ( @@ -7025,6 +7057,11 @@ pass_expand::execute (function *fun) /* After expanding, the return labels are no longer needed. */ return_label = NULL; naked_return_label = NULL; + if (return_vars) + { + BITMAP_FREE (return_vars); + return_vars = NULL; + } /* After expanding, the tm_restart map is no longer needed. */ if (fun->gimple_df->tm_restart) diff --git a/gcc/expr.cc b/gcc/expr.cc index d9407432ea5..6ffd9439188 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -5559,6 +5559,49 @@ mem_ref_refers_to_non_mem_p (tree ref) return non_mem_decl_p (base); } +/* Expand the assignment from parameter or to returns if it needs + "block move" on struct type. */ + +void +expand_special_struct_assignment (tree to, tree from) +{ + rtx result; + + push_temp_slots (); + rtx par_ret = TREE_CODE (from) == PARM_DECL + ? DECL_INCOMING_RTL (from) + : DECL_RTL (DECL_RESULT (current_function_decl)); + machine_mode mode = GET_CODE (par_ret) == PARALLEL + ? GET_MODE (XEXP (XVECEXP (par_ret, 0, 0), 0)) + : word_mode; + int mode_size = GET_MODE_SIZE (mode).to_constant (); + int size = INTVAL (expr_size (from)); + rtx to_rtx = expand_expr (to, NULL_RTX, VOIDmode, EXPAND_WRITE); + + /* Here using a heurisitic number for how many words may pass via gprs. */ + int hurstc_num = 8; + if (size < mode_size || (size % mode_size) != 0 + || (GET_CODE (par_ret) != PARALLEL && size > (mode_size * hurstc_num))) + result = store_expr (from, to_rtx, 0, false, false); + else + { + rtx from_rtx + = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL); + for (int i = 0; i < size / mode_size; i++) + { + rtx temp = gen_reg_rtx (mode); + rtx src = adjust_address (from_rtx, mode, mode_size * i); + rtx dest = adjust_address (to_rtx, mode, mode_size * i); + emit_move_insn (temp, src); + emit_move_insn (dest, temp); + } + result = to_rtx; + } + + preserve_temp_slots (result); + pop_temp_slots (); +} + /* Expand an assignment that stores the value of FROM into TO. If NONTEMPORAL is true, try generating a nontemporal store. */ diff --git a/gcc/expr.h b/gcc/expr.h index 08b59b8d869..10527f23a56 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -281,6 +281,9 @@ extern void get_bit_range (poly_uint64_pod *, poly_uint64_pod *, tree, /* Expand an assignment that stores the value of FROM into TO. */ extern void expand_assignment (tree, tree, bool); +/* Expand an assignment from parameters or to returns. */ +extern void expand_special_struct_assignment (tree, tree); + /* Generate code for computing expression EXP, and storing the value into TARGET. If SUGGEST_REG is nonzero, copy the value through a register diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421-1.c b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c new file mode 100644 index 00000000000..f55a0fe0002 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr65421-1.c @@ -0,0 +1,21 @@ +/* PR target/65421 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -m64" } */ + +typedef struct SA +{ + double a[3]; + long l; +} A; + +A ret_arg_pt (A *a){return *a;} + +A ret_arg (A a) {return a;} + +void st_arg (A a, A *p) {*p = a;} + +/* { dg-final { scan-assembler-times {\mlxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvd2x\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstd\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 16 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c new file mode 100644 index 00000000000..26e85468470 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c @@ -0,0 +1,19 @@ +/* PR target/65421 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -m64" } */ + +typedef struct SA +{ + double a[3]; +} A; + +A ret_arg_pt (A *a){return *a;} + +A ret_arg (A a) {return a;} + +void st_arg (A a, A *p) {*p = a;} + +/* { dg-final { scan-assembler-times {\mlfd\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mstfd\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mblr\M} 3 } } */ +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 9 } } */ -- 2.17.1