When a structure containing an array of doubles is passed as a function argument or returned as a value, it is accessed in BLKmode: its elements are stored to the stack and loaded back from the stack with a redundant DF->DI->DF conversion. This patch checks whether the type is a homogeneous aggregate and, if so, uses the actual element mode for the block move in order to bypass these conversions.
gcc/ChangeLog: 2020-06-02 Xionghu Luo <luo...@linux.ibm.com> PR target/65421 * config/rs6000/rs6000-string.c (expand_block_move): Use elt_mode to copy when homogeneous REAL_TYPE. gcc/testsuite/ChangeLog: 2020-06-02 Xionghu Luo <luo...@linux.ibm.com> PR target/65421 * gcc.target/powerpc/pr65421.c: New test. --- gcc/config/rs6000/rs6000-string.c | 15 ++++++++++++++- gcc/testsuite/gcc.target/powerpc/pr65421.c | 17 +++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c index fe7177f10fd..ea217840d88 100644 --- a/gcc/config/rs6000/rs6000-string.c +++ b/gcc/config/rs6000/rs6000-string.c @@ -37,6 +37,7 @@ #include "target.h" #include "profile-count.h" #include "predict.h" +#include "rs6000-internal.h" /* Expand a block clear operation, and return 1 if successful. Return 0 if we should let the compiler generate normal code. @@ -2733,6 +2734,7 @@ expand_block_move (rtx operands[], bool might_overlap) rtx loads[MAX_MOVE_REG]; rtx stores[MAX_MOVE_REG]; int num_reg = 0; + machine_mode elt_mode = DImode; /* If this is not a fixed size move, just call memcpy */ if (! 
constp) @@ -2750,6 +2752,17 @@ expand_block_move (rtx operands[], bool might_overlap) if (bytes > rs6000_block_move_inline_limit) return 0; + tree type = TREE_TYPE (MEM_EXPR (orig_dest)); + if (TREE_CODE (type) == RECORD_TYPE + && rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, NULL, + NULL)) + { + tree field_type = TREE_TYPE (first_field (type)); + if (field_type && TREE_CODE (field_type) == ARRAY_TYPE + && TREE_CODE (TREE_TYPE (field_type)) == REAL_TYPE) + elt_mode = TYPE_MODE (TREE_TYPE (field_type)); + } + for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes) { union { @@ -2771,7 +2784,7 @@ expand_block_move (rtx operands[], bool might_overlap) && (align >= 64 || !STRICT_ALIGNMENT)) { move_bytes = 8; - mode = DImode; + mode = elt_mode; gen_func.mov = gen_movdi; if (offset == 0 && align < 64) { diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c new file mode 100644 index 00000000000..ec8f4824de5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +typedef struct +{ + double a[4]; +} A; + +A +foo (const A *a) +{ + return *a; +} + +/* { dg-final { scan-assembler-not {\mld\M} } } */ +/* { dg-final { scan-assembler-not {\mstd\M} } } */ +/* { dg-final { scan-assembler-times {\mlfd\M} 4 } } */ -- 2.21.0.777.g83232e3864