A structure containing an array of doubles that is passed as a function
argument or return value is accessed in BLKmode: it is stored to the stack
and loaded back from the stack with a redundant DF->DI->DF conversion.  This
patch checks for a homogeneous aggregate type and uses the actual element
type for the block move, bypassing the conversions.

gcc/ChangeLog:

        2020-06-02  Xionghu Luo  <luo...@linux.ibm.com>

        PR target/65421
        * config/rs6000/rs6000-string.c (expand_block_move): Use
        elt_mode to copy when homogeneous REAL_TYPE.

gcc/testsuite/ChangeLog:

        2020-06-02  Xionghu Luo  <luo...@linux.ibm.com>

        PR target/65421
        * gcc.target/powerpc/pr65421.c: New test.
---
 gcc/config/rs6000/rs6000-string.c          | 15 ++++++++++++++-
 gcc/testsuite/gcc.target/powerpc/pr65421.c | 17 +++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c

diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index fe7177f10fd..ea217840d88 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -37,6 +37,7 @@
 #include "target.h"
 #include "profile-count.h"
 #include "predict.h"
+#include "rs6000-internal.h"
 
 /* Expand a block clear operation, and return 1 if successful.  Return 0
    if we should let the compiler generate normal code.
@@ -2733,6 +2734,7 @@ expand_block_move (rtx operands[], bool might_overlap)
   rtx loads[MAX_MOVE_REG];
   rtx stores[MAX_MOVE_REG];
   int num_reg = 0;
+  machine_mode elt_mode = DImode;
 
   /* If this is not a fixed size move, just call memcpy */
   if (! constp)
@@ -2750,6 +2752,17 @@ expand_block_move (rtx operands[], bool might_overlap)
   if (bytes > rs6000_block_move_inline_limit)
     return 0;
 
+  tree type = TREE_TYPE (MEM_EXPR (orig_dest));
+  if (TREE_CODE (type) == RECORD_TYPE
+      && rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, NULL,
+                                               NULL))
+    {
+      tree field_type = TREE_TYPE (first_field (type));
+      if (field_type && TREE_CODE (field_type) == ARRAY_TYPE
+         && TREE_CODE (TREE_TYPE (field_type)) == REAL_TYPE)
+       elt_mode = TYPE_MODE (TREE_TYPE (field_type));
+    }
+
   for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
     {
       union {
@@ -2771,7 +2784,7 @@ expand_block_move (rtx operands[], bool might_overlap)
               && (align >= 64 || !STRICT_ALIGNMENT))
        {
          move_bytes = 8;
-         mode = DImode;
+         mode = elt_mode;
          gen_func.mov = gen_movdi;
          if (offset == 0 && align < 64)
            {
diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c
new file mode 100644
index 00000000000..ec8f4824de5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+typedef struct
+{
+  double a[4];
+} A;
+
+A
+foo (const A *a)
+{
+  return *a;
+}
+
+/* { dg-final { scan-assembler-not       {\mld\M}    } } */
+/* { dg-final { scan-assembler-not       {\mstd\M}   } } */
+/* { dg-final { scan-assembler-times     {\mlfd\M}  4 } } */
-- 
2.21.0.777.g83232e3864

Reply via email to