On Tue, Jun 2, 2020 at 11:43 AM Xionghu Luo via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> A double array in a structure that is passed as a function argument or
> return value is accessed in BLKmode: it is stored to the stack and loaded
> back from the stack with a redundant DF->DI->DF conversion.  This patch
> checks for a homogeneous type and uses the actual element type for the
> block move to bypass the conversions.

Is it correct to do this when the actual data at that location is DImode?
We generally avoid using any floating point modes here because the DImode
data could, for example, correspond to a signalling NaN or a non-canonical
NaN.

What makes a case with, say, struct { double a; long b; } different?

Richard.

> gcc/ChangeLog:
>
>         2020-06-02  Xionghu Luo  <luo...@linux.ibm.com>
>
>         PR target/65421
>         * config/rs6000/rs6000-string.c (expand_block_move): Use
>         elt_mode to copy when homogeneous REAL_TYPE.
>
> gcc/testsuite/ChangeLog:
>
>         2020-06-02  Xionghu Luo  <luo...@linux.ibm.com>
>
>         PR target/65421
>         * gcc.target/powerpc/pr65421.c: New test.
> ---
>  gcc/config/rs6000/rs6000-string.c          | 15 ++++++++++++++-
>  gcc/testsuite/gcc.target/powerpc/pr65421.c | 17 +++++++++++++++++
>  2 files changed, 31 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c
>
> diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
> index fe7177f10fd..ea217840d88 100644
> --- a/gcc/config/rs6000/rs6000-string.c
> +++ b/gcc/config/rs6000/rs6000-string.c
> @@ -37,6 +37,7 @@
>  #include "target.h"
>  #include "profile-count.h"
>  #include "predict.h"
> +#include "rs6000-internal.h"
>
>  /* Expand a block clear operation, and return 1 if successful.  Return 0
>     if we should let the compiler generate normal code.
> @@ -2733,6 +2734,7 @@ expand_block_move (rtx operands[], bool might_overlap)
>    rtx loads[MAX_MOVE_REG];
>    rtx stores[MAX_MOVE_REG];
>    int num_reg = 0;
> +  machine_mode elt_mode = DImode;
>
>    /* If this is not a fixed size move, just call memcpy */
>    if (! constp)
> @@ -2750,6 +2752,17 @@ expand_block_move (rtx operands[], bool might_overlap)
>    if (bytes > rs6000_block_move_inline_limit)
>      return 0;
>
> +  tree type = TREE_TYPE (MEM_EXPR (orig_dest));
> +  if (TREE_CODE (type) == RECORD_TYPE
> +      && rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type, NULL,
> +                                               NULL))
> +    {
> +      tree field_type = TREE_TYPE (first_field (type));
> +      if (field_type && TREE_CODE (field_type) == ARRAY_TYPE
> +         && TREE_CODE (TREE_TYPE (field_type)) == REAL_TYPE)
> +       elt_mode = TYPE_MODE (TREE_TYPE (field_type));
> +    }
> +
>    for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
>      {
>        union {
> @@ -2771,7 +2784,7 @@ expand_block_move (rtx operands[], bool might_overlap)
>                && (align >= 64 || !STRICT_ALIGNMENT))
>         {
>           move_bytes = 8;
> -         mode = DImode;
> +         mode = elt_mode;
>           gen_func.mov = gen_movdi;
>           if (offset == 0 && align < 64)
>             {
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr65421.c b/gcc/testsuite/gcc.target/powerpc/pr65421.c
> new file mode 100644
> index 00000000000..ec8f4824de5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr65421.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +typedef struct
> +{
> +  double a[4];
> +} A;
> +
> +A
> +foo (const A *a)
> +{
> +  return *a;
> +}
> +
> +/* { dg-final { scan-assembler-not       {\mld\M}    } } */
> +/* { dg-final { scan-assembler-not       {\mstd\M}   } } */
> +/* { dg-final { scan-assembler-times     {\mlfd\M}  4 } } */
> --
> 2.21.0.777.g83232e3864
>

Reply via email to