Re: [PATCH, AARCH64] improve float/double 0.0 support

James Greenhalgh Fri, 19 Jun 2015 10:08:15 -0700

On Fri, Jun 19, 2015 at 05:43:01PM +0100, Jim Wilson wrote:
> This is a follow on to the long double 0.0 patch.  The float and
> double support has similar problems and needs similar fixes, though a
> little smaller in scope.  Before the patch this testcase
> void sub1 (float *f)  { *f = 0.0; }
> void sub2 (double *d) { *d = 0.0; }
> gives assembly code
> sub1:
>         fmov    s0, wzr
>         str     s0, [x0]
> ...
> sub2:
>         fmov    d0, xzr
>         str     d0, [x0]
> after the patch, the assembly code is
> sub1:
>         str     wzr, [x0]
> ...
> sub2:
>         str     xzr, [x0]
> 
> The fixes are along the same lines as the earlier long double patch.
> The expander is changed so that a zero source isn't forced into a reg,
> the patterns are changed to allow a zero source, and the instruction
> types are changed because storing zero to mem is an integer
> instruction not an FP instruction.
> 
> Testing showed that we have testcases for float/double 0, but the
> testcases are a little confused as they are storing 0 to mem and
> expecting to see an fmov instruction.  I split the testcases into two,
> so we can have a 0 to reg case that expects fmov, and a 0 to mem case
> that expects str of the zero reg.  I also added the equivalent missing
> long double testcases.
> 
> This was tested with a default languages make bootstrap and make
> check.  I see an additional 8 passes, and no other change in the
> testsuite results.
>
> gcc/
> 2015-06-19  Jim Wilson  <jim.wil...@linaro.org>
> 
>       * config/aarch64/aarch64.md (mov<mode>:GPF): Don't call force_reg if
>       op1 is an fp zero.
>       (movsf_aarch64): Change condition from register_operand to
>       aarch64_reg_or_fp_zero for op1.  Change type for alternative 6 to
>       load1.  Change type for alternative 7 to store1.
>       (movdf_aarch64): Likewise.
> 
> gcc/testsuite/
> 2015-06-19  Jim Wilson  <jim.wil...@linaro.org>
> 
>       * gcc.target/aarch64/fmovd-zero-mem.c: New.
>       * gcc.target/aarch64/fmovd-zero-reg.c: New.
>       * gcc.target/aarch64/fmovf-zero-mem.c: New.
>       * gcc.target/aarch64/fmovf-zero-reg.c: New.
>       * gcc.target/aarch64/fmovld-zero-mem.c: New.
>       * gcc.target/aarch64/fmovld-zero-reg.c: New.
>       * gcc.target/aarch64/fmovd-zero.c: Delete.
>       * gcc.target/aarch64/fmovf-zero.c: Delete.


This is OK.

Thanks,
James

> Index: config/aarch64/aarch64.md
> ===================================================================
> --- config/aarch64/aarch64.md (revision 224493)
> +++ config/aarch64/aarch64.md (working copy)
> @@ -986,7 +986,9 @@ (define_expand "mov<mode>"
>       FAIL;
>       }
>  
> -    if (GET_CODE (operands[0]) == MEM)
> +    if (GET_CODE (operands[0]) == MEM
> +        && ! (GET_CODE (operands[1]) == CONST_DOUBLE
> +           && aarch64_float_const_zero_rtx_p (operands[1])))
>        operands[1] = force_reg (<MODE>mode, operands[1]);
>    "
>  )
> @@ -995,7 +997,7 @@ (define_insn "*movsf_aarch64"
>    [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
>       (match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], SFmode)
> -    || register_operand (operands[1], SFmode))"
> +    || aarch64_reg_or_fp_zero (operands[1], SFmode))"
>    "@
>     fmov\\t%s0, %w1
>     fmov\\t%w0, %s1
> @@ -1007,14 +1009,14 @@ (define_insn "*movsf_aarch64"
>     str\\t%w1, %0
>     mov\\t%w0, %w1"
>    [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
> -                     f_loads,f_stores,f_loads,f_stores,mov_reg")]
> +                     f_loads,f_stores,load1,store1,mov_reg")]
>  )
>  
>  (define_insn "*movdf_aarch64"
>    [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
>       (match_operand:DF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], DFmode)
> -    || register_operand (operands[1], DFmode))"
> +    || aarch64_reg_or_fp_zero (operands[1], DFmode))"
>    "@
>     fmov\\t%d0, %x1
>     fmov\\t%x0, %d1
> @@ -1026,7 +1028,7 @@ (define_insn "*movdf_aarch64"
>     str\\t%x1, %0
>     mov\\t%x0, %x1"
>    [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\
> -                     f_loadd,f_stored,f_loadd,f_stored,mov_reg")]
> +                     f_loadd,f_stored,load1,store1,mov_reg")]
>  )
>  
>  (define_expand "movtf"
> Index: testsuite/gcc.target/aarch64/fmovd-zero-mem.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovd-zero-mem.c     (revision 0)
> +++ testsuite/gcc.target/aarch64/fmovd-zero-mem.c     (working copy)
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void
> +foo (double *output)
> +{
> +  *output = 0.0;
> +}
> +
> +/* { dg-final { scan-assembler "str\\txzr, \\\[x0\\\]" } } */
> Index: testsuite/gcc.target/aarch64/fmovd-zero-reg.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovd-zero-reg.c     (revision 0)
> +++ testsuite/gcc.target/aarch64/fmovd-zero-reg.c     (working copy)
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (double);
> +void
> +foo (void)
> +{
> +  bar (0.0);
> +}
> +
> +/* { dg-final { scan-assembler "fmov\\td0, xzr" } } */
> Index: testsuite/gcc.target/aarch64/fmovd-zero.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovd-zero.c (revision 224493)
> +++ testsuite/gcc.target/aarch64/fmovd-zero.c (working copy)
> @@ -1,10 +0,0 @@
> -/* { dg-do compile } */
> -/* { dg-options "-O2" } */
> -
> -void
> -foo (double *output)
> -{
> -  *output = 0.0;
> -}
> -
> -/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, xzr" } } */
> Index: testsuite/gcc.target/aarch64/fmovf-zero-mem.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovf-zero-mem.c     (revision 0)
> +++ testsuite/gcc.target/aarch64/fmovf-zero-mem.c     (working copy)
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void
> +foo (float *output)
> +{
> +  *output = 0.0;
> +}
> +
> +/* { dg-final { scan-assembler "str\\twzr, \\\[x0\\\]" } } */
> Index: testsuite/gcc.target/aarch64/fmovf-zero-reg.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovf-zero-reg.c     (revision 0)
> +++ testsuite/gcc.target/aarch64/fmovf-zero-reg.c     (working copy)
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (float);
> +void
> +foo (void)
> +{
> +  bar (0.0);
> +}
> +
> +/* { dg-final { scan-assembler "fmov\\ts0, wzr" } } */
> Index: testsuite/gcc.target/aarch64/fmovf-zero.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovf-zero.c (revision 224493)
> +++ testsuite/gcc.target/aarch64/fmovf-zero.c (working copy)
> @@ -1,10 +0,0 @@
> -/* { dg-do compile } */
> -/* { dg-options "-O2" } */
> -
> -void
> -foo (float *output)
> -{
> -  *output = 0.0;
> -}
> -
> -/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, wzr" } } */
> Index: testsuite/gcc.target/aarch64/fmovld-zero-mem.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovld-zero-mem.c    (revision 0)
> +++ testsuite/gcc.target/aarch64/fmovld-zero-mem.c    (working copy)
> @@ -0,0 +1,10 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void
> +foo (long double *output)
> +{
> +  *output = 0.0;
> +}
> +
> +/* { dg-final { scan-assembler "stp\\txzr, xzr, \\\[x0\\\]" } } */
> Index: testsuite/gcc.target/aarch64/fmovld-zero-reg.c
> ===================================================================
> --- testsuite/gcc.target/aarch64/fmovld-zero-reg.c    (revision 0)
> +++ testsuite/gcc.target/aarch64/fmovld-zero-reg.c    (working copy)
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (long double);
> +void
> +foo (void)
> +{
> +  bar (0.0);
> +}
> +
> +/* { dg-final { scan-assembler "movi\\tv0\.2d, #0" } } */

Re: [PATCH, AARCH64] improve float/double 0.0 support

Reply via email to