Re: [PATCH][GCC-8][AArch64] PR target/90075 Prefer bsl/bit/bif for copysignf. (backport GCC-8)

2019-04-30  Richard Earnshaw (lists)
On 29/04/2019 17:56, Srinath Parvathaneni wrote:
> Hi All,
> 
> This patch fixes an ICE in the expand pattern for the copysignf
> builtin. It is a backport of trunk r267019.
> 
> Bootstrapped on aarch64-none-linux-gnu and regression tested on 
> aarch64-none-elf.
> 
> Ok for the gcc-8 branch? If so, could someone commit the patch on my
> behalf? I don't have commit rights.

Applied.

R.


[PATCH][GCC-8][AArch64] PR target/90075 Prefer bsl/bit/bif for copysignf. (backport GCC-8)

2019-04-29  Srinath Parvathaneni
Hi All,

This patch fixes an ICE in the expand pattern for the copysignf
builtin. It is a backport of trunk r267019.

Bootstrapped on aarch64-none-linux-gnu and regression tested on 
aarch64-none-elf.

Ok for the gcc-8 branch? If so, could someone commit the patch on my
behalf? I don't have commit rights.
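
For reference, here is a scalar C model of the bit operation the new
pattern implements (an illustration only, not part of the patch; the
function name is made up):

  #include <stdint.h>
  #include <string.h>

  /* copysignf (x, y) keeps the magnitude of x and takes the sign of y.
     The pattern performs the same selection with a mask that has only
     the sign bit set, which is exactly what BSL/BIT/BIF compute.  */
  static float
  copysignf_model (float x, float y)
  {
    uint32_t ux, uy, mask = 1u << 31;   /* sign-bit mask for SFmode */
    memcpy (&ux, &x, sizeof ux);
    memcpy (&uy, &y, sizeof uy);
    ux = (uy & mask) | (ux & ~mask);    /* sign from y, rest from x */
    memcpy (&x, &ux, sizeof ux);
    return x;
  }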

*** gcc/ChangeLog ***

2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

Backport from mainline
2018-12-11  Richard Earnshaw  <rearnsha@arm.com>

PR target/37369
* config/aarch64/iterators.md (sizem1): Add sizes for
SFmode and DFmode.
(Vbtype): Add SFmode mapping.
* config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete.
(copysign<GPF:mode>3): New expand pattern.
(copysign<GPF:mode>3_insn): New insn pattern.

*** gcc/testsuite/ChangeLog ***

2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

PR target/90075
* gcc.target/aarch64/pr90075.c: New test.
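
For illustration, with the new pattern a scalar copysignf is expected
to compile to something along these lines (a sketch; the exact mask
materialisation and register allocation are up to the compiler):

  /* float f (float x, float y) { return __builtin_copysignf (x, y); } */
  f:
	movi	v2.2s, 0x80, lsl 24	// 0x80000000 per lane: sign-bit mask
	bit	v0.8b, v1.8b, v2.8b	// insert sign bit of y (v1) into x (v0)
	ret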


diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..11c0ef00dcfd5fc41a0eb93c8d5b5bda8a6e6885 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -189,6 +189,7 @@
 UNSPEC_CLASTB
 UNSPEC_FADDA
 UNSPEC_REV_SUBREG
+UNSPEC_COPYSIGN
 ])
 
 (define_c_enum "unspecv" [
@@ -5427,49 +5428,48 @@
 ;;   LDR d2, #(1 << 63)
 ;;   BSL v2.8b, [y], [x]
 ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.
-;; aarch64_simd_bsldf will select the best suited of these instructions
-;; to generate based on register allocation, and knows how to partially
-;; constant fold based on the values of X and Y, so expand through that.
-
-(define_expand "copysigndf3"
-  [(match_operand:DF 0 "register_operand")
-   (match_operand:DF 1 "register_operand")
-   (match_operand:DF 2 "register_operand")]
+;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
+;; we expect these operations to nearly always operate on
+;; floating-point values, we do not want the operation to be
+;; simplified into a bit-field insert operation that operates on the
+;; integer side, since typically that would involve three inter-bank
+;; register copies.  As we do not expect copysign to be followed by
+;; other logical operations on the result, it seems preferable to keep
+;; this as an unspec operation, rather than exposing the underlying
+;; logic to the compiler.
+
+(define_expand "copysign3"
+  [(match_operand:GPF 0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")]
   "TARGET_FLOAT && TARGET_SIMD"
 {
-  rtx mask = gen_reg_rtx (DImode);
-  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63));
-  emit_insn (gen_aarch64_simd_bsldf (operands[0], mask,
- operands[2], operands[1]));
+  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
+				    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
+  emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
+				       bitmask));
   DONE;
 }
 )
 
-;; As above, but we must first get to a 64-bit value if we wish to use
-;; aarch64_simd_bslv2sf.
-
-(define_expand "copysignsf3"
-  [(match_operand:SF 0 "register_operand")
-   (match_operand:SF 1 "register_operand")
-   (match_operand:SF 2 "register_operand")]
+(define_insn "copysign<GPF:mode>3_insn"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r")
+	(unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r")
+		     (match_operand:GPF 2 "register_operand" "w,w,0,0")
+		     (match_operand:<V_INT_EQUIV> 3 "register_operand"
+		      "0,w,w,X")]
+	 UNSPEC_COPYSIGN))]
   "TARGET_FLOAT && TARGET_SIMD"
-{
-  rtx v_bitmask = gen_reg_rtx (V2SImode);
-
-  /* Juggle modes to get us in to a vector mode for BSL.  */
-  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
-  rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
-  rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_move_insn (v_bitmask,
-		  aarch64_simd_gen_const_vector_dup (V2SImode,
-		 HOST_WIDE_INT_M1U << 31));
-  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
-  emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
-  DONE;
-}
+  "@
+   bsl\\t%0., %2., %1.
+   bit\\t%0., %2., %3.
+   bif\\t%0., %1., %3.
+   bfxil\\t%0, %1, #0, "
+  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,bfm")]
 )
 
+
 ;; For xorsign (x, y), we want to generate:
 ;;
 ;; LDR   d2, #1<<63
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 25991d97836498f48ca30b8757ed7963217da417..21d66d36f82cff639fbaf2de70e97e9767b1c748 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -578,7 +578,8 @@
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
 
 ;; Give the ordinal of the MSB in the mode
-(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")])
+(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
+			  (HF "#15") (SF "#31") (DF "#63")])
 
 ;; Attribute to describe constants a