Re: [PATCH][GCC-8][AArch64] PR target/90075 Prefer bsl/bit/bif for copysignf. (backport GCC-8)
On 29/04/2019 17:56, Srinath Parvathaneni wrote: > Hi All, > > This patch is to fix the ICE caused in expand pattern of copysignf > builtin. This is a back port to r267019 of trunk. > > Bootstrapped on aarch64-none-linux-gnu and regression tested on > aarch64-none-elf. > > Ok for gcc 8 branch? If ok, could someone commit the patch on my behalf, > I don't have commit rights. Applied. R. > > *** gcc/ChangeLog *** > > 2019-04-29 Srinath Parvathaneni > > Backport from mainline > 2018-12-11 Richard Earnshaw > > PR target/37369 > * config/aarch64/iterators.md (sizem1): Add sizes for > SFmode and DFmode. > (Vbtype): Add SFmode mapping. > * config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete. > (copysign3): New expand pattern. > (copysign3_insn): New insn pattern. > > *** gcc/testsuite/ChangeLog *** > > 2019-04-29 Srinath Parvathaneni > > PR target/90075 > * gcc.target/aarch64/pr90075.c: New test. > > > > rb0.patch > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index > 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..11c0ef00dcfd5fc41a0eb93c8d5b5bda8a6e6885 > 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -189,6 +189,7 @@ > UNSPEC_CLASTB > UNSPEC_FADDA > UNSPEC_REV_SUBREG > +UNSPEC_COPYSIGN > ]) > > (define_c_enum "unspecv" [ > @@ -5427,49 +5428,48 @@ > ;; LDR d2, #(1 << 63) > ;; BSL v2.8b, [y], [x] > ;; > -;; or another, equivalent, sequence using one of BSL/BIT/BIF. > -;; aarch64_simd_bsldf will select the best suited of these instructions > -;; to generate based on register allocation, and knows how to partially > -;; constant fold based on the values of X and Y, so expand through that. > - > -(define_expand "copysigndf3" > - [(match_operand:DF 0 "register_operand") > - (match_operand:DF 1 "register_operand") > - (match_operand:DF 2 "register_operand")] > +;; or another, equivalent, sequence using one of BSL/BIT/BIF. 
Because > +;; we expect these operations to nearly always operate on > +;; floating-point values, we do not want the operation to be > +;; simplified into a bit-field insert operation that operates on the > +;; integer side, since typically that would involve three inter-bank > +;; register copies. As we do not expect copysign to be followed by > +;; other logical operations on the result, it seems preferable to keep > +;; this as an unspec operation, rather than exposing the underlying > +;; logic to the compiler. > + > +(define_expand "copysign3" > + [(match_operand:GPF 0 "register_operand") > + (match_operand:GPF 1 "register_operand") > + (match_operand:GPF 2 "register_operand")] >"TARGET_FLOAT && TARGET_SIMD" > { > - rtx mask = gen_reg_rtx (DImode); > - emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63)); > - emit_insn (gen_aarch64_simd_bsldf (operands[0], mask, > - operands[2], operands[1])); > + rtx bitmask = gen_reg_rtx (mode); > + emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U > + << (GET_MODE_BITSIZE (mode) - 1))); > + emit_insn (gen_copysign3_insn (operands[0], operands[1], operands[2], > +bitmask)); >DONE; > } > ) > > -;; As above, but we must first get to a 64-bit value if we wish to use > -;; aarch64_simd_bslv2sf. > - > -(define_expand "copysignsf3" > - [(match_operand:SF 0 "register_operand") > - (match_operand:SF 1 "register_operand") > - (match_operand:SF 2 "register_operand")] > +(define_insn "copysign3_insn" > + [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r") > + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r") > + (match_operand:GPF 2 "register_operand" "w,w,0,0") > + (match_operand: 3 "register_operand" > + "0,w,w,X")] > + UNSPEC_COPYSIGN))] >"TARGET_FLOAT && TARGET_SIMD" > -{ > - rtx v_bitmask = gen_reg_rtx (V2SImode); > - > - /* Juggle modes to get us in to a vector mode for BSL. 
*/ > - rtx op1 = lowpart_subreg (DImode, operands[1], SFmode); > - rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode); > - rtx tmp = gen_reg_rtx (V2SFmode); > - emit_move_insn (v_bitmask, > - aarch64_simd_gen_const_vector_dup (V2SImode, > - HOST_WIDE_INT_M1U << 31)); > - emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1)); > - emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode)); > - DONE; > -} > + "@ > + bsl\\t%0., %2., %1. > + bit\\t%0., %2., %3. > + bif\\t%0., %1., %3. > + bfxil\\t%0, %1, #0, " > + [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,bfm")] > ) > > + > ;; For xorsign (x, y), we want to generate: > ;; > ;; LDR d2, #1<<63 > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index > 25991d97836498f48ca3
[PATCH][GCC-8][AArch64] PR target/90075 Prefer bsl/bit/bif for copysignf. (backport GCC-8)
Hi All,

This patch is to fix the ICE caused in the expand pattern of the copysignf
builtin.  This is a backport of trunk r267019.

Bootstrapped on aarch64-none-linux-gnu and regression tested on
aarch64-none-elf.

Ok for the gcc 8 branch?  If ok, could someone commit the patch on my
behalf?  I don't have commit rights.

*** gcc/ChangeLog ***

2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	Backport from mainline
	2018-12-11  Richard Earnshaw  <rearnsha@arm.com>

	PR target/37369
	* config/aarch64/iterators.md (sizem1): Add sizes for
	SFmode and DFmode.
	(Vbtype): Add SFmode mapping.
	* config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete.
	(copysign<GPF:mode>3): New expand pattern.
	(copysign<GPF:mode>3_insn): New insn pattern.

*** gcc/testsuite/ChangeLog ***

2019-04-29  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	PR target/90075
	* gcc.target/aarch64/pr90075.c: New test.


diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32a0e1f3685746b9a7d70239745586d0f0fa7ee1..11c0ef00dcfd5fc41a0eb93c8d5b5bda8a6e6885 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -189,6 +189,7 @@
   UNSPEC_CLASTB
   UNSPEC_FADDA
   UNSPEC_REV_SUBREG
+  UNSPEC_COPYSIGN
 ])
 
 (define_c_enum "unspecv" [
@@ -5427,49 +5428,48 @@
 ;;   LDR d2, #(1 << 63)
 ;;   BSL v2.8b, [y], [x]
 ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.
-;; aarch64_simd_bsldf will select the best suited of these instructions
-;; to generate based on register allocation, and knows how to partially
-;; constant fold based on the values of X and Y, so expand through that.
-
-(define_expand "copysigndf3"
-  [(match_operand:DF 0 "register_operand")
-   (match_operand:DF 1 "register_operand")
-   (match_operand:DF 2 "register_operand")]
+;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
+;; we expect these operations to nearly always operate on
+;; floating-point values, we do not want the operation to be
+;; simplified into a bit-field insert operation that operates on the
+;; integer side, since typically that would involve three inter-bank
+;; register copies.  As we do not expect copysign to be followed by
+;; other logical operations on the result, it seems preferable to keep
+;; this as an unspec operation, rather than exposing the underlying
+;; logic to the compiler.
+
+(define_expand "copysign<GPF:mode>3"
+  [(match_operand:GPF 0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")]
   "TARGET_FLOAT && TARGET_SIMD"
 {
-  rtx mask = gen_reg_rtx (DImode);
-  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63));
-  emit_insn (gen_aarch64_simd_bsldf (operands[0], mask,
-				     operands[2], operands[1]));
+  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
+				    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
+  emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
+				       bitmask));
   DONE;
 }
 )
 
-;; As above, but we must first get to a 64-bit value if we wish to use
-;; aarch64_simd_bslv2sf.
-
-(define_expand "copysignsf3"
-  [(match_operand:SF 0 "register_operand")
-   (match_operand:SF 1 "register_operand")
-   (match_operand:SF 2 "register_operand")]
+(define_insn "copysign<GPF:mode>3_insn"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r")
+	(unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r")
+		     (match_operand:GPF 2 "register_operand" "w,w,0,0")
+		     (match_operand:<V_INT_EQUIV> 3 "register_operand"
+		      "0,w,w,X")]
+	 UNSPEC_COPYSIGN))]
   "TARGET_FLOAT && TARGET_SIMD"
-{
-  rtx v_bitmask = gen_reg_rtx (V2SImode);
-
-  /* Juggle modes to get us in to a vector mode for BSL.  */
-  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
-  rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
-  rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_move_insn (v_bitmask,
-		  aarch64_simd_gen_const_vector_dup (V2SImode,
-						     HOST_WIDE_INT_M1U << 31));
-  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
-  emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
-  DONE;
-}
+  "@
+   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
+   bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
+   bif\\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
+   bfxil\\t%<w1>0, %<w1>1, #0, <sizem1>"
+  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,bfm")]
 )
 
+
 ;; For xorsign (x, y), we want to generate:
 ;;
 ;;   LDR   d2, #1<<63
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 25991d97836498f48ca30b8757ed7963217da417..21d66d36f82cff639fbaf2de70e97e9767b1c748 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -578,7 +578,8 @@
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
 
 ;; Give the ordinal of the MSB in the mode
-(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")])
+(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
+			  (HF "#15") (SF "#31") (DF "#63")])
 
 ;; Attribute to describe constants a