Yuliang Wang <yuliang.w...@arm.com> writes:
> Hi Richard,
>
> Thanks for the suggestions, updated.
>
> Regards,
> Yuliang
>
>
> gcc/ChangeLog:
>
> 2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>
>
>       * config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
>       (aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
>       (aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
>       (aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
>       New combine patterns.
>       * config/aarch64/iterators.md (BSL_DUP): New int iterator for the above.
>       (bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
>       * config/aarch64/aarch64.h (AARCH64_ISA_SVE2_SHA3): New ISA flag macro.
>       (TARGET_SVE2_SHA3): New CPU target.
>
> gcc/testsuite/ChangeLog:
>
> 2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>
>
>       * gcc.target/aarch64/sve2/eor3_1.c: New test.
>       * gcc.target/aarch64/sve2/eor3_2.c: As above.
>       * gcc.target/aarch64/sve2/nlogic_1.c: As above.
>       * gcc.target/aarch64/sve2/nlogic_2.c: As above.
>       * gcc.target/aarch64/sve2/bitsel_1.c: As above.
>       * gcc.target/aarch64/sve2/bitsel_2.c: As above.
>       * gcc.target/aarch64/sve2/bitsel_3.c: As above.
>       * gcc.target/aarch64/sve2/bitsel_4.c: As above.
>
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index b018f5b0bc9b51edf831e2571f0f5a9af2210829..08d5214a3debb9e9a0796da0af3009ed3ff55774 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -142,3 +142,189 @@
>    }
>  )
>  
> +;; Unpredicated 3-way exclusive OR.
> +(define_insn "*aarch64_sve2_eor3<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
> +     (xor:SVE_I
> +       (xor:SVE_I
> +         (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
> +         (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
> +       (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
> +  "TARGET_SVE2_SHA3"

EOR3 is part of base SVE2; it doesn't require the SHA3 extension.
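I.e. the pattern condition can simply be:

  "TARGET_SVE2"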

> +;; Unpredicated bitwise select.
> +;; N.B. non-canonical equivalent form due to expand pass.

Think it would be better to drop this line (and similarly for
the patterns below).  The form isn't non-canonical -- there just
isn't a defined canonical form here. :-)  It is the expected form
as things stand.
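So the comment would just read:

;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)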

> +;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
> +(define_insn "*aarch64_sve2_bsl<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +     (xor:SVE_I
> +       (and:SVE_I
> +         (xor:SVE_I
> +           (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +           (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +         (match_operand:SVE_I 3 "register_operand" "w, w"))
> +       (match_dup BSL_DUP)))]
> +  "TARGET_SVE2"
> +  "@
> +  bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +  movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise inverted select.
> +;; N.B. non-canonical equivalent form.
> +;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
> +(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +     (unspec:SVE_I
> +       [(match_operand 4)
> +        (not:SVE_I
> +          (xor:SVE_I
> +            (and:SVE_I
> +              (xor:SVE_I
> +                (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +                (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +              (match_operand:SVE_I 3 "register_operand" "w, w"))
> +            (match_dup BSL_DUP)))]
> +       UNSPEC_PRED_X))]
> +  "TARGET_SVE2"
> +  "@
> +  nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +  movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted first operand.
> +;; N.B. non-canonical equivalent form.
> +;; (op3 ? ~bsl_mov : bsl_dup) == (((~bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)

That's true, but I think:

;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)

is clearer, to match the rtl.
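
FWIW, a quick exhaustive check over bytes agrees with both spellings
(standalone sanity-check snippet, not part of the patch):

#include <stdint.h>
#include <stdlib.h>

int
main (void)
{
  for (unsigned m = 0; m < 256; ++m)	/* bsl_mov */
    for (unsigned d = 0; d < 256; ++d)	/* bsl_dup */
      for (unsigned o = 0; o < 256; ++o)	/* op3 */
	{
	  /* Bitwise (op3 ? ~bsl_mov : bsl_dup).  */
	  uint8_t ref = (o & ~m) | (~o & d);
	  /* The spelling in the patch comment.  */
	  uint8_t orig = ((~m ^ d) & o) ^ d;
	  /* The spelling suggested above, matching the rtl.  */
	  uint8_t alt = (~(m ^ d) & o) ^ d;
	  if (ref != orig || ref != alt)
	    abort ();
	}
  return 0;
}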

> +(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +     (xor:SVE_I
> +       (and:SVE_I
> +         (unspec:SVE_I
> +           [(match_operand 4)
> +            (not:SVE_I
> +              (xor:SVE_I
> +                (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +                (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
> +           UNSPEC_PRED_X)
> +         (match_operand:SVE_I 3 "register_operand" "w, w"))
> +       (match_dup BSL_DUP)))]
> +  "TARGET_SVE2"
> +  "@
> +  bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +  movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"

Would be good to have a comment here too:

;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))

> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +     (ior:SVE_I
> +       (and:SVE_I
> +         (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +         (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +       (unspec:SVE_I
> +         [(match_operand 4)
> +          (and:SVE_I
> +            (not:SVE_I
> +              (match_operand:SVE_I 3 "register_operand" "w, w"))
> +            (not:SVE_I
> +              (match_dup BSL_DUP)))]
> +         UNSPEC_PRED_X)))]
> +  "TARGET_SVE2"
> +  "@
> +  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> +  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand, alternative form.

;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))

> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +     (ior:SVE_I
> +       (and:SVE_I
> +         (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +         (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +       (unspec:SVE_I
> +         [(match_operand 4)
> +          (and:SVE_I
> +            (not:SVE_I
> +              (match_dup BSL_DUP))
> +            (not:SVE_I
> +              (match_operand:SVE_I 3 "register_operand" "w, w")))]
> +         UNSPEC_PRED_X)))]
> +  "TARGET_SVE2"
> +  "@
> +  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> +  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index abd14a2f92c06828adfc6d2e2e81b63a6163d3a3..cad401ceb2419b6a0a64f2396c8e7d5b9105fb22 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -236,6 +236,7 @@ extern unsigned aarch64_architecture_version;
>  #define AARCH64_ISA_F16                 (aarch64_isa_flags & AARCH64_FL_F16)
>  #define AARCH64_ISA_SVE            (aarch64_isa_flags & AARCH64_FL_SVE)
>  #define AARCH64_ISA_SVE2        (aarch64_isa_flags & AARCH64_FL_SVE2)
> +#define AARCH64_ISA_SVE2_SHA3           (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
>  #define AARCH64_ISA_V8_3        (aarch64_isa_flags & AARCH64_FL_V8_3)
>  #define AARCH64_ISA_DOTPROD     (aarch64_isa_flags & AARCH64_FL_DOTPROD)
>  #define AARCH64_ISA_AES                 (aarch64_isa_flags & AARCH64_FL_AES)
> @@ -285,6 +286,9 @@ extern unsigned aarch64_architecture_version;
>  /* SVE2 instructions, enabled through +sve2.  */
>  #define TARGET_SVE2 (AARCH64_ISA_SVE2)
>  
> +/* SVE2 SHA3 instructions, enabled through +sve2-sha3.  */
> +#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3)
> +
>  /* ARMv8.3-A features.  */
>  #define TARGET_ARMV8_3       (AARCH64_ISA_V8_3)

With the above change, these macros aren't needed, so the aarch64.h
hunk (and its ChangeLog entry) can be dropped.

Thanks,
Richard
