Yuliang Wang <yuliang.w...@arm.com> writes:
> Hi Richard,
>
> Thanks for the suggestions, updated.
>
> Regards,
> Yuliang
>
>
> gcc/ChangeLog:
>
> 2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>
>
>         * config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
>         (aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
>         (aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
>         (aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
>         New combine patterns.
>         * config/aarch64/iterators.md (BSL_DUP): New int iterator for the above.
>         (bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
>         * config/aarch64/aarch64.h (AARCH64_ISA_SVE2_SHA3): New ISA flag macro.
>         (TARGET_SVE2_SHA3): New CPU target.
>
> gcc/testsuite/ChangeLog:
>
> 2019-10-17  Yuliang Wang  <yuliang.w...@arm.com>
>
>         * gcc.target/aarch64/sve2/eor3_1.c: New test.
>         * gcc.target/aarch64/sve2/eor3_2.c: As above.
>         * gcc.target/aarch64/sve2/nlogic_1.c: As above.
>         * gcc.target/aarch64/sve2/nlogic_2.c: As above.
>         * gcc.target/aarch64/sve2/bitsel_1.c: As above.
>         * gcc.target/aarch64/sve2/bitsel_2.c: As above.
>         * gcc.target/aarch64/sve2/bitsel_3.c: As above.
>         * gcc.target/aarch64/sve2/bitsel_4.c: As above.
>
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
> index b018f5b0bc9b51edf831e2571f0f5a9af2210829..08d5214a3debb9e9a0796da0af3009ed3ff55774 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -142,3 +142,189 @@
>  }
>  )
>
> +;; Unpredicated 3-way exclusive OR.
> +(define_insn "*aarch64_sve2_eor3<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
> +        (xor:SVE_I
> +          (xor:SVE_I
> +            (match_operand:SVE_I 1 "register_operand" "0, w, w, w")
> +            (match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
> +          (match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
> +  "TARGET_SVE2_SHA3"

EOR3 is part of base SVE2; it doesn't require the SHA3 extension.
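
FWIW, a kernel along these lines (a hypothetical sketch, not
necessarily what the new eor3_*.c tests contain; the function name is
made up) is the kind of input that should then match with plain +sve2:

  /* Compile with e.g. -O3 -march=armv8-a+sve2.  */
  void
  eor3_loop (unsigned int *restrict a, unsigned int *restrict b,
             unsigned int *restrict c, int n)
  {
    for (int i = 0; i < n; ++i)
      /* Three-way XOR; the (a ^ b) ^ c form is what the combine
         pattern above matches, giving a single EOR3.  */
      a[i] = a[i] ^ b[i] ^ c[i];
  }
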
> +;; Unpredicated bitwise select.
> +;; N.B. non-canonical equivalent form due to expand pass.

Think it would be better to drop this line (and similarly for the
patterns below).  The form isn't non-canonical -- there just isn't
a defined canonical form here. :-)  It is the expected form as
things stand.

> +;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
> +(define_insn "*aarch64_sve2_bsl<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (xor:SVE_I
> +          (and:SVE_I
> +            (xor:SVE_I
> +              (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +              (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +            (match_operand:SVE_I 3 "register_operand" "w, w"))
> +          (match_dup BSL_DUP)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise inverted select.
> +;; N.B. non-canonical equivalent form.
> +;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
> +(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (unspec:SVE_I
> +          [(match_operand 4)
> +           (not:SVE_I
> +             (xor:SVE_I
> +               (and:SVE_I
> +                 (xor:SVE_I
> +                   (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +                   (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +                 (match_operand:SVE_I 3 "register_operand" "w, w"))
> +               (match_dup BSL_DUP)))]
> +          UNSPEC_PRED_X))]
> +  "TARGET_SVE2"
> +  "@
> +   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted first operand.
> +;; N.B. non-canonical equivalent form.
> +;; (op3 ? ~bsl_mov : bsl_dup) == (((~bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)

That's true, but I think:

;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)

is clearer, to match the rtl.

> +(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (xor:SVE_I
> +          (and:SVE_I
> +            (unspec:SVE_I
> +              [(match_operand 4)
> +               (not:SVE_I
> +                 (xor:SVE_I
> +                   (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +                   (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
> +              UNSPEC_PRED_X)
> +            (match_operand:SVE_I 3 "register_operand" "w, w"))
> +          (match_dup BSL_DUP)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
> +   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand.
> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"

Would be good to have a comment here too:

;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
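
For anyone who wants to double-check them, the bsl/bsl1n/bsl2n
identities are easy to verify exhaustively over single bits -- a
throwaway sketch (not part of the patch), with sel standing for the
select operand and x/y for the other two in each identity, and with
~ modelled as ! on single bits:

  #include <assert.h>

  int
  main (void)
  {
    for (int sel = 0; sel <= 1; ++sel)
      for (int x = 0; x <= 1; ++x)
        for (int y = 0; y <= 1; ++y)
          {
            /* bsl: (sel ? x : y) == (((x ^ y) & sel) ^ y).  */
            assert ((sel ? x : y) == (((x ^ y) & sel) ^ y));
            /* bsl1n: (sel ? ~x : y) == ((~(x ^ y) & sel) ^ y).  */
            assert ((sel ? !x : y) == ((!(x ^ y) & sel) ^ y));
            /* bsl2n: (sel ? x : ~y) == ((sel & x) | (~y & ~sel)).  */
            assert ((sel ? x : !y) == ((sel & x) | (!y & !sel)));
          }
    return 0;
  }
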
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (ior:SVE_I
> +          (and:SVE_I
> +            (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +            (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +          (unspec:SVE_I
> +            [(match_operand 4)
> +             (and:SVE_I
> +               (not:SVE_I
> +                 (match_operand:SVE_I 3 "register_operand" "w, w"))
> +               (not:SVE_I
> +                 (match_dup BSL_DUP)))]
> +            UNSPEC_PRED_X)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> +   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> +;; Unpredicated bitwise select with inverted second operand, alternative form.

;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))

> +(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
> +        (ior:SVE_I
> +          (and:SVE_I
> +            (match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
> +            (match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
> +          (unspec:SVE_I
> +            [(match_operand 4)
> +             (and:SVE_I
> +               (not:SVE_I
> +                 (match_dup BSL_DUP))
> +               (not:SVE_I
> +                 (match_operand:SVE_I 3 "register_operand" "w, w")))]
> +            UNSPEC_PRED_X)))]
> +  "TARGET_SVE2"
> +  "@
> +   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
> +   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
> +  "&& !CONSTANT_P (operands[4])"
> +  {
> +    operands[4] = CONSTM1_RTX (<VPRED>mode);
> +  }
> +  [(set_attr "movprfx" "*,yes")]
> +)
> +
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index abd14a2f92c06828adfc6d2e2e81b63a6163d3a3..cad401ceb2419b6a0a64f2396c8e7d5b9105fb22 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -236,6 +236,7 @@ extern unsigned aarch64_architecture_version;
>  #define AARCH64_ISA_F16            (aarch64_isa_flags & AARCH64_FL_F16)
>  #define AARCH64_ISA_SVE            (aarch64_isa_flags & AARCH64_FL_SVE)
>  #define AARCH64_ISA_SVE2           (aarch64_isa_flags & AARCH64_FL_SVE2)
> +#define AARCH64_ISA_SVE2_SHA3      (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
>  #define AARCH64_ISA_V8_3           (aarch64_isa_flags & AARCH64_FL_V8_3)
>  #define AARCH64_ISA_DOTPROD        (aarch64_isa_flags & AARCH64_FL_DOTPROD)
>  #define AARCH64_ISA_AES            (aarch64_isa_flags & AARCH64_FL_AES)
> @@ -285,6 +286,9 @@ extern unsigned aarch64_architecture_version;
>  /* SVE2 instructions, enabled through +sve2.  */
>  #define TARGET_SVE2 (AARCH64_ISA_SVE2)
>  
> +/* SVE2 SHA3 instructions, enabled through +sve2-sha3.  */
> +#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3)
> +
>  /* ARMv8.3-A features.  */
>  #define TARGET_ARMV8_3  (AARCH64_ISA_V8_3)

With the above change, these macros aren't needed.

Thanks,
Richard
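
P.S. In concrete terms: with EOR3 moved to base SVE2, the eor3
pattern's condition just becomes "TARGET_SVE2", and the
AARCH64_ISA_SVE2_SHA3/TARGET_SVE2_SHA3 hunk above can be dropped
entirely.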