Kyrylo Tkachov <ktkac...@nvidia.com> writes:
>> On 29 Jul 2025, at 18:41, Richard Sandiford <richard.sandif...@arm.com> wrote:
>>
>> This patch continues the work of making ACLE intrinsics use VNx16BI
>> for svbool_t results. It deals with the svpnext* intrinsics.
>>
>
> I wonder if the new patterns need pred_clobber alternatives in this and the
> other patches?
> If they do we can add them in follow-up patches I suppose.
I think the series preserves the status quo in terms of where pred_clobber is applied and where it isn't. But yeah, it seems that the status quo misses cases, here and in the *_cc patterns for MATCH/NMATCH. Like you say, I think we can deal with that separately, although the series does unfortunately add to the number of patterns that would need changing. Thanks for the reviews! Richard > Ok. > Thanks, > Kyrill > >> gcc/ >> * config/aarch64/iterators.md (PNEXT_ONLY): New int iterator. >> * config/aarch64/aarch64-sve.md >> (@aarch64_sve_<sve_pred_op><mode>): Restrict SVE_PITER pattern >> to VNx16BI_ONLY. >> (@aarch64_sve_<sve_pred_op><mode>): New PNEXT_ONLY pattern for >> PRED_HSD. >> (*aarch64_sve_<sve_pred_op><mode>): Likewise. >> (*aarch64_sve_<sve_pred_op><mode>_cc): Likewise. >> >> gcc/testsuite/ >> * gcc.target/aarch64/sve/acle/general/pnext_3.c: New test. >> --- >> gcc/config/aarch64/aarch64-sve.md | 77 ++++++++++- >> gcc/config/aarch64/iterators.md | 2 + >> .../aarch64/sve/acle/general/pnext_3.c | 130 ++++++++++++++++++ >> 3 files changed, 204 insertions(+), 5 deletions(-) >> create mode 100644 >> gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_3.c >> >> diff --git a/gcc/config/aarch64/aarch64-sve.md >> b/gcc/config/aarch64/aarch64-sve.md >> index b881ba851b6..8011227e2d9 100644 >> --- a/gcc/config/aarch64/aarch64-sve.md >> +++ b/gcc/config/aarch64/aarch64-sve.md >> @@ -11171,14 +11171,49 @@ (define_insn "*aarch64_brk<brk_op>_ptest" >> ;; ------------------------------------------------------------------------- >> >> (define_insn "@aarch64_sve_<sve_pred_op><mode>" >> - [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") >> - (unspec:PRED_ALL >> - [(match_operand:PRED_ALL 1 "register_operand" "Upa") >> + [(set (match_operand:VNx16BI_ONLY 0 "register_operand" "=Upa") >> + (unspec:VNx16BI_ONLY >> + [(match_operand:VNx16BI_ONLY 1 "register_operand" "Upa") >> (match_operand:SI 2 "aarch64_sve_ptrue_flag") >> - (match_operand:PRED_ALL 3 
"register_operand" "0")] >> + (match_operand:VNx16BI_ONLY 3 "register_operand" "0")] >> SVE_PITER)) >> (clobber (reg:CC_NZC CC_REGNUM))] >> - "TARGET_SVE && <max_elem_bits> >= <elem_bits>" >> + "TARGET_SVE" >> + "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" >> +) >> + >> +(define_expand "@aarch64_sve_<sve_pred_op><mode>" >> + [(parallel >> + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") >> + (and:VNx16BI >> + (subreg:VNx16BI >> + (unspec:PRED_HSD >> + [(match_operand:PRED_HSD 1 "register_operand" "Upa") >> + (match_operand:SI 2 "aarch64_sve_ptrue_flag") >> + (match_operand:PRED_HSD 3 "register_operand" "0")] >> + PNEXT_ONLY) >> + 0) >> + (match_dup 4))) >> + (clobber (reg:CC_NZC CC_REGNUM))])] >> + "TARGET_SVE" >> + { >> + operands[4] = aarch64_ptrue_all (<data_bytes>); >> + } >> +) >> + >> +(define_insn "*aarch64_sve_<sve_pred_op><mode>" >> + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") >> + (and:VNx16BI >> + (subreg:VNx16BI >> + (unspec:PRED_HSD >> + [(match_operand:PRED_HSD 1 "register_operand" "Upa") >> + (match_operand:SI 2 "aarch64_sve_ptrue_flag") >> + (match_operand:PRED_HSD 3 "register_operand" "0")] >> + PNEXT_ONLY) >> + 0) >> + (match_operand:PRED_HSD 4 "aarch64_ptrue_all_operand"))) >> + (clobber (reg:CC_NZC CC_REGNUM))] >> + "TARGET_SVE" >> "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" >> ) >> >> @@ -11212,6 +11247,38 @@ (define_insn_and_rewrite >> "*aarch64_sve_<sve_pred_op><mode>_cc" >> } >> ) >> >> +(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc" >> + [(set (reg:CC_NZC CC_REGNUM) >> + (unspec:CC_NZC >> + [(match_operand:VNx16BI 1 "register_operand" "Upa") >> + (match_operand 2) >> + (match_operand:SI 3 "aarch64_sve_ptrue_flag") >> + (unspec:PRED_HSD >> + [(match_operand 4) >> + (match_operand:SI 5 "aarch64_sve_ptrue_flag") >> + (match_operand:PRED_HSD 6 "register_operand" "0")] >> + PNEXT_ONLY)] >> + UNSPEC_PTEST)) >> + (set (match_operand:VNx16BI 0 "register_operand" "=Upa") >> + (and:VNx16BI >> + 
(subreg:VNx16BI >> + (unspec:PRED_HSD >> + [(match_dup 4) >> + (match_dup 5) >> + (match_dup 6)] >> + PNEXT_ONLY) >> + 0) >> + (match_operand:PRED_HSD 7 "aarch64_ptrue_all_operand")))] >> + "TARGET_SVE >> + && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" >> + "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" >> + "&& !rtx_equal_p (operands[2], operands[4])" >> + { >> + operands[4] = operands[2]; >> + operands[5] = operands[3]; >> + } >> +) >> + >> ;; Same, but with only the flags result being interesting. >> (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest" >> [(set (reg:CC_NZC CC_REGNUM) >> diff --git a/gcc/config/aarch64/iterators.md >> b/gcc/config/aarch64/iterators.md >> index e619af14928..8f8237edf6c 100644 >> --- a/gcc/config/aarch64/iterators.md >> +++ b/gcc/config/aarch64/iterators.md >> @@ -3880,6 +3880,8 @@ (define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN >> UNSPEC_BRKPA UNSPEC_BRKPB]) >> >> (define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT]) >> >> +(define_int_iterator PNEXT_ONLY [UNSPEC_PNEXT]) >> + >> (define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL >> UNSPEC_USMATMUL]) >> >> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_3.c >> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_3.c >> new file mode 100644 >> index 00000000000..d9c009062d7 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_3.c >> @@ -0,0 +1,130 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> +/* { dg-final { check-function-bodies "**" "" } } */ >> + >> +#include <arm_sve.h> >> + >> +#ifdef __cplusplus >> +extern "C" { >> +#endif >> + >> +/* >> +** test1: >> +** pnext p0\.h, p1, p0\.h >> +** ret >> +*/ >> +svbool_t >> +test1 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b8 (), >> + svpnext_b16 (prev, pg), >> + svptrue_b16 ()); >> +} >> + >> +/* >> +** test2: >> +** pnext p0\.h, p1, p0\.h >> +** ret >> +*/ >> +svbool_t >> +test2 (svbool_t pg, 
svbool_t prev) >> +{ >> + return svand_z (svptrue_b16 (), >> + svpnext_b16 (prev, pg), >> + svptrue_b8 ()); >> +} >> + >> +/* >> +** test3: >> +** pnext p0\.h, p1, p0\.h >> +** ret >> +*/ >> +svbool_t >> +test3 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b16 (), >> + svpnext_b16 (prev, pg), >> + svptrue_b16 ()); >> +} >> + >> +/* >> +** test4: >> +** pnext p0\.s, p1, p0\.s >> +** ret >> +*/ >> +svbool_t >> +test4 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b32 (), >> + svpnext_b32 (prev, pg), >> + svptrue_b8 ()); >> +} >> + >> +/* >> +** test5: >> +** pnext p0\.s, p1, p0\.s >> +** ret >> +*/ >> +svbool_t >> +test5 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b16 (), >> + svpnext_b32 (prev, pg), >> + svptrue_b8 ()); >> +} >> + >> +/* >> +** test6: >> +** pnext p0\.s, p1, p0\.s >> +** ret >> +*/ >> +svbool_t >> +test6 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b8 (), >> + svpnext_b32 (prev, pg), >> + svptrue_b32 ()); >> +} >> + >> +/* >> +** test7: >> +** pnext p0\.d, p1, p0\.d >> +** ret >> +*/ >> +svbool_t >> +test7 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b16 (), >> + svpnext_b64 (prev, pg), >> + svptrue_b8 ()); >> +} >> + >> +/* >> +** test8: >> +** pnext p0\.d, p1, p0\.d >> +** ret >> +*/ >> +svbool_t >> +test8 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b32 (), >> + svpnext_b64 (prev, pg), >> + svptrue_b8 ()); >> +} >> + >> +/* >> +** test9: >> +** pnext p0\.d, p1, p0\.d >> +** ret >> +*/ >> +svbool_t >> +test9 (svbool_t pg, svbool_t prev) >> +{ >> + return svand_z (svptrue_b8 (), >> + svpnext_b64 (prev, pg), >> + svptrue_b64 ()); >> +} >> + >> +#ifdef __cplusplus >> +} >> +#endif >> -- >> 2.43.0 >>