Re: [pushed] aarch64: Fix bogus cnot optimisation [PR114603]
Richard Biener writes: > On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford >> This isn't a regression on a known testcase. However, it's a nasty >> wrong code bug that could conceivably trigger for autovec code (although >> I've not been able to construct a reproducer so far). That fix is also >> quite localised to the buggy operation. I'd therefore prefer to push >> the fix now rather than wait for GCC 15. > > wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt > from the regression-only fixing. In practice every such bug will be a > regression, > in this case to when the combining pattern was introduced (unless that was > with the version with the initial introduction of the port of course). Ah, thanks, hadn't realised that. Makes sense though. It's good news of a sort since unfortunately I've another SVE wrong-code fix in the works... Richard
Re: [pushed] aarch64: Fix bogus cnot optimisation [PR114603]
On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford wrote: > > aarch64-sve.md had a pattern that combined: > > cmpeq pb.T, pa/z, zc.T, #0 > mov zd.T, pb/z, #1 > > into: > > cnotzd.T, pa/m, zc.T > > But this is only valid if pa.T is a ptrue. In other cases, the > original would set inactive elements of zd.T to 0, whereas the > combined form would copy elements from zc.T. > > This isn't a regression on a known testcase. However, it's a nasty > wrong code bug that could conceivably trigger for autovec code (although > I've not been able to construct a reproducer so far). That fix is also > quite localised to the buggy operation. I'd therefore prefer to push > the fix now rather than wait for GCC 15. wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt from the regression-only fixing. In practice every such bug will be a regression, in this case to when the combining pattern was introduced (unless that was with the version with the initial introduction of the port of course). Richard. > Tested on aarch64-linux-gnu & pushed. I'll backport to branches if > there is no fallout. > > Richard > > gcc/ > PR target/114603 > * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot): Replace > with... > (@aarch64_ptrue_cnot): ...this, requiring operand 1 to be > a ptrue. > (*cnot): Require operand 1 to be a ptrue. > * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand): > Use aarch64_ptrue_cnot for _x operations that are predicated > with a ptrue. Represent other _x operations as fully-defined _m > operations. > > gcc/testsuite/ > PR target/114603 > * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test. > --- > .../aarch64/aarch64-sve-builtins-base.cc | 25 --- > gcc/config/aarch64/aarch64-sve.md | 22 > .../aarch64/sve/acle/general/cnot_1.c | 23 + > 3 files changed, 50 insertions(+), 20 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c > > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > index 257ca5bf6ad..5be2315a3c6 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > @@ -517,15 +517,22 @@ public: >expand (function_expander ) const override >{ > machine_mode mode = e.vector_mode (0); > -if (e.pred == PRED_x) > - { > - /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs > - a ptrue hint. */ > - e.add_ptrue_hint (0, e.gp_mode (0)); > - return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); > - } > - > -return e.use_cond_insn (code_for_cond_cnot (mode), 0); > +machine_mode pred_mode = e.gp_mode (0); > +/* The underlying _x pattern is effectively: > + > +dst = src == 0 ? 1 : 0 > + > + rather than an UNSPEC_PRED_X. Using this form allows autovec > + constructs to be matched by combine, but it means that the > + predicate on the src == 0 comparison must be all-true. > + > + For simplicity, represent other _x operations as fully-defined _m > + operations rather than using a separate bespoke pattern. */ > +if (e.pred == PRED_x > + && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode)) > + return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode)); > +return e.use_cond_insn (code_for_cond_cnot (mode), > + e.pred == PRED_x ? 1 : 0); >} > }; > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index eca8623e587..0434358122d 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -3363,24 +3363,24 @@ (define_insn_and_split > "trunc2" > ;; - CNOT > ;; - > > -;; Predicated logical inverse. > -(define_expand "@aarch64_pred_cnot" > +;; Logical inverse, predicated with a ptrue. > +(define_expand "@aarch64_ptrue_cnot" >[(set (match_operand:SVE_FULL_I 0 "register_operand") > (unspec:SVE_FULL_I > [(unspec: > [(match_operand: 1 "register_operand") > - (match_operand:SI 2 "aarch64_sve_ptrue_flag") > + (const_int SVE_KNOWN_PTRUE) > (eq: > - (match_operand:SVE_FULL_I 3 "register_operand") > - (match_dup 4))] > + (match_operand:SVE_FULL_I 2 "register_operand") > + (match_dup 3))] > UNSPEC_PRED_Z) > - (match_dup 5) > - (match_dup 4)] > + (match_dup 4) > + (match_dup 3)] > UNSPEC_SEL))] >"TARGET_SVE" >{ > -operands[4] = CONST0_RTX (mode); > -operands[5] = CONST1_RTX (mode); > +
[pushed] aarch64: Fix bogus cnot optimisation [PR114603]
aarch64-sve.md had a pattern that combined: cmpeq pb.T, pa/z, zc.T, #0 mov zd.T, pb/z, #1 into: cnotzd.T, pa/m, zc.T But this is only valid if pa.T is a ptrue. In other cases, the original would set inactive elements of zd.T to 0, whereas the combined form would copy elements from zc.T. This isn't a regression on a known testcase. However, it's a nasty wrong code bug that could conceivably trigger for autovec code (although I've not been able to construct a reproducer so far). That fix is also quite localised to the buggy operation. I'd therefore prefer to push the fix now rather than wait for GCC 15. Tested on aarch64-linux-gnu & pushed. I'll backport to branches if there is no fallout. Richard gcc/ PR target/114603 * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot): Replace with... (@aarch64_ptrue_cnot): ...this, requiring operand 1 to be a ptrue. (*cnot): Require operand 1 to be a ptrue. * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand): Use aarch64_ptrue_cnot for _x operations that are predicated with a ptrue. Represent other _x operations as fully-defined _m operations. gcc/testsuite/ PR target/114603 * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test. --- .../aarch64/aarch64-sve-builtins-base.cc | 25 --- gcc/config/aarch64/aarch64-sve.md | 22 .../aarch64/sve/acle/general/cnot_1.c | 23 + 3 files changed, 50 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 257ca5bf6ad..5be2315a3c6 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -517,15 +517,22 @@ public: expand (function_expander ) const override { machine_mode mode = e.vector_mode (0); -if (e.pred == PRED_x) - { - /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs - a ptrue hint. */ - e.add_ptrue_hint (0, e.gp_mode (0)); - return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); - } - -return e.use_cond_insn (code_for_cond_cnot (mode), 0); +machine_mode pred_mode = e.gp_mode (0); +/* The underlying _x pattern is effectively: + +dst = src == 0 ? 1 : 0 + + rather than an UNSPEC_PRED_X. Using this form allows autovec + constructs to be matched by combine, but it means that the + predicate on the src == 0 comparison must be all-true. + + For simplicity, represent other _x operations as fully-defined _m + operations rather than using a separate bespoke pattern. */ +if (e.pred == PRED_x + && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode)) + return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode)); +return e.use_cond_insn (code_for_cond_cnot (mode), + e.pred == PRED_x ? 1 : 0); } }; diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index eca8623e587..0434358122d 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3363,24 +3363,24 @@ (define_insn_and_split "trunc2" ;; - CNOT ;; - -;; Predicated logical inverse. -(define_expand "@aarch64_pred_cnot" +;; Logical inverse, predicated with a ptrue. +(define_expand "@aarch64_ptrue_cnot" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(unspec: [(match_operand: 1 "register_operand") - (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (const_int SVE_KNOWN_PTRUE) (eq: - (match_operand:SVE_FULL_I 3 "register_operand") - (match_dup 4))] + (match_operand:SVE_FULL_I 2 "register_operand") + (match_dup 3))] UNSPEC_PRED_Z) - (match_dup 5) - (match_dup 4)] + (match_dup 4) + (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE" { -operands[4] = CONST0_RTX (mode); -operands[5] = CONST1_RTX (mode); +operands[3] = CONST0_RTX (mode); +operands[4] = CONST1_RTX (mode); } ) @@ -3389,7 +3389,7 @@ (define_insn "*cnot" (unspec:SVE_I [(unspec: [(match_operand: 1 "register_operand") - (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (const_int SVE_KNOWN_PTRUE) (eq: (match_operand:SVE_I 2 "register_operand") (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] @@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_" GET_MODE (operands[2])); return