On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> aarch64-sve.md had a pattern that combined:
>
>         cmpeq   pb.T, pa/z, zc.T, #0
>         mov     zd.T, pb/z, #1
>
> into:
>
>         cnot    zd.T, pa/m, zc.T
>
> But this is only valid if pa.T is a ptrue.  In other cases, the
> original would set inactive elements of zd.T to 0, whereas the
> combined form would copy elements from zc.T.
>
> This isn't a regression on a known testcase.  However, it's a nasty
> wrong code bug that could conceivably trigger for autovec code (although
> I've not been able to construct a reproducer so far).  That fix is also
> quite localised to the buggy operation.  I'd therefore prefer to push
> the fix now rather than wait for GCC 15.

wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt
from the regression-only fixing.  In practice every such bug will be a
regression,
in this case to when the combining pattern was introduced (unless that was
with the version with the initial introduction of the port of course).

Richard.

> Tested on aarch64-linux-gnu & pushed.  I'll backport to branches if
> there is no fallout.
>
> Richard
>
> gcc/
>         PR target/114603
>         * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
>         with...
>         (@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
>         a ptrue.
>         (*cnot<mode>): Require operand 1 to be a ptrue.
>         * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
>         Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
>         with a ptrue.  Represent other _x operations as fully-defined _m
>         operations.
>
> gcc/testsuite/
>         PR target/114603
>         * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
> ---
>  .../aarch64/aarch64-sve-builtins-base.cc      | 25 ++++++++++++-------
>  gcc/config/aarch64/aarch64-sve.md             | 22 ++++++++--------
>  .../aarch64/sve/acle/general/cnot_1.c         | 23 +++++++++++++++++
>  3 files changed, 50 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 257ca5bf6ad..5be2315a3c6 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -517,15 +517,22 @@ public:
>    expand (function_expander &e) const override
>    {
>      machine_mode mode = e.vector_mode (0);
> -    if (e.pred == PRED_x)
> -      {
> -       /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
> -          a ptrue hint.  */
> -       e.add_ptrue_hint (0, e.gp_mode (0));
> -       return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
> -      }
> -
> -    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
> +    machine_mode pred_mode = e.gp_mode (0);
> +    /* The underlying _x pattern is effectively:
> +
> +        dst = src == 0 ? 1 : 0
> +
> +       rather than an UNSPEC_PRED_X.  Using this form allows autovec
> +       constructs to be matched by combine, but it means that the
> +       predicate on the src == 0 comparison must be all-true.
> +
> +       For simplicity, represent other _x operations as fully-defined _m
> +       operations rather than using a separate bespoke pattern.  */
> +    if (e.pred == PRED_x
> +       && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
> +      return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
> +    return e.use_cond_insn (code_for_cond_cnot (mode),
> +                           e.pred == PRED_x ? 1 : 0);
>    }
>  };
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index eca8623e587..0434358122d 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -3363,24 +3363,24 @@ (define_insn_and_split 
> "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
>  ;; - CNOT
>  ;; -------------------------------------------------------------------------
>
> -;; Predicated logical inverse.
> -(define_expand "@aarch64_pred_cnot<mode>"
> +;; Logical inverse, predicated with a ptrue.
> +(define_expand "@aarch64_ptrue_cnot<mode>"
>    [(set (match_operand:SVE_FULL_I 0 "register_operand")
>         (unspec:SVE_FULL_I
>           [(unspec:<VPRED>
>              [(match_operand:<VPRED> 1 "register_operand")
> -             (match_operand:SI 2 "aarch64_sve_ptrue_flag")
> +             (const_int SVE_KNOWN_PTRUE)
>               (eq:<VPRED>
> -               (match_operand:SVE_FULL_I 3 "register_operand")
> -               (match_dup 4))]
> +               (match_operand:SVE_FULL_I 2 "register_operand")
> +               (match_dup 3))]
>              UNSPEC_PRED_Z)
> -          (match_dup 5)
> -          (match_dup 4)]
> +          (match_dup 4)
> +          (match_dup 3)]
>           UNSPEC_SEL))]
>    "TARGET_SVE"
>    {
> -    operands[4] = CONST0_RTX (<MODE>mode);
> -    operands[5] = CONST1_RTX (<MODE>mode);
> +    operands[3] = CONST0_RTX (<MODE>mode);
> +    operands[4] = CONST1_RTX (<MODE>mode);
>    }
>  )
>
> @@ -3389,7 +3389,7 @@ (define_insn "*cnot<mode>"
>         (unspec:SVE_I
>           [(unspec:<VPRED>
>              [(match_operand:<VPRED> 1 "register_operand")
> -             (match_operand:SI 5 "aarch64_sve_ptrue_flag")
> +             (const_int SVE_KNOWN_PTRUE)
>               (eq:<VPRED>
>                 (match_operand:SVE_I 2 "register_operand")
>                 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
> @@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_<mode>"
>                                    GET_MODE (operands[2]));
>      return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
>    }
> -)
> \ No newline at end of file
> +)
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
> new file mode 100644
> index 00000000000..b1a489f0cf0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
> @@ -0,0 +1,23 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> +** foo:
> +**     cmpeq   (p[0-7])\.s, p0/z, z0\.s, #0
> +**     mov     z0\.s, \1/z, #1
> +**     ret
> +*/
> +svint32_t foo(svbool_t pg, svint32_t y)
> +{
> +  return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> --
> 2.25.1
>

Reply via email to