> On 2 Feb 2026, at 14:22, Artemiy Volkov <[email protected]> wrote:
>
> This commit implements patterns and intrinsics for two instructions that are
> new in SVE2.2 (or, in streaming mode, SME2.2):
>
> - FIRSTP (Scalar index of first true predicate element (predicated))
> - LASTP (Scalar index of last true predicate element (predicated))
>
> The new intrinsics are documented in the ACLE manual [0] and have the
> following signatures:
>
> int64_t svfirstp_b{8,16,32,64} (svbool_t pg, svbool_t pn);
> int64_t svlastp_b{8,16,32,64} (svbool_t pg, svbool_t pn);
>
> The intrinsics are implemented in the usual way; the new
> svfirst_lastp_impl base class is shared by both families. Its ->fold ()
> method implements constant folding, except for LASTP under
> -msve-vector-bits=scalable, where the result depends on the (unknown)
> number of elements in the vector.
>
> On the .md side, the pattern for LASTP required creating a new UNSPEC
> since the number of elements in an SVE vector is generally unknown, but
> the FIRSTP RTL can be expressed in terms of AND, FFS, and PLUS.
>
> Included are standard asm tests (heavily based on the cntp_* tests from
> the sve directory), as well as some general C tests demonstrating the
> aforementioned constant folding when PG and/or PN are constant vectors.
>
> [0] https://github.com/ARM-software/acle
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-sve-builtins-sve2.cc
> (class svfirst_lastp_impl): Define new SVE function base class.
> (svfirstp): Define new SVE function base.
> (svlastp): Likewise.
> * config/aarch64/aarch64-sve-builtins-sve2.def (svfirstp): Define
> new SVE function.
> (svlastp): Likewise.
> * config/aarch64/aarch64-sve-builtins-sve2.h (svfirstp): Declare
> new SVE function base.
> * config/aarch64/aarch64-sve2.md (@aarch64_pred_firstp<mode>): New
> insn pattern.
> (@aarch64_pred_lastp<mode>): Likewise.
> * config/aarch64/iterators.md (UNSPEC_LASTP): New UNSPEC.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve2/acle/asm/firstp_b16.c: New test.
> * gcc.target/aarch64/sve2/acle/asm/firstp_b32.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/firstp_b64.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/firstp_b8.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b16.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b32.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b64.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b8.c: Likewise.
> * gcc.target/aarch64/sve2/acle/general/firstp.c: Likewise.
> * gcc.target/aarch64/sve2/acle/general/lastp.c: Likewise.
> ---
> .../aarch64/aarch64-sve-builtins-sve2.cc | 61 +++++
> .../aarch64/aarch64-sve-builtins-sve2.def | 2 +
> .../aarch64/aarch64-sve-builtins-sve2.h | 2 +
> gcc/config/aarch64/aarch64-sve2.md | 36 +++
> gcc/config/aarch64/iterators.md | 1 +
> .../aarch64/sve2/acle/asm/firstp_b16.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/firstp_b32.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/firstp_b64.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/firstp_b8.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b16.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b32.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b64.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b8.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/general/firstp.c | 212 ++++++++++++++++++
> .../aarch64/sve2/acle/general/lastp.c | 212 ++++++++++++++++++
> 15 files changed, 2062 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> index d45012e7936..5ea08056ae3 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> @@ -291,6 +291,65 @@ public:
> }
> };
>
> +class svfirst_lastp_impl : public function_base
> +{
> +public:
> + CONSTEXPR svfirst_lastp_impl (bool first)
> + : m_first (first)
> + {}
> +
> + gimple *
> + fold (gimple_folder &f) const override
> + {
> + tree pg = gimple_call_arg (f.call, 0);
> + tree pn = gimple_call_arg (f.call, 1);
> +
> + gcc_assert (TYPE_MODE (TREE_TYPE (pg)) == TYPE_MODE (TREE_TYPE (pn)));
> +
> + if (is_pfalse (pg) || is_pfalse (pn))
> + return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
> +
> + if (TREE_CODE (pg) != VECTOR_CST
> + || TREE_CODE (pn) != VECTOR_CST)
> + return NULL;
> +
> + HOST_WIDE_INT nelts_full_vector = aarch64_fold_sve_cnt_pat
> (AARCH64_SV_ALL,
> + f.elements_per_vq (0));
> + if (!m_first && nelts_full_vector < 0)
> + return NULL;
> +
> + tree pa = fold_build2 (BIT_AND_EXPR, TREE_TYPE (pg), pg, pn);
> + gcc_assert (TREE_CODE (pa) == VECTOR_CST);
> +
> + int elt_size = f.type_suffix (0).element_bytes;
> + unsigned int nelts = vector_cst_encoded_nelts (pa);
> + for (unsigned int i = 0; i < nelts; i++)
> + {
> + unsigned int idx = m_first ? i : nelts - 1 - i;
> + if (tree_to_shwi (VECTOR_CST_ENCODED_ELT (pa, idx)) != 0)
> + return f.fold_call_to (build_int_cst (TREE_TYPE (f.lhs),
> + m_first
> + ? i / elt_size
> + : (nelts_full_vector - 1
> + - i / elt_size)));
> + }
> +
> + return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
> + }
> +
> + rtx
> + expand (function_expander &e) const override
> + {
> + machine_mode mode = e.vector_mode (0);
> + return e.use_exact_insn (m_first ? code_for_aarch64_pred_firstp (mode)
> + : code_for_aarch64_pred_lastp (mode));
> + }
> +
> +private:
> + /* True for svfirstp, false for svlastp. */
> + bool m_first;
> +};
> +
> class svld1q_gather_impl : public full_width_access
> {
> public:
> @@ -1023,12 +1082,14 @@ FUNCTION (sveorbt, unspec_based_function,
> (UNSPEC_EORBT, UNSPEC_EORBT, -1))
> FUNCTION (sveorqv, reduction, (UNSPEC_EORQV, UNSPEC_EORQV, -1))
> FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1))
> FUNCTION (svextq, svextq_impl,)
> +FUNCTION (svfirstp, svfirst_lastp_impl, (true))
> FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1))
> FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1))
> FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),)
> FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),)
> FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB,
> UNSPEC_UHSUB, -1))
> +FUNCTION (svlastp, svfirst_lastp_impl, (false))
> FUNCTION (svld1q_gather, svld1q_gather_impl,)
> FUNCTION (svld1udq, svld1uxq_impl, (VNx1DImode))
> FUNCTION (svld1uwq, svld1uxq_impl, (VNx1SImode))
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> index d1795c64e8e..6ecfc2a45c1 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> @@ -305,6 +305,8 @@ DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z)
> DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
> DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
> DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
> +DEF_SVE_FUNCTION (svfirstp, count_pred, all_pred, implicit)
> +DEF_SVE_FUNCTION (svlastp, count_pred, all_pred, implicit)
> DEF_SVE_FUNCTION (svrint32x, unary, sd_float, mxz)
> DEF_SVE_FUNCTION (svrint32z, unary, sd_float, mxz)
> DEF_SVE_FUNCTION (svrint64x, unary, sd_float, mxz)
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> index 8b1581f8568..b2f2698b880 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> @@ -80,11 +80,13 @@ namespace aarch64_sve
> extern const function_base *const sveorqv;
> extern const function_base *const sveortb;
> extern const function_base *const svextq;
> + extern const function_base *const svfirstp;
> extern const function_base *const svhadd;
> extern const function_base *const svhistcnt;
> extern const function_base *const svhistseg;
> extern const function_base *const svhsub;
> extern const function_base *const svhsubr;
> + extern const function_base *const svlastp;
> extern const function_base *const svld1q_gather;
> extern const function_base *const svld1udq;
> extern const function_base *const svld1uwq;
> diff --git a/gcc/config/aarch64/aarch64-sve2.md
> b/gcc/config/aarch64/aarch64-sve2.md
> index 69e16571afc..5fc84b79423 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -46,6 +46,7 @@
> ;; ---- [PRED] Predicate extraction
> ;; ---- [PRED] Predicate selection
> ;; ---- [PRED] Predicate count
> +;; ---- [PRED] Predicate first/last true element
> ;;
> ;; == Uniform unary arithmnetic
> ;; ---- [FP] General unary arithmetic that maps to unspecs
> @@ -721,6 +722,41 @@
> [(set_attr "sve_type" "sve_pred_cnt_scalar")]
> )
>
> +;; -------------------------------------------------------------------------
> +;; ---- [PRED] Predicate first/last true element
> +;; -------------------------------------------------------------------------
> +;; Includes
> +;; - FIRSTP (predicate first true element)
> +;; - LASTP (predicate last true element)
Please add a comment on the FEAT_* extension that includes these.
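For example, something along these lines (assuming, as the commit message
suggests, that these instructions come from FEAT_SVE2p2, with FEAT_SME2p2
providing them in streaming mode):

;; The instructions in this group are part of FEAT_SVE2p2 and, in
;; streaming mode, FEAT_SME2p2.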
> +;; -------------------------------------------------------------------------
> +
> +;; Return the index of the first active element in a predicate,
> +;; or -1 if there is none.
> +(define_insn "@aarch64_pred_firstp<mode>"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (plus:DI
> + (ffs:DI
> + (and:PRED_ALL
Neat idea, but I think it’s invalid RTL to take a DImode FFS of a predicate
vector mode. According to the RTL documentation, the mode of the ffs and the
mode of its argument should match.
I don’t know if it’s possible to wrap it inside a suitable subreg somehow. If
not, then making the whole operation an UNSPEC may be the only way to go.
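If it does come to that, I'd expect something along the lines of your LASTP
pattern below (an untested sketch; UNSPEC_FIRSTP is a hypothetical new unspec
that would sit next to UNSPEC_LASTP in iterators.md):

(define_insn "@aarch64_pred_firstp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		    (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		   UNSPEC_FIRSTP))]
  "TARGET_SVE2p2_OR_SME2p2"
  "firstp\t%x0, %1, %2.<Vetype>"
  [(set_attr "sve_type" "sve_pred_cnt_scalar")]
)

The downside is that the RTL optimizers would no longer see the AND/FFS/PLUS
form, so the subreg route is worth investigating first.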
Thanks,
Kyrill
> + (match_operand:PRED_ALL 1 "register_operand" "Upl")
> + (match_operand:PRED_ALL 2 "register_operand" "Upa")))
> + (const_int -1)))]
> + "TARGET_SVE2p2_OR_SME2p2"
> + "firstp\t%x0, %1, %2.<Vetype>"
> + [(set_attr "sve_type" "sve_pred_cnt_scalar")]
> +)
> +
> +;; Return the index of the last active element in a predicate,
> +;; or -1 if there is none.
> +(define_insn "@aarch64_pred_lastp<mode>"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
> + (match_operand:PRED_ALL 2 "register_operand" "Upa")]
> + UNSPEC_LASTP))]
> + "TARGET_SVE2p2_OR_SME2p2"
> + "lastp\t%x0, %1, %2.<Vetype>"
> + [(set_attr "sve_type" "sve_pred_cnt_scalar")]
> +)
> +
> ;; =========================================================================
> ;; == Uniform unary arithmnetic
> ;; =========================================================================
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index b711df60f26..dbf9d6272a8 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1150,6 +1150,7 @@
> UNSPEC_FP8FCVTN ; Used in aarch64-sve2.md.
> UNSPEC_HISTCNT ; Used in aarch64-sve2.md.
> UNSPEC_HISTSEG ; Used in aarch64-sve2.md.
> + UNSPEC_LASTP ; Used in aarch64-sve2.md.
> UNSPEC_LD1_COUNT ; Used in aarch64-sve2.md.
> UNSPEC_LDNT1_COUNT ; Used in aarch64-sve2.md.
> UNSPEC_MATCH ; Used in aarch64-sve2.md.
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> new file mode 100644
> index 00000000000..06ea1e1b9ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b16_32:
> +** firstp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (firstp_b16_32, uint32_t,
> + x0 = svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_b16_64:
> +** firstp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (firstp_b16_64, uint64_t,
> + x0 = svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.h
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_32_general_x0, uint32_t,
> + x0 += svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.h
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.h
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_64_general_x0, uint64_t,
> + x0 += svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.h
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.h
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_32_general_x0, uint32_t,
> + x0 -= svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.h
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.h
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_64_general_x0, uint64_t,
> + x0 -= svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.h
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_u16_general_z0:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z0\.h, \2|\2, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_inc_b16_u16_general_z1:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z1\.h, \2|\2, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_inc_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z0\.h, \3|\3, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** firstp_inc_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z1\.h, \3|\3, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** firstp_dec_b16_u16_general_z0:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z0\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_dec_b16_u16_general_z1:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z1\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_dec_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z0\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** firstp_dec_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z1\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> new file mode 100644
> index 00000000000..668920bba16
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b32_32:
> +** firstp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (firstp_b32_32, uint32_t,
> + x0 = svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_b32_64:
> +** firstp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (firstp_b32_64, uint64_t,
> + x0 = svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.s
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_32_general_x0, uint32_t,
> + x0 += svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.s
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.s
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_64_general_x0, uint64_t,
> + x0 += svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.s
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.s
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_32_general_x0, uint32_t,
> + x0 -= svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.s
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.s
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_64_general_x0, uint64_t,
> + x0 -= svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.s
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_u32_general_z0:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z0\.s, \2|\2, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_inc_b32_u32_general_z1:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z1\.s, \2|\2, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_inc_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z0\.s, \3|\3, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** firstp_inc_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z1\.s, \3|\3, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** firstp_dec_b32_u32_general_z0:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z0\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_dec_b32_u32_general_z1:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z1\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_dec_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z0\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** firstp_dec_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z1\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> new file mode 100644
> index 00000000000..330b0b04768
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b64_32:
> +** firstp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (firstp_b64_32, uint32_t,
> + x0 = svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_b64_64:
> +** firstp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (firstp_b64_64, uint64_t,
> + x0 = svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.d
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_32_general_x0, uint32_t,
> + x0 += svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.d
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_64_general_x0, uint64_t,
> + x0 += svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.d
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_32_general_x0, uint32_t,
> + x0 -= svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.d
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_64_general_x0, uint64_t,
> + x0 -= svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_u64_general_z0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z0\.d, \2|\2, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_inc_b64_u64_general_z1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z1\.d, \2|\2, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_inc_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z0\.d, \3|\3, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** firstp_inc_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z1\.d, \3|\3, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** firstp_dec_b64_u64_general_z0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z0\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_dec_b64_u64_general_z1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z1\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_dec_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z0\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** firstp_dec_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z1\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> new file mode 100644
> index 00000000000..653d903577a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b8_32:
> +** firstp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (firstp_b8_32, uint32_t,
> + x0 = svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_b8_64:
> +** firstp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (firstp_b8_64, uint64_t,
> + x0 = svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.b
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_32_general_x0, uint32_t,
> + x0 += svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.b
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.b
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_64_general_x0, uint64_t,
> + x0 += svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.b
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.b
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_32_general_x0, uint32_t,
> + x0 -= svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.b
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.b
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_64_general_x0, uint64_t,
> + x0 -= svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.b
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_u8_general_z0:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z0\.b, \2|\2, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_inc_b8_u8_general_z1:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z1\.b, \2|\2, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_inc_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z0\.b, \3|\3, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** firstp_inc_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z1\.b, \3|\3, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** firstp_dec_b8_u8_general_z0:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z0\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_dec_b8_u8_general_z1:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z1\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_dec_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z0\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** firstp_dec_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z1\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> new file mode 100644
> index 00000000000..e70df211cf9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b16_32:
> +** lastp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (lastp_b16_32, uint32_t,
> + x0 = svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_b16_64:
> +** lastp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (lastp_b16_64, uint64_t,
> + x0 = svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.h
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_32_general_x0, uint32_t,
> + x0 += svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.h
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.h
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_64_general_x0, uint64_t,
> + x0 += svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.h
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.h
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_32_general_x0, uint32_t,
> + x0 -= svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.h
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.h
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_64_general_x0, uint64_t,
> + x0 -= svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.h
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_u16_general_z0:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z0\.h, \2|\2, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_inc_b16_u16_general_z1:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z1\.h, \2|\2, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_inc_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z0\.h, \3|\3, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** lastp_inc_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z1\.h, \3|\3, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** lastp_dec_b16_u16_general_z0:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z0\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_dec_b16_u16_general_z1:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z1\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_dec_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z0\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** lastp_dec_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z1\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> new file mode 100644
> index 00000000000..b5b64407f7b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b32_32:
> +** lastp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (lastp_b32_32, uint32_t,
> + x0 = svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_b32_64:
> +** lastp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (lastp_b32_64, uint64_t,
> + x0 = svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.s
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_32_general_x0, uint32_t,
> + x0 += svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.s
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.s
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_64_general_x0, uint64_t,
> + x0 += svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.s
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.s
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_32_general_x0, uint32_t,
> + x0 -= svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.s
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.s
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_64_general_x0, uint64_t,
> + x0 -= svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.s
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_u32_general_z0:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z0\.s, \2|\2, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_inc_b32_u32_general_z1:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z1\.s, \2|\2, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_inc_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z0\.s, \3|\3, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** lastp_inc_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z1\.s, \3|\3, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** lastp_dec_b32_u32_general_z0:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z0\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_dec_b32_u32_general_z1:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z1\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_dec_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z0\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** lastp_dec_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z1\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> new file mode 100644
> index 00000000000..343be3da9f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b64_32:
> +** lastp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (lastp_b64_32, uint32_t,
> + x0 = svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_b64_64:
> +** lastp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (lastp_b64_64, uint64_t,
> + x0 = svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.d
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_32_general_x0, uint32_t,
> + x0 += svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.d
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_64_general_x0, uint64_t,
> + x0 += svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.d
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_32_general_x0, uint32_t,
> + x0 -= svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.d
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_64_general_x0, uint64_t,
> + x0 -= svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_u64_general_z0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z0\.d, \2|\2, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_inc_b64_u64_general_z1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z1\.d, \2|\2, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_inc_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z0\.d, \3|\3, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** lastp_inc_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z1\.d, \3|\3, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** lastp_dec_b64_u64_general_z0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z0\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_dec_b64_u64_general_z1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z1\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_dec_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z0\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** lastp_dec_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z1\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> new file mode 100644
> index 00000000000..5fa0f26f5b9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b8_32:
> +** lastp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (lastp_b8_32, uint32_t,
> + x0 = svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_b8_64:
> +** lastp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (lastp_b8_64, uint64_t,
> + x0 = svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.b
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_32_general_x0, uint32_t,
> + x0 += svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.b
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.b
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_64_general_x0, uint64_t,
> + x0 += svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.b
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.b
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_32_general_x0, uint32_t,
> + x0 -= svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.b
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.b
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_64_general_x0, uint64_t,
> + x0 -= svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.b
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_u8_general_z0:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z0\.b, \2|\2, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_inc_b8_u8_general_z1:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z1\.b, \2|\2, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_inc_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z0\.b, \3|\3, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** lastp_inc_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z1\.b, \3|\3, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** lastp_dec_b8_u8_general_z0:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z0\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_dec_b8_u8_general_z1:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z1\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_dec_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z0\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** lastp_dec_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z1\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> new file mode 100644
> index 00000000000..c61a308bc89
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> @@ -0,0 +1,212 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve2p2"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> +** test1:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test1 ()
> +{
> + return svfirstp_b8 (svptrue_b8 (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test2:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test2 ()
> +{
> + return svfirstp_b8 (svpfalse_b (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test3:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test3 ()
> +{
> + return svfirstp_b8 (svptrue_b8 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test4:
> +** mov x0, 15
> +** ret
> +*/
> +uint64_t
> +test4 ()
> +{
> + return svfirstp_b8 (svdupq_n_b8 (false, false, false, false,
> + false, false, false, false,
> + false, false, false, false,
> + false, false, false, true),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test5:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test5 ()
> +{
> + return svfirstp_b16 (svptrue_b16 (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test6:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test6 ()
> +{
> + return svfirstp_b16 (svpfalse_b (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test7:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test7 ()
> +{
> + return svfirstp_b16 (svptrue_b16 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test8:
> +** mov x0, 7
> +** ret
> +*/
> +uint64_t
> +test8 ()
> +{
> + return svfirstp_b16 (svdupq_n_b16 (false, false, false, false,
> + false, false, false, true),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test9:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test9 ()
> +{
> + return svfirstp_b32 (svptrue_b32 (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test10:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test10 ()
> +{
> + return svfirstp_b32 (svpfalse_b (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test11:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test11 ()
> +{
> + return svfirstp_b32 (svptrue_b32 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test12:
> +** mov x0, 3
> +** ret
> +*/
> +uint64_t
> +test12 ()
> +{
> + return svfirstp_b32 (svdupq_n_b32 (false, false, false, true),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test13:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test13 ()
> +{
> + return svfirstp_b64 (svptrue_b64 (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test14:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test14 ()
> +{
> + return svfirstp_b64 (svpfalse_b (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test15:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test15 ()
> +{
> + return svfirstp_b64 (svptrue_b64 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test16:
> +** mov x0, 1
> +** ret
> +*/
> +uint64_t
> +test16 ()
> +{
> + return svfirstp_b64 (svdupq_n_b64 (false, true),
> + svptrue_b64 ());
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
> new file mode 100644
> index 00000000000..2dbb65d798d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
> @@ -0,0 +1,212 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-options "-O2 -msve-vector-bits=256" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve2p2"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> +** test1:
> +** mov x0, 31
> +** ret
> +*/
> +uint64_t
> +test1 ()
> +{
> + return svlastp_b8 (svptrue_b8 (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test2:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test2 ()
> +{
> + return svlastp_b8 (svpfalse_b (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test3:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test3 ()
> +{
> + return svlastp_b8 (svptrue_b8 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test4:
> +** mov x0, 31
> +** ret
> +*/
> +uint64_t
> +test4 ()
> +{
> + return svlastp_b8 (svdupq_n_b8 (false, false, false, false,
> + false, false, false, false,
> + false, false, false, false,
> + false, false, false, true),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test5:
> +** mov x0, 15
> +** ret
> +*/
> +uint64_t
> +test5 ()
> +{
> + return svlastp_b16 (svptrue_b16 (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test6:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test6 ()
> +{
> + return svlastp_b16 (svpfalse_b (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test7:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test7 ()
> +{
> + return svlastp_b16 (svptrue_b16 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test8:
> +** mov x0, 15
> +** ret
> +*/
> +uint64_t
> +test8 ()
> +{
> + return svlastp_b16 (svdupq_n_b16 (false, false, false, false,
> + false, false, false, true),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test9:
> +** mov x0, 7
> +** ret
> +*/
> +uint64_t
> +test9 ()
> +{
> + return svlastp_b32 (svptrue_b32 (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test10:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test10 ()
> +{
> + return svlastp_b32 (svpfalse_b (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test11:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test11 ()
> +{
> + return svlastp_b32 (svptrue_b32 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test12:
> +** mov x0, 7
> +** ret
> +*/
> +uint64_t
> +test12 ()
> +{
> + return svlastp_b32 (svdupq_n_b32 (false, false, false, true),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test13:
> +** mov x0, 3
> +** ret
> +*/
> +uint64_t
> +test13 ()
> +{
> + return svlastp_b64 (svptrue_b64 (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test14:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test14 ()
> +{
> + return svlastp_b64 (svpfalse_b (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test15:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test15 ()
> +{
> + return svlastp_b64 (svptrue_b64 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test16:
> +** mov x0, 3
> +** ret
> +*/
> +uint64_t
> +test16 ()
> +{
> + return svlastp_b64 (svdupq_n_b64 (false, true),
> + svptrue_b64 ());
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> --
> 2.43.0
>