> On 2 Feb 2026, at 14:22, Artemiy Volkov <[email protected]> wrote:
>
> This commit implements patterns and intrinsics for two instructions that are
> new in SVE2.2 (or, in streaming mode, SME2.2):
>
> - FIRSTP (Scalar index of first true predicate element (predicated))
> - LASTP (Scalar index of last true predicate element (predicated))
>
> The new intrinsics are documented in the ACLE manual [0] and have the
> following signatures:
>
> int64_t svfirstp_b{8,16,32,64} (svbool_t pg, svbool_t pn);
> int64_t svlastp_b{8,16,32,64} (svbool_t pg, svbool_t pn);
>
> The intrinsics are implemented in the usual way; the new
> svfirst_lastp_impl base class is shared by both families. Its ->fold ()
> method implements constant folding, except for LASTP under
> -msve-vector-bits=scalable, where the result depends on the (unknown)
> number of elements in the vector.
>
> On the .md side, the pattern for LASTP required creating a new UNSPEC
> since the number of elements in an SVE vector is generally unknown, but
> the FIRSTP RTL can be expressed in terms of AND, FFS, and PLUS.
>
> Included are standard asm tests (heavily based on the cntp_* tests from
> the sve directory), as well as some general C tests demonstrating the
> aforementioned constant folding when PG and/or PN are constant vectors.
>
> [0] https://github.com/ARM-software/acle
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-sve-builtins-sve2.cc
> (class svfirst_lastp_impl): Define new SVE function base class.
> (svfirstp): Define new SVE function base.
> (svlastp): Likewise.
> * config/aarch64/aarch64-sve-builtins-sve2.def (svfirstp): Define
> new SVE function.
> (svlastp): Likewise.
> * config/aarch64/aarch64-sve-builtins-sve2.h (svfirstp): Declare
> new SVE function base.
> * config/aarch64/aarch64-sve2.md (@aarch64_pred_firstp<mode>): New
> insn pattern.
> (@aarch64_pred_lastp<mode>): Likewise.
> * config/aarch64/iterators.md (UNSPEC_LASTP): New UNSPEC.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve2/acle/asm/firstp_b16.c: New test.
> * gcc.target/aarch64/sve2/acle/asm/firstp_b32.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/firstp_b64.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/firstp_b8.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b16.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b32.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b64.c: Likewise.
> * gcc.target/aarch64/sve2/acle/asm/lastp_b8.c: Likewise.
> * gcc.target/aarch64/sve2/acle/general/firstp.c: Likewise.
> * gcc.target/aarch64/sve2/acle/general/lastp.c: Likewise.
> ---
> .../aarch64/aarch64-sve-builtins-sve2.cc | 61 +++++
> .../aarch64/aarch64-sve-builtins-sve2.def | 2 +
> .../aarch64/aarch64-sve-builtins-sve2.h | 2 +
> gcc/config/aarch64/aarch64-sve2.md | 36 +++
> gcc/config/aarch64/iterators.md | 1 +
> .../aarch64/sve2/acle/asm/firstp_b16.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/firstp_b32.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/firstp_b64.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/firstp_b8.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b16.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b32.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b64.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/asm/lastp_b8.c | 192 ++++++++++++++++
> .../aarch64/sve2/acle/general/firstp.c | 212 ++++++++++++++++++
> .../aarch64/sve2/acle/general/lastp.c | 212 ++++++++++++++++++
> 15 files changed, 2062 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> index d45012e7936..5ea08056ae3 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
> @@ -291,6 +291,65 @@ public:
> }
> };
>
> +class svfirst_lastp_impl : public function_base
> +{
> +public:
> + CONSTEXPR svfirst_lastp_impl (bool first)
> + : m_first (first)
> + {}
> +
> + gimple *
> + fold (gimple_folder &f) const override
> + {
> + tree pg = gimple_call_arg (f.call, 0);
> + tree pn = gimple_call_arg (f.call, 1);
> +
> + gcc_assert (TYPE_MODE (TREE_TYPE (pg)) == TYPE_MODE (TREE_TYPE (pn)));
> +
> + if (is_pfalse (pg) || is_pfalse (pn))
> + return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
> +
> + if (TREE_CODE (pg) != VECTOR_CST
> + || TREE_CODE (pn) != VECTOR_CST)
> + return NULL;
> +
> + HOST_WIDE_INT nelts_full_vector = aarch64_fold_sve_cnt_pat
> (AARCH64_SV_ALL,
> + f.elements_per_vq (0));
> + if (!m_first && nelts_full_vector < 0)
> + return NULL;
> +
> + tree pa = fold_build2 (BIT_AND_EXPR, TREE_TYPE (pg), pg, pn);
> + gcc_assert (TREE_CODE (pa) == VECTOR_CST);
> +
> + int elt_size = f.type_suffix (0).element_bytes;
> + unsigned int nelts = vector_cst_encoded_nelts (pa);
> + for (unsigned int i = 0; i < nelts; i++)
> + {
> + unsigned int idx = m_first ? i : nelts - 1 - i;
> + if (tree_to_shwi (VECTOR_CST_ENCODED_ELT (pa, idx)) != 0)
> + return f.fold_call_to (build_int_cst (TREE_TYPE (f.lhs),
> + m_first
> + ? i / elt_size
> + : (nelts_full_vector - 1
> + - i / elt_size)));
> + }
> +
> + return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
> + }
> +
> + rtx
> + expand (function_expander &e) const override
> + {
> + machine_mode mode = e.vector_mode (0);
> + return e.use_exact_insn (m_first ? code_for_aarch64_pred_firstp (mode)
> + : code_for_aarch64_pred_lastp (mode));
> + }
> +
> +private:
> + /* True for svfirstp, false for svlastp. */
> + bool m_first;
> +};
> +
> class svld1q_gather_impl : public full_width_access
> {
> public:
> @@ -1023,12 +1082,14 @@ FUNCTION (sveorbt, unspec_based_function,
> (UNSPEC_EORBT, UNSPEC_EORBT, -1))
> FUNCTION (sveorqv, reduction, (UNSPEC_EORQV, UNSPEC_EORQV, -1))
> FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1))
> FUNCTION (svextq, svextq_impl,)
> +FUNCTION (svfirstp, svfirst_lastp_impl, (true))
> FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1))
> FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1))
> FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),)
> FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),)
> FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB,
> UNSPEC_UHSUB, -1))
> +FUNCTION (svlastp, svfirst_lastp_impl, (false))
> FUNCTION (svld1q_gather, svld1q_gather_impl,)
> FUNCTION (svld1udq, svld1uxq_impl, (VNx1DImode))
> FUNCTION (svld1uwq, svld1uxq_impl, (VNx1SImode))
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> index d1795c64e8e..6ecfc2a45c1 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> @@ -305,6 +305,8 @@ DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z)
> DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
> DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
> DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
> +DEF_SVE_FUNCTION (svfirstp, count_pred, all_pred, implicit)
> +DEF_SVE_FUNCTION (svlastp, count_pred, all_pred, implicit)
> DEF_SVE_FUNCTION (svrint32x, unary, sd_float, mxz)
> DEF_SVE_FUNCTION (svrint32z, unary, sd_float, mxz)
> DEF_SVE_FUNCTION (svrint64x, unary, sd_float, mxz)
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> index 8b1581f8568..b2f2698b880 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
> @@ -80,11 +80,13 @@ namespace aarch64_sve
> extern const function_base *const sveorqv;
> extern const function_base *const sveortb;
> extern const function_base *const svextq;
> + extern const function_base *const svfirstp;
> extern const function_base *const svhadd;
> extern const function_base *const svhistcnt;
> extern const function_base *const svhistseg;
> extern const function_base *const svhsub;
> extern const function_base *const svhsubr;
> + extern const function_base *const svlastp;
> extern const function_base *const svld1q_gather;
> extern const function_base *const svld1udq;
> extern const function_base *const svld1uwq;
> diff --git a/gcc/config/aarch64/aarch64-sve2.md
> b/gcc/config/aarch64/aarch64-sve2.md
> index 69e16571afc..5fc84b79423 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -46,6 +46,7 @@
> ;; ---- [PRED] Predicate extraction
> ;; ---- [PRED] Predicate selection
> ;; ---- [PRED] Predicate count
> +;; ---- [PRED] Predicate first/last true element
> ;;
> ;; == Uniform unary arithmnetic
> ;; ---- [FP] General unary arithmetic that maps to unspecs
> @@ -721,6 +722,41 @@
> [(set_attr "sve_type" "sve_pred_cnt_scalar")]
> )
>
> +;; -------------------------------------------------------------------------
> +;; ---- [PRED] Predicate first/last true element
> +;; -------------------------------------------------------------------------
> +;; Includes
> +;; - FIRSTP (predicate first true element)
> +;; - LASTP (predicate last true element)
Please add a comment on the FEAT_* extension that includes these.
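For example, something along these lines (assuming, as the commit message
suggests, that these instructions come from FEAT_SVE2p2, with FEAT_SME2p2
providing them in streaming mode):

;; The instructions in this group are part of FEAT_SVE2p2 and, in
;; streaming mode, FEAT_SME2p2.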
> +;; -------------------------------------------------------------------------
> +
> +;; Return the index of the first active element in a predicate,
> +;; or -1 if there is none.
> +(define_insn "@aarch64_pred_firstp<mode>"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (plus:DI
> + (ffs:DI
> + (and:PRED_ALL
Neat idea, but I think it’s invalid RTL to take a DImode FFS of a predicate
vector mode. According to the RTL documentation, the mode of the ffs and the
mode of its argument should match.
I don’t know if it’s possible to wrap it inside a suitable subreg somehow. If
not, then making the whole operation an UNSPEC may be the only way to go.
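If it does come to that, I'd expect something along the lines of your LASTP
pattern below (an untested sketch; UNSPEC_FIRSTP is a hypothetical new unspec
that would sit next to UNSPEC_LASTP in iterators.md):

(define_insn "@aarch64_pred_firstp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		    (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		   UNSPEC_FIRSTP))]
  "TARGET_SVE2p2_OR_SME2p2"
  "firstp\t%x0, %1, %2.<Vetype>"
  [(set_attr "sve_type" "sve_pred_cnt_scalar")]
)

The downside is that the RTL optimizers would no longer see the AND/FFS/PLUS
form, so the subreg route is worth investigating first.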
Thanks,
Kyrill
> + (match_operand:PRED_ALL 1 "register_operand" "Upl")
> + (match_operand:PRED_ALL 2 "register_operand" "Upa")))
> + (const_int -1)))]
> + "TARGET_SVE2p2_OR_SME2p2"
> + "firstp\t%x0, %1, %2.<Vetype>"
> + [(set_attr "sve_type" "sve_pred_cnt_scalar")]
> +)
> +
> +;; Return the index of the last active element in a predicate,
> +;; or -1 if there is none.
> +(define_insn "@aarch64_pred_lastp<mode>"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
> + (match_operand:PRED_ALL 2 "register_operand" "Upa")]
> + UNSPEC_LASTP))]
> + "TARGET_SVE2p2_OR_SME2p2"
> + "lastp\t%x0, %1, %2.<Vetype>"
> + [(set_attr "sve_type" "sve_pred_cnt_scalar")]
> +)
> +
> ;; =========================================================================
> ;; == Uniform unary arithmnetic
> ;; =========================================================================
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index b711df60f26..dbf9d6272a8 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1150,6 +1150,7 @@
> UNSPEC_FP8FCVTN ; Used in aarch64-sve2.md.
> UNSPEC_HISTCNT ; Used in aarch64-sve2.md.
> UNSPEC_HISTSEG ; Used in aarch64-sve2.md.
> + UNSPEC_LASTP ; Used in aarch64-sve2.md.
> UNSPEC_LD1_COUNT ; Used in aarch64-sve2.md.
> UNSPEC_LDNT1_COUNT ; Used in aarch64-sve2.md.
> UNSPEC_MATCH ; Used in aarch64-sve2.md.
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> new file mode 100644
> index 00000000000..06ea1e1b9ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b16_32:
> +** firstp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (firstp_b16_32, uint32_t,
> + x0 = svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_b16_64:
> +** firstp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (firstp_b16_64, uint64_t,
> + x0 = svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.h
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_32_general_x0, uint32_t,
> + x0 += svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.h
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.h
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_64_general_x0, uint64_t,
> + x0 += svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.h
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b16_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.h
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_32_general_x0, uint32_t,
> + x0 -= svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.h
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.h
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_64_general_x0, uint64_t,
> + x0 -= svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_dec_b16_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.h
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b16_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b16 (p0, p1));
> +
> +/*
> +** firstp_inc_b16_u16_general_z0:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z0\.h, \2|\2, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_inc_b16_u16_general_z1:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z1\.h, \2|\2, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_inc_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z0\.h, \3|\3, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** firstp_inc_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z1\.h, \3|\3, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** firstp_dec_b16_u16_general_z0:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z0\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_dec_b16_u16_general_z1:
> +** firstp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z1\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
> +
> +/*
> +** firstp_dec_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z0\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** firstp_dec_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z1\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> new file mode 100644
> index 00000000000..668920bba16
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b32_32:
> +** firstp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (firstp_b32_32, uint32_t,
> + x0 = svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_b32_64:
> +** firstp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (firstp_b32_64, uint64_t,
> + x0 = svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.s
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_32_general_x0, uint32_t,
> + x0 += svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.s
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.s
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_64_general_x0, uint64_t,
> + x0 += svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.s
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b32_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.s
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_32_general_x0, uint32_t,
> + x0 -= svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.s
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.s
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_64_general_x0, uint64_t,
> + x0 -= svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_dec_b32_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.s
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b32_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b32 (p0, p1));
> +
> +/*
> +** firstp_inc_b32_u32_general_z0:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z0\.s, \2|\2, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_inc_b32_u32_general_z1:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z1\.s, \2|\2, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_inc_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z0\.s, \3|\3, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** firstp_inc_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z1\.s, \3|\3, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** firstp_dec_b32_u32_general_z0:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z0\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_dec_b32_u32_general_z1:
> +** firstp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z1\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
> +
> +/*
> +** firstp_dec_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z0\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** firstp_dec_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z1\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> new file mode 100644
> index 00000000000..330b0b04768
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b64_32:
> +** firstp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (firstp_b64_32, uint32_t,
> + x0 = svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_b64_64:
> +** firstp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (firstp_b64_64, uint64_t,
> + x0 = svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.d
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_32_general_x0, uint32_t,
> + x0 += svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.d
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_64_general_x0, uint64_t,
> + x0 += svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b64_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.d
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_32_general_x0, uint32_t,
> + x0 -= svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.d
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_64_general_x0, uint64_t,
> + x0 -= svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_dec_b64_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b64_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b64 (p0, p1));
> +
> +/*
> +** firstp_inc_b64_u64_general_z0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z0\.d, \2|\2, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_inc_b64_u64_general_z1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z1\.d, \2|\2, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_inc_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z0\.d, \3|\3, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** firstp_inc_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z1\.d, \3|\3, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** firstp_dec_b64_u64_general_z0:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z0\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_dec_b64_u64_general_z1:
> +** firstp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z1\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
> +
> +/*
> +** firstp_dec_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z0\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** firstp_dec_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z1\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> new file mode 100644
> index 00000000000..653d903577a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** firstp_b8_32:
> +** firstp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (firstp_b8_32, uint32_t,
> + x0 = svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_b8_64:
> +** firstp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (firstp_b8_64, uint64_t,
> + x0 = svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.b
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_32_general_x0, uint32_t,
> + x0 += svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.b
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_32_general_x1, uint32_t,
> + x0 = x1 + svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.b
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_64_general_x0, uint64_t,
> + x0 += svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.b
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (firstp_inc_b8_64_general_x1, uint64_t,
> + x0 = x1 + svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_32_general_x0:
> +** firstp x([0-9]+), p0, p1\.b
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_32_general_x0, uint32_t,
> + x0 -= svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_32_general_x1:
> +** firstp x([0-9]+), p0, p1\.b
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_32_general_x1, uint32_t,
> + x0 = x1 - svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_64_general_x0:
> +** firstp (x[0-9]+), p0, p1\.b
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_64_general_x0, uint64_t,
> + x0 -= svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_dec_b8_64_general_x1:
> +** firstp (x[0-9]+), p0, p1\.b
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (firstp_dec_b8_64_general_x1, uint64_t,
> + x0 = x1 - svfirstp_b8 (p0, p1));
> +
> +/*
> +** firstp_inc_b8_u8_general_z0:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z0\.b, \2|\2, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_inc_b8_u8_general_z1:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z1\.b, \2|\2, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_inc_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z0\.b, \3|\3, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** firstp_inc_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z1\.b, \3|\3, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** firstp_dec_b8_u8_general_z0:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z0\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_dec_b8_u8_general_z1:
> +** firstp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z1\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
> +
> +/*
> +** firstp_dec_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z0\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** firstp_dec_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** firstp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z1\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> new file mode 100644
> index 00000000000..e70df211cf9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b16_32:
> +** lastp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (lastp_b16_32, uint32_t,
> + x0 = svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_b16_64:
> +** lastp x0, p0, p1\.h
> +** ret
> +*/
> +TEST_PTEST (lastp_b16_64, uint64_t,
> + x0 = svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.h
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_32_general_x0, uint32_t,
> + x0 += svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.h
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.h
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_64_general_x0, uint64_t,
> + x0 += svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.h
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b16_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.h
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_32_general_x0, uint32_t,
> + x0 -= svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.h
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.h
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_64_general_x0, uint64_t,
> + x0 -= svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_dec_b16_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.h
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b16_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b16 (p0, p1));
> +
> +/*
> +** lastp_inc_b16_u16_general_z0:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z0\.h, \2|\2, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_inc_b16_u16_general_z1:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** add z0\.h, (z1\.h, \2|\2, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
> + z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_inc_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z0\.h, \3|\3, z0\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** lastp_inc_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** add z0\.h, (z1\.h, \3|\3, z1\.h)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** lastp_dec_b16_u16_general_z0:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z0\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_dec_b16_u16_general_z1:
> +** lastp x([0-9]+), p0, p1\.h
> +** mov (z[0-9]+\.h), w\1
> +** sub z0\.h, z1\.h, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
> + z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
> +
> +/*
> +** lastp_dec_b16_u16_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z0\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z0, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
> +
> +/*
> +** lastp_dec_b16_u16_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.h
> +** mov (z[0-9]+\.h), w\2
> +** sub z0\.h, z1\.h, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z1, svuint16_t,
> + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
> + z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> new file mode 100644
> index 00000000000..b5b64407f7b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b32_32:
> +** lastp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (lastp_b32_32, uint32_t,
> + x0 = svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_b32_64:
> +** lastp x0, p0, p1\.s
> +** ret
> +*/
> +TEST_PTEST (lastp_b32_64, uint64_t,
> + x0 = svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.s
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_32_general_x0, uint32_t,
> + x0 += svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.s
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.s
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_64_general_x0, uint64_t,
> + x0 += svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.s
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b32_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.s
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_32_general_x0, uint32_t,
> + x0 -= svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.s
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.s
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_64_general_x0, uint64_t,
> + x0 -= svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_dec_b32_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.s
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b32_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b32 (p0, p1));
> +
> +/*
> +** lastp_inc_b32_u32_general_z0:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z0\.s, \2|\2, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_inc_b32_u32_general_z1:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** add z0\.s, (z1\.s, \2|\2, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
> + z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_inc_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z0\.s, \3|\3, z0\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** lastp_inc_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** add z0\.s, (z1\.s, \3|\3, z1\.s)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** lastp_dec_b32_u32_general_z0:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z0\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_dec_b32_u32_general_z1:
> +** lastp x([0-9]+), p0, p1\.s
> +** mov (z[0-9]+\.s), w\1
> +** sub z0\.s, z1\.s, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
> + z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
> +
> +/*
> +** lastp_dec_b32_u32_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z0\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z0, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
> +
> +/*
> +** lastp_dec_b32_u32_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.s
> +** mov (z[0-9]+\.s), w\2
> +** sub z0\.s, z1\.s, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z1, svuint32_t,
> + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
> + z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> new file mode 100644
> index 00000000000..343be3da9f8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b64_32:
> +** lastp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (lastp_b64_32, uint32_t,
> + x0 = svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_b64_64:
> +** lastp x0, p0, p1\.d
> +** ret
> +*/
> +TEST_PTEST (lastp_b64_64, uint64_t,
> + x0 = svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.d
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_32_general_x0, uint32_t,
> + x0 += svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.d
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_64_general_x0, uint64_t,
> + x0 += svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b64_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.d
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_32_general_x0, uint32_t,
> + x0 -= svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.d
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_64_general_x0, uint64_t,
> + x0 -= svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_dec_b64_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b64_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b64 (p0, p1));
> +
> +/*
> +** lastp_inc_b64_u64_general_z0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z0\.d, \2|\2, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_inc_b64_u64_general_z1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** add z0\.d, (z1\.d, \2|\2, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
> + z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_inc_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z0\.d, \3|\3, z0\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** lastp_inc_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** add z0\.d, (z1\.d, \3|\3, z1\.d)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** lastp_dec_b64_u64_general_z0:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z0\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_dec_b64_u64_general_z1:
> +** lastp (x[0-9]+), p0, p1\.d
> +** mov (z[0-9]+\.d), \1
> +** sub z0\.d, z1\.d, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
> + z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
> +
> +/*
> +** lastp_dec_b64_u64_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z0\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z0, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
> +
> +/*
> +** lastp_dec_b64_u64_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp (x[0-9]+), \1, p0\.d
> +** mov (z[0-9]+\.d), \2
> +** sub z0\.d, z1\.d, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z1, svuint64_t,
> + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
> + z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> new file mode 100644
> index 00000000000..5fa0f26f5b9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
> @@ -0,0 +1,192 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-additional-options "-msve-vector-bits=scalable" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +#include <stdbool.h>
> +
> +#pragma GCC target "+sve2p2"
> +#ifdef STREAMING_COMPATIBLE
> +#pragma GCC target "+sme2p2"
> +#endif
> +
> +/*
> +** lastp_b8_32:
> +** lastp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (lastp_b8_32, uint32_t,
> + x0 = svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_b8_64:
> +** lastp x0, p0, p1\.b
> +** ret
> +*/
> +TEST_PTEST (lastp_b8_64, uint64_t,
> + x0 = svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.b
> +** add w0, (w0, w\1|w\1, w0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_32_general_x0, uint32_t,
> + x0 += svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.b
> +** add w0, (w1, w\1|w\1, w1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_32_general_x1, uint32_t,
> + x0 = x1 + svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.b
> +** add x0, (x0, \1|\1, x0)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_64_general_x0, uint64_t,
> + x0 += svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.b
> +** add x0, (x1, \1|\1, x1)
> +** ret
> +*/
> +TEST_PTEST (lastp_inc_b8_64_general_x1, uint64_t,
> + x0 = x1 + svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_32_general_x0:
> +** lastp x([0-9]+), p0, p1\.b
> +** sub w0, w0, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_32_general_x0, uint32_t,
> + x0 -= svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_32_general_x1:
> +** lastp x([0-9]+), p0, p1\.b
> +** sub w0, w1, w\1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_32_general_x1, uint32_t,
> + x0 = x1 - svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_64_general_x0:
> +** lastp (x[0-9]+), p0, p1\.b
> +** sub x0, x0, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_64_general_x0, uint64_t,
> + x0 -= svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_dec_b8_64_general_x1:
> +** lastp (x[0-9]+), p0, p1\.b
> +** sub x0, x1, \1
> +** ret
> +*/
> +TEST_PTEST (lastp_dec_b8_64_general_x1, uint64_t,
> + x0 = x1 - svlastp_b8 (p0, p1));
> +
> +/*
> +** lastp_inc_b8_u8_general_z0:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z0\.b, \2|\2, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_inc_b8_u8_general_z1:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** add z0\.b, (z1\.b, \2|\2, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
> + z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_inc_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z0\.b, \3|\3, z0\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** lastp_inc_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** add z0\.b, (z1\.b, \3|\3, z1\.b)
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** lastp_dec_b8_u8_general_z0:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z0\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_dec_b8_u8_general_z1:
> +** lastp x([0-9]+), p0, p1\.b
> +** mov (z[0-9]+\.b), w\1
> +** sub z0\.b, z1\.b, \2
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
> + z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
> +
> +/*
> +** lastp_dec_b8_u8_ptrue_z0:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z0\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z0, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
> +
> +/*
> +** lastp_dec_b8_u8_ptrue_z1:
> +** ptrue (p[0-7])\.b, all
> +** lastp x([0-9]+), \1, p0\.b
> +** mov (z[0-9]+\.b), w\2
> +** sub z0\.b, z1\.b, \3
> +** ret
> +*/
> +TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z1, svuint8_t,
> + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
> + z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> new file mode 100644
> index 00000000000..c61a308bc89
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
> @@ -0,0 +1,212 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve2p2"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> +** test1:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test1 ()
> +{
> + return svfirstp_b8 (svptrue_b8 (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test2:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test2 ()
> +{
> + return svfirstp_b8 (svpfalse_b (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test3:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test3 ()
> +{
> + return svfirstp_b8 (svptrue_b8 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test4:
> +** mov x0, 15
> +** ret
> +*/
> +uint64_t
> +test4 ()
> +{
> + return svfirstp_b8 (svdupq_n_b8 (false, false, false, false,
> + false, false, false, false,
> + false, false, false, false,
> + false, false, false, true),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test5:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test5 ()
> +{
> + return svfirstp_b16 (svptrue_b16 (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test6:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test6 ()
> +{
> + return svfirstp_b16 (svpfalse_b (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test7:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test7 ()
> +{
> + return svfirstp_b16 (svptrue_b16 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test8:
> +** mov x0, 7
> +** ret
> +*/
> +uint64_t
> +test8 ()
> +{
> + return svfirstp_b16 (svdupq_n_b16 (false, false, false, false,
> + false, false, false, true),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test9:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test9 ()
> +{
> + return svfirstp_b32 (svptrue_b32 (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test10:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test10 ()
> +{
> + return svfirstp_b32 (svpfalse_b (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test11:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test11 ()
> +{
> + return svfirstp_b32 (svptrue_b32 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test12:
> +** mov x0, 3
> +** ret
> +*/
> +uint64_t
> +test12 ()
> +{
> + return svfirstp_b32 (svdupq_n_b32 (false, false, false, true),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test13:
> +** mov x0, 0
> +** ret
> +*/
> +uint64_t
> +test13 ()
> +{
> + return svfirstp_b64 (svptrue_b64 (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test14:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test14 ()
> +{
> + return svfirstp_b64 (svpfalse_b (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test15:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test15 ()
> +{
> + return svfirstp_b64 (svptrue_b64 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test16:
> +** mov x0, 1
> +** ret
> +*/
> +uint64_t
> +test16 ()
> +{
> + return svfirstp_b64 (svdupq_n_b64 (false, true),
> + svptrue_b64 ());
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
> new file mode 100644
> index 00000000000..2dbb65d798d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
> @@ -0,0 +1,212 @@
> +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
> +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
> +/* { dg-options "-O2 -msve-vector-bits=256" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve2p2"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/*
> +** test1:
> +** mov x0, 31
> +** ret
> +*/
> +uint64_t
> +test1 ()
> +{
> + return svlastp_b8 (svptrue_b8 (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test2:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test2 ()
> +{
> + return svlastp_b8 (svpfalse_b (),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test3:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test3 ()
> +{
> + return svlastp_b8 (svptrue_b8 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test4:
> +** mov x0, 31
> +** ret
> +*/
> +uint64_t
> +test4 ()
> +{
> + return svlastp_b8 (svdupq_n_b8 (false, false, false, false,
> + false, false, false, false,
> + false, false, false, false,
> + false, false, false, true),
> + svptrue_b8 ());
> +}
> +
> +/*
> +** test5:
> +** mov x0, 15
> +** ret
> +*/
> +uint64_t
> +test5 ()
> +{
> + return svlastp_b16 (svptrue_b16 (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test6:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test6 ()
> +{
> + return svlastp_b16 (svpfalse_b (),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test7:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test7 ()
> +{
> + return svlastp_b16 (svptrue_b16 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test8:
> +** mov x0, 15
> +** ret
> +*/
> +uint64_t
> +test8 ()
> +{
> + return svlastp_b16 (svdupq_n_b16 (false, false, false, false,
> + false, false, false, true),
> + svptrue_b16 ());
> +}
> +
> +/*
> +** test9:
> +** mov x0, 7
> +** ret
> +*/
> +uint64_t
> +test9 ()
> +{
> + return svlastp_b32 (svptrue_b32 (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test10:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test10 ()
> +{
> + return svlastp_b32 (svpfalse_b (),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test11:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test11 ()
> +{
> + return svlastp_b32 (svptrue_b32 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test12:
> +** mov x0, 7
> +** ret
> +*/
> +uint64_t
> +test12 ()
> +{
> + return svlastp_b32 (svdupq_n_b32 (false, false, false, true),
> + svptrue_b32 ());
> +}
> +
> +/*
> +** test13:
> +** mov x0, 3
> +** ret
> +*/
> +uint64_t
> +test13 ()
> +{
> + return svlastp_b64 (svptrue_b64 (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test14:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test14 ()
> +{
> + return svlastp_b64 (svpfalse_b (),
> + svptrue_b64 ());
> +}
> +
> +/*
> +** test15:
> +** mov x0, -1
> +** ret
> +*/
> +uint64_t
> +test15 ()
> +{
> + return svlastp_b64 (svptrue_b64 (),
> + svpfalse_b ());
> +}
> +
> +/*
> +** test16:
> +** mov x0, 3
> +** ret
> +*/
> +uint64_t
> +test16 ()
> +{
> + return svlastp_b64 (svdupq_n_b64 (false, true),
> + svptrue_b64 ());
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> --
> 2.43.0
>