This commit implements patterns and intrinsics for the following two
instructions, new in SVE2.2 (or, in streaming mode, SME2.2):
- FIRSTP (scalar index of the first true predicate element, predicated)
- LASTP (scalar index of the last true predicate element, predicated)
The new intrinsics are documented in the ACLE manual [0] and have the
following signatures:
int64_t svfirstp_b{8,16,32,64} (svbool_t pg, svbool_t pn);
int64_t svlastp_b{8,16,32,64} (svbool_t pg, svbool_t pn);
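As a minimal usage sketch (function names here are illustrative only),
both intrinsics return the element index of an active lane, or -1 if
no lane is active in both predicates:

  #include <arm_sve.h>

  /* Index of the first lane active in both PG and PN, or -1.  */
  int64_t first_active (svbool_t pg, svbool_t pn)
  {
    return svfirstp_b8 (pg, pn);
  }

  /* Index of the last lane active in both PG and PN, or -1.  */
  int64_t last_active (svbool_t pg, svbool_t pn)
  {
    return svlastp_b8 (pg, pn);
  }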
The intrinsics are implemented in the usual way; the new
svfirst_lastp_impl base class is shared by both families.  Its
->fold () method implements constant folding, except for LASTP under
-msve-vector-bits=scalable: LASTP's result is relative to the end of
the vector, so it can only be folded when the number of elements per
vector is known at compile time.
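For example (mirroring the new general tests), calls with constant
predicate arguments fold to integer constants:

  svfirstp_b32 (svptrue_b32 (), svptrue_b32 ());  /* folds to 0 */
  svfirstp_b8 (svpfalse_b (), svptrue_b8 ());     /* folds to -1 */
  svlastp_b64 (svptrue_b64 (), svptrue_b64 ());   /* folds to 3 under
                                                     -msve-vector-bits=256 */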
On the .md side, the LASTP pattern required a new UNSPEC, since the
number of elements in an SVE vector is generally unknown, whereas the
FIRSTP RTL can be expressed directly in terms of AND, FFS, and PLUS.
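As a worked example of the FIRSTP RTL: if the first true element of
(and pg pn) is element 2, FFS yields 3 and adding the constant -1
gives the expected index 2; an all-false result of the AND gives
FFS = 0 and hence -1.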
Included are the standard asm tests (heavily based on the cntp_* tests
from the sve directory), as well as general C tests demonstrating the
constant folding described above when PG and/or PN are constant
predicates.
[0] https://github.com/ARM-software/acle
gcc/ChangeLog:
* config/aarch64/aarch64-sve-builtins-sve2.cc
(class svfirst_lastp_impl): Define new SVE function base class.
(svfirstp): Define new SVE function base.
(svlastp): Likewise.
* config/aarch64/aarch64-sve-builtins-sve2.def (svfirstp): Define
new SVE function.
(svlastp): Likewise.
* config/aarch64/aarch64-sve-builtins-sve2.h (svfirstp): Declare
new SVE function base.
* config/aarch64/aarch64-sve2.md (@aarch64_pred_firstp<mode>): New
insn pattern.
(@aarch64_pred_lastp<mode>): Likewise.
* config/aarch64/iterators.md (UNSPEC_LASTP): New UNSPEC.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve2/acle/asm/firstp_b16.c: New test.
* gcc.target/aarch64/sve2/acle/asm/firstp_b32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/firstp_b64.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/firstp_b8.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/lastp_b16.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/lastp_b32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/lastp_b64.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/lastp_b8.c: Likewise.
* gcc.target/aarch64/sve2/acle/general/firstp.c: Likewise.
* gcc.target/aarch64/sve2/acle/general/lastp.c: Likewise.
---
.../aarch64/aarch64-sve-builtins-sve2.cc | 61 +++++
.../aarch64/aarch64-sve-builtins-sve2.def | 2 +
.../aarch64/aarch64-sve-builtins-sve2.h | 2 +
gcc/config/aarch64/aarch64-sve2.md | 36 +++
gcc/config/aarch64/iterators.md | 1 +
.../aarch64/sve2/acle/asm/firstp_b16.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/firstp_b32.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/firstp_b64.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/firstp_b8.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/lastp_b16.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/lastp_b32.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/lastp_b64.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/asm/lastp_b8.c | 192 ++++++++++++++++
.../aarch64/sve2/acle/general/firstp.c | 212 ++++++++++++++++++
.../aarch64/sve2/acle/general/lastp.c | 212 ++++++++++++++++++
15 files changed, 2062 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index d45012e7936..5ea08056ae3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -291,6 +291,65 @@ public:
}
};
+class svfirst_lastp_impl : public function_base
+{
+public:
+ CONSTEXPR svfirst_lastp_impl (bool first)
+ : m_first (first)
+ {}
+
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ tree pg = gimple_call_arg (f.call, 0);
+ tree pn = gimple_call_arg (f.call, 1);
+
+ gcc_assert (TYPE_MODE (TREE_TYPE (pg)) == TYPE_MODE (TREE_TYPE (pn)));
+
+ if (is_pfalse (pg) || is_pfalse (pn))
+ return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
+
+ if (TREE_CODE (pg) != VECTOR_CST
+ || TREE_CODE (pn) != VECTOR_CST)
+ return NULL;
+
+ HOST_WIDE_INT nelts_full_vector = aarch64_fold_sve_cnt_pat (AARCH64_SV_ALL,
+ f.elements_per_vq (0));
+ if (!m_first && nelts_full_vector < 0)
+ return NULL;
+
+ tree pa = fold_build2 (BIT_AND_EXPR, TREE_TYPE (pg), pg, pn);
+ gcc_assert (TREE_CODE (pa) == VECTOR_CST);
+
+ int elt_size = f.type_suffix (0).element_bytes;
+ unsigned int nelts = vector_cst_encoded_nelts (pa);
+ for (unsigned int i = 0; i < nelts; i++)
+ {
+ unsigned int idx = m_first ? i : nelts - 1 - i;
+ if (tree_to_shwi (VECTOR_CST_ENCODED_ELT (pa, idx)) != 0)
+ return f.fold_call_to (build_int_cst (TREE_TYPE (f.lhs),
+ m_first
+ ? i / elt_size
+ : (nelts_full_vector - 1
+ - i / elt_size)));
+ }
+
+ return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs)));
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ return e.use_exact_insn (m_first ? code_for_aarch64_pred_firstp (mode)
+ : code_for_aarch64_pred_lastp (mode));
+ }
+
+private:
+ /* True for svfirstp, false for svlastp. */
+ bool m_first;
+};
+
class svld1q_gather_impl : public full_width_access
{
public:
@@ -1023,12 +1082,14 @@ FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1))
FUNCTION (sveorqv, reduction, (UNSPEC_EORQV, UNSPEC_EORQV, -1))
FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1))
FUNCTION (svextq, svextq_impl,)
+FUNCTION (svfirstp, svfirst_lastp_impl, (true))
FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1))
FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1))
FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),)
FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),)
FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB,
UNSPEC_UHSUB, -1))
+FUNCTION (svlastp, svfirst_lastp_impl, (false))
FUNCTION (svld1q_gather, svld1q_gather_impl,)
FUNCTION (svld1udq, svld1uxq_impl, (VNx1DImode))
FUNCTION (svld1uwq, svld1uxq_impl, (VNx1SImode))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index d1795c64e8e..6ecfc2a45c1 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -305,6 +305,8 @@ DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z)
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
+DEF_SVE_FUNCTION (svfirstp, count_pred, all_pred, implicit)
+DEF_SVE_FUNCTION (svlastp, count_pred, all_pred, implicit)
DEF_SVE_FUNCTION (svrint32x, unary, sd_float, mxz)
DEF_SVE_FUNCTION (svrint32z, unary, sd_float, mxz)
DEF_SVE_FUNCTION (svrint64x, unary, sd_float, mxz)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index 8b1581f8568..b2f2698b880 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -80,11 +80,13 @@ namespace aarch64_sve
extern const function_base *const sveorqv;
extern const function_base *const sveortb;
extern const function_base *const svextq;
+ extern const function_base *const svfirstp;
extern const function_base *const svhadd;
extern const function_base *const svhistcnt;
extern const function_base *const svhistseg;
extern const function_base *const svhsub;
extern const function_base *const svhsubr;
+ extern const function_base *const svlastp;
extern const function_base *const svld1q_gather;
extern const function_base *const svld1udq;
extern const function_base *const svld1uwq;
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 69e16571afc..5fc84b79423 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -46,6 +46,7 @@
;; ---- [PRED] Predicate extraction
;; ---- [PRED] Predicate selection
;; ---- [PRED] Predicate count
+;; ---- [PRED] Predicate first/last true element
;;
;; == Uniform unary arithmnetic
;; ---- [FP] General unary arithmetic that maps to unspecs
@@ -721,6 +722,41 @@
[(set_attr "sve_type" "sve_pred_cnt_scalar")]
)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Predicate first/last true element
+;; -------------------------------------------------------------------------
+;; Includes
+;; - FIRSTP (predicate first true element)
+;; - LASTP (predicate last true element)
+;; -------------------------------------------------------------------------
+
+;; Return the index of the first true element in the AND of operands 1
+;; and 2, or -1 if there is no such element.
+(define_insn "@aarch64_pred_firstp<mode>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI
+ (ffs:DI
+ (and:PRED_ALL
+ (match_operand:PRED_ALL 1 "register_operand" "Upl")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")))
+ (const_int -1)))]
+ "TARGET_SVE2p2_OR_SME2p2"
+ "firstp\t%x0, %1, %2.<Vetype>"
+ [(set_attr "sve_type" "sve_pred_cnt_scalar")]
+)
+
+;; Return the index of the last true element in the AND of operands 1
+;; and 2, or -1 if there is no such element.
+(define_insn "@aarch64_pred_lastp<mode>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")]
+ UNSPEC_LASTP))]
+ "TARGET_SVE2p2_OR_SME2p2"
+ "lastp\t%x0, %1, %2.<Vetype>"
+ [(set_attr "sve_type" "sve_pred_cnt_scalar")]
+)
+
;; =========================================================================
;; == Uniform unary arithmnetic
;; =========================================================================
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b711df60f26..dbf9d6272a8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1150,6 +1150,7 @@
UNSPEC_FP8FCVTN ; Used in aarch64-sve2.md.
UNSPEC_HISTCNT ; Used in aarch64-sve2.md.
UNSPEC_HISTSEG ; Used in aarch64-sve2.md.
+ UNSPEC_LASTP ; Used in aarch64-sve2.md.
UNSPEC_LD1_COUNT ; Used in aarch64-sve2.md.
UNSPEC_LDNT1_COUNT ; Used in aarch64-sve2.md.
UNSPEC_MATCH ; Used in aarch64-sve2.md.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
new file mode 100644
index 00000000000..06ea1e1b9ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b16_32:
+** firstp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (firstp_b16_32, uint32_t,
+ x0 = svfirstp_b16 (p0, p1));
+
+/*
+** firstp_b16_64:
+** firstp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (firstp_b16_64, uint64_t,
+ x0 = svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_32_general_x0:
+** firstp x([0-9]+), p0, p1\.h
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_32_general_x0, uint32_t,
+ x0 += svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_32_general_x1:
+** firstp x([0-9]+), p0, p1\.h
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.h
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_64_general_x0, uint64_t,
+ x0 += svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.h
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b16_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_32_general_x0:
+** firstp x([0-9]+), p0, p1\.h
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_32_general_x0, uint32_t,
+ x0 -= svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_32_general_x1:
+** firstp x([0-9]+), p0, p1\.h
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.h
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_64_general_x0, uint64_t,
+ x0 -= svfirstp_b16 (p0, p1));
+
+/*
+** firstp_dec_b16_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.h
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b16_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b16 (p0, p1));
+
+/*
+** firstp_inc_b16_u16_general_z0:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z0\.h, \2|\2, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z0, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_inc_b16_u16_general_z1:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z1\.h, \2|\2, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z1, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_inc_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z0\.h, \3|\3, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z0, svuint16_t,
+		z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
+		z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
+
+/*
+** firstp_inc_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z1\.h, \3|\3, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z1, svuint16_t,
+		z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
+		z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
+
+/*
+** firstp_dec_b16_u16_general_z0:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z0\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z0, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_dec_b16_u16_general_z1:
+** firstp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z1\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z1, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)));
+
+/*
+** firstp_dec_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z0\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z0, svuint16_t,
+		z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)),
+		z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)));
+
+/*
+** firstp_dec_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z1\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z1, svuint16_t,
+		z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)),
+		z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
new file mode 100644
index 00000000000..668920bba16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b32_32:
+** firstp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (firstp_b32_32, uint32_t,
+ x0 = svfirstp_b32 (p0, p1));
+
+/*
+** firstp_b32_64:
+** firstp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (firstp_b32_64, uint64_t,
+ x0 = svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_32_general_x0:
+** firstp x([0-9]+), p0, p1\.s
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_32_general_x0, uint32_t,
+ x0 += svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_32_general_x1:
+** firstp x([0-9]+), p0, p1\.s
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.s
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_64_general_x0, uint64_t,
+ x0 += svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.s
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b32_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_32_general_x0:
+** firstp x([0-9]+), p0, p1\.s
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_32_general_x0, uint32_t,
+ x0 -= svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_32_general_x1:
+** firstp x([0-9]+), p0, p1\.s
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.s
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_64_general_x0, uint64_t,
+ x0 -= svfirstp_b32 (p0, p1));
+
+/*
+** firstp_dec_b32_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.s
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b32_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b32 (p0, p1));
+
+/*
+** firstp_inc_b32_u32_general_z0:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z0\.s, \2|\2, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z0, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_inc_b32_u32_general_z1:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z1\.s, \2|\2, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z1, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_inc_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z0\.s, \3|\3, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z0, svuint32_t,
+		z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
+		z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
+
+/*
+** firstp_inc_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z1\.s, \3|\3, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z1, svuint32_t,
+		z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
+		z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
+
+/*
+** firstp_dec_b32_u32_general_z0:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z0\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z0, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_dec_b32_u32_general_z1:
+** firstp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z1\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z1, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)));
+
+/*
+** firstp_dec_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z0\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z0, svuint32_t,
+		z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)),
+		z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)));
+
+/*
+** firstp_dec_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z1\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z1, svuint32_t,
+		z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)),
+		z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
new file mode 100644
index 00000000000..330b0b04768
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b64_32:
+** firstp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (firstp_b64_32, uint32_t,
+ x0 = svfirstp_b64 (p0, p1));
+
+/*
+** firstp_b64_64:
+** firstp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (firstp_b64_64, uint64_t,
+ x0 = svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_32_general_x0:
+** firstp x([0-9]+), p0, p1\.d
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_32_general_x0, uint32_t,
+ x0 += svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_32_general_x1:
+** firstp x([0-9]+), p0, p1\.d
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.d
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_64_general_x0, uint64_t,
+ x0 += svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.d
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b64_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_32_general_x0:
+** firstp x([0-9]+), p0, p1\.d
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_32_general_x0, uint32_t,
+ x0 -= svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_32_general_x1:
+** firstp x([0-9]+), p0, p1\.d
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.d
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_64_general_x0, uint64_t,
+ x0 -= svfirstp_b64 (p0, p1));
+
+/*
+** firstp_dec_b64_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.d
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b64_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b64 (p0, p1));
+
+/*
+** firstp_inc_b64_u64_general_z0:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z0\.d, \2|\2, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z0, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_inc_b64_u64_general_z1:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z1\.d, \2|\2, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z1, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_inc_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z0\.d, \3|\3, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z0, svuint64_t,
+		z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
+		z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
+
+/*
+** firstp_inc_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z1\.d, \3|\3, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z1, svuint64_t,
+		z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
+		z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
+
+/*
+** firstp_dec_b64_u64_general_z0:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z0\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z0, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_dec_b64_u64_general_z1:
+** firstp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z1\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z1, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)));
+
+/*
+** firstp_dec_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z0\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z0, svuint64_t,
+		z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)),
+		z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)));
+
+/*
+** firstp_dec_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z1\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z1, svuint64_t,
+		z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)),
+		z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
new file mode 100644
index 00000000000..653d903577a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** firstp_b8_32:
+** firstp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (firstp_b8_32, uint32_t,
+ x0 = svfirstp_b8 (p0, p1));
+
+/*
+** firstp_b8_64:
+** firstp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (firstp_b8_64, uint64_t,
+ x0 = svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_32_general_x0:
+** firstp x([0-9]+), p0, p1\.b
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_32_general_x0, uint32_t,
+ x0 += svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_32_general_x1:
+** firstp x([0-9]+), p0, p1\.b
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_32_general_x1, uint32_t,
+ x0 = x1 + svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.b
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_64_general_x0, uint64_t,
+ x0 += svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.b
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (firstp_inc_b8_64_general_x1, uint64_t,
+ x0 = x1 + svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_32_general_x0:
+** firstp x([0-9]+), p0, p1\.b
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_32_general_x0, uint32_t,
+ x0 -= svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_32_general_x1:
+** firstp x([0-9]+), p0, p1\.b
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_32_general_x1, uint32_t,
+ x0 = x1 - svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_64_general_x0:
+** firstp (x[0-9]+), p0, p1\.b
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_64_general_x0, uint64_t,
+ x0 -= svfirstp_b8 (p0, p1));
+
+/*
+** firstp_dec_b8_64_general_x1:
+** firstp (x[0-9]+), p0, p1\.b
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (firstp_dec_b8_64_general_x1, uint64_t,
+ x0 = x1 - svfirstp_b8 (p0, p1));
+
+/*
+** firstp_inc_b8_u8_general_z0:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z0\.b, \2|\2, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z0, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_inc_b8_u8_general_z1:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z1\.b, \2|\2, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z1, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_inc_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z0\.b, \3|\3, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z0, svuint8_t,
+		z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
+		z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
+
+/*
+** firstp_inc_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z1\.b, \3|\3, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z1, svuint8_t,
+		z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
+		z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
+
+/*
+** firstp_dec_b8_u8_general_z0:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z0\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z0, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_dec_b8_u8_general_z1:
+** firstp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z1\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z1, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)));
+
+/*
+** firstp_dec_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z0\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z0, svuint8_t,
+		z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)),
+		z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)));
+
+/*
+** firstp_dec_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** firstp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z1\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z1, svuint8_t,
+		z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)),
+		z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
new file mode 100644
index 00000000000..e70df211cf9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b16_32:
+** lastp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (lastp_b16_32, uint32_t,
+ x0 = svlastp_b16 (p0, p1));
+
+/*
+** lastp_b16_64:
+** lastp x0, p0, p1\.h
+** ret
+*/
+TEST_PTEST (lastp_b16_64, uint64_t,
+ x0 = svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_32_general_x0:
+** lastp x([0-9]+), p0, p1\.h
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_32_general_x0, uint32_t,
+ x0 += svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_32_general_x1:
+** lastp x([0-9]+), p0, p1\.h
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.h
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_64_general_x0, uint64_t,
+ x0 += svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.h
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b16_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_32_general_x0:
+** lastp x([0-9]+), p0, p1\.h
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_32_general_x0, uint32_t,
+ x0 -= svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_32_general_x1:
+** lastp x([0-9]+), p0, p1\.h
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.h
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_64_general_x0, uint64_t,
+ x0 -= svlastp_b16 (p0, p1));
+
+/*
+** lastp_dec_b16_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.h
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b16_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b16 (p0, p1));
+
+/*
+** lastp_inc_b16_u16_general_z0:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z0\.h, \2|\2, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z0, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_inc_b16_u16_general_z1:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** add z0\.h, (z1\.h, \2|\2, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z1, svuint16_t,
+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
+ z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_inc_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z0\.h, \3|\3, z0\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z0, svuint16_t,
+		z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
+		z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
+
+/*
+** lastp_inc_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** add z0\.h, (z1\.h, \3|\3, z1\.h)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z1, svuint16_t,
+		z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
+		z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
+
+/*
+** lastp_dec_b16_u16_general_z0:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z0\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z0, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_dec_b16_u16_general_z1:
+** lastp x([0-9]+), p0, p1\.h
+** mov (z[0-9]+\.h), w\1
+** sub z0\.h, z1\.h, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z1, svuint16_t,
+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)),
+ z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)));
+
+/*
+** lastp_dec_b16_u16_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z0\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z0, svuint16_t,
+		z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)),
+		z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)));
+
+/*
+** lastp_dec_b16_u16_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.h
+** mov (z[0-9]+\.h), w\2
+** sub z0\.h, z1\.h, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z1, svuint16_t,
+		z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)),
+		z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
new file mode 100644
index 00000000000..b5b64407f7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b32_32:
+** lastp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (lastp_b32_32, uint32_t,
+ x0 = svlastp_b32 (p0, p1));
+
+/*
+** lastp_b32_64:
+** lastp x0, p0, p1\.s
+** ret
+*/
+TEST_PTEST (lastp_b32_64, uint64_t,
+ x0 = svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_32_general_x0:
+** lastp x([0-9]+), p0, p1\.s
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_32_general_x0, uint32_t,
+ x0 += svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_32_general_x1:
+** lastp x([0-9]+), p0, p1\.s
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.s
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_64_general_x0, uint64_t,
+ x0 += svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.s
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b32_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_32_general_x0:
+** lastp x([0-9]+), p0, p1\.s
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_32_general_x0, uint32_t,
+ x0 -= svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_32_general_x1:
+** lastp x([0-9]+), p0, p1\.s
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.s
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_64_general_x0, uint64_t,
+ x0 -= svlastp_b32 (p0, p1));
+
+/*
+** lastp_dec_b32_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.s
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b32_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b32 (p0, p1));
+
+/*
+** lastp_inc_b32_u32_general_z0:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z0\.s, \2|\2, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z0, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_inc_b32_u32_general_z1:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** add z0\.s, (z1\.s, \2|\2, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z1, svuint32_t,
+ z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
+ z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_inc_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z0\.s, \3|\3, z0\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z0, svuint32_t,
+		z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
+		z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
+
+/*
+** lastp_inc_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** add z0\.s, (z1\.s, \3|\3, z1\.s)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z1, svuint32_t,
+		z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
+		z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
+
+/*
+** lastp_dec_b32_u32_general_z0:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z0\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z0, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_dec_b32_u32_general_z1:
+** lastp x([0-9]+), p0, p1\.s
+** mov (z[0-9]+\.s), w\1
+** sub z0\.s, z1\.s, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z1, svuint32_t,
+ z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)),
+ z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)));
+
+/*
+** lastp_dec_b32_u32_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z0\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z0, svuint32_t,
+		z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)),
+		z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)));
+
+/*
+** lastp_dec_b32_u32_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.s
+** mov (z[0-9]+\.s), w\2
+** sub z0\.s, z1\.s, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z1, svuint32_t,
+		z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)),
+		z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
new file mode 100644
index 00000000000..343be3da9f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b64_32:
+** lastp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (lastp_b64_32, uint32_t,
+ x0 = svlastp_b64 (p0, p1));
+
+/*
+** lastp_b64_64:
+** lastp x0, p0, p1\.d
+** ret
+*/
+TEST_PTEST (lastp_b64_64, uint64_t,
+ x0 = svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_32_general_x0:
+** lastp x([0-9]+), p0, p1\.d
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_32_general_x0, uint32_t,
+ x0 += svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_32_general_x1:
+** lastp x([0-9]+), p0, p1\.d
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.d
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_64_general_x0, uint64_t,
+ x0 += svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.d
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b64_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_32_general_x0:
+** lastp x([0-9]+), p0, p1\.d
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_32_general_x0, uint32_t,
+ x0 -= svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_32_general_x1:
+** lastp x([0-9]+), p0, p1\.d
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.d
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_64_general_x0, uint64_t,
+ x0 -= svlastp_b64 (p0, p1));
+
+/*
+** lastp_dec_b64_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.d
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b64_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b64 (p0, p1));
+
+/*
+** lastp_inc_b64_u64_general_z0:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z0\.d, \2|\2, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z0, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_inc_b64_u64_general_z1:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** add z0\.d, (z1\.d, \2|\2, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z1, svuint64_t,
+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
+ z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_inc_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z0\.d, \3|\3, z0\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z0, svuint64_t,
+		z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
+		z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
+
+/*
+** lastp_inc_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** add z0\.d, (z1\.d, \3|\3, z1\.d)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z1, svuint64_t,
+		z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
+		z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
+
+/*
+** lastp_dec_b64_u64_general_z0:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z0\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z0, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_dec_b64_u64_general_z1:
+** lastp (x[0-9]+), p0, p1\.d
+** mov (z[0-9]+\.d), \1
+** sub z0\.d, z1\.d, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z1, svuint64_t,
+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)),
+ z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)));
+
+/*
+** lastp_dec_b64_u64_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z0\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z0, svuint64_t,
+		z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)),
+		z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)));
+
+/*
+** lastp_dec_b64_u64_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp (x[0-9]+), \1, p0\.d
+** mov (z[0-9]+\.d), \2
+** sub z0\.d, z1\.d, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z1, svuint64_t,
+		z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)),
+		z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
new file mode 100644
index 00000000000..5fa0f26f5b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c
@@ -0,0 +1,192 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-additional-options "-msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+#include <stdbool.h>
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** lastp_b8_32:
+** lastp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (lastp_b8_32, uint32_t,
+ x0 = svlastp_b8 (p0, p1));
+
+/*
+** lastp_b8_64:
+** lastp x0, p0, p1\.b
+** ret
+*/
+TEST_PTEST (lastp_b8_64, uint64_t,
+ x0 = svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_32_general_x0:
+** lastp x([0-9]+), p0, p1\.b
+** add w0, (w0, w\1|w\1, w0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_32_general_x0, uint32_t,
+ x0 += svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_32_general_x1:
+** lastp x([0-9]+), p0, p1\.b
+** add w0, (w1, w\1|w\1, w1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_32_general_x1, uint32_t,
+ x0 = x1 + svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.b
+** add x0, (x0, \1|\1, x0)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_64_general_x0, uint64_t,
+ x0 += svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.b
+** add x0, (x1, \1|\1, x1)
+** ret
+*/
+TEST_PTEST (lastp_inc_b8_64_general_x1, uint64_t,
+ x0 = x1 + svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_32_general_x0:
+** lastp x([0-9]+), p0, p1\.b
+** sub w0, w0, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_32_general_x0, uint32_t,
+ x0 -= svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_32_general_x1:
+** lastp x([0-9]+), p0, p1\.b
+** sub w0, w1, w\1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_32_general_x1, uint32_t,
+ x0 = x1 - svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_64_general_x0:
+** lastp (x[0-9]+), p0, p1\.b
+** sub x0, x0, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_64_general_x0, uint64_t,
+ x0 -= svlastp_b8 (p0, p1));
+
+/*
+** lastp_dec_b8_64_general_x1:
+** lastp (x[0-9]+), p0, p1\.b
+** sub x0, x1, \1
+** ret
+*/
+TEST_PTEST (lastp_dec_b8_64_general_x1, uint64_t,
+ x0 = x1 - svlastp_b8 (p0, p1));
+
+/*
+** lastp_inc_b8_u8_general_z0:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z0\.b, \2|\2, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z0, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_inc_b8_u8_general_z1:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** add z0\.b, (z1\.b, \2|\2, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z1, svuint8_t,
+ z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
+ z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_inc_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z0\.b, \3|\3, z0\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z0, svuint8_t,
+		z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
+		z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
+
+/*
+** lastp_inc_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** add z0\.b, (z1\.b, \3|\3, z1\.b)
+** ret
+*/
+TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z1, svuint8_t,
+		z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
+		z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
+
+/*
+** lastp_dec_b8_u8_general_z0:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z0\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z0, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_dec_b8_u8_general_z1:
+** lastp x([0-9]+), p0, p1\.b
+** mov (z[0-9]+\.b), w\1
+** sub z0\.b, z1\.b, \2
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z1, svuint8_t,
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)),
+ z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)));
+
+/*
+** lastp_dec_b8_u8_ptrue_z0:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z0\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z0, svuint8_t,
+		z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)),
+		z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)));
+
+/*
+** lastp_dec_b8_u8_ptrue_z1:
+** ptrue (p[0-7])\.b, all
+** lastp x([0-9]+), \1, p0\.b
+** mov (z[0-9]+\.b), w\2
+** sub z0\.b, z1\.b, \3
+** ret
+*/
+TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z1, svuint8_t,
+		z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)),
+		z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
new file mode 100644
index 00000000000..c61a308bc89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c
@@ -0,0 +1,212 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p2"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test1 ()
+{
+ return svfirstp_b8 (svptrue_b8 (),
+ svptrue_b8 ());
+}
+
+/*
+** test2:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test2 ()
+{
+ return svfirstp_b8 (svpfalse_b (),
+ svptrue_b8 ());
+}
+
+/*
+** test3:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test3 ()
+{
+ return svfirstp_b8 (svptrue_b8 (),
+ svpfalse_b ());
+}
+
+/*
+** test4:
+** mov x0, 15
+** ret
+*/
+uint64_t
+test4 ()
+{
+ return svfirstp_b8 (svdupq_n_b8 (false, false, false, false,
+ false, false, false, false,
+ false, false, false, false,
+ false, false, false, true),
+ svptrue_b8 ());
+}
+
+/*
+** test5:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test5 ()
+{
+ return svfirstp_b16 (svptrue_b16 (),
+ svptrue_b16 ());
+}
+
+/*
+** test6:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test6 ()
+{
+ return svfirstp_b16 (svpfalse_b (),
+ svptrue_b16 ());
+}
+
+/*
+** test7:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test7 ()
+{
+ return svfirstp_b16 (svptrue_b16 (),
+ svpfalse_b ());
+}
+
+/*
+** test8:
+** mov x0, 7
+** ret
+*/
+uint64_t
+test8 ()
+{
+ return svfirstp_b16 (svdupq_n_b16 (false, false, false, false,
+ false, false, false, true),
+ svptrue_b16 ());
+}
+
+/*
+** test9:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test9 ()
+{
+ return svfirstp_b32 (svptrue_b32 (),
+ svptrue_b32 ());
+}
+
+/*
+** test10:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test10 ()
+{
+ return svfirstp_b32 (svpfalse_b (),
+ svptrue_b32 ());
+}
+
+/*
+** test11:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test11 ()
+{
+ return svfirstp_b32 (svptrue_b32 (),
+ svpfalse_b ());
+}
+
+/*
+** test12:
+** mov x0, 3
+** ret
+*/
+uint64_t
+test12 ()
+{
+ return svfirstp_b32 (svdupq_n_b32 (false, false, false, true),
+ svptrue_b32 ());
+}
+
+/*
+** test13:
+** mov x0, 0
+** ret
+*/
+uint64_t
+test13 ()
+{
+ return svfirstp_b64 (svptrue_b64 (),
+ svptrue_b64 ());
+}
+
+/*
+** test14:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test14 ()
+{
+ return svfirstp_b64 (svpfalse_b (),
+ svptrue_b64 ());
+}
+
+/*
+** test15:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test15 ()
+{
+ return svfirstp_b64 (svptrue_b64 (),
+ svpfalse_b ());
+}
+
+/*
+** test16:
+** mov x0, 1
+** ret
+*/
+uint64_t
+test16 ()
+{
+ return svfirstp_b64 (svdupq_n_b64 (false, true),
+ svptrue_b64 ());
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
new file mode 100644
index 00000000000..2dbb65d798d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c
@@ -0,0 +1,212 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-options "-O2 -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p2"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** test1:
+** mov x0, 31
+** ret
+*/
+uint64_t
+test1 ()
+{
+ return svlastp_b8 (svptrue_b8 (),
+ svptrue_b8 ());
+}
+
+/*
+** test2:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test2 ()
+{
+ return svlastp_b8 (svpfalse_b (),
+ svptrue_b8 ());
+}
+
+/*
+** test3:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test3 ()
+{
+ return svlastp_b8 (svptrue_b8 (),
+ svpfalse_b ());
+}
+
+/*
+** test4:
+** mov x0, 31
+** ret
+*/
+uint64_t
+test4 ()
+{
+ return svlastp_b8 (svdupq_n_b8 (false, false, false, false,
+ false, false, false, false,
+ false, false, false, false,
+ false, false, false, true),
+ svptrue_b8 ());
+}
+
+/*
+** test5:
+** mov x0, 15
+** ret
+*/
+uint64_t
+test5 ()
+{
+ return svlastp_b16 (svptrue_b16 (),
+ svptrue_b16 ());
+}
+
+/*
+** test6:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test6 ()
+{
+ return svlastp_b16 (svpfalse_b (),
+ svptrue_b16 ());
+}
+
+/*
+** test7:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test7 ()
+{
+ return svlastp_b16 (svptrue_b16 (),
+ svpfalse_b ());
+}
+
+/*
+** test8:
+** mov x0, 15
+** ret
+*/
+uint64_t
+test8 ()
+{
+ return svlastp_b16 (svdupq_n_b16 (false, false, false, false,
+ false, false, false, true),
+ svptrue_b16 ());
+}
+
+/*
+** test9:
+** mov x0, 7
+** ret
+*/
+uint64_t
+test9 ()
+{
+ return svlastp_b32 (svptrue_b32 (),
+ svptrue_b32 ());
+}
+
+/*
+** test10:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test10 ()
+{
+ return svlastp_b32 (svpfalse_b (),
+ svptrue_b32 ());
+}
+
+/*
+** test11:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test11 ()
+{
+ return svlastp_b32 (svptrue_b32 (),
+ svpfalse_b ());
+}
+
+/*
+** test12:
+** mov x0, 7
+** ret
+*/
+uint64_t
+test12 ()
+{
+ return svlastp_b32 (svdupq_n_b32 (false, false, false, true),
+ svptrue_b32 ());
+}
+
+/*
+** test13:
+** mov x0, 3
+** ret
+*/
+uint64_t
+test13 ()
+{
+ return svlastp_b64 (svptrue_b64 (),
+ svptrue_b64 ());
+}
+
+/*
+** test14:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test14 ()
+{
+ return svlastp_b64 (svpfalse_b (),
+ svptrue_b64 ());
+}
+
+/*
+** test15:
+** mov x0, -1
+** ret
+*/
+uint64_t
+test15 ()
+{
+ return svlastp_b64 (svptrue_b64 (),
+ svpfalse_b ());
+}
+
+/*
+** test16:
+** mov x0, 3
+** ret
+*/
+uint64_t
+test16 ()
+{
+ return svlastp_b64 (svdupq_n_b64 (false, true),
+ svptrue_b64 ());
+}
+
+#ifdef __cplusplus
+}
+#endif
--
2.43.0