From: Karl Meakin <[email protected]>

Port the following intrinsics to the pragma-based framework:
* vand
* vbcax
* vbic
* vbsl
* vcls
* vclz
* vcnt
* veor
* veor3
* vmvn
* vorn
* vorr
* vrax1
* vrbit
* vxar

gcc/ChangeLog:

        * config/aarch64/aarch64.md (UNSPEC_BSL): Delete unspec.
        * config/aarch64/aarch64-simd-pragma-builtins.def (vbsl_mf8,
        vbslq_mf8): Delete functions.
        * config/aarch64/aarch64-neon-builtins-base.cc (build_cast): New
        function.
        (class gimple_not_rhs, class gimple_bsl, class gimple_rbit,
        class gimple_eor3, class gimple_bcax, class gimple_rax1, class
        gimple_xar, class gimple_ifn): New classes.
        (vand, vandq, vbic, vbicq, vbsl, vbslq, veor, veorq, vmvn,
        vmvnq, vorn, vornq, vorr, vorrq, vrbit, vrbitq, vbcaxq, veor3q,
        vrax1q, vxarq, vcls, vclsq, vclz, vclzq, vcnt, vcntq): New
        function bases.
        * config/aarch64/aarch64-neon-builtins-shapes.cc (shift): New function.
        * config/aarch64/aarch64-builtins.cc
        (aarch64_types_bsl_p_qualifiers,
        aarch64_types_bsl_s_qualifiers,
        aarch64_types_bsl_u_qualifiers): Delete unused qualifiers.
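        * config/aarch64/aarch64-simd.md
        (@aarch64_rbit<mode><vczle><vczbe>): Add `@` modifier so that
        it is callable from `aarch64-neon-builtins-base.cc`.
        (aarch64_rax1qv2di): Take the rotate amount as a vector immediate
        operand that must be a splat of 1, and renumber the operands.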
        * config/aarch64/aarch64-acle-builtins.h (TYPES_b_neon,
        TYPES_b_poly): New type lists.
        * config/aarch64/aarch64-neon-builtins-base.def (vand, vandq,
        vbic, vbicq, vbsl, vbslq, veor, veorq, vmvn, vmvnq, vorn,
        vornq, vorr, vorrq, vrbit, vrbitq, vbcaxq, veor3q, vrax1q,
        vxarq, vcls, vclsq, vclz, vclzq, vcnt, vcntq): New function
        groups.
        * config/aarch64/aarch64-simd-builtins.def (clrsb, clz, ctz,
        popcount, bswap, rbit, simd_bsl, eor3q, rax1q, xarq, bcaxq):
        Delete builtin functions.
        * config/aarch64/arm_neon.h (vbsl_f16, vbsl_f32, vbsl_f64,
        vbsl_p8, vbsl_p16, vbsl_p64, vbsl_s8, vbsl_s16, vbsl_s32,
        vbsl_s64, vbsl_u8, vbsl_u16, vbsl_u32, vbsl_u64, vbslq_f16,
        vbslq_f32, vbslq_f64, vbslq_p8, vbslq_p16, vbslq_s8,
        vbslq_s16, vbslq_p64, vbslq_s32, vbslq_s64, vbslq_u8,
        vbslq_u16, vbslq_u32, vbslq_u64, vcls_s8, vcls_s16, vcls_s32,
        vclsq_s8, vclsq_s16, vclsq_s32, vcls_u8, vcls_u16, vcls_u32,
        vclsq_u8, vclsq_u16, vclsq_u32, vclz_s8, vclz_s16, vclz_s32,
        vclz_u8, vclz_u16, vclz_u32, vclzq_s8, vclzq_s16, vclzq_s32,
        vclzq_u8, vclzq_u16, vclzq_u32, vcnt_p8, vcnt_s8, vcnt_u8,
        vcntq_p8, vcntq_s8, vcntq_u8, vrbit_p8, vrbit_s8, vrbit_u8,
        vrbitq_p8, vrbitq_s8, vrbitq_u8, veor3q_u8, veor3q_u16,
        veor3q_u32, veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32,
        veor3q_s64, vrax1q_u64, vxarq_u64, vbcaxq_u8, vbcaxq_u16,
        vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
        vbcaxq_s64): Delete functions.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/neon/vand.c: New test.
        * gcc.target/aarch64/neon/vbcax.c: New test.
        * gcc.target/aarch64/neon/vbic.c: New test.
        * gcc.target/aarch64/neon/vbsl.c: New test.
        * gcc.target/aarch64/neon/vcls.c: New test.
        * gcc.target/aarch64/neon/vclz.c: New test.
        * gcc.target/aarch64/neon/vcnt.c: New test.
        * gcc.target/aarch64/neon/veor.c: New test.
        * gcc.target/aarch64/neon/veor3.c: New test.
        * gcc.target/aarch64/neon/vmvn.c: New test.
        * gcc.target/aarch64/neon/vorn.c: New test.
        * gcc.target/aarch64/neon/vorr.c: New test.
        * gcc.target/aarch64/neon/vrax1.c: New test.
        * gcc.target/aarch64/neon/vrbit.c: New test.
        * gcc.target/aarch64/neon/vxar.c: New test.
        * gcc.target/aarch64/sme/inlining_10.c: Delete `call_vbsl` since
        the intrinsic is no longer implemented as an `always_inline`
        function.
        * gcc.target/aarch64/sme/inlining_11.c: Likewise.
        * gcc.target/aarch64/sha3_1.c,
        gcc.target/aarch64/sha3_2.c,
        gcc.target/aarch64/sha3_3.c: Now produces worse assembly at
        `-O0`. Add `-O1` flag to ensure expected optimized assembly is
        emitted.
        * gcc.target/aarch64/target_attr_10.c: Fix expected error message.
---
 gcc/config/aarch64/aarch64-acle-builtins.h    |    9 +
 gcc/config/aarch64/aarch64-builtins.cc        |   20 -
 .../aarch64/aarch64-neon-builtins-base.cc     |  207 +++
 .../aarch64/aarch64-neon-builtins-base.def    |   40 +
 .../aarch64/aarch64-neon-builtins-shapes.cc   |    8 +
 gcc/config/aarch64/aarch64-simd-builtins.def  |   24 -
 .../aarch64/aarch64-simd-pragma-builtins.def  |    6 -
 gcc/config/aarch64/aarch64-simd.md            |   13 +-
 gcc/config/aarch64/aarch64.md                 |    1 -
 gcc/config/aarch64/arm_neon.h                 | 1452 ++---------------
 gcc/testsuite/gcc.target/aarch64/neon/vand.c  |  116 ++
 gcc/testsuite/gcc.target/aarch64/neon/vbcax.c |   60 +
 gcc/testsuite/gcc.target/aarch64/neon/vbic.c  |  116 ++
 gcc/testsuite/gcc.target/aarch64/neon/vbsl.c  |  214 +++
 gcc/testsuite/gcc.target/aarch64/neon/vcls.c  |   88 +
 gcc/testsuite/gcc.target/aarch64/neon/vclz.c  |   88 +
 gcc/testsuite/gcc.target/aarch64/neon/vcnt.c  |   25 +
 gcc/testsuite/gcc.target/aarch64/neon/veor.c  |  116 ++
 gcc/testsuite/gcc.target/aarch64/neon/veor3.c |   60 +
 gcc/testsuite/gcc.target/aarch64/neon/vmvn.c  |  102 ++
 gcc/testsuite/gcc.target/aarch64/neon/vorn.c  |  116 ++
 gcc/testsuite/gcc.target/aarch64/neon/vorr.c  |  116 ++
 gcc/testsuite/gcc.target/aarch64/neon/vrax1.c |   11 +
 gcc/testsuite/gcc.target/aarch64/neon/vrbit.c |   46 +
 gcc/testsuite/gcc.target/aarch64/neon/vxar.c  |   25 +
 gcc/testsuite/gcc.target/aarch64/sha3_1.c     |    2 +-
 gcc/testsuite/gcc.target/aarch64/sha3_2.c     |    2 +-
 gcc/testsuite/gcc.target/aarch64/sha3_3.c     |    2 +-
 .../gcc.target/aarch64/sme/inlining_10.c      |    6 +-
 .../gcc.target/aarch64/sme/inlining_11.c      |    7 +-
 .../gcc.target/aarch64/target_attr_10.c       |    4 +-
 31 files changed, 1684 insertions(+), 1418 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vand.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vbcax.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vbic.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vbsl.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vcls.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vclz.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vcnt.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/veor.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/veor3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vmvn.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vorn.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vorr.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vrax1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vrbit.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vxar.c

diff --git a/gcc/config/aarch64/aarch64-acle-builtins.h 
b/gcc/config/aarch64/aarch64-acle-builtins.h
index 76b565adba5d..f570434810f5 100644
--- a/gcc/config/aarch64/aarch64-acle-builtins.h
+++ b/gcc/config/aarch64/aarch64-acle-builtins.h
@@ -1784,6 +1784,14 @@ void build_all (function_builder &b, const char 
*signature,
   TYPES_bhsd_neon (S, D, T), \
   TYPES_h_bfloat (S, D, T)
 
+/* _p8 _s8 _u8.  */
+#define TYPES_b_neon(S, D, T) \
+  S (p8), S (s8), S (u8)
+
+/* _p8.  */
+#define TYPES_b_poly(S, D, T) \
+  S (p8)
+
 /* _p8 _p16 _p64.  */
 #define TYPES_bhd_poly(S, D, T) \
   S (p8), S (p16), S (p64)
@@ -1951,6 +1959,7 @@ DEF_SVE_TYPES_ARRAY (b_float);
 DEF_SVE_TYPES_ARRAY (all_neon);
 DEF_SVE_TYPES_ARRAY (b_neon);
 DEF_SVE_TYPES_ARRAY (h_neon);
+DEF_SVE_TYPES_ARRAY (b_poly);
 DEF_SVE_TYPES_ARRAY (bhd_poly);
 DEF_SVE_TYPES_ARRAY (bhdq_poly);
 DEF_SVE_TYPES_ARRAY (bhsd_neon);
diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 3f74203c3c35..fdf5468f93af 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -417,22 +417,6 @@ 
aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_poly, qualifier_struct_load_store_lane_index };
 #define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)
 
-static enum aarch64_type_qualifiers
-aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_poly, qualifier_unsigned,
-      qualifier_poly, qualifier_poly };
-#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
-static enum aarch64_type_qualifiers
-aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_none, qualifier_unsigned,
-      qualifier_none, qualifier_none };
-#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
-static enum aarch64_type_qualifiers
-aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_unsigned,
-      qualifier_unsigned, qualifier_unsigned };
-#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)
-
 /* The first argument (return type) of a store should be void type,
    which we represent with qualifier_void.  Their first operand will be
    a DImode pointer to the location to store to, so we must use
@@ -4091,10 +4075,6 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
   insn_code icode;
   switch (builtin_data.unspec)
     {
-    case UNSPEC_BSL:
-      icode = code_for_aarch64_simd_bsl (ops[0].mode);
-      break;
-
     case UNSPEC_DUP:
       if (builtin_data.signature == aarch64_builtin_signatures::load)
        aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode));
diff --git a/gcc/config/aarch64/aarch64-neon-builtins-base.cc 
b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
index 6ae24be6ac81..dfe25875da8b 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
@@ -47,6 +47,15 @@
 
 using namespace aarch64_acle;
 
+/* Return EXPR viewed as TYPE: EXPR itself when it already has that type,
+   otherwise a folded VIEW_CONVERT_EXPR wrapping it.  */
+tree
+build_cast (tree type, tree expr)
+{
+  const bool same_type = TREE_TYPE (expr) == type;
+  return same_type ? expr : fold_build1 (VIEW_CONVERT_EXPR, type, expr);
+}
+
 /* Build a `VEC[INDEX]` expression.  */
 tree
 build_lane_get (tree vec, tree index)
@@ -249,6 +258,172 @@ struct gimple_dup_lane : public gimple_function_base
   }
 };
 
+/* For two-operand intrinsics folded to `arg0 CODE ~arg1`: BIT_NOT_EXPR is
+   applied to the second argument before combining it with the first.  */
+class gimple_not_rhs : public gimple_function_base
+{
+  tree_code m_code;
+
+public:
+  constexpr gimple_not_rhs (tree_code code)
+    : m_code (code)
+    {}
+
+  gimple *fold (gimple_folder &f) const override
+  {
+    auto lhs = gimple_call_arg (f.call, 0);
+    auto rhs = gimple_call_arg (f.call, 1);
+    auto type = TREE_TYPE (lhs);
+
+    // tmp1 = ~rhs, built in the type of the first argument
+    auto tmp1 = f.force_val (fold_build1 (BIT_NOT_EXPR, type, rhs));
+    return gimple_build_assign (f.lhs, this->m_code, lhs, tmp1);
+  }
+};
+
+/* BSL (d, n, m) == m ^ ((m ^ n) & d): bits of N where D is set, else M.  */
+class gimple_bsl : public gimple_function_base
+{
+public:
+  gimple *fold (gimple_folder &f) const override
+  {
+    auto d = gimple_call_arg (f.call, 0);
+    auto n = gimple_call_arg (f.call, 1);
+    auto m = gimple_call_arg (f.call, 2);
+
+    auto uint_type = TREE_TYPE (d);
+    auto ret_type = TREE_TYPE (f.lhs);
+
+    // View M and N in D's type (the mask type) when they differ from it.
+    m = f.force_val (build_cast (uint_type, m));
+    n = f.force_val (build_cast (uint_type, n));
+
+    // tmp1 = m ^ n
+    auto tmp1 = f.force_val (fold_build2 (BIT_XOR_EXPR, uint_type, m, n));
+
+    // tmp2 = (m ^ n) & d
+    auto tmp2 = f.force_val (fold_build2 (BIT_AND_EXPR, uint_type, tmp1, d));
+
+    // tmp3 = m ^ ((m ^ n) & d), still in the mask type; cast back on return
+    auto tmp3 = f.force_val (fold_build2 (BIT_XOR_EXPR, uint_type, m, tmp2));
+
+    return gimple_build_assign (f.lhs, build_cast (ret_type, tmp3));
+  }
+};
+
+/* Expands to the aarch64_rbit insn.  FIXME: how to express this in GIMPLE?  */
+class gimple_rbit : public gimple_function_base
+{
+  rtx expand (function_expander &e) const override
+  {
+    return e.use_exact_insn (code_for_aarch64_rbit (e.args[0]->mode));
+  }
+  // No GIMPLE form yet (see FIXME above): fold always returns nullptr.
+  gimple *fold (gimple_folder &) const override { return nullptr; }
+};
+
+/* EOR3 (a, b, c) = (a ^ b) ^ c, built from two BIT_XOR_EXPRs.  */
+class gimple_eor3 : public gimple_function_base
+{
+public:
+  gimple *fold (gimple_folder &f) const override
+  {
+    auto a = gimple_call_arg (f.call, 0);
+    auto b = gimple_call_arg (f.call, 1);
+    auto c = gimple_call_arg (f.call, 2);
+    auto type = TREE_TYPE (f.lhs);
+
+    // tmp1 = a ^ b, built in the type of the call's result
+    auto tmp1 = f.force_val (fold_build2 (BIT_XOR_EXPR, type, a, b));
+
+    // lhs = (a ^ b) ^ c
+    return gimple_build_assign (f.lhs, BIT_XOR_EXPR, tmp1, c);
+  }
+};
+
+/* BCAX (a, b, c) = a ^ (b & ~c), i.e. XOR A with B's bits not set in C.  */
+class gimple_bcax : public gimple_function_base
+{
+public:
+  gimple *fold (gimple_folder &f) const override
+  {
+    auto a = gimple_call_arg (f.call, 0);
+    auto b = gimple_call_arg (f.call, 1);
+    auto c = gimple_call_arg (f.call, 2);
+    auto arg_type = TREE_TYPE (a);
+
+    // tmp1 = ~c, built in the type of the first argument
+    auto tmp1 = f.force_val (fold_build1 (BIT_NOT_EXPR, arg_type, c));
+
+    // tmp2 = b & ~c
+    auto tmp2 = f.force_val (fold_build2 (BIT_AND_EXPR, arg_type, b, tmp1));
+
+    // lhs = a ^ (b & ~c)
+    return gimple_build_assign (f.lhs, BIT_XOR_EXPR, a, tmp2);
+  }
+};
+
+/* RAX1 (a, b) = rotl (a, 1) ^ b.  TODO confirm: ISA is Vn ^ rotl (Vm, 1)?  */
+class gimple_rax1 : public gimple_function_base
+{
+public:
+  gimple *fold (gimple_folder &f) const override
+  {
+    auto a = gimple_call_arg (f.call, 0);
+    auto b = gimple_call_arg (f.call, 1);
+    auto arg_type = TREE_TYPE (a);
+    // tmp1 = rotl (a, 1).  NOTE(review): the rax1 pattern this patch replaces
+    // rotated the operand printed last in the asm; confirm A is the one to rotate.
+    auto tmp1 = f.force_val (
+      fold_build2 (LROTATE_EXPR, arg_type, a, build_one_cst (arg_type)));
+
+    // lhs = rotl (a, 1) ^ b
+    return gimple_build_assign (f.lhs, BIT_XOR_EXPR, tmp1, b);
+  }
+};
+
+/* XAR (a, b, c) = rotr (a ^ b, c), with C used directly as the rotate count.  */
+class gimple_xar : public gimple_function_base
+{
+public:
+  gimple *fold (gimple_folder &f) const override
+  {
+    auto a = gimple_call_arg (f.call, 0);
+    auto b = gimple_call_arg (f.call, 1);
+    auto c = gimple_call_arg (f.call, 2);
+    auto type = TREE_TYPE (f.lhs);
+
+    // tmp1 = a ^ b, built in the type of the call's result
+    auto tmp1 = f.force_val (fold_build2 (BIT_XOR_EXPR, type, a, b));
+
+    // lhs = rotr (a ^ b, c)
+    return gimple_build_assign (f.lhs, RROTATE_EXPR, tmp1, c);
+  }
+};
+
+/* For intrinsics that map to a single GIMPLE IFN, called with the intrinsic's
+   own arguments in order and with no argument preparation.  */
+class gimple_ifn : public gimple_function_base
+{
+  internal_fn m_ifn;
+
+public:
+  constexpr gimple_ifn (internal_fn fn)
+    : m_ifn (fn)
+    {}
+
+  gimple *fold (gimple_folder &f) const override
+  {
+    // auto_vec frees its heap storage on scope exit; a bare vec<> would leak.
+    auto_vec<tree> args;
+    for (unsigned i = 0; i < gimple_call_num_args (f.call); i++)
+      args.safe_push (gimple_call_arg (f.call, i));
+    auto call = gimple_build_call_internal_vec (this->m_ifn, args);
+    gimple_call_set_lhs (call, f.lhs);
+    return call;
+  }
+};
+
 // Lane get/set
 NEON_FUNCTION (vcreate,      gimple_create,)
 NEON_FUNCTION (vcombine,     gimple_combine,)
@@ -283,3 +458,35 @@ NEON_FUNCTION (vdupd_laneq,  gimple_get_lane,)
 NEON_FUNCTION (vaddd, gimple_expr, (PLUS_EXPR))
 NEON_FUNCTION (vadd,  gimple_expr, (PLUS_EXPR, PLUS_EXPR, BIT_XOR_EXPR))
 NEON_FUNCTION (vaddq, gimple_expr, (PLUS_EXPR, PLUS_EXPR, BIT_XOR_EXPR))
+
+// Bitwise operations
+NEON_FUNCTION (vand,   gimple_expr,    (BIT_AND_EXPR))
+NEON_FUNCTION (vandq,  gimple_expr,    (BIT_AND_EXPR))
+NEON_FUNCTION (vbic,   gimple_not_rhs, (BIT_AND_EXPR))
+NEON_FUNCTION (vbicq,  gimple_not_rhs, (BIT_AND_EXPR))
+NEON_FUNCTION (vbsl,   gimple_bsl,)
+NEON_FUNCTION (vbslq,  gimple_bsl,)
+NEON_FUNCTION (veor,   gimple_expr,    (BIT_XOR_EXPR))
+NEON_FUNCTION (veorq,  gimple_expr,    (BIT_XOR_EXPR))
+NEON_FUNCTION (vmvn,   gimple_expr,    (BIT_NOT_EXPR))
+NEON_FUNCTION (vmvnq,  gimple_expr,    (BIT_NOT_EXPR))
+NEON_FUNCTION (vorn,   gimple_not_rhs, (BIT_IOR_EXPR))
+NEON_FUNCTION (vornq,  gimple_not_rhs, (BIT_IOR_EXPR))
+NEON_FUNCTION (vorr,   gimple_expr,    (BIT_IOR_EXPR))
+NEON_FUNCTION (vorrq,  gimple_expr,    (BIT_IOR_EXPR))
+NEON_FUNCTION (vrbit,  gimple_rbit,)
+NEON_FUNCTION (vrbitq, gimple_rbit,)
+
+// Bitwise operations (SHA3)
+NEON_FUNCTION (vbcaxq, gimple_bcax,)
+NEON_FUNCTION (veor3q, gimple_eor3,)
+NEON_FUNCTION (vrax1q, gimple_rax1,)
+NEON_FUNCTION (vxarq,  gimple_xar,)
+
+// Bit counting operations
+NEON_FUNCTION (vcls,  gimple_ifn, (IFN_CLRSB))
+NEON_FUNCTION (vclsq, gimple_ifn, (IFN_CLRSB))
+NEON_FUNCTION (vclz,  gimple_ifn, (IFN_CLZ))
+NEON_FUNCTION (vclzq, gimple_ifn, (IFN_CLZ))
+NEON_FUNCTION (vcnt,  gimple_ifn, (IFN_POPCOUNT))
+NEON_FUNCTION (vcntq, gimple_ifn, (IFN_POPCOUNT))
diff --git a/gcc/config/aarch64/aarch64-neon-builtins-base.def 
b/gcc/config/aarch64/aarch64-neon-builtins-base.def
index 5f61d8f6634f..e963e506571c 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-neon-builtins-base.def
@@ -69,3 +69,43 @@ DEF_NEON_FUNCTION (vaddq, bhdq_poly,      ("Q0,Q0,Q0"))
 DEF_NEON_FUNCTION (vadd,  h_float, ("D0,D0,D0"))
 DEF_NEON_FUNCTION (vaddq, h_float, ("Q0,Q0,Q0"))
 #undef REQUIRED_EXTENSIONS
+
+// Bitwise operations
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
+DEF_NEON_FUNCTION (vand,   all_integer, ("D0,D0,D0"))
+DEF_NEON_FUNCTION (vandq,  all_integer, ("Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vbic,   all_integer, ("D0,D0,D0"))
+DEF_NEON_FUNCTION (vbicq,  all_integer, ("Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vbsl,   bhsd_neon,   ("D0,Du0,D0,D0"))
+DEF_NEON_FUNCTION (vbslq,  bhsd_neon,   ("Q0,Qu0,Q0,Q0"))
+DEF_NEON_FUNCTION (veor,   all_integer, ("D0,D0,D0"))
+DEF_NEON_FUNCTION (veorq,  all_integer, ("Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vmvn,   b_poly,      ("D0,D0"))
+DEF_NEON_FUNCTION (vmvnq,  b_poly,      ("Q0,Q0"))
+DEF_NEON_FUNCTION (vmvn,   bhs_integer, ("D0,D0"))
+DEF_NEON_FUNCTION (vmvnq,  bhs_integer, ("Q0,Q0"))
+DEF_NEON_FUNCTION (vorn,   all_integer, ("D0,D0,D0"))
+DEF_NEON_FUNCTION (vornq,  all_integer, ("Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vorr,   all_integer, ("D0,D0,D0"))
+DEF_NEON_FUNCTION (vorrq,  all_integer, ("Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vrbit,  b_neon,      ("D0,D0"))
+DEF_NEON_FUNCTION (vrbitq, b_neon,      ("Q0,Q0"))
+#undef REQUIRED_EXTENSIONS
+
+// Bitwise operations (FEAT_SHA3)
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SHA3)
+DEF_NEON_FUNCTION (vbcaxq, all_integer, ("Q0,Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (veor3q, all_integer, ("Q0,Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vrax1q, d_unsigned,  ("Q0,Q0,Q0"))
+DEF_NEON_FUNCTION (vxarq,  d_unsigned,  ("Q0,Q0,Q0,ss32", shift<2>))
+#undef REQUIRED_EXTENSIONS
+
+// Bit counting operations
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
+DEF_NEON_FUNCTION (vcls,  bhs_integer, ("Ds0,D0"))
+DEF_NEON_FUNCTION (vclsq, bhs_integer, ("Qs0,Q0"))
+DEF_NEON_FUNCTION (vclz,  bhs_integer, ("D0,D0"))
+DEF_NEON_FUNCTION (vclzq, bhs_integer, ("Q0,Q0"))
+DEF_NEON_FUNCTION (vcnt,  b_neon,      ("D0,D0"))
+DEF_NEON_FUNCTION (vcntq, b_neon,      ("Q0,Q0"))
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-neon-builtins-shapes.cc 
b/gcc/config/aarch64/aarch64-neon-builtins-shapes.cc
index e88307eedf63..6059cee19a67 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-neon-builtins-shapes.cc
@@ -73,6 +73,14 @@ lane (function_checker &c)
   return c.require_immediate_range (PARAM_INDEX, 0, element_count - 1);
 }
 
+/* Require argument PARAM_INDEX to be an immediate in [0, element_bits - 1].  */
+template <unsigned int PARAM_INDEX>
+bool
+shift (function_checker &c)
+{
+  auto bits = c.type_suffix (0).element_bits;
+  return c.require_immediate_range (PARAM_INDEX, 0, bits - 1);
+}
 
 /* A checker that always returns true.  */
 bool
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
b/gcc/config/aarch64/aarch64-simd-builtins.def
index b9600bdca30c..2d8c613ca5ef 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -57,10 +57,6 @@
   BUILTIN_VHSDF_HSDF (UNOP, sqrt, 2, FP)
   BUILTIN_VDQ_I (BINOP, addp, 0, DEFAULT)
   BUILTIN_VDQ_I (BINOPU, addp, 0, DEFAULT)
-  BUILTIN_VDQ_BHSI (UNOP, clrsb, 2, DEFAULT)
-  BUILTIN_VDQ_BHSI (UNOP, clz, 2, DEFAULT)
-  BUILTIN_VS (UNOP, ctz, 2, DEFAULT)
-  BUILTIN_VB (UNOP, popcount, 2, DEFAULT)
 
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
   BUILTIN_VSDQ_I (BINOP, sqshl, 0, DEFAULT)
@@ -648,10 +644,6 @@
   VAR1 (UNOP, floatunsv4si, 2, FP, v4sf)
   VAR1 (UNOP, floatunsv2di, 2, FP, v2df)
 
-  VAR5 (UNOPU, bswap, 2, DEFAULT, v4hi, v8hi, v2si, v4si, v2di)
-
-  BUILTIN_VB (UNOP, rbit, 0, DEFAULT)
-
   /* Implemented by
      aarch64_<PERMUTE:perm_insn><mode>.  */
   BUILTIN_VALL (BINOP, zip1, 0, QUIET)
@@ -713,12 +705,6 @@
   BUILTIN_VDQSF (QUADOP_LANE, float_mla_laneq, 0, FP)
   BUILTIN_VDQSF (QUADOP_LANE, float_mls_laneq, 0, FP)
 
-  /* Implemented by aarch64_simd_bsl<mode>.  */
-  BUILTIN_VDQQH (BSL_P, simd_bsl, 0, DEFAULT)
-  VAR2 (BSL_P, simd_bsl,0, DEFAULT, di, v2di)
-  BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0, DEFAULT)
-  BUILTIN_VALLDIF (BSL_S, simd_bsl, 0, QUIET)
-
   /* Implemented by aarch64_crypto_aes<op><mode>.  */
   VAR1 (BINOPU, crypto_aese, 0, DEFAULT, v16qi)
   VAR1 (BINOPU, crypto_aesd, 0, DEFAULT, v16qi)
@@ -881,16 +867,6 @@
   VAR1 (BINOPU, crypto_sha512su0q, 0, DEFAULT, v2di)
   /* Implemented by aarch64_crypto_sha512su1qv2di.  */
   VAR1 (TERNOPU, crypto_sha512su1q, 0, DEFAULT, v2di)
-  /* Implemented by eor3q<mode>4.  */
-  BUILTIN_VQ_I (TERNOPU, eor3q, 4, DEFAULT)
-  BUILTIN_VQ_I (TERNOP, eor3q, 4, DEFAULT)
-  /* Implemented by aarch64_rax1qv2di.  */
-  VAR1 (BINOPU, rax1q, 0, DEFAULT, v2di)
-  /* Implemented by aarch64_xarqv2di.  */
-  VAR1 (TERNOPUI, xarq, 0, DEFAULT, v2di)
-  /* Implemented by bcaxq<mode>4.  */
-  BUILTIN_VQ_I (TERNOPU, bcaxq, 4, DEFAULT)
-  BUILTIN_VQ_I (TERNOP, bcaxq, 4, DEFAULT)
 
   /* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>.  */
   VAR1 (TERNOP, fmlal_low, 0, FP, v2sf)
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def 
b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index e9e7e163def3..ebafcd618cd7 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -196,12 +196,6 @@ ENTRY_FMA_FPM (vmlalltb, f32, UNSPEC_FMLALLTB_FP8)
 ENTRY_FMA_FPM (vmlalltt, f32, UNSPEC_FMLALLTT_FP8)
 #undef REQUIRED_EXTENSIONS
 
-// bsl
-#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
-ENTRY_TERNARY (vbsl_mf8, mf8, u8, mf8, mf8, UNSPEC_BSL, QUIET)
-ENTRY_TERNARY (vbslq_mf8, mf8q, u8q, mf8q, mf8q, UNSPEC_BSL, QUIET)
-#undef REQUIRED_EXTENSIONS
-
 // ext
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
 ENTRY_BINARY_LANE (vext_mf8, mf8, mf8, mf8, UNSPEC_EXT, QUIET)
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 2e142b1e1ee7..282b395abcc5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -400,7 +400,7 @@
   [(set_attr "type" "neon_rev<q>")]
 )
 
-(define_insn "aarch64_rbit<mode><vczle><vczbe>"
+(define_insn "@aarch64_rbit<mode><vczle><vczbe>"
   [(set (match_operand:VB 0 "register_operand" "=w")
        (bitreverse:VB (match_operand:VB 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -9566,15 +9566,16 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
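+;; Matches 'rotl (op1, splat (1)) ^ op3'.  NOTE(review): the old pattern printed the rotated operand last; this prints it second -- confirm.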
 (define_insn "aarch64_rax1qv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w")
        (xor:V2DI
         (rotate:V2DI
-         (match_operand:V2DI 2 "register_operand" "w")
-         (const_int 1))
-        (match_operand:V2DI 1 "register_operand" "w")))]
-  "TARGET_SHA3"
-  "rax1\\t%0.2d, %1.2d, %2.2d"
+         (match_operand:V2DI 1 "register_operand" "w")
+         (match_operand:V2DI 2 "aarch64_simd_lshift_imm" "Dl"))
+        (match_operand:V2DI 3 "register_operand" "w")))]
+  "TARGET_SHA3 && INTVAL (unwrap_const_vec_duplicate (operands[2])) == 1"
+  "rax1\\t%0.2d, %1.2d, %3.2d"
   [(set_attr "type" "crypto_sha3")]
 )
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bbf8ec264841..ab71a49c839a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -227,7 +227,6 @@
     UNSPEC_AUTIB1716
     UNSPEC_AUTIASP
     UNSPEC_AUTIBSP
-    UNSPEC_BSL
     UNSPEC_CALLEE_ABI
     UNSPEC_CASESI
     UNSPEC_CPYMEM
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2af9c54f1d8b..ec2383d870a6 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -763,566 +763,6 @@ vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
   return __builtin_aarch64_pmulv16qi_ppp (__a, __b);
 }
 
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_s8 (int8x8_t __a, int8x8_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_s16 (int16x4_t __a, int16x4_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_s32 (int32x2_t __a, int32x2_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_u8 (uint8x8_t __a, uint8x8_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_u16 (uint16x4_t __a, uint16x4_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_s64 (int64x1_t __a, int64x1_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vand_u64 (uint64x1_t __a, uint64x1_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_s64 (int64x2_t __a, int64x2_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vandq_u64 (uint64x2_t __a, uint64x2_t __b)
-{
-  return __a & __b;
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_s8 (int8x8_t __a, int8x8_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_s16 (int16x4_t __a, int16x4_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_s32 (int32x2_t __a, int32x2_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_u8 (uint8x8_t __a, uint8x8_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_u16 (uint16x4_t __a, uint16x4_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_s64 (int64x1_t __a, int64x1_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorr_u64 (uint64x1_t __a, uint64x1_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_s64 (int64x2_t __a, int64x2_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
-{
-  return __a | __b;
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_s8 (int8x8_t __a, int8x8_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_s16 (int16x4_t __a, int16x4_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_s32 (int32x2_t __a, int32x2_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_u8 (uint8x8_t __a, uint8x8_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_u16 (uint16x4_t __a, uint16x4_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_s64 (int64x1_t __a, int64x1_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor_u64 (uint64x1_t __a, uint64x1_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_s64 (int64x2_t __a, int64x2_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veorq_u64 (uint64x2_t __a, uint64x2_t __b)
-{
-  return __a ^ __b;
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_s8 (int8x8_t __a, int8x8_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_s16 (int16x4_t __a, int16x4_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_s32 (int32x2_t __a, int32x2_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_u8 (uint8x8_t __a, uint8x8_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_u16 (uint16x4_t __a, uint16x4_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_s64 (int64x1_t __a, int64x1_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbic_u64 (uint64x1_t __a, uint64x1_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_s64 (int64x2_t __a, int64x2_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
-{
-  return __a & ~__b;
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_s8 (int8x8_t __a, int8x8_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_s16 (int16x4_t __a, int16x4_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_s32 (int32x2_t __a, int32x2_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_u8 (uint8x8_t __a, uint8x8_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_u16 (uint16x4_t __a, uint16x4_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_u32 (uint32x2_t __a, uint32x2_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_s64 (int64x1_t __a, int64x1_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vorn_u64 (uint64x1_t __a, uint64x1_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_s64 (int64x2_t __a, int64x2_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __a | ~__b;
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vornq_u64 (uint64x2_t __a, uint64x2_t __b)
-{
-  return __a | ~__b;
-}
-
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vsub_s8 (int8x8_t __a, int8x8_t __b)
@@ -5843,338 +5283,137 @@ vabsd_s64 (int64_t __a)
 
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddd_s64 (int64_t __a, int64_t __b)
-{
-  return __a + __b;
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddd_u64 (uint64_t __a, uint64_t __b)
-{
-  return __a + __b;
-}
-
-/* vaddv */
-
-__extension__ extern __inline int8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_s8 (int8x8_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
-}
-
-__extension__ extern __inline int16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_s16 (int16x4_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_s32 (int32x2_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v2si (__a);
-}
-
-__extension__ extern __inline uint8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_u8 (uint8x8_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v8qi_uu (__a);
-}
-
-__extension__ extern __inline uint16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_u16 (uint16x4_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v4hi_uu (__a);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_u32 (uint32x2_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v2si_uu (__a);
-}
-
-__extension__ extern __inline int8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_s8 (int8x16_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
-}
-
-__extension__ extern __inline int16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_s16 (int16x8_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_s32 (int32x4_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v4si (__a);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_s64 (int64x2_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
-}
-
-__extension__ extern __inline uint8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_u8 (uint8x16_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v16qi_uu (__a);
-}
-
-__extension__ extern __inline uint16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_u16 (uint16x8_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v8hi_uu (__a);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_u32 (uint32x4_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v4si_uu (__a);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_u64 (uint64x2_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v2di_uu (__a);
-}
-
-__extension__ extern __inline float32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddv_f32 (float32x2_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
-}
-
-__extension__ extern __inline float32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_f32 (float32x4_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
-}
-
-__extension__ extern __inline float64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vaddvq_f64 (float64x2_t __a)
-{
-  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
-}
-
-/* vbsl  */
-
-__extension__ extern __inline float16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c)
-{
-  return __builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
-{
-  return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);
-}
-
-__extension__ extern __inline float64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
-{
-  return (float64x1_t)
-    { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };
-}
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
-{
-  return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);
-}
-
-__extension__ extern __inline poly16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
-{
-  return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);
-}
-__extension__ extern __inline poly64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c)
-{
-  return (poly64x1_t)
-      {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])};
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
-{
-  return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
-{
-  return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
-{
-  return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
+vaddd_s64 (int64_t __a, int64_t __b)
 {
-  return (int64x1_t)
-      {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};
+  return __a + __b;
 }
 
-__extension__ extern __inline uint8x8_t
+__extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
+vaddd_u64 (uint64_t __a, uint64_t __b)
 {
-  return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);
+  return __a + __b;
 }
 
-__extension__ extern __inline uint16x4_t
+/* vaddv */
+
+__extension__ extern __inline int8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
+vaddv_s8 (int8x8_t __a)
 {
-  return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v8qi (__a);
 }
 
-__extension__ extern __inline uint32x2_t
+__extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
+vaddv_s16 (int16x4_t __a)
 {
-  return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v4hi (__a);
 }
 
-__extension__ extern __inline uint64x1_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
+vaddv_s32 (int32x2_t __a)
 {
-  return (uint64x1_t)
-      {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};
+  return __builtin_aarch64_reduc_plus_scal_v2si (__a);
 }
 
-__extension__ extern __inline float16x8_t
+__extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c)
+vaddv_u8 (uint8x8_t __a)
 {
-  return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v8qi_uu (__a);
 }
 
-__extension__ extern __inline float32x4_t
+__extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
+vaddv_u16 (uint16x4_t __a)
 {
-  return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v4hi_uu (__a);
 }
 
-__extension__ extern __inline float64x2_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
+vaddv_u32 (uint32x2_t __a)
 {
-  return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v2si_uu (__a);
 }
 
-__extension__ extern __inline poly8x16_t
+__extension__ extern __inline int8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
+vaddvq_s8 (int8x16_t __a)
 {
-  return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v16qi (__a);
 }
 
-__extension__ extern __inline poly16x8_t
+__extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
+vaddvq_s16 (int16x8_t __a)
 {
-  return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v8hi (__a);
 }
 
-__extension__ extern __inline int8x16_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
+vaddvq_s32 (int32x4_t __a)
 {
-  return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v4si (__a);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
+vaddvq_s64 (int64x2_t __a)
 {
-  return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v2di (__a);
 }
 
-__extension__ extern __inline poly64x2_t
+__extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c)
+vaddvq_u8 (uint8x16_t __a)
 {
-  return __builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v16qi_uu (__a);
 }
 
-__extension__ extern __inline int32x4_t
+__extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
+vaddvq_u16 (uint16x8_t __a)
 {
-  return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v8hi_uu (__a);
 }
 
-__extension__ extern __inline int64x2_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
+vaddvq_u32 (uint32x4_t __a)
 {
-  return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v4si_uu (__a);
 }
 
-__extension__ extern __inline uint8x16_t
+__extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+vaddvq_u64 (uint64x2_t __a)
 {
-  return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v2di_uu (__a);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+vaddv_f32 (float32x2_t __a)
 {
-  return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v2sf (__a);
 }
 
-__extension__ extern __inline uint32x4_t
+__extension__ extern __inline float32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+vaddvq_f32 (float32x4_t __a)
 {
-  return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v4sf (__a);
 }
 
-__extension__ extern __inline uint64x2_t
+__extension__ extern __inline float64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+vaddvq_f64 (float64x2_t __a)
 {
-  return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
+  return __builtin_aarch64_reduc_plus_scal_v2df (__a);
 }
 
 /* ARMv8.1-A instrinsics.  */
@@ -8069,334 +7308,118 @@ vcltd_u64 (uint64_t __a, uint64_t __b)
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltd_f64 (float64_t __a, float64_t __b)
-{
-  return __a < __b ? -1ll : 0ll;
-}
-
-/* vcltz - vector.  */
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltz_f32 (float32x2_t __a)
-{
-  return (uint32x2_t) (__a < 0.0f);
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltz_f64 (float64x1_t __a)
-{
-  return (uint64x1_t) (__a < (float64x1_t) {0.0});
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltz_s8 (int8x8_t __a)
-{
-  return (uint8x8_t) (__a < 0);
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltz_s16 (int16x4_t __a)
-{
-  return (uint16x4_t) (__a < 0);
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltz_s32 (int32x2_t __a)
-{
-  return (uint32x2_t) (__a < 0);
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltz_s64 (int64x1_t __a)
-{
-  return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzq_f32 (float32x4_t __a)
-{
-  return (uint32x4_t) (__a < 0.0f);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzq_f64 (float64x2_t __a)
-{
-  return (uint64x2_t) (__a < 0.0);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzq_s8 (int8x16_t __a)
-{
-  return (uint8x16_t) (__a < 0);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzq_s16 (int16x8_t __a)
-{
-  return (uint16x8_t) (__a < 0);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzq_s32 (int32x4_t __a)
-{
-  return (uint32x4_t) (__a < 0);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzq_s64 (int64x2_t __a)
-{
-  return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
-}
-
-/* vcltz - scalar.  */
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzs_f32 (float32_t __a)
-{
-  return __a < 0.0f ? -1 : 0;
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzd_s64 (int64_t __a)
-{
-  return __a < 0 ? -1ll : 0ll;
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcltzd_f64 (float64_t __a)
-{
-  return __a < 0.0 ? -1ll : 0ll;
-}
-
-/* vcls.  */
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcls_s8 (int8x8_t __a)
-{
-  return __builtin_aarch64_clrsbv8qi (__a);
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcls_s16 (int16x4_t __a)
-{
-  return __builtin_aarch64_clrsbv4hi (__a);
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcls_s32 (int32x2_t __a)
-{
-  return __builtin_aarch64_clrsbv2si (__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclsq_s8 (int8x16_t __a)
-{
-  return __builtin_aarch64_clrsbv16qi (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclsq_s16 (int16x8_t __a)
-{
-  return __builtin_aarch64_clrsbv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclsq_s32 (int32x4_t __a)
-{
-  return __builtin_aarch64_clrsbv4si (__a);
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcls_u8 (uint8x8_t __a)
-{
-  return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a);
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcls_u16 (uint16x4_t __a)
-{
-  return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a);
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcls_u32 (uint32x2_t __a)
-{
-  return __builtin_aarch64_clrsbv2si ((int32x2_t) __a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclsq_u8 (uint8x16_t __a)
-{
-  return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclsq_u16 (uint16x8_t __a)
-{
-  return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclsq_u32 (uint32x4_t __a)
-{
-  return __builtin_aarch64_clrsbv4si ((int32x4_t) __a);
-}
-
-/* vclz.  */
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclz_s8 (int8x8_t __a)
+vcltd_f64 (float64_t __a, float64_t __b)
 {
-  return __builtin_aarch64_clzv8qi (__a);
+  return __a < __b ? -1ll : 0ll;
 }
 
-__extension__ extern __inline int16x4_t
+/* vcltz - vector.  */
+
+__extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclz_s16 (int16x4_t __a)
+vcltz_f32 (float32x2_t __a)
 {
-  return __builtin_aarch64_clzv4hi (__a);
+  return (uint32x2_t) (__a < 0.0f);
 }
 
-__extension__ extern __inline int32x2_t
+__extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclz_s32 (int32x2_t __a)
+vcltz_f64 (float64x1_t __a)
 {
-  return __builtin_aarch64_clzv2si (__a);
+  return (uint64x1_t) (__a < (float64x1_t) {0.0});
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclz_u8 (uint8x8_t __a)
+vcltz_s8 (int8x8_t __a)
 {
-  return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
+  return (uint8x8_t) (__a < 0);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclz_u16 (uint16x4_t __a)
+vcltz_s16 (int16x4_t __a)
 {
-  return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
+  return (uint16x4_t) (__a < 0);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclz_u32 (uint32x2_t __a)
+vcltz_s32 (int32x2_t __a)
 {
-  return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
+  return (uint32x2_t) (__a < 0);
 }
 
-__extension__ extern __inline int8x16_t
+__extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclzq_s8 (int8x16_t __a)
+vcltz_s64 (int64x1_t __a)
 {
-  return __builtin_aarch64_clzv16qi (__a);
+  return (uint64x1_t) (__a < __AARCH64_INT64_C (0));
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclzq_s16 (int16x8_t __a)
+vcltzq_f32 (float32x4_t __a)
 {
-  return __builtin_aarch64_clzv8hi (__a);
+  return (uint32x4_t) (__a < 0.0f);
 }
 
-__extension__ extern __inline int32x4_t
+__extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclzq_s32 (int32x4_t __a)
+vcltzq_f64 (float64x2_t __a)
 {
-  return __builtin_aarch64_clzv4si (__a);
+  return (uint64x2_t) (__a < 0.0);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclzq_u8 (uint8x16_t __a)
+vcltzq_s8 (int8x16_t __a)
 {
-  return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
+  return (uint8x16_t) (__a < 0);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclzq_u16 (uint16x8_t __a)
+vcltzq_s16 (int16x8_t __a)
 {
-  return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
+  return (uint16x8_t) (__a < 0);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vclzq_u32 (uint32x4_t __a)
-{
-  return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
-}
-
-/* vcnt.  */
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcnt_p8 (poly8x8_t __a)
+vcltzq_s32 (int32x4_t __a)
 {
-  return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
+  return (uint32x4_t) (__a < 0);
 }
 
-__extension__ extern __inline int8x8_t
+__extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcnt_s8 (int8x8_t __a)
+vcltzq_s64 (int64x2_t __a)
 {
-  return __builtin_aarch64_popcountv8qi (__a);
+  return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
 }
 
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcnt_u8 (uint8x8_t __a)
-{
-  return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a);
-}
+/* vcltz - scalar.  */
 
-__extension__ extern __inline poly8x16_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcntq_p8 (poly8x16_t __a)
+vcltzs_f32 (float32_t __a)
 {
-  return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
+  return __a < 0.0f ? -1 : 0;
 }
 
-__extension__ extern __inline int8x16_t
+__extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcntq_s8 (int8x16_t __a)
+vcltzd_s64 (int64_t __a)
 {
-  return __builtin_aarch64_popcountv16qi (__a);
+  return __a < 0 ? -1ll : 0ll;
 }
 
-__extension__ extern __inline uint8x16_t
+__extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcntq_u8 (uint8x16_t __a)
+vcltzd_f64 (float64_t __a)
 {
-  return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a);
+  return __a < 0.0 ? -1ll : 0ll;
 }
 
 /* vcvt (double -> float).  */
@@ -14902,106 +13925,6 @@ vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
   return __a * __b;
 }
 
-/* vmvn  */
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_p8 (poly8x8_t __a)
-{
-  return (poly8x8_t) ~((int8x8_t) __a);
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_s8 (int8x8_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_s16 (int16x4_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_s32 (int32x2_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_u8 (uint8x8_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_u16 (uint16x4_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvn_u32 (uint32x2_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline poly8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_p8 (poly8x16_t __a)
-{
-  return (poly8x16_t) ~((int8x16_t) __a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_s8 (int8x16_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_s16 (int16x8_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_s32 (int32x4_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_u8 (uint8x16_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_u16 (uint16x8_t __a)
-{
-  return ~__a;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmvnq_u32 (uint32x4_t __a)
-{
-  return ~__a;
-}
-
 /* vneg  */
 
 __extension__ extern __inline float32x2_t
@@ -17258,50 +16181,6 @@ vqtbx4q_p8 (poly8x16_t __r, poly8x16x4_t __tab, uint8x16_t __idx)
   return __builtin_aarch64_qtbx4v16qi_pppu (__r, __tab, __idx);
 }
 
-/* vrbit  */
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrbit_p8 (poly8x8_t __a)
-{
-  return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrbit_s8 (int8x8_t __a)
-{
-  return __builtin_aarch64_rbitv8qi (__a);
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrbit_u8 (uint8x8_t __a)
-{
-  return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
-}
-
-__extension__ extern __inline poly8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrbitq_p8 (poly8x16_t __a)
-{
-  return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrbitq_s8 (int8x16_t __a)
-{
-  return __builtin_aarch64_rbitv16qi (__a);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrbitq_u8 (uint8x16_t __a)
-{
-  return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
-}
-
 /* vrecpe  */
 
 __extension__ extern __inline uint32x2_t
@@ -24529,133 +23408,6 @@ vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
   return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
-{
-  return __builtin_aarch64_eor3qv16qi_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
-{
-  return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
-{
-  return __builtin_aarch64_eor3qv4si_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
-{
-  return __builtin_aarch64_eor3qv2di_uuuu (__a, __b, __c);
-}
-
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_aarch64_eor3qv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_aarch64_eor3qv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_aarch64_eor3qv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-veor3q_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
-{
-  return __builtin_aarch64_eor3qv2di (__a, __b, __c);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vrax1q_u64 (uint64x2_t __a, uint64x2_t __b)
-{
-  return __builtin_aarch64_rax1qv2di_uuu (__a, __b);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int __imm6)
-{
-  return __builtin_aarch64_xarqv2di_uuus (__a, __b, __imm6);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
-{
-  return __builtin_aarch64_bcaxqv16qi_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
-{
-  return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
-{
-  return __builtin_aarch64_bcaxqv4si_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
-{
-  return __builtin_aarch64_bcaxqv2di_uuuu (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_aarch64_bcaxqv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_aarch64_bcaxqv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_aarch64_bcaxqv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
-{
-  return __builtin_aarch64_bcaxqv2di (__a, __b, __c);
-}
-
 #pragma GCC pop_options
 
 /* AdvSIMD Complex numbers intrinsics.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vand.c b/gcc/testsuite/gcc.target/aarch64/neon/vand.c
new file mode 100644
index 000000000000..fd85f8992e18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vand.c
@@ -0,0 +1,116 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vand_u8:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_u8, uint8x8_t)
+
+/*
+** test_vand_s8:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_s8, int8x8_t)
+
+/*
+** test_vand_u16:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_u16, uint16x4_t)
+
+/*
+** test_vand_s16:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_s16, int16x4_t)
+
+/*
+** test_vand_u32:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_u32, uint32x2_t)
+
+/*
+** test_vand_s32:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_s32, int32x2_t)
+
+/*
+** test_vand_u64:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_u64, uint64x1_t)
+
+/*
+** test_vand_s64:
+** and v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vand_s64, int64x1_t)
+
+/*
+** test_vandq_u8:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_u8, uint8x16_t)
+
+/*
+** test_vandq_s8:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_s8, int8x16_t)
+
+/*
+** test_vandq_u16:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_u16, uint16x8_t)
+
+/*
+** test_vandq_s16:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_s16, int16x8_t)
+
+/*
+** test_vandq_u32:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_u32, uint32x4_t)
+
+/*
+** test_vandq_s32:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_s32, int32x4_t)
+
+/*
+** test_vandq_u64:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_u64, uint64x2_t)
+
+/*
+** test_vandq_s64:
+** and v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vandq_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vbcax.c b/gcc/testsuite/gcc.target/aarch64/neon/vbcax.c
new file mode 100644
index 000000000000..ae61e65dc6a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vbcax.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vbcaxq_u8:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_u8, uint8x16_t)
+
+/*
+** test_vbcaxq_u16:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_u16, uint16x8_t)
+
+/*
+** test_vbcaxq_u32:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_u32, uint32x4_t)
+
+/*
+** test_vbcaxq_u64:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_u64, uint64x2_t)
+
+/*
+** test_vbcaxq_s8:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_s8, int8x16_t)
+
+/*
+** test_vbcaxq_s16:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_s16, int16x8_t)
+
+/*
+** test_vbcaxq_s32:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_s32, int32x4_t)
+
+/*
+** test_vbcaxq_s64:
+** bcax        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (vbcaxq_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vbic.c b/gcc/testsuite/gcc.target/aarch64/neon/vbic.c
new file mode 100644
index 000000000000..d67cb72fda18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vbic.c
@@ -0,0 +1,116 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vbic_u8:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_u8, uint8x8_t)
+
+/*
+** test_vbic_s8:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_s8, int8x8_t)
+
+/*
+** test_vbic_u16:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_u16, uint16x4_t)
+
+/*
+** test_vbic_s16:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_s16, int16x4_t)
+
+/*
+** test_vbic_u32:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_u32, uint32x2_t)
+
+/*
+** test_vbic_s32:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_s32, int32x2_t)
+
+/*
+** test_vbic_u64:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_u64, uint64x1_t)
+
+/*
+** test_vbic_s64:
+** bic v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbic_s64, int64x1_t)
+
+/*
+** test_vbicq_u8:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_u8, uint8x16_t)
+
+/*
+** test_vbicq_s8:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_s8, int8x16_t)
+
+/*
+** test_vbicq_u16:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_u16, uint16x8_t)
+
+/*
+** test_vbicq_s16:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_s16, int16x8_t)
+
+/*
+** test_vbicq_u32:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_u32, uint32x4_t)
+
+/*
+** test_vbicq_s32:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_s32, int32x4_t)
+
+/*
+** test_vbicq_u64:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_u64, uint64x2_t)
+
+/*
+** test_vbicq_s64:
+** bic v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vbicq_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vbsl.c b/gcc/testsuite/gcc.target/aarch64/neon/vbsl.c
new file mode 100644
index 000000000000..9a677600ace6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vbsl.c
@@ -0,0 +1,214 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vbsl_u8:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_u8, uint8x8_t, uint8x8_t, uint8x8_t, uint8x8_t)
+
+/*
+** test_vbsl_s8:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_s8, int8x8_t, uint8x8_t, int8x8_t, int8x8_t)
+
+/*
+** test_vbsl_p8:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_p8, poly8x8_t, uint8x8_t, poly8x8_t, poly8x8_t)
+
+/*
+** test_vbsl_mf8:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_mf8, mfloat8x8_t, uint8x8_t, mfloat8x8_t, mfloat8x8_t)
+
+/*
+** test_vbsl_u16:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_u16, uint16x4_t, uint16x4_t, uint16x4_t, uint16x4_t)
+
+/*
+** test_vbsl_s16:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_s16, int16x4_t, uint16x4_t, int16x4_t, int16x4_t)
+
+/*
+** test_vbsl_p16:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_p16, poly16x4_t, uint16x4_t, poly16x4_t, poly16x4_t)
+
+/*
+** test_vbsl_f16:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_f16, float16x4_t, uint16x4_t, float16x4_t, float16x4_t)
+
+/*
+** test_vbsl_u32:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_u32, uint32x2_t, uint32x2_t, uint32x2_t, uint32x2_t)
+
+/*
+** test_vbsl_s32:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_s32, int32x2_t, uint32x2_t, int32x2_t, int32x2_t)
+
+/*
+** test_vbsl_f32:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_f32, float32x2_t, uint32x2_t, float32x2_t, float32x2_t)
+
+/*
+** test_vbsl_u64:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_u64, uint64x1_t, uint64x1_t, uint64x1_t, uint64x1_t)
+
+/*
+** test_vbsl_s64:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_s64, int64x1_t, uint64x1_t, int64x1_t, int64x1_t)
+
+/*
+** test_vbsl_p64:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_p64, poly64x1_t, uint64x1_t, poly64x1_t, poly64x1_t)
+
+/*
+** test_vbsl_f64:
+** bsl v0\.8b, v1\.8b, v2\.8b
+** ret
+*/
+TEST_TERNARY (vbsl_f64, float64x1_t, uint64x1_t, float64x1_t, float64x1_t)
+
+/*
+** test_vbslq_u8:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_u8, uint8x16_t, uint8x16_t, uint8x16_t, uint8x16_t)
+
+/*
+** test_vbslq_s8:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_s8, int8x16_t, uint8x16_t, int8x16_t, int8x16_t)
+
+/*
+** test_vbslq_p8:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_p8, poly8x16_t, uint8x16_t, poly8x16_t, poly8x16_t)
+
+/*
+** test_vbslq_mf8:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_mf8, mfloat8x16_t, uint8x16_t, mfloat8x16_t, mfloat8x16_t)
+
+/*
+** test_vbslq_u16:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_u16, uint16x8_t, uint16x8_t, uint16x8_t, uint16x8_t)
+
+/*
+** test_vbslq_s16:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_s16, int16x8_t, uint16x8_t, int16x8_t, int16x8_t)
+
+/*
+** test_vbslq_p16:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_p16, poly16x8_t, uint16x8_t, poly16x8_t, poly16x8_t)
+
+/*
+** test_vbslq_f16:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_f16, float16x8_t, uint16x8_t, float16x8_t, float16x8_t)
+
+/*
+** test_vbslq_u32:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_u32, uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t)
+
+/*
+** test_vbslq_s32:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_s32, int32x4_t, uint32x4_t, int32x4_t, int32x4_t)
+
+/*
+** test_vbslq_f32:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_f32, float32x4_t, uint32x4_t, float32x4_t, float32x4_t)
+
+/*
+** test_vbslq_u64:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_u64, uint64x2_t, uint64x2_t, uint64x2_t, uint64x2_t)
+
+/*
+** test_vbslq_s64:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_s64, int64x2_t, uint64x2_t, int64x2_t, int64x2_t)
+
+/*
+** test_vbslq_p64:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_p64, poly64x2_t, uint64x2_t, poly64x2_t, poly64x2_t)
+
+/*
+** test_vbslq_f64:
+** bsl v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_TERNARY (vbslq_f64, float64x2_t, uint64x2_t, float64x2_t, float64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vcls.c b/gcc/testsuite/gcc.target/aarch64/neon/vcls.c
new file mode 100644
index 000000000000..83b3e2eb70c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vcls.c
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vcls_u8:
+** cls v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vcls_u8, int8x8_t, uint8x8_t)
+
+/*
+** test_vcls_s8:
+** cls v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vcls_s8, int8x8_t, int8x8_t)
+
+/*
+** test_vcls_u16:
+** cls v0\.4h, v0\.4h
+** ret
+*/
+TEST_UNARY (vcls_u16, int16x4_t, uint16x4_t)
+
+/*
+** test_vcls_s16:
+** cls v0\.4h, v0\.4h
+** ret
+*/
+TEST_UNARY (vcls_s16, int16x4_t, int16x4_t)
+
+/*
+** test_vcls_u32:
+** cls v0\.2s, v0\.2s
+** ret
+*/
+TEST_UNARY (vcls_u32, int32x2_t, uint32x2_t)
+
+/*
+** test_vcls_s32:
+** cls v0\.2s, v0\.2s
+** ret
+*/
+TEST_UNARY (vcls_s32, int32x2_t, int32x2_t)
+
+/*
+** test_vclsq_u8:
+** cls v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vclsq_u8, int8x16_t, uint8x16_t)
+
+/*
+** test_vclsq_s8:
+** cls v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vclsq_s8, int8x16_t, int8x16_t)
+
+/*
+** test_vclsq_u16:
+** cls v0\.8h, v0\.8h
+** ret
+*/
+TEST_UNARY (vclsq_u16, int16x8_t, uint16x8_t)
+
+/*
+** test_vclsq_s16:
+** cls v0\.8h, v0\.8h
+** ret
+*/
+TEST_UNARY (vclsq_s16, int16x8_t, int16x8_t)
+
+/*
+** test_vclsq_u32:
+** cls v0\.4s, v0\.4s
+** ret
+*/
+TEST_UNARY (vclsq_u32, int32x4_t, uint32x4_t)
+
+/*
+** test_vclsq_s32:
+** cls v0\.4s, v0\.4s
+** ret
+*/
+TEST_UNARY (vclsq_s32, int32x4_t, int32x4_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vclz.c b/gcc/testsuite/gcc.target/aarch64/neon/vclz.c
new file mode 100644
index 000000000000..ad806367e13e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vclz.c
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vclz_u8:
+** clz v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vclz_u8, uint8x8_t, uint8x8_t)
+
+/*
+** test_vclz_s8:
+** clz v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vclz_s8, int8x8_t, int8x8_t)
+
+/*
+** test_vclz_u16:
+** clz v0\.4h, v0\.4h
+** ret
+*/
+TEST_UNARY (vclz_u16, uint16x4_t, uint16x4_t)
+
+/*
+** test_vclz_s16:
+** clz v0\.4h, v0\.4h
+** ret
+*/
+TEST_UNARY (vclz_s16, int16x4_t, int16x4_t)
+
+/*
+** test_vclz_u32:
+** clz v0\.2s, v0\.2s
+** ret
+*/
+TEST_UNARY (vclz_u32, uint32x2_t, uint32x2_t)
+
+/*
+** test_vclz_s32:
+** clz v0\.2s, v0\.2s
+** ret
+*/
+TEST_UNARY (vclz_s32, int32x2_t, int32x2_t)
+
+/*
+** test_vclzq_u8:
+** clz v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vclzq_u8, uint8x16_t, uint8x16_t)
+
+/*
+** test_vclzq_s8:
+** clz v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vclzq_s8, int8x16_t, int8x16_t)
+
+/*
+** test_vclzq_u16:
+** clz v0\.8h, v0\.8h
+** ret
+*/
+TEST_UNARY (vclzq_u16, uint16x8_t, uint16x8_t)
+
+/*
+** test_vclzq_s16:
+** clz v0\.8h, v0\.8h
+** ret
+*/
+TEST_UNARY (vclzq_s16, int16x8_t, int16x8_t)
+
+/*
+** test_vclzq_u32:
+** clz v0\.4s, v0\.4s
+** ret
+*/
+TEST_UNARY (vclzq_u32, uint32x4_t, uint32x4_t)
+
+/*
+** test_vclzq_s32:
+** clz v0\.4s, v0\.4s
+** ret
+*/
+TEST_UNARY (vclzq_s32, int32x4_t, int32x4_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vcnt.c b/gcc/testsuite/gcc.target/aarch64/neon/vcnt.c
new file mode 100644
index 000000000000..9e1ce67012f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vcnt.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Each vcnt/vcntq intrinsic is expected to compile to a single CNT
+   instruction on the matching 8-byte or 16-byte vector arrangement.  */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vcnt_u8:
+** cnt v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vcnt_u8, uint8x8_t, uint8x8_t)
+
+/*
+** test_vcnt_s8:
+** cnt v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vcnt_s8, int8x8_t, int8x8_t)
+
+/*
+** test_vcnt_p8:
+** cnt v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNARY (vcnt_p8, poly8x8_t, poly8x8_t)
+
+/*
+** test_vcntq_u8:
+** cnt v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vcntq_u8, uint8x16_t, uint8x16_t)
+
+/*
+** test_vcntq_s8:
+** cnt v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vcntq_s8, int8x16_t, int8x16_t)
+
+/*
+** test_vcntq_p8:
+** cnt v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNARY (vcntq_p8, poly8x16_t, poly8x16_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/veor.c b/gcc/testsuite/gcc.target/aarch64/neon/veor.c
new file mode 100644
index 000000000000..fd2f4836929e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/veor.c
@@ -0,0 +1,116 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_veor_u8:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_u8, uint8x8_t)
+
+/*
+** test_veor_s8:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_s8, int8x8_t)
+
+/*
+** test_veor_u16:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_u16, uint16x4_t)
+
+/*
+** test_veor_s16:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_s16, int16x4_t)
+
+/*
+** test_veor_u32:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_u32, uint32x2_t)
+
+/*
+** test_veor_s32:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_s32, int32x2_t)
+
+/*
+** test_veor_u64:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_u64, uint64x1_t)
+
+/*
+** test_veor_s64:
+** eor v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veor_s64, int64x1_t)
+
+/*
+** test_veorq_u8:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_u8, uint8x16_t)
+
+/*
+** test_veorq_s8:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_s8, int8x16_t)
+
+/*
+** test_veorq_u16:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_u16, uint16x8_t)
+
+/*
+** test_veorq_s16:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_s16, int16x8_t)
+
+/*
+** test_veorq_u32:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_u32, uint32x4_t)
+
+/*
+** test_veorq_s32:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_s32, int32x4_t)
+
+/*
+** test_veorq_u64:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_u64, uint64x2_t)
+
+/*
+** test_veorq_s64:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (veorq_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/veor3.c b/gcc/testsuite/gcc.target/aarch64/neon/veor3.c
new file mode 100644
index 000000000000..bda4040e5e54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/veor3.c
@@ -0,0 +1,60 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_veor3q_u8:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_u8, uint8x16_t)
+
+/*
+** test_veor3q_u16:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_u16, uint16x8_t)
+
+/*
+** test_veor3q_u32:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_u32, uint32x4_t)
+
+/*
+** test_veor3q_u64:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_u64, uint64x2_t)
+
+/*
+** test_veor3q_s8:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_s8, int8x16_t)
+
+/*
+** test_veor3q_s16:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_s16, int16x8_t)
+
+/*
+** test_veor3q_s32:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_s32, int32x4_t)
+
+/*
+** test_veor3q_s64:
+** eor3        v0\.16b, v0\.16b, v1\.16b, v2\.16b
+** ret
+*/
+TEST_UNIFORM_TERNARY (veor3q_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vmvn.c b/gcc/testsuite/gcc.target/aarch64/neon/vmvn.c
new file mode 100644
index 000000000000..83a591408bb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vmvn.c
@@ -0,0 +1,102 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vmvn_u8:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_u8, uint8x8_t)
+
+/*
+** test_vmvn_s8:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_s8, int8x8_t)
+
+/*
+** test_vmvn_p8:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_p8, poly8x8_t)
+
+/*
+** test_vmvn_u16:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_u16, uint16x4_t)
+
+/*
+** test_vmvn_s16:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_s16, int16x4_t)
+
+/*
+** test_vmvn_u32:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_u32, uint32x2_t)
+
+/*
+** test_vmvn_s32:
+** not v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvn_s32, int32x2_t)
+
+/*
+** test_vmvnq_u8:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_u8, uint8x16_t)
+
+/*
+** test_vmvnq_s8:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_s8, int8x16_t)
+
+/*
+** test_vmvnq_p8:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_p8, poly8x16_t)
+
+/*
+** test_vmvnq_u16:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_u16, uint16x8_t)
+
+/*
+** test_vmvnq_s16:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_s16, int16x8_t)
+
+/*
+** test_vmvnq_u32:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_u32, uint32x4_t)
+
+/*
+** test_vmvnq_s32:
+** not v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vmvnq_s32, int32x4_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vorn.c b/gcc/testsuite/gcc.target/aarch64/neon/vorn.c
new file mode 100644
index 000000000000..fd6c13c11408
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vorn.c
@@ -0,0 +1,116 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vorn_u8:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_u8, uint8x8_t)
+
+/*
+** test_vorn_s8:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_s8, int8x8_t)
+
+/*
+** test_vorn_u16:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_u16, uint16x4_t)
+
+/*
+** test_vorn_s16:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_s16, int16x4_t)
+
+/*
+** test_vorn_u32:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_u32, uint32x2_t)
+
+/*
+** test_vorn_s32:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_s32, int32x2_t)
+
+/*
+** test_vorn_u64:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_u64, uint64x1_t)
+
+/*
+** test_vorn_s64:
+** orn v0\.8b, v0\.8b, v1\.8b
+** ret
+*/
+TEST_UNIFORM_BINARY (vorn_s64, int64x1_t)
+
+/*
+** test_vornq_u8:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_u8, uint8x16_t)
+
+/*
+** test_vornq_s8:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_s8, int8x16_t)
+
+/*
+** test_vornq_u16:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_u16, uint16x8_t)
+
+/*
+** test_vornq_s16:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_s16, int16x8_t)
+
+/*
+** test_vornq_u32:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_u32, uint32x4_t)
+
+/*
+** test_vornq_s32:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_s32, int32x4_t)
+
+/*
+** test_vornq_u64:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_u64, uint64x2_t)
+
+/*
+** test_vornq_s64:
+** orn v0\.16b, v0\.16b, v1\.16b
+** ret
+*/
+TEST_UNIFORM_BINARY (vornq_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vorr.c b/gcc/testsuite/gcc.target/aarch64/neon/vorr.c
new file mode 100644
index 000000000000..d2c7b6b2c3db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vorr.c
@@ -0,0 +1,116 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vorr_u8:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_u8, uint8x8_t)
+
+/*
+** test_vorr_s8:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_s8, int8x8_t)
+
+/*
+** test_vorr_u16:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_u16, uint16x4_t)
+
+/*
+** test_vorr_s16:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_s16, int16x4_t)
+
+/*
+** test_vorr_u32:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_u32, uint32x2_t)
+
+/*
+** test_vorr_s32:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_s32, int32x2_t)
+
+/*
+** test_vorr_u64:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_u64, uint64x1_t)
+
+/*
+** test_vorr_s64:
+** orr v0\.8b, (v0\.8b, v1\.8b|v1\.8b, v0\.8b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorr_s64, int64x1_t)
+
+/*
+** test_vorrq_u8:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_u8, uint8x16_t)
+
+/*
+** test_vorrq_s8:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_s8, int8x16_t)
+
+/*
+** test_vorrq_u16:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_u16, uint16x8_t)
+
+/*
+** test_vorrq_s16:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_s16, int16x8_t)
+
+/*
+** test_vorrq_u32:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_u32, uint32x4_t)
+
+/*
+** test_vorrq_s32:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_s32, int32x4_t)
+
+/*
+** test_vorrq_u64:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_u64, uint64x2_t)
+
+/*
+** test_vorrq_s64:
+** orr v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+TEST_UNIFORM_BINARY (vorrq_s64, int64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vrax1.c b/gcc/testsuite/gcc.target/aarch64/neon/vrax1.c
new file mode 100644
index 000000000000..0f5fdd088b4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vrax1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vrax1q_u64:
+** rax1        v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_UNIFORM_BINARY (vrax1q_u64, uint64x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vrbit.c b/gcc/testsuite/gcc.target/aarch64/neon/vrbit.c
new file mode 100644
index 000000000000..9168d54c1108
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vrbit.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vrbit_u8:
+** rbit        v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vrbit_u8, uint8x8_t)
+
+/*
+** test_vrbit_s8:
+** rbit        v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vrbit_s8, int8x8_t)
+
+/*
+** test_vrbit_p8:
+** rbit        v0\.8b, v0\.8b
+** ret
+*/
+TEST_UNIFORM_UNARY (vrbit_p8, poly8x8_t)
+
+/*
+** test_vrbitq_u8:
+** rbit        v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vrbitq_u8, uint8x16_t)
+
+/*
+** test_vrbitq_s8:
+** rbit        v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vrbitq_s8, int8x16_t)
+
+/*
+** test_vrbitq_p8:
+** rbit        v0\.16b, v0\.16b
+** ret
+*/
+TEST_UNIFORM_UNARY (vrbitq_p8, poly8x16_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vxar.c b/gcc/testsuite/gcc.target/aarch64/neon/vxar.c
new file mode 100644
index 000000000000..5893a83214d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vxar.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vxarq_u64_0:
+** eor v0\.16b, (v0\.16b, v1\.16b|v1\.16b, v0\.16b)
+** ret
+*/
+uint64x2_t test_vxarq_u64_0 (uint64x2_t a, uint64x2_t b) { return vxarq_u64 (a, b, 0); }
+
+/*
+** test_vxarq_u64_1:
+** xar v0\.2d, v0\.2d, v1\.2d, #?1
+** ret
+*/
+uint64x2_t test_vxarq_u64_1 (uint64x2_t a, uint64x2_t b) { return vxarq_u64 (a, b, 1); }
+
+/*
+** test_vxarq_u64_31:
+** xar v0\.2d, v0\.2d, v1\.2d, #?31
+** ret
+*/
+uint64x2_t test_vxarq_u64_31 (uint64x2_t a, uint64x2_t b) { return vxarq_u64 (a, b, 31); }
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_1.c b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
index cf02865bfe85..189ee470c7dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=armv8.2-a+sha3" } */
+/* { dg-options "-O1 -march=armv8.2-a+sha3" } */
 
 #include "sha3.h"
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_2.c b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
index 8b085cbe9803..c73ecb08ce65 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=armv8.3-a+sha3" } */
+/* { dg-options "-O1 -march=armv8.3-a+sha3" } */
 
 #include "sha3.h"
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_3.c b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
index 51ae0a4da6bb..74236ffeb2bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=armv8.4-a+sha3" } */
+/* { dg-options "-O1 -march=armv8.4-a+sha3" } */
 
 #include "sha3.h"
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c
index 78e737e2f40b..05396a1b6b11 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c
@@ -17,8 +17,10 @@ call_vadd ()
   neon[4] = vaddq_u8 (neon[5], neon[6]);
 }
 
-inline void __attribute__((always_inline))
-call_vbsl () // { dg-error "inlining failed" }
+// Gets expanded to bitwise select early, so no error.  An error would be
+// more correct though.
+inline void __attribute__ ((always_inline))
+call_vbsl ()
 {
   neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c
index 0cd3487973e3..5ece89372d87 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c
@@ -17,8 +17,10 @@ call_vadd ()
   neon[4] = vaddq_u8 (neon[5], neon[6]);
 }
 
-inline void __attribute__((always_inline))
-call_vbsl () // { dg-error "inlining failed" }
+// Gets expanded to bitwise select early, so no error.  An error would be
+// more correct though.
+inline void __attribute__ ((always_inline))
+call_vbsl ()
 {
   neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]);
 }
@@ -51,7 +53,6 @@ void
 sc_caller () [[arm::inout("za"), arm::streaming]]
 {
   call_vadd ();
-  call_vbsl ();
   call_svadd ();
   call_svld1_gather ();
   call_svzero ();
diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
index d96a8733a575..fd02607ddfcb 100644
--- a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
@@ -10,7 +10,5 @@ __attribute__ ((target ("+nosimd")))
 uint8x16_t
 foo (uint8x16_t a, uint8x16_t b, uint8x16_t c)
 {
-  return vbslq_u8 (a, b, c); /* { dg-message "called from here" } */
+  return vbslq_u8 (a, b, c); /* { dg-error {ACLE function 'vbslq_u8' requires ISA extension 'simd'} } */
 }
-
-/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
-- 
2.54.0

Reply via email to