From: Karl Meakin <[email protected]>
Port the following intrinsics to the pragma-based framework:
* vcreate
* vcombine
* vdup_n
* vmov_n
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def (combine): Delete
function declarations.
* config/aarch64/aarch64-simd-pragma-builtins.def (vcombine_mf8,
vcreate_mf8, vdup_n_mf8, vdupq_n_mf8, vmov_n_mf8,
vmovq_n_mf8): Delete entries.
* config/aarch64/aarch64-builtins.cc
(aarch64_expand_pragma_builtin): Delete cases for
`UNSPEC_COMBINE` and `UNSPEC_VCREATE`.
(aarch64_fold_combine): Delete function.
(aarch64_gimple_fold_pragma_builtin): Delete cases for
`UNSPEC_COMBINE` and `UNSPEC_VCREATE`.
(aarch64_general_gimple_fold_builtin): Delete cases for
`vcombine`.
* config/aarch64/aarch64.md (UNSPEC_COMBINE, UNSPEC_VCREATE): Delete
unspecs.
* config/aarch64/aarch64-acle-builtins.cc
(gimple_folder::force_val, gimple_folder::assign): New
functions.
* config/aarch64/aarch64-acle-builtins.h (TYPES_all_neon,
TYPES_bhsd_neon): New type lists.
* config/aarch64/aarch64-neon-builtins-base.def (vcreate,
vcombine, vdup_n, vdupq_n, vmov_n, vmovq_n): New function groups.
* config/aarch64/aarch64-neon-builtins-base.cc (build_vec_dup): New
function.
(struct gimple_create, struct gimple_combine,
struct gimple_dup): New structs.
(vcreate, vcombine, vdup_n, vdupq_n, vmov_n, vmovq_n): New
function bases.
* config/aarch64/arm_neon.h (vcreate_s8, vcreate_s16,
vcreate_s32, vcreate_s64, vcreate_f16, vcreate_f32,
vcreate_u8, vcreate_u16, vcreate_u32, vcreate_u64,
vcreate_f64, vcreate_p8, vcreate_p16, vcreate_p64,
vcombine_s8, vcombine_s16, vcombine_s32, vcombine_s64,
vcombine_f16, vcombine_f32, vcombine_u8, vcombine_u16,
vcombine_u32, vcombine_u64, vcombine_f64, vcombine_p8,
vcombine_p16, vcombine_p64, vdup_n_f16, vdup_n_f32,
vdup_n_f64, vdup_n_p8, vdup_n_p16, vdup_n_p64, vdup_n_s8,
vdup_n_s16, vdup_n_s32, vdup_n_s64, vdup_n_u8, vdup_n_u16,
vdup_n_u32, vdup_n_u64, vdupq_n_f16, vdupq_n_f32, vdupq_n_f64,
vdupq_n_p8, vdupq_n_p16, vdupq_n_p64, vdupq_n_s8, vdupq_n_s16,
vdupq_n_s32, vdupq_n_s64, vdupq_n_u8, vdupq_n_u16,
vdupq_n_u32, vdupq_n_u64, vmov_n_f16, vmov_n_f32, vmov_n_f64,
vmov_n_p8, vmov_n_p16, vmov_n_p64, vmov_n_s8, vmov_n_s16,
vmov_n_s32, vmov_n_s64, vmov_n_u8, vmov_n_u16, vmov_n_u32,
vmov_n_u64, vmovq_n_f16, vmovq_n_f32, vmovq_n_f64, vmovq_n_p8,
vmovq_n_p16, vmovq_n_p64, vmovq_n_s8, vmovq_n_s16,
vmovq_n_s32, vmovq_n_s64, vmovq_n_u8, vmovq_n_u16,
vmovq_n_u32, vmovq_n_u64, vcreate_bf16, vcombine_bf16,
vdup_n_bf16, vdupq_n_bf16): Delete functions.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c: Fix test.
* gcc.target/aarch64/vmov_n_1.c: Fix test.
* gcc.target/aarch64/neon/vcombine.c: New test.
* gcc.target/aarch64/neon/vcreate.c: New test.
* gcc.target/aarch64/neon/vdup.c: New test.
* gcc.target/aarch64/neon/vmov_n.c: New test.
---
gcc/config/aarch64/aarch64-acle-builtins.cc | 15 +
gcc/config/aarch64/aarch64-acle-builtins.h | 24 +
gcc/config/aarch64/aarch64-builtins.cc | 43 --
.../aarch64/aarch64-neon-builtins-base.cc | 63 ++
.../aarch64/aarch64-neon-builtins-base.def | 10 +
gcc/config/aarch64/aarch64-simd-builtins.def | 3 -
.../aarch64/aarch64-simd-pragma-builtins.def | 27 -
gcc/config/aarch64/aarch64.md | 2 -
gcc/config/aarch64/arm_neon.h | 628 ------------------
.../aarch64/advsimd-intrinsics/bf16_dup.c | 7 +-
.../gcc.target/aarch64/neon/vcombine.c | 122 ++++
.../gcc.target/aarch64/neon/vcreate.c | 119 ++++
gcc/testsuite/gcc.target/aarch64/neon/vdup.c | 226 +++++++
.../gcc.target/aarch64/neon/vmov_n.c | 212 ++++++
gcc/testsuite/gcc.target/aarch64/vmov_n_1.c | 2 +-
15 files changed, 796 insertions(+), 707 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vcombine.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vcreate.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vdup.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/neon/vmov_n.c
diff --git a/gcc/config/aarch64/aarch64-acle-builtins.cc
b/gcc/config/aarch64/aarch64-acle-builtins.cc
index d065457c0118..f441509ea0e6 100644
--- a/gcc/config/aarch64/aarch64-acle-builtins.cc
+++ b/gcc/config/aarch64/aarch64-acle-builtins.cc
@@ -3091,6 +3091,21 @@ gimple_folder::fold ()
return base->fold (*this);
}
+/* Force EXPR to be a GIMPLE value, putting it into a new variable if
+ necessary. */
+tree
+gimple_folder::force_val (tree expr)
+{
+ if (is_gimple_val (expr))
+ return expr;
+
+ auto type = TREE_TYPE (expr);
+ auto var = create_tmp_var (type);
+ auto stmt = gimple_build_assign (var, expr);
+ gsi_insert_before (this->gsi, stmt, GSI_SAME_STMT);
+ return var;
+}
+
function_expander::function_expander (const function_instance &instance,
tree fndecl, tree call_expr_in,
rtx possible_target_in)
diff --git a/gcc/config/aarch64/aarch64-acle-builtins.h
b/gcc/config/aarch64/aarch64-acle-builtins.h
index 5e90ba21bf39..150f73d21b7f 100644
--- a/gcc/config/aarch64/aarch64-acle-builtins.h
+++ b/gcc/config/aarch64/aarch64-acle-builtins.h
@@ -659,6 +659,11 @@ public:
gimple *convert_and_fold (tree, gimple *(*) (gimple_folder &,
tree, vec<tree> &));
+ tree force_val (tree expr);
+ gassign *assign (tree lhs, tree rhs);
+ gassign *assign (tree lhs, tree_code code, tree rhs1, tree rhs2 = NULL_TREE,
+ tree rhs3 = NULL_TREE);
+
gimple *fold_to_cstu (poly_uint64);
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
@@ -1762,6 +1767,14 @@ function_expander::result_mode () const
#define TYPES_za(S, D, T) \
S (za)
+/* _p8 _s8 _u8 _mf8
+ _p16 _s16 _u16 _f16 _bf16
+ _s32 _u32 _f32
+ _p64 _s64 _u64 _f64. */
+#define TYPES_all_neon(S, D, T) \
+ TYPES_bhsd_neon (S, D, T), \
+ TYPES_h_bfloat (S, D, T)
+
/* _p8 _p16 _p64. */
#define TYPES_bhd_poly(S, D, T) \
S (p8), S (p16), S (p64)
@@ -1770,6 +1783,15 @@ function_expander::result_mode () const
#define TYPES_bhdq_poly(S, D, T) \
S (p8), S (p16), S (p64), S (p128)
+/* _p8 _s8 _u8 _mf8
+ _p16 _s16 _u16 _f16
+ _s32 _u32 _f32
+ _p64 _s64 _u64 _f64. */
+#define TYPES_bhsd_neon(S, D, T) \
+ TYPES_bhd_poly (S, D, T), S (mf8), \
+ TYPES_all_integer (S, D, T), \
+ TYPES_all_float (S, D, T)
+
/* Describe a tuple of type suffixes in which only the first is used. */
#define DEF_VECTOR_TYPE(X) \
{ TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES }
@@ -1904,8 +1926,10 @@ DEF_SVE_TYPES_ARRAY (mop_i16i64_signed);
DEF_SVE_TYPES_ARRAY (mop_i16i64_unsigned);
DEF_SVE_TYPES_ARRAY (za);
+DEF_SVE_TYPES_ARRAY (all_neon);
DEF_SVE_TYPES_ARRAY (bhd_poly);
DEF_SVE_TYPES_ARRAY (bhdq_poly);
+DEF_SVE_TYPES_ARRAY (bhsd_neon);
static const group_suffix_index groups_none[] = {
GROUP_none, NUM_GROUP_SUFFIXES
diff --git a/gcc/config/aarch64/aarch64-builtins.cc
b/gcc/config/aarch64/aarch64-builtins.cc
index e34fa4a59d8e..6497db6f1b06 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -4289,10 +4289,6 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
icode = code_for_aarch64_simd_bsl (ops[0].mode);
break;
- case UNSPEC_COMBINE:
- icode = code_for_aarch64_combine (ops[1].mode);
- break;
-
case UNSPEC_DUP:
if (builtin_data.signature == aarch64_builtin_signatures::load)
aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode));
@@ -4497,9 +4493,6 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
case UNSPEC_UZP:
return aarch64_expand_permute_pair (ops, UNSPEC_UZP1, UNSPEC_UZP2);
- case UNSPEC_VCREATE:
- return force_lowpart_subreg (ops[0].mode, ops[1].value, ops[1].mode);
-
case UNSPEC_VEC_COPY:
{
aarch64_convert_to_lane_mask (&ops[2], ops[1].mode);
@@ -5055,27 +5048,6 @@ aarch64_set_lane (tree lhs, tree elt, tree vec, tree
lane)
return gimple_build_assign (lhs, BIT_INSERT_EXPR, vec, elt, bit);
}
-/* Fold a call to vcombine. */
-static gimple *
-aarch64_fold_combine (gcall *stmt)
-{
- tree first_part, second_part;
- if (BYTES_BIG_ENDIAN)
- {
- second_part = gimple_call_arg (stmt, 0);
- first_part = gimple_call_arg (stmt, 1);
- }
- else
- {
- first_part = gimple_call_arg (stmt, 0);
- second_part = gimple_call_arg (stmt, 1);
- }
- tree ret_type = gimple_call_return_type (stmt);
- tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part,
- NULL_TREE, second_part);
- return gimple_build_assign (gimple_call_lhs (stmt), ctor);
-}
-
/* Fold a call to vaeseq_u8 and vaesdq_u8.
That is `vaeseq_u8 (x ^ y, 0)` gets folded
into `vaeseq_u8 (x, y)`.*/
@@ -5219,9 +5191,6 @@ aarch64_gimple_fold_pragma_builtin
switch (builtin_data.unspec)
{
- case UNSPEC_COMBINE:
- return aarch64_fold_combine (stmt);
-
case UNSPEC_DUP:
case UNSPEC_DUP_LANE:
{
@@ -5309,12 +5278,6 @@ aarch64_gimple_fold_pragma_builtin
case UNSPEC_UZP2:
return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 1);
- case UNSPEC_VCREATE:
- return gimple_build_assign (gimple_call_lhs (stmt),
- fold_build1 (VIEW_CONVERT_EXPR,
- types[0].type (),
- gimple_call_arg (stmt, 0)));
-
case UNSPEC_VEC_COPY:
{
tree elt = aarch64_get_lane (gimple_call_arg (stmt, 2),
@@ -5534,12 +5497,6 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode,
gcall *stmt,
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
break;
- BUILTIN_VDC (BINOP, combine, 0, QUIET)
- BUILTIN_VD_I (BINOPU, combine, 0, DEFAULT)
- BUILTIN_VDC_P (BINOPP, combine, 0, DEFAULT)
- new_stmt = aarch64_fold_combine (stmt);
- break;
-
/*lower store and load neon builtins to gimple. */
BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
diff --git a/gcc/config/aarch64/aarch64-neon-builtins-base.cc
b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
index 379c89e714d6..d69b65d5ca60 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-neon-builtins-base.cc
@@ -45,6 +45,16 @@
#include "gimple-fold.h"
namespace aarch64_acle {
+
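+/* Build an expression for a vector of type `TYPE` with all lanes set to
+ `ELEM`. A single-lane vector is built as a VIEW_CONVERT_EXPR of `ELEM`;
+ any other vector is built as a VEC_DUPLICATE_EXPR. */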
+tree
+build_vec_dup (tree type, tree elem)
+{
+ return known_eq (TYPE_VECTOR_SUBPARTS (type), 1U)
+ ? fold_build1 (VIEW_CONVERT_EXPR, type, elem)
+ : fold_build1 (VEC_DUPLICATE_EXPR, type, elem);
+}
+
/* Base class for all function expanders.
At least one of `expand` or `fold` must be overriden by derived classes. */
class gimple_function_base : public function_base
@@ -53,6 +63,41 @@ class gimple_function_base : public function_base
gimple *fold (gimple_folder &) const override { gcc_unreachable (); }
};
+struct gimple_create : public gimple_function_base
+{
+ gimple *fold (gimple_folder &f) const override
+ {
+ auto arg = gimple_call_arg (f.call, 0);
+ return gimple_build_assign (f.lhs, fold_build1 (VIEW_CONVERT_EXPR,
+ TREE_TYPE (f.lhs), arg));
+ }
+};
+
+struct gimple_combine : public gimple_function_base
+{
+ gimple *fold (gimple_folder &f) const override
+ {
+ auto arg1 = gimple_call_arg (f.call, 0);
+ auto arg2 = gimple_call_arg (f.call, 1);
+ auto arg_type = TREE_TYPE (arg1);
+ auto elem_type = TREE_TYPE (arg_type);
+ auto ret_type = TREE_TYPE (f.lhs);
+
+ if (known_eq (TYPE_VECTOR_SUBPARTS (arg_type), 1U))
+ {
+ arg1 = f.force_val (fold_build1 (VIEW_CONVERT_EXPR, elem_type, arg1));
+ arg2 = f.force_val (fold_build1 (VIEW_CONVERT_EXPR, elem_type, arg2));
+ }
+
+ if (BYTES_BIG_ENDIAN)
+ std::swap (arg1, arg2);
+
+ return gimple_build_assign (f.lhs,
+ build_constructor_va (ret_type, 2, NULL_TREE,
+ arg1, NULL_TREE, arg2));
+ }
+};
+
/* For intrinsics that map to a single GIMPLE expression with no argument
preparation necessary. */
class gimple_expr : public gimple_function_base
@@ -99,6 +144,24 @@ public:
}
};
+struct gimple_dup : public gimple_function_base
+{
+ gimple *fold (gimple_folder &f) const override
+ {
+ auto elem = gimple_call_arg (f.call, 0);
+ auto ret_type = TREE_TYPE (f.lhs);
+ return gimple_build_assign (f.lhs, build_vec_dup (ret_type, elem));
+ }
+};
+
+// Vector creation
+NEON_FUNCTION (vcreate, gimple_create,)
+NEON_FUNCTION (vcombine, gimple_combine,)
+NEON_FUNCTION (vdup_n, gimple_dup,)
+NEON_FUNCTION (vdupq_n, gimple_dup,)
+NEON_FUNCTION (vmov_n, gimple_dup,)
+NEON_FUNCTION (vmovq_n, gimple_dup,)
+
// Lanewise arithmetic
NEON_FUNCTION (vaddd, gimple_expr, (PLUS_EXPR))
NEON_FUNCTION (vadd, gimple_expr, (PLUS_EXPR, PLUS_EXPR, BIT_XOR_EXPR))
diff --git a/gcc/config/aarch64/aarch64-neon-builtins-base.def
b/gcc/config/aarch64/aarch64-neon-builtins-base.def
index 1291e2164535..3ea0432da0e9 100644
--- a/gcc/config/aarch64/aarch64-neon-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-neon-builtins-base.def
@@ -17,6 +17,16 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+// Vector creation
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
+DEF_NEON_FUNCTION (vcreate, all_neon, ("D0,su64"))
+DEF_NEON_FUNCTION (vcombine, all_neon, ("Q0,D0,D0"))
+DEF_NEON_FUNCTION (vdup_n, all_neon, ("D0,s0"))
+DEF_NEON_FUNCTION (vdupq_n, all_neon, ("Q0,s0"))
+DEF_NEON_FUNCTION (vmov_n, bhsd_neon, ("D0,s0"))
+DEF_NEON_FUNCTION (vmovq_n, bhsd_neon, ("Q0,s0"))
+#undef REQUIRED_EXTENSIONS
+
// Lanewise arithmetic
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
DEF_NEON_FUNCTION (vaddd, d_integer, ("s0,s0,s0"))
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def
b/gcc/config/aarch64/aarch64-simd-builtins.def
index 1b09191a0c01..8cea47f6b67d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -50,9 +50,6 @@
BUILTIN_V12DI (STORESTRUCT_LANE_U, vec_stl1_lane, 0, ALL)
BUILTIN_V12DI (STORESTRUCT_LANE_P, vec_stl1_lane, 0, ALL)
- BUILTIN_VDC (BINOP, combine, 0, QUIET)
- BUILTIN_VD_I (BINOPU, combine, 0, DEFAULT)
- BUILTIN_VDC_P (BINOPP, combine, 0, DEFAULT)
BUILTIN_VB (BINOPP, pmul, 0, DEFAULT)
VAR1 (BINOPP, pmull, 0, DEFAULT, v8qi)
VAR1 (BINOPP, pmull_hi, 0, DEFAULT, v16qi)
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index 41bafb2a96ef..c17a0a1c600e 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -202,11 +202,6 @@ ENTRY_TERNARY (vbsl_mf8, mf8, u8, mf8, mf8, UNSPEC_BSL,
QUIET)
ENTRY_TERNARY (vbslq_mf8, mf8q, u8q, mf8q, mf8q, UNSPEC_BSL, QUIET)
#undef REQUIRED_EXTENSIONS
-// combine
-#define REQUIRED_EXTENSIONS nonstreaming_only (0)
-ENTRY_BINARY (vcombine_mf8, mf8q, mf8, mf8, UNSPEC_COMBINE, QUIET)
-#undef REQUIRED_EXTENSIONS
-
// copy_lane
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
ENTRY_BINARY_TWO_LANES (vcopy_lane_mf8, mf8, mf8, mf8,
@@ -219,22 +214,6 @@ ENTRY_BINARY_TWO_LANES (vcopyq_laneq_mf8, mf8q, mf8q, mf8q,
UNSPEC_VEC_COPY, QUIET)
#undef REQUIRED_EXTENSIONS
-// create
-#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
-ENTRY_UNARY (vcreate_mf8, mf8, u64_scalar, UNSPEC_VCREATE, QUIET)
-#undef REQUIRED_EXTENSIONS
-
-// dup
-#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
-ENTRY_UNARY (vdup_n_mf8, mf8, mf8_scalar, UNSPEC_DUP, QUIET)
-ENTRY_UNARY (vdupq_n_mf8, mf8q, mf8_scalar, UNSPEC_DUP, QUIET)
-
-ENTRY_UNARY_LANE (vdup_lane_mf8, mf8, mf8, UNSPEC_DUP_LANE, QUIET)
-ENTRY_UNARY_LANE (vdupq_lane_mf8, mf8q, mf8, UNSPEC_DUP_LANE, QUIET)
-ENTRY_UNARY_LANE (vdup_laneq_mf8, mf8, mf8q, UNSPEC_DUP_LANE, QUIET)
-ENTRY_UNARY_LANE (vdupq_laneq_mf8, mf8q, mf8q, UNSPEC_DUP_LANE, QUIET)
-#undef REQUIRED_EXTENSIONS
-
// dupb_lane
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
ENTRY_UNARY_LANE (vdupb_lane_mf8, mf8_scalar, mf8, UNSPEC_GET_LANE, QUIET)
@@ -306,12 +285,6 @@ ENTRY_TERNARY (vmmlaq_f16_mf8, f16q, f16q, mf8q, mf8q,
UNSPEC_FMMLA, FP8)
ENTRY_TERNARY (vmmlaq_f32_mf8, f32q, f32q, mf8q, mf8q, UNSPEC_FMMLA, FP8)
#undef REQUIRED_EXTENSIONS
-// mov
-#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
-ENTRY_UNARY (vmov_n_mf8, mf8, mf8_scalar, UNSPEC_DUP, QUIET)
-ENTRY_UNARY (vmovq_n_mf8, mf8q, mf8_scalar, UNSPEC_DUP, QUIET)
-#undef REQUIRED_EXTENSIONS
-
// rev
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_SIMD)
ENTRY_UNARY (vrev64_mf8, mf8, mf8, UNSPEC_REV64, QUIET)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d8c8c73a6321..6f1ec6bf3616 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -230,7 +230,6 @@
UNSPEC_BSL
UNSPEC_CALLEE_ABI
UNSPEC_CASESI
- UNSPEC_COMBINE
UNSPEC_CPYMEM
UNSPEC_CRC32B
UNSPEC_CRC32CB
@@ -358,7 +357,6 @@
UNSPEC_UNPACKSLO
UNSPEC_UNPACKULO
UNSPEC_PACK
- UNSPEC_VCREATE
UNSPEC_VEC_COPY
UNSPEC_WHILEGE
UNSPEC_WHILEGT
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b45f53663ba8..6146acf8e819 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -2349,104 +2349,6 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
}
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_s8 (uint64_t __a)
-{
- return (int8x8_t) __a;
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_s16 (uint64_t __a)
-{
- return (int16x4_t) __a;
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_s32 (uint64_t __a)
-{
- return (int32x2_t) __a;
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_s64 (uint64_t __a)
-{
- return (int64x1_t) {__a};
-}
-
-__extension__ extern __inline float16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_f16 (uint64_t __a)
-{
- return (float16x4_t) __a;
-}
-
-__extension__ extern __inline float32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_f32 (uint64_t __a)
-{
- return (float32x2_t) __a;
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_u8 (uint64_t __a)
-{
- return (uint8x8_t) __a;
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_u16 (uint64_t __a)
-{
- return (uint16x4_t) __a;
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_u32 (uint64_t __a)
-{
- return (uint32x2_t) __a;
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_u64 (uint64_t __a)
-{
- return (uint64x1_t) {__a};
-}
-
-__extension__ extern __inline float64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_f64 (uint64_t __a)
-{
- return (float64x1_t) __a;
-}
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_p8 (uint64_t __a)
-{
- return (poly8x8_t) __a;
-}
-
-__extension__ extern __inline poly16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_p16 (uint64_t __a)
-{
- return (poly16x4_t) __a;
-}
-
-__extension__ extern __inline poly64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_p64 (uint64_t __a)
-{
- return (poly64x1_t) __a;
-}
-
/* vget_lane */
__extension__ extern __inline float16_t
@@ -2847,105 +2749,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec,
const int __index)
return __aarch64_vset_lane_any (__elem, __vec, __index);
}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_s8 (int8x8_t __a, int8x8_t __b)
-{
- return __builtin_aarch64_combinev8qi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_s16 (int16x4_t __a, int16x4_t __b)
-{
- return __builtin_aarch64_combinev4hi (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_s32 (int32x2_t __a, int32x2_t __b)
-{
- return __builtin_aarch64_combinev2si (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_s64 (int64x1_t __a, int64x1_t __b)
-{
- return __builtin_aarch64_combinedi (__a[0], __b[0]);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_f16 (float16x4_t __a, float16x4_t __b)
-{
- return __builtin_aarch64_combinev4hf (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_f32 (float32x2_t __a, float32x2_t __b)
-{
- return __builtin_aarch64_combinev2sf (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
-{
- return __builtin_aarch64_combinev8qi_uuu (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
-{
- return __builtin_aarch64_combinev4hi_uuu (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
-{
- return __builtin_aarch64_combinev2si_uuu (__a, __b);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
-{
- return __builtin_aarch64_combinedi_uuu (__a[0], __b[0]);
-}
-
-__extension__ extern __inline float64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_f64 (float64x1_t __a, float64x1_t __b)
-{
- return __builtin_aarch64_combinedf (__a[0], __b[0]);
-}
-
-__extension__ extern __inline poly8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
-{
- return __builtin_aarch64_combinev8qi_ppp (__a, __b);
-}
-
-__extension__ extern __inline poly16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
-{
- return __builtin_aarch64_combinev4hi_ppp (__a, __b);
-}
-
-__extension__ extern __inline poly64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_p64 (poly64x1_t __a, poly64x1_t __b)
-{
- return __builtin_aarch64_combinedi_ppp (__a[0], __b[0]);
-}
-
/* Start of temporary inline asm implementations. */
__extension__ extern __inline int8x8_t
@@ -10452,209 +10255,6 @@ vcvtpq_u64_f64 (float64x2_t __a)
return __builtin_aarch64_lceiluv2dfv2di_us (__a);
}
-/* vdup_n */
-
-__extension__ extern __inline float16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_f16 (float16_t __a)
-{
- return (float16x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline float32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_f32 (float32_t __a)
-{
- return (float32x2_t) {__a, __a};
-}
-
-__extension__ extern __inline float64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_f64 (float64_t __a)
-{
- return (float64x1_t) {__a};
-}
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_p8 (poly8_t __a)
-{
- return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline poly16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_p16 (poly16_t __a)
-{
- return (poly16x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline poly64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_p64 (poly64_t __a)
-{
- return (poly64x1_t) {__a};
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_s8 (int8_t __a)
-{
- return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_s16 (int16_t __a)
-{
- return (int16x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_s32 (int32_t __a)
-{
- return (int32x2_t) {__a, __a};
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_s64 (int64_t __a)
-{
- return (int64x1_t) {__a};
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_u8 (uint8_t __a)
-{
- return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_u16 (uint16_t __a)
-{
- return (uint16x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_u32 (uint32_t __a)
-{
- return (uint32x2_t) {__a, __a};
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_u64 (uint64_t __a)
-{
- return (uint64x1_t) {__a};
-}
-
-/* vdupq_n */
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_f16 (float16_t __a)
-{
- return (float16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_f32 (float32_t __a)
-{
- return (float32x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline float64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_f64 (float64_t __a)
-{
- return (float64x2_t) {__a, __a};
-}
-
-__extension__ extern __inline poly8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_p8 (poly8_t __a)
-{
- return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
- __a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline poly16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_p16 (poly16_t __a)
-{
- return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline poly64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_p64 (poly64_t __a)
-{
- return (poly64x2_t) {__a, __a};
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_s8 (int8_t __a)
-{
- return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
- __a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_s16 (int16_t __a)
-{
- return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_s32 (int32_t __a)
-{
- return (int32x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_s64 (int64_t __a)
-{
- return (int64x2_t) {__a, __a};
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_u8 (uint8_t __a)
-{
- return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
- __a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_u16 (uint16_t __a)
-{
- return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_u32 (uint32_t __a)
-{
- return (uint32x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_u64 (uint64_t __a)
-{
- return (uint64x2_t) {__a, __a};
-}
-
/* vdup_lane */
__extension__ extern __inline float16x4_t
@@ -16705,204 +16305,6 @@ vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
return (__a - (__b * __aarch64_vget_lane_any (__c, __lane)));
}
-/* vmov_n_ */
-
-__extension__ extern __inline float16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_f16 (float16_t __a)
-{
- return vdup_n_f16 (__a);
-}
-
-__extension__ extern __inline float32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_f32 (float32_t __a)
-{
- return vdup_n_f32 (__a);
-}
-
-__extension__ extern __inline float64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_f64 (float64_t __a)
-{
- return (float64x1_t) {__a};
-}
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_p8 (poly8_t __a)
-{
- return vdup_n_p8 (__a);
-}
-
-__extension__ extern __inline poly16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_p16 (poly16_t __a)
-{
- return vdup_n_p16 (__a);
-}
-
-__extension__ extern __inline poly64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_p64 (poly64_t __a)
-{
- return vdup_n_p64 (__a);
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_s8 (int8_t __a)
-{
- return vdup_n_s8 (__a);
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_s16 (int16_t __a)
-{
- return vdup_n_s16 (__a);
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_s32 (int32_t __a)
-{
- return vdup_n_s32 (__a);
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_s64 (int64_t __a)
-{
- return (int64x1_t) {__a};
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_u8 (uint8_t __a)
-{
- return vdup_n_u8 (__a);
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_u16 (uint16_t __a)
-{
- return vdup_n_u16 (__a);
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_u32 (uint32_t __a)
-{
- return vdup_n_u32 (__a);
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmov_n_u64 (uint64_t __a)
-{
- return (uint64x1_t) {__a};
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_f16 (float16_t __a)
-{
- return vdupq_n_f16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_f32 (float32_t __a)
-{
- return vdupq_n_f32 (__a);
-}
-
-__extension__ extern __inline float64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_f64 (float64_t __a)
-{
- return vdupq_n_f64 (__a);
-}
-
-__extension__ extern __inline poly8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_p8 (poly8_t __a)
-{
- return vdupq_n_p8 (__a);
-}
-
-__extension__ extern __inline poly16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_p16 (poly16_t __a)
-{
- return vdupq_n_p16 (__a);
-}
-
-__extension__ extern __inline poly64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_p64 (poly64_t __a)
-{
- return vdupq_n_p64 (__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_s8 (int8_t __a)
-{
- return vdupq_n_s8 (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_s16 (int16_t __a)
-{
- return vdupq_n_s16 (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_s32 (int32_t __a)
-{
- return vdupq_n_s32 (__a);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_s64 (int64_t __a)
-{
- return vdupq_n_s64 (__a);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_u8 (uint8_t __a)
-{
- return vdupq_n_u8 (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_u16 (uint16_t __a)
-{
- return vdupq_n_u16 (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_u32 (uint32_t __a)
-{
- return vdupq_n_u32 (__a);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vmovq_n_u64 (uint64_t __a)
-{
- return vdupq_n_u64 (__a);
-}
-
/* vmul_lane */
__extension__ extern __inline float32x2_t
@@ -27772,36 +27174,6 @@ vgetq_lane_bf16 (bfloat16x8_t __a, const int __b)
return __aarch64_vget_lane_any (__a, __b);
}
-__extension__ extern __inline bfloat16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcreate_bf16 (uint64_t __a)
-{
- return (bfloat16x4_t) __a;
-}
-
-__extension__ extern __inline bfloat16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b)
-{
- return __builtin_aarch64_combinev4bf (__a, __b);
-}
-
-/* vdup */
-
-__extension__ extern __inline bfloat16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdup_n_bf16 (bfloat16_t __a)
-{
- return (bfloat16x4_t) {__a, __a, __a, __a};
-}
-
-__extension__ extern __inline bfloat16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vdupq_n_bf16 (bfloat16_t __a)
-{
- return (bfloat16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
-}
-
__extension__ extern __inline bfloat16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vdup_lane_bf16 (bfloat16x4_t __a, const int __b)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c
index da9370ba52bb..b85625234ab9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c
@@ -36,18 +36,19 @@ bfloat16x8_t vdupq_test (bfloat16_t a)
{
return vdupq_n_bf16 (a);
}
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[0\\\]" 1 } } */
bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a)
{
return vdupq_lane_bf16 (a, 1);
}
-/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[1\\\]" 1 } } */
bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a)
{
return vget_lane_bf16 (a, 1);
}
-/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a)
{
@@ -59,7 +60,7 @@ bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a)
{
return vdup_laneq_bf16 (a, 7);
}
-/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[7\\\]" } } */
+/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" } } */
bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vcombine.c b/gcc/testsuite/gcc.target/aarch64/neon/vcombine.c
new file mode 100644
index 000000000000..0ba10cde0a48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vcombine.c
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+#define TEST_COMBINE(NAME, RET_TYPE, ARG_TYPE) \
+ RET_TYPE test_##NAME (ARG_TYPE arg1, ARG_TYPE arg2) \
+ { \
+ return NAME (arg1, arg2); \
+ }
+
+/*
+** test_vcombine_u8:
+** (zip1|uzp1) v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_u8, uint8x16_t, uint8x8_t)
+
+/*
+** test_vcombine_s8:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_s8, int8x16_t, int8x8_t)
+
+/*
+** test_vcombine_p8:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_p8, poly8x16_t, poly8x8_t)
+
+/*
+** test_vcombine_mf8:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_mf8, mfloat8x16_t, mfloat8x8_t)
+
+/*
+** test_vcombine_u16:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_u16, uint16x8_t, uint16x4_t)
+
+/*
+** test_vcombine_s16:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_s16, int16x8_t, int16x4_t)
+
+/*
+** test_vcombine_p16:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_p16, poly16x8_t, poly16x4_t)
+
+/*
+** test_vcombine_f16:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_f16, float16x8_t, float16x4_t)
+
+/*
+** test_vcombine_bf16:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_bf16, bfloat16x8_t, bfloat16x4_t)
+
+/*
+** test_vcombine_u32:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_u32, uint32x4_t, uint32x2_t)
+
+/*
+** test_vcombine_s32:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_s32, int32x4_t, int32x2_t)
+
+/*
+** test_vcombine_f32:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_f32, float32x4_t, float32x2_t)
+
+/*
+** test_vcombine_u64:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_u64, uint64x2_t, uint64x1_t)
+
+/*
+** test_vcombine_s64:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_s64, int64x2_t, int64x1_t)
+
+/*
+** test_vcombine_p64:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_p64, poly64x2_t, poly64x1_t)
+
+/*
+** test_vcombine_f64:
+** zip1 v0\.2d, v0\.2d, v1\.2d
+** ret
+*/
+TEST_COMBINE (vcombine_f64, float64x2_t, float64x1_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vcreate.c b/gcc/testsuite/gcc.target/aarch64/neon/vcreate.c
new file mode 100644
index 000000000000..f553ea703934
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vcreate.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+#define TEST_CREATE(NAME, RET_TYPE) \
+ RET_TYPE test_##NAME (uint64_t arg1) { return NAME (arg1); }
+
+/*
+** test_vcreate_u8:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_u8, uint8x8_t)
+
+/*
+** test_vcreate_s8:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_s8, int8x8_t)
+
+/*
+** test_vcreate_p8:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_p8, poly8x8_t)
+
+/*
+** test_vcreate_mf8:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_mf8, mfloat8x8_t)
+
+/*
+** test_vcreate_u16:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_u16, uint16x4_t)
+
+/*
+** test_vcreate_s16:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_s16, int16x4_t)
+
+/*
+** test_vcreate_p16:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_p16, poly16x4_t)
+
+/*
+** test_vcreate_f16:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_f16, float16x4_t)
+
+/*
+** test_vcreate_bf16:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_bf16, bfloat16x4_t)
+
+/*
+** test_vcreate_u32:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_u32, uint32x2_t)
+
+/*
+** test_vcreate_s32:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_s32, int32x2_t)
+
+/*
+** test_vcreate_f32:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_f32, float32x2_t)
+
+/*
+** test_vcreate_u64:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_u64, uint64x1_t)
+
+/*
+** test_vcreate_s64:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_s64, int64x1_t)
+
+/*
+** test_vcreate_p64:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_p64, poly64x1_t)
+
+/*
+** test_vcreate_f64:
+** fmov d0, x0
+** ret
+*/
+TEST_CREATE (vcreate_f64, float64x1_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vdup.c b/gcc/testsuite/gcc.target/aarch64/neon/vdup.c
new file mode 100644
index 000000000000..c1b2c166e624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vdup.c
@@ -0,0 +1,226 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vdup_n_p8:
+** dup v0\.8b, w0
+** ret
+*/
+TEST_UNARY (vdup_n_p8, poly8x8_t, poly8_t)
+
+/*
+** test_vdup_n_mf8:
+** dup v0\.8b, v0\.b\[0\]
+** ret
+*/
+TEST_UNARY (vdup_n_mf8, mfloat8x8_t, mfloat8_t)
+
+/*
+** test_vdup_n_u8:
+** dup v0\.8b, w0
+** ret
+*/
+TEST_UNARY (vdup_n_u8, uint8x8_t, uint8_t)
+
+/*
+** test_vdup_n_s8:
+** dup v0\.8b, w0
+** ret
+*/
+TEST_UNARY (vdup_n_s8, int8x8_t, int8_t)
+
+/*
+** test_vdupq_n_p8:
+** dup v0\.16b, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_p8, poly8x16_t, poly8_t)
+
+/*
+** test_vdupq_n_mf8:
+** dup v0\.16b, v0\.b\[0\]
+** ret
+*/
+TEST_UNARY (vdupq_n_mf8, mfloat8x16_t, mfloat8_t)
+
+/*
+** test_vdupq_n_u8:
+** dup v0\.16b, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_u8, uint8x16_t, uint8_t)
+
+/*
+** test_vdupq_n_s8:
+** dup v0\.16b, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_s8, int8x16_t, int8_t)
+
+/*
+** test_vdup_n_p16:
+** dup v0\.4h, w0
+** ret
+*/
+TEST_UNARY (vdup_n_p16, poly16x4_t, poly16_t)
+
+/*
+** test_vdup_n_f16:
+** dup v0\.4h, v0\.h\[0\]
+** ret
+*/
+TEST_UNARY (vdup_n_f16, float16x4_t, float16_t)
+
+/*
+** test_vdup_n_bf16:
+** dup v0\.4h, v0\.h\[0\]
+** ret
+*/
+TEST_UNARY (vdup_n_bf16, bfloat16x4_t, bfloat16_t)
+
+/*
+** test_vdup_n_u16:
+** dup v0\.4h, w0
+** ret
+*/
+TEST_UNARY (vdup_n_u16, uint16x4_t, uint16_t)
+
+/*
+** test_vdup_n_s16:
+** dup v0\.4h, w0
+** ret
+*/
+TEST_UNARY (vdup_n_s16, int16x4_t, int16_t)
+
+/*
+** test_vdupq_n_p16:
+** dup v0\.8h, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_p16, poly16x8_t, poly16_t)
+
+/*
+** test_vdupq_n_f16:
+** dup v0\.8h, v0\.h\[0\]
+** ret
+*/
+TEST_UNARY (vdupq_n_f16, float16x8_t, float16_t)
+
+/*
+** test_vdupq_n_bf16:
+** dup v0\.8h, v0\.h\[0\]
+** ret
+*/
+TEST_UNARY (vdupq_n_bf16, bfloat16x8_t, bfloat16_t)
+
+/*
+** test_vdupq_n_u16:
+** dup v0\.8h, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_u16, uint16x8_t, uint16_t)
+
+/*
+** test_vdupq_n_s16:
+** dup v0\.8h, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_s16, int16x8_t, int16_t)
+
+/*
+** test_vdup_n_f32:
+** dup v0\.2s, v0\.s\[0\]
+** ret
+*/
+TEST_UNARY (vdup_n_f32, float32x2_t, float32_t)
+
+/*
+** test_vdup_n_u32:
+** dup v0\.2s, w0
+** ret
+*/
+TEST_UNARY (vdup_n_u32, uint32x2_t, uint32_t)
+
+/*
+** test_vdup_n_s32:
+** dup v0\.2s, w0
+** ret
+*/
+TEST_UNARY (vdup_n_s32, int32x2_t, int32_t)
+
+/*
+** test_vdupq_n_f32:
+** dup v0\.4s, v0\.s\[0\]
+** ret
+*/
+TEST_UNARY (vdupq_n_f32, float32x4_t, float32_t)
+
+/*
+** test_vdupq_n_u32:
+** dup v0\.4s, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_u32, uint32x4_t, uint32_t)
+
+/*
+** test_vdupq_n_s32:
+** dup v0\.4s, w0
+** ret
+*/
+TEST_UNARY (vdupq_n_s32, int32x4_t, int32_t)
+
+/*
+** test_vdup_n_p64:
+** fmov d0, x0
+** ret
+*/
+TEST_UNARY (vdup_n_p64, poly64x1_t, poly64_t)
+
+/*
+** test_vdup_n_f64:
+** ret
+*/
+TEST_UNARY (vdup_n_f64, float64x1_t, float64_t)
+
+/*
+** test_vdup_n_u64:
+** fmov d0, x0
+** ret
+*/
+TEST_UNARY (vdup_n_u64, uint64x1_t, uint64_t)
+/*
+** test_vdup_n_s64:
+** fmov d0, x0
+** ret
+*/
+TEST_UNARY (vdup_n_s64, int64x1_t, int64_t)
+
+/*
+** test_vdupq_n_p64:
+** dup v0\.2d, x0
+** ret
+*/
+TEST_UNARY (vdupq_n_p64, poly64x2_t, poly64_t)
+
+/*
+** test_vdupq_n_f64:
+** dup v0\.2d, v0\.d\[0\]
+** ret
+*/
+TEST_UNARY (vdupq_n_f64, float64x2_t, float64_t)
+
+/*
+** test_vdupq_n_u64:
+** dup v0\.2d, x0
+** ret
+*/
+TEST_UNARY (vdupq_n_u64, uint64x2_t, uint64_t)
+
+/*
+** test_vdupq_n_s64:
+** dup v0\.2d, x0
+** ret
+*/
+TEST_UNARY (vdupq_n_s64, int64x2_t, int64_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/neon/vmov_n.c b/gcc/testsuite/gcc.target/aarch64/neon/vmov_n.c
new file mode 100644
index 000000000000..072d2466b29f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/neon/vmov_n.c
@@ -0,0 +1,212 @@
+/* { dg-do compile } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon_test.h"
+
+/*
+** test_vmov_n_p8:
+** dup v0\.8b, w0
+** ret
+*/
+TEST_UNARY (vmov_n_p8, poly8x8_t, poly8_t)
+
+/*
+** test_vmov_n_mf8:
+** dup v0\.8b, v0\.b\[0\]
+** ret
+*/
+TEST_UNARY (vmov_n_mf8, mfloat8x8_t, mfloat8_t)
+
+/*
+** test_vmov_n_u8:
+** dup v0\.8b, w0
+** ret
+*/
+TEST_UNARY (vmov_n_u8, uint8x8_t, uint8_t)
+
+/*
+** test_vmov_n_s8:
+** dup v0\.8b, w0
+** ret
+*/
+TEST_UNARY (vmov_n_s8, int8x8_t, int8_t)
+
+/*
+** test_vmovq_n_p8:
+** dup v0\.16b, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_p8, poly8x16_t, poly8_t)
+
+/*
+** test_vmovq_n_mf8:
+** dup v0\.16b, v0\.b\[0\]
+** ret
+*/
+TEST_UNARY (vmovq_n_mf8, mfloat8x16_t, mfloat8_t)
+
+/*
+** test_vmovq_n_u8:
+** dup v0\.16b, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_u8, uint8x16_t, uint8_t)
+
+/*
+** test_vmovq_n_s8:
+** dup v0\.16b, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_s8, int8x16_t, int8_t)
+
+/*
+** test_vmov_n_p16:
+** dup v0\.4h, w0
+** ret
+*/
+TEST_UNARY (vmov_n_p16, poly16x4_t, poly16_t)
+
+/*
+** test_vmov_n_f16:
+** dup v0\.4h, v0\.h\[0\]
+** ret
+*/
+TEST_UNARY (vmov_n_f16, float16x4_t, float16_t)
+
+/*
+** test_vmov_n_u16:
+** dup v0\.4h, w0
+** ret
+*/
+TEST_UNARY (vmov_n_u16, uint16x4_t, uint16_t)
+
+/*
+** test_vmov_n_s16:
+** dup v0\.4h, w0
+** ret
+*/
+TEST_UNARY (vmov_n_s16, int16x4_t, int16_t)
+
+/*
+** test_vmovq_n_p16:
+** dup v0\.8h, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_p16, poly16x8_t, poly16_t)
+
+/*
+** test_vmovq_n_f16:
+** dup v0\.8h, v0\.h\[0\]
+** ret
+*/
+TEST_UNARY (vmovq_n_f16, float16x8_t, float16_t)
+
+/*
+** test_vmovq_n_u16:
+** dup v0\.8h, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_u16, uint16x8_t, uint16_t)
+
+/*
+** test_vmovq_n_s16:
+** dup v0\.8h, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_s16, int16x8_t, int16_t)
+
+/*
+** test_vmov_n_f32:
+** dup v0\.2s, v0\.s\[0\]
+** ret
+*/
+TEST_UNARY (vmov_n_f32, float32x2_t, float32_t)
+
+/*
+** test_vmov_n_u32:
+** dup v0\.2s, w0
+** ret
+*/
+TEST_UNARY (vmov_n_u32, uint32x2_t, uint32_t)
+
+/*
+** test_vmov_n_s32:
+** dup v0\.2s, w0
+** ret
+*/
+TEST_UNARY (vmov_n_s32, int32x2_t, int32_t)
+
+/*
+** test_vmovq_n_f32:
+** dup v0\.4s, v0\.s\[0\]
+** ret
+*/
+TEST_UNARY (vmovq_n_f32, float32x4_t, float32_t)
+
+/*
+** test_vmovq_n_u32:
+** dup v0\.4s, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_u32, uint32x4_t, uint32_t)
+
+/*
+** test_vmovq_n_s32:
+** dup v0\.4s, w0
+** ret
+*/
+TEST_UNARY (vmovq_n_s32, int32x4_t, int32_t)
+
+/*
+** test_vmov_n_p64:
+** fmov d0, x0
+** ret
+*/
+TEST_UNARY (vmov_n_p64, poly64x1_t, poly64_t)
+
+/*
+** test_vmov_n_f64:
+** ret
+*/
+TEST_UNARY (vmov_n_f64, float64x1_t, float64_t)
+
+/*
+** test_vmov_n_u64:
+** fmov d0, x0
+** ret
+*/
+TEST_UNARY (vmov_n_u64, uint64x1_t, uint64_t)
+/*
+** test_vmov_n_s64:
+** fmov d0, x0
+** ret
+*/
+TEST_UNARY (vmov_n_s64, int64x1_t, int64_t)
+
+/*
+** test_vmovq_n_p64:
+** dup v0\.2d, x0
+** ret
+*/
+TEST_UNARY (vmovq_n_p64, poly64x2_t, poly64_t)
+
+/*
+** test_vmovq_n_f64:
+** dup v0\.2d, v0\.d\[0\]
+** ret
+*/
+TEST_UNARY (vmovq_n_f64, float64x2_t, float64_t)
+
+/*
+** test_vmovq_n_u64:
+** dup v0\.2d, x0
+** ret
+*/
+TEST_UNARY (vmovq_n_u64, uint64x2_t, uint64_t)
+
+/*
+** test_vmovq_n_s64:
+** dup v0\.2d, x0
+** ret
+*/
+TEST_UNARY (vmovq_n_s64, int64x2_t, int64_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c b/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
index 528cb84402f8..fde2ab3f4978 100644
--- a/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vmov_n_1.c
@@ -65,7 +65,7 @@ extern void abort (void);
for (i = 0; i < n; i++) \
{ \
INHIB_OPTIMIZATION; \
- a = GET_ELEMENT (reg_len, data_len, data_type) (b, i); \
+ a = b[i]; \
if ((a) != (c)) \
return 1; \
} \
--
2.54.0