Re: [PATCH] aarch64: Add vec_set/extract for tuple modes [PR113027]

Kyrylo Tkachov Tue, 17 Jun 2025 03:21:29 -0700

> On 16 Jun 2025, at 09:54, Richard Sandiford <richard.sandif...@arm.com> wrote:
> 
> We generated inefficient code for bitfield references to Advanced
> SIMD structure modes.  In RTL, these modes are just extra-long
> vectors, and so inserting and extracting an element is simply
> a vec_set or vec_extract operation.
> 
> For the record, I don't think these modes should ever become fully
> fledged vector modes.  We shouldn't provide add, etc. for them.
> But vec_set and vec_extract are the vector equivalent of insv
> and extv.  From that point of view, they seem closer to moves
> than to arithmetic.
> 
> Tested on aarch64-linux-gnu.  OK to install?
> 

Yes, makes sense to me.
Thanks,
Kyrill

> Richard
> 
> 
> gcc/
> PR target/113027
> * config/aarch64/aarch64-protos.h (aarch64_decompose_vec_struct_index):
> Declare.
> * config/aarch64/aarch64.cc (aarch64_decompose_vec_struct_index): New
> function.
> * config/aarch64/iterators.md (VEL, Vel): Add Advanced SIMD
> structure modes.
> * config/aarch64/aarch64-simd.md (vec_set<VSTRUCT_QD:mode>)
> (vec_extract<VSTRUCT_QD:mode>): New patterns.
> 
> gcc/testsuite/
> PR target/113027
> * gcc.target/aarch64/pr113027-1.c: New test.
> * gcc.target/aarch64/pr113027-2.c: Likewise.
> * gcc.target/aarch64/pr113027-3.c: Likewise.
> * gcc.target/aarch64/pr113027-4.c: Likewise.
> * gcc.target/aarch64/pr113027-5.c: Likewise.
> * gcc.target/aarch64/pr113027-6.c: Likewise.
> * gcc.target/aarch64/pr113027-7.c: Likewise.
> ---
> gcc/config/aarch64/aarch64-protos.h           |   1 +
> gcc/config/aarch64/aarch64-simd.md            |  38 +++
> gcc/config/aarch64/aarch64.cc                 |  22 ++
> gcc/config/aarch64/iterators.md               |  48 ++++
> gcc/testsuite/gcc.target/aarch64/pr113027-1.c |  27 ++
> gcc/testsuite/gcc.target/aarch64/pr113027-2.c | 268 ++++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/pr113027-3.c | 268 ++++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/pr113027-4.c | 268 ++++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/pr113027-5.c | 268 ++++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/pr113027-6.c | 267 +++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/pr113027-7.c | 267 +++++++++++++++++
> 11 files changed, 1742 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-3.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-4.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-5.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-6.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-7.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 8f37e56d440..2c413cc9e22 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1036,6 +1036,7 @@ bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
> rtx aarch64_replace_reg_mode (rtx, machine_mode);
> void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
> void aarch64_expand_prologue (void);
> +void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
> void aarch64_expand_vector_init (rtx, rtx);
> void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
> void aarch64_sve_expand_vector_init (rtx, rtx);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 6e30dc48934..e771defc73f 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -1628,6 +1628,24 @@ (define_expand "vec_set<mode>"
>   }
> )
> 
> +(define_expand "vec_set<mode>"
> +  [(match_operand:VSTRUCT_QD 0 "register_operand")
> +   (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
> +   (match_operand:SI 2 "immediate_operand")]
> +  "TARGET_SIMD"
> +{
> +  aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[0],
> +      &operands[2], true);
> +  /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
> +     Allow gen_vec_set<vstruct_elt> to cope with those cases too.  */
> +  auto gen_vec_setdi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
> +    {
> +      return gen_move_insn (x0, x1);
> +    };
> +  auto gen_vec_setdf ATTRIBUTE_UNUSED = gen_vec_setdi;
> +  emit_insn (gen_vec_set<vstruct_elt> (operands[0], operands[1], operands[2]));
> +  DONE;
> +})
> 
> (define_insn "aarch64_mla<mode><vczle><vczbe>"
>  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
> @@ -8883,6 +8901,26 @@ (define_expand "vec_extract<mode><Vel>"
>     DONE;
> })
> 
> +(define_expand "vec_extract<mode><Vel>"
> +  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
> +   (match_operand:VSTRUCT_QD 1 "register_operand")
> +   (match_operand:SI 2 "immediate_operand")]
> +  "TARGET_SIMD"
> +{
> +  aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[1],
> +      &operands[2], false);
> +  /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
> +     Allow gen_vec_extract<vstruct_elt><Vel> to cope with those cases too.  */
> +  auto gen_vec_extractdidi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
> +    {
> +      return gen_move_insn (x0, x1);
> +    };
> +  auto gen_vec_extractdfdf ATTRIBUTE_UNUSED = gen_vec_extractdidi;
> +  emit_insn (gen_vec_extract<vstruct_elt><Vel> (operands[0], operands[1],
> + operands[2]));
> +  DONE;
> +})
> +
> ;; Extract a 64-bit vector from one half of a 128-bit vector.
> (define_expand "vec_extract<mode><Vhalf>"
>   [(match_operand:<VHALF> 0 "register_operand")
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index c8977b5a948..a4eee4ff56e 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -24658,6 +24658,28 @@ seq_cost_ignoring_scalar_moves (const rtx_insn *seq, bool speed)
>   return cost;
> }
> 
> +/* *VECTOR has an Advanced SIMD structure mode and *INDEX is a constant index
> +   into it.  Narrow *VECTOR and *INDEX so that they reference a single vector
> +   of mode SUBVEC_MODE.  IS_DEST is true if *VECTOR is a destination operand,
> +   false if it is a source operand.  */
> +
> +void
> +aarch64_decompose_vec_struct_index (machine_mode subvec_mode,
> +    rtx *vector, rtx *index, bool is_dest)
> +{
> +  auto elts_per_vector = GET_MODE_NUNITS (subvec_mode).to_constant ();
> +  auto subvec = UINTVAL (*index) / elts_per_vector;
> +  auto subelt = UINTVAL (*index) % elts_per_vector;
> +  auto subvec_byte = subvec * GET_MODE_SIZE (subvec_mode);
> +  if (is_dest)
> +    *vector = simplify_gen_subreg (subvec_mode, *vector, GET_MODE (*vector),
> +   subvec_byte);
> +  else
> +    *vector = force_subreg (subvec_mode, *vector, GET_MODE (*vector),
> +    subvec_byte);
> +  *index = gen_int_mode (subelt, SImode);
> +}
> +
> /* Expand a vector initialization sequence, such that TARGET is
>    initialized to contain VALS.  */
> 
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 146453b0516..033a88a8467 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1678,6 +1678,30 @@ (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI")
>       (SI   "SI") (HI    "HI")
>       (QI   "QI")
>       (V4BF "BF") (V8BF "BF")
> +       (V2x8QI "QI") (V2x4HI "HI")
> +       (V2x2SI "SI") (V2x1DI "DI")
> +       (V2x4HF "HF") (V2x2SF "SF")
> +       (V2x1DF "DF") (V2x4BF "BF")
> +       (V3x8QI "QI") (V3x4HI "HI")
> +       (V3x2SI "SI") (V3x1DI "DI")
> +       (V3x4HF "HF") (V3x2SF "SF")
> +       (V3x1DF "DF") (V3x4BF "BF")
> +       (V4x8QI "QI") (V4x4HI "HI")
> +       (V4x2SI "SI") (V4x1DI "DI")
> +       (V4x4HF "HF") (V4x2SF "SF")
> +       (V4x1DF "DF") (V4x4BF "BF")
> +       (V2x16QI "QI") (V2x8HI "HI")
> +       (V2x4SI "SI") (V2x2DI "DI")
> +       (V2x8HF "HF") (V2x4SF "SF")
> +       (V2x2DF "DF") (V2x8BF "BF")
> +       (V3x16QI "QI") (V3x8HI "HI")
> +       (V3x4SI "SI") (V3x2DI "DI")
> +       (V3x8HF "HF") (V3x4SF "SF")
> +       (V3x2DF "DF") (V3x8BF "BF")
> +       (V4x16QI "QI") (V4x8HI "HI")
> +       (V4x4SI "SI") (V4x2DI "DI")
> +       (V4x8HF "HF") (V4x4SF "SF")
> +       (V4x2DF "DF") (V4x8BF "BF")
>       (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
>       (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
>       (VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
> @@ -1699,6 +1723,30 @@ (define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
>       (DF   "df") (SI   "si")
>       (HI   "hi") (QI   "qi")
>       (V4BF "bf") (V8BF "bf")
> +       (V2x8QI "qi") (V2x4HI "hi")
> +       (V2x2SI "si") (V2x1DI "di")
> +       (V2x4HF "hf") (V2x2SF "sf")
> +       (V2x1DF "df") (V2x4BF "bf")
> +       (V3x8QI "qi") (V3x4HI "hi")
> +       (V3x2SI "si") (V3x1DI "di")
> +       (V3x4HF "hf") (V3x2SF "sf")
> +       (V3x1DF "df") (V3x4BF "bf")
> +       (V4x8QI "qi") (V4x4HI "hi")
> +       (V4x2SI "si") (V4x1DI "di")
> +       (V4x4HF "hf") (V4x2SF "sf")
> +       (V4x1DF "df") (V4x4BF "bf")
> +       (V2x16QI "qi") (V2x8HI "hi")
> +       (V2x4SI "si") (V2x2DI "di")
> +       (V2x8HF "hf") (V2x4SF "sf")
> +       (V2x2DF "df") (V2x8BF "bf")
> +       (V3x16QI "qi") (V3x8HI "hi")
> +       (V3x4SI "si") (V3x2DI "di")
> +       (V3x8HF "hf") (V3x4SF "sf")
> +       (V3x2DF "df") (V3x8BF "bf")
> +       (V4x16QI "qi") (V4x8HI "hi")
> +       (V4x4SI "si") (V4x2DI "di")
> +       (V4x8HF "hf") (V4x4SF "sf")
> +       (V4x2DF "df") (V4x8BF "bf")
>       (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
>       (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
>       (VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-1.c b/gcc/testsuite/gcc.target/aarch64/pr113027-1.c
> new file mode 100644
> index 00000000000..6d9a51fd408
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-1.c
> @@ -0,0 +1,27 @@
> +/* { dg-options "-O2" } */
> +
> +#include <arm_neon.h>
> +
> +float64x2x2_t
> +f1 (float64x2x2_t x)
> +{
> +  x.val[0][1] += 1.0;
> +  return x;
> +}
> +
> +float64x2x3_t
> +f2 (float64x2x3_t x)
> +{
> +  x.val[0][0] = x.val[1][1] + x.val[2][0];
> +  return x;
> +}
> +
> +float64x2x4_t
> +f3 (float64x2x4_t x)
> +{
> +  x.val[0][0] = x.val[1][1] + x.val[2][0] - x.val[3][1];
> +  return x;
> +}
> +
> +/* { dg-final { scan-assembler-not {\tmov\t} } } */
> +/* { dg-final { scan-assembler-not {\[sp,} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-2.c b/gcc/testsuite/gcc.target/aarch64/pr113027-2.c
> new file mode 100644
> index 00000000000..ec756ec86e4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-2.c
> @@ -0,0 +1,268 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
> +
> +#include <arm_neon.h>
> +
> +#define TEST(TYPE, A, B, C, D) \
> +  TYPE \
> +  test_##TYPE (TYPE a) \
> +  { \
> +    a.val[A][B] = a.val[C][D]; \
> +    return a; \
> +  }
> +
> +/*
> +** test_bfloat16x4x2_t:
> +** ins v1\.h\[3\], v0\.h\[2\]
> +** ret
> +*/
> +TEST (bfloat16x4x2_t, 1, 3, 0, 2)
> +
> +/*
> +** test_float16x4x2_t:
> +** ins v1\.h\[1\], v0\.h\[3\]
> +** ret
> +*/
> +TEST (float16x4x2_t, 1, 1, 0, 3)
> +
> +/*
> +** test_float32x2x2_t:
> +** ins v1\.s\[0\], v0\.s\[1\]
> +** ret
> +*/
> +TEST (float32x2x2_t, 1, 0, 0, 1)
> +
> +/*
> +** test_float64x1x2_t:
> +** fmov d1, d0
> +** ret
> +*/
> +TEST (float64x1x2_t, 1, 0, 0, 0)
> +
> +/*
> +** test_int8x8x2_t:
> +** ins v0\.b\[5\], v1\.b\[7\]
> +** ret
> +*/
> +TEST (int8x8x2_t, 0, 5, 1, 7)
> +
> +/*
> +** test_int16x4x2_t:
> +** ins v0\.h\[2\], v1\.h\[2\]
> +** ret
> +*/
> +TEST (int16x4x2_t, 0, 2, 1, 2)
> +
> +/*
> +** test_int32x2x2_t:
> +** ins v0\.s\[0\], v1\.s\[1\]
> +** ret
> +*/
> +TEST (int32x2x2_t, 0, 0, 1, 1)
> +
> +/*
> +** test_int64x1x2_t:
> +** fmov d0, d1
> +** ret
> +*/
> +TEST (int64x1x2_t, 0, 0, 1, 0)
> +
> +/*
> +** test_uint8x8x2_t:
> +** ins v1\.b\[6\], v0\.b\[3\]
> +** ret
> +*/
> +TEST (uint8x8x2_t, 1, 6, 0, 3)
> +
> +/*
> +** test_uint16x4x2_t:
> +** ins v1\.h\[2\], v1\.h\[0\]
> +** ret
> +*/
> +TEST (uint16x4x2_t, 1, 2, 1, 0)
> +
> +/*
> +** test_uint32x2x2_t:
> +** ins v1\.s\[0\], v1\.s\[1\]
> +** ret
> +*/
> +TEST (uint32x2x2_t, 1, 0, 1, 1)
> +
> +/*
> +** test_uint64x1x2_t:
> +** fmov d1, d0
> +** ret
> +*/
> +TEST (uint64x1x2_t, 1, 0, 0, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x4x3_t:
> +** ins v2\.h\[3\], v0\.h\[2\]
> +** ret
> +*/
> +TEST (bfloat16x4x3_t, 2, 3, 0, 2)
> +
> +/*
> +** test_float16x4x3_t:
> +** ins v0\.h\[1\], v1\.h\[3\]
> +** ret
> +*/
> +TEST (float16x4x3_t, 0, 1, 1, 3)
> +
> +/*
> +** test_float32x2x3_t:
> +** ins v1\.s\[0\], v2\.s\[1\]
> +** ret
> +*/
> +TEST (float32x2x3_t, 1, 0, 2, 1)
> +
> +/*
> +** test_float64x1x3_t:
> +** fmov d1, d2
> +** ret
> +*/
> +TEST (float64x1x3_t, 1, 0, 2, 0)
> +
> +/*
> +** test_int8x8x3_t:
> +** ins v0\.b\[5\], v2\.b\[6\]
> +** ret
> +*/
> +TEST (int8x8x3_t, 0, 5, 2, 6)
> +
> +/*
> +** test_int16x4x3_t:
> +** ins v2\.h\[2\], v1\.h\[1\]
> +** ret
> +*/
> +TEST (int16x4x3_t, 2, 2, 1, 1)
> +
> +/*
> +** test_int32x2x3_t:
> +** ins v1\.s\[0\], v1\.s\[1\]
> +** ret
> +*/
> +TEST (int32x2x3_t, 1, 0, 1, 1)
> +
> +/*
> +** test_int64x1x3_t:
> +** fmov d2, d1
> +** ret
> +*/
> +TEST (int64x1x3_t, 2, 0, 1, 0)
> +
> +/*
> +** test_uint8x8x3_t:
> +** ins v1\.b\[6\], v2\.b\[7\]
> +** ret
> +*/
> +TEST (uint8x8x3_t, 1, 6, 2, 7)
> +
> +/*
> +** test_uint16x4x3_t:
> +** ins v2\.h\[2\], v1\.h\[3\]
> +** ret
> +*/
> +TEST (uint16x4x3_t, 2, 2, 1, 3)
> +
> +/*
> +** test_uint32x2x3_t:
> +** ins v2\.s\[0\], v0\.s\[1\]
> +** ret
> +*/
> +TEST (uint32x2x3_t, 2, 0, 0, 1)
> +
> +/*
> +** test_uint64x1x3_t:
> +** fmov d1, d2
> +** ret
> +*/
> +TEST (uint64x1x3_t, 1, 0, 2, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x4x4_t:
> +** ins v2\.h\[3\], v3\.h\[2\]
> +** ret
> +*/
> +TEST (bfloat16x4x4_t, 2, 3, 3, 2)
> +
> +/*
> +** test_float16x4x4_t:
> +** ins v0\.h\[2\], v3\.h\[1\]
> +** ret
> +*/
> +TEST (float16x4x4_t, 0, 2, 3, 1)
> +
> +/*
> +** test_float32x2x4_t:
> +** ins v3\.s\[0\], v2\.s\[1\]
> +** ret
> +*/
> +TEST (float32x2x4_t, 3, 0, 2, 1)
> +
> +/*
> +** test_float64x1x4_t:
> +** fmov d1, d3
> +** ret
> +*/
> +TEST (float64x1x4_t, 1, 0, 3, 0)
> +
> +/*
> +** test_int8x8x4_t:
> +** ins v0\.b\[4\], v3\.b\[7\]
> +** ret
> +*/
> +TEST (int8x8x4_t, 0, 4, 3, 7)
> +
> +/*
> +** test_int16x4x4_t:
> +** ins v3\.h\[3\], v1\.h\[1\]
> +** ret
> +*/
> +TEST (int16x4x4_t, 3, 3, 1, 1)
> +
> +/*
> +** test_int32x2x4_t:
> +** ins v1\.s\[0\], v3\.s\[1\]
> +** ret
> +*/
> +TEST (int32x2x4_t, 1, 0, 3, 1)
> +
> +/*
> +** test_int64x1x4_t:
> +** fmov d3, d1
> +** ret
> +*/
> +TEST (int64x1x4_t, 3, 0, 1, 0)
> +
> +/*
> +** test_uint8x8x4_t:
> +** ins v3\.b\[6\], v2\.b\[4\]
> +** ret
> +*/
> +TEST (uint8x8x4_t, 3, 6, 2, 4)
> +
> +/*
> +** test_uint16x4x4_t:
> +** ins v3\.h\[1\], v1\.h\[3\]
> +** ret
> +*/
> +TEST (uint16x4x4_t, 3, 1, 1, 3)
> +
> +/*
> +** test_uint32x2x4_t:
> +** ins v0\.s\[0\], v3\.s\[1\]
> +** ret
> +*/
> +TEST (uint32x2x4_t, 0, 0, 3, 1)
> +
> +/*
> +** test_uint64x1x4_t:
> +** fmov d1, d3
> +** ret
> +*/
> +TEST (uint64x1x4_t, 1, 0, 3, 0)
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-3.c b/gcc/testsuite/gcc.target/aarch64/pr113027-3.c
> new file mode 100644
> index 00000000000..561e6721a80
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-3.c
> @@ -0,0 +1,268 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
> +
> +#include <arm_neon.h>
> +
> +#define TEST(TYPE, A, B, C, D) \
> +  TYPE \
> +  test_##TYPE (TYPE a) \
> +  { \
> +    a.val[A][B] = a.val[C][D]; \
> +    return a; \
> +  }
> +
> +/*
> +** test_bfloat16x8x2_t:
> +** ins v1\.h\[6\], v0\.h\[5\]
> +** ret
> +*/
> +TEST (bfloat16x8x2_t, 1, 6, 0, 5)
> +
> +/*
> +** test_float16x8x2_t:
> +** ins v1\.h\[2\], v0\.h\[7\]
> +** ret
> +*/
> +TEST (float16x8x2_t, 1, 2, 0, 7)
> +
> +/*
> +** test_float32x4x2_t:
> +** ins v1\.s\[3\], v0\.s\[1\]
> +** ret
> +*/
> +TEST (float32x4x2_t, 1, 3, 0, 1)
> +
> +/*
> +** test_float64x2x2_t:
> +** ins v1\.d\[0\], v0\.d\[0\]
> +** ret
> +*/
> +TEST (float64x2x2_t, 1, 0, 0, 0)
> +
> +/*
> +** test_int8x16x2_t:
> +** ins v0\.b\[15\], v1\.b\[13\]
> +** ret
> +*/
> +TEST (int8x16x2_t, 0, 15, 1, 13)
> +
> +/*
> +** test_int16x8x2_t:
> +** ins v0\.h\[2\], v1\.h\[7\]
> +** ret
> +*/
> +TEST (int16x8x2_t, 0, 2, 1, 7)
> +
> +/*
> +** test_int32x4x2_t:
> +** ins v0\.s\[3\], v1\.s\[1\]
> +** ret
> +*/
> +TEST (int32x4x2_t, 0, 3, 1, 1)
> +
> +/*
> +** test_int64x2x2_t:
> +** ins v0\.d\[0\], v1\.d\[1\]
> +** ret
> +*/
> +TEST (int64x2x2_t, 0, 0, 1, 1)
> +
> +/*
> +** test_uint8x16x2_t:
> +** ins v1\.b\[13\], v0\.b\[11\]
> +** ret
> +*/
> +TEST (uint8x16x2_t, 1, 13, 0, 11)
> +
> +/*
> +** test_uint16x8x2_t:
> +** ins v1\.h\[6\], v1\.h\[3\]
> +** ret
> +*/
> +TEST (uint16x8x2_t, 1, 6, 1, 3)
> +
> +/*
> +** test_uint32x4x2_t:
> +** ins v1\.s\[3\], v1\.s\[1\]
> +** ret
> +*/
> +TEST (uint32x4x2_t, 1, 3, 1, 1)
> +
> +/*
> +** test_uint64x2x2_t:
> +** ins v1\.d\[0\], v1\.d\[1\]
> +** ret
> +*/
> +TEST (uint64x2x2_t, 1, 0, 1, 1)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x8x3_t:
> +** ins v2\.h\[3\], v0\.h\[7\]
> +** ret
> +*/
> +TEST (bfloat16x8x3_t, 2, 3, 0, 7)
> +
> +/*
> +** test_float16x8x3_t:
> +** ins v0\.h\[4\], v1\.h\[6\]
> +** ret
> +*/
> +TEST (float16x8x3_t, 0, 4, 1, 6)
> +
> +/*
> +** test_float32x4x3_t:
> +** ins v1\.s\[2\], v2\.s\[1\]
> +** ret
> +*/
> +TEST (float32x4x3_t, 1, 2, 2, 1)
> +
> +/*
> +** test_float64x2x3_t:
> +** ins v1\.d\[0\], v2\.d\[1\]
> +** ret
> +*/
> +TEST (float64x2x3_t, 1, 0, 2, 1)
> +
> +/*
> +** test_int8x16x3_t:
> +** ins v0\.b\[9\], v2\.b\[14\]
> +** ret
> +*/
> +TEST (int8x16x3_t, 0, 9, 2, 14)
> +
> +/*
> +** test_int16x8x3_t:
> +** ins v2\.h\[6\], v1\.h\[3\]
> +** ret
> +*/
> +TEST (int16x8x3_t, 2, 6, 1, 3)
> +
> +/*
> +** test_int32x4x3_t:
> +** ins v1\.s\[3\], v1\.s\[1\]
> +** ret
> +*/
> +TEST (int32x4x3_t, 1, 3, 1, 1)
> +
> +/*
> +** test_int64x2x3_t:
> +** ins v2\.d\[1\], v1\.d\[0\]
> +** ret
> +*/
> +TEST (int64x2x3_t, 2, 1, 1, 0)
> +
> +/*
> +** test_uint8x16x3_t:
> +** ins v1\.b\[10\], v2\.b\[8\]
> +** ret
> +*/
> +TEST (uint8x16x3_t, 1, 10, 2, 8)
> +
> +/*
> +** test_uint16x8x3_t:
> +** ins v2\.h\[5\], v1\.h\[2\]
> +** ret
> +*/
> +TEST (uint16x8x3_t, 2, 5, 1, 2)
> +
> +/*
> +** test_uint32x4x3_t:
> +** ins v2\.s\[3\], v0\.s\[1\]
> +** ret
> +*/
> +TEST (uint32x4x3_t, 2, 3, 0, 1)
> +
> +/*
> +** test_uint64x2x3_t:
> +** ins v1\.d\[0\], v2\.d\[1\]
> +** ret
> +*/
> +TEST (uint64x2x3_t, 1, 0, 2, 1)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x8x4_t:
> +** ins v2\.h\[5\], v3\.h\[6\]
> +** ret
> +*/
> +TEST (bfloat16x8x4_t, 2, 5, 3, 6)
> +
> +/*
> +** test_float16x8x4_t:
> +** ins v0\.h\[3\], v3\.h\[5\]
> +** ret
> +*/
> +TEST (float16x8x4_t, 0, 3, 3, 5)
> +
> +/*
> +** test_float32x4x4_t:
> +** ins v3\.s\[2\], v2\.s\[1\]
> +** ret
> +*/
> +TEST (float32x4x4_t, 3, 2, 2, 1)
> +
> +/*
> +** test_float64x2x4_t:
> +** ins v1\.d\[1\], v3\.d\[0\]
> +** ret
> +*/
> +TEST (float64x2x4_t, 1, 1, 3, 0)
> +
> +/*
> +** test_int8x16x4_t:
> +** ins v0\.b\[14\], v3\.b\[10\]
> +** ret
> +*/
> +TEST (int8x16x4_t, 0, 14, 3, 10)
> +
> +/*
> +** test_int16x8x4_t:
> +** ins v3\.h\[4\], v1\.h\[6\]
> +** ret
> +*/
> +TEST (int16x8x4_t, 3, 4, 1, 6)
> +
> +/*
> +** test_int32x4x4_t:
> +** ins v1\.s\[3\], v3\.s\[1\]
> +** ret
> +*/
> +TEST (int32x4x4_t, 1, 3, 3, 1)
> +
> +/*
> +** test_int64x2x4_t:
> +** ins v3\.d\[0\], v2\.d\[0\]
> +** ret
> +*/
> +TEST (int64x2x4_t, 3, 0, 2, 0)
> +
> +/*
> +** test_uint8x16x4_t:
> +** ins v3\.b\[13\], v2\.b\[6\]
> +** ret
> +*/
> +TEST (uint8x16x4_t, 3, 13, 2, 6)
> +
> +/*
> +** test_uint16x8x4_t:
> +** ins v3\.h\[2\], v1\.h\[7\]
> +** ret
> +*/
> +TEST (uint16x8x4_t, 3, 2, 1, 7)
> +
> +/*
> +** test_uint32x4x4_t:
> +** ins v0\.s\[3\], v3\.s\[2\]
> +** ret
> +*/
> +TEST (uint32x4x4_t, 0, 3, 3, 2)
> +
> +/*
> +** test_uint64x2x4_t:
> +** ins v1\.d\[0\], v3\.d\[1\]
> +** ret
> +*/
> +TEST (uint64x2x4_t, 1, 0, 3, 1)
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-4.c b/gcc/testsuite/gcc.target/aarch64/pr113027-4.c
> new file mode 100644
> index 00000000000..67f45dfa4f0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-4.c
> @@ -0,0 +1,268 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
> +
> +#include <arm_neon.h>
> +
> +#define TEST(TYPE, A, B) \
> +  TYPE \
> +  test_##TYPE (TYPE a, TYPE *ptr) \
> +  { \
> +    a.val[A][B] = ptr->val[0][0]; \
> +    return a; \
> +  }
> +
> +/*
> +** test_bfloat16x4x2_t:
> +** ld1 \{v1\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x4x2_t, 1, 3)
> +
> +/*
> +** test_float16x4x2_t:
> +** ld1 \{v1\.h\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (float16x4x2_t, 1, 1)
> +
> +/*
> +** test_float32x2x2_t:
> +** ld1 \{v1\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (float32x2x2_t, 1, 0)
> +
> +/*
> +** test_float64x1x2_t:
> +** ldr d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x1x2_t, 1, 0)
> +
> +/*
> +** test_int8x8x2_t:
> +** ld1 \{v0\.b\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (int8x8x2_t, 0, 5)
> +
> +/*
> +** test_int16x4x2_t:
> +** ld1 \{v0\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (int16x4x2_t, 0, 2)
> +
> +/*
> +** test_int32x2x2_t:
> +** ld1 \{v0\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (int32x2x2_t, 0, 0)
> +
> +/*
> +** test_int64x1x2_t:
> +** ldr d0, \[x0\]
> +** ret
> +*/
> +TEST (int64x1x2_t, 0, 0)
> +
> +/*
> +** test_uint8x8x2_t:
> +** ld1 \{v1\.b\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x8x2_t, 1, 6)
> +
> +/*
> +** test_uint16x4x2_t:
> +** ld1 \{v1\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x4x2_t, 1, 2)
> +
> +/*
> +** test_uint32x2x2_t:
> +** ld1 \{v1\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x2x2_t, 1, 0)
> +
> +/*
> +** test_uint64x1x2_t:
> +** ldr d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x1x2_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x4x3_t:
> +** ld1 \{v2\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x4x3_t, 2, 3)
> +
> +/*
> +** test_float16x4x3_t:
> +** ld1 \{v0\.h\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (float16x4x3_t, 0, 1)
> +
> +/*
> +** test_float32x2x3_t:
> +** ld1 \{v1\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (float32x2x3_t, 1, 0)
> +
> +/*
> +** test_float64x1x3_t:
> +** ldr d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x1x3_t, 1, 0)
> +
> +/*
> +** test_int8x8x3_t:
> +** ld1 \{v0\.b\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (int8x8x3_t, 0, 5)
> +
> +/*
> +** test_int16x4x3_t:
> +** ld1 \{v2\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (int16x4x3_t, 2, 2)
> +
> +/*
> +** test_int32x2x3_t:
> +** ld1 \{v1\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (int32x2x3_t, 1, 0)
> +
> +/*
> +** test_int64x1x3_t:
> +** ldr d2, \[x0\]
> +** ret
> +*/
> +TEST (int64x1x3_t, 2, 0)
> +
> +/*
> +** test_uint8x8x3_t:
> +** ld1 \{v1\.b\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x8x3_t, 1, 6)
> +
> +/*
> +** test_uint16x4x3_t:
> +** ld1 \{v2\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x4x3_t, 2, 2)
> +
> +/*
> +** test_uint32x2x3_t:
> +** ld1 \{v2\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x2x3_t, 2, 0)
> +
> +/*
> +** test_uint64x1x3_t:
> +** ldr d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x1x3_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x4x4_t:
> +** ld1 \{v2\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x4x4_t, 2, 3)
> +
> +/*
> +** test_float16x4x4_t:
> +** ld1 \{v0\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float16x4x4_t, 0, 2)
> +
> +/*
> +** test_float32x2x4_t:
> +** ld1 \{v3\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (float32x2x4_t, 3, 0)
> +
> +/*
> +** test_float64x1x4_t:
> +** ldr d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x1x4_t, 1, 0)
> +
> +/*
> +** test_int8x8x4_t:
> +** ld1 \{v0\.b\}\[4\], \[x0\]
> +** ret
> +*/
> +TEST (int8x8x4_t, 0, 4)
> +
> +/*
> +** test_int16x4x4_t:
> +** ld1 \{v3\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int16x4x4_t, 3, 3)
> +
> +/*
> +** test_int32x2x4_t:
> +** ld1 \{v1\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (int32x2x4_t, 1, 0)
> +
> +/*
> +** test_int64x1x4_t:
> +** ldr d3, \[x0\]
> +** ret
> +*/
> +TEST (int64x1x4_t, 3, 0)
> +
> +/*
> +** test_uint8x8x4_t:
> +** ld1 \{v3\.b\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x8x4_t, 3, 6)
> +
> +/*
> +** test_uint16x4x4_t:
> +** ld1 \{v3\.h\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x4x4_t, 3, 1)
> +
> +/*
> +** test_uint32x2x4_t:
> +** ld1 \{v0\.s\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x2x4_t, 0, 0)
> +
> +/*
> +** test_uint64x1x4_t:
> +** ldr d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x1x4_t, 1, 0)
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-5.c b/gcc/testsuite/gcc.target/aarch64/pr113027-5.c
> new file mode 100644
> index 00000000000..5695ecab8ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-5.c
> @@ -0,0 +1,268 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
> +
> +#include <arm_neon.h>
> +
> +#define TEST(TYPE, A, B) \
> +  TYPE \
> +  test_##TYPE (TYPE a, TYPE *ptr) \
> +  { \
> +    a.val[A][B] = ptr->val[0][0]; \
> +    return a; \
> +  }
> +
> +/*
> +** test_bfloat16x8x2_t:
> +** ld1 \{v1\.h\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x8x2_t, 1, 6)
> +
> +/*
> +** test_float16x8x2_t:
> +** ld1 \{v1\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float16x8x2_t, 1, 2)
> +
> +/*
> +** test_float32x4x2_t:
> +** ld1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (float32x4x2_t, 1, 3)
> +
> +/*
> +** test_float64x2x2_t:
> +** ld1 \{v1\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (float64x2x2_t, 1, 0)
> +
> +/*
> +** test_int8x16x2_t:
> +** ld1 \{v0\.b\}\[15\], \[x0\]
> +** ret
> +*/
> +TEST (int8x16x2_t, 0, 15)
> +
> +/*
> +** test_int16x8x2_t:
> +** ld1 \{v0\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (int16x8x2_t, 0, 2)
> +
> +/*
> +** test_int32x4x2_t:
> +** ld1 \{v0\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int32x4x2_t, 0, 3)
> +
> +/*
> +** test_int64x2x2_t:
> +** ld1 \{v0\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (int64x2x2_t, 0, 0)
> +
> +/*
> +** test_uint8x16x2_t:
> +** ld1 \{v1\.b\}\[13\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x16x2_t, 1, 13)
> +
> +/*
> +** test_uint16x8x2_t:
> +** ld1 \{v1\.h\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x8x2_t, 1, 6)
> +
> +/*
> +** test_uint32x4x2_t:
> +** ld1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x4x2_t, 1, 3)
> +
> +/*
> +** test_uint64x2x2_t:
> +** ld1 \{v1\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (uint64x2x2_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x8x3_t:
> +** ld1 \{v2\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x8x3_t, 2, 3)
> +
> +/*
> +** test_float16x8x3_t:
> +** ld1 \{v0\.h\}\[4\], \[x0\]
> +** ret
> +*/
> +TEST (float16x8x3_t, 0, 4)
> +
> +/*
> +** test_float32x4x3_t:
> +** ld1 \{v1\.s\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float32x4x3_t, 1, 2)
> +
> +/*
> +** test_float64x2x3_t:
> +** ld1 \{v1\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (float64x2x3_t, 1, 0)
> +
> +/*
> +** test_int8x16x3_t:
> +** ld1 \{v0\.b\}\[9\], \[x0\]
> +** ret
> +*/
> +TEST (int8x16x3_t, 0, 9)
> +
> +/*
> +** test_int16x8x3_t:
> +** ld1 \{v2\.h\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (int16x8x3_t, 2, 6)
> +
> +/*
> +** test_int32x4x3_t:
> +** ld1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int32x4x3_t, 1, 3)
> +
> +/*
> +** test_int64x2x3_t:
> +** ld1 \{v2\.d\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (int64x2x3_t, 2, 1)
> +
> +/*
> +** test_uint8x16x3_t:
> +** ld1 \{v1\.b\}\[10\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x16x3_t, 1, 10)
> +
> +/*
> +** test_uint16x8x3_t:
> +** ld1 \{v2\.h\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x8x3_t, 2, 5)
> +
> +/*
> +** test_uint32x4x3_t:
> +** ld1 \{v2\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x4x3_t, 2, 3)
> +
> +/*
> +** test_uint64x2x3_t:
> +** ld1 \{v1\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (uint64x2x3_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x8x4_t:
> +** ld1 \{v2\.h\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x8x4_t, 2, 5)
> +
> +/*
> +** test_float16x8x4_t:
> +** ld1 \{v0\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (float16x8x4_t, 0, 3)
> +
> +/*
> +** test_float32x4x4_t:
> +** ld1 \{v3\.s\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float32x4x4_t, 3, 2)
> +
> +/*
> +** test_float64x2x4_t:
> +** ld1 \{v1\.d\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (float64x2x4_t, 1, 1)
> +
> +/*
> +** test_int8x16x4_t:
> +** ld1 \{v0\.b\}\[14\], \[x0\]
> +** ret
> +*/
> +TEST (int8x16x4_t, 0, 14)
> +
> +/*
> +** test_int16x8x4_t:
> +** ld1 \{v3\.h\}\[4\], \[x0\]
> +** ret
> +*/
> +TEST (int16x8x4_t, 3, 4)
> +
> +/*
> +** test_int32x4x4_t:
> +** ld1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int32x4x4_t, 1, 3)
> +
> +/*
> +** test_int64x2x4_t:
> +** ld1 \{v3\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (int64x2x4_t, 3, 0)
> +
> +/*
> +** test_uint8x16x4_t:
> +** ld1 \{v3\.b\}\[13\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x16x4_t, 3, 13)
> +
> +/*
> +** test_uint16x8x4_t:
> +** ld1 \{v3\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x8x4_t, 3, 2)
> +
> +/*
> +** test_uint32x4x4_t:
> +** ld1 \{v0\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x4x4_t, 0, 3)
> +
> +/*
> +** test_uint64x2x4_t:
> +** ld1 \{v1\.d\}\[0\], \[x0\]
> +** ret
> +*/
> +TEST (uint64x2x4_t, 1, 0)
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-6.c b/gcc/testsuite/gcc.target/aarch64/pr113027-6.c
> new file mode 100644
> index 00000000000..12d3a38f74b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-6.c
> @@ -0,0 +1,267 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
> +
> +#include <arm_neon.h>
> +
> +#define TEST(TYPE, A, B) \
> +  void \
> +  test_##TYPE (TYPE a, TYPE *ptr) \
> +  { \
> +    ptr->val[0][0] = a.val[A][B]; \
> +  }
> +
> +/*
> +** test_bfloat16x4x2_t:
> +** st1 \{v1\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x4x2_t, 1, 3)
> +
> +/*
> +** test_float16x4x2_t:
> +** st1 \{v1\.h\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (float16x4x2_t, 1, 1)
> +
> +/*
> +** test_float32x2x2_t:
> +** str s1, \[x0\]
> +** ret
> +*/
> +TEST (float32x2x2_t, 1, 0)
> +
> +/*
> +** test_float64x1x2_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x1x2_t, 1, 0)
> +
> +/*
> +** test_int8x8x2_t:
> +** st1 \{v0\.b\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (int8x8x2_t, 0, 5)
> +
> +/*
> +** test_int16x4x2_t:
> +** st1 \{v0\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (int16x4x2_t, 0, 2)
> +
> +/*
> +** test_int32x2x2_t:
> +** str s0, \[x0\]
> +** ret
> +*/
> +TEST (int32x2x2_t, 0, 0)
> +
> +/*
> +** test_int64x1x2_t:
> +** str d0, \[x0\]
> +** ret
> +*/
> +TEST (int64x1x2_t, 0, 0)
> +
> +/*
> +** test_uint8x8x2_t:
> +** st1 \{v1\.b\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x8x2_t, 1, 6)
> +
> +/*
> +** test_uint16x4x2_t:
> +** st1 \{v1\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x4x2_t, 1, 2)
> +
> +/*
> +** test_uint32x2x2_t:
> +** str s1, \[x0\]
> +** ret
> +*/
> +TEST (uint32x2x2_t, 1, 0)
> +
> +/*
> +** test_uint64x1x2_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x1x2_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x4x3_t:
> +** st1 \{v2\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x4x3_t, 2, 3)
> +
> +/*
> +** test_float16x4x3_t:
> +** st1 \{v0\.h\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (float16x4x3_t, 0, 1)
> +
> +/*
> +** test_float32x2x3_t:
> +** str s1, \[x0\]
> +** ret
> +*/
> +TEST (float32x2x3_t, 1, 0)
> +
> +/*
> +** test_float64x1x3_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x1x3_t, 1, 0)
> +
> +/*
> +** test_int8x8x3_t:
> +** st1 \{v0\.b\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (int8x8x3_t, 0, 5)
> +
> +/*
> +** test_int16x4x3_t:
> +** st1 \{v2\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (int16x4x3_t, 2, 2)
> +
> +/*
> +** test_int32x2x3_t:
> +** str s1, \[x0\]
> +** ret
> +*/
> +TEST (int32x2x3_t, 1, 0)
> +
> +/*
> +** test_int64x1x3_t:
> +** str d2, \[x0\]
> +** ret
> +*/
> +TEST (int64x1x3_t, 2, 0)
> +
> +/*
> +** test_uint8x8x3_t:
> +** st1 \{v1\.b\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x8x3_t, 1, 6)
> +
> +/*
> +** test_uint16x4x3_t:
> +** st1 \{v2\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x4x3_t, 2, 2)
> +
> +/*
> +** test_uint32x2x3_t:
> +** str s2, \[x0\]
> +** ret
> +*/
> +TEST (uint32x2x3_t, 2, 0)
> +
> +/*
> +** test_uint64x1x3_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x1x3_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x4x4_t:
> +** st1 \{v2\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x4x4_t, 2, 3)
> +
> +/*
> +** test_float16x4x4_t:
> +** st1 \{v0\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float16x4x4_t, 0, 2)
> +
> +/*
> +** test_float32x2x4_t:
> +** str s3, \[x0\]
> +** ret
> +*/
> +TEST (float32x2x4_t, 3, 0)
> +
> +/*
> +** test_float64x1x4_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x1x4_t, 1, 0)
> +
> +/*
> +** test_int8x8x4_t:
> +** st1 \{v0\.b\}\[4\], \[x0\]
> +** ret
> +*/
> +TEST (int8x8x4_t, 0, 4)
> +
> +/*
> +** test_int16x4x4_t:
> +** st1 \{v3\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int16x4x4_t, 3, 3)
> +
> +/*
> +** test_int32x2x4_t:
> +** str s1, \[x0\]
> +** ret
> +*/
> +TEST (int32x2x4_t, 1, 0)
> +
> +/*
> +** test_int64x1x4_t:
> +** str d3, \[x0\]
> +** ret
> +*/
> +TEST (int64x1x4_t, 3, 0)
> +
> +/*
> +** test_uint8x8x4_t:
> +** st1 \{v3\.b\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x8x4_t, 3, 6)
> +
> +/*
> +** test_uint16x4x4_t:
> +** st1 \{v3\.h\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x4x4_t, 3, 1)
> +
> +/*
> +** test_uint32x2x4_t:
> +** str s0, \[x0\]
> +** ret
> +*/
> +TEST (uint32x2x4_t, 0, 0)
> +
> +/*
> +** test_uint64x1x4_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x1x4_t, 1, 0)
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-7.c b/gcc/testsuite/gcc.target/aarch64/pr113027-7.c
> new file mode 100644
> index 00000000000..b3ae1a74f76
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr113027-7.c
> @@ -0,0 +1,267 @@
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
> +
> +#include <arm_neon.h>
> +
> +#define TEST(TYPE, A, B) \
> +  void \
> +  test_##TYPE (TYPE a, TYPE *ptr) \
> +  { \
> +    ptr->val[0][0] = a.val[A][B]; \
> +  }
> +
> +/*
> +** test_bfloat16x8x2_t:
> +** st1 \{v1\.h\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x8x2_t, 1, 6)
> +
> +/*
> +** test_float16x8x2_t:
> +** st1 \{v1\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float16x8x2_t, 1, 2)
> +
> +/*
> +** test_float32x4x2_t:
> +** st1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (float32x4x2_t, 1, 3)
> +
> +/*
> +** test_float64x2x2_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x2x2_t, 1, 0)
> +
> +/*
> +** test_int8x16x2_t:
> +** st1 \{v0\.b\}\[15\], \[x0\]
> +** ret
> +*/
> +TEST (int8x16x2_t, 0, 15)
> +
> +/*
> +** test_int16x8x2_t:
> +** st1 \{v0\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (int16x8x2_t, 0, 2)
> +
> +/*
> +** test_int32x4x2_t:
> +** st1 \{v0\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int32x4x2_t, 0, 3)
> +
> +/*
> +** test_int64x2x2_t:
> +** str d0, \[x0\]
> +** ret
> +*/
> +TEST (int64x2x2_t, 0, 0)
> +
> +/*
> +** test_uint8x16x2_t:
> +** st1 \{v1\.b\}\[13\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x16x2_t, 1, 13)
> +
> +/*
> +** test_uint16x8x2_t:
> +** st1 \{v1\.h\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x8x2_t, 1, 6)
> +
> +/*
> +** test_uint32x4x2_t:
> +** st1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x4x2_t, 1, 3)
> +
> +/*
> +** test_uint64x2x2_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x2x2_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x8x3_t:
> +** st1 \{v2\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x8x3_t, 2, 3)
> +
> +/*
> +** test_float16x8x3_t:
> +** st1 \{v0\.h\}\[4\], \[x0\]
> +** ret
> +*/
> +TEST (float16x8x3_t, 0, 4)
> +
> +/*
> +** test_float32x4x3_t:
> +** st1 \{v1\.s\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float32x4x3_t, 1, 2)
> +
> +/*
> +** test_float64x2x3_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (float64x2x3_t, 1, 0)
> +
> +/*
> +** test_int8x16x3_t:
> +** st1 \{v0\.b\}\[9\], \[x0\]
> +** ret
> +*/
> +TEST (int8x16x3_t, 0, 9)
> +
> +/*
> +** test_int16x8x3_t:
> +** st1 \{v2\.h\}\[6\], \[x0\]
> +** ret
> +*/
> +TEST (int16x8x3_t, 2, 6)
> +
> +/*
> +** test_int32x4x3_t:
> +** st1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int32x4x3_t, 1, 3)
> +
> +/*
> +** test_int64x2x3_t:
> +** st1 \{v2\.d\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (int64x2x3_t, 2, 1)
> +
> +/*
> +** test_uint8x16x3_t:
> +** st1 \{v1\.b\}\[10\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x16x3_t, 1, 10)
> +
> +/*
> +** test_uint16x8x3_t:
> +** st1 \{v2\.h\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x8x3_t, 2, 5)
> +
> +/*
> +** test_uint32x4x3_t:
> +** st1 \{v2\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x4x3_t, 2, 3)
> +
> +/*
> +** test_uint64x2x3_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x2x3_t, 1, 0)
> +
> +//--------------------------------------------------------------
> +
> +/*
> +** test_bfloat16x8x4_t:
> +** st1 \{v2\.h\}\[5\], \[x0\]
> +** ret
> +*/
> +TEST (bfloat16x8x4_t, 2, 5)
> +
> +/*
> +** test_float16x8x4_t:
> +** st1 \{v0\.h\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (float16x8x4_t, 0, 3)
> +
> +/*
> +** test_float32x4x4_t:
> +** st1 \{v3\.s\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (float32x4x4_t, 3, 2)
> +
> +/*
> +** test_float64x2x4_t:
> +** st1 \{v1\.d\}\[1\], \[x0\]
> +** ret
> +*/
> +TEST (float64x2x4_t, 1, 1)
> +
> +/*
> +** test_int8x16x4_t:
> +** st1 \{v0\.b\}\[14\], \[x0\]
> +** ret
> +*/
> +TEST (int8x16x4_t, 0, 14)
> +
> +/*
> +** test_int16x8x4_t:
> +** st1 \{v3\.h\}\[4\], \[x0\]
> +** ret
> +*/
> +TEST (int16x8x4_t, 3, 4)
> +
> +/*
> +** test_int32x4x4_t:
> +** st1 \{v1\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (int32x4x4_t, 1, 3)
> +
> +/*
> +** test_int64x2x4_t:
> +** str d3, \[x0\]
> +** ret
> +*/
> +TEST (int64x2x4_t, 3, 0)
> +
> +/*
> +** test_uint8x16x4_t:
> +** st1 \{v3\.b\}\[13\], \[x0\]
> +** ret
> +*/
> +TEST (uint8x16x4_t, 3, 13)
> +
> +/*
> +** test_uint16x8x4_t:
> +** st1 \{v3\.h\}\[2\], \[x0\]
> +** ret
> +*/
> +TEST (uint16x8x4_t, 3, 2)
> +
> +/*
> +** test_uint32x4x4_t:
> +** st1 \{v0\.s\}\[3\], \[x0\]
> +** ret
> +*/
> +TEST (uint32x4x4_t, 0, 3)
> +
> +/*
> +** test_uint64x2x4_t:
> +** str d1, \[x0\]
> +** ret
> +*/
> +TEST (uint64x2x4_t, 1, 0)
> -- 
> 2.43.0
>
Re: [PATCH] aarch64: Add vec_set/extract for tuple modes [PR113027]

Reply via email to