We generated inefficient code for bitfield references to Advanced
SIMD structure modes.  In RTL, these modes are just extra-long
vectors, and so inserting and extracting an element is simply
a vec_set or vec_extract operation.
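
For illustration, a sketch of the kind of element copy involved (based on the
new pr113027-3.c test below; the function name "f" is just for the example):

  #include <arm_neon.h>

  float64x2x2_t
  f (float64x2x2_t x)
  {
    x.val[1][0] = x.val[0][0];  /* vec_extract from x.val[0], vec_set into x.val[1] */
    return x;
  }

With the patch this should compile to a single "ins v1.d[0], v0.d[0]" on
little-endian (as the new tests check), rather than going via extra moves
or the stack.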

For the record, I don't think these modes should ever become fully
fledged vector modes.  We shouldn't provide add, etc. for them.
But vec_set and vec_extract are the vector equivalent of insv
and extv.  From that point of view, they seem closer to moves
than to arithmetic.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
        PR target/113027
        * config/aarch64/aarch64-protos.h (aarch64_decompose_vec_struct_index):
        Declare.
        * config/aarch64/aarch64.cc (aarch64_decompose_vec_struct_index): New
        function.
        * config/aarch64/iterators.md (VEL, Vel): Add Advanced SIMD
        structure modes.
        * config/aarch64/aarch64-simd.md (vec_set<VSTRUCT_QD:mode>)
        (vec_extract<VSTRUCT_QD:mode><Vel>): New patterns.

gcc/testsuite/
        PR target/113027
        * gcc.target/aarch64/pr113027-1.c: New test.
        * gcc.target/aarch64/pr113027-2.c: Likewise.
        * gcc.target/aarch64/pr113027-3.c: Likewise.
        * gcc.target/aarch64/pr113027-4.c: Likewise.
        * gcc.target/aarch64/pr113027-5.c: Likewise.
        * gcc.target/aarch64/pr113027-6.c: Likewise.
        * gcc.target/aarch64/pr113027-7.c: Likewise.
---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64-simd.md            |  38 +++
 gcc/config/aarch64/aarch64.cc                 |  22 ++
 gcc/config/aarch64/iterators.md               |  48 ++++
 gcc/testsuite/gcc.target/aarch64/pr113027-1.c |  27 ++
 gcc/testsuite/gcc.target/aarch64/pr113027-2.c | 268 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr113027-3.c | 268 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr113027-4.c | 268 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr113027-5.c | 268 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr113027-6.c | 267 +++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr113027-7.c | 267 +++++++++++++++++
 11 files changed, 1742 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr113027-7.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f37e56d440..2c413cc9e22 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1036,6 +1036,7 @@ bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
 rtx aarch64_replace_reg_mode (rtx, machine_mode);
 void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
 void aarch64_expand_prologue (void);
+void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
 void aarch64_expand_vector_init (rtx, rtx);
 void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
 void aarch64_sve_expand_vector_init (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6e30dc48934..e771defc73f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1628,6 +1628,24 @@ (define_expand "vec_set<mode>"
   }
 )
 
+(define_expand "vec_set<mode>"
+  [(match_operand:VSTRUCT_QD 0 "register_operand")
+   (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[0],
+                                     &operands[2], true);
+  /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+     Allow gen_vec_set<vstruct_elt> to cope with those cases too.  */
+  auto gen_vec_setdi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+    {
+      return gen_move_insn (x0, x1);
+    };
+  auto gen_vec_setdf ATTRIBUTE_UNUSED = gen_vec_setdi;
+  emit_insn (gen_vec_set<vstruct_elt> (operands[0], operands[1], operands[2]));
+  DONE;
+})
 
 (define_insn "aarch64_mla<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
@@ -8883,6 +8901,26 @@ (define_expand "vec_extract<mode><Vel>"
     DONE;
 })
 
+(define_expand "vec_extract<mode><Vel>"
+  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+   (match_operand:VSTRUCT_QD 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[1],
+                                     &operands[2], false);
+  /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+     Allow gen_vec_extract<vstruct_elt><Vel> to cope with those cases too.  */
+  auto gen_vec_extractdidi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+    {
+      return gen_move_insn (x0, x1);
+    };
+  auto gen_vec_extractdfdf ATTRIBUTE_UNUSED = gen_vec_extractdidi;
+  emit_insn (gen_vec_extract<vstruct_elt><Vel> (operands[0], operands[1],
+                                               operands[2]));
+  DONE;
+})
+
 ;; Extract a 64-bit vector from one half of a 128-bit vector.
 (define_expand "vec_extract<mode><Vhalf>"
   [(match_operand:<VHALF> 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c8977b5a948..a4eee4ff56e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24658,6 +24658,28 @@ seq_cost_ignoring_scalar_moves (const rtx_insn *seq, bool speed)
   return cost;
 }
 
+/* *VECTOR is an Advanced SIMD structure mode and *INDEX is a constant index
+   into it.  Narrow *VECTOR and *INDEX so that they reference a single vector
+   of mode SUBVEC_MODE.  IS_DEST is true if *VECTOR is a destination operand,
+   false if it is a source operand.  */
+
+void
+aarch64_decompose_vec_struct_index (machine_mode subvec_mode,
+                                   rtx *vector, rtx *index, bool is_dest)
+{
+  auto elts_per_vector = GET_MODE_NUNITS (subvec_mode).to_constant ();
+  auto subvec = UINTVAL (*index) / elts_per_vector;
+  auto subelt = UINTVAL (*index) % elts_per_vector;
+  auto subvec_byte = subvec * GET_MODE_SIZE (subvec_mode);
+  if (is_dest)
+    *vector = simplify_gen_subreg (subvec_mode, *vector, GET_MODE (*vector),
+                                  subvec_byte);
+  else
+    *vector = force_subreg (subvec_mode, *vector, GET_MODE (*vector),
+                           subvec_byte);
+  *index = gen_int_mode (subelt, SImode);
+}
+
 /* Expand a vector initialization sequence, such that TARGET is
    initialized to contain VALS.  */
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 146453b0516..033a88a8467 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1678,6 +1678,30 @@ (define_mode_attr VEL [(V8QI  "QI") (V16QI "QI")
                       (SI   "SI") (HI    "HI")
                       (QI   "QI")
                       (V4BF "BF") (V8BF "BF")
+                      (V2x8QI "QI") (V2x4HI "HI")
+                      (V2x2SI "SI") (V2x1DI "DI")
+                      (V2x4HF "HF") (V2x2SF "SF")
+                      (V2x1DF "DF") (V2x4BF "BF")
+                      (V3x8QI "QI") (V3x4HI "HI")
+                      (V3x2SI "SI") (V3x1DI "DI")
+                      (V3x4HF "HF") (V3x2SF "SF")
+                      (V3x1DF "DF") (V3x4BF "BF")
+                      (V4x8QI "QI") (V4x4HI "HI")
+                      (V4x2SI "SI") (V4x1DI "DI")
+                      (V4x4HF "HF") (V4x2SF "SF")
+                      (V4x1DF "DF") (V4x4BF "BF")
+                      (V2x16QI "QI") (V2x8HI "HI")
+                      (V2x4SI "SI") (V2x2DI "DI")
+                      (V2x8HF "HF") (V2x4SF "SF")
+                      (V2x2DF "DF") (V2x8BF "BF")
+                      (V3x16QI "QI") (V3x8HI "HI")
+                      (V3x4SI "SI") (V3x2DI "DI")
+                      (V3x8HF "HF") (V3x4SF "SF")
+                      (V3x2DF "DF") (V3x8BF "BF")
+                      (V4x16QI "QI") (V4x8HI "HI")
+                      (V4x4SI "SI") (V4x2DI "DI")
+                      (V4x8HF "HF") (V4x4SF "SF")
+                      (V4x2DF "DF") (V4x8BF "BF")
                       (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
                       (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
                       (VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
@@ -1699,6 +1723,30 @@ (define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
                       (DF   "df") (SI   "si")
                       (HI   "hi") (QI   "qi")
                       (V4BF "bf") (V8BF "bf")
+                      (V2x8QI "qi") (V2x4HI "hi")
+                      (V2x2SI "si") (V2x1DI "di")
+                      (V2x4HF "hf") (V2x2SF "sf")
+                      (V2x1DF "df") (V2x4BF "bf")
+                      (V3x8QI "qi") (V3x4HI "hi")
+                      (V3x2SI "si") (V3x1DI "di")
+                      (V3x4HF "hf") (V3x2SF "sf")
+                      (V3x1DF "df") (V3x4BF "bf")
+                      (V4x8QI "qi") (V4x4HI "hi")
+                      (V4x2SI "si") (V4x1DI "di")
+                      (V4x4HF "hf") (V4x2SF "sf")
+                      (V4x1DF "df") (V4x4BF "bf")
+                      (V2x16QI "qi") (V2x8HI "hi")
+                      (V2x4SI "si") (V2x2DI "di")
+                      (V2x8HF "hf") (V2x4SF "sf")
+                      (V2x2DF "df") (V2x8BF "bf")
+                      (V3x16QI "qi") (V3x8HI "hi")
+                      (V3x4SI "si") (V3x2DI "di")
+                      (V3x8HF "hf") (V3x4SF "sf")
+                      (V3x2DF "df") (V3x8BF "bf")
+                      (V4x16QI "qi") (V4x8HI "hi")
+                      (V4x4SI "si") (V4x2DI "di")
+                      (V4x8HF "hf") (V4x4SF "sf")
+                      (V4x2DF "df") (V4x8BF "bf")
                       (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
                       (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
                       (VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-1.c b/gcc/testsuite/gcc.target/aarch64/pr113027-1.c
new file mode 100644
index 00000000000..6d9a51fd408
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-1.c
@@ -0,0 +1,27 @@
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+float64x2x2_t
+f1 (float64x2x2_t x)
+{
+  x.val[0][1] += 1.0;
+  return x;
+}
+
+float64x2x3_t
+f2 (float64x2x3_t x)
+{
+  x.val[0][0] = x.val[1][1] + x.val[2][0];
+  return x;
+}
+
+float64x2x4_t
+f3 (float64x2x4_t x)
+{
+  x.val[0][0] = x.val[1][1] + x.val[2][0] - x.val[3][1];
+  return x;
+}
+
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+/* { dg-final { scan-assembler-not {\[sp,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-2.c b/gcc/testsuite/gcc.target/aarch64/pr113027-2.c
new file mode 100644
index 00000000000..ec756ec86e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-2.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B, C, D)                 \
+  TYPE                                         \
+  test_##TYPE (TYPE a)                         \
+  {                                            \
+    a.val[A][B] = a.val[C][D];                 \
+    return a;                                  \
+  }
+
+/*
+** test_bfloat16x4x2_t:
+**     ins     v1\.h\[3\], v0\.h\[2\]
+**     ret
+*/
+TEST (bfloat16x4x2_t, 1, 3, 0, 2)
+
+/*
+** test_float16x4x2_t:
+**     ins     v1\.h\[1\], v0\.h\[3\]
+**     ret
+*/
+TEST (float16x4x2_t, 1, 1, 0, 3)
+
+/*
+** test_float32x2x2_t:
+**     ins     v1\.s\[0\], v0\.s\[1\]
+**     ret
+*/
+TEST (float32x2x2_t, 1, 0, 0, 1)
+
+/*
+** test_float64x1x2_t:
+**     fmov    d1, d0
+**     ret
+*/
+TEST (float64x1x2_t, 1, 0, 0, 0)
+
+/*
+** test_int8x8x2_t:
+**     ins     v0\.b\[5\], v1\.b\[7\]
+**     ret
+*/
+TEST (int8x8x2_t, 0, 5, 1, 7)
+
+/*
+** test_int16x4x2_t:
+**     ins     v0\.h\[2\], v1\.h\[2\]
+**     ret
+*/
+TEST (int16x4x2_t, 0, 2, 1, 2)
+
+/*
+** test_int32x2x2_t:
+**     ins     v0\.s\[0\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x2x2_t, 0, 0, 1, 1)
+
+/*
+** test_int64x1x2_t:
+**     fmov    d0, d1
+**     ret
+*/
+TEST (int64x1x2_t, 0, 0, 1, 0)
+
+/*
+** test_uint8x8x2_t:
+**     ins     v1\.b\[6\], v0\.b\[3\]
+**     ret
+*/
+TEST (uint8x8x2_t, 1, 6, 0, 3)
+
+/*
+** test_uint16x4x2_t:
+**     ins     v1\.h\[2\], v1\.h\[0\]
+**     ret
+*/
+TEST (uint16x4x2_t, 1, 2, 1, 0)
+
+/*
+** test_uint32x2x2_t:
+**     ins     v1\.s\[0\], v1\.s\[1\]
+**     ret
+*/
+TEST (uint32x2x2_t, 1, 0, 1, 1)
+
+/*
+** test_uint64x1x2_t:
+**     fmov    d1, d0
+**     ret
+*/
+TEST (uint64x1x2_t, 1, 0, 0, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+**     ins     v2\.h\[3\], v0\.h\[2\]
+**     ret
+*/
+TEST (bfloat16x4x3_t, 2, 3, 0, 2)
+
+/*
+** test_float16x4x3_t:
+**     ins     v0\.h\[1\], v1\.h\[3\]
+**     ret
+*/
+TEST (float16x4x3_t, 0, 1, 1, 3)
+
+/*
+** test_float32x2x3_t:
+**     ins     v1\.s\[0\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x2x3_t, 1, 0, 2, 1)
+
+/*
+** test_float64x1x3_t:
+**     fmov    d1, d2
+**     ret
+*/
+TEST (float64x1x3_t, 1, 0, 2, 0)
+
+/*
+** test_int8x8x3_t:
+**     ins     v0\.b\[5\], v2\.b\[6\]
+**     ret
+*/
+TEST (int8x8x3_t, 0, 5, 2, 6)
+
+/*
+** test_int16x4x3_t:
+**     ins     v2\.h\[2\], v1\.h\[1\]
+**     ret
+*/
+TEST (int16x4x3_t, 2, 2, 1, 1)
+
+/*
+** test_int32x2x3_t:
+**     ins     v1\.s\[0\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x2x3_t, 1, 0, 1, 1)
+
+/*
+** test_int64x1x3_t:
+**     fmov    d2, d1
+**     ret
+*/
+TEST (int64x1x3_t, 2, 0, 1, 0)
+
+/*
+** test_uint8x8x3_t:
+**     ins     v1\.b\[6\], v2\.b\[7\]
+**     ret
+*/
+TEST (uint8x8x3_t, 1, 6, 2, 7)
+
+/*
+** test_uint16x4x3_t:
+**     ins     v2\.h\[2\], v1\.h\[3\]
+**     ret
+*/
+TEST (uint16x4x3_t, 2, 2, 1, 3)
+
+/*
+** test_uint32x2x3_t:
+**     ins     v2\.s\[0\], v0\.s\[1\]
+**     ret
+*/
+TEST (uint32x2x3_t, 2, 0, 0, 1)
+
+/*
+** test_uint64x1x3_t:
+**     fmov    d1, d2
+**     ret
+*/
+TEST (uint64x1x3_t, 1, 0, 2, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+**     ins     v2\.h\[3\], v3\.h\[2\]
+**     ret
+*/
+TEST (bfloat16x4x4_t, 2, 3, 3, 2)
+
+/*
+** test_float16x4x4_t:
+**     ins     v0\.h\[2\], v3\.h\[1\]
+**     ret
+*/
+TEST (float16x4x4_t, 0, 2, 3, 1)
+
+/*
+** test_float32x2x4_t:
+**     ins     v3\.s\[0\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x2x4_t, 3, 0, 2, 1)
+
+/*
+** test_float64x1x4_t:
+**     fmov    d1, d3
+**     ret
+*/
+TEST (float64x1x4_t, 1, 0, 3, 0)
+
+/*
+** test_int8x8x4_t:
+**     ins     v0\.b\[4\], v3\.b\[7\]
+**     ret
+*/
+TEST (int8x8x4_t, 0, 4, 3, 7)
+
+/*
+** test_int16x4x4_t:
+**     ins     v3\.h\[3\], v1\.h\[1\]
+**     ret
+*/
+TEST (int16x4x4_t, 3, 3, 1, 1)
+
+/*
+** test_int32x2x4_t:
+**     ins     v1\.s\[0\], v3\.s\[1\]
+**     ret
+*/
+TEST (int32x2x4_t, 1, 0, 3, 1)
+
+/*
+** test_int64x1x4_t:
+**     fmov    d3, d1
+**     ret
+*/
+TEST (int64x1x4_t, 3, 0, 1, 0)
+
+/*
+** test_uint8x8x4_t:
+**     ins     v3\.b\[6\], v2\.b\[4\]
+**     ret
+*/
+TEST (uint8x8x4_t, 3, 6, 2, 4)
+
+/*
+** test_uint16x4x4_t:
+**     ins     v3\.h\[1\], v1\.h\[3\]
+**     ret
+*/
+TEST (uint16x4x4_t, 3, 1, 1, 3)
+
+/*
+** test_uint32x2x4_t:
+**     ins     v0\.s\[0\], v3\.s\[1\]
+**     ret
+*/
+TEST (uint32x2x4_t, 0, 0, 3, 1)
+
+/*
+** test_uint64x1x4_t:
+**     fmov    d1, d3
+**     ret
+*/
+TEST (uint64x1x4_t, 1, 0, 3, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-3.c b/gcc/testsuite/gcc.target/aarch64/pr113027-3.c
new file mode 100644
index 00000000000..561e6721a80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-3.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B, C, D)                 \
+  TYPE                                         \
+  test_##TYPE (TYPE a)                         \
+  {                                            \
+    a.val[A][B] = a.val[C][D];                 \
+    return a;                                  \
+  }
+
+/*
+** test_bfloat16x8x2_t:
+**     ins     v1\.h\[6\], v0\.h\[5\]
+**     ret
+*/
+TEST (bfloat16x8x2_t, 1, 6, 0, 5)
+
+/*
+** test_float16x8x2_t:
+**     ins     v1\.h\[2\], v0\.h\[7\]
+**     ret
+*/
+TEST (float16x8x2_t, 1, 2, 0, 7)
+
+/*
+** test_float32x4x2_t:
+**     ins     v1\.s\[3\], v0\.s\[1\]
+**     ret
+*/
+TEST (float32x4x2_t, 1, 3, 0, 1)
+
+/*
+** test_float64x2x2_t:
+**     ins     v1\.d\[0\], v0\.d\[0\]
+**     ret
+*/
+TEST (float64x2x2_t, 1, 0, 0, 0)
+
+/*
+** test_int8x16x2_t:
+**     ins     v0\.b\[15\], v1\.b\[13\]
+**     ret
+*/
+TEST (int8x16x2_t, 0, 15, 1, 13)
+
+/*
+** test_int16x8x2_t:
+**     ins     v0\.h\[2\], v1\.h\[7\]
+**     ret
+*/
+TEST (int16x8x2_t, 0, 2, 1, 7)
+
+/*
+** test_int32x4x2_t:
+**     ins     v0\.s\[3\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x4x2_t, 0, 3, 1, 1)
+
+/*
+** test_int64x2x2_t:
+**     ins     v0\.d\[0\], v1\.d\[1\]
+**     ret
+*/
+TEST (int64x2x2_t, 0, 0, 1, 1)
+
+/*
+** test_uint8x16x2_t:
+**     ins     v1\.b\[13\], v0\.b\[11\]
+**     ret
+*/
+TEST (uint8x16x2_t, 1, 13, 0, 11)
+
+/*
+** test_uint16x8x2_t:
+**     ins     v1\.h\[6\], v1\.h\[3\]
+**     ret
+*/
+TEST (uint16x8x2_t, 1, 6, 1, 3)
+
+/*
+** test_uint32x4x2_t:
+**     ins     v1\.s\[3\], v1\.s\[1\]
+**     ret
+*/
+TEST (uint32x4x2_t, 1, 3, 1, 1)
+
+/*
+** test_uint64x2x2_t:
+**     ins     v1\.d\[0\], v1\.d\[1\]
+**     ret
+*/
+TEST (uint64x2x2_t, 1, 0, 1, 1)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+**     ins     v2\.h\[3\], v0\.h\[7\]
+**     ret
+*/
+TEST (bfloat16x8x3_t, 2, 3, 0, 7)
+
+/*
+** test_float16x8x3_t:
+**     ins     v0\.h\[4\], v1\.h\[6\]
+**     ret
+*/
+TEST (float16x8x3_t, 0, 4, 1, 6)
+
+/*
+** test_float32x4x3_t:
+**     ins     v1\.s\[2\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x4x3_t, 1, 2, 2, 1)
+
+/*
+** test_float64x2x3_t:
+**     ins     v1\.d\[0\], v2\.d\[1\]
+**     ret
+*/
+TEST (float64x2x3_t, 1, 0, 2, 1)
+
+/*
+** test_int8x16x3_t:
+**     ins     v0\.b\[9\], v2\.b\[14\]
+**     ret
+*/
+TEST (int8x16x3_t, 0, 9, 2, 14)
+
+/*
+** test_int16x8x3_t:
+**     ins     v2\.h\[6\], v1\.h\[3\]
+**     ret
+*/
+TEST (int16x8x3_t, 2, 6, 1, 3)
+
+/*
+** test_int32x4x3_t:
+**     ins     v1\.s\[3\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x4x3_t, 1, 3, 1, 1)
+
+/*
+** test_int64x2x3_t:
+**     ins     v2\.d\[1\], v1\.d\[0\]
+**     ret
+*/
+TEST (int64x2x3_t, 2, 1, 1, 0)
+
+/*
+** test_uint8x16x3_t:
+**     ins     v1\.b\[10\], v2\.b\[8\]
+**     ret
+*/
+TEST (uint8x16x3_t, 1, 10, 2, 8)
+
+/*
+** test_uint16x8x3_t:
+**     ins     v2\.h\[5\], v1\.h\[2\]
+**     ret
+*/
+TEST (uint16x8x3_t, 2, 5, 1, 2)
+
+/*
+** test_uint32x4x3_t:
+**     ins     v2\.s\[3\], v0\.s\[1\]
+**     ret
+*/
+TEST (uint32x4x3_t, 2, 3, 0, 1)
+
+/*
+** test_uint64x2x3_t:
+**     ins     v1\.d\[0\], v2\.d\[1\]
+**     ret
+*/
+TEST (uint64x2x3_t, 1, 0, 2, 1)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+**     ins     v2\.h\[5\], v3\.h\[6\]
+**     ret
+*/
+TEST (bfloat16x8x4_t, 2, 5, 3, 6)
+
+/*
+** test_float16x8x4_t:
+**     ins     v0\.h\[3\], v3\.h\[5\]
+**     ret
+*/
+TEST (float16x8x4_t, 0, 3, 3, 5)
+
+/*
+** test_float32x4x4_t:
+**     ins     v3\.s\[2\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x4x4_t, 3, 2, 2, 1)
+
+/*
+** test_float64x2x4_t:
+**     ins     v1\.d\[1\], v3\.d\[0\]
+**     ret
+*/
+TEST (float64x2x4_t, 1, 1, 3, 0)
+
+/*
+** test_int8x16x4_t:
+**     ins     v0\.b\[14\], v3\.b\[10\]
+**     ret
+*/
+TEST (int8x16x4_t, 0, 14, 3, 10)
+
+/*
+** test_int16x8x4_t:
+**     ins     v3\.h\[4\], v1\.h\[6\]
+**     ret
+*/
+TEST (int16x8x4_t, 3, 4, 1, 6)
+
+/*
+** test_int32x4x4_t:
+**     ins     v1\.s\[3\], v3\.s\[1\]
+**     ret
+*/
+TEST (int32x4x4_t, 1, 3, 3, 1)
+
+/*
+** test_int64x2x4_t:
+**     ins     v3\.d\[0\], v2\.d\[0\]
+**     ret
+*/
+TEST (int64x2x4_t, 3, 0, 2, 0)
+
+/*
+** test_uint8x16x4_t:
+**     ins     v3\.b\[13\], v2\.b\[6\]
+**     ret
+*/
+TEST (uint8x16x4_t, 3, 13, 2, 6)
+
+/*
+** test_uint16x8x4_t:
+**     ins     v3\.h\[2\], v1\.h\[7\]
+**     ret
+*/
+TEST (uint16x8x4_t, 3, 2, 1, 7)
+
+/*
+** test_uint32x4x4_t:
+**     ins     v0\.s\[3\], v3\.s\[2\]
+**     ret
+*/
+TEST (uint32x4x4_t, 0, 3, 3, 2)
+
+/*
+** test_uint64x2x4_t:
+**     ins     v1\.d\[0\], v3\.d\[1\]
+**     ret
+*/
+TEST (uint64x2x4_t, 1, 0, 3, 1)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-4.c b/gcc/testsuite/gcc.target/aarch64/pr113027-4.c
new file mode 100644
index 00000000000..67f45dfa4f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-4.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)               \
+  TYPE                                 \
+  test_##TYPE (TYPE a, TYPE *ptr)      \
+  {                                    \
+    a.val[A][B] = ptr->val[0][0];      \
+    return a;                          \
+  }
+
+/*
+** test_bfloat16x4x2_t:
+**     ld1     \{v1\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x2_t, 1, 3)
+
+/*
+** test_float16x4x2_t:
+**     ld1     \{v1\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x2_t, 1, 1)
+
+/*
+** test_float32x2x2_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float32x2x2_t, 1, 0)
+
+/*
+** test_float64x1x2_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x2_t, 1, 0)
+
+/*
+** test_int8x8x2_t:
+**     ld1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x2_t, 0, 5)
+
+/*
+** test_int16x4x2_t:
+**     ld1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x2_t, 0, 2)
+
+/*
+** test_int32x2x2_t:
+**     ld1     \{v0\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int32x2x2_t, 0, 0)
+
+/*
+** test_int64x1x2_t:
+**     ldr     d0, \[x0\]
+**     ret
+*/
+TEST (int64x1x2_t, 0, 0)
+
+/*
+** test_uint8x8x2_t:
+**     ld1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x2_t, 1, 6)
+
+/*
+** test_uint16x4x2_t:
+**     ld1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x2_t, 1, 2)
+
+/*
+** test_uint32x2x2_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint32x2x2_t, 1, 0)
+
+/*
+** test_uint64x1x2_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+**     ld1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x3_t, 2, 3)
+
+/*
+** test_float16x4x3_t:
+**     ld1     \{v0\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x3_t, 0, 1)
+
+/*
+** test_float32x2x3_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float32x2x3_t, 1, 0)
+
+/*
+** test_float64x1x3_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x3_t, 1, 0)
+
+/*
+** test_int8x8x3_t:
+**     ld1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x3_t, 0, 5)
+
+/*
+** test_int16x4x3_t:
+**     ld1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x3_t, 2, 2)
+
+/*
+** test_int32x2x3_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int32x2x3_t, 1, 0)
+
+/*
+** test_int64x1x3_t:
+**     ldr     d2, \[x0\]
+**     ret
+*/
+TEST (int64x1x3_t, 2, 0)
+
+/*
+** test_uint8x8x3_t:
+**     ld1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x3_t, 1, 6)
+
+/*
+** test_uint16x4x3_t:
+**     ld1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x3_t, 2, 2)
+
+/*
+** test_uint32x2x3_t:
+**     ld1     \{v2\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint32x2x3_t, 2, 0)
+
+/*
+** test_uint64x1x3_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+**     ld1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x4_t, 2, 3)
+
+/*
+** test_float16x4x4_t:
+**     ld1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x4x4_t, 0, 2)
+
+/*
+** test_float32x2x4_t:
+**     ld1     \{v3\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float32x2x4_t, 3, 0)
+
+/*
+** test_float64x1x4_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x4_t, 1, 0)
+
+/*
+** test_int8x8x4_t:
+**     ld1     \{v0\.b\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int8x8x4_t, 0, 4)
+
+/*
+** test_int16x4x4_t:
+**     ld1     \{v3\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int16x4x4_t, 3, 3)
+
+/*
+** test_int32x2x4_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int32x2x4_t, 1, 0)
+
+/*
+** test_int64x1x4_t:
+**     ldr     d3, \[x0\]
+**     ret
+*/
+TEST (int64x1x4_t, 3, 0)
+
+/*
+** test_uint8x8x4_t:
+**     ld1     \{v3\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x4_t, 3, 6)
+
+/*
+** test_uint16x4x4_t:
+**     ld1     \{v3\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x4_t, 3, 1)
+
+/*
+** test_uint32x2x4_t:
+**     ld1     \{v0\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint32x2x4_t, 0, 0)
+
+/*
+** test_uint64x1x4_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x4_t, 1, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-5.c b/gcc/testsuite/gcc.target/aarch64/pr113027-5.c
new file mode 100644
index 00000000000..5695ecab8ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-5.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)                       \
+  TYPE                                         \
+  test_##TYPE (TYPE a, TYPE *ptr)              \
+  {                                            \
+    a.val[A][B] = ptr->val[0][0];              \
+    return a;                                  \
+  }
+
+/*
+** test_bfloat16x8x2_t:
+**     ld1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x2_t, 1, 6)
+
+/*
+** test_float16x8x2_t:
+**     ld1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x8x2_t, 1, 2)
+
+/*
+** test_float32x4x2_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float32x4x2_t, 1, 3)
+
+/*
+** test_float64x2x2_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float64x2x2_t, 1, 0)
+
+/*
+** test_int8x16x2_t:
+**     ld1     \{v0\.b\}\[15\], \[x0\]
+**     ret
+*/
+TEST (int8x16x2_t, 0, 15)
+
+/*
+** test_int16x8x2_t:
+**     ld1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x8x2_t, 0, 2)
+
+/*
+** test_int32x4x2_t:
+**     ld1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x2_t, 0, 3)
+
+/*
+** test_int64x2x2_t:
+**     ld1     \{v0\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int64x2x2_t, 0, 0)
+
+/*
+** test_uint8x16x2_t:
+**     ld1     \{v1\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x2_t, 1, 13)
+
+/*
+** test_uint16x8x2_t:
+**     ld1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x2_t, 1, 6)
+
+/*
+** test_uint32x4x2_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x2_t, 1, 3)
+
+/*
+** test_uint64x2x2_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint64x2x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+**     ld1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x3_t, 2, 3)
+
+/*
+** test_float16x8x3_t:
+**     ld1     \{v0\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (float16x8x3_t, 0, 4)
+
+/*
+** test_float32x4x3_t:
+**     ld1     \{v1\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x3_t, 1, 2)
+
+/*
+** test_float64x2x3_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float64x2x3_t, 1, 0)
+
+/*
+** test_int8x16x3_t:
+**     ld1     \{v0\.b\}\[9\], \[x0\]
+**     ret
+*/
+TEST (int8x16x3_t, 0, 9)
+
+/*
+** test_int16x8x3_t:
+**     ld1     \{v2\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (int16x8x3_t, 2, 6)
+
+/*
+** test_int32x4x3_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x3_t, 1, 3)
+
+/*
+** test_int64x2x3_t:
+**     ld1     \{v2\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (int64x2x3_t, 2, 1)
+
+/*
+** test_uint8x16x3_t:
+**     ld1     \{v1\.b\}\[10\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x3_t, 1, 10)
+
+/*
+** test_uint16x8x3_t:
+**     ld1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x3_t, 2, 5)
+
+/*
+** test_uint32x4x3_t:
+**     ld1     \{v2\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x3_t, 2, 3)
+
+/*
+** test_uint64x2x3_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint64x2x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+**     ld1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x4_t, 2, 5)
+
+/*
+** test_float16x8x4_t:
+**     ld1     \{v0\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float16x8x4_t, 0, 3)
+
+/*
+** test_float32x4x4_t:
+**     ld1     \{v3\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x4_t, 3, 2)
+
+/*
+** test_float64x2x4_t:
+**     ld1     \{v1\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float64x2x4_t, 1, 1)
+
+/*
+** test_int8x16x4_t:
+**     ld1     \{v0\.b\}\[14\], \[x0\]
+**     ret
+*/
+TEST (int8x16x4_t, 0, 14)
+
+/*
+** test_int16x8x4_t:
+**     ld1     \{v3\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int16x8x4_t, 3, 4)
+
+/*
+** test_int32x4x4_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x4_t, 1, 3)
+
+/*
+** test_int64x2x4_t:
+**     ld1     \{v3\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int64x2x4_t, 3, 0)
+
+/*
+** test_uint8x16x4_t:
+**     ld1     \{v3\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x4_t, 3, 13)
+
+/*
+** test_uint16x8x4_t:
+**     ld1     \{v3\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x4_t, 3, 2)
+
+/*
+** test_uint32x4x4_t:
+**     ld1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x4_t, 0, 3)
+
+/*
+** test_uint64x2x4_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint64x2x4_t, 1, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-6.c b/gcc/testsuite/gcc.target/aarch64/pr113027-6.c
new file mode 100644
index 00000000000..12d3a38f74b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-6.c
@@ -0,0 +1,267 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)               \
+  void                                 \
+  test_##TYPE (TYPE a, TYPE *ptr)      \
+  {                                    \
+    ptr->val[0][0] = a.val[A][B];      \
+  }
+
+/*
+** test_bfloat16x4x2_t:
+**     st1     \{v1\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x2_t, 1, 3)
+
+/*
+** test_float16x4x2_t:
+**     st1     \{v1\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x2_t, 1, 1)
+
+/*
+** test_float32x2x2_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (float32x2x2_t, 1, 0)
+
+/*
+** test_float64x1x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x2_t, 1, 0)
+
+/*
+** test_int8x8x2_t:
+**     st1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x2_t, 0, 5)
+
+/*
+** test_int16x4x2_t:
+**     st1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x2_t, 0, 2)
+
+/*
+** test_int32x2x2_t:
+**     str     s0, \[x0\]
+**     ret
+*/
+TEST (int32x2x2_t, 0, 0)
+
+/*
+** test_int64x1x2_t:
+**     str     d0, \[x0\]
+**     ret
+*/
+TEST (int64x1x2_t, 0, 0)
+
+/*
+** test_uint8x8x2_t:
+**     st1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x2_t, 1, 6)
+
+/*
+** test_uint16x4x2_t:
+**     st1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x2_t, 1, 2)
+
+/*
+** test_uint32x2x2_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (uint32x2x2_t, 1, 0)
+
+/*
+** test_uint64x1x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+**     st1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x3_t, 2, 3)
+
+/*
+** test_float16x4x3_t:
+**     st1     \{v0\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x3_t, 0, 1)
+
+/*
+** test_float32x2x3_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (float32x2x3_t, 1, 0)
+
+/*
+** test_float64x1x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x3_t, 1, 0)
+
+/*
+** test_int8x8x3_t:
+**     st1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x3_t, 0, 5)
+
+/*
+** test_int16x4x3_t:
+**     st1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x3_t, 2, 2)
+
+/*
+** test_int32x2x3_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (int32x2x3_t, 1, 0)
+
+/*
+** test_int64x1x3_t:
+**     str     d2, \[x0\]
+**     ret
+*/
+TEST (int64x1x3_t, 2, 0)
+
+/*
+** test_uint8x8x3_t:
+**     st1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x3_t, 1, 6)
+
+/*
+** test_uint16x4x3_t:
+**     st1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x3_t, 2, 2)
+
+/*
+** test_uint32x2x3_t:
+**     str     s2, \[x0\]
+**     ret
+*/
+TEST (uint32x2x3_t, 2, 0)
+
+/*
+** test_uint64x1x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+**     st1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x4_t, 2, 3)
+
+/*
+** test_float16x4x4_t:
+**     st1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x4x4_t, 0, 2)
+
+/*
+** test_float32x2x4_t:
+**     str     s3, \[x0\]
+**     ret
+*/
+TEST (float32x2x4_t, 3, 0)
+
+/*
+** test_float64x1x4_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x4_t, 1, 0)
+
+/*
+** test_int8x8x4_t:
+**     st1     \{v0\.b\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int8x8x4_t, 0, 4)
+
+/*
+** test_int16x4x4_t:
+**     st1     \{v3\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int16x4x4_t, 3, 3)
+
+/*
+** test_int32x2x4_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (int32x2x4_t, 1, 0)
+
+/*
+** test_int64x1x4_t:
+**     str     d3, \[x0\]
+**     ret
+*/
+TEST (int64x1x4_t, 3, 0)
+
+/*
+** test_uint8x8x4_t:
+**     st1     \{v3\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x4_t, 3, 6)
+
+/*
+** test_uint16x4x4_t:
+**     st1     \{v3\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x4_t, 3, 1)
+
+/*
+** test_uint32x2x4_t:
+**     str     s0, \[x0\]
+**     ret
+*/
+TEST (uint32x2x4_t, 0, 0)
+
+/*
+** test_uint64x1x4_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x4_t, 1, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-7.c b/gcc/testsuite/gcc.target/aarch64/pr113027-7.c
new file mode 100644
index 00000000000..b3ae1a74f76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-7.c
@@ -0,0 +1,267 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)                       \
+  void                                         \
+  test_##TYPE (TYPE a, TYPE *ptr)              \
+  {                                            \
+    ptr->val[0][0] = a.val[A][B];              \
+  }
+
+/*
+** test_bfloat16x8x2_t:
+**     st1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x2_t, 1, 6)
+
+/*
+** test_float16x8x2_t:
+**     st1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x8x2_t, 1, 2)
+
+/*
+** test_float32x4x2_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float32x4x2_t, 1, 3)
+
+/*
+** test_float64x2x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x2x2_t, 1, 0)
+
+/*
+** test_int8x16x2_t:
+**     st1     \{v0\.b\}\[15\], \[x0\]
+**     ret
+*/
+TEST (int8x16x2_t, 0, 15)
+
+/*
+** test_int16x8x2_t:
+**     st1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x8x2_t, 0, 2)
+
+/*
+** test_int32x4x2_t:
+**     st1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x2_t, 0, 3)
+
+/*
+** test_int64x2x2_t:
+**     str     d0, \[x0\]
+**     ret
+*/
+TEST (int64x2x2_t, 0, 0)
+
+/*
+** test_uint8x16x2_t:
+**     st1     \{v1\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x2_t, 1, 13)
+
+/*
+** test_uint16x8x2_t:
+**     st1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x2_t, 1, 6)
+
+/*
+** test_uint32x4x2_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x2_t, 1, 3)
+
+/*
+** test_uint64x2x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x2x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+**     st1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x3_t, 2, 3)
+
+/*
+** test_float16x8x3_t:
+**     st1     \{v0\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (float16x8x3_t, 0, 4)
+
+/*
+** test_float32x4x3_t:
+**     st1     \{v1\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x3_t, 1, 2)
+
+/*
+** test_float64x2x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x2x3_t, 1, 0)
+
+/*
+** test_int8x16x3_t:
+**     st1     \{v0\.b\}\[9\], \[x0\]
+**     ret
+*/
+TEST (int8x16x3_t, 0, 9)
+
+/*
+** test_int16x8x3_t:
+**     st1     \{v2\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (int16x8x3_t, 2, 6)
+
+/*
+** test_int32x4x3_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x3_t, 1, 3)
+
+/*
+** test_int64x2x3_t:
+**     st1     \{v2\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (int64x2x3_t, 2, 1)
+
+/*
+** test_uint8x16x3_t:
+**     st1     \{v1\.b\}\[10\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x3_t, 1, 10)
+
+/*
+** test_uint16x8x3_t:
+**     st1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x3_t, 2, 5)
+
+/*
+** test_uint32x4x3_t:
+**     st1     \{v2\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x3_t, 2, 3)
+
+/*
+** test_uint64x2x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x2x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+**     st1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x4_t, 2, 5)
+
+/*
+** test_float16x8x4_t:
+**     st1     \{v0\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float16x8x4_t, 0, 3)
+
+/*
+** test_float32x4x4_t:
+**     st1     \{v3\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x4_t, 3, 2)
+
+/*
+** test_float64x2x4_t:
+**     st1     \{v1\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float64x2x4_t, 1, 1)
+
+/*
+** test_int8x16x4_t:
+**     st1     \{v0\.b\}\[14\], \[x0\]
+**     ret
+*/
+TEST (int8x16x4_t, 0, 14)
+
+/*
+** test_int16x8x4_t:
+**     st1     \{v3\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int16x8x4_t, 3, 4)
+
+/*
+** test_int32x4x4_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x4_t, 1, 3)
+
+/*
+** test_int64x2x4_t:
+**     str     d3, \[x0\]
+**     ret
+*/
+TEST (int64x2x4_t, 3, 0)
+
+/*
+** test_uint8x16x4_t:
+**     st1     \{v3\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x4_t, 3, 13)
+
+/*
+** test_uint16x8x4_t:
+**     st1     \{v3\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x4_t, 3, 2)
+
+/*
+** test_uint32x4x4_t:
+**     st1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x4_t, 0, 3)
+
+/*
+** test_uint64x2x4_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x2x4_t, 1, 0)
-- 
2.43.0

