[PATCH] aarch64: use ZIP1 instead of UZP1 for concatenation [PR125550]

Artemiy Volkov Mon, 08 Jun 2026 05:47:04 -0700

This patch addresses the issue in PR125550, where two float16 values are
being concatenated using uzp1, i.e., this code:


svfloat16_t foo (float x0, float x1)
{
  return svdupq_n_f16 (x0, x1, x0, x1, x0, x1, x0, x1);
}

is being compiled into:

        fcvt    h0, s0
        fcvt    h1, s1
        uzp1    v0.4h, v0.4h, v1.4h
        mov     z0.s, s0
        ret

causing the duplication of a 2-element vector (0, (float16) x0) into z0.

This is a copy-paste error from the original combine_internal patterns,
where UZP1 always operates on vectors of 2 elements, in which circumstance
it is equivalent to ZIP1.  For smaller element sizes (and thus higher
element counts) only ZIP1 is correct.

The fix is to emit ZIP1 when concatenating values on vector registers.
For consistency, I've changed the original combine_internal patterns as
well as the ones added in r17-898-g920eeb67a3537b.  Since the change to
the original patterns is not needed to fix the PR, it might have been
better to split the patch in two; I'd be happy to do that if necessary.

Both aforementioned changes required adjusting existing AdvSIMD/SVE
vec_init-related testcases; I've added pr125550.c from the PR on top of
that as well.

Bootstrapped and regtested on aarch64-linux-gnu.

        PR target/125550

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md
        (*aarch64_combine_internal<mode>): Use zip1 instead of uzp1
        to concatenate values residing in SIMD registers.
        (*aarch64_combine_internal_be<mode>): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/ldp_stp_16.c: Adjust testcases.
        * gcc.target/aarch64/pr109072_1.c: Likewise.
        * gcc.target/aarch64/simd/mf8_data_1.c: Likewise.
        * gcc.target/aarch64/sve/vec_init_5.c: Likewise.
        * gcc.target/aarch64/vec-init-14.c: Likewise.
        * gcc.target/aarch64/vec-init-23.c: Likewise.
        * gcc.target/aarch64/vec-init-9.c: Likewise.
        * gcc.target/aarch64/sve/pr125550.c: New test.
---
 gcc/config/aarch64/aarch64-simd.md            |  8 +++---
 gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c | 10 +++----
 gcc/testsuite/gcc.target/aarch64/pr109072_1.c |  4 +--
 .../gcc.target/aarch64/simd/mf8_data_1.c      | 18 ++++++-------
 .../gcc.target/aarch64/sve/pr125550.c         | 19 ++++++++++++++
 .../gcc.target/aarch64/sve/vec_init_5.c       | 26 +++++++++----------
 .../gcc.target/aarch64/vec-init-14.c          |  4 +--
 .../gcc.target/aarch64/vec-init-23.c          | 26 +++++++++----------
 gcc/testsuite/gcc.target/aarch64/vec-init-9.c | 12 ++++-----
 9 files changed, 73 insertions(+), 54 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr125550.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 843ad6cb076..b2e8fe3f6a9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4869,7 +4869,7 @@
    && (register_operand (operands[0], <VDBL>mode)
        || register_operand (operands[2], <MODE>mode))"
   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
-     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] 
uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
+     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] 
zip1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] 
ins\t%0.<single_type>[1], %<single_wx>2
      [ w        , 0  , ?r  ; f_mcr                     , *     ] 
fmov\t%0.d[1], %2
      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] 
ld1\t{%0.<single_type>}[1], %2
@@ -4886,7 +4886,7 @@
   "TARGET_FLOAT
    && !BYTES_BIG_ENDIAN"
   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
-     [ w        , w  , w   ; neon_permute              , simd  ] 
uzp1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
+     [ w        , w  , w   ; neon_permute              , simd  ] 
zip1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
      [ w        , 0  , w   ; neon_move                 , simd  ] 
mov\t%0.<single_type>[1], %2.<single_type>[0]
      [ w        , 0  , Utv ; neon_load1_one_lane       , simd  ] 
ld1\t{%0.<single_type>}[1], %2
      [ w        , 0  , r   ; neon_from_gp              , simd  ] 
ins\t%0.<single_type>[1], %<single_wx>2
@@ -4916,7 +4916,7 @@
    && (register_operand (operands[0], <VDBL>mode)
        || register_operand (operands[2], <MODE>mode))"
   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
-     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] 
uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
+     [ w        , w  , w   ; neon_permute<dblq>        , simd  ] 
zip1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] 
ins\t%0.<single_type>[1], %<single_wx>2
      [ w        , 0  , ?r  ; f_mcr                     , *     ] 
fmov\t%0.d[1], %2
      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] 
ld1\t{%0.<single_type>}[1], %2
@@ -4933,7 +4933,7 @@
   "TARGET_FLOAT
    && BYTES_BIG_ENDIAN"
   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
-     [ w        , w  , w   ; neon_permute              , simd  ] 
uzp1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
+     [ w        , w  , w   ; neon_permute              , simd  ] 
zip1\t%0.<Vdduptype>, %1.<Vdduptype>, %2.<Vdduptype>
      [ w        , 0  , w   ; neon_move                 , simd  ] 
mov\t%0.<single_type>[1], %2.<single_type>[0]
      [ w        , 0  , Utv ; neon_load1_one_lane       , simd  ] 
ld1\t{%0.<single_type>}[1], %2
      [ w        , 0  , r   ; neon_from_gp              , simd  ] 
ins\t%0.<single_type>[1], %<single_wx>2
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c 
b/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
index a6b4d50f34f..e8c975e900f 100644
--- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c
@@ -80,14 +80,14 @@ CONS2_FN (2, float);
 
 /*
 ** cons2_4_float:      { target aarch64_little_endian }
-**     uzp1    v([0-9])\.2s, v0\.2s, v1\.2s
+**     zip1    v([0-9])\.2s, v0\.2s, v1\.2s
 **     stp     d\1, d\1, \[x0\]
 **     stp     d\1, d\1, \[x0, #?16\]
 **     ret
 */
 /*
 ** cons2_4_float:      { target aarch64_big_endian }
-**     uzp1    v([0-9])\.2s, v1\.2s, v0\.2s
+**     zip1    v([0-9])\.2s, v1\.2s, v0\.2s
 **     stp     d\1, d\1, \[x0\]
 **     stp     d\1, d\1, \[x0, #?16\]
 **     ret
@@ -96,7 +96,7 @@ CONS2_FN (4, float);
 
 /*
 ** cons2_8_float:
-**     uzp1    v1\.2s, v0\.2s, v1\.2s
+**     zip1    v1\.2s, v0\.2s, v1\.2s
 **     dup     v([0-9]+)\.2d, v1\.d\[0\]
 **     stp     q\1, q\1, \[x0\]
 **     stp     q\1, q\1, \[x0, #?32\]
@@ -124,8 +124,8 @@ CONS4_FN (2, float);
 
 /*
 ** cons4_4_float:
-**     uzp1    v[0-9]+\.2s[^\n]+
-**     uzp1    v[0-9]+\.2s[^\n]+
+**     zip1    v[0-9]+\.2s[^\n]+
+**     zip1    v[0-9]+\.2s[^\n]+
 **     zip1    v([0-9]+).4s, [^\n]+
 **     stp     q\1, q\1, \[x0\]
 **     stp     q\1, q\1, \[x0, #?32\]
diff --git a/gcc/testsuite/gcc.target/aarch64/pr109072_1.c 
b/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
index 39d80222142..daaccf0b881 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr109072_1.c
@@ -54,7 +54,7 @@ f32x2_1 (float32_t x)
 
 /*
 ** f32x2_2:
-**     uzp1    v0\.2s, v0\.2s, v1\.2s
+**     zip1    v0\.2s, v0\.2s, v1\.2s
 **     ret
 */
 float32x2_t
@@ -166,7 +166,7 @@ f64x2_1 (float64_t x)
 
 /*
 ** f64x2_2:
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 float64x2_t
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c 
b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
index 79d1ccf6f7d..e440b899f59 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c
@@ -66,7 +66,7 @@ mfloat8x16_t test_bslq3(mfloat8x16_t a, uint8x16_t b, 
mfloat8x16_t c)
 
 /*
 ** test_combine1:
-**     uzp1    v0.2d, v1.2d, v2.2d
+**     zip1    v0.2d, v1.2d, v2.2d
 **     ret
 */
 mfloat8x16_t test_combine1(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c)
@@ -1397,7 +1397,7 @@ mfloat8x8_t test_tbl1(mfloat8x8_t a, uint8x8_t b)
 
 /*
 ** test_tbl2:
-**     uzp1    v([0-9]+).2d, v0.2d, v1.2d
+**     zip1    v([0-9]+).2d, v0.2d, v1.2d
 **     tbl     v0.8b, {v\1.16b}, v2.8b
 **     ret
 */
@@ -1408,7 +1408,7 @@ mfloat8x8_t test_tbl2(mfloat8x8x2_t a, uint8x8_t b)
 
 /*
 ** test_tbl3:
-**     uzp1    v([0-9]+).2d, v0.2d, v1.2d
+**     zip1    v([0-9]+).2d, v0.2d, v1.2d
 **     fmov    d([0-9]+), d2
 **     tbl     v0.8b, {v\1.16b( - |, )v\2.16b}, v3.8b
 **     ret
@@ -1420,8 +1420,8 @@ mfloat8x8_t test_tbl3(mfloat8x8x3_t a, uint8x8_t b)
 
 /*
 ** test_tbl4:
-**     uzp1    v([0-9]+).2d, v0.2d, v1.2d
-**     uzp1    v([0-9]+).2d, v2.2d, v3.2d
+**     zip1    v([0-9]+).2d, v0.2d, v1.2d
+**     zip1    v([0-9]+).2d, v2.2d, v3.2d
 **     tbl     v0.8b, {v\1.16b( - |, )v\2.16b}, v4.8b
 **     ret
 */
@@ -1526,7 +1526,7 @@ mfloat8x8_t test_tbx1(mfloat8x8_t a, mfloat8x8_t b, 
uint8x8_t c)
 
 /*
 ** test_tbx2:
-**     uzp1    v([0-9]+).2d, v1.2d, v2.2d
+**     zip1    v([0-9]+).2d, v1.2d, v2.2d
 **     tbx     v[0-9]+.8b, {v\1.16b}, v3.8b
 **     ret
 */
@@ -1537,7 +1537,7 @@ mfloat8x8_t test_tbx2(mfloat8x8_t a, mfloat8x8x2_t b, 
uint8x8_t c)
 
 /*
 ** test_tbx3:
-**     uzp1    v([0-9]+).2d, v1.2d, v2.2d
+**     zip1    v([0-9]+).2d, v1.2d, v2.2d
 **     fmov    d([0-9]+), d3
 **     tbl     v[0-9]+.8b, {v\1.16b( - |, )v\2.16b}, v4.8b
 **     ...
@@ -1552,8 +1552,8 @@ mfloat8x8_t test_tbx3(mfloat8x8_t a, mfloat8x8x3_t b, 
uint8x8_t c)
 
 /*
 ** test_tbx4:
-**     uzp1    v([0-9]+).2d, v1.2d, v2.2d
-**     uzp1    v([0-9]+).2d, v3.2d, v4.2d
+**     zip1    v([0-9]+).2d, v1.2d, v2.2d
+**     zip1    v([0-9]+).2d, v3.2d, v4.2d
 **     tbx     v0.8b, {v\1.16b( - |, )v\2.16b}, v5.8b
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr125550.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr125550.c
new file mode 100644
index 00000000000..89186dc07c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr125550.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv9.5-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+svfloat16_t foo (float x0, float x1)
+{
+  return svdupq_n_f16 (x0, x1, x0, x1, x0, x1, x0, x1);
+}
+
+/*
+** foo:
+**     fcvt    h([01]), s\1
+**     fcvt    h([01]), s\2
+**     zip1    v0\.4h, v0\.4h, v1\.4h
+**     mov     z0\.s, s0
+**     ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c 
b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
index 2bc9a3aeba5..0dd085a9423 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
@@ -183,15 +183,15 @@
 ** test_float16_2:
 **     fcvt    h1, s1
 **     fcvt    h0, s0
-**     uzp1    v0\.4h, v0\.4h, v1\.4h
+**     zip1    v0\.4h, v0\.4h, v1\.4h
 **     mov     z0\.s, s0
 **     ret
 */
 
 /*
 ** test_float16_3:
-**     uzp1    v2\.2s, v0\.2s, v2\.2s
-**     uzp1    v3\.2s, v1\.2s, v3\.2s
+**     zip1    v2\.2s, v0\.2s, v2\.2s
+**     zip1    v3\.2s, v1\.2s, v3\.2s
 **     zip1    v3\.4s, v2\.4s, v3\.4s
 **     fcvtn   v3\.4h, v3\.4s
 **     mov     z0\.d, d3
@@ -210,7 +210,7 @@
 ** test_float16_5:
 **     movi    v31\.4h, #0
 **     fcvt    h0, s0
-**     uzp1    v0\.4h, v31\.4h, v0\.4h
+**     zip1    v0\.4h, v31\.4h, v0\.4h
 **     mov     z0\.s, s0
 **     ret
 */
@@ -221,7 +221,7 @@
 **     fcvt    h1, s1
 **     fmov    h31, 1.0e\+0
 **     fmov    h2, h2
-**     uzp1    v1\.4h, v1\.4h, v31\.4h
+**     zip1    v1\.4h, v1\.4h, v31\.4h
 **     dup     v0\.2s, v2\.s\[0\]
 **     dup     v1\.2s, v1\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -235,8 +235,8 @@
 **     fcvt    h2, s1
 **     movi    v0\.4h, #0
 **     fmov    h1, 1.0e\+0
-**     uzp1    v1\.4h, v1\.4h, v2\.4h
-**     uzp1    v0\.4h, v0\.4h, v3\.4h
+**     zip1    v1\.4h, v1\.4h, v2\.4h
+**     zip1    v0\.4h, v0\.4h, v3\.4h
 **     dup     v1\.2s, v1\.s\[0\]
 **     dup     v0\.2s, v0\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -249,7 +249,7 @@
 **     fcvt    h1, s1
 **     fcvt    h0, s0
 **     movi    v31\.2s, 0x3c, lsl 24
-**     uzp1    v0\.4h, v0\.4h, v1.4h
+**     zip1    v0\.4h, v0\.4h, v1.4h
 **     dup     v0\.2s, v0\.s\[0\]
 **     zip1    v0\.8h, v31\.8h, v0\.8h
 **     dup     z0\.q, z0\.q\[0\]
@@ -261,8 +261,8 @@
 **     fcvt    h1, s1
 **     fcvt    h2, s2
 **     fcvt    h0, s0
-**     uzp1    v0\.4h, v0\.4h, v1\.4h
-**     uzp1    v1\.4h, v1\.4h, v2\.4h
+**     zip1    v0\.4h, v0\.4h, v1\.4h
+**     zip1    v1\.4h, v1\.4h, v2\.4h
 **     dup     v0\.2s, v0\.s\[0\]
 **     dup     v1\.2s, v1\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -275,7 +275,7 @@
 **     fcvt    h2, s2
 **     fcvt    h0, s0
 **     fcvt    h1, s1
-**     uzp1    v0\.4h, v0\.4h, v2\.4h
+**     zip1    v0\.4h, v0\.4h, v2\.4h
 **     dup     v1\.4h, v1\.h\[0\]
 **     dup     v0\.2s, v0\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -386,7 +386,7 @@
 
 /*
 ** test_float32_2:
-**     uzp1    v0\.2s, v0\.2s, v1\.2s
+**     zip1    v0\.2s, v0\.2s, v1\.2s
 **     mov     z0\.d, d0
 **     ret
 */
@@ -401,7 +401,7 @@
 /*
 ** test_float32_4:
 **     movi    v31\.2s, #0
-**     uzp1    v0\.2s, v31\.2s, v0\.2s
+**     zip1    v0\.2s, v31\.2s, v0\.2s
 **     mov     z0\.d, d0
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-14.c 
b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
index 1a2cc9fbf47..ea719f32e4f 100644
--- a/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
@@ -67,7 +67,7 @@ int32x2_t s32_6(int32_t a0, int32_t a1) {
 
 /*
 ** f32_1:
-**     uzp1    v0\.2s, v0\.2s, v1\.2s
+**     zip1    v0\.2s, v0\.2s, v1\.2s
 **     ret
 */
 float32x2_t f32_1(float32_t a0, float32_t a1) {
@@ -90,7 +90,7 @@ float32x2_t f32_2(float32_t a0, float32_t *ptr) {
 /*
 ** f32_3:
 **     ldr     s0, \[x0\]
-**     uzp1    v0\.2s, v0\.2s, v1\.2s
+**     zip1    v0\.2s, v0\.2s, v1\.2s
 **     ret
 */
 float32x2_t f32_3(float32_t a0, float32_t a1, float32_t *ptr) {
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-23.c 
b/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
index 2a209509d1b..9374da8a84d 100644
--- a/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-23.c
@@ -242,15 +242,15 @@ TEST_64(int, int64_t, s)
 ** test_float16_2:
 **     fcvt    h1, s1
 **     fcvt    h0, s0
-**     uzp1    v0\.4h, v0\.4h, v1\.4h
+**     zip1    v0\.4h, v0\.4h, v1\.4h
 **     dup     v0\.4s, v0\.s\[0\]
 **     ret
 */
 
 /*
 ** test_float16_3:
-**     uzp1    v2\.2s, v0\.2s, v2\.2s
-**     uzp1    v3\.2s, v1\.2s, v3\.2s
+**     zip1    v2\.2s, v0\.2s, v2\.2s
+**     zip1    v3\.2s, v1\.2s, v3\.2s
 **     zip1    v3\.4s, v2\.4s, v3\.4s
 **     fcvtn   v3\.4h, v3\.4s
 **     dup     v0\.2d, v3\.d\[0\]
@@ -269,7 +269,7 @@ TEST_64(int, int64_t, s)
 ** test_float16_5:
 **     movi    v31\.4h, #0
 **     fcvt    h0, s0
-**     uzp1    v0\.4h, v31\.4h, v0\.4h
+**     zip1    v0\.4h, v31\.4h, v0\.4h
 **     dup     v0\.4s, v0\.s\[0\]
 **     ret
 */
@@ -280,7 +280,7 @@ TEST_64(int, int64_t, s)
 **     fcvt    h1, s1
 **     fmov    h31, 1.0e\+0
 **     fmov    h0, h0
-**     uzp1    v1\.4h, v1\.4h, v31\.4h
+**     zip1    v1\.4h, v1\.4h, v31\.4h
 **     dup     v0\.2s, v0\.s\[0\]
 **     dup     v1\.2s, v1\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -292,9 +292,9 @@ TEST_64(int, int64_t, s)
 **     fcvt    h0, s0
 **     movi    v31\.4h, #0
 **     fcvt    h1, s1
-**     uzp1    v31\.4h, v31\.4h, v0\.4h
+**     zip1    v31\.4h, v31\.4h, v0\.4h
 **     fmov    h0, 1.0e\+0
-**     uzp1    v0\.4h, v0\.4h, v1\.4h
+**     zip1    v0\.4h, v0\.4h, v1\.4h
 **     dup     v31\.2s, v31\.s\[0\]
 **     dup     v0\.2s, v0\.s\[0\]
 **     zip1    v0\.8h, v31\.8h, v0\.8h
@@ -306,7 +306,7 @@ TEST_64(int, int64_t, s)
 **     fcvt    h1, s1
 **     fcvt    h0, s0
 **     movi    v31\.2s, 0x3c, lsl 24
-**     uzp1    v0\.4h, v0\.4h, v1\.4h
+**     zip1    v0\.4h, v0\.4h, v1\.4h
 **     dup     v0\.2s, v0\.s\[0\]
 **     zip1    v0\.8h, v31\.8h, v0\.8h
 **     ret
@@ -317,8 +317,8 @@ TEST_64(int, int64_t, s)
 **     fcvt    h1, s1
 **     fcvt    h2, s2
 **     fcvt    h0, s0
-**     uzp1    v0\.4h, v0\.4h, v1\.4h
-**     uzp1    v1\.4h, v1\.4h, v2\.4h
+**     zip1    v0\.4h, v0\.4h, v1\.4h
+**     zip1    v1\.4h, v1\.4h, v2\.4h
 **     dup     v0\.2s, v0\.s\[0\]
 **     dup     v1\.2s, v1\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -330,7 +330,7 @@ TEST_64(int, int64_t, s)
 **     fcvt    h2, s2
 **     fcvt    h0, s0
 **     fcvt    h1, s1
-**     uzp1    v0\.4h, v0\.4h, v2\.4h
+**     zip1    v0\.4h, v0\.4h, v2\.4h
 **     dup     v1\.4h, v1\.h\[0\]
 **     dup     v0\.2s, v0\.s\[0\]
 **     zip1    v0\.8h, v0\.8h, v1\.8h
@@ -434,7 +434,7 @@ TEST_64(int, int64_t, s)
 
 /*
 ** test_float32_2:
-**     uzp1    v0\.2s, v0\.2s, v1\.2s
+**     zip1    v0\.2s, v0\.2s, v1\.2s
 **     dup     v0\.2d, v0\.d\[0\]
 **     ret
 */
@@ -449,7 +449,7 @@ TEST_64(int, int64_t, s)
 /*
 ** test_float32_4:
 **     movi    v31\.2s, #0
-**     uzp1    v0\.2s, v31\.2s, v0\.2s
+**     zip1    v0\.2s, v31\.2s, v0\.2s
 **     dup     v0\.2d, v0\.d\[0\]
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-9.c 
b/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
index 3cf05cf865e..8fccf278d31 100644
--- a/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-9.c
@@ -75,7 +75,7 @@ int64x2_t s64q_6(int64_t a0, int64_t a1) {
 
 /*
 ** f64q_1:
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 float64x2_t f64q_1(float64_t a0, float64_t a1) {
@@ -98,7 +98,7 @@ float64x2_t f64q_2(float64_t a0, float64_t *ptr) {
 /*
 ** f64q_3:
 **     ldr     d0, \[x0\]
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 float64x2_t f64q_3(float64_t a0, float64_t a1, float64_t *ptr) {
@@ -140,7 +140,7 @@ float64x2_t f64q_6(float64_t a0, float64_t a1) {
 
 /*
 ** s32q_1:
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 int32x4_t s32q_1(int32x2_t a0, int32x2_t a1) {
@@ -157,7 +157,7 @@ int32x4_t s32q_2(int32x2_t a0, int32x2_t *ptr) {
 /*
 ** s32q_3:
 **     ldr     d0, \[x0\]
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 int32x4_t s32q_3(int32x2_t a0, int32x2_t a1, int32x2_t *ptr) {
@@ -204,7 +204,7 @@ int32x4_t s32q_6(int32x2_t a0, int32x2_t a1) {
 
 /*
 ** f32q_1:
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 float32x4_t f32q_1(float32x2_t a0, float32x2_t a1) {
@@ -221,7 +221,7 @@ float32x4_t f32q_2(float32x2_t a0, float32x2_t *ptr) {
 /*
 ** f32q_3:
 **     ldr     d0, \[x0\]
-**     uzp1    v0\.2d, v0\.2d, v1\.2d
+**     zip1    v0\.2d, v0\.2d, v1\.2d
 **     ret
 */
 float32x4_t f32q_3(float32x2_t a0, float32x2_t a1, float32x2_t *ptr) {
-- 
2.43.0

[PATCH] aarch64: use ZIP1 instead of UZP1 for concatenation [PR125550]

Reply via email to