I have no objections to this going into 4.5 and 4.6 since it corrects
the implementation of the neon intrinsics but please check with the
release managers.

OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?


Ok. Please allow 24 hours for an RM (cc'd) to comment since this is technically not a regression but is a long term bug that needs fixing.

cheers
Ramana


Thanks,
Ira

4.5 and 4.6 ChangeLog:

        Backport from mainline:
        2011-04-18  Ulrich Weigand<ulrich.weig...@linaro.org>
                   Ira Rosen<ira.ro...@linaro.org>

        PR target/48252
        * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
        to match neon_vzip/vuzp/vtrn_internal.
        * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
        outputs explicitly dependent on both inputs.
        (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.

testsuite/Changelog:

        Backport from mainline:
        2011-04-18  Ulrich Weigand<ulrich.weig...@linaro.org>
                   Ira Rosen<ira.ro...@linaro.org>

        PR target/48252
        * gcc.target/arm/pr48252.c: New test.


4.5 patch:

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 172714)
+++ config/arm/arm.c    (working copy)
@@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
    rtx tmp1 = gen_reg_rtx (mode);
    rtx tmp2 = gen_reg_rtx (mode);

-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));

    emit_move_insn (mem, tmp1);
    mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 172714)
+++ config/arm/neon.md  (working copy)
@@ -3895,13 +3895,14 @@

  (define_insn "neon_vtrn<mode>_internal"
    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VTRN2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
    "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
    [(set (attr "neon_type")
        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                      (const_string "neon_bp_simple")
@@ -3921,13 +3922,14 @@

  (define_insn "neon_vzip<mode>_internal"
    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VZIP2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
    "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
    [(set (attr "neon_type")
        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                      (const_string "neon_bp_simple")
@@ -3947,13 +3949,14 @@

  (define_insn "neon_vuzp<mode>_internal"
    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                       UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
    "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
    [(set (attr "neon_type")
        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                      (const_string "neon_bp_simple")
Index: testsuite/gcc.target/arm/pr48252.c
===================================================================
--- testsuite/gcc.target/arm/pr48252.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include<stdlib.h>
+
+int main(void)
+{
+    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+    uint8x8x2_t vd1, vd2;
+    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+    int i;
+
+    vd1 = vzip_u8(v1, vdup_n_u8(0));
+    vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+    vst1_u8(d1.buf, vd1.val[0]);
+    vst1_u8(d2.buf, vd1.val[1]);
+    vst1_u8(d3.buf, vd2.val[0]);
+    vst1_u8(d4.buf, vd2.val[1]);
+
+    for (i = 0; i<  8; i++)
+      if ((i % 2 == 0&&  d4.buf[i] != 2)
+          || (i % 2 == 1&&  d4.buf[i] != 0))
+         abort ();
+
+    return 0;
+}
+


4.6 patch:

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 172810)
+++ config/arm/arm.c    (working copy)
@@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
    rtx tmp1 = gen_reg_rtx (mode);
    rtx tmp2 = gen_reg_rtx (mode);

-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));

    emit_move_insn (mem, tmp1);
    mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 172810)
+++ config/arm/neon.md  (working copy)
@@ -4079,13 +4079,14 @@

  (define_insn "neon_vtrn<mode>_internal"
    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VTRN2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
    "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
    [(set (attr "neon_type")
        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                      (const_string "neon_bp_simple")
@@ -4105,13 +4106,14 @@

  (define_insn "neon_vzip<mode>_internal"
    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VZIP2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
    "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
    [(set (attr "neon_type")
        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                      (const_string "neon_bp_simple")
@@ -4131,13 +4133,14 @@

  (define_insn "neon_vuzp<mode>_internal"
    [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                       UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
    "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
    [(set (attr "neon_type")
        (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                      (const_string "neon_bp_simple")
Index: testsuite/gcc.target/arm/pr48252.c
===================================================================
--- testsuite/gcc.target/arm/pr48252.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include<stdlib.h>
+
+int main(void)
+{
+    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+    uint8x8x2_t vd1, vd2;
+    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+    int i;
+
+    vd1 = vzip_u8(v1, vdup_n_u8(0));
+    vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+    vst1_u8(d1.buf, vd1.val[0]);
+    vst1_u8(d2.buf, vd1.val[1]);
+    vst1_u8(d3.buf, vd2.val[0]);
+    vst1_u8(d4.buf, vd2.val[1]);
+
+    for (i = 0; i<  8; i++)
+      if ((i % 2 == 0&&  d4.buf[i] != 2)
+          || (i % 2 == 1&&  d4.buf[i] != 0))
+         abort ();
+
+    return 0;
+}
+



cheers
Ramana


Thanks,
Ira

ChangeLog:

2011-04-07  Ulrich Weigand<ulrich.weig...@linaro.org>
                    Ira Rosen<ira.ro...@linaro.org>

       PR target/48252
       * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
       to match neon_vzip/vuzp/vtrn_internal.
       * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
       outputs explicitly dependent on both inputs.
       (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.

testsuite/Changelog:

       PR target/48252
       * gcc.target/arm/pr48252.c: New test.



Reply via email to