Hi,

One final patch in the series, this one for vec_sum2s.  This builtin
requires some additional code generation for the case of little endian
without -maltivec=be.  Here's an example:

  va = {-10,1,2,3};        0x 00000003 00000002 00000001 fffffff6
  vb = {100,101,102,-103}; 0x ffffff99 00000066 00000065 00000064
  vc = vec_sum2s (va, vb); 0x ffffff9e 00000000 0000005c 00000000
                              = {0,92,0,-98};
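
For reference, here is a scalar C sketch (illustration only, not part of
the patch) of the semantics we want the builtin to provide in terms of
the element numbering the program sees; the signed saturation that
vsum2sws performs is ignored, since it doesn't trigger for these small
values:

  /* Reference semantics for vc = vec_sum2s (va, vb), with elements
     numbered as the program sees them (little-endian order here).  */
  static void
  sum2s_ref (const int va[4], const int vb[4], int vc[4])
  {
    vc[0] = 0;
    vc[1] = va[0] + va[1] + vb[1];   /* -10 + 1 + 101 =  92 */
    vc[2] = 0;
    vc[3] = va[2] + va[3] + vb[3];   /*   2 + 3 - 103 = -98 */
  }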

We need to add -10 + 1 + 101 = 92 and place it in vc[1], and add 2 + 3 +
(-103) = -98 and place that result in vc[3], with zeroes in the other two
elements.  To do this, we first use "vsldoi vs,vb,vb,12" to rotate -103
and 101 into big-endian elements 1 and 3, as required by the vsum2sws
instruction:

  0x ffffff99 00000066 00000065 00000064 ffffff99 00000066 00000065 00000064
                                ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^
                          vs =  00000064 ffffff99 00000066 00000065

Executing "vsum2sws vs,va,vs" then gives

  vs = 0x 00000000 ffffff9e 00000000 0000005c

which must then be rotated into its final position with "vsldoi vc,vs,vs,4":

  0x 00000000 ffffff9e 00000000 0000005c 00000000 ffffff9e 00000000 0000005c
              ^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^
         vc = ffffff9e 00000000 0000005c 00000000

which is the desired result.
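
For anyone who wants to check the sequence, here is a quick scalar
emulation (illustration only, not part of the patch).  The arrays hold
the register words in big-endian element order, i.e. w[0] is the
leftmost word as printed above, and vsum2sws's saturation is again
ignored:

  #include <stdio.h>

  /* vsldoi x,v,v,4*by rotates the register left by "by" words.  */
  static void
  rot_words (const int in[4], int by, int out[4])
  {
    for (int i = 0; i < 4; i++)
      out[i] = in[(i + by) % 4];
  }

  int
  main (void)
  {
    int va[4] = { 3, 2, 1, -10 };        /* {-10,1,2,3} for the program  */
    int vb[4] = { -103, 102, 101, 100 }; /* {100,101,102,-103} likewise  */
    int vs[4], vc[4];

    rot_words (vb, 3, vs);               /* vsldoi vs,vb,vb,12 */

    vs[1] = va[0] + va[1] + vs[1];       /* vsum2sws vs,va,vs  */
    vs[3] = va[2] + va[3] + vs[3];
    vs[0] = vs[2] = 0;

    rot_words (vs, 1, vc);               /* vsldoi vc,vs,vs,4  */

    /* Printed in the program's (little-endian) element order: 0 92 0 -98.  */
    printf ("%d %d %d %d\n", vc[3], vc[2], vc[1], vc[0]);
    return 0;
  }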

In addition to this change, I noticed a redundant test from one of my
previous patches and simplified the condition.  (BYTES_BIG_ENDIAN implies
VECTOR_ELT_ORDER_BIG, so there is no need to test BYTES_BIG_ENDIAN
separately.)

As usual, new test cases are added to cover the possible cases.  These
are simpler this time, since vector signed int is the only legal
argument type for vec_sum2s.
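
(For the -maltivec=be variant, the expected values follow from applying
big-endian element numbering to the same inputs: the register holding
{-10,1,2,3} has big-endian elements {3,2,1,-10}, and the one holding
{100,101,102,-103} has {-103,102,101,100}, so vsum2sws produces
{0, 3+2+102, 0, 1-10+100} = {0,107,0,91} in big-endian order, which
reads back as {91,0,107,0} in the program's element order.)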

Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions.  Is this ok for trunk?

Thanks,
Bill


gcc:

2014-02-04  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/altivec.md (altivec_vsum2sws): Adjust code
        generation for -maltivec=be.
        (altivec_vsumsws): Simplify redundant test.

gcc/testsuite:

2014-02-04  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.dg/vmx/sum2s.c: New.
        * gcc.dg/vmx/sum2s-be-order.c: New.


Index: gcc/testsuite/gcc.dg/vmx/sum2s.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/sum2s.c    (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/sum2s.c    (revision 0)
@@ -0,0 +1,13 @@
+#include "harness.h"
+
+static void test()
+{
+  vector signed int vsia = {-10,1,2,3};
+  vector signed int vsib = {100,101,102,-103};
+  vector signed int vsir;
+  vector signed int vsier = {0,92,0,-98};
+
+  vsir = vec_sum2s (vsia, vsib);
+
+  check (vec_all_eq (vsir, vsier), "vsir");
+}
Index: gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c
===================================================================
--- gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c   (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c   (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+  vector signed int vsia = {-10,1,2,3};
+  vector signed int vsib = {100,101,102,-103};
+  vector signed int vsir;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  vector signed int vsier = {91,0,107,0};
+#else
+  vector signed int vsier = {0,92,0,-98};
+#endif
+
+  vsir = vec_sum2s (vsia, vsib);
+
+  check (vec_all_eq (vsir, vsier), "vsir");
+}
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md        (revision 207479)
+++ gcc/config/rs6000/altivec.md        (working copy)
@@ -1592,10 +1610,21 @@
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
                       (match_operand:V4SI 2 "register_operand" "v")]
                     UNSPEC_VSUM2SWS))
-   (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+   (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+   (clobber (match_scratch:V4SI 3 "=v"))]
   "TARGET_ALTIVEC"
-  "vsum2sws %0,%1,%2"
-  [(set_attr "type" "veccomplex")])
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    return "vsum2sws %0,%1,%2";
+  else
+    return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4";
+}
+  [(set_attr "type" "veccomplex")
+   (set (attr "length")
+     (if_then_else
+       (match_test "VECTOR_ELT_ORDER_BIG")
+       (const_string "4")
+       (const_string "12")))])
 
 (define_insn "altivec_vsumsws"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -1606,7 +1635,7 @@
    (clobber (match_scratch:V4SI 3 "=v"))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
+  if (VECTOR_ELT_ORDER_BIG)
     return "vsumsws %0,%1,%2";
   else
     return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvspltw %0,%3,3";
@@ -1614,7 +1643,7 @@
   [(set_attr "type" "veccomplex")
    (set (attr "length")
      (if_then_else
-       (match_test "(BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)")
+       (match_test "(VECTOR_ELT_ORDER_BIG)")
        (const_string "4")
        (const_string "12")))])
 

