Hi,
One final patch in the series, this one for vec_sum2s. This builtin
requires some additional code generation for the case of little endian
without -maltivec=be. Here's an example:
va = {-10,1,2,3};        register layout: 0x00000003 00000002 00000001 fffffff6
vb = {100,101,102,-103}; register layout: 0xffffff99 00000066 00000065 00000064
vc = vec_sum2s (va, vb); register layout: 0xffffff9e 00000000 0000005c 00000000
   = {0,92,0,-98};
We need to add -10 + 1 + 101 = 92 and place it in vc[1], and add 2 + 3 +
-103 and place the result in vc[3], with zeroes in the other two
elements. To do this, we first use vsldoi vs,vb,vb,12 to rotate 101
and -103 into big-endian elements 1 and 3, as required by the vsum2sws
instruction:
     0xffffff99 00000066 00000065 00000064 ffffff99 00000066 00000065 00000064
vs =            00000064 ffffff99 00000066 00000065
Executing vsum2sws vs,va,vs then gives
vs = 0x00000000 ffffff9e 00000000 0000005c
which then must be shifted into position with vsldoi vc,vs,vs,4
     0x00000000 ffffff9e 00000000 0000005c 00000000 ffffff9e 00000000 0000005c
vc =            ffffff9e 00000000 0000005c 00000000
which is the desired result.
In addition to this change, I noticed a redundant test from one of my
previous patches and simplified it. (BYTES_BIG_ENDIAN implies
VECTOR_ELT_ORDER_BIG, so we don't need to test BYTES_BIG_ENDIAN.)
As usual, new test cases are added to cover the possible cases. These
are simpler this time since only vector signed integer is a legal type
for vec_sum2s.
Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no
regressions. Is this ok for trunk?
Thanks,
Bill
gcc:
2014-02-04  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
* config/rs6000/altivec.md (altivec_vsum2sws): Adjust code
generation for -maltivec=be.
(altivec_vsumsws): Simplify redundant test.
gcc/testsuite:
2014-02-04  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
* gcc.dg/vmx/sum2s.c: New.
* gcc.dg/vmx/sum2s-be-order.c: New.
Index: gcc/testsuite/gcc.dg/vmx/sum2s.c
===
--- gcc/testsuite/gcc.dg/vmx/sum2s.c(revision 0)
+++ gcc/testsuite/gcc.dg/vmx/sum2s.c(revision 0)
@@ -0,0 +1,13 @@
+#include "harness.h"
+
+static void test()
+{
+ vector signed int vsia = {-10,1,2,3};
+ vector signed int vsib = {100,101,102,-103};
+ vector signed int vsir;
+ vector signed int vsier = {0,92,0,-98};
+
+ vsir = vec_sum2s (vsia, vsib);
+
+ check (vec_all_eq (vsir, vsier), vsir);
+}
Index: gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c
===
--- gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c (revision 0)
+++ gcc/testsuite/gcc.dg/vmx/sum2s-be-order.c (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */
+
+#include "harness.h"
+
+static void test()
+{
+ vector signed int vsia = {-10,1,2,3};
+ vector signed int vsib = {100,101,102,-103};
+ vector signed int vsir;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ vector signed int vsier = {91,0,107,0};
+#else
+ vector signed int vsier = {0,92,0,-98};
+#endif
+
+ vsir = vec_sum2s (vsia, vsib);
+
+ check (vec_all_eq (vsir, vsier), vsir);
+}
Index: gcc/config/rs6000/altivec.md
===
--- gcc/config/rs6000/altivec.md(revision 207479)
+++ gcc/config/rs6000/altivec.md(working copy)
@@ -1592,10 +1610,21 @@
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
                       (match_operand:V4SI 2 "register_operand" "v")]
                      UNSPEC_VSUM2SWS))
-   (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+   (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+   (clobber (match_scratch:V4SI 3 "=v"))]
   "TARGET_ALTIVEC"
-  "vsum2sws %0,%1,%2"
-  [(set_attr "type" "veccomplex")])
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    return "vsum2sws %0,%1,%2";
+  else
+    return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4";
+}
+  [(set_attr "type" "veccomplex")
+   (set (attr "length")
+     (if_then_else
+       (match_test "VECTOR_ELT_ORDER_BIG")
+       (const_string "4")
+       (const_string "12")))])
 
 (define_insn "altivec_vsumsws"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -1606,7 +1635,7 @@
    (clobber (match_scratch:V4SI 3 "=v"))]
   "TARGET_ALTIVEC"
 {
-  if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
+  if (VECTOR_ELT_ORDER_BIG)
     return "vsumsws %0,%1,%2";
   else
     return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvspltw %0,%3,3";
@@ -1614,7 +1643,7 @@
   [(set_attr "type" "veccomplex")
    (set (attr "length")
      (if_then_else
-       (match_test "(BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)")
+       (match_test "VECTOR_ELT_ORDER_BIG")
        (const_string "4")
        (const_string "12")))])