Re: [PATCH, rs6000 2/2] Add compatible implementations of x86 SSSE3 intrinsics

2018-10-23 Thread Segher Boessenkool
On Tue, Oct 23, 2018 at 10:29:35AM -0500, Paul Clarke wrote:
> On 10/22/2018 06:38 PM, Segher Boessenkool wrote:
> > On Mon, Oct 22, 2018 at 01:26:11PM -0500, Paul Clarke wrote:
> >> Target tests for the intrinsics defined in pmmintrin.h, copied from
> >> gcc.target/i386.
> >>
> >> Tested on POWER8 ppc64le and ppc64 (-m64 and -m32, the latter only
> >> reporting 16 new unsupported tests), and also by forcing -mcpu=power7
> >> on ppc64.
> > 
> > Why are they unsupported?  lp64?  Why do many of those tests require
> > lp64 anyway?  It's not obvious to me.
> 
> None of the x86 intrinsics compatibility implementation code has thus far 
> supported -m32, and I'd venture that it's not interesting to anyone.  Or at 
> least not worth the effort.

You can use it with -m32 just fine.  Disabling the tests isn't a good idea.
Either disable the *feature*, or enable the tests.

> > You tested on a >=p8 (with a compiler defaulting to that, too) I hope ;-)
> 
> As stated, "Tested on POWER8 ppc64le" (which defaults to -mcpu=power8), or 
> did you mean something else?

The big-endian part.  It does not default to p8 unless you arrange for
that specially.  The tests require p8 hardware, but that means if you
tested BE without --with-cpu=power8 or similar you really didn't test
anything.


Segher


Re: [PATCH, rs6000 2/2] Add compatible implementations of x86 SSSE3 intrinsics

2018-10-23 Thread Paul Clarke
On 10/22/2018 06:38 PM, Segher Boessenkool wrote:
> On Mon, Oct 22, 2018 at 01:26:11PM -0500, Paul Clarke wrote:
>> Target tests for the intrinsics defined in pmmintrin.h, copied from
>> gcc.target/i386.
>>
>> Tested on POWER8 ppc64le and ppc64 (-m64 and -m32, the latter only reporting
>> 16 new unsupported tests), and also by forcing -mcpu=power7 on ppc64.
> 
> Why are they unsupported?  lp64?  Why do many of those tests require
> lp64 anyway?  It's not obvious to me.

None of the x86 intrinsics compatibility implementation code has thus far 
supported -m32, and I'd venture that it's not interesting to anyone.  Or at 
least not worth the effort.

> You tested on a >=p8 (with a compiler defaulting to that, too) I hope ;-)

As stated, "Tested on POWER8 ppc64le" (which defaults to -mcpu=power8), or did 
you mean something else?

PC



Re: [PATCH, rs6000 2/2] Add compatible implementations of x86 SSSE3 intrinsics

2018-10-22 Thread Segher Boessenkool
On Mon, Oct 22, 2018 at 01:26:11PM -0500, Paul Clarke wrote:
> Target tests for the intrinsics defined in pmmintrin.h, copied from
> gcc.target/i386.
> 
> Tested on POWER8 ppc64le and ppc64 (-m64 and -m32, the latter only reporting
> 16 new unsupported tests), and also by forcing -mcpu=power7 on ppc64.

Why are they unsupported?  lp64?  Why do many of those tests require
lp64 anyway?  It's not obvious to me.

You tested on a >=p8 (with a compiler defaulting to that, too) I hope ;-)


Segher


[PATCH, rs6000 2/2] Add compatible implementations of x86 SSSE3 intrinsics

2018-10-22 Thread Paul Clarke
Target tests for the intrinsics defined in tmmintrin.h, copied from
gcc.target/i386.

Tested on POWER8 ppc64le and ppc64 (-m64 and -m32, the latter only reporting
16 new unsupported tests), and also by forcing -mcpu=power7 on ppc64.

[gcc/testsuite]

2018-10-22  Paul A. Clarke  

* gcc.target/powerpc/ssse3-check.h: New file.
* gcc.target/powerpc/ssse3-vals.h: New file.
* gcc.target/powerpc/ssse3-pabsb.c: New file.
* gcc.target/powerpc/ssse3-pabsd.c: New file.
* gcc.target/powerpc/ssse3-pabsw.c: New file.
* gcc.target/powerpc/ssse3-palignr.c: New file.
* gcc.target/powerpc/ssse3-phaddd.c: New file.
* gcc.target/powerpc/ssse3-phaddsw.c: New file.
* gcc.target/powerpc/ssse3-phaddw.c: New file.
* gcc.target/powerpc/ssse3-phsubd.c: New file.
* gcc.target/powerpc/ssse3-phsubsw.c: New file.
* gcc.target/powerpc/ssse3-phsubw.c: New file.
* gcc.target/powerpc/ssse3-pmaddubsw.c: New file.
* gcc.target/powerpc/ssse3-pmulhrsw.c: New file.
* gcc.target/powerpc/ssse3-pshufb.c: New file.
* gcc.target/powerpc/ssse3-psignb.c: New file.
* gcc.target/powerpc/ssse3-psignd.c: New file.
* gcc.target/powerpc/ssse3-psignw.c: New file.

Index: gcc/testsuite/gcc.target/powerpc/ssse3-check.h
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h
new file mode 100644
--- /dev/null   (revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h  (working copy)
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "m128-check.h"
+
+/* Define DEBUG to replace abort with printf on error.  */
+//#define DEBUG 1
+
+#define TEST ssse3_test
+
+static void ssse3_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+  ssse3_test ();
+}
+
+int
+main ()
+{
+#ifdef __BUILTIN_CPU_SUPPORTS__
+  /* Most SSE intrinsic operations can be implemented via VMX
+ instructions, but some operations may be faster / simpler
+ using the POWER8 VSX instructions.  This is especially true
+ when we are transferring / converting to / from __m64 types.
+ The direct register transfer instructions from POWER8 are
+ especially important.  So we test for arch_2_07.  */
+  if (__builtin_cpu_supports ("arch_2_07"))
+{
+  do_test ();
+#ifdef DEBUG
+  printf ("PASSED\n");
+#endif
+}
+#ifdef DEBUG
+  else
+printf ("SKIPPED\n");
+#endif
+#endif /* __BUILTIN_CPU_SUPPORTS__ */
+  return 0;
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
new file mode 100644
--- /dev/null   (revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h   (working copy)
@@ -0,0 +1,60 @@
+/* Compare the four ints at V1 and V2; return the number of mismatches.  */
+static int
+chk_128 (int *v1, int *v2)
+{
+  int i;
+  int n_fails = 0;
+
+  for (i = 0; i < 4; i++)
+if (v1[i] != v2[i])
+  n_fails += 1;
+
+  return n_fails;
+}
+
+static int vals [256] __attribute__ ((aligned(16))) =
+{
+  0x, 0x, 0x, 0x, 0x5be800ee, 0x4f2d7b15,
+  0x409d9291, 0xdd95f27f, 0x423986e3, 0x21a4d2cd, 0xa7056d84, 0x4f4e5a3b,
+  0x, 0x, 0x, 0x, 0x, 0x,
+  0x, 0x, 0x, 0x, 0x, 0x,
+  0x73ef0244, 0xcd836329, 0x847f634f, 0xa7e3abcf, 0xb4c14764, 0x1ef42c06,
+  0x504f29ac, 0x4ae7ca73, 0xaddde3c9, 0xf63ded2e, 0xa5d3553d, 0xa52ae05f,
+  0x6fd3c83a, 0x7dc2b300, 0x76b05de7, 0xea8ebae5, 0x549568dd, 0x172f0358,
+  0x917eadf0, 0x796fb0a7, 0xb39381af, 0xd0591d61, 0x731d2f17, 0xbc4b6f5d,
+  0x8ec664c2, 0x3c199c19, 0x9c81db12, 0x6d85913b, 0x486107a9, 0xab6f4b26,
+  0x5630d37c, 0x20836e85, 0x40d4e746, 0xdfbaba36, 0xbeacaa69, 0xb3c84083,
+  0x8a688eb4, 0x08cde481, 0x66e7a190, 0x74ee1639, 0xb3942a19, 0xe0c40471,
+  0x9b789489, 0x9751207a, 0x543a1524, 0x41da7ad6, 0x614bb563, 0xf86f57b1,
+  0x69e62199, 0x2150cb12, 0x9ed74062, 0x429471f4, 0xad28502b, 0xf2e2d4d5,
+  0x45b6ce09, 0xaaa5e649, 0xb46da484, 0x0a637515, 0xae7a3212, 0x5afc784c,
+  0x776cfbbe, 0x9c542bb2, 0x64193aa8, 0x16e8a655, 0x4e3d2f92, 0xe05d7b72,
+  0x89854ebc, 0x8c318814, 0xb81e76e0, 0x3f2625f5, 0x61b44852, 0x5209d7ad,
+  0x842fe317, 0xd3cfcca1, 0x8d287cc7, 0x80f0c9a8, 0x4215f4e5, 0x563993d6,
+  0x5d627433, 0xc4449e35, 0x5b4fe009, 0x3ef92286, 0xacbc8927, 0x549ab870,
+  0x9ac5b959, 0xed8f1c91, 0x7ecf02cd, 0x989c0e8b, 0xa31d6918, 0x1dc2bcc1,
+  0x99d3f3cc, 0x6857acc8, 0x45d7324a, 0xaebdf2e6, 0x7af2f2ae, 0x09716f73,
+  0x7816e694, 0xc65493c0, 0x9f7e87bc, 0xaa96cd40, 0xbfb5bfc6, 0x01a2cce7,
+  0x5f1d8c46, 0x45303efb, 0xb24607c3, 0xef2009a7, 0xba873753, 0xbefb14bc,
+  0x74e53cd3, 0x70124708, 0x6eb4bdbd, 0xf3ba5e43, 0x4c94085f,