Hi,

There is a gap in swap optimization: it does not properly handle code
generated by __builtin_vsx_xxspltd.  This builtin is expanded into an
UNSPEC_VSX_XXSPLTD, which is currently treated as ok to swap without any
adjustment.  It should instead be treated as ok to swap only with special
handling to modify the lane used as the source of the splat.  We have
existing code to do this for other splat forms, so the patch is quite simple.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
Is this ok for trunk?  We also require backports to the GCC 5 and GCC 6 branches.

Thanks,
Bill


[gcc]

2017-01-13  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * config/rs6000/rs6000.c (rtx_is_swappable_p): Change
        UNSPEC_VSX_XXSPLTD to require special splat handling.

[gcc/testsuite]

2017-01-13  Bill Schmidt  <wschm...@linux.vnet.ibm.com>

        * gcc.target/powerpc/swaps-p8-27.c: New.


Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 244382)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -41271,6 +41271,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
          case UNSPEC_VSX_VEC_INIT:
            return 0;
          case UNSPEC_VSPLT_DIRECT:
+         case UNSPEC_VSX_XXSPLTD:
            *special = SH_SPLAT;
            return 1;
          case UNSPEC_REDUC_PLUS:
Index: gcc/testsuite/gcc.target/powerpc/swaps-p8-27.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/swaps-p8-27.c      (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/swaps-p8-27.c      (working copy)
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3 " } */
+/* { dg-final { scan-assembler-times "lxvd2x" 2 } } */
+/* { dg-final { scan-assembler-times "stxvd2x" 1 } } */
+/* { dg-final { scan-assembler-times "xxpermdi" 3 } } */
+
+/* Verify that swap optimization works correctly for a VSX direct splat.
+   The three xxpermdi's that are generated correspond to two splats
+   and the __builtin_vsx_xxpermdi.  */
+
+int printf (const char *__restrict __format, ...);
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+double s1[] = {2134.3343, 6678.346};
+double s2[] = {41124.234, 6678.346};
+long long dd[] = {1, 2}, d[2];
+union{long long l[2]; double d[2];} e;
+
+void
+foo ()
+{
+  __m128d source1, source2, dest;
+  __m128d a, b, c;
+
+  e.d[1] = s1[1];
+  e.l[0] = !__builtin_isunordered(s1[0], s2[0]) 
+    && s1[0] == s2[0] ? -1 : 0;
+  source1 = __builtin_vec_vsx_ld (0, s1);
+  source2 = __builtin_vec_vsx_ld (0, s2);
+  a = __builtin_vec_splat (source1, 0);
+  b = __builtin_vec_splat (source2, 0);
+  c = (__m128d)__builtin_vec_cmpeq (a, b);
+  dest = __builtin_vsx_xxpermdi (source1, c, 1);
+  *(__m128d *)d = dest;
+}


Reply via email to