[PATCH v2, rs6000 1/4] Fixes for x86 intrinsics on POWER 32bit

Paul Clarke Thu, 25 Oct 2018 12:08:11 -0700

Various clean-ups for 32-bit support.

Correct problems in the compatibility implementations of the x86 vector
intrinsics that were found after enabling 32-bit mode for the associated
test cases.  (The actual enablement comes in a subsequent patch.)


Bootstrapped and tested on Linux POWER8 LE, POWER8 BE (64 & 32), and POWER7.

OK for trunk?

v2: This patch is new in v2.

2018-10-25  Paul A. Clarke  <p...@us.ibm.com>

gcc/ChangeLog:

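        * config/rs6000/mmintrin.h: Enable 32bit compilation.  Remove
        __powerpc64__ guard.  Replace uses of __builtin_pack_vector_int128
        and __builtin_unpack_vector_int128 with vector initializers and
        element subscripts.
        * config/rs6000/xmmintrin.h: Enable 32bit compilation.  Replace
        uses of the same builtins likewise.
        (_mm_extract_pi16): Adjust element index for big-endian.
        (_mm_movemask_pi8): Make permute control unsigned long long.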

Index: gcc/config/rs6000/mmintrin.h
===================================================================
diff --git a/trunk/gcc/config/rs6000/mmintrin.h b/trunk/gcc/config/rs6000/mmintrin.h
--- a/trunk/gcc/config/rs6000/mmintrin.h        (revision 265495)
+++ b/trunk/gcc/config/rs6000/mmintrin.h        (working copy)
@@ -112,7 +112,6 @@
   return _mm_cvtsi64_si32 (__i);
 }
 
-#ifdef __powerpc64__
 /* Convert I to a __m64 object.  */
 
 /* Intel intrinsic.  */
@@ -173,9 +172,9 @@
   __vector signed short vm1;
   __vector signed char vresult;
 
-  vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1);
+  vm1 = (__vector signed short) (__vector unsigned long long) { __m2, __m1 };
   vresult = vec_vpkshss (vm1, vm1);
-  return (__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 
0);
+  return (__m64) ((vector long long) vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -193,9 +192,9 @@
   __vector signed int vm1;
   __vector signed short vresult;
 
-  vm1 = (__vector signed int)__builtin_pack_vector_int128 (__m2, __m1);
+  vm1 = (__vector signed int) (__vector unsigned long long) { __m2, __m1 };
   vresult = vec_vpkswss (vm1, vm1);
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 
0));
+  return (__m64) ((vector long long) vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -213,9 +212,9 @@
   __vector signed short vm1;
   __vector unsigned char vresult;
 
-  vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1);
+  vm1 = (__vector signed short) (__vector unsigned long long) { __m2, __m1 };
   vresult = vec_vpkshus (vm1, vm1);
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 
0));
+  return (__m64) ((vector long long) vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -236,7 +235,7 @@
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_mergel (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -317,7 +316,7 @@
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_mergel (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 1));
+  return (__m64) ((vector long long) c)[1];
 #else
   __m64_union m1, m2, res;
 
@@ -398,7 +397,7 @@
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_add (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -434,7 +433,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_add (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -466,7 +465,7 @@
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = vec_add (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -496,7 +495,7 @@
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_sub (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -532,7 +531,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_sub (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -564,7 +563,7 @@
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = vec_sub (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -754,7 +753,7 @@
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = (__vector signed char)vec_cmpgt (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -791,7 +790,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = (__vector signed short)vec_cmpeq (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -822,7 +821,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = (__vector signed short)vec_cmpgt (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -855,7 +854,7 @@
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = (__vector signed int)vec_cmpeq (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -884,7 +883,7 @@
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = (__vector signed int)vec_cmpgt (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -915,7 +914,7 @@
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -933,7 +932,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -951,7 +950,7 @@
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -970,7 +969,7 @@
   a = (__vector unsigned short)vec_splats (__m1);
   b = (__vector unsigned short)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -989,7 +988,7 @@
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1008,7 +1007,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1027,7 +1026,7 @@
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1046,7 +1045,7 @@
   a = (__vector unsigned short)vec_splats (__m1);
   b = (__vector unsigned short)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1068,7 +1067,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_vmsumshm (a, b, zero);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1096,7 +1095,7 @@
   w1 = vec_vmulosh (a, b);
   c = (__vector signed short)vec_perm (w0, w1, xform1);
 
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1115,7 +1114,7 @@
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = a * b;
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1136,7 +1135,7 @@
       m = (__vector signed short)vec_splats (__m);
       c = (__vector unsigned short)vec_splats ((unsigned short)__count);
       r = vec_sl (m, (__vector unsigned short)c);
-      return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+      return (__m64) ((vector long long) r)[0];
     }
   else
   return (0);
@@ -1205,7 +1204,7 @@
        m = (__vector signed short)vec_splats (__m);
        c = (__vector unsigned short)vec_splats ((unsigned short)__count);
        r = vec_sra (m, (__vector unsigned short)c);
-       return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+        return (__m64) ((vector long long) r)[0];
     }
   else
   return (0);
@@ -1274,7 +1273,7 @@
        m = (__vector unsigned short)vec_splats (__m);
        c = (__vector unsigned short)vec_splats ((unsigned short)__count);
        r = vec_sr (m, (__vector unsigned short)c);
-       return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+        return (__m64) ((vector long long) r)[0];
     }
   else
     return (0);
@@ -1417,7 +1416,7 @@
   __vector signed short w;
 
   w = (__vector signed short)vec_splats (__w);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)w, 0));
+  return (__m64) ((vector long long) w)[0];
 #else
   __m64_union res;
 
@@ -1437,7 +1436,7 @@
   __vector signed char b;
 
   b = (__vector signed char)vec_splats (__b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)b, 0));
+  return (__m64) ((vector long long) b)[0];
 #else
   __m64_union res;
 
@@ -1452,5 +1451,4 @@
   return (res.as_m64);
 #endif
 }
-#endif /* __powerpc64__ */
 #endif /* _MMINTRIN_H_INCLUDED */
Index: gcc/config/rs6000/xmmintrin.h
===================================================================
diff --git a/trunk/gcc/config/rs6000/xmmintrin.h b/trunk/gcc/config/rs6000/xmmintrin.h
--- a/trunk/gcc/config/rs6000/xmmintrin.h       (revision 265495)
+++ b/trunk/gcc/config/rs6000/xmmintrin.h       (working copy)
@@ -996,7 +996,7 @@
   rounded = vec_rint(temp);
   result = (__vector unsigned long long) vec_cts (rounded, 0);
 
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 
0));
+  return (__m64) ((vector long long) result)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1053,7 +1053,7 @@
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
   result = (__vector unsigned long long) vec_cts (temp, 0);
 
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 
0));
+  return (__m64) ((vector long long) result)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1104,7 +1104,7 @@
   __vector signed int vm1;
   __vector float vf1;
 
-  vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
+  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
   vf1 = (__vector float) vec_ctf (vm1, 0);
 
   return ((__m128) (__vector unsigned long long)
@@ -1126,7 +1126,7 @@
   __vector signed int vi4;
   __vector float vf1;
 
-  vs8 = (__vector signed short) __builtin_pack_vector_int128 (__A, __A);
+  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
   vi4 = vec_vupklsh (vs8);
   vf1 = (__vector float) vec_ctf (vi4, 0);
 
@@ -1143,7 +1143,7 @@
   __vector unsigned int vi4;
   __vector float vf1;
 
-  vs8 = (__vector unsigned short) __builtin_pack_vector_int128 (__A, __A);
+  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
   vi4 = (__vector unsigned int) vec_vmrglh (vs8, zero);
   vf1 = (__vector float) vec_ctf (vi4, 0);
 
@@ -1159,7 +1159,7 @@
   __vector signed int vi4;
   __vector float vf1;
 
-  vc16 = (__vector signed char) __builtin_pack_vector_int128 (__A, __A);
+  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
   vs8 = vec_vupkhsb (vc16);
   vi4 = vec_vupkhsh (vs8);
   vf1 = (__vector float) vec_ctf (vi4, 0);
@@ -1179,7 +1179,7 @@
   __vector unsigned int vi4;
   __vector float vf1;
 
-  vc16 = (__vector unsigned char) __builtin_pack_vector_int128 (__A, __A);
+  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
   vs8 = (__vector unsigned short) vec_vmrglb (vc16, zero);
   vi4 = (__vector unsigned int) vec_vmrghh (vs8,
                                            (__vector unsigned short) zero);
@@ -1195,7 +1195,7 @@
   __vector signed int vi4;
   __vector float vf4;
 
-  vi4 = (__vector signed int) __builtin_pack_vector_int128 (__B, __A);
+  vi4 = (__vector signed int) (__vector unsigned long long) { __B, __A };
   vf4 = (__vector float) vec_ctf (vi4, 0);
   return (__m128) vf4;
 }
@@ -1212,7 +1212,7 @@
   temp = vec_cts (rounded, 0);
   result = (__vector unsigned long long) vec_pack (temp, temp);
 
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 
0));
+  return (__m64) ((vector long long) result)[0];
 }
 
 /* Convert the four SPFP values in A to four signed 8-bit integers.  */
@@ -1224,15 +1224,12 @@
   static const __vector signed int zero = {0, 0, 0, 0};
   __vector signed short tmp_s;
   __vector signed char res_v;
-  __m64 result;
 
   rounded = vec_rint(__A);
   tmp_i = vec_cts (rounded, 0);
   tmp_s = vec_pack (tmp_i, zero);
   res_v = vec_pack (tmp_s, tmp_s);
-  result = (__m64) __builtin_unpack_vector_int128 ((__vector __int128)res_v, 
0);
-
-  return (result);
+  return (__m64) ((vector long long) res_v)[0];
 }
 
 /* Selects four specific SPFP values from A and B based on MASK.  */
@@ -1386,9 +1383,12 @@
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_extract_pi16 (__m64 const __A, int const __N)
 {
-  const int shiftr = (__N & 3) * 16;
+  unsigned int shiftr = __N & 3;
+#ifdef __BIG_ENDIAN__
+  shiftr = 3 - shiftr;
+#endif
 
-  return ((__A >> shiftr) & 0xffff);
+  return ((__A >> (shiftr * 16)) & 0xffff);
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1429,7 +1429,7 @@
   b = (__vector signed short)vec_splats (__B);
   c = (__vector __bool short)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -1467,7 +1467,7 @@
   b = (__vector unsigned char)vec_splats (__B);
   c = (__vector __bool char)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1503,7 +1503,7 @@
   b = (__vector signed short)vec_splats (__B);
   c = (__vector __bool short)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -1541,7 +1541,7 @@
   b = (__vector unsigned char)vec_splats (__B);
   c = (__vector __bool char)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1569,7 +1569,7 @@
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_movemask_pi8 (__m64 __A)
 {
-  unsigned long p = 0x0008101820283038UL; // permute control for sign bits
+  unsigned long long p = 0x0008101820283038UL; // permute control for sign bits
 
   return __builtin_bpermd (p, __A);
 }
@@ -1600,7 +1600,7 @@
   w1 = vec_vmulouh (a, b);
   c = (__vector unsigned short)vec_perm (w0, w1, xform1);
 
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1643,7 +1643,7 @@
   p = vec_splats (t.as_m64);
   a = vec_splats (__A);
   r = vec_perm (a, a, (__vector unsigned char)p);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1683,7 +1683,7 @@
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
   c = vec_avg (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1701,7 +1701,7 @@
   a = (__vector unsigned short)vec_splats (__A);
   b = (__vector unsigned short)vec_splats (__B);
   c = vec_avg (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
@@ -1723,8 +1723,8 @@
     { 0, 0, 0, 0 };
   unsigned short result;
 
-  a = (__vector unsigned char) __builtin_pack_vector_int128 (0UL, __A);
-  b = (__vector unsigned char) __builtin_pack_vector_int128 (0UL, __B);
+  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
+  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
   vmin = vec_min (a, b);
   vmax = vec_max (a, b);
   vabsdiff = vec_sub (vmax, vmin);

[PATCH v2, rs6000 1/4] Fixes for x86 intrinsics on POWER 32bit

Reply via email to