Re: [PATCH 2/2] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Mar 2023 at 09:24, Matthias Kretz via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

>
>
> Clang fails to compile some constant expressions involving simd.
> Therefore, just disable this non-conforming extension for clang.
>
> Fix AVX512 blend implementation for Clang. It was converting the bitmask
> to bool before, which is obviously wrong. Instead use a Clang builtin to
> convert the bitmask to vector-mask before using a vector blend ?:. A
> similar change is required for the masked unary implementation, because
> the GCC builtins do not exist on Clang.
>
> Signed-off-by: Matthias Kretz 
>
> libstdc++-v3/ChangeLog:
>
> * include/experimental/bits/simd_detail.h: Don't declare the
> simd API as constexpr with Clang.
> * include/experimental/bits/simd_x86.h (__movm): New.
> (_S_blend_avx512): Resolve FIXME. Implement blend using __movm
> and ?:.
> (_SimdImplX86::_S_masked_unary): Clang does not implement the
> same builtins. Implement the function using __movm, ?:, and -
> operators on vector_size types instead.
>


+#if (defined __STRICT_ANSI__ && __STRICT_ANSI__) || defined __clang__

We don't generally care about -Wundef so this could be simplified to:

#if __STRICT_ANSI__ || defined __clang__

But it's OK as it is. OK for trunk.


[PATCH 2/2] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches


Clang fails to compile some constant expressions involving simd.
Therefore, just disable this non-conforming extension for clang.

Fix AVX512 blend implementation for Clang. It was converting the bitmask
to bool before, which is obviously wrong. Instead use a Clang builtin to
convert the bitmask to vector-mask before using a vector blend ?:. A
similar change is required for the masked unary implementation, because
the GCC builtins do not exist on Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't declare the
simd API as constexpr with Clang.
* include/experimental/bits/simd_x86.h (__movm): New.
(_S_blend_avx512): Resolve FIXME. Implement blend using __movm
and ?:.
(_SimdImplX86::_S_masked_unary): Clang does not implement the
same builtins. Implement the function using __movm, ?:, and -
operators on vector_size types instead.
---
 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 59 +--
 2 files changed, 56 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 30cc1ef0eef..f3745bf3e4c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -267,7 +267,7 @@ namespace experimental
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 
-#if defined __STRICT_ANSI__ && __STRICT_ANSI__
+#if (defined __STRICT_ANSI__ && __STRICT_ANSI__) || defined __clang__
 #define _GLIBCXX_SIMD_CONSTEXPR
 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
 #else
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 608918542c6..165738c4e2c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -363,6 +363,53 @@ __maskload_pd(const double* __ptr, _Tp __k)
 
 // }}}
 
+#ifdef __clang__
+template <size_t _Np, typename _Tp, typename _Kp>
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __movm(_Kp __k) noexcept
+  {
+static_assert(is_unsigned_v<_Kp>);
+if constexpr (sizeof(_Tp) == 1 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b128(__k);
+	else if constexpr (_Np <= 32 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b256(__k);
+	else
+	  return __builtin_ia32_cvtmask2b512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 2 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w128(__k);
+	else if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w256(__k);
+	else
+	  return __builtin_ia32_cvtmask2w512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 4 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d128(__k);
+	else if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d256(__k);
+	else
+	  return __builtin_ia32_cvtmask2d512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 8 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 2 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q128(__k);
+	else if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q256(__k);
+	else
+	  return __builtin_ia32_cvtmask2q512(__k);
+  }
+else
+  __assert_unreachable<_Tp>();
+  }
+#endif // __clang__
+
 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
 #include "simd_x86_conversions.h"
 #endif
@@ -619,14 +666,13 @@ _pdep_u32(
 _GLIBCXX_SIMD_INTRINSIC static _TV
 _S_blend_avx512(const _Kp __k, const _TV __a, const _TV __b) noexcept
 {
-#ifdef __clang__
-  // FIXME: this does a boolean choice, not a blend
-  return __k ? __a : __b;
-#else
   static_assert(__is_vector_type_v<_TV>);
   using _Tp = typename _VectorTraits<_TV>::value_type;
   static_assert(sizeof(_TV) >= 16);
   static_assert(sizeof(_Tp) <= 8);
+#ifdef __clang__
+  return __movm<_VectorTraits<_TV>::_S_full_size, _Tp>(__k) ? __b : __a;
+#else
   using _IntT
 	= conditional_t<(sizeof(_Tp) > 2),
 			conditional_t<sizeof(_Tp) == 4, int, long long>,
@@ -3483,6 +3529,10 @@ _S_masked_unary(const _SimdWrapper<_K, _Np> __k, const _SimdWrapper<_Tp, _Np> __
 	// optimize masked unary increment and decrement as masked sub +/-1
 	constexpr int __pm_one
 	  = is_same_v<_Op, __increment> ? -1 : 1;
+#ifdef __clang__
+	return __vector_bitcast<_Tp, _Np>(__movm<_Np,