Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-27 Thread Matthias Kretz
On Wednesday, 27 March 2024 10:50:41 CET Jonathan Wakely wrote:
> As discussed on IRC, please push the revised patch with your
> suggestions incorporated (and post to the lists for posterity).

The patch as pushed is attached.

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Center for Heavy Ion Research   https://gsi.de
 std::simd
──
commit 9ac3119fec81fb64d11dee8f853145f937389366
Author: Srinivas Yadav Singanaboina 
Date:   Sat Mar 16 19:04:35 2024 +

libstdc++: add ARM SVE support to std::experimental::simd

libstdc++-v3/ChangeLog:

* include/Makefile.am: Add simd_sve.h.
* include/Makefile.in: Add simd_sve.h.
* include/experimental/bits/simd.h: Add new SveAbi.
* include/experimental/bits/simd_builtin.h: Use
__no_sve_deduce_t to support existing Neon Abi.
* include/experimental/bits/simd_converter.h: Convert
sequentially when sve is available.
* include/experimental/bits/simd_detail.h: Define sve
specific macro.
* include/experimental/bits/simd_math.h: Fallback frexp
to execute sequentially when sve is available, to handle
fixed_size_simd return type that always uses sve.
* include/experimental/simd: Include bits/simd_sve.h.
* testsuite/experimental/simd/tests/bits/main.h: Enable
testing for sve128, sve256, sve512.
* include/experimental/bits/simd_sve.h: New file.

Signed-off-by: Srinivas Yadav Singanaboina 

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index cb902de36ae..422a0f4bd0a 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -835,6 +835,7 @@ experimental_bits_headers = \
 	${experimental_bits_srcdir}/simd_neon.h \
 	${experimental_bits_srcdir}/simd_ppc.h \
 	${experimental_bits_srcdir}/simd_scalar.h \
+	${experimental_bits_srcdir}/simd_sve.h \
 	${experimental_bits_srcdir}/simd_x86.h \
 	${experimental_bits_srcdir}/simd_x86_conversions.h \
 	${experimental_bits_srcdir}/string_view.tcc \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 9357087acb4..9fd4ab4848c 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -1181,6 +1181,7 @@ experimental_bits_headers = \
 	${experimental_bits_srcdir}/simd_neon.h \
 	${experimental_bits_srcdir}/simd_ppc.h \
 	${experimental_bits_srcdir}/simd_scalar.h \
+	${experimental_bits_srcdir}/simd_sve.h \
 	${experimental_bits_srcdir}/simd_x86.h \
 	${experimental_bits_srcdir}/simd_x86_conversions.h \
 	${experimental_bits_srcdir}/string_view.tcc \
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 974377c6798..03c2e17a326 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -39,12 +39,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
 #include 
 #endif
+#if _GLIBCXX_SIMD_HAVE_SVE
+#include 
+#endif
 
 /** @ingroup ts_simd
  * @{
@@ -83,6 +87,12 @@
 using __m512i [[__gnu__::__vector_size__(64)]] = long long;
 #endif
 
+#if _GLIBCXX_SIMD_HAVE_SVE
+constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
+#else
+constexpr inline int __sve_vectorized_size_bytes = 0;
+#endif
+
 namespace simd_abi {
 // simd_abi forward declarations {{{
 // implementation details:
@@ -108,6 +118,9 @@ struct _VecBuiltin
 template 
   struct _VecBltnBtmsk;
 
+template 
+  struct _SveAbi;
+
 template 
   using _VecN = _VecBuiltin;
 
@@ -123,6 +136,9 @@ struct _VecBltnBtmsk
 template 
   using _Neon = _VecBuiltin<_UsedBytes>;
 
+template 
+  using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>;
+
 // implementation-defined:
 using __sse = _Sse<>;
 using __avx = _Avx<>;
@@ -130,6 +146,7 @@ struct _VecBltnBtmsk
 using __neon = _Neon<>;
 using __neon128 = _Neon<16>;
 using __neon64 = _Neon<8>;
+using __sve = _Sve<>;
 
 // standard:
 template 
@@ -250,6 +267,9 @@ _S_apply(_Up* __ptr)
   false;
 #endif
 
+constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE;
+constexpr inline bool __have_sve2 = _GLIBCXX_SIMD_HAVE_SVE2;
+
 #ifdef _ARCH_PWR10
 constexpr inline bool __have_power10vec = true;
 #else
@@ -356,12 +376,14 @@ __machine_flags()
 		 | (__have_avx512vnni << 27)
 		 | (__have_avx512vpopcntdq<< 28)
 		 | (__have_avx512vp2intersect << 29);
-else if constexpr (__have_neon)
+else if constexpr (__have_neon || __have_sve)
   return __have_neon
 	   | (__have_neon_a32 << 1)
 	   | (__have_neon_a64 << 2)
 	   | (__have_neon_a64 << 2)
-	   | (__support_neon_float << 3);
+	   

Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-27 Thread Jonathan Wakely
On Wed, 27 Mar 2024 at 12:13, Richard Sandiford
 wrote:
>
> Matthias Kretz  writes:
> > On Wednesday, 27 March 2024 11:07:14 CET Richard Sandiford wrote:
> >> I'm still worried about:
> >>
> >>   #if _GLIBCXX_SIMD_HAVE_SVE
> >>   constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
> >>   #else
> >>   constexpr inline int __sve_vectorized_size_bytes = 0;
> >>   #endif
> >>
> >> and the direct use of __ARM_FEATURE_SVE_BITS elsewhere, for the reasons
> >> discussed here (including possible ODR problems):
> >>
> >>   https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640037.html
> >>   https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643734.html
> >>
> >> Logically the vector length should be a template parameter rather than
> >> an invariant.  Has this been resolved?  If not, it feels like a blocker
> >> to me (sorry).
> >
> > The vector length is always a template parameter to all user-facing API.
> > Some examples:
> >
> > 1. on aarch64 the following is independent of SVE flags (and status quo):
> >
> >   simd is an alias for
> >   simd
> >
> >   fixed_size_simd is supposed to be ABI-stable anyway (passed via
> >   the stack, alignof == sizeof).
> >
> > 2. with -msve-vector-bits=512:
> >
> >   native_simd is an alias for
> >   simd>
> >
> >   simd> is an alias for
> >   simd>
> >
> > 3. with -msve-vector-bits=256:
> >
> >   native_simd is an alias for
> >   simd>
> >
> >   simd> is an alias for
> >   simd>
> >
> > Implementation functions are either [[gnu::always_inline]] or tagged with
> > the ABI tag type and the __odr_helper template argument (to ensure
> > not-inlined inline functions have unique names).
>
> Ah, thanks for the explanation.  I think the global native_float alias
> is problematic for reasons that you touched on in your later message.
> I'll reply more about that there.  But in other respects this looks good.
>
> > Does that make __ARM_FEATURE_SVE_BITS usage indirect enough?
>
> In principle, the only use of __ARM_FEATURE_SVE_BITS should be to determine
> the definition of native_simd (with the caveats above).  But current
> GCC restrictions might make that impractical.
>
> > Also for context, please consider that this is std::*experimental*::simd.
> > The underlying ISO document will likely get retracted at some point and the
> > whole API and implementation (hopefully) superseded by C++26. The main
> > purpose of the spec and implementation is to gather experience.
>
> Ah, ok.  If this is a deliberate experiment for evidence-gathering
> purposes, rather than a long-term commitment, then I agree the barrier
> should be lower.

Yes, that's definitely what this code is for. The more feedback and
impl-experience we can get now with the std::experimental::simd
version, the better std::simd will be when that happens.

In practice, we probably won't ever actually remove the
<experimental/simd> header even when the experiment is over (e.g. we
still have <tr1/memory> with std::tr1::shared_ptr!), but we are likely
to consider it unmaintained and deprecated once it's superseded by
std::simd.

> So yeah, I'll withdraw my objection.  I've no problem with this going
> into GCC 14 on the basis above.  Thanks again to you and Srinivas for
> working on this.
>
> Richard
>



Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-27 Thread Richard Sandiford
Matthias Kretz  writes:
> On Wednesday, 27 March 2024 11:07:14 CET Richard Sandiford wrote:
>> I'm still worried about:
>> 
>>   #if _GLIBCXX_SIMD_HAVE_SVE
>>   constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
>>   #else
>>   constexpr inline int __sve_vectorized_size_bytes = 0;
>>   #endif
>> 
>> and the direct use of __ARM_FEATURE_SVE_BITS elsewhere, for the reasons
>> discussed here (including possible ODR problems):
>> 
>>   https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640037.html
>>   https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643734.html
>> 
>> Logically the vector length should be a template parameter rather than
>> an invariant.  Has this been resolved?  If not, it feels like a blocker
>> to me (sorry).
>
> The vector length is always a template parameter to all user-facing API. Some 
> examples
>
> 1. on aarch64 the following is independent of SVE flags (and status quo):
>
>   simd is an alias for
>   simd
>
>   fixed_size_simd is supposed to be ABI-stable anyway (passed via
>   the stack, alignof == sizeof).
>
> 2. with -msve-vector-bits=512:
>
>   native_simd is an alias for
>   simd>
>
>   simd> is an alias for
>   simd>
>
> 3. with -msve-vector-bits=256: 
>
>   native_simd is an alias for
>   simd>
>
>   simd> is an alias for
>   simd>
>
> Implementation functions are either [[gnu::always_inline]] or tagged with the 
> ABI tag type and the __odr_helper template argument (to ensure not-inlined 
> inline functions have unique names).

Ah, thanks for the explanation.  I think the global native_float alias
is problematic for reasons that you touched on in your later message.
I'll reply more about that there.  But in other respects this looks good.

> Does that make __ARM_FEATURE_SVE_BITS usage indirect enough?

In principle, the only use of __ARM_FEATURE_SVE_BITS should be to determine
the definition of native_simd (with the caveats above).  But current
GCC restrictions might make that impractical.

> Also for context, please consider that this is std::*experimental*::simd. The 
> underlying ISO document will likely get retracted at some point and the whole 
> API and implementation (hopefully) superseded by C++26. The main purpose of 
> the spec and implementation is to gather experience.

Ah, ok.  If this is a deliberate experiment for evidence-gathering
purposes, rather than a long-term commitment, then I agree the barrier
should be lower.

So yeah, I'll withdraw my objection.  I've no problem with this going
into GCC 14 on the basis above.  Thanks again to you and Srinivas for
working on this.

Richard


Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-27 Thread Matthias Kretz
On Wednesday, 27 March 2024 11:07:14 CET Richard Sandiford wrote:
> I'm still worried about:
> 
>   #if _GLIBCXX_SIMD_HAVE_SVE
>   constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
>   #else
>   constexpr inline int __sve_vectorized_size_bytes = 0;
>   #endif
> 
> and the direct use of __ARM_FEATURE_SVE_BITS elsewhere, for the reasons
> discussed here (including possible ODR problems):
> 
>   https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640037.html
>   https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643734.html
> 
> Logically the vector length should be a template parameter rather than
> an invariant.  Has this been resolved?  If not, it feels like a blocker
> to me (sorry).

The vector length is always a template parameter to all user-facing API. Some 
examples

1. on aarch64 the following is independent of SVE flags (and status quo):

  simd is an alias for
  simd

  fixed_size_simd is supposed to be ABI-stable anyway (passed via
  the stack, alignof == sizeof).

2. with -msve-vector-bits=512:

  native_simd is an alias for
  simd>

  simd> is an alias for
  simd>

3. with -msve-vector-bits=256: 

  native_simd is an alias for
  simd>

  simd> is an alias for
  simd>

Implementation functions are either [[gnu::always_inline]] or tagged with the 
ABI tag type and the __odr_helper template argument (to ensure not-inlined 
inline functions have unique names).

Does that make __ARM_FEATURE_SVE_BITS usage indirect enough?

Also for context, please consider that this is std::*experimental*::simd. The 
underlying ISO document will likely get retracted at some point and the whole 
API and implementation (hopefully) superseded by C++26. The main purpose of 
the spec and implementation is to gather experience.

Best,
  Matthias

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Center for Heavy Ion Research   https://gsi.de
 std::simd
──


signature.asc
Description: This is a digitally signed message part.


Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-27 Thread Richard Sandiford
Jonathan Wakely  writes:
> On Fri, 8 Mar 2024 at 09:58, Matthias Kretz wrote:
>>
>> Hi,
>>
>> I applied and did extended testing on x86_64 (no regressions) and aarch64
>> using qemu testing SVE 256, 512, and 1024. Looks good!
>>
>> While going through the applied patch I noticed a few style issues that I
>> simply turned into a patch (attached).
>>
> [...]
>>
>> From my side, with the noted changes the patch is ready for merging.
>> @Jonathan, any chance for a green light before GCC 14.1?
>
> As discussed on IRC, please push the revised patch with your
> suggestions incorporated (and post to the lists for posterity).
>
> Thanks, everybody, for the patches and the thorough review.

I'm still worried about:

  #if _GLIBCXX_SIMD_HAVE_SVE
  constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
  #else
  constexpr inline int __sve_vectorized_size_bytes = 0;
  #endif

and the direct use of __ARM_FEATURE_SVE_BITS elsewhere, for the reasons
discussed here (including possible ODR problems):

  https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640037.html
  https://gcc.gnu.org/pipermail/gcc-patches/2024-January/643734.html

Logically the vector length should be a template parameter rather than
an invariant.  Has this been resolved?  If not, it feels like a blocker
to me (sorry).

Thanks,
Richard


Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-27 Thread Jonathan Wakely
On Fri, 8 Mar 2024 at 09:58, Matthias Kretz wrote:
>
> Hi,
>
> I applied and did extended testing on x86_64 (no regressions) and aarch64
> using qemu testing SVE 256, 512, and 1024. Looks good!
>
> While going through the applied patch I noticed a few style issues that I
> simply turned into a patch (attached).
>
[...]
>
> From my side, with the noted changes the patch is ready for merging.
> @Jonathan, any chance for a green light before GCC 14.1?

As discussed on IRC, please push the revised patch with your
suggestions incorporated (and post to the lists for posterity).

Thanks, everybody, for the patches and the thorough review.



Re: [PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-03-08 Thread Matthias Kretz
Hi,

I applied and did extended testing on x86_64 (no regressions) and aarch64 
using qemu testing SVE 256, 512, and 1024. Looks good!

While going through the applied patch I noticed a few style issues that I 
simply turned into a patch (attached).

A few comments inline. Sorry for not seeing these before.

On Friday, 9 February 2024 15:28:10 CET Srinivas Yadav Singanaboina wrote:
> diff --git a/libstdc++-v3/include/experimental/bits/simd.h
> b/libstdc++-v3/include/experimental/bits/simd.h index
> 90523ea57dc..d274cd740fe 100644
> --- a/libstdc++-v3/include/experimental/bits/simd.h
> +++ b/libstdc++-v3/include/experimental/bits/simd.h
> @@ -39,12 +39,16 @@
>  #include 
>  #include 
>  #include 
> +#include 
> 
>  #if _GLIBCXX_SIMD_X86INTRIN
>  #include 
>  #elif _GLIBCXX_SIMD_HAVE_NEON
>  #include 
>  #endif
> +#if _GLIBCXX_SIMD_HAVE_SVE
> +#include 
> +#endif
> 
>  /** @ingroup ts_simd
>   * @{
> @@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double;
>  using __m512i [[__gnu__::__vector_size__(64)]] = long long;
>  #endif
> 
> +#if _GLIBCXX_SIMD_HAVE_SVE
> +constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS /
> 8; +#else
> +constexpr inline int __sve_vectorized_size_bytes = 0;
> +#endif
> +
>  namespace simd_abi {
>  // simd_abi forward declarations {{{
>  // implementation details:
> @@ -108,6 +118,9 @@ template 
>  template 
>struct _VecBltnBtmsk;
> 
> +template 
> +  struct _SveAbi;
> +
>  template 
>using _VecN = _VecBuiltin;
> 
> @@ -123,6 +136,9 @@ template 
>  template 
>using _Neon = _VecBuiltin<_UsedBytes>;
> 
> +template 
> +  using _Sve = _SveAbi<_UsedBytes, __sve_vectorized_size_bytes>;
> +
>  // implementation-defined:
>  using __sse = _Sse<>;
>  using __avx = _Avx<>;
> @@ -130,6 +146,7 @@ using __avx512 = _Avx512<>;
>  using __neon = _Neon<>;
>  using __neon128 = _Neon<16>;
>  using __neon64 = _Neon<8>;
> +using __sve = _Sve<>;
> 
>  // standard:
>  template 
> @@ -250,6 +267,8 @@ constexpr inline bool __support_neon_float =
>false;
>  #endif
> 
> +constexpr inline bool __have_sve = _GLIBCXX_SIMD_HAVE_SVE;
> +
>  #ifdef _ARCH_PWR10
>  constexpr inline bool __have_power10vec = true;
>  #else
> @@ -356,12 +375,13 @@ namespace __detail
> 
>| (__have_avx512vnni << 27)
>| (__have_avx512vpopcntdq<< 28)
>| (__have_avx512vp2intersect << 29);
> 
> -else if constexpr (__have_neon)
> +else if constexpr (__have_neon || __have_sve)
>return __have_neon
> 
>  | (__have_neon_a32 << 1)
>  | (__have_neon_a64 << 2)
>  | (__have_neon_a64 << 2)
> 
> -| (__support_neon_float << 3);
> +| (__support_neon_float << 3)
> + | (__have_sve << 4);

This is not enough. This should list all feature flags that might have a 
(significant enough) influence on code-gen in inline functions (that are not 
always_inline). AFAIU at least __ARM_FEATURE_SVE2 is necessary. But I assume 
__ARM_FEATURE_SVE2_BITPERM, __ARM_FEATURE_SVE_BITS, 
__ARM_FEATURE_SVE_MATMUL_INT8, and __ARM_FEATURE_SVE_VECTOR_OPERATORS are also 
relevant. Maybe more?

> [...]
bits/simd.h:

>  // fall back to fixed_size only if scalar and native ABIs don't match
>  template 
>struct __deduce_fixed_size_fallback {};
> 
> +template 
> +  struct __no_sve_deduce_fixed_size_fallback {};
> +
>  template 
>struct __deduce_fixed_size_fallback<_Tp, _Np,
>  enable_if_t::template _S_is_valid_v<_Tp>>>
>{ using type = simd_abi::fixed_size<_Np>; };
> 
> +template 
> +  struct __no_sve_deduce_fixed_size_fallback<_Tp, _Np,
> +enable_if_t::template _S_is_valid_v<_Tp>>>
> +  { using type = simd_abi::fixed_size<_Np>; };
> +
>  template 
>struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
> 
> +template 
> +  struct __no_sve_deduce_impl : public
> __no_sve_deduce_fixed_size_fallback<_Tp, _Np> {};

I believe you don't need __no_sve_deduce_fixed_size_fallback. Simply derive 
__no_sve_deduce_impl from __deduce_fixed_size_fallback. No?


> diff --git a/libstdc++-v3/include/experimental/bits/simd_converter.h
> b/libstdc++-v3/include/experimental/bits/simd_converter.h index
> 3160e251632..b233d2c70a5 100644
> --- a/libstdc++-v3/include/experimental/bits/simd_converter.h
> +++ b/libstdc++-v3/include/experimental/bits/simd_converter.h
> @@ -28,6 +28,18 @@
>  #if __cplusplus >= 201703L
> 
>  _GLIBCXX_SIMD_BEGIN_NAMESPACE
> +
> +template 
> +_Ret __converter_fallback(_Arg __a)
> +  {
> +  _Ret __ret{};
> +  __execute_n_times<_Np>(
> +  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
> +__ret._M_set(__i, static_cast<_To>(__a[__i]));
> +});
> +  return __ret;
> +  }
> +
>  // _SimdConverter scalar -> scalar {{{
>  template 
>struct _SimdConverter<_From, simd_abi::scalar, _To, simd_abi::scalar,
> @@ -56,14 +68,16 @@ template 
>};
> 
>  // }}}
> -// _SimdConverter "native 1" -> "native 2" {{{
> +// _SimdConverter "native 

[PATCH v2] libstdc++: add ARM SVE support to std::experimental::simd

2024-02-09 Thread Srinivas Yadav Singanaboina
Hi,

Thanks for the review, @Richard! I have tried to address most of your comments
in this patch.
The major updates include optimizing operator[] for masks, find_first_set and 
find_last_set.

My further comments on some of the pointed out issues are
a. Regarding the coverage of types supported for SVE: yes, all the types are
covered by mapping any type using two simple rules: the size of the type and
its signedness.
b. all the operator overloads now use infix operators. For division and 
remainder, 
the inactive elements are padded with 1 to avoid undefined behavior.
c. isnan is optimized to have only two cases, i.e. the finite_math_only case or
the case where svcmpuo is used.
d. _S_load for masks (bool) now uses svld1 by reinterpret_casting the pointer 
to uint8_t pointer and then performing a svunpklo.
The same optimization is not done for masked_load and stores, as conversion of 
mask from a higher size type to lower size type is not optimal (sequential).
e. _S_unary_minus could not use svneg_x because it does not support unsigned 
types.
f. added specializations for reductions.
g. find_first_set and find_last_set are optimized using svclastb.


libstdc++-v3/ChangeLog:

* include/Makefile.am: Add simd_sve.h.
* include/Makefile.in: Add simd_sve.h.
* include/experimental/bits/simd.h: Add new SveAbi.
* include/experimental/bits/simd_builtin.h: Use
  __no_sve_deduce_t to support existing Neon Abi.
* include/experimental/bits/simd_converter.h: Convert
  sequentially when sve is available.
* include/experimental/bits/simd_detail.h: Define sve
  specific macro.
* include/experimental/bits/simd_math.h: Fallback frexp
  to execute sequentially when sve is available, to handle
  fixed_size_simd return type that always uses sve.
* include/experimental/simd: Include bits/simd_sve.h.
* testsuite/experimental/simd/tests/bits/main.h: Enable
  testing for sve128, sve256, sve512.
* include/experimental/bits/simd_sve.h: New file.

 Signed-off-by: Srinivas Yadav Singanaboina
 vasu.srinivasvasu...@gmail.com
---
 libstdc++-v3/include/Makefile.am  |1 +
 libstdc++-v3/include/Makefile.in  |1 +
 libstdc++-v3/include/experimental/bits/simd.h |  131 +-
 .../include/experimental/bits/simd_builtin.h  |   35 +-
 .../experimental/bits/simd_converter.h|   57 +-
 .../include/experimental/bits/simd_detail.h   |7 +-
 .../include/experimental/bits/simd_math.h |   14 +-
 .../include/experimental/bits/simd_sve.h  | 1863 +
 libstdc++-v3/include/experimental/simd|3 +
 .../experimental/simd/tests/bits/main.h   |3 +
 10 files changed, 2084 insertions(+), 31 deletions(-)
 create mode 100644 libstdc++-v3/include/experimental/bits/simd_sve.h

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 6209f390e08..1170cb047a6 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -826,6 +826,7 @@ experimental_bits_headers = \
${experimental_bits_srcdir}/simd_neon.h \
${experimental_bits_srcdir}/simd_ppc.h \
${experimental_bits_srcdir}/simd_scalar.h \
+   ${experimental_bits_srcdir}/simd_sve.h \
${experimental_bits_srcdir}/simd_x86.h \
${experimental_bits_srcdir}/simd_x86_conversions.h \
${experimental_bits_srcdir}/string_view.tcc \
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 596fa0d2390..bc44582a2da 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -1172,6 +1172,7 @@ experimental_bits_headers = \
${experimental_bits_srcdir}/simd_neon.h \
${experimental_bits_srcdir}/simd_ppc.h \
${experimental_bits_srcdir}/simd_scalar.h \
+   ${experimental_bits_srcdir}/simd_sve.h \
${experimental_bits_srcdir}/simd_x86.h \
${experimental_bits_srcdir}/simd_x86_conversions.h \
${experimental_bits_srcdir}/string_view.tcc \
diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 90523ea57dc..d274cd740fe 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -39,12 +39,16 @@
 #include 
 #include 
 #include 
+#include 
 
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
 #include 
 #endif
+#if _GLIBCXX_SIMD_HAVE_SVE
+#include 
+#endif
 
 /** @ingroup ts_simd
  * @{
@@ -83,6 +87,12 @@ using __m512d [[__gnu__::__vector_size__(64)]] = double;
 using __m512i [[__gnu__::__vector_size__(64)]] = long long;
 #endif
 
+#if _GLIBCXX_SIMD_HAVE_SVE
+constexpr inline int __sve_vectorized_size_bytes = __ARM_FEATURE_SVE_BITS / 8;
+#else
+constexpr inline int __sve_vectorized_size_bytes = 0;
+#endif 
+
 namespace simd_abi {
 // simd_abi forward declarations {{{
 // implementation details:
@@ -108,6