On Wed, Dec 17, 2025 at 04:30:17AM -0500, Nathan Myers wrote:
> > So
> >    constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 2.0;
> > can be done in that case even using unsigned long long type.
> 
> Shifting uint64_t(1) left 64 places, as would occur for
> __d == 64 and _RealT=long double (with its 64-bit mantissa)
> is UB and ill-formed in constexpr context...

But if __log2_Rk is at most 64, then the above can still be done in
uint64_t.

> Emulating 128-bit integer operations seems like heroics unless C++
> is not expected ever to get __int128. The intention was to support

I think __int128 support is very unlikely coming for 32-bit targets,
both for C and C++.  First of all, each target would need to define
ABI for it (what alignment it has, how it is passed, how it is returned).
As shown with _BitInt, that takes years even after getting it standardized.
_BitInt got into C23, and in GCC 14/15 we got support only for 3 targets
(x86_64, i686, aarch64) and only in GCC 16 we are getting a few others
(s390x, loongarch, maybe riscv, maybe arm 32-bit).
I think getting
https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p3666r2.html
in would be easier (but see above, it will take a few further years before
all target maintains do something).

> 128-bit floating-point formats, with mantissa >= 106 bits, in places
> where users have them. Does it seem likely a quad-precision floating
> point type would be implemented, but not __int128?

That is a very common thing, std::float128_t is supported on many 32-bit
targets.

For __generate_canonical_pow2 I think something like untested class
below could do the job.
__generate_canonical_any would be harder, that needs
multiplication/division, and from what I can see, all this stuff is compiled
with C++11 as well and so for constexpr we are limited by the C++11 return
stmt only requirement.

--- libstdc++-v3/include/bits/random.tcc.jj     2025-12-17 09:16:43.300578917 
+0100
+++ libstdc++-v3/include/bits/random.tcc        2025-12-17 10:58:32.541786465 
+0100
@@ -3481,6 +3481,64 @@ namespace __detail
 #pragma GCC diagnostic ignored "-Wc++14-extensions" // for variable templates
 #pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
 
+#ifndef __SIZEOF_INT128__
+  namespace __detail
+  {
+    struct __rng_uint128 {
+      uint64_t _M_l, _M_h;
+
+      constexpr __rng_uint128(uint64_t __x) noexcept
+      : _M_l (__x), _M_h (0) {}
+
+      constexpr __rng_uint128(uint64_t __h, uint64_t __l) noexcept
+      : _M_l (__l), _M_h (__h) {}
+
+      template<typename _RealT>
+      constexpr operator _RealT() noexcept {
+       return _M_h == 0 ? _RealT(_M_l)
+                        : _RealT(_M_h) * _RealT(18446744073709551616.0)
+                          + _RealT(_M_l);
+      }
+    };
+
+    constexpr __rng_uint128
+    operator+(const __rng_uint128& __x, const __rng_uint128& __y) noexcept
+    {
+      return __rng_uint128(__x._M_h + __y._M_h
+                          + (__x._M_l + __y._M_l < __x._M_l),
+                          __x._M_l + __y._M_l);
+    }
+
+    constexpr __rng_uint128
+    operator<<(const __rng_uint128& __x, size_t __y) noexcept
+    {
+      return (__y >= 64
+             ? __rng_uint128(__x._M_l << (__y - 64), 0)
+             : __y
+             ? __rng_uint128((__x._M_h << __y) | (__x._M_l >> (64 - __y)),
+                             __x._M_l << __y)
+             : __x);
+    }
+
+    constexpr __rng_uint128
+    operator>>(const __rng_uint128& __x, size_t __y) noexcept
+    {
+      return (__y >= 64
+             ? __rng_uint128(0, __x._M_h >> (__y - 64))
+             : __y
+             ? __rng_uint128(__x._M_h >> __y,
+                             (__x._M_l >> __y) | (__x._M_h << (64 - __y)))
+             : __x);
+    }
+
+    constexpr __rng_uint128
+    operator|(const __rng_uint128& __x, const __rng_uint128& __y) noexcept
+    {
+      return __rng_uint128(__x._M_h | __y._M_h, __x._M_l | __y._M_l);
+    }
+  }
+#endif  
+
   // This version is used when Urbg::max()-Urbg::min() is a power of
   // two less 1, the norm in real programs. It works by calling urng()
   // as many times as needed to fill the target mantissa, accumulating
@@ -3511,7 +3569,8 @@ namespace __detail
   // This implementation works with std::bfloat16, which can exactly
   // represent 2^32, but not with std::float16_t, limited to 2^15.
 
-  template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
+  template<typename _RealT, typename _UInt, size_t __d, typename _Urbg,
+          int __log2_uint_max>
     _RealT
     __generate_canonical_pow2(_Urbg& __urng)
     {
@@ -3521,10 +3580,8 @@ namespace __detail
       // r = 2;  // Redundant, we only support radix 2.
       using _Rng = decltype(_Urbg::max());
       const _Rng __rng_range_less_1 = _Urbg::max() - _Urbg::min();
-      const _UInt __uint_range_less_1 = ~_UInt(0);
       // R = _UInt(__rng_range_less_1) + 1;  // May wrap to 0.
       const auto __log2_R = __builtin_popcountg(__rng_range_less_1);
-      const auto __log2_uint_max = __builtin_popcountg(__uint_range_less_1);
       // rd = _UInt(1) << __d;  // Could overflow, UB.
       const unsigned __k = (__d + __log2_R - 1) / __log2_R;
       const unsigned __log2_Rk_max = __k * __log2_R;
@@ -3532,7 +3589,8 @@ namespace __detail
        __log2_uint_max < __log2_Rk_max ? __log2_uint_max : __log2_Rk_max;
       static_assert(__log2_Rk >= __d);
       // Rk = _UInt(1) << __log2_Rk;  // Likely overflows, UB.
-      constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 2.0;
+      constexpr _RealT __Rk
+       = _RealT(_UInt(1) << (__log2_Rk - 1)) * _RealT(2.0);
 #if defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
       const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits.
 #else
@@ -3593,7 +3651,7 @@ namespace __detail
       const _UInt __rd = _UInt(1) << __d;
       const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
       const _UInt __Rk = __gen_can_pow(__R, __k);
-      const _UInt __x =  __Rk/__rd;
+      const _UInt __x =  __Rk / __rd;
 
       while (true)
        {
@@ -3670,18 +3728,20 @@ namespace __detail
       if constexpr (__is_power_of_2_less_1(_Urbg::max() - _Urbg::min()))
        {
          if constexpr (__d <= 32)
-           return __generate_canonical_pow2<_RealT, uint32_t, __d>(__urng);
+           return __generate_canonical_pow2<_RealT, uint32_t, __d,
+                                            32>(__urng);
          else if constexpr (__d <= 64)
-           return __generate_canonical_pow2<_RealT, uint64_t, __d>(__urng);
+           return __generate_canonical_pow2<_RealT, uint64_t, __d,
+                                            64>(__urng);
          else
            {
-#if defined(__SIZEOF_INT128__)
              // Accommodate double double or float128.
+#if defined(__SIZEOF_INT128__)
              return __extension__ __generate_canonical_pow2<
-               _RealT, unsigned __int128, __d>(__urng);
+               _RealT, unsigned __int128, __d, 128>(__urng);
 #else
-             static_assert(false,
-               "float precision >64 requires __int128 support");
+             return __extension__ __generate_canonical_pow2<
+               _RealT, __detail::__rng_uint128, __d, 128>(__urng);
 #endif
            }
        }


        Jakub

Reply via email to