If the span of the range R produced by uniform bit generator U passed to
generate_canonical is not power of two, we need to use algorithm that
requires computing power R^k that is greater than 2^d, where d is number
of digits in mantissa of _RealT. Previously we have used an integer type
that is has twice as many digits as d. However, in most situations R^k
fits with much smaller number.

This patch changes the algorithm for d < 64 to use uin32_t and uint64_t
if they are sufficient to hold R^k. We determine that by p, such that R^p
is greater than 2^32 and 2^64 respectively, and using  given type if k < p.
The values of k and p are determined using __gen_can_ceil_log function,
that replaced __gen_can_rng_calls_needed function. We compute the logarithm
by dividing the value, instead of multiplying the base, to avoid overflow
when R^k does not fit in type used to represent value.

Furthermore we define simplified __generate_canonical_k1 function, that is
used if R >= r^d, i.e. k == 1. In such case we know we unsigned counterpart
of type produced by U, as it is guaranteed to fit R^k. For example when
RealT is float, and U generates long long.

Finally, for d >= 64, we either use __generate_canonical_k1 (ranges produces
128bit integers) or __generate_canonical_any using unsigned __int128. In
situation when more than 128bits are required to represent R^k (e.g. mantissa
is 104 bit, R requires 75bits), we still emit static_assert. However, the
error is no longer emitted unconditionally.

libstdc++-v3/ChangeLog:

        * include/bits/random.tcc (__gen_can_rng_calls_needed):
        Replace with __gen_can_ceil_log.
        (__gen_can_ceil_log, __generate_canonical_k1): Define.
        (__generate_canonical_any): Replace __gen_can_rng_calls_needed
        with __gen_can_ceil_log.
---
Testing this currently, but posted to empahsize the case, where
we have d > 64, and log2(R) > 64 but log2(R) < d. In that k will be 2,
and R^k, would require more than 128 bits. If R > r^d, then k == 1.

 libstdc++-v3/include/bits/random.tcc | 80 +++++++++++++++++++++++-----
 1 file changed, 66 insertions(+), 14 deletions(-)

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index e16eb9814a5..1d0c3495519 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3565,13 +3565,16 @@ namespace __detail
       return __prod;
     }
 
-  template <typename _UInt>
+  template <typename _UInt1, typename _UInt2>
     constexpr unsigned
-    __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
+    __gen_can_ceil_log(_UInt1 __val, _UInt2 __base)
     {
       unsigned __i = 1;
-      for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
-       ++__i;
+      while (__val > __base)
+        {
+         __val /= __base;
+         ++__i;
+       }       
       return __i;
     }
 
@@ -3580,7 +3583,6 @@ namespace __detail
   // type passed must be big enough to represent Rk, R^k, a power of R
   // (the range of values produced by the rng) up to twice the length
   // of the mantissa.
-
   template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
     _RealT
     __generate_canonical_any(_Urbg& __urng)
@@ -3591,7 +3593,7 @@ namespace __detail
       const _UInt __r = 2;
       const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
       const _UInt __rd = _UInt(1) << __d;
-      const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
+      const unsigned __k = __gen_can_ceil_log(__rd, __R);
       const _UInt __Rk = __gen_can_pow(__R, __k);
       const _UInt __x =  __Rk/__rd;
 
@@ -3609,6 +3611,28 @@ namespace __detail
        }
     }
 
+   // This version must be used when the range of possible RNG results,
+   // Urbg::max()-Urbg::min(), is not a power of two less one, and
+   // it is greater than __rd, i.e. k == 1.
+   template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
+    _RealT
+    __generate_canonical_k1(_Urbg& __urng)
+    {
+      // This cannot overflow, as in such case Urbg::max()-Urbg::min(),
+      // would be power of two less.
+      constexpr _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
+      constexpr _UInt __rd = _UInt(1) << __d;
+      constexpr _UInt __x = __R / __rd;
+
+      while (true)
+      {
+        const _RealT __ret = _RealT((__urng() - _Urbg::min()) / __x) / __rd;
+        if (__ret < _RealT(1.0))
+         return __ret;
+      }
+    }
+ 
+
 #if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
   template <typename _Tp>
     const bool __is_rand_dist_float_v = is_floating_point<_Tp>::value;
@@ -3659,7 +3683,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
        "float16_t type is not supported, consider using bfloat16_t");
 #endif
 
-      using _Rng = decltype(_Urbg::max());
       const unsigned __d_max = std::numeric_limits<_RealT>::digits;
       const unsigned __d = __digits > __d_max ? __d_max : __digits;
 
@@ -3675,23 +3698,30 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
            {
 #if defined(__SIZEOF_INT128__)
              // Accommodate double double or float128.
-             return __extension__ __generate_canonical_pow2<
-               _RealT, unsigned __int128, __d>(__urng);
+             return __generate_canonical_pow2<_RealT, __uint128_t, 
__d>(__urng);
 #else
              static_assert(false,
                "float precision >64 requires __int128 support");
 #endif
            }
        }
-      else // Need up to 2x bits.
-       {
-         if constexpr (__d <= 32)
+      else if constexpr (__d < 64)
+        {      
+          using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
+         // This cannot overflow, as _Urbg::max() - _Urbg::min() is not power
+         // of two less one.
+          constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
+          constexpr uint64_t __rd = uint64_t(1) << __d;
+         constexpr unsigned __k = __gen_can_ceil_log(__rd, __R);
+         if constexpr (__k == 1)
+           return __generate_canonical_k1<_RealT, _UIntR, __d>(__urng);
+         if constexpr (__k < __gen_can_ceil_log(uint32_t(-1), __R))
+           return __generate_canonical_any<_RealT, uint32_t, __d>(__urng);
+         else if constexpr (__k < __gen_can_ceil_log(uint64_t(-1), __R))
            return __generate_canonical_any<_RealT, uint64_t, __d>(__urng);
          else
            {
 #if defined(__SIZEOF_INT128__)
-             static_assert(__d <= 64,
-               "irregular RNG with float precision >64 is not supported");
              return __extension__ __generate_canonical_any<
                _RealT, unsigned __int128, __d>(__urng);
 #else
@@ -3700,6 +3730,28 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
 #endif
            }
        }
+      else
+       {
+#if defined(__SIZEOF_INT128__)
+         __extension__ using __uint128_t = unsigned __int128;
+          using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
+         // This cannot overflow, as _Urbg::max() - _Urbg::min() is not power
+         // of two less one.
+          constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
+          constexpr __uint128_t __rd = __uint128_t(1) << __d;
+         constexpr unsigned __k = __gen_can_ceil_log(__rd, __R);
+         if constexpr (__k == 1)
+           return __generate_canonical_k1<_RealT, _UIntR, __d>(__urng);
+         else if constexpr (__k < __gen_can_ceil_log(__uint128_t(-1), __R))
+           return __generate_canonical_any<_RealT, __uint128_t, __d>(__urng);
+         else
+           static_assert(false, "irregular RNG with __int128 output "
+             "and float precision >=64 are not supported");
This seem to require 256 bits, until I got it toally wrong.
+#else
+          static_assert(false, "irregular RNG with float precision"
+           " >=64 requires __int128 support");
+#endif
+       }
     }
 _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
 
-- 
2.52.0

Reply via email to