On Wed, Dec 17, 2025 at 4:29 PM Tomasz Kamiński <[email protected]> wrote:
> If the span of the range R produced by uniform bit generator U passed to
> generate_canonical is not power of two, we need to use algorithm that
> requires computing power R^k that is greater than 2^d, where d is number
> of digits in mantissa of _RealT. Previously we have used an integer type
> that is has twice as many digits as d. However, in most situations R^k
> fits with much smaller number.
>
> This patch changes the algorithm for d < 64 to use uin32_t and uint64_t
> if they are sufficient to hold R^k. We determine that by p, such that R^p
> is greater than 2^32 and 2^64 respectively, and using given type if k < p.
> The values of k and p are determined using __gen_can_ceil_log function,
> that replaced __gen_can_rng_calls_needed function. We compute the logarithm
> by dividing the value, instead of multiplying the base, to avoid overflow
> when R^k does not fit in type used to represent value.
>
> Furthermore we define simplified __generate_canonical_k1 function, that is
> used if R >= r^d, i.e. k == 1. In such case we know we unsigned counterpart
> of type produced by U, as it is guaranteed to fit R^k. For example when
> RealT is float, and U generates long long.
>
> Finally, for d >= 64, we either use __generate_canonical_k1 (ranges
> produces
> 128bit integers) or __generate_canonical_any using unsigned __int128. In
> situation when more than 128bits are required to represent R^k (e.g.
> mantissa
> is 104 bit, R requires 75bits), we still emit static_assert. However, the
> error is no longer emitted unconditionally.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/random.tcc (__gen_can_rng_calls_needed):
> Replace with __gen_can_ceil_log.
> (__gen_can_ceil_log, __generate_canonical_k1): Define.
> (__generate_canonical_any): Replace __gen_can_rng_calls_needed
> with __gen_can_ceil_log.
> ---
> Testing this currently, but posted to empahsize the case, where
> we have d > 64, and log2(R) > 64 but log2(R) < d. In that k will be 2,
> and R^k, would require more than 128 bits. If R > r^d, then k == 1.
>
> libstdc++-v3/include/bits/random.tcc | 80 +++++++++++++++++++++++-----
> 1 file changed, 66 insertions(+), 14 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/random.tcc
> b/libstdc++-v3/include/bits/random.tcc
> index e16eb9814a5..1d0c3495519 100644
> --- a/libstdc++-v3/include/bits/random.tcc
> +++ b/libstdc++-v3/include/bits/random.tcc
> @@ -3565,13 +3565,16 @@ namespace __detail
> return __prod;
> }
>
> - template <typename _UInt>
> + template <typename _UInt1, typename _UInt2>
> constexpr unsigned
> - __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
> + __gen_can_ceil_log(_UInt1 __val, _UInt2 __base)
> {
> unsigned __i = 1;
> - for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
> - ++__i;
> + while (__val > __base)
> + {
> + __val /= __base;
> + ++__i;
> + }
> return __i;
> }
>
> @@ -3580,7 +3583,6 @@ namespace __detail
> // type passed must be big enough to represent Rk, R^k, a power of R
> // (the range of values produced by the rng) up to twice the length
> // of the mantissa.
> -
> template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
> _RealT
> __generate_canonical_any(_Urbg& __urng)
> @@ -3591,7 +3593,7 @@ namespace __detail
> const _UInt __r = 2;
> const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
> const _UInt __rd = _UInt(1) << __d;
> - const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
> + const unsigned __k = __gen_can_ceil_log(__rd, __R);
> const _UInt __Rk = __gen_can_pow(__R, __k);
> const _UInt __x = __Rk/__rd;
>
> @@ -3609,6 +3611,28 @@ namespace __detail
> }
> }
>
> + // This version must be used when the range of possible RNG results,
> + // Urbg::max()-Urbg::min(), is not a power of two less one, and
> + // it is greater than __rd, i.e. k == 1.
> + template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
> + _RealT
> + __generate_canonical_k1(_Urbg& __urng)
> + {
> + // This cannot overflow, as in such case Urbg::max()-Urbg::min(),
> + // would be power of two less.
> + constexpr _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
> + constexpr _UInt __rd = _UInt(1) << __d;
> + constexpr _UInt __x = __R / __rd;
> +
> + while (true)
> + {
> + const _RealT __ret = _RealT((__urng() - _Urbg::min()) / __x) /
> __rd;
> + if (__ret < _RealT(1.0))
> + return __ret;
> + }
> + }
> +
> +
> #if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
> template <typename _Tp>
> const bool __is_rand_dist_float_v = is_floating_point<_Tp>::value;
> @@ -3659,7 +3683,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> "float16_t type is not supported, consider using bfloat16_t");
> #endif
>
> - using _Rng = decltype(_Urbg::max());
> const unsigned __d_max = std::numeric_limits<_RealT>::digits;
> const unsigned __d = __digits > __d_max ? __d_max : __digits;
>
> @@ -3675,23 +3698,30 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> {
> #if defined(__SIZEOF_INT128__)
> // Accommodate double double or float128.
> - return __extension__ __generate_canonical_pow2<
> - _RealT, unsigned __int128, __d>(__urng);
> + return __generate_canonical_pow2<_RealT, __uint128_t,
> __d>(__urng);
> #else
> static_assert(false,
> "float precision >64 requires __int128 support");
> #endif
> }
> }
> - else // Need up to 2x bits.
> - {
> - if constexpr (__d <= 32)
> + else if constexpr (__d < 64)
> + {
> + using _UIntR = typename
> make_unsigned<decltype(_Urbg::max())>::type;
> + // This cannot overflow, as _Urbg::max() - _Urbg::min() is not
> power
> + // of two less one.
> + constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
> + constexpr uint64_t __rd = uint64_t(1) << __d;
> + constexpr unsigned __k = __gen_can_ceil_log(__rd, __R);
> + if constexpr (__k == 1)
> + return __generate_canonical_k1<_RealT, _UIntR, __d>(__urng);
> + if constexpr (__k < __gen_can_ceil_log(uint32_t(-1), __R))
> + return __generate_canonical_any<_RealT, uint32_t, __d>(__urng);
>
Maybe I should pass _k and __rd as template parameters, instead of __d,
as I am computing them twice now. I could then merge _k1 function.
> + else if constexpr (__k < __gen_can_ceil_log(uint64_t(-1), __R))
> return __generate_canonical_any<_RealT, uint64_t, __d>(__urng);
> else
> {
> #if defined(__SIZEOF_INT128__)
> - static_assert(__d <= 64,
> - "irregular RNG with float precision >64 is not supported");
> return __extension__ __generate_canonical_any<
> _RealT, unsigned __int128, __d>(__urng);
> #else
> @@ -3700,6 +3730,28 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
> #endif
> }
> }
> + else
> + {
> +#if defined(__SIZEOF_INT128__)
> + __extension__ using __uint128_t = unsigned __int128;
> + using _UIntR = typename
> make_unsigned<decltype(_Urbg::max())>::type;
> + // This cannot overflow, as _Urbg::max() - _Urbg::min() is not
> power
> + // of two less one.
> + constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
> + constexpr __uint128_t __rd = __uint128_t(1) << __d;
> + constexpr unsigned __k = __gen_can_ceil_log(__rd, __R);
> + if constexpr (__k == 1)
> + return __generate_canonical_k1<_RealT, _UIntR, __d>(__urng);
> + else if constexpr (__k < __gen_can_ceil_log(__uint128_t(-1),
> __R))
> + return __generate_canonical_any<_RealT, __uint128_t,
> __d>(__urng);
> + else
> + static_assert(false, "irregular RNG with __int128 output "
> + "and float precision >=64 are not supported");
> This seem to require 256 bits, until I got it toally wrong.
> +#else
> + static_assert(false, "irregular RNG with float precision"
> + " >=64 requires __int128 support");
> +#endif
> + }
> }
> _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
>
> --
> 2.52.0
>
>