Re: [PATCH v2] libstdc++: Use smallest possible integer for __generate_cannonical_any

Jonathan Wakely Fri, 19 Dec 2025 06:35:39 -0800

On Fri, 19 Dec 2025 at 08:35, Tomasz Kamiński <[email protected]> wrote:
>
> If the span of the range R produced by uniform bit generator U passed to
> generate_canonical is not power of two, we need to use algorithm that
> requires computing power R^k that is greater than 2^d, where d is number
> of digits in mantissa of _RealT. Previously we have used an integer type
> that is has twice as many digits as d. This lead to situation that for
> standard engines that produced such range (like std::minstd_rand0,
> std::minstd_rand, std::ranlux24, ....) 256bit integer support was
> required for 128bit floats. However, in this cases R^4 provides more
> than d bits of precision, while requring 124 bits.
>
> We overestimate the number of required bits, by computing a value
> l * bit_width(R) (log2(R) + 1), where l is value such that log2(R) * l >= d.
> As R >= 2^log2(R), then R^l >= (2^log2(R))^l == 2^(log(R) * l) >= 2^d,
>  so  k+1 >= l >= k. In consequence R^k is smaller R^l which require at most
> l * bit_width(R). This is an overestimate, but difference should not be
> higher than l bits.
>
> In C++11 mode for architectures that do not support __int128 we cannot 
> compute,
> __k at compile time as the required operations on __rand_unint128 as support
> since C++14. In this case we use overestimate an number of required bits, by
> computing how many power of two that are smaller than R we need. This is done
> by computing ceil(d / log2(R)).
>
> We replace __gen_can_pow and __gen_can_rng_calls_needed with __gen_can_log(v, 
> b),
> that computes the largest power of b that fits into v. As such number is 
> smaller
> than v, the result will always fit in provided value. Both the logirtm and
> the power value are returned in __gen_can_pow-res struct.
>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/random.h (__rand_uint128::operator>)
>         (__rand_uint128::operator>=): Define.
>         * include/bits/random.tcc (__generate_canonical_pow2):
>         Adjust for use of __rand_uint128 in C++11.
>         (__gen_can_pow, __gen_can_rng_calls_needed): Replace with
>         __gen_can_log.
>         (__gen_can_log_res, __gen_can_log): Define.
>         (__generate_canonical_any): Reworked how _UInt is determined.
>         * 
> testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc:
>         New test.
> ---
> v3 switches to always using bit_width(R) * ceil(d / log2(R)) bits,
> as this vastly simplies the code, while the overestimate does not impact
> support for standard engines for float128.
>
>  libstdc++-v3/include/bits/random.h            |   8 ++
>  libstdc++-v3/include/bits/random.tcc          | 104 +++++++++++++-----
>  .../operators/gencanon_eng.cc                 |  39 +++++++
>  3 files changed, 121 insertions(+), 30 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
>
> diff --git a/libstdc++-v3/include/bits/random.h 
> b/libstdc++-v3/include/bits/random.h
> index 7a9ba0a7a9e..117d63dff7f 100644
> --- a/libstdc++-v3/include/bits/random.h
> +++ b/libstdc++-v3/include/bits/random.h
> @@ -463,9 +463,17 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
>           return false;
>        }
>
> +      friend _GLIBCXX14_CONSTEXPR bool
> +      operator>(const type& __l, const type& __r) noexcept
> +      { return __r < __l; }
> +
>        friend _GLIBCXX14_CONSTEXPR bool
>        operator<=(const type& __l, const type& __r) noexcept
>        { return !(__r < __l); }
> +
> +      friend _GLIBCXX14_CONSTEXPR bool
> +      operator>=(const type& __l, const type& __r) noexcept
> +      { return !(__l < __r); }
>  #endif
>
>        friend _GLIBCXX14_CONSTEXPR bool
> diff --git a/libstdc++-v3/include/bits/random.tcc 
> b/libstdc++-v3/include/bits/random.tcc
> index 0256059ab72..38d60588fdc 100644
> --- a/libstdc++-v3/include/bits/random.tcc
> +++ b/libstdc++-v3/include/bits/random.tcc
> @@ -3532,16 +3532,16 @@ namespace __detail
>        const unsigned __log2_Rk_max = __k * __log2_R;
>        const unsigned __log2_Rk =  // Bits of entropy actually obtained:
>         __log2_uint_max < __log2_Rk_max ? __log2_uint_max : __log2_Rk_max;
> -      static_assert(__log2_Rk >= __d);
>        // Rk = _UInt(1) << __log2_Rk;  // Likely overflows, UB.
> -      constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 
> _RealT(2.0);
> +      _GLIBCXX14_CONSTEXPR const _RealT __Rk
> +        = _RealT(_UInt(1) << (__log2_Rk - 1)) * _RealT(2.0);
>  #if defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
>        const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits.
>  #else
>        const unsigned __log2_x = 0;
>  #endif
> -      const _UInt __x = _UInt(1) << __log2_x;
> -      constexpr _RealT __rd = __Rk / __x;
> +      _GLIBCXX14_CONSTEXPR const _UInt __x = _UInt(1) << __log2_x;
> +      _GLIBCXX14_CONSTEXPR const _RealT __rd = __Rk / _RealT(__x);
>        // xrd = __x << __d;  // Could overflow.
>
>        while (true)
> @@ -3552,29 +3552,50 @@ namespace __detail
>               __shift += __log2_R;
>               __sum |= _UInt(__urng() - _Urbg::min()) << __shift;
>             }
> -         const _RealT __ret = _RealT(__sum >> __log2_x) / __rd;
> +         const _RealT __ret = _RealT(__sum >> __log2_x) / _RealT(__rd);
>           if (__ret < _RealT(1.0))
>             return __ret;
>         }
>      }
>
> -  template <typename _UInt>
> -    constexpr _UInt
> -    __gen_can_pow(_UInt __base, size_t __exponent)
> +
> +  template<typename _UInt>
> +    struct __gen_can_log_res
>      {
> -      _UInt __prod = __base;
> -      while (--__exponent != 0) __prod *= __base;
> -      return __prod;
> -    }
> +       unsigned __floor_log;
> +       _UInt __floor_pow;
> +
> +       constexpr __gen_can_log_res
> +       update(_UInt __base) const
> +       { return {__floor_log + 1, __floor_pow * __base}; }
> +    };
>
> -  template <typename _UInt>
> -    constexpr unsigned
> -    __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
> +
> +  template <typename _UInt1, typename _UInt2,
> +            typename _UComm = __conditional_t<(sizeof(_UInt2) > 
> sizeof(_UInt1)),
> +                                             _UInt2, _UInt1>>
> +    constexpr __gen_can_log_res<_UInt1>
> +    __gen_can_log(_UInt1 __val, _UInt2 __base)
>      {
> -      unsigned __i = 1;
> -      for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
> -       ++__i;
> -      return __i;
> +#if __cplusplus >= 201402L
> +      __gen_can_log_res<_UInt1> __res{0, _UInt1(1)};
> +      if (_UComm(__base) > _UComm(__val))
> +       return __res;
> +
> +      const _UInt1 __base1(__base);
> +      do
> +        {
> +         __val /= __base1;
> +          __res = __res.update(__base1);
> +        }
> +      while (__val >= __base1);
> +      return __res;
> +#else
> +      return (_UComm(__val) >= _UComm(__base))
> +            ? __gen_can_log(__val / _UInt1(__base), _UInt1(__base))
> +                .update(_UInt1(__base))
> +            : __gen_can_log_res<_UInt1>{0, _UInt1(1)};
> +#endif
>      }
>
>    // This version must be used when the range of possible RNG results,
> @@ -3587,30 +3608,53 @@ namespace __detail
>      _RealT
>      __generate_canonical_any(_Urbg& __urng)
>      {
> -      using _UInt = typename __detail::_Select_uint_least_t<__d * 2>::type;
> -
>        // Names below are chosen to match the description in the Standard.
>        // Parameter d is the actual target number of bits.
> -      const _UInt __r{2};
> -      const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
> -      const _UInt __rd = _UInt(1) << __d;
> -      const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
> -      const _UInt __Rk = __gen_can_pow(__R, __k);
> -      const _UInt __x =  __Rk / __rd;
> +#if (__cplusplus >= 201402L) || defined(__SIZEOF_INT128__)
> +#  define _GLIBCXX_GEN_CON_CONST constexpr
> +#else
> +#  define _GLIBCXX_GEN_CON_CONST const


I assume CON should be CAN here. CANON would be the most natural
English abbreviation of "canonical", so _GLIBCXX_GEN_CANON_CONST ...
although that will make the lines that use it too long.

OK with that change to the macro name, and the fix for "always fit in
it's" in the commit msg.



> +#endif
> +
> +      using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
> +      // Cannot overflow, as _Urbg::max() - _Urbg::min() is not power of
> +      // two minus one
> +      constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
> +      constexpr unsigned __log2R
> +       = sizeof(_UIntR) * __CHAR_BIT__ - __builtin_clzg(__R) - 1;
> +      // We overstimate number of required bits, by computing
> +      // r such that l * log2(R) >= d, so:
> +      // R^l >= (2 ^ log2(R)) ^ l == 2 ^ (log2(r) * l) >= 2^d
> +      // And then requiring l * bit_width(R) bits.
> +      constexpr unsigned __l = (__d + __log2R - 1) / __log2R;
> +      constexpr unsigned __bits = (__log2R + 1) * __l;
> +      using _UInt = typename __detail::_Select_uint_least_t<__bits>::type;
> +
> +      _GLIBCXX_GEN_CON_CONST _UInt __rd = _UInt(1) << __d;
> +      _GLIBCXX_GEN_CON_CONST auto __logRrd = __gen_can_log(__rd, __R);
> +      _GLIBCXX_GEN_CON_CONST unsigned __k
> +        = __logRrd.__floor_log + (__rd > __logRrd.__floor_pow);
> +
> +      _GLIBCXX_GEN_CON_CONST _UInt __Rk
> +        = (__k > __logRrd.__floor_log)
> +          ? _UInt(__logRrd.__floor_pow) * _UInt(__R)
> +          : _UInt(__logRrd.__floor_pow);
> +      _GLIBCXX_GEN_CON_CONST _UInt __x =  __Rk / __rd;
>
>        while (true)
>         {
>           _UInt __Ri{1};
> -         _UInt __sum{__urng() - _Urbg::min()};
> +         _UInt __sum(__urng() - _Urbg::min());
>           for (int __i = __k - 1; __i > 0; --__i)
>             {
> -             __Ri *= __R;
> -             __sum += _UInt{__urng() - _Urbg::min()} * __Ri;
> +             __Ri *= _UInt(__R);
> +             __sum += _UInt(__urng() - _Urbg::min()) * __Ri;
>             }
>           const _RealT __ret = _RealT(__sum / __x) / _RealT(__rd);
>           if (__ret < _RealT(1.0))
>             return __ret;
>         }
> +#undef _GLIBCXX_GEN_CON_CONST
>      }
>
>  #if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
> diff --git 
> a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
>  
> b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
> new file mode 100644
> index 00000000000..c8d5b5d263b
> --- /dev/null
> +++ 
> b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
> @@ -0,0 +1,39 @@
> +// { dg-do compile { target { c++11 } } }
> +
> +#include <random>
> +
> +template<typename _Real, typename _URBG>
> +void
> +test_engine()
> +{
> +  _URBG __engine;
> +  (void)std::generate_canonical<_Real, size_t(-1)>(__engine);
> +}
> +
> +template<typename _Real>
> +void
> +test_all_engines()
> +{
> +  test_engine<_Real, std::default_random_engine>();
> +
> +  test_engine<_Real, std::minstd_rand0>();
> +  test_engine<_Real, std::minstd_rand>();
> +  test_engine<_Real, std::mt19937>();
> +  test_engine<_Real, std::mt19937_64>();
> +  test_engine<_Real, std::ranlux24_base>();
> +  test_engine<_Real, std::ranlux48_base>();
> +  test_engine<_Real, std::ranlux24>();
> +  test_engine<_Real, std::ranlux48>();
> +  test_engine<_Real, std::knuth_b>();
> +#if __cplusplus > 202302L
> +  test_engine<_Real, std::philox4x32>();
> +  test_engine<_Real, std::philox4x64>();
> +#endif
> +}
> +
> +int main()
> +{
> +  test_all_engines<float>();
> +  test_all_engines<double>();
> +  test_all_engines<long double>();
> +}
> --
> 2.52.0
>

Re: [PATCH v2] libstdc++: Use smallest possible integer for __generate_cannonical_any

Reply via email to