Re: [PATCH v2] libstdc++: Use smallest possible integer for __generate_cannonical_any

Jonathan Wakely Fri, 19 Dec 2025 06:32:19 -0800

On Fri, 19 Dec 2025 at 10:23, Tomasz Kaminski <[email protected]> wrote:
>
>
>
> On Fri, Dec 19, 2025 at 9:35 AM Tomasz Kamiński <[email protected]> wrote:
>>
>> If the span of the range R produced by uniform bit generator U passed to
>> generate_canonical is not power of two, we need to use algorithm that
>> requires computing power R^k that is greater than 2^d, where d is number
>> of digits in mantissa of _RealT. Previously we have used an integer type
>> that is has twice as many digits as d. This lead to situation that for
>> standard engines that produced such range (like std::minstd_rand0,
>> std::minstd_rand, std::ranlux24, ....) 256bit integer support was
>> required for 128bit floats. However, in this cases R^4 provides more
>> than d bits of precision, while requring 124 bits.
>>
>> We overestimate the number of required bits, by computing a value
>> l * bit_width(R) (log2(R) + 1), where l is value such that log2(R) * l >= d.
>> As R >= 2^log2(R), then R^l >= (2^log2(R))^l == 2^(log(R) * l) >= 2^d,
>>  so  k+1 >= l >= k. In consequence R^k is smaller R^l which require at most
>> l * bit_width(R). This is an overestimate, but difference should not be
>> higher than l bits.
>>
>> In C++11 mode for architectures that do not support __int128 we cannot 
>> compute,
>> __k at compile time as the required operations on __rand_unint128 as support
>> since C++14. In this case we use overestimate an number of required bits, by
>> computing how many power of two that are smaller than R we need. This is done
>> by computing ceil(d / log2(R)).
>
> This is a leftover paragraph from the previous description.
>>
>>
>> We replace __gen_can_pow and __gen_can_rng_calls_needed with 
>> __gen_can_log(v, b),
>> that computes the largest power of b that fits into v. As such number is 
>> smaller
>> than v, the result will always fit in provided value. Both the logirtm and
>> the power value are returned in __gen_can_pow-res struct.
>
> And fixed this one to:
> We replace __gen_can_pow and __gen_can_rng_calls_needed with __gen_can_log(v, 
> b),
> which computes the largest power of b that fits into v. As such a number is 
> smaller
> than v, the result will always fit in it's. Both the logarithm and the power 
> value


"always fit in it's" doesn't seem right, maybe "always fit in it"?
Or something else?


> are returned in __gen_can_pow_res struct.
>
>>
>>
>> libstdc++-v3/ChangeLog:
>>
>>         * include/bits/random.h (__rand_uint128::operator>)
>>         (__rand_uint128::operator>=): Define.
>>         * include/bits/random.tcc (__generate_canonical_pow2):
>>         Adjust for use of __rand_uint128 in C++11.
>>         (__gen_can_pow, __gen_can_rng_calls_needed): Replace with
>>         __gen_can_log.
>>         (__gen_can_log_res, __gen_can_log): Define.
>>         (__generate_canonical_any): Reworked how _UInt is determined.
>>         * 
>> testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc:
>>         New test.
>> ---
>> v3 switches to always using bit_width(R) * ceil(d / log2(R)) bits,
>> as this vastly simplies the code, while the overestimate does not impact
>> support for standard engines for float128.
>>
>>  libstdc++-v3/include/bits/random.h            |   8 ++
>>  libstdc++-v3/include/bits/random.tcc          | 104 +++++++++++++-----
>>  .../operators/gencanon_eng.cc                 |  39 +++++++
>>  3 files changed, 121 insertions(+), 30 deletions(-)
>>  create mode 100644 
>> libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
>>
>> diff --git a/libstdc++-v3/include/bits/random.h 
>> b/libstdc++-v3/include/bits/random.h
>> index 7a9ba0a7a9e..117d63dff7f 100644
>> --- a/libstdc++-v3/include/bits/random.h
>> +++ b/libstdc++-v3/include/bits/random.h
>> @@ -463,9 +463,17 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
>>           return false;
>>        }
>>
>> +      friend _GLIBCXX14_CONSTEXPR bool
>> +      operator>(const type& __l, const type& __r) noexcept
>> +      { return __r < __l; }
>> +
>>        friend _GLIBCXX14_CONSTEXPR bool
>>        operator<=(const type& __l, const type& __r) noexcept
>>        { return !(__r < __l); }
>> +
>> +      friend _GLIBCXX14_CONSTEXPR bool
>> +      operator>=(const type& __l, const type& __r) noexcept
>> +      { return !(__l < __r); }
>>  #endif
>>
>>        friend _GLIBCXX14_CONSTEXPR bool
>> diff --git a/libstdc++-v3/include/bits/random.tcc 
>> b/libstdc++-v3/include/bits/random.tcc
>> index 0256059ab72..38d60588fdc 100644
>> --- a/libstdc++-v3/include/bits/random.tcc
>> +++ b/libstdc++-v3/include/bits/random.tcc
>> @@ -3532,16 +3532,16 @@ namespace __detail
>>        const unsigned __log2_Rk_max = __k * __log2_R;
>>        const unsigned __log2_Rk =  // Bits of entropy actually obtained:
>>         __log2_uint_max < __log2_Rk_max ? __log2_uint_max : __log2_Rk_max;
>> -      static_assert(__log2_Rk >= __d);
>>        // Rk = _UInt(1) << __log2_Rk;  // Likely overflows, UB.
>> -      constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 
>> _RealT(2.0);
>> +      _GLIBCXX14_CONSTEXPR const _RealT __Rk
>> +        = _RealT(_UInt(1) << (__log2_Rk - 1)) * _RealT(2.0);
>>  #if defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
>>        const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits.
>>  #else
>>        const unsigned __log2_x = 0;
>>  #endif
>> -      const _UInt __x = _UInt(1) << __log2_x;
>> -      constexpr _RealT __rd = __Rk / __x;
>> +      _GLIBCXX14_CONSTEXPR const _UInt __x = _UInt(1) << __log2_x;
>> +      _GLIBCXX14_CONSTEXPR const _RealT __rd = __Rk / _RealT(__x);
>>        // xrd = __x << __d;  // Could overflow.
>>
>>        while (true)
>> @@ -3552,29 +3552,50 @@ namespace __detail
>>               __shift += __log2_R;
>>               __sum |= _UInt(__urng() - _Urbg::min()) << __shift;
>>             }
>> -         const _RealT __ret = _RealT(__sum >> __log2_x) / __rd;
>> +         const _RealT __ret = _RealT(__sum >> __log2_x) / _RealT(__rd);
>>           if (__ret < _RealT(1.0))
>>             return __ret;
>>         }
>>      }
>>
>> -  template <typename _UInt>
>> -    constexpr _UInt
>> -    __gen_can_pow(_UInt __base, size_t __exponent)
>> +
>> +  template<typename _UInt>
>> +    struct __gen_can_log_res
>>      {
>> -      _UInt __prod = __base;
>> -      while (--__exponent != 0) __prod *= __base;
>> -      return __prod;
>> -    }
>> +       unsigned __floor_log;
>> +       _UInt __floor_pow;
>> +
>> +       constexpr __gen_can_log_res
>> +       update(_UInt __base) const
>> +       { return {__floor_log + 1, __floor_pow * __base}; }
>> +    };
>>
>> -  template <typename _UInt>
>> -    constexpr unsigned
>> -    __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
>> +
>> +  template <typename _UInt1, typename _UInt2,
>> +            typename _UComm = __conditional_t<(sizeof(_UInt2) > 
>> sizeof(_UInt1)),
>> +                                             _UInt2, _UInt1>>
>> +    constexpr __gen_can_log_res<_UInt1>
>> +    __gen_can_log(_UInt1 __val, _UInt2 __base)
>>      {
>> -      unsigned __i = 1;
>> -      for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
>> -       ++__i;
>> -      return __i;
>> +#if __cplusplus >= 201402L
>> +      __gen_can_log_res<_UInt1> __res{0, _UInt1(1)};
>> +      if (_UComm(__base) > _UComm(__val))
>> +       return __res;
>> +
>> +      const _UInt1 __base1(__base);
>> +      do
>> +        {
>> +         __val /= __base1;
>> +          __res = __res.update(__base1);
>> +        }
>> +      while (__val >= __base1);
>> +      return __res;
>> +#else
>> +      return (_UComm(__val) >= _UComm(__base))
>> +            ? __gen_can_log(__val / _UInt1(__base), _UInt1(__base))
>> +                .update(_UInt1(__base))
>> +            : __gen_can_log_res<_UInt1>{0, _UInt1(1)};
>> +#endif
>>      }
>>
>>    // This version must be used when the range of possible RNG results,
>> @@ -3587,30 +3608,53 @@ namespace __detail
>>      _RealT
>>      __generate_canonical_any(_Urbg& __urng)
>>      {
>> -      using _UInt = typename __detail::_Select_uint_least_t<__d * 2>::type;
>> -
>>        // Names below are chosen to match the description in the Standard.
>>        // Parameter d is the actual target number of bits.
>> -      const _UInt __r{2};
>> -      const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
>> -      const _UInt __rd = _UInt(1) << __d;
>> -      const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
>> -      const _UInt __Rk = __gen_can_pow(__R, __k);
>> -      const _UInt __x =  __Rk / __rd;
>> +#if (__cplusplus >= 201402L) || defined(__SIZEOF_INT128__)
>> +#  define _GLIBCXX_GEN_CON_CONST constexpr
>> +#else
>> +#  define _GLIBCXX_GEN_CON_CONST const
>> +#endif
>> +
>> +      using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
>> +      // Cannot overflow, as _Urbg::max() - _Urbg::min() is not power of
>> +      // two minus one
>> +      constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
>> +      constexpr unsigned __log2R
>> +       = sizeof(_UIntR) * __CHAR_BIT__ - __builtin_clzg(__R) - 1;
>> +      // We overstimate number of required bits, by computing
>> +      // r such that l * log2(R) >= d, so:
>> +      // R^l >= (2 ^ log2(R)) ^ l == 2 ^ (log2(r) * l) >= 2^d
>> +      // And then requiring l * bit_width(R) bits.
>> +      constexpr unsigned __l = (__d + __log2R - 1) / __log2R;
>> +      constexpr unsigned __bits = (__log2R + 1) * __l;
>> +      using _UInt = typename __detail::_Select_uint_least_t<__bits>::type;
>> +
>> +      _GLIBCXX_GEN_CON_CONST _UInt __rd = _UInt(1) << __d;
>> +      _GLIBCXX_GEN_CON_CONST auto __logRrd = __gen_can_log(__rd, __R);
>> +      _GLIBCXX_GEN_CON_CONST unsigned __k
>> +        = __logRrd.__floor_log + (__rd > __logRrd.__floor_pow);
>> +
>> +      _GLIBCXX_GEN_CON_CONST _UInt __Rk
>> +        = (__k > __logRrd.__floor_log)
>> +          ? _UInt(__logRrd.__floor_pow) * _UInt(__R)
>> +          : _UInt(__logRrd.__floor_pow);
>> +      _GLIBCXX_GEN_CON_CONST _UInt __x =  __Rk / __rd;
>>
>>        while (true)
>>         {
>>           _UInt __Ri{1};
>> -         _UInt __sum{__urng() - _Urbg::min()};
>> +         _UInt __sum(__urng() - _Urbg::min());
>>           for (int __i = __k - 1; __i > 0; --__i)
>>             {
>> -             __Ri *= __R;
>> -             __sum += _UInt{__urng() - _Urbg::min()} * __Ri;
>> +             __Ri *= _UInt(__R);
>> +             __sum += _UInt(__urng() - _Urbg::min()) * __Ri;
>>             }
>>           const _RealT __ret = _RealT(__sum / __x) / _RealT(__rd);
>>           if (__ret < _RealT(1.0))
>>             return __ret;
>>         }
>> +#undef _GLIBCXX_GEN_CON_CONST
>>      }
>>
>>  #if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
>> diff --git 
>> a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
>>  
>> b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
>> new file mode 100644
>> index 00000000000..c8d5b5d263b
>> --- /dev/null
>> +++ 
>> b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
>> @@ -0,0 +1,39 @@
>> +// { dg-do compile { target { c++11 } } }
>> +
>> +#include <random>
>> +
>> +template<typename _Real, typename _URBG>
>> +void
>> +test_engine()
>> +{
>> +  _URBG __engine;
>> +  (void)std::generate_canonical<_Real, size_t(-1)>(__engine);
>> +}
>> +
>> +template<typename _Real>
>> +void
>> +test_all_engines()
>> +{
>> +  test_engine<_Real, std::default_random_engine>();
>> +
>> +  test_engine<_Real, std::minstd_rand0>();
>> +  test_engine<_Real, std::minstd_rand>();
>> +  test_engine<_Real, std::mt19937>();
>> +  test_engine<_Real, std::mt19937_64>();
>> +  test_engine<_Real, std::ranlux24_base>();
>> +  test_engine<_Real, std::ranlux48_base>();
>> +  test_engine<_Real, std::ranlux24>();
>> +  test_engine<_Real, std::ranlux48>();
>> +  test_engine<_Real, std::knuth_b>();
>> +#if __cplusplus > 202302L
>> +  test_engine<_Real, std::philox4x32>();
>> +  test_engine<_Real, std::philox4x64>();
>> +#endif
>> +}
>> +
>> +int main()
>> +{
>> +  test_all_engines<float>();
>> +  test_all_engines<double>();
>> +  test_all_engines<long double>();
>> +}
>> --
>> 2.52.0
>>

Re: [PATCH v2] libstdc++: Use smallest possible integer for __generate_cannonical_any

Reply via email to