[PATCH] libstdc++: Use smallest possible integer for __generate_cannonical_any

Tomasz Kamiński Thu, 18 Dec 2025 05:50:13 -0800

If the span of the range R produced by uniform bit generator U passed to
generate_canonical is not power of two, we need to use algorithm that
requires computing power R^k that is greater than 2^d, where d is number
of digits in mantissa of _RealT. Previously we have used an integer type
that is has twice as many digits as d. This lead to situation that for
standared engines that produced such range (like std::minstd_rand0,
 std::minstd_rand, std::ranlux24, ....) an 256bit integer support was
required for 128bit floats. However, in this cases R^4 provides more
than d bits of precision, while fitting in 124 bits.


This patch changes the algorithm for computing the number of digits required
to store R^k, based on value of k:
* bit_width of R in case of k == 1,
* __d + 1 if R^k fits inside them,
* __d * 2 otherwise.
For C++11 we cannot compute __k at compile time as the required operations on
__rand_unint128 as support since C++14. In this case we use overestimate an
number of required bits, by computing how many power of two that are smaller
than R we need. This is done by computing ceil(d / log2(R)).

We replace __gen_can_pow and __gen_can_rng_calls_needed with __gen_can_log(v, 
b),
that computes the largest power of b that fits into v. As such number is smaller
than v, the result will always fit in provided value. Both the logirtm and
the power value are returned in __gen_can_pow-res struct.

libstdc++-v3/ChangeLog:

        * include/bits/random.h (__rand_uint128::operator>)
        (__rand_uint128::operator>=): Define.
        * include/bits/random.tcc (__generate_canonical_pow2):
        Adjust for use of __rand_uint128 in C++11.
        (__gen_can_pow, __gen_can_rng_calls_needed): Replace with
        __gen_can_log.
        (__gen_can_log_res, __gen_can_log): Define.
        (__generate_canonical_any): Reworked how _UInt is determined.
        * 
testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc:
        New test.
---
Testing on x86_64-linux. The new gencanon_eng.cc passed both on with and
without -m32. With this change we support 128bit floats with all standard
defined enginess in all supported modes. It is still possible to construct an
custom engine, for which 256 bits integer would be required.

Testint on x86_64-linux. OK for trunk, when tests passes.

 libstdc++-v3/include/bits/random.h            |   8 ++
 libstdc++-v3/include/bits/random.tcc          | 112 +++++++++++++-----
 .../operators/gencanon_eng.cc                 |  44 +++++++
 3 files changed, 133 insertions(+), 31 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc

diff --git a/libstdc++-v3/include/bits/random.h 
b/libstdc++-v3/include/bits/random.h
index 7a9ba0a7a9e..117d63dff7f 100644
--- a/libstdc++-v3/include/bits/random.h
+++ b/libstdc++-v3/include/bits/random.h
@@ -463,9 +463,17 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
          return false;
       }
 
+      friend _GLIBCXX14_CONSTEXPR bool
+      operator>(const type& __l, const type& __r) noexcept
+      { return __r < __l; }
+
       friend _GLIBCXX14_CONSTEXPR bool
       operator<=(const type& __l, const type& __r) noexcept
       { return !(__r < __l); }
+
+      friend _GLIBCXX14_CONSTEXPR bool
+      operator>=(const type& __l, const type& __r) noexcept
+      { return !(__l < __r); }
 #endif
 
       friend _GLIBCXX14_CONSTEXPR bool
diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 0256059ab72..edea1eb209a 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3532,16 +3532,16 @@ namespace __detail
       const unsigned __log2_Rk_max = __k * __log2_R;
       const unsigned __log2_Rk =  // Bits of entropy actually obtained:
        __log2_uint_max < __log2_Rk_max ? __log2_uint_max : __log2_Rk_max;
-      static_assert(__log2_Rk >= __d);
       // Rk = _UInt(1) << __log2_Rk;  // Likely overflows, UB.
-      constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 
_RealT(2.0);
+      _GLIBCXX14_CONSTEXPR const _RealT __Rk
+        = _RealT(_UInt(1) << (__log2_Rk - 1)) * _RealT(2.0);
 #if defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
       const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits.
 #else
       const unsigned __log2_x = 0;
 #endif
-      const _UInt __x = _UInt(1) << __log2_x;
-      constexpr _RealT __rd = __Rk / __x;
+      _GLIBCXX14_CONSTEXPR const _UInt __x = _UInt(1) << __log2_x;
+      _GLIBCXX14_CONSTEXPR const _RealT __rd = __Rk / _RealT(__x);
       // xrd = __x << __d;  // Could overflow.
 
       while (true)
@@ -3552,31 +3552,52 @@ namespace __detail
              __shift += __log2_R;
              __sum |= _UInt(__urng() - _Urbg::min()) << __shift;
            }
-         const _RealT __ret = _RealT(__sum >> __log2_x) / __rd;
+         const _RealT __ret = _RealT(__sum >> __log2_x) / _RealT(__rd);
          if (__ret < _RealT(1.0))
            return __ret;
        }
     }
 
-  template <typename _UInt>
-    constexpr _UInt
-    __gen_can_pow(_UInt __base, size_t __exponent)
+
+  template<typename _UInt>
+    struct __gen_can_log_res
     {
-      _UInt __prod = __base;
-      while (--__exponent != 0) __prod *= __base;
-      return __prod;
-    }
+       unsigned __floor_log;
+       _UInt __floor_pow;
+
+       template<typename _UInt2>
+       constexpr
+       operator __gen_can_log_res<_UInt2>() const
+       { return {__floor_log, _UInt2(__floor_pow)}; }
+
+       constexpr __gen_can_log_res
+       update(_UInt __base) const
+       { return {__floor_log + 1, __floor_pow * __base}; }
+    };
 
-  template <typename _UInt>
-    constexpr unsigned
-    __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
+  template <typename _UInt1, typename _UInt2>
+    constexpr __gen_can_log_res<_UInt1>
+    __gen_can_log(_UInt1 __val, _UInt2 __base)
     {
-      unsigned __i = 1;
-      for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
-       ++__i;
-      return __i;
+      if constexpr (sizeof(_UInt2) > sizeof(_UInt1))
+       return __gen_can_log(_UInt2(__val), __base);
+      else if constexpr (!is_same<_UInt1, _UInt2>::value)
+       return __gen_can_log(__val, _UInt1(__base));
+      else
+#if __cplusplus >= 201402L
+       {
+          __gen_can_log_res<_UInt1> __res{0, _UInt1(1)};
+          for (_UInt1 __x = __val; __x >= __base; __x /= __base)
+            __res = __res.update(__base);
+          return __res;
+       }
+#else
+       return (__val >= __base)
+            ? __gen_can_log(__val / __base, __base).update(__base)
+             : __gen_can_log_res<_UInt1>{0, _UInt1(1)};
+#endif
     }
-
+  
   // This version must be used when the range of possible RNG results,
   // Urbg::max()-Urbg::min(), is not a power of two less one. The UInt
   // type passed must be big enough to represent Rk, R^k, a power of R
@@ -3587,25 +3608,54 @@ namespace __detail
     _RealT
     __generate_canonical_any(_Urbg& __urng)
     {
-      using _UInt = typename __detail::_Select_uint_least_t<__d * 2>::type;
-
       // Names below are chosen to match the description in the Standard.
       // Parameter d is the actual target number of bits.
-      const _UInt __r{2};
-      const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
-      const _UInt __rd = _UInt(1) << __d;
-      const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
-      const _UInt __Rk = __gen_can_pow(__R, __k);
-      const _UInt __x =  __Rk / __rd;
+
+      using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
+      // Cannot overflow, as _Urbg::max() - _Urbg::min() is not power of
+      // two minus one
+      constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
+
+      using _UIntrd = typename __detail::_Select_uint_least_t<__d + 1>::type;
+      _GLIBCXX14_CONSTEXPR const _UIntrd __rd = _UIntrd(1) << __d;
+      _GLIBCXX14_CONSTEXPR const auto __logRrd = __gen_can_log(__rd, __R);
+      _GLIBCXX14_CONSTEXPR const unsigned __k
+        = __logRrd.__floor_log + (__rd > __logRrd.__floor_pow);
+
+      constexpr unsigned __log2R
+       = sizeof(_UIntR) * __CHAR_BIT__ - __builtin_clzg(__R) - 1;
+#if __cplusplus >= 201402L
+      constexpr unsigned __bits =
+        // R is larger than rd, use as much bits as _R requires
+        (__k == 1) ? __log2R + 1 :
+        // R^k == rd, we can use _UIntrd, i.e. __d + 1
+        (__k == __logRrd.__floor_log) ? __d + 1 :
+        // R^k > rd, but still first into _UIntd,
+        (~_UIntrd(0) / __logRrd.__floor_pow >= _UIntrd(__R)) ? __d + 1 :
+        // R^k will not fit in _Uintrd, we need bit_width(R^(k-1)) + 
bit_width(R),
+        // but next integer will have twice as many bits anyway, so use it
+        __d * 2;
+#else
+      // the value of __k is not know at compile time in that case
+      constexpr unsigned __kover = (__d + __log2R - 1) / __log2R;
+      constexpr unsigned __bits = __log2R * __kover;
+#endif       
+      using _UInt = typename __detail::_Select_uint_least_t<__bits>::type;
+
+      _GLIBCXX14_CONSTEXPR const _UInt __Rk
+        = (__k > __logRrd.__floor_log)
+          ? _UInt(__logRrd.__floor_pow) * _UInt(__R)
+          : _UInt(__logRrd.__floor_pow);
+      _GLIBCXX14_CONSTEXPR const _UInt __x =  __Rk / __rd;
 
       while (true)
        {
          _UInt __Ri{1};
-         _UInt __sum{__urng() - _Urbg::min()};
+         _UInt __sum(__urng() - _Urbg::min());
          for (int __i = __k - 1; __i > 0; --__i)
            {
-             __Ri *= __R;
-             __sum += _UInt{__urng() - _Urbg::min()} * __Ri;
+             __Ri *= _UInt(__R);
+             __sum += _UInt(__urng() - _Urbg::min()) * __Ri;
            }
          const _RealT __ret = _RealT(__sum / __x) / _RealT(__rd);
          if (__ret < _RealT(1.0))
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
new file mode 100644
index 00000000000..c970ba30922
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
@@ -0,0 +1,44 @@
+// { dg-do compile { target { c++11 } } }
+
+#include <random>
+
+template<typename _Real, typename _URBG>
+void
+test_engine()
+{
+  _URBG __engine;
+  (void)std::generate_canonical<_Real, size_t(-1)>(__engine);
+}
+
+template<typename _Real>
+void
+test_all_engines()
+{
+  test_engine<_Real, std::default_random_engine>();
+
+  test_engine<_Real, std::minstd_rand0>(); 
+  test_engine<_Real, std::minstd_rand>();
+  test_engine<_Real, std::mt19937>();
+  test_engine<_Real, std::mt19937_64>();
+  test_engine<_Real, std::ranlux24_base>();
+  test_engine<_Real, std::ranlux48_base>();
+  test_engine<_Real, std::ranlux24>();
+  test_engine<_Real, std::ranlux48>();
+  test_engine<_Real, std::knuth_b>();
+#if __cplusplus > 202302L 
+  test_engine<_Real, std::philox4x32>();
+  test_engine<_Real, std::philox4x64>();
+#endif
+}
+
+int main()
+{
+  test_all_engines<float>();
+  test_all_engines<double>();
+  test_all_engines<long double>();
+#ifndef _GLIBCXX_GENERATE_CANONICAL_STRICT
+#ifdef __SIZEOF_FLOAT128__
+  test_all_engines<__float128>();
+#endif  
+#endif  
+}
-- 
2.52.0

[PATCH] libstdc++: Use smallest possible integer for __generate_cannonical_any

Reply via email to