[PATCH v2] libstdc++: Use smallest possible integer for __generate_cannonical_any

Tomasz Kamiński Fri, 19 Dec 2025 00:35:20 -0800

If the span of the range R produced by uniform bit generator U passed to
generate_canonical is not power of two, we need to use algorithm that
requires computing power R^k that is greater than 2^d, where d is number
of digits in mantissa of _RealT. Previously we have used an integer type
that is has twice as many digits as d. This lead to situation that for
standard engines that produced such range (like std::minstd_rand0,
std::minstd_rand, std::ranlux24, ....) 256bit integer support was
required for 128bit floats. However, in this cases R^4 provides more
than d bits of precision, while requring 124 bits.


We overestimate the number of required bits, by computing a value
l * bit_width(R) (log2(R) + 1), where l is value such that log2(R) * l >= d.
As R >= 2^log2(R), then R^l >= (2^log2(R))^l == 2^(log(R) * l) >= 2^d,
 so  k+1 >= l >= k. In consequence R^k is smaller R^l which require at most
l * bit_width(R). This is an overestimate, but difference should not be
higher than l bits.

In C++11 mode for architectures that do not support __int128 we cannot compute,
__k at compile time as the required operations on __rand_unint128 as support
since C++14. In this case we use overestimate an number of required bits, by
computing how many power of two that are smaller than R we need. This is done
by computing ceil(d / log2(R)).

We replace __gen_can_pow and __gen_can_rng_calls_needed with __gen_can_log(v, 
b),
that computes the largest power of b that fits into v. As such number is smaller
than v, the result will always fit in provided value. Both the logirtm and
the power value are returned in __gen_can_pow-res struct.

libstdc++-v3/ChangeLog:

        * include/bits/random.h (__rand_uint128::operator>)
        (__rand_uint128::operator>=): Define.
        * include/bits/random.tcc (__generate_canonical_pow2):
        Adjust for use of __rand_uint128 in C++11.
        (__gen_can_pow, __gen_can_rng_calls_needed): Replace with
        __gen_can_log.
        (__gen_can_log_res, __gen_can_log): Define.
        (__generate_canonical_any): Reworked how _UInt is determined.
        * 
testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc:
        New test.
---
v3 switches to always using bit_width(R) * ceil(d / log2(R)) bits,
as this vastly simplies the code, while the overestimate does not impact
support for standard engines for float128.

 libstdc++-v3/include/bits/random.h            |   8 ++
 libstdc++-v3/include/bits/random.tcc          | 104 +++++++++++++-----
 .../operators/gencanon_eng.cc                 |  39 +++++++
 3 files changed, 121 insertions(+), 30 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc

diff --git a/libstdc++-v3/include/bits/random.h 
b/libstdc++-v3/include/bits/random.h
index 7a9ba0a7a9e..117d63dff7f 100644
--- a/libstdc++-v3/include/bits/random.h
+++ b/libstdc++-v3/include/bits/random.h
@@ -463,9 +463,17 @@ _GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
          return false;
       }
 
+      friend _GLIBCXX14_CONSTEXPR bool
+      operator>(const type& __l, const type& __r) noexcept
+      { return __r < __l; }
+
       friend _GLIBCXX14_CONSTEXPR bool
       operator<=(const type& __l, const type& __r) noexcept
       { return !(__r < __l); }
+
+      friend _GLIBCXX14_CONSTEXPR bool
+      operator>=(const type& __l, const type& __r) noexcept
+      { return !(__l < __r); }
 #endif
 
       friend _GLIBCXX14_CONSTEXPR bool
diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 0256059ab72..38d60588fdc 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3532,16 +3532,16 @@ namespace __detail
       const unsigned __log2_Rk_max = __k * __log2_R;
       const unsigned __log2_Rk =  // Bits of entropy actually obtained:
        __log2_uint_max < __log2_Rk_max ? __log2_uint_max : __log2_Rk_max;
-      static_assert(__log2_Rk >= __d);
       // Rk = _UInt(1) << __log2_Rk;  // Likely overflows, UB.
-      constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 
_RealT(2.0);
+      _GLIBCXX14_CONSTEXPR const _RealT __Rk
+        = _RealT(_UInt(1) << (__log2_Rk - 1)) * _RealT(2.0);
 #if defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
       const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits.
 #else
       const unsigned __log2_x = 0;
 #endif
-      const _UInt __x = _UInt(1) << __log2_x;
-      constexpr _RealT __rd = __Rk / __x;
+      _GLIBCXX14_CONSTEXPR const _UInt __x = _UInt(1) << __log2_x;
+      _GLIBCXX14_CONSTEXPR const _RealT __rd = __Rk / _RealT(__x);
       // xrd = __x << __d;  // Could overflow.
 
       while (true)
@@ -3552,29 +3552,50 @@ namespace __detail
              __shift += __log2_R;
              __sum |= _UInt(__urng() - _Urbg::min()) << __shift;
            }
-         const _RealT __ret = _RealT(__sum >> __log2_x) / __rd;
+         const _RealT __ret = _RealT(__sum >> __log2_x) / _RealT(__rd);
          if (__ret < _RealT(1.0))
            return __ret;
        }
     }
 
-  template <typename _UInt>
-    constexpr _UInt
-    __gen_can_pow(_UInt __base, size_t __exponent)
+
+  template<typename _UInt>
+    struct __gen_can_log_res
     {
-      _UInt __prod = __base;
-      while (--__exponent != 0) __prod *= __base;
-      return __prod;
-    }
+       unsigned __floor_log;
+       _UInt __floor_pow;
+
+       constexpr __gen_can_log_res
+       update(_UInt __base) const
+       { return {__floor_log + 1, __floor_pow * __base}; }
+    };
 
-  template <typename _UInt>
-    constexpr unsigned
-    __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
+
+  template <typename _UInt1, typename _UInt2,
+            typename _UComm = __conditional_t<(sizeof(_UInt2) > 
sizeof(_UInt1)),
+                                             _UInt2, _UInt1>>
+    constexpr __gen_can_log_res<_UInt1>
+    __gen_can_log(_UInt1 __val, _UInt2 __base)
     {
-      unsigned __i = 1;
-      for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
-       ++__i;
-      return __i;
+#if __cplusplus >= 201402L
+      __gen_can_log_res<_UInt1> __res{0, _UInt1(1)};
+      if (_UComm(__base) > _UComm(__val))
+       return __res;
+
+      const _UInt1 __base1(__base);
+      do
+        { 
+         __val /= __base1;
+          __res = __res.update(__base1);
+        }
+      while (__val >= __base1);
+      return __res;
+#else
+      return (_UComm(__val) >= _UComm(__base))
+            ? __gen_can_log(__val / _UInt1(__base), _UInt1(__base))
+                .update(_UInt1(__base))
+            : __gen_can_log_res<_UInt1>{0, _UInt1(1)};
+#endif
     }
 
   // This version must be used when the range of possible RNG results,
@@ -3587,30 +3608,53 @@ namespace __detail
     _RealT
     __generate_canonical_any(_Urbg& __urng)
     {
-      using _UInt = typename __detail::_Select_uint_least_t<__d * 2>::type;
-
       // Names below are chosen to match the description in the Standard.
       // Parameter d is the actual target number of bits.
-      const _UInt __r{2};
-      const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
-      const _UInt __rd = _UInt(1) << __d;
-      const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
-      const _UInt __Rk = __gen_can_pow(__R, __k);
-      const _UInt __x =  __Rk / __rd;
+#if (__cplusplus >= 201402L) || defined(__SIZEOF_INT128__)
+#  define _GLIBCXX_GEN_CON_CONST constexpr
+#else
+#  define _GLIBCXX_GEN_CON_CONST const
+#endif
+
+      using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
+      // Cannot overflow, as _Urbg::max() - _Urbg::min() is not power of
+      // two minus one
+      constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
+      constexpr unsigned __log2R
+       = sizeof(_UIntR) * __CHAR_BIT__ - __builtin_clzg(__R) - 1;
+      // We overstimate number of required bits, by computing
+      // r such that l * log2(R) >= d, so:
+      // R^l >= (2 ^ log2(R)) ^ l == 2 ^ (log2(r) * l) >= 2^d
+      // And then requiring l * bit_width(R) bits.
+      constexpr unsigned __l = (__d + __log2R - 1) / __log2R;
+      constexpr unsigned __bits = (__log2R + 1) * __l;
+      using _UInt = typename __detail::_Select_uint_least_t<__bits>::type;
+
+      _GLIBCXX_GEN_CON_CONST _UInt __rd = _UInt(1) << __d;
+      _GLIBCXX_GEN_CON_CONST auto __logRrd = __gen_can_log(__rd, __R);
+      _GLIBCXX_GEN_CON_CONST unsigned __k
+        = __logRrd.__floor_log + (__rd > __logRrd.__floor_pow);
+
+      _GLIBCXX_GEN_CON_CONST _UInt __Rk
+        = (__k > __logRrd.__floor_log)
+          ? _UInt(__logRrd.__floor_pow) * _UInt(__R)
+          : _UInt(__logRrd.__floor_pow);
+      _GLIBCXX_GEN_CON_CONST _UInt __x =  __Rk / __rd;
 
       while (true)
        {
          _UInt __Ri{1};
-         _UInt __sum{__urng() - _Urbg::min()};
+         _UInt __sum(__urng() - _Urbg::min());
          for (int __i = __k - 1; __i > 0; --__i)
            {
-             __Ri *= __R;
-             __sum += _UInt{__urng() - _Urbg::min()} * __Ri;
+             __Ri *= _UInt(__R);
+             __sum += _UInt(__urng() - _Urbg::min()) * __Ri;
            }
          const _RealT __ret = _RealT(__sum / __x) / _RealT(__rd);
          if (__ret < _RealT(1.0))
            return __ret;
        }
+#undef _GLIBCXX_GEN_CON_CONST 
     }
 
 #if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
new file mode 100644
index 00000000000..c8d5b5d263b
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon_eng.cc
@@ -0,0 +1,39 @@
+// { dg-do compile { target { c++11 } } }
+
+#include <random>
+
+template<typename _Real, typename _URBG>
+void
+test_engine()
+{
+  _URBG __engine;
+  (void)std::generate_canonical<_Real, size_t(-1)>(__engine);
+}
+
+template<typename _Real>
+void
+test_all_engines()
+{
+  test_engine<_Real, std::default_random_engine>();
+
+  test_engine<_Real, std::minstd_rand0>(); 
+  test_engine<_Real, std::minstd_rand>();
+  test_engine<_Real, std::mt19937>();
+  test_engine<_Real, std::mt19937_64>();
+  test_engine<_Real, std::ranlux24_base>();
+  test_engine<_Real, std::ranlux48_base>();
+  test_engine<_Real, std::ranlux24>();
+  test_engine<_Real, std::ranlux48>();
+  test_engine<_Real, std::knuth_b>();
+#if __cplusplus > 202302L 
+  test_engine<_Real, std::philox4x32>();
+  test_engine<_Real, std::philox4x64>();
+#endif
+}
+
+int main()
+{
+  test_all_engines<float>();
+  test_all_engines<double>();
+  test_all_engines<long double>();
+}
-- 
2.52.0

[PATCH v2] libstdc++: Use smallest possible integer for __generate_cannonical_any

Reply via email to