[PATCHv9] libstdc++: Add generate_canonical impl (P0952, LWG2524) [PR119739]

Nathan Myers Mon, 18 Aug 2025 19:15:00 -0700

Changes in v9:
 * Split out implementations from public pre-26 and 26 APIs.
 * Add remaining support for non-radix2 float types in C++26.
 * Handle pow() and floor() compatibly with non-built-in types, and Clang.
 * Add testing for a non-radix2 float type.
 * Iron out gratuitous differences between radix2 and p0952 impls.


Changes in v8:
 * Apply LWG 2524 resolution to C++11..C++23 implementations, too.
 * In those, eliminate std::log called twice per entry, and do all
  setup calculations at compile time.
 * "if constexpr" the C++26 version to call the slightly-faster
  older version when conditions permit: actually almost always.

Change in v7:
 * Fix local consteval floor() meant for Clang.

Changes in v6:
 * Implement also for Clang, using local consteval substitutes for its
  non-constexpr __builtin_pow and __builtin_floor.
 * Add a test for long double.

Changes in v5:
 * Static-assert movable RNG object correctly.
 * Add a more comprehensive test gencanon.cc

Changes in v4:
 * Static-assert arg is floating-point, coercible from bigger unsigned.
 * Static-assert arg satisfies uniform_random_bit_generator, movable.
 * Include uniform_int_dist.h for concept uniform_random_bit_generator
 * Coerce floating consts from unsigned literals, matching other usage.
 * Remove redundant type decorations static and -> .
 * Use __builtin_floor for broader applicability than std::floor.
 * Use __builtin_pow in place of immediate lambda.
 * Rename, rearrange local objects for clarity.
 * Add redundant "if constexpr (k > 1)" for clarity.
 * #if-guard new impl against Clang, which lacks constexpr floor() etc.

Changes in v3:
 * Implement exactly as specified in the WP, for C++26 and up.

Implement P0952R2 "A new specification for std::generate_canonical".

It has us start over if the naïve generation of a value in 0..1
comes up exactly equal to 1, which occurs too rarely for the change
to measurably affect performance. The algorithm in 26 more general,
supporting floating point types other than radix-2.

The fix is extended to all of C++11/14/17/20/23/26, though only
C++26 gets support for non-radix-2 floats. (Yay z-series?)
When conditions permit, the 26 version calls the old, slightly
faster implementation.

This patch adds a new, exact, thorough test for statistical
properties, on all standard releases. It adds a new static_assert
on the RNG requirements for C++26, and verifies correct operation
on a non-radix-2 float type.

A test for the case addressed in P0952 already appears in 64351.cc,
but is extended gencanon.cc. This change amounts to a different 
resolution for PR64351 and LWG 2524.

libstdc++-v3/ChangeLog:
        PR libstdc++/119739
        * include/bits/random.tcc: Add generate_canonical impl for C++26.
        * include/std/random: Include uniform_int_dist.h for concept.
        * 
testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc:
        Adapt test for both pre- and post- C++26.
        * 
testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc:
        Test for float and double from 32-bit RNG, including 1.0 do-over.
---
 libstdc++-v3/include/bits/random.tcc          | 163 ++++++++++++++----
 libstdc++-v3/include/std/random               |   1 +
 .../operators/64351.cc                        |  25 +--
 .../operators/gencanon.cc                     | 161 +++++++++++++++++
 4 files changed, 301 insertions(+), 49 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc

diff --git a/libstdc++-v3/include/bits/random.tcc 
b/libstdc++-v3/include/bits/random.tcc
index 53ccacb2e38..78afad42f97 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3349,43 +3349,148 @@ namespace __detail
        }
     }
 
-  template<typename _RealType, size_t __bits,
+//////
+// generate_canonical(RNG&)
+
+  // This works when float radix == 2 and the RNG returns [0..2^n).
+  // It is slightly faster than the P0952 version below, and is C++11.
+  template<typename _RealT, size_t __bits, typename _URBG>
+    _RealT
+    __generate_canonical_base_2(_URBG& __urng)
+    {
+      const size_t __b_max = std::numeric_limits<_RealT>::digits;
+      const size_t __b = __bits < __b_max ? __bits : __b_max;
+      const unsigned long long __range = _URBG::max() - _URBG::min();
+      const size_t __ullsize = std::numeric_limits<unsigned long long>::digits;
+      const size_t __log2r = __ullsize - __builtin_clzll(__range);
+      const _RealT __r = (_RealT)(__range) + 1.0L;
+      const int __m_min = (__b + __log2r - 1UL) / __log2r;
+      const int __m = __m_min < 1 ? 1 : __m_min;
+
+      _RealT __ret;
+      do
+       {
+         _RealT __tmp = _RealT(__r);
+         _RealT __sum = _RealT(__urng() - __urng.min());
+         for (int __k = __m - 1; __k > 0; --__k, __tmp *= __r)
+            __sum += _RealT(__urng() - __urng.min()) * __tmp;
+         __ret = __sum / __tmp;
+       } while (__builtin_expect(__ret >= _RealT(1), 0));
+      return __ret;
+    }
+
+#if __cplusplus < 202400
+
+  template<typename _RealT, size_t __bits,
           typename _UniformRandomNumberGenerator>
-    _RealType
+    _RealT
     generate_canonical(_UniformRandomNumberGenerator& __urng)
     {
-      static_assert(std::is_floating_point<_RealType>::value,
-                   "template argument must be a floating point type");
-
-      const size_t __b
-       = std::min(static_cast<size_t>(std::numeric_limits<_RealType>::digits),
-                   __bits);
-      const long double __r = static_cast<long double>(__urng.max())
-                           - static_cast<long double>(__urng.min()) + 1.0L;
-      const size_t __log2r = std::log(__r) / std::log(2.0L);
-      const size_t __m = std::max<size_t>(1UL,
-                                         (__b + __log2r - 1UL) / __log2r);
-      _RealType __ret;
-      _RealType __sum = _RealType(0);
-      _RealType __tmp = _RealType(1);
-      for (size_t __k = __m; __k != 0; --__k)
+      static_assert(std::is_floating_point<_RealT>::value,
+       "template argument must be a floating point type");
+      static_assert(std::numeric_limits<_RealT>::radix == 2,
+       "floating point type must be radix 2");
+
+      return __generate_canonical_base_2<_RealT, __bits>(__urng);
+    }
+
+#else // C++26, conform to P0952.
+
+  template <typename _Tp>
+    consteval _Tp
+    __gen_con_pow(_Tp __base, size_t __exponent)
+    {
+      if constexpr (requires { std::pow(__base, __exponent); })
+       return std::pow(__base, __exponent);
+# ifndef __clang__ // Clang 20 gets confused here.
+      else if constexpr (requires { __builtin_pow(__base, __exponent); })
+       return __builtin_pow(__base, __exponent);
+# endif
+      else
+      {
+       _Tp __prod = __base;
+       while (--__exponent != 0) __prod *= __base;
+       return __prod;
+      }
+    }
+
+  template <typename _Tp>
+    constexpr _Tp
+    __gen_con_floor(_Tp __x)
+    {
+      if constexpr (requires { std::floor(__x); })
+       return std::floor(__x);
+# ifndef __clang__ // Clang 20 gets confused here.
+      else if constexpr (requires { __builtin_floor(__x); })
+       return __builtin_floor(__x);
+# endif
+      else
+      {
+       unsigned long long __tmp = __x;
+       return __tmp - (_Tp(__tmp) > __x);
+      }
+    }
+
+  template<typename _RealT, size_t __digits, typename _URBG>
+    _RealT
+    __generate_canonical_anyradix(_URBG& __urng)
+    {
+      // Names below are chosen to match the description in the Standard.
+      const size_t  __r = std::numeric_limits<_RealT>::radix;
+      const auto  __sample = [](auto __high) constexpr
+       { return _RealT(__high - _URBG::min()); };
+      constexpr _RealT  __R = _RealT(1) + __sample(_URBG::max());
+      const size_t  __d_max = std::numeric_limits<_RealT>::digits;
+      const size_t  __d = __digits < __d_max ? __digits : __d_max;
+      constexpr _RealT  __rd = __gen_con_pow(_RealT(__r), __d);
+      const unsigned  __k = [](_RealT __R, _RealT __rd) consteval
        {
-         __sum += _RealType(__urng() - __urng.min()) * __tmp;
-         __tmp *= __r;
-       }
-      __ret = __sum / __tmp;
-      if (__builtin_expect(__ret >= _RealType(1), 0))
+         unsigned __i = 1;
+         for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
+           ++__i;
+         return __i;
+       } (__R, __rd);
+      constexpr _RealT  __Rk = __gen_con_pow(__R, __k);
+      constexpr _RealT  __x = __gen_con_floor(__Rk / __rd);
+      static_assert((__x <= __Rk / __rd) && (__x + _RealT(1) > __Rk / __rd));
+      constexpr _RealT  __xrd = __x * __rd;
+
+      _RealT __sum;
+      do
        {
-#if _GLIBCXX_USE_C99_MATH_FUNCS
-         __ret = std::nextafter(_RealType(1), _RealType(0));
-#else
-         __ret = _RealType(1)
-           - std::numeric_limits<_RealType>::epsilon() / _RealType(2);
-#endif
-       }
+         _RealT __Ri = _RealT(1);
+         __sum = __sample(__urng());
+         for (int __i = __k - 1; __i > 0; --__i)
+           __Ri *= __R, __sum += __sample(__urng()) * __Ri;
+       } while (__builtin_expect( __sum >= __xrd, false ));
+      const _RealT __ret = __gen_con_floor(__sum / __x) / __rd;
       return __ret;
     }
 
+  template<typename _RealT, size_t __digits, typename _URBG>
+    _RealT
+    generate_canonical(_URBG& __urng)
+    {
+      static_assert(requires {
+       [](uniform_random_bit_generator auto) {}(
+           static_cast<std::remove_cv_t<_URBG>&&>(__urng)); },
+       "argument must meet uniform_random_bit_generator requirements");
+      static_assert(
+       std::is_floating_point_v<_RealT> && requires { _RealT(_URBG::max()); },
+       "template argument must be floating point, coercible from unsigned");
+
+      // See if radix 2 and RNG range is a power of 2.
+      //   (Note, the latter check works even when (__range + 1) overflows.)
+      const size_t  __r = std::numeric_limits<_RealT>::radix;
+      const auto __range = _URBG::max() - _URBG::min();
+      if constexpr (__r == 2 && ((__range + 1) & __range) == 0)
+       return __generate_canonical_base_2<_RealT, __digits>(__urng);
+      else
+       return __generate_canonical_anyradix<_RealT, __digits>(__urng);
+    }
+
+#endif // C++26
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
diff --git a/libstdc++-v3/include/std/random b/libstdc++-v3/include/std/random
index 0e058a04bd9..86d4f0a8380 100644
--- a/libstdc++-v3/include/std/random
+++ b/libstdc++-v3/include/std/random
@@ -49,6 +49,7 @@
 #include <type_traits>
 #include <bits/random.h>
 #include <bits/opt_random.h>
+#include <bits/uniform_int_dist.h>
 #include <bits/random.tcc>
 
 #endif // C++11
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
index 3c0cb8bbd49..a5c8302f7b3 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
@@ -21,21 +21,6 @@
 #include <random>
 #include <testsuite_hooks.h>
 
-// libstdc++/64351
-void
-test01()
-{
-  std::mt19937 rng(8890);
-  std::uniform_real_distribution<float> dist;
-
-  rng.discard(30e6);
-  for (long i = 0; i < 10e6; ++i)
-    {
-      auto n = dist(rng);
-      VERIFY( n != 1.f );
-    }
-}
-
 // libstdc++/63176
 void
 test02()
@@ -45,21 +30,21 @@ test02()
   rng.seed(sequence);
   rng.discard(12 * 629143);
   std::mt19937 rng2{rng};
+  int mismatch = 0;
   for (int i = 0; i < 20; ++i)
   {
-    float n =
-      std::generate_canonical<float, std::numeric_limits<float>::digits>(rng);
+    float n = std::generate_canonical<float, 100>(rng);
     VERIFY( n != 1.f );
 
-    // PR libstdc++/80137
     rng2.discard(1);
-    VERIFY( rng == rng2 );
   }
+  // PR libstdc++/80137
+  rng2.discard(1);  // account for a 1.0 generated and discarded.
+  VERIFY( rng == rng2 );
 }
 
 int
 main()
 {
-  test01();
   test02();
 }
diff --git 
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
new file mode 100644
index 00000000000..8269444647a
--- /dev/null
+++ 
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
@@ -0,0 +1,161 @@
+// { dg-do run { target { c++11 && { ! simulator } } } }
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <cmath>
+#include <testsuite_hooks.h>
+#include <array>
+
+#if __cplusplus >= 202400
+struct real_radix4
+{
+  float f;
+  constexpr real_radix4() = default;
+
+  constexpr real_radix4(float g) : f(g) {}
+  constexpr explicit real_radix4(std::integral auto n) : f(n) {}
+  constexpr real_radix4 operator/(real_radix4 g) const
+    { return real_radix4(f / g.f); }
+  constexpr real_radix4 operator*(real_radix4 g) const
+    { return real_radix4(f * g.f); }
+  constexpr real_radix4 operator+(real_radix4 g) const
+    { return real_radix4(f + g.f); }
+
+  constexpr auto operator<=>(real_radix4 const& x) const = default;
+
+  constexpr real_radix4& operator+=(real_radix4 g) { f += g.f; return *this; }
+  constexpr real_radix4& operator*=(real_radix4 g) { f *= g.f; return *this; }
+  template <std::integral I>
+    constexpr operator I() const { return I(f); }
+};
+
+namespace std
+{
+  template <>
+    struct numeric_limits<real_radix4>
+    {
+      static const std::size_t digits = 12;
+      static const std::size_t radix = 4;
+      static const bool is_iec559 = false;
+    };
+  template <>
+    struct is_floating_point<real_radix4> : std::true_type {};
+
+    inline constexpr real_radix4
+    floor(real_radix4 f) { return  real_radix4(std::floor(f.f)); }
+}
+#endif
+
+// Verify P0952R9 implementation requiring a second round-trip
+// if first yields exactly 1. In this test, the RNG delivering
+// 32 bits per call is seeded such that this occurs once on the
+// sixth iteration for float, and not at all for double.
+// However, each double iteration requires two calls to the RNG.
+
+template <typename T, typename RNG>
+void
+test01(RNG& rng, RNG& rng2,
+  int& deviation, int& max, int& rms, int& zero, int& skips)
+{
+  const auto size = 1000000, buckets = 100;
+  std::array<int, buckets> histo{};
+  for (auto i = 0; i != size; ++i) {
+    T sample = std::generate_canonical<T, 100>(rng);
+    VERIFY(sample >= T(0.0));
+    VERIFY(sample < T(1.0));  // libstdc++/64351
+    if (sample == T(0.0)) {
+      ++zero;
+    }
+    auto bucket = static_cast<int>(std::floor(sample * buckets));
+    ++histo[bucket];
+    rng2.discard(1);
+    if (rng != rng2) {
+      ++skips;
+      rng2.discard(1);
+      VERIFY(rng == rng2);
+    }
+  }
+  int devsquare = 0;
+  for (int i = 0; i < buckets; ++i) {
+    const auto expected = size / buckets;
+    auto count = histo[i];
+    auto diff = count - expected;
+    if (diff < 0) diff = -diff;
+    deviation += diff;
+    devsquare += diff * diff;
+    if (diff > max) max = diff;
+  }
+  rms = std::sqrt(devsquare);
+}
+
+int
+main()
+{
+  std::mt19937 rng(8890);
+  std::seed_seq sequence{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  rng.seed(sequence);
+  rng.discard(12 * 629143);
+
+  { // float
+    int deviation{}, max{}, rms{}, zero{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<float>(rng2, rng3, deviation, max, rms, zero, skips);
+
+    if (std::numeric_limits<float>::is_iec559) {
+      VERIFY(deviation == 7032);
+      VERIFY(max == 276);
+      VERIFY(rms == 906);
+      VERIFY(zero == 0);
+    }
+    VERIFY(skips == 1);
+  }
+  { // double
+    int deviation{}, max{}, rms{}, zero{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<double>(rng2, rng3, deviation, max, rms, zero, skips);
+
+    if (std::numeric_limits<double>::is_iec559) {
+      VERIFY(deviation == 7650);
+      VERIFY(max == 259);
+      VERIFY(rms == 975);
+      VERIFY(zero == 0);
+    }
+    VERIFY(skips == 1000000);
+  }
+  { // long double, same answers as double
+    int deviation{}, max{}, rms{}, zero{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<long double>(rng2, rng3, deviation, max, rms, zero, skips);
+
+    if (std::numeric_limits<double>::is_iec559) {
+      VERIFY(deviation == 7650);
+      VERIFY(max == 259);
+      VERIFY(rms == 975);
+      VERIFY(zero == 0);
+    }
+    VERIFY(skips == 1000000);
+  }
+
+#if __cplusplus >= 202400
+  { // local type, radix == 4
+    int deviation{}, max{}, rms{}, zero{}, skips{};
+    auto rng2{rng};
+    auto rng3{rng};
+    test01<real_radix4>(rng2, rng3, deviation, max, rms, zero, skips);
+
+    if constexpr (std::numeric_limits<float>::is_iec559)
+    {
+      VERIFY(deviation == 7032);
+      VERIFY(max == 276);
+      VERIFY(rms == 906);
+      VERIFY(zero == 0);
+    }
+    VERIFY(skips == 1);
+  }
+#endif
+  return 0;
+}
-- 
2.50.0

[PATCHv9] libstdc++: Add generate_canonical impl (P0952, LWG2524) [PR119739]

Reply via email to