Changes in v11: * Add doxygen entry for generate_canonical. Changes in v10: * Rewrite entirely after consultation with P0952 authors. * Require radix2 for all float types. * Perform all intermediate calculations on integers. * Use one implementation for all C++11 to C++26. * Optimize for each digits resolution. * Test also with irregular URBG returning 0..999999. * Work under Clang, for -std=c++14 and up. * By default and where possible (which is usually), preserve more entropy than the naive Standard prescription.
Changes in v9: * Split out implementations from public pre-26 and 26 APIs. * Add remaining support for non-radix2 float types in C++26. * Handle pow() and floor() compatibly with non-built-in types, and Clang. * Add testing for a non-radix2 float type. * Iron out gratuitous differences between radix2 and p0952 impls. Changes in v8: * Apply LWG 2524 resolution to C++11..C++23 implementations, too. * In those, eliminate std::log called twice per entry, and do all setup calculations at compile time. * "if constexpr" the C++26 version to call the slightly-faster older version when conditions permit: actually almost always. Change in v7: * Fix local consteval floor() meant for Clang. Changes in v6: * Implement also for Clang, using local consteval substitutes for its non-constexpr __builtin_pow and __builtin_floor. * Add a test for long double. Changes in v5: * Static-assert movable RNG object correctly. * Add a more comprehensive test gencanon.cc Changes in v4: * Static-assert arg is floating-point, coercible from bigger unsigned. * Static-assert arg satisfies uniform_random_bit_generator, movable. * Include uniform_int_dist.h for concept uniform_random_bit_generator * Coerce floating consts from unsigned literals, matching other usage. * Remove redundant type decorations static and -> . * Use __builtin_floor for broader applicability than std::floor. * Use __builtin_pow in place of immediate lambda. * Rename, rearrange local objects for clarity. * Add redundant "if constexpr (k > 1)" for clarity. * #if-guard new impl against Clang, which lacks constexpr floor() etc. Changes in v3: * Implement exactly as specified in the WP, for C++26 and up. Implement P0952R2 "A new specification for std::generate_canonical". It has us start over, using new entropy, if the naïve generation of a value in 0..1 comes up not-less-than 1, which occurs too rarely for the change to measurably affect performance. The fix is extended to all of C++11 to 26. It dispatches to variations optimized for each bit depth, using minimal integer sizes for exact intermediate calculations, and prefers shift operations over multiply/divide where practical. This patch adds thorough tests for statistical properties. It adds new static asserts on template argument requirements where supported. It adds a test using a non-optimal RNG that yields values 0..999999. A test for the case addressed in P0952 already appears in 64351.cc. This change amounts to a different resolution for PR64351 and LWG 2524. libstdc++-v3/ChangeLog: PR libstdc++/119739 * include/bits/random.tcc: Add generate_canonical impl for C++26. * include/std/random: Include uniform_int_dist.h for concept. * testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc: Adapt test for both pre- and post- C++26. * testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc: Test for float and double from 32-bit RNG, including 1.0 do-over. --- libstdc++-v3/include/bits/random.tcc | 243 +++++++++++++++--- libstdc++-v3/include/std/random | 1 + .../operators/64351.cc | 25 +- .../operators/gencanon.cc | 165 ++++++++++++ 4 files changed, 382 insertions(+), 52 deletions(-) create mode 100644 libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc diff --git a/libstdc++-v3/include/bits/random.tcc b/libstdc++-v3/include/bits/random.tcc index 53ccacb2e38..ff2577e1141 100644 --- a/libstdc++-v3/include/bits/random.tcc +++ b/libstdc++-v3/include/bits/random.tcc @@ -31,6 +31,7 @@ #define _RANDOM_TCC 1 #include <numeric> // std::accumulate and std::partial_sum +#include <cstdint> namespace std _GLIBCXX_VISIBILITY(default) { @@ -3349,43 +3350,221 @@ namespace __detail } } - template<typename _RealType, size_t __bits, - typename _UniformRandomNumberGenerator> - _RealType - generate_canonical(_UniformRandomNumberGenerator& __urng) - { - static_assert(std::is_floating_point<_RealType>::value, - "template argument must be a floating point type"); - - const size_t __b - = std::min(static_cast<size_t>(std::numeric_limits<_RealType>::digits), - __bits); - const long double __r = static_cast<long double>(__urng.max()) - - static_cast<long double>(__urng.min()) + 1.0L; - const size_t __log2r = std::log(__r) / std::log(2.0L); - const size_t __m = std::max<size_t>(1UL, - (__b + __log2r - 1UL) / __log2r); - _RealType __ret; - _RealType __sum = _RealType(0); - _RealType __tmp = _RealType(1); - for (size_t __k = __m; __k != 0; --__k) - { - __sum += _RealType(__urng() - __urng.min()) * __tmp; - __tmp *= __r; - } - __ret = __sum / __tmp; - if (__builtin_expect(__ret >= _RealType(1), 0)) - { -#if _GLIBCXX_USE_C99_MATH_FUNCS - __ret = std::nextafter(_RealType(1), _RealType(0)); +// [rand.util.canonical] +// generate_canonical(RNG&) + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr + + constexpr unsigned + __gen_con_popcount(unsigned i) { return __builtin_popcount(i); } + constexpr unsigned + __gen_con_popcount(unsigned long i) { return __builtin_popcountl(i); } + constexpr unsigned + __gen_con_popcount(unsigned long long i) { return __builtin_popcountll(i); } + constexpr unsigned + __gen_con_popcount(unsigned __int128 i) + { + return __gen_con_popcount(uint64_t(i)) + + __gen_con_popcount(uint64_t(i >> 64)); + } + + // Call this version when _URBG::max()-_URBG::min()+1 is a power of + // two, the norm in real programs. It works by calling urng() as many + // times as needed to fill the target mantissa, accumulating entropy + // into an integer value, converting that to the float type, and then + // dividing by the range of the integer value (a power of 2, so just + // adjusts the exponent) to produce a result in [0..1]. In case of an + // exact 1.0 result, we will re-try. + // + // It needs to work when the integer type used is only as big as the + // float mantissa, such as uint64_t for long double. So, commented-out + // lines below represent computations the Standard prescribes, but + // cannot be performed as described, or are not used. + // + // When the result is close to zero, the naive calculation would leave + // low-order zeroes in the mantissa; where spare entropy has been + // extracted, as is usual for float and double, some or all of the + // spare entropy can be pulled into the result for better randomness. + // + // When two or more calls to urng() yield more bits of entropy than fit + // into _Int, we end up discarding some of it by overflowing, which is + // OK. When converting the integer representation of the sample to the + // float value, we must divide out the truncated size. + + template<typename _RealT, typename _Int, size_t __d, typename _URBG> + _RealT + __generate_canonical_ok_rng(_URBG& __urng) + { + // Names below are chosen to match the description in the Standard. + // Parameter __d is the actual target number of digits. + // const _Int __r = 2; + const auto __rng_range = _URBG::max() - _URBG::min(); + const auto __int_range = ~_Int(0); + // const _Int __R = _Int(__rng_range) + 1; // may wrap to 0 + const auto __log2_R = __gen_con_popcount(__rng_range); + const auto __log2_int_max = __gen_con_popcount(__int_range); + // const _Int __rd = _Int(1) << __d; // could overflow, UB + const unsigned __k = (__d + __log2_R - 1) / __log2_R; + const unsigned __log2_Rk_max = __k * __log2_R; + const unsigned __log2_Rk = // bits of entropy actually obtained + __log2_int_max < __log2_Rk_max ? __log2_int_max : __log2_Rk_max; + static_assert(__log2_Rk >= __d); + // const _Int __Rk = _Int(1) << __log2_Rk; // likely overflows, UB + constexpr _RealT __Rk = _RealT(_Int(1) << (__log2_Rk - 1)) * 2.0; +#if defined(__STRICT_ANSI__) + const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits. + const _Int __x = _Int(1) << __log2_x; + constexpr _RealT __rd = __Rk / __x; +#endif + // const _Int __xrd = __x << __d; // could overflow. + + while (true) + { + _Int __sum; + unsigned __shift = 0; + __sum = _Int(__urng() - _URBG::min()); + for (int __i = __k - 1; __i > 0; --__i) + __shift += __log2_R, + __sum |= _Int(__urng() - _URBG::min()) << __shift; +#if defined(_STRICT_ANSI__) + const _RealT __ret = _RealT(__sum >> __log2_x) / __rd; +#else + const _RealT __ret = _RealT(__sum) / __Rk; +#endif + if (__ret < _RealT(1.0)) return __ret; + } + } + + template <typename _Int> + constexpr _Int + __gen_con_pow(_Int __base, size_t __exponent) + { + _Int __prod = __base; + while (--__exponent != 0) __prod *= __base; + return __prod; + } + + template <typename _Int> + constexpr unsigned + __gen_con_rng_calls_needed(_Int __R, _Int __rd) + { + unsigned __i = 1; + for (auto __Ri = __R; __Ri < __rd; __Ri *= __R) + ++__i; + return __i; + } + + // This version must be used when the range of possible RNG + // results, _URBG::max()-_URBG::min()+1, is not a power of two. + // The Int type passed must be big enough to represent Rk, R^k, + // a power of R (the range of values produced by the rng) up to + // twice the length of the mantissa. + + template<typename _RealT, typename _Int, size_t __d, typename _URBG> + _RealT + __generate_canonical_ill_rng(_URBG& __urng) + { + // Names below are chosen to match the description in the Standard. + // Parameter __d is the actual target number of digits. + const _Int __r = 2; + const _Int __R = _Int(_URBG::max() - _URBG::min()) + 1; + const _Int __rd = __gen_con_pow(__r, __d); + const unsigned __k = __gen_con_rng_calls_needed(__R, __rd); + const _Int __Rk = __gen_con_pow(__R, __k); + const _Int __x = __Rk/__rd; + + while (true) + { + _Int __Ri = 1, __sum = __urng() - _URBG::min(); + for (int __i = __k - 1; __i > 0; --__i) + __Ri *= __R, __sum += (__urng() - _URBG::min()) * __Ri; + const _RealT __ret = _RealT(__sum / __x) / __rd; + if (__ret < _RealT(1.0)) return __ret; + } + } + +#if !defined(_STRICT_ANSI__) + template <typename _Tp> struct __gen_con_is_float + { static const bool __value = is_floating_point<_Tp>::value; }; #else - __ret = _RealType(1) - - std::numeric_limits<_RealType>::epsilon() / _RealType(2); + template <typename _Tp> struct __gen_con_is_float + { static const bool __value = false; }; + template <> struct __gen_con_is_float<float> + { static const bool __value = true; }; + template <> struct __gen_con_is_float<double> + { static const bool __value = true; }; + template <> struct __gen_con_is_float<long double> + { static const bool __value = true; }; #endif + + /** Produce a random floating-point value in the range [0..1) + * + * The result of `std::generate_canonical<RealT,digits>(rng)` is a + * random floating-point value of type `RealT` in the range [0..1), + * using entropy provided by the uniform random bit generator `rng`. + * A non-negative value for `digits` may be used to limit the + * precision of the result to so many bits, but commonly -1 is + * passed to get the native precision of `RealT`. As many `rng()` + * calls are made as needed to obtain the required entropy. On rare + * occasions one or more additional `rng()` calls are used. It is + * fastest when the value of `rng.max()` is a power of two less one, + * such as from a `std::mt19937` (for `float`) or `mt19937_64 (for + * `double`). + * + * @since C++11 + */ + template<typename _RealT, size_t __digits, typename _URBG> + _RealT + generate_canonical(_URBG& __urng) + { +#if __cpp_lib_concepts >= 201907 + static_assert(requires { [](uniform_random_bit_generator auto) {}( + static_cast<std::remove_cv_t<_URBG>&&>(__urng)); }, + "argument must meet uniform_random_bit_generator requirements"); +#endif +#if __cpp_lib_concepts >= 201806 + static_assert(requires { _RealT(_URBG::max()); }, + "template float argument must be coercible from unsigned"); +#endif + static_assert(__gen_con_is_float<_RealT>::__value, + "template argument must be floating point"); + static_assert(__digits != 0 && _URBG::max() > _URBG::min(), + "random samples with 0 digits are not meaningful"); + static_assert(std::numeric_limits<_RealT>::radix == 2, + "only base-2 floats are supported"); + + const unsigned __d_max = std::numeric_limits<_RealT>::digits; + const unsigned __d = __digits > __d_max ? __d_max : __digits; + const auto __range = _URBG::max() - _URBG::min(); + + // If the RNG's range + 1 is a power of 2, the float type's mantissa + // is enough bits. If not, you need more. + if constexpr (((__range + 1) & __range) == 0) + // (Note, the range check works even when (__range + 1) overflows.) + { + if constexpr (__d <= 32) + return __generate_canonical_ok_rng<_RealT, uint32_t, __d>(__urng); + else // accommodate double double or float128 + return __generate_canonical_ok_rng< + _RealT, unsigned __int128, __d>(__urng); + } + else // need 2x bits + { + if constexpr (__d <= 32) + return __generate_canonical_ill_rng<_RealT, uint64_t, __d>(__urng); + else + { + static_assert(__d <= 64, + "irregular RNG with float precision > 64 is not supported"); + return __generate_canonical_ill_rng< + _RealT, unsigned __int128, __d>(__urng); } - return __ret; + } } +#pragma GCC diagnostic pop + _GLIBCXX_END_NAMESPACE_VERSION } // namespace diff --git a/libstdc++-v3/include/std/random b/libstdc++-v3/include/std/random index 0e058a04bd9..86d4f0a8380 100644 --- a/libstdc++-v3/include/std/random +++ b/libstdc++-v3/include/std/random @@ -49,6 +49,7 @@ #include <type_traits> #include <bits/random.h> #include <bits/opt_random.h> +#include <bits/uniform_int_dist.h> #include <bits/random.tcc> #endif // C++11 diff --git a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc index 3c0cb8bbd49..a5c8302f7b3 100644 --- a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc +++ b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc @@ -21,21 +21,6 @@ #include <random> #include <testsuite_hooks.h> -// libstdc++/64351 -void -test01() -{ - std::mt19937 rng(8890); - std::uniform_real_distribution<float> dist; - - rng.discard(30e6); - for (long i = 0; i < 10e6; ++i) - { - auto n = dist(rng); - VERIFY( n != 1.f ); - } -} - // libstdc++/63176 void test02() @@ -45,21 +30,21 @@ test02() rng.seed(sequence); rng.discard(12 * 629143); std::mt19937 rng2{rng}; + int mismatch = 0; for (int i = 0; i < 20; ++i) { - float n = - std::generate_canonical<float, std::numeric_limits<float>::digits>(rng); + float n = std::generate_canonical<float, 100>(rng); VERIFY( n != 1.f ); - // PR libstdc++/80137 rng2.discard(1); - VERIFY( rng == rng2 ); } + // PR libstdc++/80137 + rng2.discard(1); // account for a 1.0 generated and discarded. + VERIFY( rng == rng2 ); } int main() { - test01(); test02(); } diff --git a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc new file mode 100644 index 00000000000..041a4466c90 --- /dev/null +++ b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc @@ -0,0 +1,165 @@ +// { dg-do run { target { c++11 && { ! simulator } } } } + +#include <random> +#include <limits> +#include <type_traits> +#include <cmath> +#include <testsuite_hooks.h> +#include <array> + +struct local_rng : std::mt19937 +{ + static constexpr std::uint64_t min() { return 0; } + static constexpr std::uint64_t max() { return 999999; } + std::uint64_t operator()() + { return static_cast<std::mt19937&>(*this)() % (max() + 1); } + local_rng(std::mt19937 const& arg) : std::mt19937(arg) {} +}; + +// Verify P0952R9 implementation requiring a second round-trip +// if first yields exactly 1. In this test, the RNG delivering +// 32 bits per call is seeded such that this occurs once on the +// sixth iteration for float, and not at all for double. +// However, each double iteration requires two calls to the RNG. + +template <typename T, typename RNG> +void +test01(RNG& rng, RNG& rng2, + int& deviation, int& max, int& rms, int& zero, int& skips) +{ + const auto size = 1000000, buckets = 100; + std::array<int, buckets> histo{}; + for (auto i = 0; i != size; ++i) { + T sample = std::generate_canonical<T, 100>(rng); + VERIFY(sample >= T(0.0)); + VERIFY(sample < T(1.0)); // libstdc++/64351 + if (sample == T(0.0)) { + ++zero; + } + auto bucket = static_cast<int>(std::floor(sample * buckets)); + ++histo[bucket]; + rng2.discard(1); + if (rng != rng2) { + ++skips; + rng2.discard(1); + VERIFY(rng == rng2); + } + } + int devsquare = 0; + for (int i = 0; i < buckets; ++i) { + const auto expected = size / buckets; + auto count = histo[i]; + auto diff = count - expected; + if (diff < 0) diff = -diff; + deviation += diff; + devsquare += diff * diff; + if (diff > max) max = diff; + } + rms = std::sqrt(devsquare); +} + +// This one is for use with local_rng +template <typename T, typename RNG> +void +test02(RNG& rng, RNG& rng2, + int& deviation, int& max, int& rms, int& zero, int& skips) +{ + const auto size = 1000000, buckets = 100; + std::array<int, buckets> histo{}; + for (auto i = 0; i != size; ++i) { + T sample = std::generate_canonical<T, 100>(rng); + VERIFY(sample >= T(0.0)); + VERIFY(sample < T(1.0)); // libstdc++/64351 + if (sample == T(0.0)) { + ++zero; + } + auto bucket = static_cast<int>(std::floor(sample * buckets)); + ++histo[bucket]; + rng2.discard(2); + if (rng != rng2) { + ++skips; + rng2.discard(2); + VERIFY(rng == rng2); + } + } + int devsquare = 0; + for (int i = 0; i < buckets; ++i) { + const auto expected = size / buckets; + auto count = histo[i]; + auto diff = count - expected; + if (diff < 0) diff = -diff; + deviation += diff; + devsquare += diff * diff; + if (diff > max) max = diff; + } + rms = std::sqrt(devsquare); +} + +int +main() +{ + std::mt19937 rng(8890); + std::seed_seq sequence{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + rng.seed(sequence); + rng.discard(12 * 629143); + + { // float + int deviation{}, max{}, rms{}, zero{}, skips{}; + auto rng2{rng}; + auto rng3{rng}; + test01<float>(rng2, rng3, deviation, max, rms, zero, skips); + + if (std::numeric_limits<float>::is_iec559) { + VERIFY(deviation == 7032); + VERIFY(max == 276); + VERIFY(rms == 906); + VERIFY(zero == 0); + } + VERIFY(skips == 1); + } + { // double + int deviation{}, max{}, rms{}, zero{}, skips{}; + auto rng2{rng}; + auto rng3{rng}; + test01<double>(rng2, rng3, deviation, max, rms, zero, skips); + + if (std::numeric_limits<double>::is_iec559) { + VERIFY(deviation == 7650); + VERIFY(max == 259); + VERIFY(rms == 975); + VERIFY(zero == 0); + } + VERIFY(skips == 1000000); + } + { // long double, same answers as double + int deviation{}, max{}, rms{}, zero{}, skips{}; + auto rng2{rng}; + auto rng3{rng}; + test01<long double>(rng2, rng3, deviation, max, rms, zero, skips); + + if (std::numeric_limits<double>::is_iec559) { + VERIFY(deviation == 7650); + VERIFY(max == 259); + VERIFY(rms == 975); + VERIFY(zero == 0); + } + VERIFY(skips == 1000000); + } + + { // local RNG, returns [0..1000000) + int deviation{}, max{}, rms{}, zero{}, skips{}; + local_rng rng2{rng}; + local_rng rng3{rng}; + test02<float>(rng2, rng3, deviation, max, rms, zero, skips); + + if (std::numeric_limits<float>::is_iec559) + { + VERIFY(deviation == 8146); + VERIFY(max == 250); + VERIFY(rms == 1021); + VERIFY(zero == 0); + } + VERIFY(skips == 18); + } + return 0; +} -- 2.50.1