If the span of the range R produced by uniform bit generator U passed to
generate_canonical is not power of two, we need to use algorithm that
requires computing power R^k that is greater than 2^d, where d is number
of digits in mantissa of _RealT. Previously we have used an integer type
that is has twice as many digits as d. However, in most situations R^k
fits with much smaller number.
This patch changes the algorithm for d < 64 to use uin32_t and uint64_t
if they are sufficient to hold R^k. We determine that by p, such that R^p
is greater than 2^32 and 2^64 respectively, and using given type if k < p.
The values of k and p are determined using __gen_can_ceil_log function,
that replaced __gen_can_rng_calls_needed function. We compute the logarithm
by dividing the value, instead of multiplying the base, to avoid overflow
when R^k does not fit in type used to represent value.
Furthermore we define simplified __generate_canonical_k1 function, that is
used if R >= r^d, i.e. k == 1. In such case we know we unsigned counterpart
of type produced by U, as it is guaranteed to fit R^k. For example when
RealT is float, and U generates long long.
Finally, for d >= 64, we either use __generate_canonical_k1 (ranges produces
128bit integers) or __generate_canonical_any using unsigned __int128. In
situation when more than 128bits are required to represent R^k (e.g. mantissa
is 104 bit, R requires 75bits), we still emit static_assert. However, the
error is no longer emitted unconditionally.
libstdc++-v3/ChangeLog:
* include/bits/random.tcc (__gen_can_rng_calls_needed):
Replace with __gen_can_ceil_log.
(__gen_can_ceil_log, __generate_canonical_k1): Define.
(__generate_canonical_any): Replace __gen_can_rng_calls_needed
with __gen_can_ceil_log.
---
Testing this currently, but posted to empahsize the case, where
we have d > 64, and log2(R) > 64 but log2(R) < d. In that k will be 2,
and R^k, would require more than 128 bits. If R > r^d, then k == 1.
libstdc++-v3/include/bits/random.tcc | 80 +++++++++++++++++++++++-----
1 file changed, 66 insertions(+), 14 deletions(-)
diff --git a/libstdc++-v3/include/bits/random.tcc
b/libstdc++-v3/include/bits/random.tcc
index e16eb9814a5..1d0c3495519 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3565,13 +3565,16 @@ namespace __detail
return __prod;
}
- template <typename _UInt>
+ template <typename _UInt1, typename _UInt2>
constexpr unsigned
- __gen_can_rng_calls_needed(_UInt __R, _UInt __rd)
+ __gen_can_ceil_log(_UInt1 __val, _UInt2 __base)
{
unsigned __i = 1;
- for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
- ++__i;
+ while (__val > __base)
+ {
+ __val /= __base;
+ ++__i;
+ }
return __i;
}
@@ -3580,7 +3583,6 @@ namespace __detail
// type passed must be big enough to represent Rk, R^k, a power of R
// (the range of values produced by the rng) up to twice the length
// of the mantissa.
-
template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
_RealT
__generate_canonical_any(_Urbg& __urng)
@@ -3591,7 +3593,7 @@ namespace __detail
const _UInt __r = 2;
const _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
const _UInt __rd = _UInt(1) << __d;
- const unsigned __k = __gen_can_rng_calls_needed(__R, __rd);
+ const unsigned __k = __gen_can_ceil_log(__rd, __R);
const _UInt __Rk = __gen_can_pow(__R, __k);
const _UInt __x = __Rk/__rd;
@@ -3609,6 +3611,28 @@ namespace __detail
}
}
+ // This version must be used when the range of possible RNG results,
+ // Urbg::max()-Urbg::min(), is not a power of two less one, and
+ // it is greater than __rd, i.e. k == 1.
+ template<typename _RealT, typename _UInt, size_t __d, typename _Urbg>
+ _RealT
+ __generate_canonical_k1(_Urbg& __urng)
+ {
+ // This cannot overflow, as in such case Urbg::max()-Urbg::min(),
+ // would be power of two less.
+ constexpr _UInt __R = _UInt(_Urbg::max() - _Urbg::min()) + 1;
+ constexpr _UInt __rd = _UInt(1) << __d;
+ constexpr _UInt __x = __R / __rd;
+
+ while (true)
+ {
+ const _RealT __ret = _RealT((__urng() - _Urbg::min()) / __x) / __rd;
+ if (__ret < _RealT(1.0))
+ return __ret;
+ }
+ }
+
+
#if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
template <typename _Tp>
const bool __is_rand_dist_float_v = is_floating_point<_Tp>::value;
@@ -3659,7 +3683,6 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
"float16_t type is not supported, consider using bfloat16_t");
#endif
- using _Rng = decltype(_Urbg::max());
const unsigned __d_max = std::numeric_limits<_RealT>::digits;
const unsigned __d = __digits > __d_max ? __d_max : __digits;
@@ -3675,23 +3698,30 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
{
#if defined(__SIZEOF_INT128__)
// Accommodate double double or float128.
- return __extension__ __generate_canonical_pow2<
- _RealT, unsigned __int128, __d>(__urng);
+ return __generate_canonical_pow2<_RealT, __uint128_t,
__d>(__urng);
#else
static_assert(false,
"float precision >64 requires __int128 support");
#endif
}
}
- else // Need up to 2x bits.
- {
- if constexpr (__d <= 32)
+ else if constexpr (__d < 64)
+ {
+ using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
+ // This cannot overflow, as _Urbg::max() - _Urbg::min() is not power
+ // of two less one.
+ constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
+ constexpr uint64_t __rd = uint64_t(1) << __d;
+ constexpr unsigned __k = __gen_can_ceil_log(__rd, __R);
+ if constexpr (__k == 1)
+ return __generate_canonical_k1<_RealT, _UIntR, __d>(__urng);
+ if constexpr (__k < __gen_can_ceil_log(uint32_t(-1), __R))
+ return __generate_canonical_any<_RealT, uint32_t, __d>(__urng);
+ else if constexpr (__k < __gen_can_ceil_log(uint64_t(-1), __R))
return __generate_canonical_any<_RealT, uint64_t, __d>(__urng);
else
{
#if defined(__SIZEOF_INT128__)
- static_assert(__d <= 64,
- "irregular RNG with float precision >64 is not supported");
return __extension__ __generate_canonical_any<
_RealT, unsigned __int128, __d>(__urng);
#else
@@ -3700,6 +3730,28 @@ _GLIBCXX_BEGIN_INLINE_ABI_NAMESPACE(_V2)
#endif
}
}
+ else
+ {
+#if defined(__SIZEOF_INT128__)
+ __extension__ using __uint128_t = unsigned __int128;
+ using _UIntR = typename make_unsigned<decltype(_Urbg::max())>::type;
+ // This cannot overflow, as _Urbg::max() - _Urbg::min() is not power
+ // of two less one.
+ constexpr _UIntR __R = _UIntR(_Urbg::max() - _Urbg::min()) + 1;
+ constexpr __uint128_t __rd = __uint128_t(1) << __d;
+ constexpr unsigned __k = __gen_can_ceil_log(__rd, __R);
+ if constexpr (__k == 1)
+ return __generate_canonical_k1<_RealT, _UIntR, __d>(__urng);
+ else if constexpr (__k < __gen_can_ceil_log(__uint128_t(-1), __R))
+ return __generate_canonical_any<_RealT, __uint128_t, __d>(__urng);
+ else
+ static_assert(false, "irregular RNG with __int128 output "
+ "and float precision >=64 are not supported");
This seem to require 256 bits, until I got it toally wrong.
+#else
+ static_assert(false, "irregular RNG with float precision"
+ " >=64 requires __int128 support");
+#endif
+ }
}
_GLIBCXX_END_INLINE_ABI_NAMESPACE(_V2)
--
2.52.0