Hi, in order to drop frequencies from basic blocks and counts from edges I need to make probabilities more precise (so we do not get all those roundoff errors from 10000-base fixpoint arithmetics). Increasing base is easy now, but it means that in temporaries one can get overflows easily.
I need to compute (a*b)/c in a overflow safe way when a*b does not necessarily need to fit into 64bit integer. The following implement safe_scale_64bit for it but I am not quite sure if that is most reasonable implementation. (it uses builtins to detect overflows, inline 64bit computation if it is safe and 128bit computation if it is not). Any ideas for better version? If not I will go ahead with this variant and increase profile probability base. Honza * profile-count.h (slow_safe_scale_64bit): New function. (safe_scale_64bit): New inline. (profile_count::max_safe_multiplier): Remove; use safe_scale_64bit. Index: profile-count.h =================================================================== --- profile-count.h (revision 253512) +++ profile-count.h (working copy) @@ -43,6 +43,35 @@ enum profile_quality { #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) +bool slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res); + +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened. */ + +inline bool +safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res) +{ +#if (GCC_VERSION >= 5000) + uint64_t tmp; + if (!__builtin_mul_overflow (a, b, &tmp) + && !__builtin_add_overflow (tmp, c/2, &tmp)) + { + *res = tmp / c; + return true; + } + if (c == 1) + { + *res = (uint64_t) -1; + return false; + } +#else + if (a < ((uint64_t)1 << 31) + && b < ((uint64_t)1 << 31) + && c < ((uint64_t)1 << 31)) + return (a * b + (c / 2)) / c; +#endif + return slow_safe_scale_64bit (a, b, c, res); +} + /* Data type to hold probabilities. It implements fixed point arithmetics with capping so probability is always in range [0,1] and scaling requiring values greater than 1 needs to be represented otherwise. @@ -87,7 +116,8 @@ class GTY((user)) profile_probability static const int n_bits = 30; static const uint32_t max_probability = REG_BR_PROB_BASE; - static const uint32_t uninitialized_probability = ((uint32_t) 1 << n_bits) - 1; + static const uint32_t uninitialized_probability + = ((uint32_t) 1 << (n_bits - 1)) - 1; uint32_t m_val : 30; enum profile_quality m_quality : 2; @@ -535,11 +565,6 @@ class GTY(()) profile_count uint64_t m_val : n_bits; enum profile_quality m_quality : 2; - - /* Assume numbers smaller than this to multiply. This is set to make - testsuite pass, in future we may implement precise multiplication in higer - rangers. */ - static const uint64_t max_safe_multiplier = 131072; public: /* Used for counters which are expected to be never executed. */ @@ -790,12 +815,9 @@ public: return *this; profile_count ret; - /* Take care for overflows! */ - if (num.m_val < max_safe_multiplier || m_val < max_safe_multiplier) - ret.m_val = RDIV (m_val * num.m_val, den.m_val); - else - ret.m_val = RDIV (m_val * RDIV (num.m_val * max_safe_multiplier, - den.m_val), max_safe_multiplier); + uint64_t val; + safe_scale_64bit (m_val, num.m_val, den.m_val, &val); + ret.m_val = MIN (val, max_count); ret.m_quality = MIN (m_quality, profile_adjusted); return ret; } Index: profile-count.c =================================================================== --- profile-count.c (revision 253512) +++ profile-count.c (working copy) @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. #include "gimple.h" #include "data-streamer.h" #include "cgraph.h" +#include "wide-int.h" /* Dump THIS to F. */ @@ -194,3 +195,21 @@ profile_probability::stream_out (struct streamer_write_uhwi_stream (ob, m_val); streamer_write_uhwi_stream (ob, m_quality); } + +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened. */ + +bool +slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res) +{ + FIXED_WIDE_INT (128) tmp = a; + bool overflow; + tmp = wi::udiv_floor (wi::umul (tmp, b, &overflow) + (c / 2), c); + gcc_checking_assert (!overflow); + if (wi::fits_uhwi_p (tmp)) + { + *res = tmp.to_uhwi (); + return true; + } + *res = (uint64_t) -1; + return false; +}