This is an automated email from the ASF dual-hosted git repository. domino pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit bd54e013f8c2cba95bcfc5cc05fb73cdbfa91cbe Author: Domino Valdano <[email protected]> AuthorDate: Fri Jan 22 12:28:34 2021 -0800 sample: Use bernoulli_distribution from boost::, not stl:: or boost::TR1 The TR1 sub namespace no longer exists in recent versions of boost. Previously, this was fixed by using the C++11 STL version if it's available, otherwise falling back on TR1. But that didn't work due to a slight incompatibility between the C++11 and the boost version. The boost version still exists, just not under TR1--and we already use it elsewhere in the codebase, so this should be the cleanest fix. Also fix memory-corruption crash in std::domain_error(), by copying formatted string to memory allocated safely via postgres with palloc() Also improve some non-critical C++ issues: - Convert ptr_fun() to function() to get rid of C++11 deprecation warning - src/dbal/Reference_proto.hpp: Fix an error in the template definition of class Ref. The default value for IsMutable should be the value true or false, not the type bool. The only reason this wasn't generating compiler errors is that IsMutable happens to be passed explicitly in every place in the code where the template is instantiated--so the default was ignored. - Hide global boost symbols from external libraries We were already doing this for all STL symbols--for the same reasons, doing it for boost as well should make MADlib more robust when integrating with 3rd party libraries. --- src/dbal/BoostIntegration/MathToolkit_impl.hpp | 26 ++++++++++++++-------- src/dbal/Reference_proto.hpp | 2 +- src/library.ver | 2 +- src/modules/lda/lda.cpp | 6 ++--- src/modules/recursive_partitioning/DT_impl.hpp | 2 +- .../recursive_partitioning/feature_encoding.cpp | 2 +- src/modules/sample/WeightedSample_impl.hpp | 14 ++---------- src/modules/stats/wilcoxon_signed_rank_test.cpp | 10 ++++++--- .../NativeRandomNumberGenerator_proto.hpp | 3 +-- 9 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/dbal/BoostIntegration/MathToolkit_impl.hpp b/src/dbal/BoostIntegration/MathToolkit_impl.hpp index a83b421..2b93c8c 100644 --- a/src/dbal/BoostIntegration/MathToolkit_impl.hpp +++ b/src/dbal/BoostIntegration/MathToolkit_impl.hpp @@ -49,22 +49,30 @@ user_domain_error(const char*, const char* inMessage, const T& inVal) { if (std::isnan(inVal)) return std::numeric_limits<double>::quiet_NaN(); - // The following line is taken from - // http://www.boost.org/doc/libs/1_49_0/libs/math/doc/sf_and_dist/html/math_toolkit/policy/pol_tutorial/user_def_err_pol.html +#if _GLIBCXX_USE_CXX11_ABI + int prec = std::numeric_limits<T>::max_digits10; +#else + // For older C++ standards, max_digits10 was not available so we have to convert manually + // http://www.boost.org/doc/libs/1_49_0/libs/math/doc/sf_and_dist/html/math_toolkit/policy/pol_tutorial/user_def_err_pol.html int prec = 2 + (std::numeric_limits<T>::digits * 30103UL) / 100000UL; +#endif // _GLIBCXX_USE_CXX11_ABI - std::string msg = (boost::format(inMessage) - % boost::io::group(std::setprecision(prec), inVal) - ).str(); + throw std::domain_error(inMessage); + std:: string *msg = new std::string( + (boost::format(inMessage) + % boost::io::group(std::setprecision(prec), inVal) + ).str() + ); + // Some Boost error messages contain a space before the punctuation mark, // which we will remove. - std::string::iterator lastChar = msg.end() - 1; - std::string::iterator secondLastChar = msg.end() - 2; + std::string::iterator lastChar = msg->end() - 1; + std::string::iterator secondLastChar = msg->end() - 2; if (std::ispunct(*lastChar) && std::isspace(*secondLastChar)) - msg.erase(secondLastChar, lastChar); + msg->erase(secondLastChar, lastChar); - throw std::domain_error(msg); + throw std::domain_error(*msg); } } // namespace policies diff --git a/src/dbal/Reference_proto.hpp b/src/dbal/Reference_proto.hpp index be3ffaa..550b7db 100644 --- a/src/dbal/Reference_proto.hpp +++ b/src/dbal/Reference_proto.hpp @@ -18,7 +18,7 @@ namespace dbal { * @tparam IsMutable Whether the target value is mutable. Note that \c IsMutable * overrides any const-qualifier that \c T may contain. */ -template <typename T, bool IsMutable = boost::is_const<T>::value_type> +template <typename T, bool IsMutable = boost::is_const<T>::value> class Ref { public: typedef const T val_type; diff --git a/src/library.ver b/src/library.ver index 81021d1..680928d 100644 --- a/src/library.ver +++ b/src/library.ver @@ -36,6 +36,6 @@ VERS_1.0 { # hiding those in namespace std. # std::*; - + boost::*; }; }; diff --git a/src/modules/lda/lda.cpp b/src/modules/lda/lda.cpp index 6d61b43..2cd7c45 100644 --- a/src/modules/lda/lda.cpp +++ b/src/modules/lda/lda.cpp @@ -210,9 +210,9 @@ AnyType lda_gibbs_sample::run(AnyType & args) if (!args.getUserFuncContext()) { ArrayHandle<int64_t> model64 = args[3].getAs<ArrayHandle<int64_t> >(); if (model64.size() != model64_size) { - std::stringstream ss; - ss << "invalid dimension: model64.size() = " << model64.size(); - throw std::invalid_argument(ss.str()); + std::stringstream err_msg; + err_msg << "invalid dimension: model64.size() = " << model64.size(); + throw std::invalid_argument(err_msg.str()); } if (__min(model64) < 0) { throw std::invalid_argument("invalid topic counts in model"); diff --git a/src/modules/recursive_partitioning/DT_impl.hpp b/src/modules/recursive_partitioning/DT_impl.hpp index 8f36173..e86d476 100644 --- a/src/modules/recursive_partitioning/DT_impl.hpp +++ b/src/modules/recursive_partitioning/DT_impl.hpp @@ -385,7 +385,7 @@ DecisionTree<Container>::impurity(const ColumnVector &stats) const { if (impurity_type == GINI){ return 1 - proportions.cwiseProduct(proportions).sum(); } else if (impurity_type == ENTROPY){ - return proportions.unaryExpr(std::ptr_fun(computeEntropy)).sum(); + return proportions.unaryExpr(std::function<double(const double&)>(computeEntropy)).sum(); } else if (impurity_type == MISCLASS){ return 1 - proportions.maxCoeff(); } else diff --git a/src/modules/recursive_partitioning/feature_encoding.cpp b/src/modules/recursive_partitioning/feature_encoding.cpp index a5f131b..7d0083a 100644 --- a/src/modules/recursive_partitioning/feature_encoding.cpp +++ b/src/modules/recursive_partitioning/feature_encoding.cpp @@ -164,7 +164,7 @@ dst_compute_entropy_final::run(AnyType &args){ ColumnVector probs = state.cast<double>() / sum_of_dep_counts; // usage of unaryExpr with functor: // http://eigen.tuxfamily.org/dox/classEigen_1_1MatrixBase.html#a23fc4bf97168dee2516f85edcfd4cfe7 - return -(probs.unaryExpr(std::ptr_fun(p_log2_p))).sum(); + return -(probs.unaryExpr(std::function<double(const double&)>(p_log2_p))).sum(); } // ------------------------------------------------------------ diff --git a/src/modules/sample/WeightedSample_impl.hpp b/src/modules/sample/WeightedSample_impl.hpp index 7137b5b..0f6ae87 100644 --- a/src/modules/sample/WeightedSample_impl.hpp +++ b/src/modules/sample/WeightedSample_impl.hpp @@ -7,17 +7,7 @@ #ifndef MADLIB_MODULES_SAMPLE_WEIGHTED_SAMPLE_IMPL_HPP #define MADLIB_MODULES_SAMPLE_WEIGHTED_SAMPLE_IMPL_HPP -#if _GLIBCXX_USE_CXX11_ABI -#include <random> -#else -#include <boost/tr1/random.hpp> - -// Import TR1 names (currently used from boost). This can go away once we make -// the switch to C++11. -namespace std { - using tr1::bernoulli_distribution; -} -#endif // _GNUCXX_USE_CXX11_ABI +#include <boost/random/bernoulli_distribution.hpp> namespace madlib { @@ -113,7 +103,7 @@ WeightedSampleAccumulator<Container, T>::operator<<( // weight if (weight > 0.) { weight_sum += weight; - std::bernoulli_distribution success(weight / weight_sum); + boost::bernoulli_distribution<double> success(weight / weight_sum); // Note that a NativeRandomNumberGenerator object is stateless, so it // is not a problem to instantiate an object for each RN generation... NativeRandomNumberGenerator generator; diff --git a/src/modules/stats/wilcoxon_signed_rank_test.cpp b/src/modules/stats/wilcoxon_signed_rank_test.cpp index 40f7fa0..4fcd6b5 100644 --- a/src/modules/stats/wilcoxon_signed_rank_test.cpp +++ b/src/modules/stats/wilcoxon_signed_rank_test.cpp @@ -68,9 +68,13 @@ wsr_test_transition::run(AnyType &args) { ? args[2].getAs<double>() : -1; - if (!std::isfinite(precision)) - throw std::invalid_argument((boost::format( - "Precision must be finite, but got %1%.") % precision).str()); + + if (!std::isfinite(precision)) { + std::stringstream ss; + ss << "Precision must be finite, but got " << precision; + std::string *msg = new std::string(ss.str()); + throw std::invalid_argument( *msg ); + } else if (precision < 0) precision = value * std::numeric_limits<double>::epsilon(); diff --git a/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp b/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp index f7f5555..52f8e73 100644 --- a/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp +++ b/src/ports/postgres/dbconnector/NativeRandomNumberGenerator_proto.hpp @@ -22,12 +22,11 @@ namespace postgres { */ class NativeRandomNumberGenerator { public: + typedef double result_type; #if _GLIBCXX_USE_CXX11_ABI - typedef long long result_type; #define CONST_EXPR constexpr #else - typedef double result_type; #define CONST_EXPR #endif
