Repository: thrift Updated Branches: refs/heads/master 5c3de6d3d -> 117a5cca7
THRIFT-4138: Remove undefined behavior imported from Boost Client: C++ There is undefined behavior in boost::lexical_cast that was fixed in https://github.com/boostorg/lexical_cast/issues/21, but that fix is only available in recent Boost releases. This patch removes all uses of lexical_cast instead. That removes the last undefined behavior, so this patch also makes ubsan.sh unconditionally fail on undefined behavior. This closes #1232 Project: http://git-wip-us.apache.org/repos/asf/thrift/repo Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/117a5cca Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/117a5cca Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/117a5cca Branch: refs/heads/master Commit: 117a5cca77c3994a62e0287f703232af44a48d9f Parents: 5c3de6d Author: Jim Apple <jbapple-imp...@apache.org> Authored: Wed Mar 29 20:39:36 2017 -0700 Committer: James E. King, III <jk...@apache.org> Committed: Sat Apr 1 10:51:48 2017 -0400 ---------------------------------------------------------------------- build/docker/scripts/ubsan.sh | 2 +- lib/cpp/src/thrift/TOutput.cpp | 6 +-- lib/cpp/src/thrift/TToString.h | 35 +++++++++++++--- lib/cpp/src/thrift/protocol/TDebugProtocol.cpp | 22 +++++----- lib/cpp/src/thrift/protocol/TJSONProtocol.cpp | 42 ++++++++++---------- lib/cpp/src/thrift/protocol/TJSONProtocol.h | 9 +++-- lib/cpp/src/thrift/transport/TSSLSocket.cpp | 6 +-- .../thrift/transport/TTransportException.cpp | 2 - lib/cpp/src/thrift/transport/TZlibTransport.h | 6 +-- lib/rb/lib/thrift/protocol/json_protocol.rb | 2 +- 10 files changed, 79 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/build/docker/scripts/ubsan.sh ---------------------------------------------------------------------- diff --git a/build/docker/scripts/ubsan.sh b/build/docker/scripts/ubsan.sh index 6db10f3..d39cc83 100755 --- a/build/docker/scripts/ubsan.sh +++ b/build/docker/scripts/ubsan.sh @@ -15,7 +15,7 @@ export CXX=clang++-3.8 # undefined casting, aka "vptr". # # TODO: fix undefined vptr behavior and turn this option back on. -export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined -fno-sanitize=vptr" +export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined" # Builds without optimization and with debugging symbols for making crash reports more # readable. export CFLAGS="${CFLAGS} -O0 -ggdb3" http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/TOutput.cpp ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/TOutput.cpp b/lib/cpp/src/thrift/TOutput.cpp index 5739d0f..bb46263 100644 --- a/lib/cpp/src/thrift/TOutput.cpp +++ b/lib/cpp/src/thrift/TOutput.cpp @@ -18,9 +18,9 @@ */ #include <thrift/Thrift.h> +#include <thrift/TToString.h> #include <cstring> #include <cstdlib> -#include <boost/lexical_cast.hpp> #include <stdarg.h> #include <stdio.h> @@ -100,7 +100,7 @@ void TOutput::perror(const char* message, int errno_copy) { std::string TOutput::strerror_s(int errno_copy) { #ifndef HAVE_STRERROR_R - return "errno = " + boost::lexical_cast<std::string>(errno_copy); + return "errno = " + to_string(errno_copy); #else // HAVE_STRERROR_R char b_errbuf[1024] = {'\0'}; @@ -112,7 +112,7 @@ std::string TOutput::strerror_s(int errno_copy) { if (rv == -1) { // strerror_r failed. omgwtfbbq. return "XSI-compliant strerror_r() failed with errno = " - + boost::lexical_cast<std::string>(errno_copy); + + to_string(errno_copy); } #endif // Can anyone prove that explicit cast is probably not necessary http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/TToString.h ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/TToString.h b/lib/cpp/src/thrift/TToString.h index 5023869..fdf191e 100644 --- a/lib/cpp/src/thrift/TToString.h +++ b/lib/cpp/src/thrift/TToString.h @@ -20,20 +20,45 @@ #ifndef _THRIFT_TOSTRING_H_ #define _THRIFT_TOSTRING_H_ 1 -#include <boost/lexical_cast.hpp> - -#include <vector> +#include <cmath> +#include <limits> #include <map> #include <set> -#include <string> #include <sstream> +#include <string> +#include <vector> namespace apache { namespace thrift { template <typename T> std::string to_string(const T& t) { - return boost::lexical_cast<std::string>(t); + std::ostringstream o; + o << t; + return o.str(); +} + +// TODO: replace the computations below with std::numeric_limits::max_digits10 once C++11 +// is enabled. +inline std::string to_string(const float& t) { + std::ostringstream o; + o.precision(std::ceil(std::numeric_limits<float>::digits * std::log10(2) + 1)); + o << t; + return o.str(); +} + +inline std::string to_string(const double& t) { + std::ostringstream o; + o.precision(std::ceil(std::numeric_limits<double>::digits * std::log10(2) + 1)); + o << t; + return o.str(); +} + +inline std::string to_string(const long double& t) { + std::ostringstream o; + o.precision(std::ceil(std::numeric_limits<long double>::digits * std::log10(2) + 1)); + o << t; + return o.str(); } template <typename K, typename V> http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp b/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp index 09b978c..d3c6beb 100644 --- a/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp +++ b/lib/cpp/src/thrift/protocol/TDebugProtocol.cpp @@ -19,12 +19,12 @@ #include <thrift/protocol/TDebugProtocol.h> +#include <thrift/TToString.h> #include <cassert> #include <cctype> #include <cstdio> #include <stdexcept> #include <boost/static_assert.hpp> -#include <boost/lexical_cast.hpp> using std::string; @@ -129,7 +129,7 @@ uint32_t TDebugProtocol::startItem() { case MAP_VALUE: return writePlain(" -> "); case LIST: - size = writeIndented("[" + boost::lexical_cast<string>(list_idx_.back()) + "] = "); + size = writeIndented("[" + to_string(list_idx_.back()) + "] = "); list_idx_.back()++; return size; default: @@ -223,7 +223,7 @@ uint32_t TDebugProtocol::writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId) { // sprintf(id_str, "%02d", fieldId); - string id_str = boost::lexical_cast<string>(fieldId); + string id_str = to_string(fieldId); if (id_str.length() == 1) id_str = '0' + id_str; @@ -248,7 +248,7 @@ uint32_t TDebugProtocol::writeMapBegin(const TType keyType, bsize += startItem(); bsize += writePlain( "map<" + fieldTypeName(keyType) + "," + fieldTypeName(valType) + ">" - "[" + boost::lexical_cast<string>(size) + "] {\n"); + "[" + to_string(size) + "] {\n"); indentUp(); write_state_.push_back(MAP_KEY); return bsize; @@ -269,7 +269,7 @@ uint32_t TDebugProtocol::writeListBegin(const TType elemType, const uint32_t siz bsize += startItem(); bsize += writePlain( "list<" + fieldTypeName(elemType) + ">" - "[" + boost::lexical_cast<string>(size) + "] {\n"); + "[" + to_string(size) + "] {\n"); indentUp(); write_state_.push_back(LIST); list_idx_.push_back(0); @@ -292,7 +292,7 @@ uint32_t TDebugProtocol::writeSetBegin(const TType elemType, const uint32_t size bsize += startItem(); bsize += writePlain( "set<" + fieldTypeName(elemType) + ">" - "[" + boost::lexical_cast<string>(size) + "] {\n"); + "[" + to_string(size) + "] {\n"); indentUp(); write_state_.push_back(SET); return bsize; @@ -316,19 +316,19 @@ uint32_t TDebugProtocol::writeByte(const int8_t byte) { } uint32_t TDebugProtocol::writeI16(const int16_t i16) { - return writeItem(boost::lexical_cast<string>(i16)); + return writeItem(to_string(i16)); } uint32_t TDebugProtocol::writeI32(const int32_t i32) { - return writeItem(boost::lexical_cast<string>(i32)); + return writeItem(to_string(i32)); } uint32_t TDebugProtocol::writeI64(const int64_t i64) { - return writeItem(boost::lexical_cast<string>(i64)); + return writeItem(to_string(i64)); } uint32_t TDebugProtocol::writeDouble(const double dub) { - return writeItem(boost::lexical_cast<string>(dub)); + return writeItem(to_string(dub)); } uint32_t TDebugProtocol::writeString(const string& str) { @@ -337,7 +337,7 @@ uint32_t TDebugProtocol::writeString(const string& str) { string to_show = str; if (to_show.length() > (string::size_type)string_limit_) { to_show = str.substr(0, string_prefix_size_); - to_show += "[...](" + boost::lexical_cast<string>(str.length()) + ")"; + to_show += "[...](" + to_string(str.length()) + ")"; } string output = "\""; http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp b/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp index 431e7c4..943d960 100644 --- a/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp +++ b/lib/cpp/src/thrift/protocol/TJSONProtocol.cpp @@ -19,9 +19,9 @@ #include <thrift/protocol/TJSONProtocol.h> -#include <boost/lexical_cast.hpp> #include <boost/locale.hpp> #include <boost/math/special_functions/fpclassify.hpp> +#include <boost/math/special_functions/sign.hpp> #include <cmath> #include <limits> @@ -31,6 +31,7 @@ #include <thrift/protocol/TBase64Utils.h> #include <thrift/transport/TTransportException.h> +#include <thrift/TToString.h> using namespace apache::thrift::transport; @@ -503,7 +504,7 @@ uint32_t TJSONProtocol::writeJSONBase64(const std::string& str) { template <typename NumberType> uint32_t TJSONProtocol::writeJSONInteger(NumberType num) { uint32_t result = context_->write(*trans_); - std::string val(boost::lexical_cast<std::string>(num)); + std::string val(to_string(num)); bool escapeNum = context_->escapeNum(); if (escapeNum) { trans_->write(&kJSONStringDelimiter, 1); @@ -684,7 +685,7 @@ uint32_t TJSONProtocol::writeBool(const bool value) { } uint32_t TJSONProtocol::writeByte(const int8_t byte) { - // writeByte() must be handled specially because boost::lexical cast sees + // writeByte() must be handled specially because to_string sees // int8_t as a text type instead of an integer type return writeJSONInteger((int16_t)byte); } @@ -842,6 +843,19 @@ uint32_t TJSONProtocol::readJSONNumericChars(std::string& str) { return result; } +namespace { +template <typename T> +T fromString(const std::string& s) { + T t; + std::istringstream str(s); + str.imbue(std::locale::classic()); + str >> t; + if (str.bad() || !str.eof()) + throw std::runtime_error(s); + return t; +} +} + // Reads a sequence of characters and assembles them into a number, // returning them via num template <typename NumberType> @@ -853,10 +867,10 @@ uint32_t TJSONProtocol::readJSONInteger(NumberType& num) { std::string str; result += readJSONNumericChars(str); try { - num = boost::lexical_cast<NumberType>(str); - } catch (boost::bad_lexical_cast e) { + num = fromString<NumberType>(str); + } catch (const std::runtime_error&) { throw TProtocolException(TProtocolException::INVALID_DATA, - "Expected numeric value; got \"" + str + "\""); + "Expected numeric value; got \"" + str + "\""); } if (context_->escapeNum()) { result += readJSONSyntaxChar(kJSONStringDelimiter); @@ -864,18 +878,6 @@ uint32_t TJSONProtocol::readJSONInteger(NumberType& num) { return result; } -namespace { -double stringToDouble(const std::string& s) { - double d; - std::istringstream str(s); - str.imbue(std::locale::classic()); - str >> d; - if (str.bad() || !str.eof()) - throw std::runtime_error(s); - return d; -} -} - // Reads a JSON number or string and interprets it as a double. uint32_t TJSONProtocol::readJSONDouble(double& num) { uint32_t result = context_->read(reader_); @@ -896,7 +898,7 @@ uint32_t TJSONProtocol::readJSONDouble(double& num) { "Numeric data unexpectedly quoted"); } try { - num = stringToDouble(str); + num = fromString<double>(str); } catch (std::runtime_error e) { throw TProtocolException(TProtocolException::INVALID_DATA, "Expected numeric value; got \"" + str + "\""); @@ -909,7 +911,7 @@ uint32_t TJSONProtocol::readJSONDouble(double& num) { } result += readJSONNumericChars(str); try { - num = stringToDouble(str); + num = fromString<double>(str); } catch (std::runtime_error e) { throw TProtocolException(TProtocolException::INVALID_DATA, "Expected numeric value; got \"" + str + "\""); http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/protocol/TJSONProtocol.h ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/protocol/TJSONProtocol.h b/lib/cpp/src/thrift/protocol/TJSONProtocol.h index 5834eff..5a94624 100644 --- a/lib/cpp/src/thrift/protocol/TJSONProtocol.h +++ b/lib/cpp/src/thrift/protocol/TJSONProtocol.h @@ -87,10 +87,11 @@ class TJSONContext; * the current implementation is to match as closely as possible the behavior * of Java's Double.toString(), which has no precision loss. Implementors in * other languages should strive to achieve that where possible. I have not - * yet verified whether boost:lexical_cast, which is doing that work for me in - * C++, loses any precision, but I am leaving this as a future improvement. I - * may try to provide a C component for this, so that other languages could - * bind to the same underlying implementation for maximum consistency. + * yet verified whether std::istringstream::operator>>, which is doing that + * work for me in C++, loses any precision, but I am leaving this as a future + * improvement. I may try to provide a C component for this, so that other + * languages could bind to the same underlying implementation for maximum + * consistency. * */ class TJSONProtocol : public TVirtualProtocol<TJSONProtocol> { http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/transport/TSSLSocket.cpp ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/transport/TSSLSocket.cpp b/lib/cpp/src/thrift/transport/TSSLSocket.cpp index edcfb9d..b0f9166 100644 --- a/lib/cpp/src/thrift/transport/TSSLSocket.cpp +++ b/lib/cpp/src/thrift/transport/TSSLSocket.cpp @@ -36,7 +36,6 @@ #endif -#include <boost/lexical_cast.hpp> #include <boost/shared_array.hpp> #include <openssl/err.h> #include <openssl/rand.h> @@ -45,6 +44,7 @@ #include <thrift/concurrency/Mutex.h> #include <thrift/transport/TSSLSocket.h> #include <thrift/transport/PlatformSocket.h> +#include <thrift/TToString.h> #define OPENSSL_VERSION_NO_THREAD_ID 0x10000000L @@ -629,7 +629,7 @@ unsigned int TSSLSocket::waitForEvent(bool wantRead) { } struct THRIFT_POLLFD fds[2]; - std::memset(fds, 0, sizeof(fds)); + memset(fds, 0, sizeof(fds)); fds[0].fd = fdSocket; fds[0].events = wantRead ? THRIFT_POLLIN : THRIFT_POLLOUT; @@ -851,7 +851,7 @@ void buildErrors(string& errors, int errno_copy) { } } if (errors.empty()) { - errors = "error code: " + boost::lexical_cast<string>(errno_copy); + errors = "error code: " + to_string(errno_copy); } } http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/transport/TTransportException.cpp ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/transport/TTransportException.cpp b/lib/cpp/src/thrift/transport/TTransportException.cpp index 612e7b7..9e15dbd 100644 --- a/lib/cpp/src/thrift/transport/TTransportException.cpp +++ b/lib/cpp/src/thrift/transport/TTransportException.cpp @@ -18,13 +18,11 @@ */ #include <thrift/transport/TTransportException.h> -#include <boost/lexical_cast.hpp> #include <cstring> #include <thrift/thrift-config.h> using std::string; -using boost::lexical_cast; namespace apache { namespace thrift { http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/cpp/src/thrift/transport/TZlibTransport.h ---------------------------------------------------------------------- diff --git a/lib/cpp/src/thrift/transport/TZlibTransport.h b/lib/cpp/src/thrift/transport/TZlibTransport.h index 1e7b5ec..18ca588 100644 --- a/lib/cpp/src/thrift/transport/TZlibTransport.h +++ b/lib/cpp/src/thrift/transport/TZlibTransport.h @@ -20,9 +20,9 @@ #ifndef _THRIFT_TRANSPORT_TZLIBTRANSPORT_H_ #define _THRIFT_TRANSPORT_TZLIBTRANSPORT_H_ 1 -#include <boost/lexical_cast.hpp> #include <thrift/transport/TTransport.h> #include <thrift/transport/TVirtualTransport.h> +#include <thrift/TToString.h> #include <zlib.h> struct z_stream_s; @@ -51,7 +51,7 @@ public: rv += "(no message)"; } rv += " (status = "; - rv += boost::lexical_cast<std::string>(status); + rv += to_string(status); rv += ")"; return rv; } @@ -105,7 +105,7 @@ public: int minimum = MIN_DIRECT_DEFLATE_SIZE; throw TTransportException(TTransportException::BAD_ARGS, "TZLibTransport: uncompressed write buffer must be at least" - + boost::lexical_cast<std::string>(minimum) + "."); + + to_string(minimum) + "."); } try { http://git-wip-us.apache.org/repos/asf/thrift/blob/117a5cca/lib/rb/lib/thrift/protocol/json_protocol.rb ---------------------------------------------------------------------- diff --git a/lib/rb/lib/thrift/protocol/json_protocol.rb b/lib/rb/lib/thrift/protocol/json_protocol.rb index 2b8ac15..514bdbf 100644 --- a/lib/rb/lib/thrift/protocol/json_protocol.rb +++ b/lib/rb/lib/thrift/protocol/json_protocol.rb @@ -333,7 +333,7 @@ module Thrift # "NaN" or "Infinity" or "-Infinity". def write_json_double(num) @context.write(trans) - # Normalize output of boost::lexical_cast for NaNs and Infinities + # Normalize output of thrift::to_string for NaNs and Infinities special = false; if (num.nan?) special = true;