Repository: incubator-impala Updated Branches: refs/heads/master c46379602 -> 0ad55ba5a
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/runtime/decimal-value.inline.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/decimal-value.inline.h b/be/src/runtime/decimal-value.inline.h index 40b1119..5782bfe 100644 --- a/be/src/runtime/decimal-value.inline.h +++ b/be/src/runtime/decimal-value.inline.h @@ -110,9 +110,8 @@ inline typename RESULT_T::underlying_type_t DecimalValue<T>::ToInt(int scale, // N.B. also - no std::abs for int128_t if (abs(remainder) >= divisor >> 1) { // Round away from zero. - // Note this trick works with both signed and unsigned types - const int shift = std::numeric_limits<T>::digits; - result += 1 | (result >> shift); + // Bias at zero must be corrected by sign of dividend. + result += BitUtil::Sign(v); } } *overflow |= @@ -148,7 +147,7 @@ template<typename T> template<typename RESULT_T> inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale, const DecimalValue& other, int other_scale, int result_precision, int result_scale, - bool* overflow) const { + bool round, bool* overflow) const { DCHECK_EQ(result_scale, std::max(this_scale, other_scale)); RESULT_T x = 0; RESULT_T y = 0; @@ -170,7 +169,7 @@ template<typename T> template<typename RESULT_T> inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale, const DecimalValue& other, int other_scale, int result_precision, int result_scale, - bool* overflow) const { + bool round, bool* overflow) const { DCHECK_EQ(result_scale, std::max(this_scale, other_scale)); RESULT_T x = 0; RESULT_T y = 0; @@ -193,6 +192,30 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale, } #endif +namespace detail { + +// Helper function to scale down over multiplied values back into result type, +// truncating if round is false or rounding otherwise. +template<typename T, typename RESULT_T> +inline RESULT_T ScaleDownAndRound(RESULT_T value, int delta_scale, bool round) { + DCHECK_GT(delta_scale, 0); + // Multiplier can always be computed in potentially smaller type T + T multiplier = DecimalUtil::GetScaleMultiplier<T>(delta_scale); + DCHECK(multiplier > 1 && multiplier % 2 == 0); + RESULT_T result = value / multiplier; + if (round) { + RESULT_T remainder = value % multiplier; + // In general, shifting down the multiplier is not safe, but we know + // here that it is a multiple of two. + if (abs(remainder) > (multiplier >> 1)) { + // Bias at zero must be corrected by sign of dividend. + result += BitUtil::Sign(value); + } + } + return result; +} +} + // Use __builtin_mul_overflow on GCC if available. // Avoid using on Clang: it requires a function __muloti present in the Clang runtime // library but not the GCC runtime library and regresses performance. @@ -201,7 +224,7 @@ template<typename T> template<typename RESULT_T> inline DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale, const DecimalValue& other, int other_scale, int result_precision, int result_scale, - bool* overflow) const { + bool round, bool* overflow) const { // In the non-overflow case, we don't need to adjust by the scale since // that is already handled by the FE when it computes the result decimal type. // e.g. 1.23 * .2 (scale 2, scale 1 respectively) is identical to: @@ -209,16 +232,10 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale, // The result scale in this case is the sum of the input scales. RESULT_T x = value(); RESULT_T y = other.value(); - if (x == 0 || y == 0) { - // Handle zero to avoid divide by zero in the overflow check below. - return DecimalValue<RESULT_T>(0); - } RESULT_T result = 0; if (result_precision == ColumnType::MAX_PRECISION) { DCHECK_EQ(sizeof(RESULT_T), 16); - // Check overflow *overflow |= __builtin_mul_overflow(x, y, &result); - *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16; } else { result = x * y; } @@ -227,8 +244,12 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale, // In this case, the required resulting scale is larger than the max we support. // We cap the resulting scale to the max supported scale (e.g. truncate) in the FE. // TODO: we could also return NULL. - DCHECK_GT(delta_scale, 0); - result /= DecimalUtil::GetScaleMultiplier<T>(delta_scale); + result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, round); + + // Check overflow again after rounding since +/-1 could cause decimal overflow + if (round && result_precision == ColumnType::MAX_PRECISION) { + *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16; + } } return DecimalValue<RESULT_T>(result); } @@ -237,7 +258,7 @@ template<typename T> template<typename RESULT_T> DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale, const DecimalValue& other, int other_scale, int result_precision, int result_scale, - bool* overflow) const { + bool round, bool* overflow) const { // In the non-overflow case, we don't need to adjust by the scale since // that is already handled by the FE when it computes the result decimal type. // e.g. 1.23 * .2 (scale 2, scale 1 respectively) is identical to: @@ -260,8 +281,11 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale, // In this case, the required resulting scale is larger than the max we support. // We cap the resulting scale to the max supported scale (e.g. truncate) in the FE. // TODO: we could also return NULL. - DCHECK_GT(delta_scale, 0); - result /= DecimalUtil::GetScaleMultiplier<T>(delta_scale); + result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, round); + // Check overflow again after rounding since +/-1 could cause decimal overflow + if (result_precision == ColumnType::MAX_PRECISION) { + *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16; + } } return DecimalValue<RESULT_T>(result); } @@ -271,7 +295,7 @@ template<typename T> template<typename RESULT_T> inline DecimalValue<RESULT_T> DecimalValue<T>::Divide(int this_scale, const DecimalValue& other, int other_scale, int result_precision, int result_scale, - bool* is_nan, bool* overflow) const { + bool round, bool* is_nan, bool* overflow) const { DCHECK_GE(result_scale + other_scale, this_scale); if (other.value() == 0) { // Divide by 0. @@ -280,20 +304,57 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Divide(int this_scale, } // We need to scale x up by the result scale and then do an integer divide. // This truncates the result to the output scale. - // TODO: implement rounding when decimal_v2=true. int scale_by = result_scale + other_scale - this_scale; // Use higher precision ints for intermediates to avoid overflows. Divides lead to // large numbers very quickly (and get eliminated by the int divide). if (sizeof(T) == 16) { - int256_t x = DecimalUtil::MultiplyByScale<int256_t>( - ConvertToInt256(value()), scale_by); - int256_t y = ConvertToInt256(other.value()); + int128_t x_sp = value(); + int256_t x = DecimalUtil::MultiplyByScale<int256_t>(ConvertToInt256(x_sp), scale_by); + int128_t y_sp = other.value(); + int256_t y = ConvertToInt256(y_sp); int128_t r = ConvertToInt128(x / y, DecimalUtil::MAX_UNSCALED_DECIMAL16, overflow); + if (round) { + int256_t remainder = x % y; + // The following is frought with apparent difficulty, as there is only 1 bit + // free in the implementation of int128_t representing our maximum value and + // doubling such a value would overflow in two's complement. However, we + // converted y to a 256 bit value, and remainder must be less than y, so there + // is plenty of space. Building a value to DCHECK for this is rather awkward, but + // quite obviously 2 * MAX_UNSCALED_DECIMAL16 has plenty of room in 256 bits. + // This will need to be fixed if we optimize to get back a 128-bit signed value. + if (abs(2 * remainder) >= abs(y)) { + // Bias at zero must be corrected by sign of divisor and dividend. + r += (BitUtil::Sign(x_sp) ^ BitUtil::Sign(y_sp)) + 1; + } + } + // Check overflow again after rounding since +/-1 could cause decimal overflow + if (result_precision == ColumnType::MAX_PRECISION) { + *overflow |= abs(r) > DecimalUtil::MAX_UNSCALED_DECIMAL16; + } return DecimalValue<RESULT_T>(r); } else { + DCHECK(DecimalUtil::GetScaleMultiplier<RESULT_T>(scale_by) > 0); int128_t x = DecimalUtil::MultiplyByScale<RESULT_T>(value(), scale_by); int128_t y = other.value(); int128_t r = x / y; + if (round) { + int128_t remainder = x % y; + // No overflow because doubling the result of 8-byte integers fits in 128 bits + DCHECK_LT(sizeof(T), sizeof(remainder)); + if (abs(2 * remainder) >= abs(y)) { + // No bias at zero. The result scale was chosen such that the smallest non-zero + // 'x' divided by the largest 'y' will always produce a non-zero result. + // If higher precision were required due to a very large scale, we would be + // computing in 256 bits, where getting a zero result is actually a posibility. + // In addition, we know the dividend is non-zero, since there was a remainder. + // The two conditions combined mean that the result must also be non-zero. + DCHECK(r != 0); + r += BitUtil::Sign(r); + } + } + DCHECK(abs(r) <= DecimalUtil::MAX_UNSCALED_DECIMAL16 && + (sizeof(RESULT_T) > 8 || abs(r) <= DecimalUtil::MAX_UNSCALED_DECIMAL8) && + (sizeof(RESULT_T) > 4 || abs(r) <= DecimalUtil::MAX_UNSCALED_DECIMAL4)); return DecimalValue<RESULT_T>(static_cast<RESULT_T>(r)); } } @@ -302,7 +363,7 @@ template<typename T> template<typename RESULT_T> inline DecimalValue<RESULT_T> DecimalValue<T>::Mod(int this_scale, const DecimalValue& other, int other_scale, int result_precision, int result_scale, - bool* is_nan, bool* overflow) const { + bool round, bool* is_nan, bool* overflow) const { DCHECK_EQ(result_scale, std::max(this_scale, other_scale)); if (other.value() == 0) { // Mod by 0. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/runtime/types.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/types.cc b/be/src/runtime/types.cc index 2cc63ef..2de50e6 100644 --- a/be/src/runtime/types.cc +++ b/be/src/runtime/types.cc @@ -30,6 +30,13 @@ using namespace llvm; namespace impala { +const int ColumnType::MAX_PRECISION; +const int ColumnType::MAX_SCALE; +const int ColumnType::MIN_ADJUSTED_SCALE; + +const int ColumnType::MAX_DECIMAL4_PRECISION; +const int ColumnType::MAX_DECIMAL8_PRECISION; + const char* ColumnType::LLVM_CLASS_NAME = "struct.impala::ColumnType"; ColumnType::ColumnType(const std::vector<TTypeNode>& types, int* idx) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/runtime/types.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index f2db3bd..0657e5f 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -84,6 +84,7 @@ struct ColumnType { /// Must be kept in sync with FE's max precision/scale. static const int MAX_PRECISION = 38; static const int MAX_SCALE = MAX_PRECISION; + static const int MIN_ADJUSTED_SCALE = 6; /// The maximum precision representable by a 4-byte decimal (Decimal4Value) static const int MAX_DECIMAL4_PRECISION = 9; @@ -140,6 +141,17 @@ struct ColumnType { return ret; } + // Matches the results of createAdjustedDecimalType in front-end code. + static ColumnType CreateAdjustedDecimalType(int precision, int scale) { + if (precision > MAX_PRECISION) { + int min_scale = std::min(scale, MIN_ADJUSTED_SCALE); + int delta = precision - MAX_PRECISION; + precision = MAX_PRECISION; + scale = std::max(scale - delta, min_scale); + } + return CreateDecimalType(precision, scale); + } + static ColumnType FromThrift(const TColumnType& t) { int idx = 0; ColumnType result(t.types, &idx); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/util/bit-util-test.cc ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc index 663f10f..2559d18 100644 --- a/be/src/util/bit-util-test.cc +++ b/be/src/util/bit-util-test.cc @@ -29,9 +29,37 @@ #include "util/cpu-info.h" #include "common/names.h" +#include "runtime/multi-precision.h" namespace impala { +TEST(BitUtil, UnsignedWidth) { + EXPECT_EQ(BitUtil::UnsignedWidth<signed char>(), 7); + EXPECT_EQ(BitUtil::UnsignedWidth<unsigned char>(), 8); + EXPECT_EQ(BitUtil::UnsignedWidth<volatile long long>(), 63); + EXPECT_EQ(BitUtil::UnsignedWidth<unsigned long long&>(), 64); + EXPECT_EQ(BitUtil::UnsignedWidth<const int128_t&>(), 127); + EXPECT_EQ(BitUtil::UnsignedWidth<const volatile unsigned __int128&>(), 128); +} + +TEST(BitUtil, Sign) { + EXPECT_EQ(BitUtil::Sign<int>(0), 1); + EXPECT_EQ(BitUtil::Sign<int>(1), 1); + EXPECT_EQ(BitUtil::Sign<int>(-1), -1); + EXPECT_EQ(BitUtil::Sign<int>(200), 1); + EXPECT_EQ(BitUtil::Sign<int>(-200), -1); + EXPECT_EQ(BitUtil::Sign<unsigned int>(0), 1); + EXPECT_EQ(BitUtil::Sign<unsigned int>(1), 1); + EXPECT_EQ(BitUtil::Sign<unsigned int>(-1U), 1); + EXPECT_EQ(BitUtil::Sign<unsigned int>(200), 1); + EXPECT_EQ(BitUtil::Sign<unsigned int>(-200), 1); + EXPECT_EQ(BitUtil::Sign<int128_t>(0), 1); + EXPECT_EQ(BitUtil::Sign<int128_t>(1), 1); + EXPECT_EQ(BitUtil::Sign<int128_t>(-1), -1); + EXPECT_EQ(BitUtil::Sign<int128_t>(200), 1); + EXPECT_EQ(BitUtil::Sign<int128_t>(-200), -1); +} + TEST(BitUtil, Ceil) { EXPECT_EQ(BitUtil::Ceil(0, 1), 0); EXPECT_EQ(BitUtil::Ceil(1, 1), 1); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/util/bit-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h index bc9bcaf..571f1ae 100644 --- a/be/src/util/bit-util.h +++ b/be/src/util/bit-util.h @@ -26,8 +26,8 @@ #endif #include <climits> - #include <limits> +#include <typeinfo> #include <boost/type_traits/make_unsigned.hpp> @@ -43,6 +43,29 @@ using boost::make_unsigned; /// TODO: is this in boost or something else like that? class BitUtil { public: + + /// Returns the width of the integer portion of the type, not counting the sign bit. + /// Not safe for use with unknown or non-native types, so make it undefined + template<typename T, typename CVR_REMOVED = typename std::decay<T>::type, + typename std::enable_if<std::is_integral<CVR_REMOVED>{} || + std::is_same<CVR_REMOVED, unsigned __int128>{} || + std::is_same<CVR_REMOVED, __int128>{}, int>::type = 0> + constexpr static inline int UnsignedWidth() { + return std::is_integral<CVR_REMOVED>::value ? + std::numeric_limits<CVR_REMOVED>::digits : + std::is_same<CVR_REMOVED, unsigned __int128>::value ? 128 : + std::is_same<CVR_REMOVED, __int128>::value ? 127 : -1; + } + + /// Return an integer signifying the sign of the value, returning +1 for + /// positive integers (and zero), -1 for negative integers. + /// The extra shift is to silence GCC warnings about full width shift on + /// unsigned types. It compiles out in optimized builds into the expected increment + template<typename T> + constexpr static inline T Sign(T value) { + return 1 | ((value >> (UnsignedWidth<T>() - 1)) >> 1); + } + /// Returns the ceil of value/divisor constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) { return value / divisor + (value % divisor != 0); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/util/string-parser.h ---------------------------------------------------------------------- diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h index a123924..62e7dcc 100644 --- a/be/src/util/string-parser.h +++ b/be/src/util/string-parser.h @@ -100,12 +100,6 @@ class StringParser { /// On overflow or invalid values, the return value is undefined. /// On underflow, the truncated value is returned. template <typename T> - static inline DecimalValue<T> StringToDecimal(const uint8_t* s, int len, - const ColumnType& type, StringParser::ParseResult* result) { - return StringToDecimal<T>(reinterpret_cast<const char*>(s), len, type, result); - } - - template <typename T> static inline DecimalValue<T> StringToDecimal(const char* s, int len, const ColumnType& type, StringParser::ParseResult* result) { return StringToDecimal<T>(s, len, type.precision, type.scale, result); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test index 1dff5f1..b1c62ec 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test @@ -19,10 +19,10 @@ DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL set decimal_v2=true; select d1 / d2, d2 / d1, d3 / d4, d5 / d3, d3 / d5 from decimal_tbl; ---- RESULTS -0.55535553555,1.8006482982,10.000000,10000.08918100081154710738507,0.000099999108197945064 -21.12612612612,0.0473347547,100.000000,0.25442100231523112106860,3.930493123209169054441 -37.07207207207,0.0269744835,1000.000000,0.09088200082702620752593,11.003278877005347593582 -37.07207207207,0.0269744835,10000.000000,0.00008100000073710000670,12345.678900000000000000000 +0.55535553555,1.8006482982,10.000000,10000.08918100081154710738508,0.000099999108197945065 +21.12612612613,0.0473347548,100.000000,0.25442100231523112106860,3.930493123209169054441 +37.07207207207,0.0269744836,1000.000000,0.09088200082702620752594,11.003278877005347593583 +37.07207207207,0.0269744836,10000.000000,0.00008100000073710000671,12345.678900000000000000000 398.92492492492,0.0025067373,100000.000000,0.00006309009057411982422,15850.349728459731155875669 ---- TYPES DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL @@ -114,14 +114,14 @@ select l_tax, avg(cast(l_extendedprice as decimal(38,10))), avg(l_extendedprice) from tpch_parquet.lineitem group by l_tax order by 1; ---- RESULTS 0.00,38241.5984613546,38241.598461 -0.01,38283.5417664599,38283.541766 +0.01,38283.5417664600,38283.541766 0.02,38250.4873094187,38250.487309 0.03,38259.2810374789,38259.281037 0.04,38247.1967454731,38247.196745 0.05,38234.8480874721,38234.848087 0.06,38246.4342924027,38246.434292 -0.07,38281.1963710003,38281.196371 -0.08,38251.6233675941,38251.623367 +0.07,38281.1963710004,38281.196371 +0.08,38251.6233675942,38251.623368 ---- TYPES DECIMAL,DECIMAL,DECIMAL ==== @@ -147,7 +147,7 @@ group by l_tax having a > 38247.190 order by 1; ---- RESULTS 38247.196745 38250.487309 -38251.623367 +38251.623368 38259.281037 38281.196371 38283.541766
