[1/3] incubator-impala git commit: IMPALA-4813: Round on divide and multiply

jrussell Thu, 02 Mar 2017 13:51:24 -0800

Repository: incubator-impala
Updated Branches:
  refs/heads/master c46379602 -> 0ad55ba5a



http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/runtime/decimal-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.inline.h 
b/be/src/runtime/decimal-value.inline.h
index 40b1119..5782bfe 100644
--- a/be/src/runtime/decimal-value.inline.h
+++ b/be/src/runtime/decimal-value.inline.h
@@ -110,9 +110,8 @@ inline typename RESULT_T::underlying_type_t 
DecimalValue<T>::ToInt(int scale,
     // N.B. also - no std::abs for int128_t
     if (abs(remainder) >= divisor >> 1) {
       // Round away from zero.
-      // Note this trick works with both signed and unsigned types
-      const int shift = std::numeric_limits<T>::digits;
-      result += 1 | (result >> shift);
+      // Bias at zero must be corrected by sign of dividend.
+      result += BitUtil::Sign(v);
     }
   }
   *overflow |=
@@ -148,7 +147,7 @@ template<typename T>
 template<typename RESULT_T>
 inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
     const DecimalValue& other, int other_scale, int result_precision, int 
result_scale,
-    bool* overflow) const {
+    bool round, bool* overflow) const {
   DCHECK_EQ(result_scale, std::max(this_scale, other_scale));
   RESULT_T x = 0;
   RESULT_T y = 0;
@@ -170,7 +169,7 @@ template<typename T>
 template<typename RESULT_T>
 inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int this_scale,
     const DecimalValue& other, int other_scale, int result_precision, int 
result_scale,
-    bool* overflow) const {
+    bool round, bool* overflow) const {
   DCHECK_EQ(result_scale, std::max(this_scale, other_scale));
   RESULT_T x = 0;
   RESULT_T y = 0;
@@ -193,6 +192,30 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Add(int 
this_scale,
 }
 #endif
 
+namespace detail {
+
+// Helper function to scale down over multiplied values back into result type,
+// truncating if round is false or rounding otherwise.
+template<typename T, typename RESULT_T>
+inline RESULT_T ScaleDownAndRound(RESULT_T value, int delta_scale, bool round) 
{
+  DCHECK_GT(delta_scale, 0);
+  // Multiplier can always be computed in potentially smaller type T
+  T multiplier = DecimalUtil::GetScaleMultiplier<T>(delta_scale);
+  DCHECK(multiplier > 1 && multiplier % 2 == 0);
+  RESULT_T result = value / multiplier;
+  if (round) {
+    RESULT_T remainder = value % multiplier;
+    // In general, shifting down the multiplier is not safe, but we know
+    // here that it is a multiple of two.
+    if (abs(remainder) > (multiplier >> 1)) {
+      // Bias at zero must be corrected by sign of dividend.
+      result += BitUtil::Sign(value);
+    }
+  }
+  return result;
+}
+}
+
 // Use __builtin_mul_overflow on GCC if available.
 // Avoid using on Clang: it requires a function __muloti present in the Clang 
runtime
 // library but not the GCC runtime library and regresses performance.
@@ -201,7 +224,7 @@ template<typename T>
 template<typename RESULT_T>
 inline DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
     const DecimalValue& other, int other_scale, int result_precision, int 
result_scale,
-    bool* overflow) const {
+    bool round, bool* overflow) const {
   // In the non-overflow case, we don't need to adjust by the scale since
   // that is already handled by the FE when it computes the result decimal 
type.
   // e.g. 1.23 * .2 (scale 2, scale 1 respectively) is identical to:
@@ -209,16 +232,10 @@ inline DecimalValue<RESULT_T> 
DecimalValue<T>::Multiply(int this_scale,
   // The result scale in this case is the sum of the input scales.
   RESULT_T x = value();
   RESULT_T y = other.value();
-  if (x == 0 || y == 0) {
-    // Handle zero to avoid divide by zero in the overflow check below.
-    return DecimalValue<RESULT_T>(0);
-  }
   RESULT_T result = 0;
   if (result_precision == ColumnType::MAX_PRECISION) {
     DCHECK_EQ(sizeof(RESULT_T), 16);
-    // Check overflow
     *overflow |= __builtin_mul_overflow(x, y, &result);
-    *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16;
   } else {
     result = x * y;
   }
@@ -227,8 +244,12 @@ inline DecimalValue<RESULT_T> 
DecimalValue<T>::Multiply(int this_scale,
     // In this case, the required resulting scale is larger than the max we 
support.
     // We cap the resulting scale to the max supported scale (e.g. truncate) 
in the FE.
     // TODO: we could also return NULL.
-    DCHECK_GT(delta_scale, 0);
-    result /= DecimalUtil::GetScaleMultiplier<T>(delta_scale);
+    result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, 
round);
+
+    // Check overflow again after rounding since +/-1 could cause decimal 
overflow
+    if (round && result_precision == ColumnType::MAX_PRECISION) {
+      *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16;
+    }
   }
   return DecimalValue<RESULT_T>(result);
 }
@@ -237,7 +258,7 @@ template<typename T>
 template<typename RESULT_T>
 DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int this_scale,
     const DecimalValue& other, int other_scale, int result_precision, int 
result_scale,
-    bool* overflow) const {
+    bool round, bool* overflow) const {
   // In the non-overflow case, we don't need to adjust by the scale since
   // that is already handled by the FE when it computes the result decimal 
type.
   // e.g. 1.23 * .2 (scale 2, scale 1 respectively) is identical to:
@@ -260,8 +281,11 @@ DecimalValue<RESULT_T> DecimalValue<T>::Multiply(int 
this_scale,
     // In this case, the required resulting scale is larger than the max we 
support.
     // We cap the resulting scale to the max supported scale (e.g. truncate) 
in the FE.
     // TODO: we could also return NULL.
-    DCHECK_GT(delta_scale, 0);
-    result /= DecimalUtil::GetScaleMultiplier<T>(delta_scale);
+    result = detail::ScaleDownAndRound<T, RESULT_T>(result, delta_scale, 
round);
+    // Check overflow again after rounding since +/-1 could cause decimal 
overflow
+    if (result_precision == ColumnType::MAX_PRECISION) {
+      *overflow |= abs(result) > DecimalUtil::MAX_UNSCALED_DECIMAL16;
+    }
   }
   return DecimalValue<RESULT_T>(result);
 }
@@ -271,7 +295,7 @@ template<typename T>
 template<typename RESULT_T>
 inline DecimalValue<RESULT_T> DecimalValue<T>::Divide(int this_scale,
     const DecimalValue& other, int other_scale, int result_precision, int 
result_scale,
-    bool* is_nan, bool* overflow) const {
+    bool round, bool* is_nan, bool* overflow) const {
   DCHECK_GE(result_scale + other_scale, this_scale);
   if (other.value() == 0) {
     // Divide by 0.
@@ -280,20 +304,57 @@ inline DecimalValue<RESULT_T> DecimalValue<T>::Divide(int 
this_scale,
   }
   // We need to scale x up by the result scale and then do an integer divide.
   // This truncates the result to the output scale.
-  // TODO: implement rounding when decimal_v2=true.
   int scale_by = result_scale + other_scale - this_scale;
   // Use higher precision ints for intermediates to avoid overflows. Divides 
lead to
   // large numbers very quickly (and get eliminated by the int divide).
   if (sizeof(T) == 16) {
-    int256_t x = DecimalUtil::MultiplyByScale<int256_t>(
-        ConvertToInt256(value()), scale_by);
-    int256_t y = ConvertToInt256(other.value());
+    int128_t x_sp = value();
+    int256_t x = DecimalUtil::MultiplyByScale<int256_t>(ConvertToInt256(x_sp), 
scale_by);
+    int128_t y_sp = other.value();
+    int256_t y = ConvertToInt256(y_sp);
     int128_t r = ConvertToInt128(x / y, DecimalUtil::MAX_UNSCALED_DECIMAL16, 
overflow);
+    if (round) {
+      int256_t remainder = x % y;
+      // The following is frought with apparent difficulty, as there is only 1 
bit
+      // free in the implementation of int128_t representing our maximum value 
and
+      // doubling such a value would overflow in two's complement.  However, we
+      // converted y to a 256 bit value, and remainder must be less than y, so 
there
+      // is plenty of space.  Building a value to DCHECK for this is rather 
awkward, but
+      // quite obviously 2 * MAX_UNSCALED_DECIMAL16 has plenty of room in 256 
bits.
+      // This will need to be fixed if we optimize to get back a 128-bit 
signed value.
+      if (abs(2 * remainder) >= abs(y)) {
+        // Bias at zero must be corrected by sign of divisor and dividend.
+        r += (BitUtil::Sign(x_sp) ^ BitUtil::Sign(y_sp)) + 1;
+      }
+    }
+    // Check overflow again after rounding since +/-1 could cause decimal 
overflow
+    if (result_precision == ColumnType::MAX_PRECISION) {
+      *overflow |= abs(r) > DecimalUtil::MAX_UNSCALED_DECIMAL16;
+    }
     return DecimalValue<RESULT_T>(r);
   } else {
+    DCHECK(DecimalUtil::GetScaleMultiplier<RESULT_T>(scale_by) > 0);
     int128_t x = DecimalUtil::MultiplyByScale<RESULT_T>(value(), scale_by);
     int128_t y = other.value();
     int128_t r = x / y;
+    if (round) {
+      int128_t remainder = x % y;
+      // No overflow because doubling the result of 8-byte integers fits in 
128 bits
+      DCHECK_LT(sizeof(T), sizeof(remainder));
+      if (abs(2 * remainder) >= abs(y)) {
+        // No bias at zero.  The result scale was chosen such that the 
smallest non-zero
+        // 'x' divided by the largest 'y' will always produce a non-zero 
result.
+        // If higher precision were required due to a very large scale, we 
would be
+        // computing in 256 bits, where getting a zero result is actually a 
posibility.
+        // In addition, we know the dividend is non-zero, since there was a 
remainder.
+        // The two conditions combined mean that the result must also be 
non-zero.
+        DCHECK(r != 0);
+        r += BitUtil::Sign(r);
+      }
+    }
+    DCHECK(abs(r) <= DecimalUtil::MAX_UNSCALED_DECIMAL16 &&
+        (sizeof(RESULT_T) > 8 || abs(r) <= DecimalUtil::MAX_UNSCALED_DECIMAL8) 
&&
+        (sizeof(RESULT_T) > 4 || abs(r) <= 
DecimalUtil::MAX_UNSCALED_DECIMAL4));
     return DecimalValue<RESULT_T>(static_cast<RESULT_T>(r));
   }
 }
@@ -302,7 +363,7 @@ template<typename T>
 template<typename RESULT_T>
 inline DecimalValue<RESULT_T> DecimalValue<T>::Mod(int this_scale,
     const DecimalValue& other, int other_scale, int result_precision, int 
result_scale,
-    bool* is_nan, bool* overflow) const {
+    bool round, bool* is_nan, bool* overflow) const {
   DCHECK_EQ(result_scale, std::max(this_scale, other_scale));
   if (other.value() == 0) {
     // Mod by 0.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/runtime/types.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/types.cc b/be/src/runtime/types.cc
index 2cc63ef..2de50e6 100644
--- a/be/src/runtime/types.cc
+++ b/be/src/runtime/types.cc
@@ -30,6 +30,13 @@ using namespace llvm;
 
 namespace impala {
 
+const int ColumnType::MAX_PRECISION;
+const int ColumnType::MAX_SCALE;
+const int ColumnType::MIN_ADJUSTED_SCALE;
+
+const int ColumnType::MAX_DECIMAL4_PRECISION;
+const int ColumnType::MAX_DECIMAL8_PRECISION;
+
 const char* ColumnType::LLVM_CLASS_NAME = "struct.impala::ColumnType";
 
 ColumnType::ColumnType(const std::vector<TTypeNode>& types, int* idx)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/runtime/types.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index f2db3bd..0657e5f 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -84,6 +84,7 @@ struct ColumnType {
   /// Must be kept in sync with FE's max precision/scale.
   static const int MAX_PRECISION = 38;
   static const int MAX_SCALE = MAX_PRECISION;
+  static const int MIN_ADJUSTED_SCALE = 6;
 
   /// The maximum precision representable by a 4-byte decimal (Decimal4Value)
   static const int MAX_DECIMAL4_PRECISION = 9;
@@ -140,6 +141,17 @@ struct ColumnType {
     return ret;
   }
 
+  // Matches the results of createAdjustedDecimalType in front-end code.
+  static ColumnType CreateAdjustedDecimalType(int precision, int scale) {
+    if (precision > MAX_PRECISION) {
+      int min_scale = std::min(scale, MIN_ADJUSTED_SCALE);
+      int delta = precision - MAX_PRECISION;
+      precision = MAX_PRECISION;
+      scale = std::max(scale - delta, min_scale);
+    }
+    return CreateDecimalType(precision, scale);
+  }
+
   static ColumnType FromThrift(const TColumnType& t) {
     int idx = 0;
     ColumnType result(t.types, &idx);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc
index 663f10f..2559d18 100644
--- a/be/src/util/bit-util-test.cc
+++ b/be/src/util/bit-util-test.cc
@@ -29,9 +29,37 @@
 #include "util/cpu-info.h"
 
 #include "common/names.h"
+#include "runtime/multi-precision.h"
 
 namespace impala {
 
+TEST(BitUtil, UnsignedWidth) {
+  EXPECT_EQ(BitUtil::UnsignedWidth<signed char>(), 7);
+  EXPECT_EQ(BitUtil::UnsignedWidth<unsigned char>(), 8);
+  EXPECT_EQ(BitUtil::UnsignedWidth<volatile long long>(), 63);
+  EXPECT_EQ(BitUtil::UnsignedWidth<unsigned long long&>(), 64);
+  EXPECT_EQ(BitUtil::UnsignedWidth<const int128_t&>(), 127);
+  EXPECT_EQ(BitUtil::UnsignedWidth<const volatile unsigned __int128&>(), 128);
+}
+
+TEST(BitUtil, Sign) {
+  EXPECT_EQ(BitUtil::Sign<int>(0), 1);
+  EXPECT_EQ(BitUtil::Sign<int>(1), 1);
+  EXPECT_EQ(BitUtil::Sign<int>(-1), -1);
+  EXPECT_EQ(BitUtil::Sign<int>(200), 1);
+  EXPECT_EQ(BitUtil::Sign<int>(-200), -1);
+  EXPECT_EQ(BitUtil::Sign<unsigned int>(0), 1);
+  EXPECT_EQ(BitUtil::Sign<unsigned int>(1), 1);
+  EXPECT_EQ(BitUtil::Sign<unsigned int>(-1U), 1);
+  EXPECT_EQ(BitUtil::Sign<unsigned int>(200), 1);
+  EXPECT_EQ(BitUtil::Sign<unsigned int>(-200), 1);
+  EXPECT_EQ(BitUtil::Sign<int128_t>(0), 1);
+  EXPECT_EQ(BitUtil::Sign<int128_t>(1), 1);
+  EXPECT_EQ(BitUtil::Sign<int128_t>(-1), -1);
+  EXPECT_EQ(BitUtil::Sign<int128_t>(200), 1);
+  EXPECT_EQ(BitUtil::Sign<int128_t>(-200), -1);
+}
+
 TEST(BitUtil, Ceil) {
   EXPECT_EQ(BitUtil::Ceil(0, 1), 0);
   EXPECT_EQ(BitUtil::Ceil(1, 1), 1);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index bc9bcaf..571f1ae 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -26,8 +26,8 @@
 #endif
 
 #include <climits>
-
 #include <limits>
+#include <typeinfo>
 
 #include <boost/type_traits/make_unsigned.hpp>
 
@@ -43,6 +43,29 @@ using boost::make_unsigned;
 /// TODO: is this in boost or something else like that?
 class BitUtil {
  public:
+
+  /// Returns the width of the integer portion of the type, not counting the 
sign bit.
+  /// Not safe for use with unknown or non-native types, so make it undefined
+  template<typename T, typename CVR_REMOVED = typename std::decay<T>::type,
+      typename std::enable_if<std::is_integral<CVR_REMOVED>{} ||
+                              std::is_same<CVR_REMOVED, unsigned __int128>{} ||
+                              std::is_same<CVR_REMOVED, __int128>{}, 
int>::type = 0>
+  constexpr static inline int UnsignedWidth() {
+    return std::is_integral<CVR_REMOVED>::value ?
+        std::numeric_limits<CVR_REMOVED>::digits :
+        std::is_same<CVR_REMOVED, unsigned __int128>::value ? 128 :
+        std::is_same<CVR_REMOVED, __int128>::value ? 127 : -1;
+  }
+
+  /// Return an integer signifying the sign of the value, returning +1 for
+  /// positive integers (and zero), -1 for negative integers.
+  /// The extra shift is to silence GCC warnings about full width shift on
+  /// unsigned types.  It compiles out in optimized builds into the expected 
increment
+  template<typename T>
+  constexpr static inline T Sign(T value) {
+    return 1 | ((value >> (UnsignedWidth<T>() - 1)) >> 1);
+  }
+
   /// Returns the ceil of value/divisor
   constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) {
     return value / divisor + (value % divisor != 0);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/be/src/util/string-parser.h
----------------------------------------------------------------------
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index a123924..62e7dcc 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -100,12 +100,6 @@ class StringParser {
   /// On overflow or invalid values, the return value is undefined.
   /// On underflow, the truncated value is returned.
   template <typename T>
-  static inline DecimalValue<T> StringToDecimal(const uint8_t* s, int len,
-      const ColumnType& type, StringParser::ParseResult* result) {
-    return StringToDecimal<T>(reinterpret_cast<const char*>(s), len, type, 
result);
-  }
-
-  template <typename T>
   static inline DecimalValue<T> StringToDecimal(const char* s, int len,
       const ColumnType& type, StringParser::ParseResult* result) {
     return StringToDecimal<T>(s, len, type.precision, type.scale, result);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c87ab35a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test 
b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
index 1dff5f1..b1c62ec 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/decimal-exprs.test
@@ -19,10 +19,10 @@ DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
 set decimal_v2=true;
 select d1 / d2, d2 / d1, d3 / d4, d5 / d3, d3 / d5 from decimal_tbl;
 ---- RESULTS
-0.55535553555,1.8006482982,10.000000,10000.08918100081154710738507,0.000099999108197945064
-21.12612612612,0.0473347547,100.000000,0.25442100231523112106860,3.930493123209169054441
-37.07207207207,0.0269744835,1000.000000,0.09088200082702620752593,11.003278877005347593582
-37.07207207207,0.0269744835,10000.000000,0.00008100000073710000670,12345.678900000000000000000
+0.55535553555,1.8006482982,10.000000,10000.08918100081154710738508,0.000099999108197945065
+21.12612612613,0.0473347548,100.000000,0.25442100231523112106860,3.930493123209169054441
+37.07207207207,0.0269744836,1000.000000,0.09088200082702620752594,11.003278877005347593583
+37.07207207207,0.0269744836,10000.000000,0.00008100000073710000671,12345.678900000000000000000
 
398.92492492492,0.0025067373,100000.000000,0.00006309009057411982422,15850.349728459731155875669
 ---- TYPES
 DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
@@ -114,14 +114,14 @@ select l_tax, avg(cast(l_extendedprice as 
decimal(38,10))), avg(l_extendedprice)
 from tpch_parquet.lineitem group by l_tax order by 1;
 ---- RESULTS
 0.00,38241.5984613546,38241.598461
-0.01,38283.5417664599,38283.541766
+0.01,38283.5417664600,38283.541766
 0.02,38250.4873094187,38250.487309
 0.03,38259.2810374789,38259.281037
 0.04,38247.1967454731,38247.196745
 0.05,38234.8480874721,38234.848087
 0.06,38246.4342924027,38246.434292
-0.07,38281.1963710003,38281.196371
-0.08,38251.6233675941,38251.623367
+0.07,38281.1963710004,38281.196371
+0.08,38251.6233675942,38251.623368
 ---- TYPES
 DECIMAL,DECIMAL,DECIMAL
 ====
@@ -147,7 +147,7 @@ group by l_tax having a > 38247.190 order by 1;
 ---- RESULTS
 38247.196745
 38250.487309
-38251.623367
+38251.623368
 38259.281037
 38281.196371
 38283.541766

[1/3] incubator-impala git commit: IMPALA-4813: Round on divide and multiply

Reply via email to