[13/23] incubator-impala git commit: IMPALA-3163: Fix Decimal to Timestamp casting

tarmstrong Tue, 31 May 2016 23:33:07 -0700

IMPALA-3163: Fix Decimal to Timestamp casting

Before this patch, we would first convert the Decimal to Double, then
Double to Timestamp. This resulted in imprecise results.


I ran a benchmark where we read decimal values from a large parquet
table and cast them to timestamp. The new correct implementation is
slightly slower than the old one (101 seconds vs 70 seconds).

Change-Id: Iabeea9f4ab4880b2f814408add63c77916e2dba9
Reviewed-on: http://gerrit.cloudera.org:8080/3154
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/98d7b8a9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/98d7b8a9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/98d7b8a9

Branch: refs/heads/master
Commit: 98d7b8a90d534736915206fea9b8a64ef049ba08
Parents: 4896895
Author: Taras Bobrovytsky <[email protected]>
Authored: Thu Apr 14 13:32:50 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Tue May 31 23:32:11 2016 -0700

----------------------------------------------------------------------
 be/src/exprs/decimal-operators-ir.cc | 35 ++++++++++++++++++++++---
 be/src/exprs/decimal-operators.h     |  4 +++
 be/src/exprs/expr-test.cc            | 43 ++++++++++++++++++++++++++++---
 3 files changed, 75 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98d7b8a9/be/src/exprs/decimal-operators-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators-ir.cc 
b/be/src/exprs/decimal-operators-ir.cc
index 52d7dae..229b38f 100644
--- a/be/src/exprs/decimal-operators-ir.cc
+++ b/be/src/exprs/decimal-operators-ir.cc
@@ -555,6 +555,21 @@ StringVal DecimalOperators::CastToStringVal(
   return result;
 }
 
+template <typename T>
+IR_ALWAYS_INLINE T DecimalOperators::ConvertToNanoseconds(T val, int scale) {
+  // Nanosecond scale means there should be 9 decimal digits.
+  const int NANOSECOND_SCALE = 9;
+  T nanoseconds;
+  if (LIKELY(scale <= NANOSECOND_SCALE)) {
+    nanoseconds = val * DecimalUtil::GetScaleMultiplier<T>(
+        NANOSECOND_SCALE - scale);
+  } else {
+    nanoseconds = val / DecimalUtil::GetScaleMultiplier<T>(
+        scale - NANOSECOND_SCALE);
+  }
+  return nanoseconds;
+}
+
 TimestampVal DecimalOperators::CastToTimestampVal(
     FunctionContext* context, const DecimalVal& val) {
   if (val.is_null) return TimestampVal::null();
@@ -564,19 +579,33 @@ TimestampVal DecimalOperators::CastToTimestampVal(
   switch (ColumnType::GetDecimalByteSize(precision)) {
     case 4: {
       Decimal4Value dv(val.val4);
-      TimestampValue tv(dv.ToDouble(scale));
+      int32_t seconds = dv.whole_part(scale);
+      int32_t nanoseconds = ConvertToNanoseconds(
+          dv.fractional_part(scale), scale);
+      TimestampValue tv(seconds, nanoseconds);
       tv.ToTimestampVal(&result);
       break;
     }
     case 8: {
       Decimal8Value dv(val.val8);
-      TimestampValue tv(dv.ToDouble(scale));
+      int64_t seconds = dv.whole_part(scale);
+      int64_t nanoseconds = ConvertToNanoseconds(
+          dv.fractional_part(scale), scale);
+      TimestampValue tv(seconds, nanoseconds);
       tv.ToTimestampVal(&result);
       break;
     }
     case 16: {
       Decimal16Value dv(val.val16);
-      TimestampValue tv(dv.ToDouble(scale));
+      int128_t seconds = dv.whole_part(scale);
+      if (seconds < numeric_limits<int64_t>::min() ||
+          seconds > numeric_limits<int64_t>::max()) {
+        // TimeStampVal() takes int64_t.
+        return TimestampVal::null();
+      }
+      int128_t nanoseconds = ConvertToNanoseconds(
+          dv.fractional_part(scale), scale);
+      TimestampValue tv(seconds, nanoseconds);
       tv.ToTimestampVal(&result);
       break;
     }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98d7b8a9/be/src/exprs/decimal-operators.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators.h b/be/src/exprs/decimal-operators.h
index eb19765..a7cb11f 100644
--- a/be/src/exprs/decimal-operators.h
+++ b/be/src/exprs/decimal-operators.h
@@ -148,6 +148,10 @@ class DecimalOperators {
   template <typename T>
   static T RoundDelta(const DecimalValue<T>& v, int src_scale,
       int target_scale, const DecimalRoundOp& op);
+
+  /// Converts fractional 'val' with the given 'scale' to nanoseconds.
+  template <typename T>
+  static T ConvertToNanoseconds(T val, int scale);
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98d7b8a9/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index ed60016..641e97e 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -1350,6 +1350,43 @@ TEST_F(ExprTest, CastExprs) {
   TestStringValue("cast(cast(cast('2012-01-01 09:10:11.123456789' as 
timestamp) as"
       " timestamp) as string)", "2012-01-01 09:10:11.123456789");
 
+  // IMPALA-3163: Test precise conversion from Decimal to Timestamp.
+  TestTimestampValue("cast(cast(1457473016.1230 as decimal(17,4)) as 
timestamp)",
+      TimestampValue("2016-03-08 21:36:56.123000000", 29));
+  // 32 bit Decimal.
+  TestTimestampValue("cast(cast(123.45 as decimal(9,2)) as timestamp)",
+      TimestampValue("1970-01-01 00:02:03.450000000", 29));
+  // 64 bit Decimal.
+  TestTimestampValue("cast(cast(123.45 as decimal(18,2)) as timestamp)",
+      TimestampValue("1970-01-01 00:02:03.450000000", 29));
+  TestTimestampValue("cast(cast(253402300799.99 as decimal(18, 2)) as 
timestamp)",
+      TimestampValue("9999-12-31 23:59:59.990000000", 29));
+  TestIsNull("cast(cast(260000000000.00 as decimal(18, 2)) as timestamp)",
+      TYPE_TIMESTAMP);
+  // 128 bit Decimal.
+  TestTimestampValue("cast(cast(123.45 as decimal(38,2)) as timestamp)",
+      TimestampValue("1970-01-01 00:02:03.450000000", 29));
+  TestTimestampValue("cast(cast(253402300799.99 as decimal(38, 2)) as 
timestamp)",
+      TimestampValue("9999-12-31 23:59:59.990000000", 29));
+  TestTimestampValue("cast(cast(253402300799.99 as decimal(38, 26)) as 
timestamp)",
+      TimestampValue("9999-12-31 23:59:59.990000000", 29));
+  TestIsNull("cast(cast(260000000000.00 as decimal(38, 2)) as timestamp)",
+      TYPE_TIMESTAMP);
+  // numeric_limits<int64_t>::max()
+  TestIsNull("cast(cast(9223372036854775807 as decimal(38, 0)) as timestamp)",
+      TYPE_TIMESTAMP);
+  // numeric_limits<int64_t>::max() + 1
+  TestIsNull("cast(cast(9223372036854775808 as decimal(38, 0)) as timestamp)",
+      TYPE_TIMESTAMP);
+  // numeric_limits<int64_t>::min()
+  TestIsNull("cast(cast(-9223372036854775808 as decimal(38, 0)) as timestamp)",
+      TYPE_TIMESTAMP);
+  // numeric_limits<int64_t>::min() - 1
+  TestIsNull("cast(cast(-9223372036854775809 as decimal(38, 0)) as timestamp)",
+      TYPE_TIMESTAMP);
+  // 2^70 + 1
+  TestIsNull("cast(cast(1180591620717411303425 as decimal(38, 0)) as 
timestamp)",
+      TYPE_TIMESTAMP);
 
   // Timestamp <--> Int
   TestIsNull("cast(cast('09:10:11.000000' as timestamp) as int)", TYPE_INT);
@@ -3745,9 +3782,8 @@ TEST_F(ExprTest, TimestampFunctions) {
       "as double)", TYPE_DOUBLE, 1.2938724611E9);
   TestValue("cast(to_utc_timestamp(cast('2011-01-01 01:01:01.0001' as 
timestamp), 'PST') "
       "as double)", TYPE_DOUBLE, 1.2938724610001E9);
-  // We get some decimal-binary skew here
   TestStringValue("cast(from_utc_timestamp(cast(1.3041352164485E9 as 
timestamp), 'PST') "
-      "as string)", "2011-04-29 20:46:56.448499917");
+      "as string)", "2011-04-29 20:46:56.448500000");
   // NULL arguments.
   TestIsNull("from_utc_timestamp(NULL, 'PST')", TYPE_TIMESTAMP);
   TestIsNull("from_utc_timestamp(cast('2011-01-01 01:01:01.1' as timestamp), 
NULL)",
@@ -3778,9 +3814,8 @@ TEST_F(ExprTest, TimestampFunctions) {
         1.2938724611E9);
     TestValue("cast(cast('2011-01-01 01:01:01.0001' as timestamp) as double)",
         TYPE_DOUBLE, 1.2938724610001E9);
-    // We get some decimal-binary skew here
     TestStringValue("cast(cast(1.3041352164485E9 as timestamp) as string)",
-        "2011-04-29 20:46:56.448499917");
+        "2011-04-29 20:46:56.448500000");
     // NULL arguments.
     TestIsNull("from_utc_timestamp(NULL, 'PST')", TYPE_TIMESTAMP);
     TestIsNull("from_utc_timestamp(cast('2011-01-01 01:01:01.1' as timestamp), 
NULL)",

[13/23] incubator-impala git commit: IMPALA-3163: Fix Decimal to Timestamp casting

Reply via email to