IMPALA-3163: Fix Decimal to Timestamp casting Before this patch, we would first convert the Decimal to Double, then Double to Timestamp. This resulted in imprecise results.
I ran a benchmark where we read decimal values from a large parquet table and cast them to timestamp. The new correct implementation is slightly slower than the old one (101 seconds vs 70 seconds). Change-Id: Iabeea9f4ab4880b2f814408add63c77916e2dba9 Reviewed-on: http://gerrit.cloudera.org:8080/3154 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/98d7b8a9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/98d7b8a9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/98d7b8a9 Branch: refs/heads/master Commit: 98d7b8a90d534736915206fea9b8a64ef049ba08 Parents: 4896895 Author: Taras Bobrovytsky <[email protected]> Authored: Thu Apr 14 13:32:50 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Tue May 31 23:32:11 2016 -0700 ---------------------------------------------------------------------- be/src/exprs/decimal-operators-ir.cc | 35 ++++++++++++++++++++++--- be/src/exprs/decimal-operators.h | 4 +++ be/src/exprs/expr-test.cc | 43 ++++++++++++++++++++++++++++--- 3 files changed, 75 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98d7b8a9/be/src/exprs/decimal-operators-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/decimal-operators-ir.cc b/be/src/exprs/decimal-operators-ir.cc index 52d7dae..229b38f 100644 --- a/be/src/exprs/decimal-operators-ir.cc +++ b/be/src/exprs/decimal-operators-ir.cc @@ -555,6 +555,21 @@ StringVal DecimalOperators::CastToStringVal( return result; } +template <typename T> +IR_ALWAYS_INLINE T DecimalOperators::ConvertToNanoseconds(T val, int scale) { + // Nanosecond scale means there should be 9 decimal digits. + const int NANOSECOND_SCALE = 9; + T nanoseconds; + if (LIKELY(scale <= NANOSECOND_SCALE)) { + nanoseconds = val * DecimalUtil::GetScaleMultiplier<T>( + NANOSECOND_SCALE - scale); + } else { + nanoseconds = val / DecimalUtil::GetScaleMultiplier<T>( + scale - NANOSECOND_SCALE); + } + return nanoseconds; +} + TimestampVal DecimalOperators::CastToTimestampVal( FunctionContext* context, const DecimalVal& val) { if (val.is_null) return TimestampVal::null(); @@ -564,19 +579,33 @@ TimestampVal DecimalOperators::CastToTimestampVal( switch (ColumnType::GetDecimalByteSize(precision)) { case 4: { Decimal4Value dv(val.val4); - TimestampValue tv(dv.ToDouble(scale)); + int32_t seconds = dv.whole_part(scale); + int32_t nanoseconds = ConvertToNanoseconds( + dv.fractional_part(scale), scale); + TimestampValue tv(seconds, nanoseconds); tv.ToTimestampVal(&result); break; } case 8: { Decimal8Value dv(val.val8); - TimestampValue tv(dv.ToDouble(scale)); + int64_t seconds = dv.whole_part(scale); + int64_t nanoseconds = ConvertToNanoseconds( + dv.fractional_part(scale), scale); + TimestampValue tv(seconds, nanoseconds); tv.ToTimestampVal(&result); break; } case 16: { Decimal16Value dv(val.val16); - TimestampValue tv(dv.ToDouble(scale)); + int128_t seconds = dv.whole_part(scale); + if (seconds < numeric_limits<int64_t>::min() || + seconds > numeric_limits<int64_t>::max()) { + // TimeStampVal() takes int64_t. + return TimestampVal::null(); + } + int128_t nanoseconds = ConvertToNanoseconds( + dv.fractional_part(scale), scale); + TimestampValue tv(seconds, nanoseconds); tv.ToTimestampVal(&result); break; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98d7b8a9/be/src/exprs/decimal-operators.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/decimal-operators.h b/be/src/exprs/decimal-operators.h index eb19765..a7cb11f 100644 --- a/be/src/exprs/decimal-operators.h +++ b/be/src/exprs/decimal-operators.h @@ -148,6 +148,10 @@ class DecimalOperators { template <typename T> static T RoundDelta(const DecimalValue<T>& v, int src_scale, int target_scale, const DecimalRoundOp& op); + + /// Converts fractional 'val' with the given 'scale' to nanoseconds. + template <typename T> + static T ConvertToNanoseconds(T val, int scale); }; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/98d7b8a9/be/src/exprs/expr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index ed60016..641e97e 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -1350,6 +1350,43 @@ TEST_F(ExprTest, CastExprs) { TestStringValue("cast(cast(cast('2012-01-01 09:10:11.123456789' as timestamp) as" " timestamp) as string)", "2012-01-01 09:10:11.123456789"); + // IMPALA-3163: Test precise conversion from Decimal to Timestamp. + TestTimestampValue("cast(cast(1457473016.1230 as decimal(17,4)) as timestamp)", + TimestampValue("2016-03-08 21:36:56.123000000", 29)); + // 32 bit Decimal. + TestTimestampValue("cast(cast(123.45 as decimal(9,2)) as timestamp)", + TimestampValue("1970-01-01 00:02:03.450000000", 29)); + // 64 bit Decimal. + TestTimestampValue("cast(cast(123.45 as decimal(18,2)) as timestamp)", + TimestampValue("1970-01-01 00:02:03.450000000", 29)); + TestTimestampValue("cast(cast(253402300799.99 as decimal(18, 2)) as timestamp)", + TimestampValue("9999-12-31 23:59:59.990000000", 29)); + TestIsNull("cast(cast(260000000000.00 as decimal(18, 2)) as timestamp)", + TYPE_TIMESTAMP); + // 128 bit Decimal. + TestTimestampValue("cast(cast(123.45 as decimal(38,2)) as timestamp)", + TimestampValue("1970-01-01 00:02:03.450000000", 29)); + TestTimestampValue("cast(cast(253402300799.99 as decimal(38, 2)) as timestamp)", + TimestampValue("9999-12-31 23:59:59.990000000", 29)); + TestTimestampValue("cast(cast(253402300799.99 as decimal(38, 26)) as timestamp)", + TimestampValue("9999-12-31 23:59:59.990000000", 29)); + TestIsNull("cast(cast(260000000000.00 as decimal(38, 2)) as timestamp)", + TYPE_TIMESTAMP); + // numeric_limits<int64_t>::max() + TestIsNull("cast(cast(9223372036854775807 as decimal(38, 0)) as timestamp)", + TYPE_TIMESTAMP); + // numeric_limits<int64_t>::max() + 1 + TestIsNull("cast(cast(9223372036854775808 as decimal(38, 0)) as timestamp)", + TYPE_TIMESTAMP); + // numeric_limits<int64_t>::min() + TestIsNull("cast(cast(-9223372036854775808 as decimal(38, 0)) as timestamp)", + TYPE_TIMESTAMP); + // numeric_limits<int64_t>::min() - 1 + TestIsNull("cast(cast(-9223372036854775809 as decimal(38, 0)) as timestamp)", + TYPE_TIMESTAMP); + // 2^70 + 1 + TestIsNull("cast(cast(1180591620717411303425 as decimal(38, 0)) as timestamp)", + TYPE_TIMESTAMP); // Timestamp <--> Int TestIsNull("cast(cast('09:10:11.000000' as timestamp) as int)", TYPE_INT); @@ -3745,9 +3782,8 @@ TEST_F(ExprTest, TimestampFunctions) { "as double)", TYPE_DOUBLE, 1.2938724611E9); TestValue("cast(to_utc_timestamp(cast('2011-01-01 01:01:01.0001' as timestamp), 'PST') " "as double)", TYPE_DOUBLE, 1.2938724610001E9); - // We get some decimal-binary skew here TestStringValue("cast(from_utc_timestamp(cast(1.3041352164485E9 as timestamp), 'PST') " - "as string)", "2011-04-29 20:46:56.448499917"); + "as string)", "2011-04-29 20:46:56.448500000"); // NULL arguments. TestIsNull("from_utc_timestamp(NULL, 'PST')", TYPE_TIMESTAMP); TestIsNull("from_utc_timestamp(cast('2011-01-01 01:01:01.1' as timestamp), NULL)", @@ -3778,9 +3814,8 @@ TEST_F(ExprTest, TimestampFunctions) { 1.2938724611E9); TestValue("cast(cast('2011-01-01 01:01:01.0001' as timestamp) as double)", TYPE_DOUBLE, 1.2938724610001E9); - // We get some decimal-binary skew here TestStringValue("cast(cast(1.3041352164485E9 as timestamp) as string)", - "2011-04-29 20:46:56.448499917"); + "2011-04-29 20:46:56.448500000"); // NULL arguments. TestIsNull("from_utc_timestamp(NULL, 'PST')", TYPE_TIMESTAMP); TestIsNull("from_utc_timestamp(cast('2011-01-01 01:01:01.1' as timestamp), NULL)",
