Repository: incubator-impala Updated Branches: refs/heads/master 406632640 -> f590bc0da
IMPALA-4055: Speed up to_date() with custom implementation. Simple implementation of to_date() that avoids calling into boost for a speedup of 10x. Perf: I generated a synthetic Parquet table with 26437248 rows and a single timestamp column. I tested the response time of the following query before and after this change. set mt_dop=1; select count(*) from to_date_test where to_date(ts) = '2017-10-23'; Before: 38.1s After: 3.4s Testing: I locally ran expr-test.cc and expr_test.py. Change-Id: I5713b3e0c27b739aae597a6911cf3b2ddd01f822 Reviewed-on: http://gerrit.cloudera.org:8080/5791 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6154a695 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6154a695 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6154a695 Branch: refs/heads/master Commit: 6154a695b738c654fd8f094a5e6d31b1018ff687 Parents: 4066326 Author: Alex Behm <[email protected]> Authored: Tue Jan 24 00:03:14 2017 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Jan 26 09:24:05 2017 +0000 ---------------------------------------------------------------------- be/src/exprs/timestamp-functions-ir.cc | 27 +++++++++++++++++++++++++-- be/src/exprs/timestamp-functions.cc | 8 -------- be/src/exprs/timestamp-functions.h | 2 -- 3 files changed, 25 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6154a695/be/src/exprs/timestamp-functions-ir.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/timestamp-functions-ir.cc b/be/src/exprs/timestamp-functions-ir.cc index f250d5a..c70f286 100644 --- a/be/src/exprs/timestamp-functions-ir.cc +++ b/be/src/exprs/timestamp-functions-ir.cc @@ -321,12 +321,35 @@ TimestampVal TimestampFunctions::Now(FunctionContext* context) { return return_val; } +// Writes 'num' as ASCII into 'dst'. If necessary, adds leading zeros to make the ASCII +// representation exactly 'len' characters. Both 'num' and 'len' must be >= 0. +static inline void IntToChar(uint8_t* dst, int num, int len) { + DCHECK_GE(len, 0); + DCHECK_GE(num, 0); + for (int i = len - 1; i >= 0; --i) { + *(dst + i) = '0' + (num % 10); + num /= 10; + } +} + StringVal TimestampFunctions::ToDate(FunctionContext* context, const TimestampVal& ts_val) { if (ts_val.is_null) return StringVal::null(); const TimestampValue ts_value = TimestampValue::FromTimestampVal(ts_val); - string result = ToIsoExtendedString(ts_value); - return AnyValUtil::FromString(context, result); + // Defensively, return NULL if the timestamp does not have a date portion. Some of + // our built-in functions might incorrectly return such a malformed timestamp. + if (!ts_value.HasDate()) return StringVal::null(); + DCHECK(ts_value.IsValidDate()); + StringVal result(context, 10); + result.len = 10; + // Fill in year, month, and day. + IntToChar(result.ptr, ts_value.date().year(), 4); + IntToChar(result.ptr + 5, ts_value.date().month(), 2); + IntToChar(result.ptr + 8, ts_value.date().day(), 2); + // Fill in dashes. + result.ptr[7] = '-'; + result.ptr[4] = '-'; + return result; } inline bool IsLeapYear(int year) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6154a695/be/src/exprs/timestamp-functions.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc index 1afe349..365021f 100644 --- a/be/src/exprs/timestamp-functions.cc +++ b/be/src/exprs/timestamp-functions.cc @@ -40,14 +40,6 @@ using boost::posix_time::to_iso_extended_string; namespace impala { -// This function is not cross-compiled to avoid including unnecessary boost library's -// header files which bring in a bunch of unused code and global variables and increase -// the codegen time. boost::posix_time::to_iso_extended_string() is large enough that -// it won't benefit much from inlining. -string TimestampFunctions::ToIsoExtendedString(const TimestampValue& ts_value) { - return to_iso_extended_string(ts_value.date()); -} - namespace { /// Uses Boost's internal checking to throw an exception if 'date' is out of the /// supported range of boost::gregorian. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6154a695/be/src/exprs/timestamp-functions.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/timestamp-functions.h b/be/src/exprs/timestamp-functions.h index 8abe7d7..433054b 100644 --- a/be/src/exprs/timestamp-functions.h +++ b/be/src/exprs/timestamp-functions.h @@ -185,8 +185,6 @@ class TimestampFunctions { const StringVal& format, bool is_error); private: - static std::string ToIsoExtendedString(const TimestampValue& ts_value); - /// Static result values for DayName() function. static const char* MONDAY; static const char* TUESDAY;
