Repository: incubator-impala
Updated Branches:
  refs/heads/master 406632640 -> f590bc0da


IMPALA-4055: Speed up to_date() with custom implementation.

Simple implementation of to_date() that avoids calling
into boost for a speedup of 10x.

Perf:
I generated a synthetic Parquet table with 26437248 rows
and a single timestamp column. I tested the response time
of the following query before and after this change.

set mt_dop=1;
select count(*) from to_date_test
where to_date(ts) = '2017-10-23';

Before: 38.1s
After:   3.4s

Testing: I locally ran expr-test.cc and expr_test.py.

Change-Id: I5713b3e0c27b739aae597a6911cf3b2ddd01f822
Reviewed-on: http://gerrit.cloudera.org:8080/5791
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6154a695
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6154a695
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6154a695

Branch: refs/heads/master
Commit: 6154a695b738c654fd8f094a5e6d31b1018ff687
Parents: 4066326
Author: Alex Behm <[email protected]>
Authored: Tue Jan 24 00:03:14 2017 -0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Thu Jan 26 09:24:05 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/timestamp-functions-ir.cc | 27 +++++++++++++++++++++++++--
 be/src/exprs/timestamp-functions.cc    |  8 --------
 be/src/exprs/timestamp-functions.h     |  2 --
 3 files changed, 25 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6154a695/be/src/exprs/timestamp-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions-ir.cc b/be/src/exprs/timestamp-functions-ir.cc
index f250d5a..c70f286 100644
--- a/be/src/exprs/timestamp-functions-ir.cc
+++ b/be/src/exprs/timestamp-functions-ir.cc
@@ -321,12 +321,35 @@ TimestampVal TimestampFunctions::Now(FunctionContext* context) {
   return return_val;
 }
 
+// Writes 'num' as ASCII into 'dst'. If necessary, adds leading zeros to make the ASCII
+// representation exactly 'len' characters. Both 'num' and 'len' must be >= 0.
+static inline void IntToChar(uint8_t* dst, int num, int len) {
+  DCHECK_GE(len, 0);
+  DCHECK_GE(num, 0);
+  for (int i = len - 1; i >= 0; --i) {
+    *(dst + i) = '0' + (num % 10);
+    num /= 10;
+  }
+}
+
 StringVal TimestampFunctions::ToDate(FunctionContext* context,
     const TimestampVal& ts_val) {
   if (ts_val.is_null) return StringVal::null();
   const TimestampValue ts_value = TimestampValue::FromTimestampVal(ts_val);
-  string result = ToIsoExtendedString(ts_value);
-  return AnyValUtil::FromString(context, result);
+  // Defensively, return NULL if the timestamp does not have a date portion. Some of
+  // our built-in functions might incorrectly return such a malformed timestamp.
+  if (!ts_value.HasDate()) return StringVal::null();
+  DCHECK(ts_value.IsValidDate());
+  StringVal result(context, 10);
+  result.len = 10;
+  // Fill in year, month, and day.
+  IntToChar(result.ptr, ts_value.date().year(), 4);
+  IntToChar(result.ptr + 5, ts_value.date().month(), 2);
+  IntToChar(result.ptr + 8, ts_value.date().day(), 2);
+  // Fill in dashes.
+  result.ptr[7] = '-';
+  result.ptr[4] = '-';
+  return result;
 }
 
 inline bool IsLeapYear(int year) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6154a695/be/src/exprs/timestamp-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index 1afe349..365021f 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -40,14 +40,6 @@ using boost::posix_time::to_iso_extended_string;
 
 namespace impala {
 
-// This function is not cross-compiled to avoid including unnecessary boost library's
-// header files which bring in a bunch of unused code and global variables and increase
-// the codegen time. boost::posix_time::to_iso_extended_string() is large enough that
-// it won't benefit much from inlining.
-string TimestampFunctions::ToIsoExtendedString(const TimestampValue& ts_value) {
-  return to_iso_extended_string(ts_value.date());
-}
-
 namespace {
 /// Uses Boost's internal checking to throw an exception if 'date' is out of the
 /// supported range of boost::gregorian.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6154a695/be/src/exprs/timestamp-functions.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions.h b/be/src/exprs/timestamp-functions.h
index 8abe7d7..433054b 100644
--- a/be/src/exprs/timestamp-functions.h
+++ b/be/src/exprs/timestamp-functions.h
@@ -185,8 +185,6 @@ class TimestampFunctions {
       const StringVal& format, bool is_error);
 
  private:
-  static std::string ToIsoExtendedString(const TimestampValue& ts_value);
-
   /// Static result values for DayName() function.
   static const char* MONDAY;
   static const char* TUESDAY;

Reply via email to