Repository: impala
Updated Branches:
  refs/heads/2.x 66ca5db71 -> 57f95865c


IMPALA-6641: Support more separators between date and time in default
timestamp format

This change adds support to the multi-space separator and the
'T' separator between the date and time component of a datetime
string during a cast (x as timestamp).

Testing:
Added valid and invalid tests to expr-test.cc to validate
the functionality during a cast.

Change-Id: Id2ce3ba09256b3996170e42d42d49d12776cab97
Reviewed-on: http://gerrit.cloudera.org:8080/9725
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/57f95865
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/57f95865
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/57f95865

Branch: refs/heads/2.x
Commit: 57f95865c17c6b4614f7078c916e9299a59aeea4
Parents: 66ca5db
Author: Vincent Tran <[email protected]>
Authored: Tue Mar 20 06:22:47 2018 -0400
Committer: Impala Public Jenkins <[email protected]>
Committed: Mon Mar 26 20:10:15 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc              | 30 ++++++++++++++++++----
 be/src/runtime/timestamp-parse-util.cc | 39 +++++++++++++++++++++--------
 2 files changed, 53 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/57f95865/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index bd25328..07f5eda 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3073,7 +3073,6 @@ TEST_F(ExprTest, CastExprs) {
   TestTimestampValue("cast('1:05:1' as timestamp)", 
TimestampValue::Parse("01:05:01"));
   TestTimestampValue("cast('        2001-01-9 1:05:1        ' as timestamp)",
       TimestampValue::Parse("2001-01-09 01:05:01"));
-  TestIsNull("cast('2001-1-21     11:2:3' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP);
@@ -3154,14 +3153,35 @@ TEST_F(ExprTest, CastExprs) {
   TestTimestampValue("cast('  \t\r\n      2001-1-9    \t\r\n    ' as 
timestamp)",
       TimestampValue::Parse("2001-01-09"));
 
+  // Test valid multi-space and 'T' separators between date and time
+  // components
+  TestTimestampValue("cast('2001-01-09   01:05:01' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('2001-01-09T01:05:01' as timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('2001-01-09   01:05:01.123456789101112' as 
timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue("cast('2001-01-09T01:05:01.123456789101112' as 
timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue("cast('  \t\r\n 2001-01-09   01:05:01   \t\r\n ' as 
timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue("cast('  \t\r\n 2001-01-09T01:05:01   \t\r\n ' as 
timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01"));
+  TestTimestampValue(
+      "cast('  \t\r\n 2001-01-09   01:05:01.12345678910   \t\r\n ' as 
timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+  TestTimestampValue(
+      "cast('  \t\r\n 2001-01-09T01:05:01.12345678910   \t\r\n ' as 
timestamp)",
+      TimestampValue::Parse("2001-01-09 01:05:01.123456789"));
+
+  // Test invalid variations of the 'T' separator
+  TestIsNull("cast('2001-01-09TTTTT01:05:01' as timestamp)", TYPE_TIMESTAMP);
+  TestIsNull("cast('2001-01-09t01:05:01' as timestamp)", TYPE_TIMESTAMP);
+
   // Test invalid whitespace locations in strings to be casted to timestamp
-  TestIsNull(
-      "cast(' \t\r\n  2001-01-09      01:05:01  \t\r\n ' as timestamp)", 
TYPE_TIMESTAMP);
-  TestIsNull("cast('2001-01-09   01:05:01' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-01-09\t01:05:01' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-01-09\r01:05:01' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-01-09\n01:05:01' as timestamp)", TYPE_TIMESTAMP);
-  TestIsNull("cast('2001-1-9    1:5:1' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-1-9\t1:5:1' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-1-9\r1:5:1' as timestamp)", TYPE_TIMESTAMP);
   TestIsNull("cast('2001-1-9\n1:5:1' as timestamp)", TYPE_TIMESTAMP);

http://git-wip-us.apache.org/repos/asf/impala/blob/57f95865/be/src/runtime/timestamp-parse-util.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-parse-util.cc 
b/be/src/runtime/timestamp-parse-util.cc
index c444214..cbabeab 100644
--- a/be/src/runtime/timestamp-parse-util.cc
+++ b/be/src/runtime/timestamp-parse-util.cc
@@ -285,8 +285,12 @@ bool 
TimestampParser::ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx) {
     if (str == str_end) return true;
 
     // Check for the space between date and time component
-    tok_end = ParseSeparatorToken(str, str_end, ' ');
-    if (tok_end - str != 1) return false;
+    if (*str != ' ' && *str != 'T') return false;
+    char sep = *str;
+    tok_end = ParseSeparatorToken(str, str_end, sep);
+    if (tok_end - str < 1) return false;
+    // IMPALA-6641: Multiple spaces are okay, 'T' separator must be single
+    if (sep == 'T' && tok_end - str > 1) return false;
     dt_ctx->toks.push_back(
         DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str));
     str = tok_end;
@@ -399,23 +403,35 @@ bool TimestampParser::Parse(const char* str, int len, 
boost::gregorian::date* d,
   DateTimeFormatContext* dt_ctx = NULL;
   if (LIKELY(len >= DEFAULT_TIME_FMT_LEN)) {
     // This string starts with a date component
-    if (str[4] == '-') {
+    if (str[4] == '-' && str[7] == '-') {
       switch (len) {
         case DEFAULT_DATE_FMT_LEN: {
           dt_ctx = &DEFAULT_DATE_CTX;
           break;
         }
-        case DEFAULT_SHORT_DATE_TIME_FMT_LEN:  {
-          switch (str[10]) {
-            case ' ': dt_ctx = &DEFAULT_SHORT_DATE_TIME_CTX; break;
-            case 'T': dt_ctx = &DEFAULT_SHORT_ISO_DATE_TIME_CTX; break;
+        case DEFAULT_SHORT_DATE_TIME_FMT_LEN: {
+          if (LIKELY(str[13] == ':')) {
+            switch (str[10]) {
+              case ' ':
+                dt_ctx = &DEFAULT_SHORT_DATE_TIME_CTX;
+                break;
+              case 'T':
+                dt_ctx = &DEFAULT_SHORT_ISO_DATE_TIME_CTX;
+                break;
+            }
           }
           break;
         }
         case DEFAULT_DATE_TIME_FMT_LEN: {
-          switch (str[10]) {
-            case ' ': dt_ctx = &DEFAULT_DATE_TIME_CTX[9]; break;
-            case 'T': dt_ctx = &DEFAULT_ISO_DATE_TIME_CTX[9]; break;
+          if (LIKELY(str[13] == ':')) {
+            switch (str[10]) {
+              case ' ':
+                dt_ctx = &DEFAULT_DATE_TIME_CTX[9];
+                break;
+              case 'T':
+                dt_ctx = &DEFAULT_ISO_DATE_TIME_CTX[9];
+                break;
+            }
           }
           break;
         }
@@ -423,7 +439,8 @@ bool TimestampParser::Parse(const char* str, int len, 
boost::gregorian::date* d,
           // There is likely a fractional component that's below the expected 
9 chars.
           // We will need to work out which default context to use that 
corresponds to
           // the fractional length in the string.
-          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] 
== '.')) {
+          if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] 
== '.')
+              && LIKELY(str[13] == ':')) {
             switch (str[10]) {
               case ' ': {
                 dt_ctx =

Reply via email to