Repository: impala Updated Branches: refs/heads/2.x 66ca5db71 -> 57f95865c
IMPALA-6641: Support more separators between date and time in default timestamp format This change adds support to the multi-space separator and the 'T' separator between the date and time component of a datetime string during a cast (x as timestamp). Testing: Added valid and invalid tests to expr-test.cc to validate the functionality during a cast. Change-Id: Id2ce3ba09256b3996170e42d42d49d12776cab97 Reviewed-on: http://gerrit.cloudera.org:8080/9725 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/57f95865 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/57f95865 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/57f95865 Branch: refs/heads/2.x Commit: 57f95865c17c6b4614f7078c916e9299a59aeea4 Parents: 66ca5db Author: Vincent Tran <[email protected]> Authored: Tue Mar 20 06:22:47 2018 -0400 Committer: Impala Public Jenkins <[email protected]> Committed: Mon Mar 26 20:10:15 2018 +0000 ---------------------------------------------------------------------- be/src/exprs/expr-test.cc | 30 ++++++++++++++++++---- be/src/runtime/timestamp-parse-util.cc | 39 +++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/57f95865/be/src/exprs/expr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index bd25328..07f5eda 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -3073,7 +3073,6 @@ TEST_F(ExprTest, CastExprs) { TestTimestampValue("cast('1:05:1' as timestamp)", TimestampValue::Parse("01:05:01")); TestTimestampValue("cast(' 2001-01-9 1:05:1 ' as timestamp)", TimestampValue::Parse("2001-01-09 01:05:01")); - TestIsNull("cast('2001-1-21 11:2:3' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP); @@ -3154,14 +3153,35 @@ TEST_F(ExprTest, CastExprs) { TestTimestampValue("cast(' \t\r\n 2001-1-9 \t\r\n ' as timestamp)", TimestampValue::Parse("2001-01-09")); + // Test valid multi-space and 'T' separators between date and time + // components + TestTimestampValue("cast('2001-01-09 01:05:01' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue("cast('2001-01-09T01:05:01' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue("cast('2001-01-09 01:05:01.123456789101112' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456789")); + TestTimestampValue("cast('2001-01-09T01:05:01.123456789101112' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456789")); + TestTimestampValue("cast(' \t\r\n 2001-01-09 01:05:01 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue("cast(' \t\r\n 2001-01-09T01:05:01 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue( + "cast(' \t\r\n 2001-01-09 01:05:01.12345678910 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456789")); + TestTimestampValue( + "cast(' \t\r\n 2001-01-09T01:05:01.12345678910 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456789")); + + // Test invalid variations of the 'T' separator + TestIsNull("cast('2001-01-09TTTTT01:05:01' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-01-09t01:05:01' as timestamp)", TYPE_TIMESTAMP); + // Test invalid whitespace locations in strings to be casted to timestamp - TestIsNull( - "cast(' \t\r\n 2001-01-09 01:05:01 \t\r\n ' as timestamp)", TYPE_TIMESTAMP); - TestIsNull("cast('2001-01-09 01:05:01' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-01-09\t01:05:01' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-01-09\r01:05:01' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-01-09\n01:05:01' as timestamp)", TYPE_TIMESTAMP); - TestIsNull("cast('2001-1-9 1:5:1' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-1-9\t1:5:1' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-1-9\r1:5:1' as timestamp)", TYPE_TIMESTAMP); TestIsNull("cast('2001-1-9\n1:5:1' as timestamp)", TYPE_TIMESTAMP); http://git-wip-us.apache.org/repos/asf/impala/blob/57f95865/be/src/runtime/timestamp-parse-util.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc index c444214..cbabeab 100644 --- a/be/src/runtime/timestamp-parse-util.cc +++ b/be/src/runtime/timestamp-parse-util.cc @@ -285,8 +285,12 @@ bool TimestampParser::ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx) { if (str == str_end) return true; // Check for the space between date and time component - tok_end = ParseSeparatorToken(str, str_end, ' '); - if (tok_end - str != 1) return false; + if (*str != ' ' && *str != 'T') return false; + char sep = *str; + tok_end = ParseSeparatorToken(str, str_end, sep); + if (tok_end - str < 1) return false; + // IMPALA-6641: Multiple spaces are okay, 'T' separator must be single + if (sep == 'T' && tok_end - str > 1) return false; dt_ctx->toks.push_back( DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); str = tok_end; @@ -399,23 +403,35 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, DateTimeFormatContext* dt_ctx = NULL; if (LIKELY(len >= DEFAULT_TIME_FMT_LEN)) { // This string starts with a date component - if (str[4] == '-') { + if (str[4] == '-' && str[7] == '-') { switch (len) { case DEFAULT_DATE_FMT_LEN: { dt_ctx = &DEFAULT_DATE_CTX; break; } - case DEFAULT_SHORT_DATE_TIME_FMT_LEN: { - switch (str[10]) { - case ' ': dt_ctx = &DEFAULT_SHORT_DATE_TIME_CTX; break; - case 'T': dt_ctx = &DEFAULT_SHORT_ISO_DATE_TIME_CTX; break; + case DEFAULT_SHORT_DATE_TIME_FMT_LEN: { + if (LIKELY(str[13] == ':')) { + switch (str[10]) { + case ' ': + dt_ctx = &DEFAULT_SHORT_DATE_TIME_CTX; + break; + case 'T': + dt_ctx = &DEFAULT_SHORT_ISO_DATE_TIME_CTX; + break; + } } break; } case DEFAULT_DATE_TIME_FMT_LEN: { - switch (str[10]) { - case ' ': dt_ctx = &DEFAULT_DATE_TIME_CTX[9]; break; - case 'T': dt_ctx = &DEFAULT_ISO_DATE_TIME_CTX[9]; break; + if (LIKELY(str[13] == ':')) { + switch (str[10]) { + case ' ': + dt_ctx = &DEFAULT_DATE_TIME_CTX[9]; + break; + case 'T': + dt_ctx = &DEFAULT_ISO_DATE_TIME_CTX[9]; + break; + } } break; } @@ -423,7 +439,8 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, // There is likely a fractional component that's below the expected 9 chars. // We will need to work out which default context to use that corresponds to // the fractional length in the string. - if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] == '.')) { + if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] == '.') + && LIKELY(str[13] == ':')) { switch (str[10]) { case ' ': { dt_ctx =
