IMPALA-5315: Cast to timestamp fails for YYYY-M-D format. This change allows a string in a 'lazy' (non-zero-padded) date/time format to be cast to a timestamp. The supported lazy date/time formats are: yyyy-[M]M-[d]d; yyyy-[M]M-[d]d [H]H:[m]m:[s]s[.SSSSSSSSS]; and [H]H:[m]m:[s]s[.SSSSSSSSS].
We will incur a SCAN performance penalty (approximately 1/2 TotalReadThroughput) when the string is in one of these lazy date/time formats. Testing: Benchmarked the performance consequence by executing this SQL on a private build over 3.8 billion rows: select min(cast (time_string as timestamp)) from private.impala_5315 Added tests for valid and invalid date/time format strings in expr-test.cc to be in line with the existing tests for the CAST() function. Added end-to-end tests into exprs.test and select-lazy-timestamp.test to exercise the new function within the context of a query. Added tests to exercise the leading and trailing white space trimming behaviour in the default and lazy date/time string formats (IMPALA-6630). Change-Id: Ib9a184a09d7e7783f04d47588537612c2ecec28f Reviewed-on: http://gerrit.cloudera.org:8080/7009 Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0d7787fe Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0d7787fe Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0d7787fe Branch: refs/heads/master Commit: 0d7787fe4df1ab2bb8231b0ee0912e3cf2787f9e Parents: 6d8ce64 Author: Vincent Tran <vtt...@cloudera.com> Authored: Sat May 27 03:02:19 2017 -0400 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Tue Mar 13 22:10:18 2018 +0000 ---------------------------------------------------------------------- be/src/exprs/expr-test.cc | 117 ++++++++++++- be/src/runtime/timestamp-parse-util.cc | 168 ++++++++++++++++++- be/src/runtime/timestamp-parse-util.h | 29 ++++ testdata/data/lazy_timestamp.csv | 13 ++ .../queries/QueryTest/exprs.test | 36 ++++ .../QueryTest/select-lazy-timestamp.test | 20 +++ tests/query_test/test_scanners.py | 22 +++ 7 files changed, 398 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/exprs/expr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index e02ff6a..bd25328 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -3051,6 +3051,121 @@ TEST_F(ExprTest, CastExprs) { TestStringValue("cast(cast(cast('2012-01-01 09:10:11.123456789' as timestamp) as" " timestamp) as string)", "2012-01-01 09:10:11.123456789"); + // Test casting of lazy date and/or time format string to timestamp + TestTimestampValue( + "cast('2001-1-2' as timestamp)", TimestampValue::Parse("2001-01-02 00:00:00")); + TestTimestampValue( + "cast('2001-01-3' as timestamp)", TimestampValue::Parse("2001-01-03 00:00:00")); + TestTimestampValue( + "cast('2001-1-21' as timestamp)", TimestampValue::Parse("2001-01-21 00:00:00")); + TestTimestampValue("cast('2001-1-21 12:5:30' as timestamp)", + TimestampValue::Parse("2001-01-21 12:05:30")); + TestTimestampValue("cast('2001-1-21 13:5:05' as timestamp)", + TimestampValue::Parse("2001-01-21 13:05:05")); + TestTimestampValue("cast('2001-1-21 1:2:3' as timestamp)", + TimestampValue::Parse("2001-01-21 01:02:03")); + TestTimestampValue("cast('2001-1-21 1:5:31.12345' as timestamp)", + TimestampValue::Parse("2001-01-21 01:05:31.123450000")); + TestTimestampValue("cast('2001-1-21 1:5:31.12345678910111213' as timestamp)", + TimestampValue::Parse("2001-01-21 01:05:31.123456789")); + TestTimestampValue( + "cast('1:05:1.12' as timestamp)", TimestampValue::Parse("01:05:01.120000000")); + TestTimestampValue("cast('1:05:1' as timestamp)", TimestampValue::Parse("01:05:01")); + TestTimestampValue("cast(' 2001-01-9 1:05:1 ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestIsNull("cast('2001-1-21 11:2:3' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-6' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('01-1-21' as timestamp)", TYPE_TIMESTAMP); + 
TestIsNull("cast('2001-1-21 12:5:3 AM' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1:05:31.123456foo' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('10/feb/10' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-foo1-2bar' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909/1-/2' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-1-2 12:32:1.111bar' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32:1.111.111.2' as timestamp)", TYPE_TIMESTAMP); + + // Test various ways of truncating a "lazy" format to produce an invalid timestamp. + TestIsNull("cast('1909-10-2 12:32:1.' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32:11.' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32:11. ' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32:' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32: ' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 1:32:' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 1:2:' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 1:2' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 1:2 ' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12 ' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 2' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10- ' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909' as timestamp)", TYPE_TIMESTAMP); + + // Test missing number from format. 
+ TestIsNull("cast('1909-10-2 12:32:.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12::1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 :32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10- 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909--2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('-10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + + // Test duplicate separators - should return NULL because not a valid format. + TestIsNull("cast('1909--10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10--2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12::32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32::1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32:1..9999' as timestamp)", TYPE_TIMESTAMP); + + // Test numbers with too many digits in date/time - should return NULL because not a + // valid timestamp. 
+ TestIsNull("cast('19097-10-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-107-2 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-277 12:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 127:32:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:327:1.9999' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('1909-10-2 12:32:177.9999' as timestamp)", TYPE_TIMESTAMP); + + // IMPALA-6630: Test whitespace trimming mechanism when cast from string to timestamp + TestTimestampValue("cast(' \t\r\n 2001-01-09 01:05:01.123456789 \t\r\n' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456789")); + TestTimestampValue("cast(' \t\r\n 2001-01-09T01:05:01.123456789 \t\r\n' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456789")); + TestTimestampValue("cast(' \t\r\n 2001-01-09 01:05:01 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue("cast(' \t\r\n 2001-01-09T01:05:01 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue("cast(' \t\r\n 2001-01-09 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09")); + TestTimestampValue("cast(' \t\r\n 01:05:01 \t\r\n ' as timestamp)", + TimestampValue::Parse("01:05:01")); + TestTimestampValue("cast(' \t\r\n 01:05:01.123456789 \t\r\n ' as timestamp)", + TimestampValue::Parse("01:05:01.123456789")); + TestTimestampValue("cast(' \t\r\n 2001-1-9 1:5:1 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01")); + TestTimestampValue("cast(' \t\r\n 2001-1-9 1:5:1.12345678 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09 01:05:01.123456780")); + TestTimestampValue("cast(' \t\r\n 1:5:1 \t\r\n ' as timestamp)", + TimestampValue::Parse("01:05:01")); + TestTimestampValue("cast(' \t\r\n 1:5:1.12345678 \t\r\n ' as timestamp)", + TimestampValue::Parse("01:05:01.123456780")); + TestTimestampValue("cast(' \t\r\n 
2001-1-9 \t\r\n ' as timestamp)", + TimestampValue::Parse("2001-01-09")); + + // Test invalid whitespace locations in strings to be casted to timestamp + TestIsNull( + "cast(' \t\r\n 2001-01-09 01:05:01 \t\r\n ' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-01-09 01:05:01' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-01-09\t01:05:01' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-01-09\r01:05:01' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-01-09\n01:05:01' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-1-9 1:5:1' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-1-9\t1:5:1' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-1-9\r1:5:1' as timestamp)", TYPE_TIMESTAMP); + TestIsNull("cast('2001-1-9\n1:5:1' as timestamp)", TYPE_TIMESTAMP); + // IMPALA-3163: Test precise conversion from Decimal to Timestamp. TestTimestampValue("cast(cast(1457473016.1230 as decimal(17,4)) as timestamp)", TimestampValue::Parse("2016-03-08 21:36:56.123000000", 29)); @@ -5984,7 +6099,7 @@ TEST_F(ExprTest, TimestampFunctions) { TestIsNull("timestamp_cmp('','1966-05-04 15:33:45')", TYPE_INT); TestIsNull("timestamp_cmp(NULL,'1966-05-04 15:33:45')", TYPE_INT); // Invalid timestamp test case - TestIsNull("timestamp_cmp('1966-5-4 5:33:45','1966-5-4 15:33:45')", TYPE_INT); + TestIsNull("timestamp_cmp('1966-5-4 50:33:45','1966-5-4 15:33:45')", TYPE_INT); TestValue("int_months_between('1967-07-19','1966-06-04')", TYPE_INT, 13); TestValue("int_months_between('1966-06-04 16:34:45','1967-07-19 15:33:46')", http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/runtime/timestamp-parse-util.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/timestamp-parse-util.cc b/be/src/runtime/timestamp-parse-util.cc index e64d904..c444214 100644 --- a/be/src/runtime/timestamp-parse-util.cc +++ b/be/src/runtime/timestamp-parse-util.cc @@ -17,6 +17,8 @@ #include 
"runtime/timestamp-parse-util.h" +#include <algorithm> + #include <boost/assign/list_of.hpp> #include <boost/date_time/gregorian/gregorian.hpp> #include <boost/unordered_map.hpp> @@ -214,8 +216,150 @@ bool TimestampParser::ParseFormatTokens(DateTimeFormatContext* dt_ctx) { return dt_ctx->has_date_toks || dt_ctx->has_time_toks; } +const char* TimestampParser::ParseDigitToken(const char* str, const char* str_end) { + const char* tok_end = str; + while (tok_end < str_end) { + if (!isdigit(*tok_end)) return tok_end; + ++tok_end; + } + return tok_end; +} + +const char* TimestampParser::ParseSeparatorToken( + const char* str, const char* str_end, const char sep) { + const char* tok_end = str; + while (tok_end < str_end) { + if (*tok_end != sep) return tok_end; + ++tok_end; + } + return tok_end; +} + +bool TimestampParser::ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx) { + DCHECK(dt_ctx != NULL); + DCHECK(dt_ctx->fmt != NULL); + DCHECK_GT(dt_ctx->fmt_len, 0); + DCHECK_EQ(dt_ctx->toks.size(), 0); + const char* str_begin = dt_ctx->fmt; + const char* str_end = str_begin + dt_ctx->fmt_len; + const char* str = str_begin; + const char* tok_end; + + // Parse the 4-digit year + tok_end = ParseDigitToken(str, str_end); + if (tok_end - str == 4) { + dt_ctx->toks.push_back( + DateTimeFormatToken(YEAR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Check for the date separator '-' + tok_end = ParseSeparatorToken(str, str_end, '-'); + if (tok_end - str != 1) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Parse the 1 or 2 digit month. 
+ tok_end = ParseDigitToken(str, str_end); + if (tok_end - str != 1 && tok_end - str != 2) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(MONTH_IN_YEAR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Check for the date separator '-' + tok_end = ParseSeparatorToken(str, str_end, '-'); + if (tok_end - str != 1) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Parse the 1 or 2 digit day in month + tok_end = ParseDigitToken(str, str_end); + if (tok_end - str != 1 && tok_end - str != 2) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(DAY_IN_MONTH, str - str_begin, tok_end - str, str)); + str = tok_end; + dt_ctx->has_date_toks = true; + + // If the string ends here, we only have a date component + if (str == str_end) return true; + + // Check for the space between date and time component + tok_end = ParseSeparatorToken(str, str_end, ' '); + if (tok_end - str != 1) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Invalid format if date-time separator is not followed by more digits + if (str > str_end) return false; + tok_end = ParseDigitToken(str, str_end); + } + + // Parse the 1 or 2 digit hour + if (tok_end - str != 1 && tok_end - str != 2) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(HOUR_IN_DAY, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Check for the time component separator ':' + tok_end = ParseSeparatorToken(str, str_end, ':'); + if (tok_end - str != 1) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Parse the 1 or 2 digit minute + tok_end = ParseDigitToken(str, str_end); + if (tok_end - str != 1 && tok_end - str != 2) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(MINUTE_IN_HOUR, str - str_begin, tok_end - str, 
str)); + str = tok_end; + + // Check for the time component separator ':' + tok_end = ParseSeparatorToken(str, str_end, ':'); + if (tok_end - str != 1) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Parse the 1 or 2 digit second + tok_end = ParseDigitToken(str, str_end); + if (tok_end - str != 1 && tok_end - str != 2) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SECOND_IN_MINUTE, str - str_begin, tok_end - str, str)); + str = tok_end; + dt_ctx->has_time_toks = true; + + // There is more to parse, there maybe a fractional component. + if (str < str_end) { + tok_end = ParseSeparatorToken(str, str_end, '.'); + if (tok_end - str != 1) return false; + dt_ctx->toks.push_back( + DateTimeFormatToken(SEPARATOR, str - str_begin, tok_end - str, str)); + str = tok_end; + + // Invalid format when there is no fractional component following '.' + if (str > str_end) return false; + + // Parse the fractional component. + // Like the non-lazy path, this will parse up to 9 fractional digits + tok_end = ParseDigitToken(str, str_end); + int num_digits = std::min<int>(9, tok_end - str); + dt_ctx->toks.push_back( + DateTimeFormatToken(FRACTION, str - str_begin, num_digits, str)); + str = tok_end; + + // Invalid format if there is more to parse after the fractional component + if (str < str_end) return false; + } + return true; +} + bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, boost::posix_time::time_duration* t) { + int lazy_len; + DCHECK(TimestampParser::initialized_); DCHECK(d != NULL); DCHECK(t != NULL); @@ -248,6 +392,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, } } + lazy_len = len; // Only process what we have to. if (len > DEFAULT_DATE_TIME_FMT_LEN) len = DEFAULT_DATE_TIME_FMT_LEN; // Determine the default formatting context that's required for parsing. 
@@ -278,7 +423,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, // There is likely a fractional component that's below the expected 9 chars. // We will need to work out which default context to use that corresponds to // the fractional length in the string. - if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN)) { + if (LIKELY(len > DEFAULT_SHORT_DATE_TIME_FMT_LEN) && LIKELY(str[19] == '.')) { switch (str[10]) { case ' ': { dt_ctx = @@ -295,7 +440,7 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, break; } } - } else if (str[2] == ':') { + } else if (str[2] == ':' && str[5] == ':' && isdigit(str[7])) { if (len > DEFAULT_TIME_FRAC_FMT_LEN) len = DEFAULT_TIME_FRAC_FMT_LEN; if (len > DEFAULT_TIME_FMT_LEN && str[8] == '.') { dt_ctx = &DEFAULT_TIME_FRAC_CTX[len - DEFAULT_TIME_FMT_LEN - 1]; @@ -304,12 +449,23 @@ bool TimestampParser::Parse(const char* str, int len, boost::gregorian::date* d, } } } - if (LIKELY(dt_ctx != NULL)) { + + // Generating context lazily as a fall back if default formats fail. + // ParseFormatTokenByStr() does not require a template format string. 
+ if (dt_ctx != nullptr) { return Parse(str, len, *dt_ctx, d, t); } else { - *d = boost::gregorian::date(); - *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time); - return false; + DateTimeFormatContext lazy_ctx; + lazy_ctx.Reset(str, lazy_len); + if (ParseFormatTokensByStr(&lazy_ctx)) { + dt_ctx = &lazy_ctx; + len = lazy_len; + return Parse(str, len, *dt_ctx, d, t); + } else { + *d = boost::gregorian::date(); + *t = boost::posix_time::time_duration(boost::posix_time::not_a_date_time); + return false; + } } } http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/be/src/runtime/timestamp-parse-util.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/timestamp-parse-util.h b/be/src/runtime/timestamp-parse-util.h index bbcc03f..bccf0b7 100644 --- a/be/src/runtime/timestamp-parse-util.h +++ b/be/src/runtime/timestamp-parse-util.h @@ -177,6 +177,35 @@ class TimestampParser { /// Return true if the parse was successful. static bool ParseFormatTokens(DateTimeFormatContext* dt_ctx); + // Parse out the next digit token from the date/time string by checking for contiguous + // digit characters and return a pointer to the end of that token. + // str -- pointer to the string to be parsed + // str_end -- the pointer to the end of the string to be parsed + // Returns the pointer within the string to the end of the valid digit token. + static const char* ParseDigitToken(const char* str, const char* str_end); + + // Parse out the next separator token from the date/time string against an expected + // character. + // str -- pointer to the string to be parsed + // str_end -- the pointer to the end of the string to be parsed + // sep -- the separator char to compare the token to + // Returns the pointer within the string to the end of the valid separator token. 
+ static const char* ParseSeparatorToken( + const char* str, const char* str_end, const char sep); + + /// Parse the date/time string to generate the DateTimeFormatToken required by + /// DateTimeFormatContext. Similar to ParseFormatTokens() this function will take the + /// string and length, then heuristically determine whether the value contains date + // tokens, time tokens, or both. Unlike ParseFormatTokens, it does not require the + // template format string. + /// str -- valid pointer to the string to parse + /// len -- length of the string to parse (must be > 0) + /// dt_ctx -- date/time format context (must contain valid tokens) + /// d -- the date value where the results of the parsing will be placed + /// t -- the time value where the results of the parsing will be placed + /// Returns true if the date/time was successfully parsed. + static bool ParseFormatTokensByStr(DateTimeFormatContext* dt_ctx); + /// Parse a default date/time string. The default timestamp format is: /// yyyy-MM-dd HH:mm:ss.SSSSSSSSS or yyyy-MM-ddTHH:mm:ss.SSSSSSSSS. Either just the /// date or just the time may be specified. 
All components are required in either the http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/data/lazy_timestamp.csv ---------------------------------------------------------------------- diff --git a/testdata/data/lazy_timestamp.csv b/testdata/data/lazy_timestamp.csv new file mode 100644 index 0000000..d44db1d --- /dev/null +++ b/testdata/data/lazy_timestamp.csv @@ -0,0 +1,13 @@ +2001-1-2 +2001-1-02 +2001-01-2 +1:6:8 +01:6:8 +1:06:8 +1:6:08 +1:6:8.123456789101112 +1:6:8.123456789 +1:6:8.12345 +2001-1-2 1:6:8 +2001-1-2 1:6:8.123456 +2001-1-2 1:6:8.123456789101112 http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/workloads/functional-query/queries/QueryTest/exprs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index b3d0ca8..b6909c1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -2959,3 +2959,39 @@ from functional.alltypes where id = 7 ---- TYPES BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT ==== +---- QUERY +# IMPALA-5315: Test support for non zero-padded date/time strings cast as timestamp +select cast('2001-1-21 12:5:30' as timestamp) +---- RESULTS +2001-01-21 12:05:30 +---- TYPES +timestamp +==== +---- QUERY +select cast('2001-1-2 1:5:3.123' as timestamp) +---- RESULTS +2001-01-02 01:05:03.123000000 +---- TYPES +timestamp +==== +---- QUERY +select cast('1:5:3' as timestamp) +---- RESULTS +01:05:03 +---- TYPES +timestamp +==== +---- QUERY +select cast('1:5:3.1234567' as timestamp) +---- RESULTS +01:05:03.123456700 +---- TYPES +timestamp +==== +---- QUERY +select cast('2001-1-2' as timestamp) +---- RESULTS +2001-01-02 00:00:00 +---- TYPES +timestamp +==== 
http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test b/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test new file mode 100644 index 0000000..8258072 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/select-lazy-timestamp.test @@ -0,0 +1,20 @@ +==== +---- QUERY +select ts from lazy_ts +---- RESULTS: VERIFY_IS_EQUAL_SORTED +2001-01-02 00:00:00 +2001-01-02 00:00:00 +2001-01-02 00:00:00 +01:06:08 +01:06:08 +01:06:08 +01:06:08 +01:06:08.123456789 +01:06:08.123456789 +01:06:08.123450000 +2001-01-02 01:06:08 +2001-01-02 01:06:08.123456000 +2001-01-02 01:06:08.123456789 +---- TYPES +timestamp +==== http://git-wip-us.apache.org/repos/asf/impala/blob/0d7787fe/tests/query_test/test_scanners.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index a67b793..a9ba5b8 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -855,3 +855,25 @@ class TestScanTruncatedFiles(ImpalaTestSuite): result = self.execute_query("select count(*) from %s" % fq_tbl_name) assert(len(result.data) == 1) assert(result.data[0] == str(num_rows)) + +class TestUncompressedText(ImpalaTestSuite): + @classmethod + def get_workload(cls): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestUncompressedText, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_constraint(lambda v: + v.get_value('table_format').file_format == 'text' and + v.get_value('table_format').compression_codec == 'none') + + # IMPALA-5315: Test support for date/time in unpadded format + def test_scan_lazy_timestamp(self, vector, unique_database): + 
self.client.execute(("""CREATE TABLE {0}.lazy_ts (ts TIMESTAMP)""").format + (unique_database)) + tbl_loc = get_fs_path("/test-warehouse/%s.db/%s" % (unique_database, + "lazy_ts")) + check_call(['hdfs', 'dfs', '-copyFromLocal', os.environ['IMPALA_HOME'] + + "/testdata/data/lazy_timestamp.csv", tbl_loc]) + self.run_test_case('QueryTest/select-lazy-timestamp', vector, unique_database)