IMPALA-6606: date_trunc() misinterprets MILLENNIUM/CENTURY precision

This change fixes the following correctness bug: date_trunc() function
returns the wrong result when truncating a TIMESTAMP value to
'MILLENNIUM' precision. E.g.:

select now(), date_trunc('millennium', now());
+-------------------------------+---------------------------------+
| now()                         | date_trunc('millennium', now()) |
+-------------------------------+---------------------------------+
| 2017-12-05 14:00:30.296812000 | 2000-01-01 00:00:00             |
+-------------------------------+---------------------------------+

The first year of the current millennium should be 2001.

'CENTURY' precision has the same issue.

Change-Id: I4f4aac17875dbf17dcabff25473ffeb006fce20b
Reviewed-on: http://gerrit.cloudera.org:8080/9508
Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/dd088347
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/dd088347
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/dd088347

Branch: refs/heads/master
Commit: dd0883479384a76c6d29f0f7501c9400d0307644
Parents: 772afb0
Author: Attila Jeges <atti...@cloudera.com>
Authored: Tue Mar 6 17:01:37 2018 +0100
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Wed Mar 7 21:37:02 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc    | 44 ++++++++++++++++++++++++---------------
 be/src/exprs/udf-builtins.cc | 25 +++++++++++++++-------
 2 files changed, 44 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/dd088347/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 0b9a2e1..e02ff6a 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -8338,13 +8338,15 @@ TEST_F(ExprTest, UuidTest) {
 
 TEST_F(ExprTest, DateTruncTest) {
   TestTimestampValue("date_trunc('MILLENNIUM', '2016-05-08 10:30:00')",
-      TimestampValue::Parse("2000-01-01 00:00:00"));
-  TestTimestampValue("date_trunc('MILLENNIUM', '2000-01-01 00:00:00')",
-      TimestampValue::Parse("2000-01-01 00:00:00"));
+      TimestampValue::Parse("2001-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MILLENNIUM', '3000-12-31 
23:59:59.999999999')",
+      TimestampValue::Parse("2001-01-01 00:00:00"));
+  TestTimestampValue("date_trunc('MILLENNIUM', '3001-01-01 00:00:00')",
+      TimestampValue::Parse("3001-01-01 00:00:00"));
   TestTimestampValue("date_trunc('CENTURY', '2016-05-08 10:30:00')",
-      TimestampValue::Parse("2000-01-01 00:00:00  "));
+      TimestampValue::Parse("2001-01-01 00:00:00  "));
   TestTimestampValue("date_trunc('CENTURY', '2116-05-08 10:30:00')",
-      TimestampValue::Parse("2100-01-01 00:00:00"));
+      TimestampValue::Parse("2101-01-01 00:00:00"));
   TestTimestampValue("date_trunc('DECADE', '2116-05-08 10:30:00')",
       TimestampValue::Parse("2110-01-01 00:00:00"));
   TestTimestampValue("date_trunc('YEAR', '2016-05-08 10:30:00')",
@@ -8381,9 +8383,9 @@ TEST_F(ExprTest, DateTruncTest) {
 
   // Test corner cases.
   TestTimestampValue("date_trunc('MILLENNIUM', '9999-12-31 
23:59:59.999999999')",
-      TimestampValue::Parse("9000-01-01 00:00:00"));
+      TimestampValue::Parse("9001-01-01 00:00:00"));
   TestTimestampValue("date_trunc('CENTURY', '9999-12-31 23:59:59.999999999')",
-      TimestampValue::Parse("9900-01-01 00:00:00"));
+      TimestampValue::Parse("9901-01-01 00:00:00"));
   TestTimestampValue("date_trunc('DECADE', '9999-12-31 23:59:59.999999999')",
       TimestampValue::Parse("9990-01-01 00:00:00"));
   TestTimestampValue("date_trunc('YEAR', '9999-12-31 23:59:59.999999999')",
@@ -8392,10 +8394,6 @@ TEST_F(ExprTest, DateTruncTest) {
       TimestampValue::Parse("9999-12-01 00:00:00"));
   TestTimestampValue("date_trunc('WEEK', '9999-12-31 23:59:59.999999999')",
       TimestampValue::Parse("9999-12-27 00:00:00"));
-  TestTimestampValue("date_trunc('WEEK', '1400-01-06 23:59:59.999999999')",
-      TimestampValue::Parse("1400-01-06 00:00:00"));
-  TestTimestampValue("date_trunc('WEEK', '1400-01-07 23:59:59.999999999')",
-      TimestampValue::Parse("1400-01-06 00:00:00"));
   TestTimestampValue("date_trunc('DAY', '9999-12-31 23:59:59.999999999')",
       TimestampValue::Parse("9999-12-31 00:00:00"));
   TestTimestampValue("date_trunc('HOUR', '9999-12-31 23:59:59.999999999')",
@@ -8409,8 +8407,6 @@ TEST_F(ExprTest, DateTruncTest) {
   TestTimestampValue("date_trunc('MICROSECONDS', '9999-12-31 
23:59:59.999999999')",
       TimestampValue::Parse("9999-12-31 23:59:59.999999"));
 
-  TestTimestampValue("date_trunc('CENTURY', '1400-01-01 00:00:00')",
-      TimestampValue::Parse("1400-01-01 00:00:00"));
   TestTimestampValue("date_trunc('DECADE', '1400-01-01 00:00:00')",
       TimestampValue::Parse("1400-01-01 00:00:00"));
   TestTimestampValue("date_trunc('YEAR', '1400-01-01 00:00:00')",
@@ -8430,11 +8426,25 @@ TEST_F(ExprTest, DateTruncTest) {
   TestTimestampValue("date_trunc('MICROSECONDS', '1400-01-01 00:00:00')",
       TimestampValue::Parse("1400-01-01 00:00:00"));
 
-  // valid input with invalid output
-  TestIsNull("date_trunc('MILLENNIUM', '1416-05-08 10:30:00')", 
TYPE_TIMESTAMP);
-  TestIsNull("date_trunc('MILLENNIUM', '1999-12-31 11:59:59.999999')", 
TYPE_TIMESTAMP);
+  // Test lower limit for century
+  TestIsNull("date_trunc('CENTURY', '1400-01-01 00:00:00')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('CENTURY', '1400-12-31 23:59:59.999999999')", 
TYPE_TIMESTAMP);
+  TestTimestampValue("date_trunc('CENTURY', '1401-01-01 00:00:00')",
+      TimestampValue::Parse("1401-01-01 00:00:00"));
+
+  // Test lower limit for millennium
+  TestIsNull("date_trunc('MILLENNIUM', '1400-01-01 00:00:00')", 
TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('MILLENNIUM', '2000-12-31 23:59:59.999999999')", 
TYPE_TIMESTAMP);
+  TestTimestampValue("date_trunc('MILLENNIUM', '2001-01-01 00:00:00')",
+      TimestampValue::Parse("2001-01-01 00:00:00"));
+
+  // Test lower limit for week
   TestIsNull("date_trunc('WEEK', '1400-01-01 00:00:00')", TYPE_TIMESTAMP);
-  TestIsNull("date_trunc('WEEK', '1400-01-05 00:00:00')", TYPE_TIMESTAMP);
+  TestIsNull("date_trunc('WEEK', '1400-01-05 23:59:59.999999999')", 
TYPE_TIMESTAMP);
+  TestTimestampValue("date_trunc('WEEK', '1400-01-06 00:00:00')",
+      TimestampValue::Parse("1400-01-06 00:00:00"));
+  TestTimestampValue("date_trunc('WEEK', '1400-01-07 23:59:59.999999999')",
+      TimestampValue::Parse("1400-01-06 00:00:00"));
 
   // Test invalid input.
   TestIsNull("date_trunc('HOUR', '12202010')", TYPE_TIMESTAMP);

http://git-wip-us.apache.org/repos/asf/impala/blob/dd088347/be/src/exprs/udf-builtins.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/udf-builtins.cc b/be/src/exprs/udf-builtins.cc
index ebfca35..61594ed 100644
--- a/be/src/exprs/udf-builtins.cc
+++ b/be/src/exprs/udf-builtins.cc
@@ -140,22 +140,27 @@ TruncUnit StrToTruncUnit(FunctionContext* ctx, const 
StringVal& unit_str) {
   }
 }
 
-// Truncate to first day of Milllenium
-TimestampValue TruncMillenium(const date& orig_date) {
-  date new_date(orig_date.year() / 1000 * 1000, 1, 1);
+// Truncate to first day of millennium
+TimestampValue TruncMillennium(const date& orig_date) {
+  DCHECK_GT(orig_date.year(), 2000);
+  // First year of current millennium is 2001
+  date new_date((orig_date.year() - 1) / 1000 * 1000 + 1, 1, 1);
   time_duration new_time(0, 0, 0, 0);
   return TimestampValue(new_date, new_time);
 }
 
 // Truncate to first day of century
 TimestampValue TruncCentury(const date& orig_date) {
-  date new_date(orig_date.year() / 100 * 100, 1, 1);
+  DCHECK_GT(orig_date.year(), 1400);
+  // First year of current century is 2001
+  date new_date((orig_date.year() - 1) / 100 * 100 + 1, 1, 1);
   time_duration new_time(0, 0, 0, 0);
   return TimestampValue(new_date, new_time);
 }
 
 // Truncate to first day of decade
 TimestampValue TruncDecade(const date& orig_date) {
+  // Decades start with years ending in '0'.
   date new_date(orig_date.year() / 10 * 10, 1, 1);
   time_duration new_time(0, 0, 0, 0);
   return TimestampValue(new_date, new_time);
@@ -270,9 +275,14 @@ TimestampVal DoTrunc(
   // check for invalid or malformed timestamps
   switch (trunc_unit) {
     case TruncUnit::MILLENNIUM:
-      // for millenium < 2000 year value goes to 1000 (outside the supported 
range)
+      // for millenium <= 2000 year value goes to 1001 (outside the supported 
range)
       if (orig_date.is_special()) return TimestampVal::null();
-      if (orig_date.year() < 2000) return TimestampVal::null();
+      if (orig_date.year() <= 2000) return TimestampVal::null();
+      break;
+    case TruncUnit::CENTURY:
+      // for century <= 1400 year value goes to 1301 (outside the supported 
range)
+      if (orig_date.is_special()) return TimestampVal::null();
+      if (orig_date.year() <= 1400) return TimestampVal::null();
       break;
     case TruncUnit::WEEK:
       // anything less than 1400-1-6 we have to move to year 1399
@@ -286,7 +296,6 @@ TimestampVal DoTrunc(
     case TruncUnit::W:
     case TruncUnit::DAY:
     case TruncUnit::DAY_OF_WEEK:
-    case TruncUnit::CENTURY:
     case TruncUnit::DECADE:
       if (orig_date.is_special()) return TimestampVal::null();
       break;
@@ -330,7 +339,7 @@ TimestampVal DoTrunc(
       ret = TruncMinute(orig_date, orig_time);
       break;
     case TruncUnit::MILLENNIUM:
-      ret = TruncMillenium(orig_date);
+      ret = TruncMillennium(orig_date);
       break;
     case TruncUnit::CENTURY:
       ret = TruncCentury(orig_date);

Reply via email to