IMPALA-4546: Fix Moscow timezone conversion after 2014

In 2014 Moscow timezone rules changed from UTC+3 with no DST to UTC+4
with no DST. A special case has been added to timestamp functions to
handle this.

Testing:
Added BE Expr tests.

Change-Id: Id6e3f2c9f6ba29749a26bc1087e664637bc02528
Reviewed-on: http://gerrit.cloudera.org:8080/5969
Reviewed-by: Taras Bobrovytsky <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/545bfcfe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/545bfcfe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/545bfcfe

Branch: refs/heads/master
Commit: 545bfcfe673dd24356eb8f84b664b03b6230b01e
Parents: b48bdaa
Author: Taras Bobrovytsky <[email protected]>
Authored: Thu Feb 9 20:53:31 2017 -0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Fri Mar 3 00:41:35 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc           | 80 +++++++++++++++++++++++---------
 be/src/exprs/timestamp-functions.cc | 61 +++++++++++++++++-------
 be/src/exprs/timezone_db.cc         |  4 +-
 be/src/exprs/timezone_db.h          | 15 ++++--
 4 files changed, 115 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 604644e..0ca2da2 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -4416,28 +4416,66 @@ TEST_F(ExprTest, UnaryOperators) {
   TestValue("-1 & 8", TYPE_TINYINT, 8);
 }
 
-// TODO: I think a lot of these casts are not necessary and we should fix this
-TEST_F(ExprTest, TimestampFunctions) {
-  // Regression test for IMPALA-4209
-  TestStringValue("cast(from_utc_timestamp(cast(1301180400 as timestamp),"
-      "'Europe/Moscow') as string)", "2011-03-27 03:00:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1301180399 as timestamp),"
-      "'Europe/Moscow') as string)", "2011-03-27 01:59:59");
-  TestStringValue("cast(from_utc_timestamp(cast(1288404000 as timestamp),"
-      "'Europe/Moscow') as string)", "2010-10-30 06:00:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1288584000 as timestamp),"
-      "'Europe/Moscow') as string)", "2010-11-01 07:00:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1301104740 as timestamp),"
-      "'Europe/Moscow') as string)", "2011-03-26 04:59:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1301277600 as timestamp),"
-      "'Europe/Moscow') as string)", "2011-03-28 06:00:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1324947600 as timestamp),"
-      "'Europe/Moscow') as string)", "2011-12-27 05:00:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1325725200 as timestamp),"
-      "'Europe/Moscow') as string)", "2012-01-05 05:00:00");
-  TestStringValue("cast(from_utc_timestamp(cast(1333594800 as timestamp),"
-      "'Europe/Moscow') as string)", "2012-04-05 07:00:00");
+TEST_F(ExprTest, MoscowTimezoneConversion) {
+#pragma push_macro("UTC_TO_MSC")
+#pragma push_macro("MSC_TO_UTC")
+#define UTC_TO_MSC(X) ("cast(from_utc_timestamp('" X "', 'Europe/Moscow') as 
string)")
+#define MSC_TO_UTC(X) ("cast(to_utc_timestamp('" X "', 'Europe/Moscow') as 
string)")
+
+  // IMPALA-4209: Moscow time change in 2011.
+  // Last DST change before the transition.
+  TestStringValue(UTC_TO_MSC("2010-10-30 22:59:59"), "2010-10-31 02:59:59");
+  TestStringValue(UTC_TO_MSC("2010-10-30 23:00:00"), "2010-10-31 02:00:00");
+  TestStringValue(MSC_TO_UTC("2010-10-31 01:59:59"), "2010-10-30 21:59:59");
+  // Since 2am to 2:59:59.999...am MSC happens twice, the ambiguity gets 
resolved by
+  // returning null.
+  TestIsNull(MSC_TO_UTC("2010-10-31 02:00:00"), TYPE_STRING);
+  TestIsNull(MSC_TO_UTC("2010-10-31 02:59:59"), TYPE_STRING);
+  TestStringValue(MSC_TO_UTC("2010-10-31 03:00:00"), "2010-10-31 00:00:00");
+
+  // Moscow time transitions to UTC+4.
+  TestStringValue(UTC_TO_MSC("2011-03-26 22:59:59"), "2011-03-27 01:59:59");
+  TestStringValue(UTC_TO_MSC("2011-03-26 23:00:00"), "2011-03-27 03:00:00");
+  TestStringValue(MSC_TO_UTC("2011-03-27 01:59:59"), "2011-03-26 22:59:59");
+  // Since 2am to 2:59:59.999...am MSC happens twice, the ambiguity gets 
resolved by
+  // returning null.
+  TestIsNull(MSC_TO_UTC("2011-03-27 02:00:00"), TYPE_STRING);
+  TestIsNull(MSC_TO_UTC("2011-03-27 02:59:59"), TYPE_STRING);
+  TestStringValue(MSC_TO_UTC("2011-03-27 03:00:00"), "2011-03-26 23:00:00");
+
+  // No more DST after the transition.
+  TestStringValue(UTC_TO_MSC("2011-12-20 09:00:00"), "2011-12-20 13:00:00");
+  TestStringValue(UTC_TO_MSC("2012-06-20 09:00:00"), "2012-06-20 13:00:00");
+  TestStringValue(UTC_TO_MSC("2012-12-20 09:00:00"), "2012-12-20 13:00:00");
+  TestStringValue(MSC_TO_UTC("2011-12-20 13:00:00"), "2011-12-20 09:00:00");
+  TestStringValue(MSC_TO_UTC("2012-06-20 13:00:00"), "2012-06-20 09:00:00");
+  TestStringValue(MSC_TO_UTC("2012-12-20 13:00:00"), "2012-12-20 09:00:00");
+
+  // IMPALA-4546: Moscow time change in 2014.
+  // UTC+4 is changed to UTC+3
+  TestStringValue(UTC_TO_MSC("2014-10-25 21:59:59"), "2014-10-26 01:59:59");
+  TestStringValue(UTC_TO_MSC("2014-10-25 22:00:00"), "2014-10-26 01:00:00");
+  TestStringValue(UTC_TO_MSC("2014-10-25 23:00:00"), "2014-10-26 02:00:00");
+  TestStringValue(MSC_TO_UTC("2014-10-26 00:59:59"), "2014-10-25 20:59:59");
+  // Since 1am to 1:59:59.999...am MSC happens twice, the ambiguity gets 
resolved by
+  // returning null.
+  TestIsNull(MSC_TO_UTC("2014-10-26 01:00:00"), TYPE_STRING);
+  TestIsNull(MSC_TO_UTC("2014-10-26 01:59:59"), TYPE_STRING);
+  TestStringValue(MSC_TO_UTC("2014-10-26 02:00:00"), "2014-10-25 23:00:00");
+
+  // Still no DST after the transition.
+  TestStringValue(UTC_TO_MSC("2014-12-20 09:00:00"), "2014-12-20 12:00:00");
+  TestStringValue(UTC_TO_MSC("2015-06-20 09:00:00"), "2015-06-20 12:00:00");
+  TestStringValue(UTC_TO_MSC("2015-12-20 09:00:00"), "2015-12-20 12:00:00");
+  TestStringValue(MSC_TO_UTC("2014-12-20 12:00:00"), "2014-12-20 09:00:00");
+  TestStringValue(MSC_TO_UTC("2015-06-20 12:00:00"), "2015-06-20 09:00:00");
+  TestStringValue(MSC_TO_UTC("2015-12-20 12:00:00"), "2015-12-20 09:00:00");
+
+#pragma pop_macro("MSC_TO_UTC")
+#pragma pop_macro("UTC_TO_MSC")
+}
 
+TEST_F(ExprTest, TimestampFunctions) {
   // Regression for IMPALA-1105
   TestIsNull("cast(cast('NOTATIMESTAMP' as timestamp) as string)", 
TYPE_STRING);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/timestamp-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timestamp-functions.cc 
b/be/src/exprs/timestamp-functions.cc
index a63438c..89bb498 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -85,7 +85,7 @@ TimestampVal TimestampFunctions::FromUtc(FunctionContext* 
context,
 
   const StringValue& tz_string_value = 
StringValue::FromStringVal(tz_string_val);
   time_zone_ptr timezone = TimezoneDatabase::FindTimezone(
-      string(tz_string_value.ptr, tz_string_value.len), ts_value);
+      string(tz_string_value.ptr, tz_string_value.len), ts_value, true);
   if (timezone == NULL) {
     // This should return null. Hive just ignores it.
     stringstream ss;
@@ -123,15 +123,25 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* 
context,
 
   const StringValue& tz_string_value = 
StringValue::FromStringVal(tz_string_val);
   time_zone_ptr timezone = TimezoneDatabase::FindTimezone(
-      string(tz_string_value.ptr, tz_string_value.len), ts_value);
+      string(tz_string_value.ptr, tz_string_value.len), ts_value, false);
   // This should raise some sort of error or at least null. Hive Just ignores 
it.
-  if (timezone == NULL) {
+  if (UNLIKELY(timezone == NULL)) {
     stringstream ss;
     ss << "Unknown timezone '" << tz_string_value << "'" << endl;
     context->AddWarning(ss.str().c_str());
     return ts_val;
   }
 
+  if (UNLIKELY(timezone == TimezoneDatabase::TIMEZONE_MSK_PRE_2014 &&
+      ts_value.date().day_number() == 2456957 &&
+      ts_value.time().hours() >= 1)) {
+    // On October 27, 2014 at 1:00 am MSC, Moscow time transitions from UTC+4 
with no DST
+    // to UTC+3 with no DST. Because of this, 1am to 1:59:59.999...am MSC 
happens twice.
+    // We want to be consistent with the existing rule of "in case of 
ambiguity return
+    // NULL".
+    DCHECK_LT(ts_value.time().hours(), 2);
+    return TimestampVal::null();
+  }
   try {
     local_date_time lt(ts_value.date(), ts_value.time(), timezone,
         local_date_time::NOT_DATE_TIME_ON_ERROR);
@@ -186,21 +196,36 @@ void 
TimestampFunctions::UnixAndFromUnixClose(FunctionContext* context,
   }
 }
 
-time_zone_ptr TimezoneDatabase::FindTimezone(const string& tz, const 
TimestampValue& tv) {
-  // The backing database does not capture some subtleties, there are special 
cases
-  if ((tv.date().year() < 2011 || (tv.date().year() == 2011 && 
tv.date().month() < 4)) &&
-      (iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", 
tz))) {
-    // We transition in pre April 2011 from using the tz_database_ to a custom 
rule
-    // Russia stopped using daylight savings in 2011, the tz_database_ is
-    // set up assuming Russia uses daylight saving every year.
-    // Sun, Mar 27, 2:00AM Moscow clocks moved forward +1 hour (a total of GMT 
+4)
-    // Specifically,
-    // UTC Time 26 Mar 2011 22:59:59 +0000 ===> Sun Mar 27 01:59:59 MSK 2011
-    // UTC Time 26 Mar 2011 23:00:00 +0000 ===> Sun Mar 27 03:00:00 MSK 2011
-    // This means in 2011, The database rule will apply DST starting March 26 
2011.
-    // This will be a correct +4 offset, and the database rule can apply until
-    // Oct 31 when tz_database_ will incorrectly attempt to turn clocks 
backwards 1 hour.
-    return TIMEZONE_MSK_PRE_2011_DST;
+time_zone_ptr TimezoneDatabase::FindTimezone(
+    const string& tz, const TimestampValue& tv, bool tv_in_utc) {
+  // The backing database does not handle timezone rule changes.
+  if (iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", 
tz)) {
+    if (tv.date().year() < 2011 || (tv.date().year() == 2011 && 
tv.date().month() < 4)) {
+      // Between January 19, 1992 and March 27, 2011 Moscow time was UTC+3 
with DST. On
+      // March 27, 2011 Moscow time transitioned to UTC+4 with no DST. NOTE: 
We currently
+      // do not handle Moscow time conversions for dates before January 19, 
1992
+      // correctly (Impala incorrectly thinks the Moscow timezone is UTC+3 
with DST
+      // instead of UTC+2 with DST for those dates).
+      return TIMEZONE_MSK_PRE_2011_DST;
+    }
+    // On October 26, 2014 at 22:00:00 UTC, Moscow time transitioned to UTC+3 
with no
+    // DST. We have to make a precise time check here, unlike in the case 
above, because
+    // we can't rely on the timezone database to handle the moment of 
transition because
+    // the rule change does not coincide with a DST change.
+    const int MSK_TRANSITION_DAY = 2456956;
+    const int MSK_TRANSITION_HOUR_UTC = 22;
+    const int MSK_UTC_OFFSET = 4;
+    if (tv.date().day_number() < MSK_TRANSITION_DAY) return 
TIMEZONE_MSK_PRE_2014;
+    if (tv_in_utc) {
+      if (tv.date().day_number() == MSK_TRANSITION_DAY &&
+          tv.time().hours() < MSK_TRANSITION_HOUR_UTC) {
+        return TIMEZONE_MSK_PRE_2014;
+      }
+    } else if (tv.date().day_number() < MSK_TRANSITION_DAY + 1 || (
+        tv.date().day_number() == MSK_TRANSITION_DAY + 1 &&
+        tv.time().hours() < (MSK_TRANSITION_HOUR_UTC + MSK_UTC_OFFSET) % 24)) {
+      return TIMEZONE_MSK_PRE_2014;
+    }
   }
 
   // See if they specified a zone id

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/timezone_db.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/timezone_db.cc b/be/src/exprs/timezone_db.cc
index ae58f86..c7ae888 100644
--- a/be/src/exprs/timezone_db.cc
+++ b/be/src/exprs/timezone_db.cc
@@ -36,6 +36,8 @@ vector<string> TimezoneDatabase::tz_region_list_;
 
 const time_zone_ptr TimezoneDatabase::TIMEZONE_MSK_PRE_2011_DST(time_zone_ptr(
     new 
posix_time_zone(string("MSK+03MSK+01,M3.5.0/02:00:00,M10.5.0/03:00:00"))));
+const time_zone_ptr TimezoneDatabase::TIMEZONE_MSK_PRE_2014(time_zone_ptr(
+    new posix_time_zone(string("MSK+04"))));
 
 const char* TimezoneDatabase::TIMEZONE_DATABASE_STR = "\"ID\",\"STD 
ABBR\",\"STD NAME\",\"DST ABBR\",\"DST NAME\",\"GMT offset\",\"DST 
adjustment\",\"DST Start Date rule\",\"Start time\",\"DST End date rule\",\"End 
time\"\n\
 \"ACT\",\"ACST\",\"Australian Central Standard Time (Northern 
Territory)\",\"\",\"\",\"+09:30:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\
@@ -502,7 +504,7 @@ const char* TimezoneDatabase::TIMEZONE_DATABASE_STR = 
"\"ID\",\"STD ABBR\",\"STD
 \"Europe/Mariehamn\",\"EET\",\"Eastern European Time\",\"EEST\",\"Eastern 
European Summer 
Time\",\"+02:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\
 \"Europe/Minsk\",\"MSK\",\"Moscow Standard 
Time\",\"\",\"\",\"+03:00:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\
 \"Europe/Monaco\",\"CET\",\"Central European Time\",\"CEST\",\"Central 
European Summer 
Time\",\"+01:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\
-\"Europe/Moscow\",\"MSK\",\"Moscow Standard 
Time\",\"\",\"\",\"+04:00:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\
+\"Europe/Moscow\",\"MSK\",\"Moscow Standard 
Time\",\"\",\"\",\"+03:00:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\
 \"Europe/Nicosia\",\"EET\",\"Eastern European Time\",\"EEST\",\"Eastern 
European Summer 
Time\",\"+02:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\
 \"Europe/Oslo\",\"CET\",\"Central European Time\",\"CEST\",\"Central European 
Summer 
Time\",\"+01:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\
 \"Europe/Paris\",\"CET\",\"Central European Time\",\"CEST\",\"Central European 
Summer 
Time\",\"+01:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/timezone_db.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/timezone_db.h b/be/src/exprs/timezone_db.h
index 76d08d5..20ea205 100644
--- a/be/src/exprs/timezone_db.h
+++ b/be/src/exprs/timezone_db.h
@@ -34,15 +34,20 @@ class TimezoneDatabase {
   /// Set up the static timezone database.
   static Status Initialize();
 
-  /// Converts the name of a timezone to a boost timezone object.
-  /// Some countries change their timezones, the tiemstamp is required to 
correctly
-  /// determine the timezone information.
+  /// Converts the name of a timezone to a boost timezone object. In some 
cases, the
+  /// timestamp is required to determine the timezone because occasionally 
timezone
+  /// rules change. The flag "tv_in_utc" specifies whether the given timestamp 
value is
+  /// in the UTC timezone. If the flag is false, then the timestamp value is 
assumed to
+  /// be in the 'tz' timezone.
   static boost::local_time::time_zone_ptr FindTimezone(const std::string& tz,
-      const TimestampValue& tv);
+      const TimestampValue& tv, bool tv_in_utc);
 
-  /// Moscow Timezone No Daylight Savings Time (GMT+4), for use after March 
2011
+  /// Moscow timezone UTC+3 with DST, for use before March 27, 2011.
   static const boost::local_time::time_zone_ptr TIMEZONE_MSK_PRE_2011_DST;
 
+  /// Moscow timezone UTC+4 with no DST, for use before October 26, 2014.
+  static const boost::local_time::time_zone_ptr TIMEZONE_MSK_PRE_2014;
+
  private:
   static const char* TIMEZONE_DATABASE_STR;
   static boost::local_time::tz_database tz_database_;

Reply via email to